diff options
author | Jim Plank <plank@cs.utk.edu> | 2013-03-04 17:06:43 -0500 |
---|---|---|
committer | Jim Plank <plank@cs.utk.edu> | 2013-03-04 17:06:43 -0500 |
commit | 47896e9ddcd26da694d6775bf7fcadc7feebf385 (patch) | |
tree | 379f1f45289ebc08ac6d91fc0f0ff618f9c236c4 | |
parent | 4d5f453827d1d100d83d003d807edd52672c943e (diff) | |
download | gf-complete-47896e9ddcd26da694d6775bf7fcadc7feebf385.tar.gz |
Killing all of these junk files. They should not be in the repository.
-rwxr-xr-x | junk | 8 | ||||
-rwxr-xr-x | junk-pick-best-output | bin | 28584 -> 0 bytes | |||
-rw-r--r-- | junk-pick-best-output.cpp | 78 | ||||
-rw-r--r-- | junk-proc.awk | 11 | ||||
-rw-r--r-- | junk-save.c | 658 | ||||
-rw-r--r-- | junk-w16-backup.c | 1585 | ||||
-rw-r--r-- | junk-w16-timing-tests.sh | 12 | ||||
-rw-r--r-- | junk-w2.eps | 203 | ||||
-rw-r--r-- | junk-w32-backup.c | 1337 | ||||
-rw-r--r-- | junk-w32-single-time.c | 16 | ||||
-rw-r--r-- | junk-w4-out.txt | 60 | ||||
-rw-r--r-- | junk-w4-timing-out.txt | 792 | ||||
-rw-r--r-- | junk-w4-timing-tests.sh | 11 | ||||
-rw-r--r-- | junk-w4-timing.jgr | 11 | ||||
-rw-r--r-- | junk-w4.jgr | 6 | ||||
-rw-r--r-- | junk-w8-timing-out.txt | 936 | ||||
-rw-r--r-- | junk-w8-timing-tests.sh | 13 | ||||
-rw-r--r-- | junk-w8-timing.jgr | 11 | ||||
-rw-r--r-- | junk.c | 18 | ||||
-rw-r--r-- | junk.ps | 199 | ||||
-rw-r--r-- | junk.sh | 14 | ||||
-rw-r--r-- | junk_gf_unit.c | 957 |
22 files changed, 0 insertions, 6936 deletions
@@ -1,8 +0,0 @@ -gf_54 -gf_div -gf_methods -gf_mult -gf_time -gf_unit -junk -junk-pick-best-output diff --git a/junk-pick-best-output b/junk-pick-best-output Binary files differdeleted file mode 100755 index eb0db93..0000000 --- a/junk-pick-best-output +++ /dev/null diff --git a/junk-pick-best-output.cpp b/junk-pick-best-output.cpp deleted file mode 100644 index 4f5fed8..0000000 --- a/junk-pick-best-output.cpp +++ /dev/null @@ -1,78 +0,0 @@ -#include <string> -#include <vector> -#include <list> -#include <algorithm> -#include <map> -#include <set> -#include <iostream> -#include <sstream> -#include <cstdio> -#include <cstdlib> -using namespace std; - -#define VIT(i, v) for (i = 0; i < v.size(); i++) -#define IT(it, ds) for (it = ds.begin(); it != ds.end(); it++) -#define FUP(i, n) for (i = 0; i < n; i++) - -typedef map<int, string> ISmap; -typedef map<int, int> IImap; -typedef map<string, double> SDmap; - -typedef ISmap::iterator ISmit; -typedef IImap::iterator IImit; -typedef SDmap::iterator SDmit; - -typedef vector <string> SVec; - -void StoSVec(string &s, SVec &sv) -{ - istringstream ss; - string s2; - - ss.clear(); - ss.str(s); - while (ss >> s2) sv.push_back(s2); -} - -main() -{ - string s, k; - double d, b; - int i; - SVec sv; - SDmap bmap; - SDmit bmit; - - while (getline(cin, s)) { - sv.clear(); - StoSVec(s, sv); - - if (sv[0] == "Seed:") { - b = 0; - for (i = 0; i < 2; i++) { - getline(cin, s); - sv.clear(); - StoSVec(s, sv); - sscanf(sv[3].c_str(), "%lf", &d); - if (d > b) b = d; - } - getline(cin, s); - sv.clear(); - StoSVec(s, sv); - k = sv[2]; - k += " "; - k += sv[3]; - for (i = 4; i < sv.size(); i++) { - if (sv[i] != "-") { - k += " "; - k += sv[i]; - } - } - if (bmap[k] < b) bmap[k] = b; - } - } - - IT(bmit, bmap) { - printf("%10.4lf %s\n", bmit->second, bmit->first.c_str()); - } -} diff --git a/junk-proc.awk b/junk-proc.awk deleted file mode 100644 index ed1f4cd..0000000 --- a/junk-proc.awk +++ /dev/null @@ -1,11 +0,0 @@ -($1 == "Seed:") { l = 0; n++; t=0 } -{ if (l >= 1 && l <= 4) { - t += $4 - if (l == 4) avg = t/4.0 - } - if (l == 5) { - printf("xaxis max %d hash_label at %d : %s\n", n+1, n, $0 ) - printf("newcurve marktype xbar marksize 1 cfill 1 1 0 pts %d %.2lf\n", n, avg); - } - l++ -} diff --git a/junk-save.c b/junk-save.c deleted file mode 100644 index 9b73025..0000000 --- a/junk-save.c +++ /dev/null @@ -1,658 +0,0 @@ - -/* - c = gf.multiply.w32(&gf, a, b); - tested = 0; - -*/ - /* If this is not composite, then first test against the default: */ - -/* - if (h->mult_type != GF_MULT_COMPOSITE) { - tested = 1; - d = gf_def.multiply.w32(&gf_def, a, b); - - if (c != d) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" The default returned %x\n", d); - exit(1); - } - } - -*/ - /* Now, we also need to double-check, in case the default is wanky, and when - we're performing composite operations. Start with 0 and 1: */ - -/* - if (a == 0 || b == 0 || a == 1 || b == 1) { - tested = 1; - if (((a == 0 || b == 0) && c != 0) || - (a == 1 && c != b) || - (b == 1 && c != a)) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x, which is clearly wrong.\n", a, b, c); - exit(1); - } - -*/ - /* If division or inverses are defined, let's test all combinations to make sure - that the operations are consistent with each other. */ - -/* - } else { - if ((c & mask) != c) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x, which is too big.\n", a, b, c); - exit(1); - } - - } - if (gf.inverse.w32 != NULL && (a != 0 || b != 0)) { - tested = 1; - if (a != 0) { - ai = gf.inverse.w32(&gf, a); - - if (gf.multiply.w32(&gf, c, ai) != b) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", - a, ai, c, ai, gf.multiply.w32(&gf, c, ai)); - exit(1); - } - } - if (b != 0) { - bi = gf.inverse.w32(&gf, b); - if (gf.multiply.w32(&gf, c, bi) != a) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", - b, bi, c, bi, gf.multiply.w32(&gf, c, bi)); - exit(1); - } - } - } - if (gf.divide.w32 != NULL && (a != 0 || b != 0)) { - tested = 1; - - if (a != 0) { - ai = gf.divide.w32(&gf, c, a); - - if (ai != b) { - printf("Error in single multiplication/division (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" gf.divide.w32() of %x and %x returned %x\n", c, a, ai); - exit(1); - } - } - if (b != 0) { - bi = gf.divide.w32(&gf, c, b); - - if (bi != a) { - printf("Error in single multiplication/division (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" gf.divide.w32() of %x and %x returned %x\n", c, b, bi); - exit(1); - } - } - } - - if (!tested) problem("There is no way to test multiplication.\n"); - } -*/ - -/* - if (region) { - - if (w == 4) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r8b = (uint8_t *) malloc(REGION_SIZE); - r8c = (uint8_t *) malloc(REGION_SIZE); - r8d = (uint8_t *) malloc(REGION_SIZE); - fill_random_region(r8b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 16; a++) { - fill_random_region(r8c, REGION_SIZE); - memcpy(r8d, r8c, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - b = (r8b[i] >> 4); - c = (r8c[i] >> 4); - d = (r8d[i] >> 4); - if (!xor && gf.multiply.w32(&gf, a, b) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); - printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" %d %d %d %d\n", a, b, c, d); - printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - b = (r8b[i] & 0xf); - c = (r8c[i] & 0xf); - d = (r8d[i] & 0xf); - if (!xor && gf.multiply.w32(&gf, a, b) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); - printf("i=%d. 0x%x 0x%x 0x%x 0x%x\n", i, a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" (%d * %d ^ %d) should equal %d - equals %d\n", - a, b, d, (gf.multiply.w32(&gf, a, b) ^ d), c); - printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 16; a++) { - fill_random_region(r8b, REGION_SIZE); - memcpy(r8d, r8b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - b = (r8b[i] >> 4); - d = (r8d[i] >> 4); - if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { - printf("i=%d. %d %d %d\n", i, a, b, d); - printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - b = (r8b[i] & 0xf); - d = (r8d[i] & 0xf); - if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { - printf("%d %d %d\n", a, b, d); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r8b); - free(r8c); - free(r8d); - } - } else if (w == 8) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r8b = (uint8_t *) malloc(REGION_SIZE); - r8c = (uint8_t *) malloc(REGION_SIZE); - r8d = (uint8_t *) malloc(REGION_SIZE); - fill_random_region(r8b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 256; a++) { - fill_random_region(r8c, REGION_SIZE); - memcpy(r8d, r8c, REGION_SIZE); - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - sindex = 0; - eindex = REGION_SIZE; - } else { - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - } - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - b = get_alt_map_2w8(i, (uint8_t*)r8b, REGION_SIZE / 2); - c = get_alt_map_2w8(i, (uint8_t*)r8c, REGION_SIZE / 2); - d = get_alt_map_2w8(i, (uint8_t*)r8d, REGION_SIZE / 2); - } else { - b = r8b[i]; - c = r8c[i]; - d = r8d[i]; - } - if (!xor && gf.multiply.w32(&gf, a, b) != c) { - printf("i=%d. %d %d %d %d\n", i, a, b, c, d); - printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); - printf("%llx. Sindex: %d\n", r8b+i, sindex); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { - printf("i=%d. %d %d %d %d\n", i, a, b, c, d); - printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - continue; - } - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 256; a++) { - fill_random_region(r8b, REGION_SIZE); - memcpy(r8d, r8b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - b = r8b[i]; - d = r8d[i]; - if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { - printf("i=%d. %d %d %d\n", i, a, b, d); - printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r8b); - free(r8c); - free(r8d); - } - } else if (w == 16) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r16b = (uint16_t *) malloc(REGION_SIZE); - r16c = (uint16_t *) malloc(REGION_SIZE); - r16d = (uint16_t *) malloc(REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - fill_random_region(r16b, REGION_SIZE); - a = MOA_Random_W(w, 0); - fill_random_region(r16c, REGION_SIZE); - memcpy(r16d, r16c, REGION_SIZE); - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - sindex = 0; - eindex = REGION_SIZE / sizeof(uint16_t); - } else { - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); - } - size = (eindex-sindex)*sizeof(uint16_t); - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16c+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), ai, size, xor); - } - - for (i = sindex; i < eindex; i++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - b = get_alt_map_2w16(i, (uint8_t*)r16b, size / 2); - c = get_alt_map_2w16(i, (uint8_t*)r16c, size / 2); - d = get_alt_map_2w16(i, (uint8_t*)r16d, size / 2); - } else { - b = r16b[i]; - c = r16c[i]; - d = r16d[i]; - } - if (!xor && d != b) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); - printf("%d is the inverse of %d\n", ai, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && b != 0) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf(" b should equal 0, but it doesn't. Probe into it.\n"); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - continue; - } - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a = MOA_Random_W(w, 0); - fill_random_region(r16b, REGION_SIZE); - memcpy(r16d, r16b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint16_t); - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, 0); - } - - for (i = sindex; i < eindex; i++) { - b = r16b[i]; - c = r16c[i]; - d = r16d[i]; - if (!xor && (d != b)) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We did d=b; b = ba; b = b(a^-1).\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - printf("b = %d. d = %d. a = %d\n", b, d, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && d != b) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r16b); - free(r16c); - free(r16d); - } - } else if (w == 32) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r32b = (uint32_t *) malloc(REGION_SIZE); - r32c = (uint32_t *) malloc(REGION_SIZE); - r32d = (uint32_t *) malloc(REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a = MOA_Random_32(); - fill_random_region(r32b, REGION_SIZE); - fill_random_region(r32c, REGION_SIZE); - memcpy(r32d, r32c, REGION_SIZE); - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - sindex = 0; - eindex = REGION_SIZE / sizeof(uint32_t); - } else { - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); - } - size = (eindex-sindex)*sizeof(uint32_t); - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32c+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), ai, size, xor); - } - for (i = sindex; i < eindex; i++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - b = get_alt_map_2w32(i, (uint8_t*)r32b, size / 2); - c = get_alt_map_2w32(i, (uint8_t*)r32c, size / 2); - d = get_alt_map_2w32(i, (uint8_t*)r32d, size / 2); - i++; - } else { - b = r32b[i]; - c = r32c[i]; - d = r32d[i]; - } - if (!xor && d != b) { - printf("i=%d. Addresses: b: 0x%lx\n", i, (unsigned long) (r32b+i)); - printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); - printf("%d is the inverse of %d\n", ai, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && b != 0) { - printf("i=%d. Addresses: b: 0x%lx c: 0x%lx d: 0x%lx\n", i, - (unsigned long) (r32b+i), (unsigned long) (r32c+i), (unsigned long) (r32d+i)); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf(" b should equal 0, but it doesn't. Probe into it.\n"); - printf("a: %8x b: %8x c: %8x, d: %8x\n", a, b, c, d); - problem("Failed buffer-constant, xor=1"); - } - - } - } - } - for (xor = 0; xor < 2; xor++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - continue; - } - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a = MOA_Random_32(); - fill_random_region(r32b, REGION_SIZE); - memcpy(r32d, r32b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint32_t); - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, 0); - } - - for (i = sindex; i < eindex; i++) { - b = r32b[i]; - c = r32c[i]; - d = r32d[i]; - if (!xor && (d != b)) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i)); - printf("We did d=b; b = ba; b = b(a^-1).\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - printf("b = %d. d = %d. a = %d\n", b, d, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && d != b) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i)); - printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r32b); - free(r32c); - free(r32d); - } - } else if (w == 64) { - if (gf.multiply_region.w64 == NULL) { - printf("No multiply_region.\n"); - } else { - r64b = (uint64_t *) malloc(REGION_SIZE); - r64c = (uint64_t *) malloc(REGION_SIZE); - r64d = (uint64_t *) malloc(REGION_SIZE); - fill_random_region(r64b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a64 = MOA_Random_64(); - fill_random_region(r64c, REGION_SIZE); - memcpy(r64d, r64c, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint64_t); - gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64c+sindex), a64, size, xor); - for (i = sindex; i < eindex; i++) { - b64 = r64b[i]; - c64 = r64c[i]; - d64 = r64d[i]; - if (!xor && gf.multiply.w64(&gf, a64, b64) != c64) { - printf("i=%d. 0x%llx 0x%llx 0x%llx should be 0x%llx\n", i, a64, b64, c64, - gf.multiply.w64(&gf, a64, b64)); - printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w64(&gf, a64, b64) ^ d64) != c64) { - printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, b64, c64, d64); - printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i], r64d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a64 = MOA_Random_64(); - fill_random_region(r64b, REGION_SIZE); - memcpy(r64d, r64b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint64_t); - gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64b+sindex), a64, size, xor); - for (i = sindex; i < eindex; i++) { - b64 = r64b[i]; - d64 = r64d[i]; - if (!xor && gf.multiply.w64(&gf, a64, d64) != b64) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w64(&gf, a64, d64) ^ d64) != b64) { - printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, b64, d64); - printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r64b); - free(r64c); - free(r64d); - } - } else if (w == 128) { - if (gf.multiply_region.w128 == NULL) { - printf("No multiply_region.\n"); - } else { - r128b = (uint64_t *) malloc(REGION_SIZE); - r128c = (uint64_t *) malloc(REGION_SIZE); - r128d = (uint64_t *) malloc(REGION_SIZE); - fill_random_region(r128b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - MOA_Random_128(a128); - fill_random_region(r128c, REGION_SIZE); - memcpy(r128d, r128c, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint64_t)*2; - gf.multiply_region.w128(&gf, (void *) (r128b+sindex*2), (void *) (r128c+sindex*2), a128, size, xor); - for (i = sindex; i < eindex; i++) { - b128[0] = r128b[2*i]; - b128[1] = r128b[2*i+1]; - c128[0] = r128c[2*i]; - c128[1] = r128c[2*i+1]; - d128[0] = r128d[2*i]; - d128[1] = r128d[2*i+1]; - gf.multiply.w128(&gf, a128, b128, e128); - if (xor) { - e128[0] ^= d128[0]; - e128[1] ^= d128[1]; - } - if (!xor && !GF_W128_EQUAL(c128, e128)) { - printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx should be 0x%llx%llx\n", - i, a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], e128[0], e128[1]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && !GF_W128_EQUAL(e128, c128)) { - printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx 0x%llx%llx\n", i, - a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - MOA_Random_128(a128); - fill_random_region(r128b, REGION_SIZE); - memcpy(r128d, r128b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - sindex = 0; - eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); - eindex = REGION_SIZE/(2*sizeof(uint64_t)); - size = (eindex-sindex)*sizeof(uint64_t)*2; - gf.multiply_region.w128(&gf, (void *) (r128b+sindex), (void *) (r128b+sindex), a128, size, xor); - for (i = sindex; i < eindex; i++) { - b128[0] = r128b[2*i]; - b128[1] = r128b[2*i + 1]; - d128[0] = r128d[2*i]; - d128[1] = r128d[2*i + 1]; - gf.multiply.w128(&gf, a128, d128, e128); - if (xor) { - e128[0] ^= d128[0]; - e128[1] ^= d128[1]; - } - if (!xor && !GF_W128_EQUAL(b128, e128)) problem("Failed buffer-constant, xor=0"); - if (xor && !GF_W128_EQUAL(b128, e128)) { - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r128b); - free(r128c); - free(r128d); - } - } - } - exit(0); - */ -} diff --git a/junk-w16-backup.c b/junk-w16-backup.c deleted file mode 100644 index ad0788a..0000000 --- a/junk-w16-backup.c +++ /dev/null @@ -1,1585 +0,0 @@ -/* - * gf_w16.c - * - * Routines for 16-bit Galois fields - */ - -#include "gf_int.h" -#include <stdio.h> -#include <stdlib.h> - -#define GF_FIELD_WIDTH (16) -#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) -#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 - -#define GF_BASE_FIELD_WIDTH (8) -#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) -#define GF_S_GF_8_2 (63) - -struct gf_logtable_data { - int log_tbl[GF_FIELD_SIZE]; - gf_val_16_t antilog_tbl[GF_FIELD_SIZE * 2]; - gf_val_16_t inv_tbl[GF_FIELD_SIZE]; -}; - -struct gf_zero_logtable_data { - int log_tbl[GF_FIELD_SIZE]; - gf_val_16_t _antilog_tbl[GF_FIELD_SIZE * 4]; - gf_val_16_t *antilog_tbl; - gf_val_16_t inv_tbl[GF_FIELD_SIZE]; -}; - -struct gf_lazytable_data { - int log_tbl[GF_FIELD_SIZE]; - gf_val_16_t antilog_tbl[GF_FIELD_SIZE * 2]; - gf_val_16_t inv_tbl[GF_FIELD_SIZE]; - gf_val_16_t lazytable[GF_FIELD_SIZE]; -}; - -struct gf_w8_logtable_data { - gf_val_8_t log_tbl[GF_BASE_FIELD_SIZE]; - gf_val_8_t antilog_tbl[GF_BASE_FIELD_SIZE * 2]; - gf_val_8_t *antilog_tbl_div; -}; - -struct gf_w8_single_table_data { - gf_val_8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; - gf_val_8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; -}; - -struct gf_w8_double_table_data { - gf_val_8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; - gf_val_8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE*GF_BASE_FIELD_SIZE]; -}; - - -#define MM_PRINT(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 2) printf(" %02x %02x", blah[15-ii], blah[14-ii]); printf("\n"); } - -static -inline -gf_val_16_t gf_w16_inverse_from_divide (gf_t *gf, gf_val_16_t a) -{ - return gf->divide.w16(gf, 1, a); -} - -static -inline -gf_val_16_t gf_w16_divide_from_inverse (gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - b = gf->inverse.w16(gf, b); - return gf->multiply.w16(gf, a, b); -} - -static -void -gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - int i; - gf_val_16_t *s16; - gf_val_16_t *d16; - - s16 = (gf_val_16_t *) src; - d16 = (gf_val_16_t *) dest; - - if (xor) { - for (i = 0; i < bytes/2; i++) { - d16[i] ^= gf->multiply.w16(gf, val, s16[i]); - } - } else { - for (i = 0; i < bytes/2; i++) { - d16[i] = gf->multiply.w16(gf, val, s16[i]); - } - } -} - -static -inline -gf_val_16_t gf_w16_euclid (gf_t *gf, gf_val_16_t b) -{ - gf_val_32_t e_i, e_im1, e_ip1; - gf_val_32_t d_i, d_im1, d_ip1; - gf_val_16_t y_i, y_im1, y_ip1; - gf_val_16_t c_i; - - if (b == 0) return -1; - e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; - e_i = b; - d_im1 = 16; - for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ; - y_i = 1; - y_im1 = 0; - - while (e_i != 1) { - - e_ip1 = e_im1; - d_ip1 = d_im1; - c_i = 0; - - while (d_ip1 >= d_i) { - c_i ^= (1 << (d_ip1 - d_i)); - e_ip1 ^= (e_i << (d_ip1 - d_i)); - while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; - } - - y_ip1 = y_im1 ^ gf->multiply.w16(gf, c_i, y_i); - y_im1 = y_i; - y_i = y_ip1; - - e_im1 = e_i; - d_im1 = d_i; - e_i = e_ip1; - d_i = d_ip1; - } - - return y_i; -} - -static -inline -gf_val_16_t gf_w16_matrix (gf_t *gf, gf_val_16_t b) -{ - return gf_bitmatrix_inverse(b, 16, ((gf_internal_t *) (gf->scratch))->prim_poly); -} - -/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only - include it for completeness. It does have the feature that it requires no - extra memory. -*/ - -static -inline -gf_val_16_t -gf_w16_shift_multiply (gf_t *gf, gf_val_16_t a16, gf_val_16_t b16) -{ - uint32_t product, i, pp, a, b; - gf_internal_t *h; - - a = a16; - b = b16; - h = (gf_internal_t *) gf->scratch; - pp = h->prim_poly; - - product = 0; - - for (i = 0; i < GF_FIELD_WIDTH; i++) { - if (a & (1 << i)) product ^= (b << i); - } - for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { - if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); - } - return product; -} - -static -int gf_w16_shift_init(gf_t *gf) -{ - gf->multiply.w16 = gf_w16_shift_multiply; - gf->inverse.w16 = gf_w16_euclid; - gf->multiply_region.w16 = gf_w16_multiply_region_from_single; - return 1; -} - -/* KMG: GF_MULT_LOGTABLE: */ - -static -void -gf_w16_log_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - unsigned long uls, uld; - int i; - uint16_t lv, b, c; - uint16_t *s16, *d16; - int num_syms = bytes >> 1; - int sym_divisible = bytes % 2; - - struct gf_logtable_data *ltd; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); - if (sym_divisible) { - gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - - lv = ltd->log_tbl[val]; - - if (xor) { - for (i = 0; i < num_syms; i++) { - d16[i] ^= (s16[i] == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]); - } - } else { - for (i = 0; i < num_syms; i++) { - d16[i] = (s16[i] == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]); - } - } -} - -static -inline -gf_val_16_t -gf_w16_log_multiply(gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - struct gf_logtable_data *ltd; - - ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]]; -} - -static -inline -gf_val_16_t -gf_w16_log_divide(gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - int log_sum = 0; - struct gf_logtable_data *ltd; - - if (a == 0 || b == 0) return 0; - ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - - log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); - return (ltd->antilog_tbl[log_sum]); -} - -static -gf_val_16_t -gf_w16_log_inverse(gf_t *gf, gf_val_16_t a) -{ - struct gf_logtable_data *ltd; - - ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - return (ltd->inv_tbl[a]); -} - -static -int gf_w16_log_init(gf_t *gf) -{ - gf_internal_t *h; - struct gf_logtable_data *ltd; - int i, b; - - h = (gf_internal_t *) gf->scratch; - ltd = h->private; - - ltd->log_tbl[0] = 0; - - b = 1; - for (i = 0; i < GF_MULT_GROUP_SIZE; i++) { - ltd->log_tbl[b] = (gf_val_16_t)i; - ltd->antilog_tbl[i] = (gf_val_16_t)b; - ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (gf_val_16_t)b; - b <<= 1; - if (b & GF_FIELD_SIZE) { - b = b ^ h->prim_poly; - } - } - ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ - ltd->inv_tbl[1] = 1; - for (i = 2; i < GF_FIELD_SIZE; i++) { - ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; - } - - gf->inverse.w16 = gf_w16_log_inverse; - gf->divide.w16 = gf_w16_log_divide; - gf->multiply.w16 = gf_w16_log_multiply; - gf->multiply_region.w16 = gf_w16_log_multiply_region; - - return 1; -} - -/* JSP: GF_MULT_SPLIT_TABLE: Using 8 multiplication tables to leverage SSE instructions. -*/ - -static -void -gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - uint64_t i, j, a, c, prod; - uint16_t *s16, *d16, *top; - gf_internal_t *h; - uint16_t table[4][16]; - - h = (gf_internal_t *) gf->scratch; - - for (j = 0; j < 16; j++) { - for (i = 0; i < 4; i++) { - c = (j << (i*4)); - table[i][j] = gf_w16_log_multiply(gf, c, val); - } - } - - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - top = (uint16_t *) (dest+bytes); - - while (d16 < top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - prod ^= table[i][a&0xf]; - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - } -} - -static -void -gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - uint64_t j, a, c, prod, *s64, *d64, *top64; - uint16_t *s16, *d16, *top; - gf_internal_t *h; - uint64_t htable[256], ltable[256]; - unsigned long uls, uld; - - h = (gf_internal_t *) gf->scratch; - - uls = ((unsigned long) src) & 0xf; - uld = ((unsigned long) dest) & 0xf; - if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_8_16_lazy_multiply_region", 2); - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - for (j = 0; j < 256; j++) { - ltable[j] = gf_w16_log_multiply(gf, j, val); - htable[j] = gf_w16_log_multiply(gf, (j<<8), val); - } - - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - top = (uint16_t *) (dest+bytes); - - if (uls != 0) { - while (uls != 16 && d16 < top) { - a = *s16; - prod = (xor) ? *d16 : 0; - prod ^= ltable[a&0xff]; - a >>= 8; - prod ^= htable[a]; - *d16 = prod; - s16++; - d16++; - uls += 2; - } - if (d16 == top) return; - } - - uls = ((unsigned long) top) & 0xf; - uld = ((unsigned long) top) ^ uls; - top64 = (uint64_t *) uld; - s64 = (uint64_t *) s16; - d64 = (uint64_t *) d16; - -/* Does Unrolling Matter? -- Doesn't seem to. - while (d64 != top64) { - a = *s64; - - prod = htable[a >> 56]; - a <<= 8; - prod ^= ltable[a >> 56]; - a <<= 8; - prod <<= 16; - - prod ^= htable[a >> 56]; - a <<= 8; - prod ^= ltable[a >> 56]; - a <<= 8; - prod <<= 16; - - prod ^= htable[a >> 56]; - a <<= 8; - prod ^= ltable[a >> 56]; - a <<= 8; - prod <<= 16; - - prod ^= htable[a >> 56]; - a <<= 8; - prod ^= ltable[a >> 56]; - prod ^= ((xor) ? *d64 : 0); - *d64 = prod; - *s64++; - *d64++; - } -*/ - - while (d64 != top64) { - a = *s64; - - prod = 0; - for (j = 0; j < 4; j++) { - prod <<= 16; - prod ^= htable[a >> 56]; - a <<= 8; - prod ^= ltable[a >> 56]; - a <<= 8; - } - - prod ^= ((xor) ? *d64 : 0); - *d64 = prod; - *s64++; - *d64++; - } - - - if (uls != 0) { - d16 = (uint16_t *) d64; - s16 = (uint16_t *) s64; - while (d16 < top) { - a = *s16; - prod = (xor) ? *d16 : 0; - prod ^= ltable[a&0xff]; - a >>= 8; - prod ^= htable[a]; - *d16 = prod; - s16++; - d16++; - } - } - return; -} - -static -void -gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - uint64_t j, a, c, prod, *s64, *d64, *top64, pp; - uint16_t *s16, *d16, *top; - gf_internal_t *h; - struct gf_lazytable_data *ltd; - unsigned long uls, uld; - - h = (gf_internal_t *) gf->scratch; - - uls = ((unsigned long) src) & 0xf; - uld = ((unsigned long) dest) & 0xf; - if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_table_lazy_multiply_region", 2); - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - ltd = (struct gf_lazytable_data *) h->private; - - ltd->lazytable[0] = 0; - a = val; - c = 1; - pp = h->prim_poly; - - do { - ltd->lazytable[c] = a; - c <<= 1; - if (c & (1 << GF_FIELD_WIDTH)) c ^= pp; - a <<= 1; - if (a & (1 << GF_FIELD_WIDTH)) a ^= pp; - } while (c != 1); - - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - top = (uint16_t *) (dest+bytes); - - if (uls != 0) { - while (uls != 16 && d16 < top) { - prod = (xor) ? *d16 : 0; - prod ^= ltd->lazytable[*s16]; - *d16 = prod; - s16++; - d16++; - uls += 2; - } - if (d16 == top) return; - } - - uls = ((unsigned long) top) & 0xf; - uld = ((unsigned long) top) ^ uls; - top64 = (uint64_t *) uld; - s64 = (uint64_t *) s16; - d64 = (uint64_t *) d16; - - /* Unrolling doesn't seem to matter - while (d64 != top64) { - a = *s64; - - prod = ltd->lazytable[a >> 48]; - a <<= 16; - prod <<= 16; - - prod ^= ltd->lazytable[a >> 48]; - a <<= 16; - prod <<= 16; - - prod ^= ltd->lazytable[a >> 48]; - a <<= 16; - prod <<= 16; - - prod ^= ltd->lazytable[a >> 48]; - - prod ^= ((xor) ? *d64 : 0); - *d64 = prod; - *s64++; - *d64++; - } - */ - - while (d64 != top64) { - a = *s64; - - prod = 0; - for (j = 0; j < 4; j++) { - prod <<= 16; - prod ^= ltd->lazytable[a >> 48]; - a <<= 16; - } - prod ^= ((xor) ? *d64 : 0); - *d64 = prod; - *s64++; - *d64++; - } - - if (uls != 0) { - d16 = (uint16_t *) d64; - s16 = (uint16_t *) s64; - while (d16 < top) { - prod = (xor) ? *d16 : 0; - prod ^= ltd->lazytable[*s16]; - *d16 = prod; - s16++; - d16++; - } - } - return; -} - -static -void -gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ -#ifdef INTEL_SSE4 - uint64_t i, j, *s64, *d64, *top64;; - uint64_t a, c, prod; - uint16_t *s16, *d16, *top; - uint8_t low[4][16]; - uint8_t high[4][16]; - unsigned long uls, uld; - - __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], shuffler, unshuffler, tta, ttb; - - struct gf_single_table_data *std; - - uls = ((unsigned long) src) & 0xf; - uld = ((unsigned long) dest) & 0xf; - if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_altmap_multiply_region", 2); - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - for (j = 0; j < 16; j++) { - for (i = 0; i < 4; i++) { - c = (j << (i*4)); - prod = gf_w16_log_multiply(gf, c, val); - low[i][j] = (prod & 0xff); - high[i][j] = (prod >> 8); - } - } - - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - top = (uint16_t *) (dest+bytes); - - if (uls != 0) { - while (uls != 16 && d16 < top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - c = a & 0xf; - prod ^= low[i][c]; - prod ^= (high[i][c] << 8); - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - uls += 2; - } - if (d16 == top) return; - } - - for (i = 0; i < 4; i++) { - tlow[i] = _mm_loadu_si128((__m128i *)low[i]); - thigh[i] = _mm_loadu_si128((__m128i *)high[i]); - } - - uls = ((unsigned long) top); - uld = ((unsigned long) d16); - bytes = (uls - uld); - if ((bytes & 0x1f) != 0) bytes -= (bytes & 0x1f); - - top64 = (uint64_t *) (uld + bytes); - s64 = (uint64_t *) s16; - d64 = (uint64_t *) d16; - mask = _mm_set1_epi8 (0x0f); - shuffler = _mm_set_epi8(0xf, 0xd, 0xb, 0x9, 7, 5, 3, 1, 0xe, 0xc, 0xa, 8, 6, 4, 2, 0); - unshuffler = _mm_set_epi8(0xf, 7, 0xe, 6, 0xd, 5, 0xc, 4, 0xb, 3, 0xa, 2, 9, 1, 8, 0); - - if (xor) { - while (d64 != top64) { - - ta = _mm_load_si128((__m128i *) s64); - MM_PRINT("Ta", ta); - tb = _mm_load_si128((__m128i *) (s64+2)); - MM_PRINT("Tb", tb); - tta = _mm_shuffle_epi8(ta, shuffler); - ttb = _mm_shuffle_epi8(tb, shuffler); - ta = _mm_unpackhi_epi64(ttb, tta); - MM_PRINT("New ta", ta); - tb = _mm_unpacklo_epi64(ttb, tta); - MM_PRINT("New tb", tb); - exit(0); - - - ti = _mm_and_si128 (mask, tb); - tph = _mm_shuffle_epi8 (thigh[0], ti); - tpl = _mm_shuffle_epi8 (tlow[0], ti); - - tb = _mm_srli_epi16(tb, 4); - ti = _mm_and_si128 (mask, tb); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); - - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); - - tta = _mm_unpackhi_epi64(tpl, tph); - ttb = _mm_unpacklo_epi64(tpl, tph); - ta = _mm_shuffle_epi8(tta, unshuffler); - tb = _mm_shuffle_epi8(ttb, unshuffler); - tta = _mm_load_si128((__m128i *) d64); - ta = _mm_xor_si128(ta, tta); - ttb = _mm_load_si128((__m128i *) (d64+2)); - tb = _mm_xor_si128(tb, ttb); - _mm_store_si128 ((__m128i *)d64, ta); - _mm_store_si128 ((__m128i *)(d64+2), tb); - - d64 += 4; - s64 += 4; - - } - } else { - while (d64 != top64) { - - ta = _mm_load_si128((__m128i *) s64); - tb = _mm_load_si128((__m128i *) (s64+2)); - tta = _mm_shuffle_epi8(ta, shuffler); - ttb = _mm_shuffle_epi8(tb, shuffler); - ta = _mm_unpackhi_epi64(ttb, tta); - tb = _mm_unpacklo_epi64(ttb, tta); - - ti = _mm_and_si128 (mask, tb); - tph = _mm_shuffle_epi8 (thigh[0], ti); - tpl = _mm_shuffle_epi8 (tlow[0], ti); - - tb = _mm_srli_epi16(tb, 4); - ti = _mm_and_si128 (mask, tb); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); - - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); - - tta = _mm_unpackhi_epi64(tpl, tph); - ttb = _mm_unpacklo_epi64(tpl, tph); - ta = _mm_shuffle_epi8(tta, unshuffler); - tb = _mm_shuffle_epi8(ttb, unshuffler); - _mm_store_si128 ((__m128i *)d64, ta); - _mm_store_si128 ((__m128i *)(d64+2), tb); - - d64 += 4; - s64 += 4; - } - } - - d16 = (uint16_t *) d64; - s16 = (uint16_t *) s64; - - while (d16 != top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - c = a & 0xf; - prod ^= low[i][c]; - prod ^= (high[i][c] << 8); - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - } -#endif -} - -/* -static -void -gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ -#ifdef INTEL_SSE4 - uint64_t i, j, *s64, *d64, *top64;; - uint64_t a, c, prod; - uint16_t *s16, *d16, *top; - uint8_t low[4][16]; - uint8_t high[4][16]; - unsigned long uls, uld; - - __m128i mask, ta, ti, tp, tlow[4], thigh[4]; - - struct gf_single_table_data *std; - - uls = ((unsigned long) src) & 0xf; - uld = ((unsigned long) dest) & 0xf; - if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_multiply_region", 2); - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - for (j = 0; j < 16; j++) { - for (i = 0; i < 4; i++) { - c = (j << (i*4)); - prod = gf_w16_log_multiply(gf, c, val); - low[i][j] = (prod & 0xff); - high[i][j] = (prod >> 8); - } - } - - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - top = (uint16_t *) (dest+bytes); - - if (uls != 0) { - while (uls != 16 && d16 < top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - c = a & 0xf; - prod ^= low[i][c]; - prod ^= (high[i][c] << 8); - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - uls += 2; - } - if (d16 == top) return; - } - - for (i = 0; i < 4; i++) { - tlow[i] = _mm_loadu_si128((__m128i *)low[i]); - thigh[i] = _mm_loadu_si128((__m128i *)high[i]); - } - - uls = ((unsigned long) top) & 0xf; - uld = ((unsigned long) top) ^ uls; - top64 = (uint64_t *) uld; - s64 = (uint64_t *) s16; - d64 = (uint64_t *) d16; - mask = _mm_set1_epi16 (0x0f); - - if (xor) { - while (d64 != top64) { - ta = _mm_load_si128((__m128i *) s64); - ti = _mm_and_si128 (mask, ta); - tp = _mm_shuffle_epi8 (tlow[0], ti); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[0], ti); - tp = _mm_xor_si128 (tp, ti); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tp); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[1], ti); - tp = _mm_xor_si128 (tp, ti); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tp); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[2], ti); - tp = _mm_xor_si128 (tp, ti); - - ti = _mm_srli_epi16(ta, 4); - tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tp); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[3], ti); - tp = _mm_xor_si128 (tp, ti); - ti = _mm_load_si128((__m128i *)d64); - tp = _mm_xor_si128 (tp, ti); - _mm_store_si128 ((__m128i *)d64, tp); - s64 += 2; - d64 += 2; - } - } else { - while (d64 != top64) { - ta = _mm_load_si128((__m128i *) s64); - ti = _mm_and_si128 (mask, ta); - tp = _mm_shuffle_epi8 (tlow[0], ti); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[0], ti); - tp = _mm_xor_si128 (tp, ti); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tp); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[1], ti); - tp = _mm_xor_si128 (tp, ti); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tp); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[2], ti); - tp = _mm_xor_si128 (tp, ti); - - ti = _mm_srli_epi16(ta, 4); - tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tp); - ti = _mm_slli_epi16 (ti, 8); - ti = _mm_shuffle_epi8 (thigh[3], ti); - tp = _mm_xor_si128 (tp, ti); - _mm_store_si128 ((__m128i *)d64, tp); - s64 += 2; - d64 += 2; - } - } - - d16 = (uint16_t *) d64; - s16 = (uint16_t *) s64; - - while (d16 != top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - c = a & 0xf; - prod ^= low[i][c]; - prod ^= (high[i][c] << 8); - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - } -#endif -} -*/ - - -static -void -gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ -#ifdef INTEL_SSE4 - uint64_t i, j, *s64, *d64, *top64;; - uint64_t a, c, prod; - uint16_t *s16, *d16, *top; - uint8_t low[4][16]; - uint8_t high[4][16]; - unsigned long uls, uld; - - __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4]; - - struct gf_single_table_data *std; - - uls = ((unsigned long) src) & 0xf; - uld = ((unsigned long) dest) & 0xf; - if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_altmap_multiply_region", 2); - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - for (j = 0; j < 16; j++) { - for (i = 0; i < 4; i++) { - c = (j << (i*4)); - prod = gf_w16_log_multiply(gf, c, val); - low[i][j] = (prod & 0xff); - high[i][j] = (prod >> 8); - } - } - - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - top = (uint16_t *) (dest+bytes); - - if (uls != 0) { - while (uls != 16 && d16 < top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - c = a & 0xf; - prod ^= low[i][c]; - prod ^= (high[i][c] << 8); - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - uls += 2; - } - if (d16 == top) return; - } - - for (i = 0; i < 4; i++) { - tlow[i] = _mm_loadu_si128((__m128i *)low[i]); - thigh[i] = _mm_loadu_si128((__m128i *)high[i]); - } - - uls = ((unsigned long) top); - uld = ((unsigned long) d16); - bytes = (uls - uld); - if ((bytes & 0x1f) != 0) bytes -= (bytes & 0x1f); - - top64 = (uint64_t *) (uld + bytes); - s64 = (uint64_t *) s16; - d64 = (uint64_t *) d16; - mask = _mm_set1_epi8 (0x0f); - - if (xor) { - while (d64 != top64) { - - ta = _mm_load_si128((__m128i *) s64); - tb = _mm_load_si128((__m128i *) (s64+2)); - - ti = _mm_and_si128 (mask, tb); - tph = _mm_shuffle_epi8 (thigh[0], ti); - tpl = _mm_shuffle_epi8 (tlow[0], ti); - - tb = _mm_srli_epi16(tb, 4); - ti = _mm_and_si128 (mask, tb); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); - - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); - - ta = _mm_load_si128((__m128i *) d64); - tph = _mm_xor_si128(tph, ta); - _mm_store_si128 ((__m128i *)d64, tph); - tb = _mm_load_si128((__m128i *) (d64+2)); - tpl = _mm_xor_si128(tpl, tb); - _mm_store_si128 ((__m128i *)(d64+2), tpl); - - d64 += 4; - s64 += 4; - } - } else { - while (d64 != top64) { - - ta = _mm_load_si128((__m128i *) s64); - tb = _mm_load_si128((__m128i *) (s64+2)); - - ti = _mm_and_si128 (mask, tb); - tph = _mm_shuffle_epi8 (thigh[0], ti); - tpl = _mm_shuffle_epi8 (tlow[0], ti); - - tb = _mm_srli_epi16(tb, 4); - ti = _mm_and_si128 (mask, tb); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); - - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); - - ta = _mm_srli_epi16(ta, 4); - ti = _mm_and_si128 (mask, ta); - tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); - tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); - - _mm_store_si128 ((__m128i *)d64, tph); - _mm_store_si128 ((__m128i *)(d64+2), tpl); - - d64 += 4; - s64 += 4; - - } - } - - d16 = (uint16_t *) d64; - s16 = (uint16_t *) s64; - - while (d16 != top) { - a = *s16; - prod = (xor) ? *d16 : 0; - for (i = 0; i < 4; i++) { - c = a & 0xf; - prod ^= low[i][c]; - prod ^= (high[i][c] << 8); - a >>= 4; - } - *d16 = prod; - s16++; - d16++; - } -#endif -} - -static -int gf_w16_split_init(gf_t *gf) -{ - gf_internal_t *h; - gf_w16_log_init(gf); - - h = (gf_internal_t *) gf->scratch; - if (h->arg1 == 8 || h->arg2 == 8) { - gf->multiply_region.w16 = gf_w16_split_8_16_lazy_multiply_region; - } else if (h->arg1 == 4 || h->arg2 == 4) { - if (h->region_type & GF_REGION_SSE) { - if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w16 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; - } else { - gf->multiply_region.w16 = gf_w16_split_4_16_lazy_sse_multiply_region; - } - } else { - gf->multiply_region.w16 = gf_w16_split_4_16_lazy_multiply_region; - } - } - return 1; -} - -static -int gf_w16_table_init(gf_t *gf) -{ - gf_internal_t *h; - gf_w16_log_init(gf); - - h = (gf_internal_t *) gf->scratch; - gf->multiply_region.w16 = NULL; - gf->multiply_region.w16 = gf_w16_table_lazy_multiply_region; - return 1; -} - -static -void -gf_w16_log_zero_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - unsigned long uls, uld; - int i; - uint16_t lv, b, c; - uint16_t *s16, *d16; - int num_syms = bytes >> 1; - int sym_divisible = bytes % 2; - - struct gf_zero_logtable_data *ltd; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); - if (sym_divisible) { - gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - ltd = (struct gf_zero_logtable_data*) ((gf_internal_t *) gf->scratch)->private; - s16 = (uint16_t *) src; - d16 = (uint16_t *) dest; - - lv = ltd->log_tbl[val]; - - if (xor) { - for (i = 0; i < num_syms; i++) { - d16[i] ^= ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]; - } - } else { - for (i = 0; i < num_syms; i++) { - d16[i] = ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]; - } - } -} - -static -inline -gf_val_16_t -gf_w16_log_zero_multiply (gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - struct gf_zero_logtable_data *ltd; - - ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - return ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]]; -} - -static -inline -gf_val_16_t -gf_w16_log_zero_divide (gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - int log_sum = 0; - struct gf_zero_logtable_data *ltd; - - if (a == 0 || b == 0) return 0; - ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - - log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); - return (ltd->antilog_tbl[log_sum]); -} - -static -gf_val_16_t -gf_w16_log_zero_inverse (gf_t *gf, gf_val_16_t a) -{ - struct gf_zero_logtable_data *ltd; - - ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; - return (ltd->inv_tbl[a]); -} - -static -int gf_w16_log_zero_init(gf_t *gf) -{ - gf_internal_t *h; - struct gf_zero_logtable_data *ltd; - int i, b; - - h = (gf_internal_t *) gf->scratch; - ltd = h->private; - - ltd->log_tbl[0] = (-GF_MULT_GROUP_SIZE) + 1; - - bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); - - ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_FIELD_SIZE * 2]); - - b = 1; - for (i = 0; i < GF_MULT_GROUP_SIZE; i++) { - ltd->log_tbl[b] = (gf_val_16_t)i; - ltd->antilog_tbl[i] = (gf_val_16_t)b; - ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (gf_val_16_t)b; - b <<= 1; - if (b & GF_FIELD_SIZE) { - b = b ^ h->prim_poly; - } - } - ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ - ltd->inv_tbl[1] = 1; - for (i = 2; i < GF_FIELD_SIZE; i++) { - ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; - } - - gf->inverse.w16 = gf_w16_log_zero_inverse; - gf->divide.w16 = gf_w16_log_zero_divide; - gf->multiply.w16 = gf_w16_log_zero_multiply; - gf->multiply_region.w16 = gf_w16_log_zero_multiply_region; - return 1; -} - -static -gf_val_16_t -gf_w16_composite_multiply(gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - uint8_t b0 = b & 0x00ff; - uint8_t b1 = (b & 0xff00) >> 8; - uint8_t a0 = a & 0x00ff; - uint8_t a1 = (a & 0xff00) >> 8; - uint8_t a1b1; - - a1b1 = base_gf->multiply.w8(base_gf, a1, b1); - - return ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8)); -} - -/* - * Composite field division trick (explained in 2007 tech report) - * - * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 - * - * let c = b^-1 - * - * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) - * - * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 - * - * let d = b1c1 and d+1 = b0c0 - * - * solve s*b1c1+b1c0+b0c1 = 0 - * - * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 - * - * c0 = (d+1)b0^-1 - * c1 = d*b1^-1 - * - * a / b = a * c - */ -static -gf_val_16_t -gf_w16_composite_inverse(gf_t *gf, gf_val_16_t a) -{ - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - uint8_t a0 = a & 0x00ff; - uint8_t a1 = (a & 0xff00) >> 8; - uint8_t c0, c1, d, tmp; - uint16_t c; - uint8_t a0inv, a1inv; - - if (a0 == 0) { - a1inv = base_gf->inverse.w8(base_gf, a1); - c0 = base_gf->multiply.w8(base_gf, a1inv, GF_S_GF_8_2); - c1 = a1inv; - } else if (a1 == 0) { - c0 = base_gf->inverse.w8(base_gf, a0); - c1 = 0; - } else { - a1inv = base_gf->inverse.w8(base_gf, a1); - a0inv = base_gf->inverse.w8(base_gf, a0); - - d = base_gf->multiply.w8(base_gf, a1, a0inv); - - tmp = (base_gf->multiply.w8(base_gf, a1, a0inv) ^ base_gf->multiply.w8(base_gf, a0, a1inv) ^ GF_S_GF_8_2); - tmp = base_gf->inverse.w8(base_gf, tmp); - - d = base_gf->multiply.w8(base_gf, d, tmp); - - c0 = base_gf->multiply.w8(base_gf, (d^1), a0inv); - c1 = base_gf->multiply.w8(base_gf, d, a1inv); - } - - c = c0 | (c1 << 8); - - return c; -} - -static -gf_val_16_t -gf_w16_composite_divide(gf_t *gf, gf_val_16_t a, gf_val_16_t b) -{ - gf_val_16_t binv; - - binv = gf_w16_composite_inverse(gf, b); - - return gf_w16_composite_multiply(gf, a, binv); -} - -static -void -gf_w16_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - unsigned long uls, uld; - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - int i=0; - struct gf_w8_single_table_data * std; - uint8_t b0 = val & 0x00ff; - uint8_t b1 = (val & 0xff00) >> 8; - uint16_t *s16 = (uint16_t *) src; - uint16_t *d16 = (uint16_t *) dest; - uint8_t a0, a1, a1b1; - int num_syms = bytes >> 1; - int sym_divisible = bytes % 2; - - struct gf_logtable_data *ltd; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); - if (sym_divisible) { - gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - std = (struct gf_w8_single_table_data *) h->private; - - if (xor) { - for (i = 0;i < num_syms; i++) { - a0 = s16[i] & 0x00ff; - a1 = (s16[i] & 0xff00) >> 8; - a1b1 = std->mult[a1][b1]; - - d16[i] ^= ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); - - } - } else { - for (i = 0;i < num_syms; i++) { - a0 = s16[i] & 0x00ff; - a1 = (s16[i] & 0xff00) >> 8; - a1b1 = std->mult[a1][b1]; - - d16[i] = ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); - } - } -} - -static -void -gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - unsigned long uls, uld; - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - int i=0; - struct gf_w8_single_table_data * std; - uint8_t b0 = val & 0x00ff; - uint8_t b1 = (val & 0xff00) >> 8; - uint16_t *s16 = (uint16_t *) src; - uint16_t *d16 = (uint16_t *) dest; - uint8_t a0, a1, a1b1; - int num_syms = bytes >> 1; - int sym_divisible = bytes % 2; - - struct gf_logtable_data *ltd; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); - if (sym_divisible) { - gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - std = (struct gf_w8_single_table_data *) h->private; - - if (xor) { - for (i = 0;i < num_syms; i++) { - a0 = s16[i] & 0x00ff; - a1 = (s16[i] & 0xff00) >> 8; - a1b1 = std->mult[a1][b1]; - - d16[i] ^= ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | - ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8)); - - } - } else { - for (i = 0;i < num_syms; i++) { - a0 = s16[i] & 0x00ff; - a1 = (s16[i] & 0xff00) >> 8; - a1b1 = std->mult[a1][b1]; - - d16[i] = ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | - ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8)); - } - } -} - -static -void -gf_w16_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) -{ - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - gf_val_8_t val0 = val & 0x00ff; - gf_val_8_t val1 = (val & 0xff00) >> 8; - int sub_reg_size = bytes / 2; - - if (!xor) { - memset(dest, 0, bytes); - } - - if (bytes % 2 != 0) gf_alignment_error("gf_w8_composite_multiply_region_alt", 1); - - base_gf->multiply_region.w8(base_gf, src, dest, val0, sub_reg_size, xor); - base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1); - base_gf->multiply_region.w8(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor); - base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1); - base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w8(base_gf, GF_S_GF_8_2, val1), sub_reg_size, 1); -} - -static -int gf_w16_composite_init(gf_t *gf) -{ - struct gf_w8_single_table_data * std; - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - gf_val_16_t a, b; - - std = (struct gf_w8_single_table_data *) h->private; - - for (a = 0; a < 256; a++) { - for (b = 0; b < 256; b++) { - std->mult[a][b] = base_gf->multiply.w8(base_gf, a, b); - } - } - - if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w16 = gf_w16_composite_multiply_region_alt; - } else { - if (h->region_type & GF_REGION_SINGLE_TABLE) { - gf->multiply_region.w16 = gf_w16_composite_multiply_region_table; - } else { - gf->multiply_region.w16 = gf_w16_composite_multiply_region; - } - } - - gf->multiply.w16 = gf_w16_composite_multiply; - gf->divide.w16 = gf_w16_composite_divide; - gf->inverse.w16 = gf_w16_composite_inverse; - - return 1; -} - - - -int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) -{ - int ss; - int sa; - - ss = (GF_REGION_SSE | GF_REGION_NOSSE); - sa = (GF_REGION_STDMAP | GF_REGION_ALTMAP); - - switch(mult_type) - { - case GF_MULT_TABLE: - region_type |= GF_REGION_LAZY; - if (arg1 != 0 || arg2 != 0 || region_type != GF_REGION_LAZY) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_lazytable_data) + 64; - break; - case GF_MULT_LOG_TABLE: - if (arg2 != 0) return -1; - if (region_type != GF_REGION_DEFAULT) return -1; - if (arg1 == 1) { - return sizeof(gf_internal_t) + sizeof(struct gf_zero_logtable_data) + 64; - } else if (arg1 == 0) { - return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; - } else { - return -1; - } - break; - case GF_MULT_SPLIT_TABLE: - if ((arg1 == 8 && arg2 == 16) || (arg2 == 8 && arg1 == 16)) { - region_type |= GF_REGION_LAZY; - if (region_type != GF_REGION_LAZY) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; - } else if ((arg1 == 4 && arg2 == 16) || (arg2 == 4 && arg1 == 16)) { - region_type &= (~GF_REGION_LAZY); /* Ignore GF_REGION_LAZY */ - if ((region_type & ss) == ss) return -1; - if ((region_type & sa) == sa) return -1; - if ((region_type & ss) == 0) region_type |= GF_REGION_SSE; - if (region_type & GF_REGION_NOSSE) { - if (region_type != GF_REGION_NOSSE) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; - } else { - if ((region_type | ss | sa) != (ss|sa)) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; - } - } - return -1; - break; - case GF_MULT_DEFAULT: - case GF_MULT_SHIFT: - if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; - return sizeof(gf_internal_t); - break; - case GF_MULT_COMPOSITE: - if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; - if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1; - if (arg1 == 2 && arg2 == 8) { - return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64; - } else { - return -1; - } - - default: - return -1; - } -} - -int gf_w16_init(gf_t *gf) -{ - gf_internal_t *h; - - h = (gf_internal_t *) gf->scratch; - if (h->prim_poly == 0) h->prim_poly = 0x1100b; - - gf->multiply.w16 = NULL; - gf->divide.w16 = NULL; - gf->inverse.w16 = NULL; - gf->multiply_region.w16 = NULL; - - switch(h->mult_type) { - case GF_MULT_LOG_TABLE: - if (h->arg1 == 1) { - return gf_w16_log_zero_init(gf); - } else { - return gf_w16_log_init(gf); - } - case GF_MULT_SPLIT_TABLE: return gf_w16_split_init(gf); - case GF_MULT_TABLE: return gf_w16_table_init(gf); - case GF_MULT_DEFAULT: - case GF_MULT_SHIFT: if (gf_w16_shift_init(gf) == 0) return 0; break; - case GF_MULT_COMPOSITE: if (gf_w16_composite_init(gf) == 0) return 0; break; - default: return 0; - } - if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w16 = gf_w16_divide_from_inverse; - gf->inverse.w16 = gf_w16_euclid; - } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w16 = gf_w16_divide_from_inverse; - gf->inverse.w16 = gf_w16_matrix; - } - - if (gf->inverse.w16 != NULL && gf->divide.w16 == NULL) { - gf->divide.w16 = gf_w16_divide_from_inverse; - } - if (gf->inverse.w16 == NULL && gf->divide.w16 != NULL) { - gf->inverse.w16 = gf_w16_inverse_from_divide; - } - return 1; -} diff --git a/junk-w16-timing-tests.sh b/junk-w16-timing-tests.sh deleted file mode 100644 index cea73cf..0000000 --- a/junk-w16-timing-tests.sh +++ /dev/null @@ -1,12 +0,0 @@ -sh tmp-time-test.sh 16 LOG - - -sh tmp-time-test.sh 16 LOG_ZERO - - -sh tmp-time-test.sh 16 TABLE - - -sh tmp-time-test.sh 16 TABLE LE,LAZY - -sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,NOSSE - -sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,LAZY,SSE - -sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,LAZY,NOSSE - -sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,SSE - -sh tmp-time-test.sh 16 SPLIT 16 4 NOSSE - -sh tmp-time-test.sh 16 SPLIT 16 4 LAZY,SSE - -sh tmp-time-test.sh 16 SPLIT 16 4 LAZY,NOSSE - -sh tmp-time-test.sh 16 SPLIT 16 4 SSE - diff --git a/junk-w2.eps b/junk-w2.eps deleted file mode 100644 index 1796352..0000000 --- a/junk-w2.eps +++ /dev/null @@ -1,203 +0,0 @@ -%!PS-Adobe-2.0 EPSF-1.2 -%%Page: 1 1 -%%BoundingBox: -40 -93 289 73 -%%EndComments -1 setlinecap 1 setlinejoin -0.700 setlinewidth -0.00 setgray - -/Jrnd { exch cvi exch cvi dup 3 1 roll idiv mul } def -/JDEdict 8 dict def -JDEdict /mtrx matrix put -/JDE { - JDEdict begin - /yrad exch def - /xrad exch def - /savematrix mtrx currentmatrix def - xrad yrad scale - 0 0 1 0 360 arc - savematrix setmatrix - end -} def -/JSTR { - gsave 1 eq { gsave 1 setgray fill grestore } if - exch neg exch neg translate - clip - rotate - 4 dict begin - pathbbox /&top exch def - /&right exch def - /&bottom exch def - &right sub /&width exch def - newpath - currentlinewidth mul round dup - &bottom exch Jrnd exch &top - 4 -1 roll currentlinewidth mul setlinewidth - { &right exch moveto &width 0 rlineto stroke } for - end - grestore - newpath -} bind def - gsave /Times-Roman findfont 9.000000 scalefont setfont -0.000000 0.000000 translate -0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 288.000000 0.000000 lineto stroke -newpath 0.000000 0.000000 moveto 0.000000 -5.000000 lineto stroke -newpath 26.181818 0.000000 moveto 26.181818 -2.000000 lineto stroke -newpath 52.363636 0.000000 moveto 52.363636 -5.000000 lineto stroke -newpath 78.545456 0.000000 moveto 78.545456 -2.000000 lineto stroke -newpath 104.727272 0.000000 moveto 104.727272 -5.000000 lineto stroke -newpath 130.909088 0.000000 moveto 130.909088 -2.000000 lineto stroke -newpath 157.090912 0.000000 moveto 157.090912 -5.000000 lineto stroke -newpath 183.272720 0.000000 moveto 183.272720 -2.000000 lineto stroke -newpath 209.454544 0.000000 moveto 209.454544 -5.000000 lineto stroke -newpath 235.636368 0.000000 moveto 235.636368 -2.000000 lineto stroke -newpath 261.818176 0.000000 moveto 261.818176 -5.000000 lineto stroke -newpath 288.000000 0.000000 moveto 288.000000 -2.000000 lineto stroke -/Times-Roman findfont 11.000000 scalefont setfont -gsave 26.181818 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_p) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 52.363636 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_p SSE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 78.545456 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_b) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 104.727272 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_b SSE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 130.909088 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE SINGLE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 157.090912 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE DOUBLE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 183.272720 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE QUAD) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 209.454544 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE QUAD,LAZY) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 235.636368 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE SINGLE,SSE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 261.818176 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (LOG) dup stringwidth pop pop 0 0 moveto -show -grestore - grestore -0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 0.000000 72.000000 lineto stroke -newpath 0.000000 0.000000 moveto -5.000000 0.000000 lineto stroke -newpath 0.000000 9.916304 moveto -2.000000 9.916304 lineto stroke -newpath 0.000000 19.832607 moveto -5.000000 19.832607 lineto stroke -newpath 0.000000 29.748911 moveto -2.000000 29.748911 lineto stroke -newpath 0.000000 39.665215 moveto -5.000000 39.665215 lineto stroke -newpath 0.000000 49.581520 moveto -2.000000 49.581520 lineto stroke -newpath 0.000000 59.497822 moveto -5.000000 59.497822 lineto stroke -newpath 0.000000 69.414124 moveto -2.000000 69.414124 lineto stroke -/Times-Roman findfont 9.000000 scalefont setfont -gsave -8.000000 0.000000 translate 0.000000 rotate -0 -2.700000 translate (0) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 19.832607 translate 0.000000 rotate -0 -2.700000 translate (2000) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 39.665215 translate 0.000000 rotate -0 -2.700000 translate (4000) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 59.497822 translate 0.000000 rotate -0 -2.700000 translate (6000) dup stringwidth pop neg 0 moveto -show -grestore -/Times-Bold findfont 10.000000 scalefont setfont -gsave -33.279999 36.000000 translate 90.000000 rotate -0 0.000000 translate (MB/s) dup stringwidth pop 2 div neg 0 moveto -show -grestore - grestore - gsave - gsave gsave 26.181818 9.564870 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -9.564870 lineto - 13.090909 -9.564870 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 52.363636 15.887009 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -15.887009 lineto - 13.090909 -15.887009 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 78.545456 20.109272 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -20.109272 lineto - 13.090909 -20.109272 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 104.727272 26.881811 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -26.881811 lineto - 13.090909 -26.881811 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 130.909088 4.538296 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -4.538296 lineto - 13.090909 -4.538296 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 157.090912 8.978618 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -8.978618 lineto - 13.090909 -8.978618 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 183.272720 13.178271 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -13.178271 lineto - 13.090909 -13.178271 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 209.454544 11.003130 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -11.003130 lineto - 13.090909 -11.003130 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 235.636368 72.000000 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -72.000000 lineto - 13.090909 -72.000000 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - gsave gsave 261.818176 2.016877 translate 0.000000 rotate - newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto - -13.090909 -2.016877 lineto - 13.090909 -2.016877 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - grestore --0.000000 -0.000000 translate - grestore diff --git a/junk-w32-backup.c b/junk-w32-backup.c deleted file mode 100644 index d742a3f..0000000 --- a/junk-w32-backup.c +++ /dev/null @@ -1,1337 +0,0 @@ -/* - * gf_w32.c - * - * Routines for 32-bit Galois fields - */ - -#define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); } - -#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); } - -#include "gf_int.h" -#include <stdio.h> -#include <stdlib.h> - -#define GF_FIELD_WIDTH (32) -#define GF_FIRST_BIT (1 << 31) - -#define GF_BASE_FIELD_WIDTH (16) -#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) -#define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1 -#define GF_S_GF_16_2 (40188) -#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1); - - -struct gf_w16_logtable_data { - int log_tbl[GF_BASE_FIELD_SIZE]; - gf_val_16_t _antilog_tbl[GF_BASE_FIELD_SIZE * 4]; - gf_val_16_t *antilog_tbl; - gf_val_16_t inv_tbl[GF_BASE_FIELD_SIZE]; -}; - -struct gf_split_2_32_lazy_data { - gf_val_32_t last_value; - gf_val_32_t tables[16][4]; -}; - -struct gf_split_8_8_data { - gf_val_32_t tables[7][256][256]; -}; - -struct gf_split_4_32_lazy_data { - gf_val_32_t last_value; - gf_val_32_t tables[8][16]; -}; - -static -inline -gf_val_32_t gf_w32_inverse_from_divide (gf_t *gf, gf_val_32_t a) -{ - return gf->divide.w32(gf, 1, a); -} - -static -inline -gf_val_32_t gf_w32_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b) -{ - b = gf->inverse.w32(gf, b); - return gf->multiply.w32(gf, a, b); -} - -static -void -gf_w32_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int -xor) -{ - int i; - gf_val_32_t *s32; - gf_val_32_t *d32; - - s32 = (gf_val_32_t *) src; - d32 = (gf_val_32_t *) dest; - - if (xor) { - for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) { - d32[i] ^= gf->multiply.w32(gf, val, s32[i]); - } - } else { - for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) { - d32[i] = gf->multiply.w32(gf, val, s32[i]); - } - } -} - -static -inline -gf_val_32_t gf_w32_euclid (gf_t *gf, gf_val_32_t b) -{ - gf_val_32_t e_i, e_im1, e_ip1; - gf_val_32_t d_i, d_im1, d_ip1; - gf_val_32_t y_i, y_im1, y_ip1; - gf_val_32_t c_i; - - if (b == 0) return -1; - e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; - e_i = b; - d_im1 = 32; - for (d_i = d_im1-1; ((1 << d_i) & e_i) == 0; d_i--) ; - y_i = 1; - y_im1 = 0; - - while (e_i != 1) { - - e_ip1 = e_im1; - d_ip1 = d_im1; - c_i = 0; - - while (d_ip1 >= d_i) { - c_i ^= (1 << (d_ip1 - d_i)); - e_ip1 ^= (e_i << (d_ip1 - d_i)); - d_ip1--; - while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; - } - - y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); - y_im1 = y_i; - y_i = y_ip1; - - e_im1 = e_i; - d_im1 = d_i; - e_i = e_ip1; - d_i = d_ip1; - } - - return y_i; -} - -static -inline -gf_val_32_t gf_w32_matrix (gf_t *gf, gf_val_32_t b) -{ - return gf_bitmatrix_inverse(b, 32, ((gf_internal_t *) (gf->scratch))->prim_poly); -} - -/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only - include it for completeness. It does have the feature that it requires no - extra memory. -*/ - -static -inline -gf_val_32_t -gf_w32_shift_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) -{ - uint64_t product, i, pp, a, b, one; - gf_internal_t *h; - - a = a32; - b = b32; - h = (gf_internal_t *) gf->scratch; - one = 1; - pp = h->prim_poly | (one << 32); - - product = 0; - - for (i = 0; i < GF_FIELD_WIDTH; i++) { - if (a & (one << i)) product ^= (b << i); - } - for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { - if (product & (one << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); - } - return product; -} - -static -int gf_w32_shift_init(gf_t *gf) -{ - gf->multiply.w32 = gf_w32_shift_multiply; - gf->inverse.w32 = gf_w32_euclid; - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; - return 1; -} - -static -inline -gf_val_32_t -gf_w32_split_8_8_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) -{ - uint32_t product, i, j, mask, tb; - gf_internal_t *h; - struct gf_split_8_8_data *d8; - - h = (gf_internal_t *) gf->scratch; - d8 = (struct gf_split_8_8_data *) h->private; - product = 0; - mask = 0xff; - - for (i = 0; i < 4; i++) { - tb = b32; - for (j = 0; j < 4; j++) { - product ^= d8->tables[i+j][a32&mask][tb&mask]; - tb >>= 8; - } - a32 >>= 8; - } - return product; -} - -static -inline -void -gf_w32_split_8_8_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ - uint32_t product, mask, tb, tv, tp; - gf_internal_t *h; - struct gf_split_8_8_data *d8; - uint32_t *p00, *p01, *p02, *p03; - uint32_t *p10, *p11, *p12, *p13; - uint32_t *p20, *p21, *p22, *p23; - uint32_t *p30, *p31, *p32, *p33; - uint32_t *s32, *d32, *top; - unsigned long uls, uld; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_8_8_multiply_region", 4); - if (bytes % 4 != 0) { - gf_alignment_error("gf_w32_split_8_8_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); - } - - tv = val; - h = (gf_internal_t *) gf->scratch; - d8 = (struct gf_split_8_8_data *) h->private; - mask = 0xff; - - p00 = &(d8->tables[0][val&mask][0]); - p01 = &(d8->tables[1][val&mask][0]); - p02 = &(d8->tables[2][val&mask][0]); - p03 = &(d8->tables[3][val&mask][0]); - val >>= 8; - p10 = &(d8->tables[1][val&mask][0]); - p11 = &(d8->tables[2][val&mask][0]); - p12 = &(d8->tables[3][val&mask][0]); - p13 = &(d8->tables[4][val&mask][0]); - val >>= 8; - p20 = &(d8->tables[2][val&mask][0]); - p21 = &(d8->tables[3][val&mask][0]); - p22 = &(d8->tables[4][val&mask][0]); - p23 = &(d8->tables[5][val&mask][0]); - val >>= 8; - p30 = &(d8->tables[3][val&mask][0]); - p31 = &(d8->tables[4][val&mask][0]); - p32 = &(d8->tables[5][val&mask][0]); - p33 = &(d8->tables[6][val&mask][0]); - - s32 = (uint32_t *) src; - d32 = (uint32_t *) dest; - top = (d32 + (bytes/4)); - - while (d32 < top) { - tb = *s32; - tp = *d32; - product = (xor) ? (*d32) : 0; - product ^= p00[tb&mask]; - product ^= p10[tb&mask]; - product ^= p20[tb&mask]; - product ^= p30[tb&mask]; - - tb >>= 8; - product ^= p01[tb&mask]; - product ^= p11[tb&mask]; - product ^= p21[tb&mask]; - product ^= p31[tb&mask]; - - tb >>= 8; - product ^= p02[tb&mask]; - product ^= p12[tb&mask]; - product ^= p22[tb&mask]; - product ^= p32[tb&mask]; - - tb >>= 8; - product ^= p03[tb&mask]; - product ^= p13[tb&mask]; - product ^= p23[tb&mask]; - product ^= p33[tb&mask]; - *d32 = product; - s32++; - d32++; - } -} - -static -void -gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ - unsigned long uls, uld; - gf_internal_t *h; - struct gf_split_2_32_lazy_data *ld; - int i; - gf_val_32_t pp, v, v2, s, *s32, *d32, *top; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region", 4); - if (bytes % 4 != 0) { - gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - h = (gf_internal_t *) gf->scratch; - pp = h->prim_poly; - - ld = (struct gf_split_2_32_lazy_data *) h->private; - - if (ld->last_value != val) { - v = val; - for (i = 0; i < 16; i++) { - v2 = (v << 1); - if (v & GF_FIRST_BIT) v2 ^= pp; - ld->tables[i][0] = 0; - ld->tables[i][1] = v; - ld->tables[i][2] = v2; - ld->tables[i][3] = (v2 ^ v); - v = (v2 << 1); - if (v2 & GF_FIRST_BIT) v ^= pp; - } - } - ld->last_value = val; - - s32 = (gf_val_32_t *) src; - d32 = (gf_val_32_t *) dest; - top = d32 + (bytes/4); - - while (d32 != top) { - v = (xor) ? *d32 : 0; - s = *s32; - i = 0; - while (s != 0) { - v ^= ld->tables[i][s&3]; - s >>= 2; - i++; - } - *d32 = v; - d32++; - s32++; - } -} - -static -void -gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ -#ifdef INTEL_SSE4 - unsigned long uls, uld; - gf_internal_t *h; - int i, m, j, tindex; - gf_val_32_t pp, v, v2, s, *s32, *d32, *top; - __m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region", 4); - if (bytes % 4 != 0) { - gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - h = (gf_internal_t *) gf->scratch; - pp = h->prim_poly; - - uls &= 0xf; - - s32 = (gf_val_32_t *) src; - d32 = (gf_val_32_t *) dest; - top = d32 + (bytes/4); - - if (uls != 0) { - while (uls != 16) { - if (xor) { - *d32 ^= gf->multiply.w32(gf, *s32, val); - } else { - *d32 = gf->multiply.w32(gf, *s32, val); - } - *s32++; - *d32++; - if (d32 == top) return; - uls += 4; - } - } - - uld = (unsigned long) top; - top = (gf_val_32_t *) (uld - (uld & 0xf)); - uld &= 0xf; - - v = val; - for (i = 0; i < 16; i++) { - v2 = (v << 1); - if (v & GF_FIRST_BIT) v2 ^= pp; - tables[i] = _mm_set_epi32(v2 ^ v, v2, v, 0); - v = (v2 << 1); - if (v2 & GF_FIRST_BIT) v ^= pp; - } - - shuffler = _mm_set_epi8(0xc, 0xc, 0xc, 0xc, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0); - adder = _mm_set_epi8(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0); - mask1 = _mm_set1_epi8(0x3); - mask2 = _mm_set1_epi8(0xc); - - while (d32 != top) { - pi = (xor) ? _mm_load_si128 ((__m128i *) d32) : _mm_setzero_si128(); - vi = _mm_load_si128((__m128i *) s32); - - tindex = 0; - for (i = 0; i < 4; i++) { - si = _mm_shuffle_epi8(vi, shuffler); - - xi = _mm_and_si128(si, mask1); - xi = _mm_slli_epi16(xi, 2); - xi = _mm_xor_si128(xi, adder); - pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); - tindex++; - - xi = _mm_and_si128(si, mask2); - xi = _mm_xor_si128(xi, adder); - pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); - si = _mm_srli_epi16(si, 2); - tindex++; - - xi = _mm_and_si128(si, mask2); - xi = _mm_xor_si128(xi, adder); - pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); - si = _mm_srli_epi16(si, 2); - tindex++; - - xi = _mm_and_si128(si, mask2); - xi = _mm_xor_si128(xi, adder); - pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); - si = _mm_srli_epi16(si, 2); - tindex++; - - vi = _mm_srli_epi32(vi, 8); - } - _mm_store_si128((__m128i *) d32, pi); - d32 += 4; - s32 += 4; - } - - while (uld > 0) { - if (xor) { - *d32 ^= gf->multiply.w32(gf, *s32, val); - } else { - *d32 = gf->multiply.w32(gf, *s32, val); - } - *s32++; - *d32++; - uld -= 4; - } - - -#endif -} - -static -void -gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ - unsigned long uls, uld; - gf_internal_t *h; - struct gf_split_4_32_lazy_data *ld; - int i, j, k; - gf_val_32_t pp, v, s, *s32, *d32, *top; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region", 4); - if (bytes % 4 != 0) { - gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - h = (gf_internal_t *) gf->scratch; - pp = h->prim_poly; - - ld = (struct gf_split_4_32_lazy_data *) h->private; - - if (ld->last_value != val) { - v = val; - for (i = 0; i < 8; i++) { - ld->tables[i][0] = 0; - for (j = 1; j < 16; j <<= 1) { - for (k = 0; k < j; k++) { - ld->tables[i][k^j] = (v ^ ld->tables[i][k]); - } - v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); - } - } - } - ld->last_value = val; - - s32 = (gf_val_32_t *) src; - d32 = (gf_val_32_t *) dest; - top = d32 + (bytes/4); - - while (d32 != top) { - v = (xor) ? *d32 : 0; - s = *s32; - i = 0; - while (s != 0) { - v ^= ld->tables[i][s&0xf]; - s >>= 4; - i++; - } - *d32 = v; - d32++; - s32++; - } -} - -static -void -gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ -#ifdef INTEL_SSE4 - unsigned long uls, uld; - gf_internal_t *h; - int i, m, j, k, tindex; - gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop; - __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3; - __m128i tv1, tv2, tv3, tv0; - struct gf_split_4_32_lazy_data *ld; - uint8_t btable[16]; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4); - if (bytes % 4 != 0) { - gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - h = (gf_internal_t *) gf->scratch; - pp = h->prim_poly; - - uls &= 0xf; - - s32 = (gf_val_32_t *) src; - d32 = (gf_val_32_t *) dest; - top = d32 + (bytes/4); - - if (uls != 0) { - while (uls != 16) { - if (xor) { - *d32 ^= gf->multiply.w32(gf, *s32, val); - } else { - *d32 = gf->multiply.w32(gf, *s32, val); - } - *s32++; - *d32++; - if (d32 == top) return; - uls += 4; - } - } - - uld = (unsigned long) top; - realtop = top; - - /* You need the size of this region to be a multiple of 64 bytes */ - bytes = (top - d32); - bytes -= (bytes & 0xf); - top = (d32 + bytes); - - ld = (struct gf_split_4_32_lazy_data *) h->private; - - v = val; - for (i = 0; i < 8; i++) { - ld->tables[i][0] = 0; - for (j = 1; j < 16; j <<= 1) { - for (k = 0; k < j; k++) { - ld->tables[i][k^j] = (v ^ ld->tables[i][k]); - } - v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); - } - for (j = 0; j < 4; j++) { - for (k = 0; k < 16; k++) { - btable[k] = (uint8_t) ld->tables[i][k]; - ld->tables[i][k] >>= 8; - } - tables[i][j] = _mm_loadu_si128((__m128i *) btable); - } - } - - mask1 = _mm_set1_epi8(0xf); - - if (xor) { - while (d32 != top) { - p0 = _mm_load_si128 ((__m128i *) d32); - p1 = _mm_load_si128 ((__m128i *) (d32+4)); - p2 = _mm_load_si128 ((__m128i *) (d32+8)); - p3 = _mm_load_si128 ((__m128i *) (d32+12)); - - v0 = _mm_load_si128((__m128i *) s32); s32 += 4; - v1 = _mm_load_si128((__m128i *) s32); s32 += 4; - v2 = _mm_load_si128((__m128i *) s32); s32 += 4; - v3 = _mm_load_si128((__m128i *) s32); s32 += 4; - - si = _mm_and_si128(v0, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); - - v0 = _mm_srli_epi32(v0, 4); - si = _mm_and_si128(v0, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); - - si = _mm_and_si128(v1, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); - - v1 = _mm_srli_epi32(v1, 4); - si = _mm_and_si128(v1, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); - - si = _mm_and_si128(v2, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); - - v2 = _mm_srli_epi32(v2, 4); - si = _mm_and_si128(v2, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); - - si = _mm_and_si128(v3, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); - - v3 = _mm_srli_epi32(v3, 4); - si = _mm_and_si128(v3, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); - - _mm_store_si128((__m128i *) d32, p0); - _mm_store_si128((__m128i *) (d32+4), p1); - _mm_store_si128((__m128i *) (d32+8), p2); - _mm_store_si128((__m128i *) (d32+12), p3); - d32 += 16; - } - } else { - while (d32 != top) { - - v0 = _mm_load_si128((__m128i *) s32); s32 += 4; - v1 = _mm_load_si128((__m128i *) s32); s32 += 4; - v2 = _mm_load_si128((__m128i *) s32); s32 += 4; - v3 = _mm_load_si128((__m128i *) s32); s32 += 4; - - - - si = _mm_and_si128(v0, mask1); - p0 = _mm_shuffle_epi8(tables[0][0], si); - p1 = _mm_shuffle_epi8(tables[0][1], si); - p2 = _mm_shuffle_epi8(tables[0][2], si); - p3 = _mm_shuffle_epi8(tables[0][3], si); - - v0 = _mm_srli_epi32(v0, 4); - si = _mm_and_si128(v0, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); - - si = _mm_and_si128(v1, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); - - v1 = _mm_srli_epi32(v1, 4); - si = _mm_and_si128(v1, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); - - si = _mm_and_si128(v2, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); - - v2 = _mm_srli_epi32(v2, 4); - si = _mm_and_si128(v2, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); - - si = _mm_and_si128(v3, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); - - v3 = _mm_srli_epi32(v3, 4); - si = _mm_and_si128(v3, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); - - _mm_store_si128((__m128i *) d32, p0); - _mm_store_si128((__m128i *) (d32+4), p1); - _mm_store_si128((__m128i *) (d32+8), p2); - _mm_store_si128((__m128i *) (d32+12), p3); - d32 += 16; - } - } - - while (d32 < realtop) { - if (xor) { - *d32 ^= gf->multiply.w32(gf, *s32, val); - } else { - *d32 = gf->multiply.w32(gf, *s32, val); - } - *s32++; - *d32++; - } - - -#endif -} - -/* -static -void -gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ -#ifdef INTEL_SSE4 - unsigned long uls, uld; - gf_internal_t *h; - int i, m, j, k, tindex; - gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop; - __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1; - struct gf_split_4_32_lazy_data *ld; - uint8_t btable[16]; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4); - if (bytes % 4 != 0) { - gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - h = (gf_internal_t *) gf->scratch; - pp = h->prim_poly; - - uls &= 0xf; - - s32 = (gf_val_32_t *) src; - d32 = (gf_val_32_t *) dest; - top = d32 + (bytes/4); - - if (uls != 0) { - while (uls != 16) { - if (xor) { - *d32 ^= gf->multiply.w32(gf, *s32, val); - } else { - *d32 = gf->multiply.w32(gf, *s32, val); - } - *s32++; - *d32++; - if (d32 == top) return; - uls += 4; - } - } - - uld = (unsigned long) top; - realtop = top; - - bytes = (top - d32); - bytes -= (bytes & 0xf); - top = (d32 + bytes); - - ld = (struct gf_split_4_32_lazy_data *) h->private; - - v = val; - for (i = 0; i < 8; i++) { - ld->tables[i][0] = 0; - for (j = 1; j < 16; j <<= 1) { - for (k = 0; k < j; k++) { - ld->tables[i][k^j] = (v ^ ld->tables[i][k]); - } - v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); - } - for (j = 0; j < 4; j++) { - for (k = 0; k < 16; k++) { - btable[k] = (uint8_t) ld->tables[i][k]; - ld->tables[i][k] >>= 8; - } - tables[i][j] = _mm_loadu_si128((__m128i *) btable); - } - } - - mask1 = _mm_set1_epi8(0xf); - - if (xor) { - while (d32 != top) { - p0 = _mm_load_si128 ((__m128i *) d32); - p1 = _mm_load_si128 ((__m128i *) (d32+4)); - p2 = _mm_load_si128 ((__m128i *) (d32+8)); - p3 = _mm_load_si128 ((__m128i *) (d32+12)); - - for (i = 0; i < 8; i++) { - vi = _mm_load_si128((__m128i *) s32); - - si = _mm_and_si128(vi, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si)); - - i++; - vi = _mm_srli_epi32(vi, 4); - si = _mm_and_si128(vi, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si)); - s32 += 4; - } - _mm_store_si128((__m128i *) d32, p0); - _mm_store_si128((__m128i *) (d32+4), p1); - _mm_store_si128((__m128i *) (d32+8), p2); - _mm_store_si128((__m128i *) (d32+12), p3); - d32 += 16; - } - } else { - while (d32 != top) { - for (i = 0; i < 8; i++) { - vi = _mm_load_si128((__m128i *) s32); - - si = _mm_and_si128(vi, mask1); - p0 = _mm_shuffle_epi8(tables[i][0], si); - p1 = _mm_shuffle_epi8(tables[i][1], si); - p2 = _mm_shuffle_epi8(tables[i][2], si); - p3 = _mm_shuffle_epi8(tables[i][3], si); - - i++; - vi = _mm_srli_epi32(vi, 4); - si = _mm_and_si128(vi, mask1); - p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si)); - p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si)); - p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si)); - p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si)); - s32 += 4; - } - _mm_store_si128((__m128i *) d32, p0); - _mm_store_si128((__m128i *) (d32+4), p1); - _mm_store_si128((__m128i *) (d32+8), p2); - _mm_store_si128((__m128i *) (d32+12), p3); - d32 += 16; - } - } - - while (d32 < realtop) { - if (xor) { - *d32 ^= gf->multiply.w32(gf, *s32, val); - } else { - *d32 = gf->multiply.w32(gf, *s32, val); - } - *s32++; - *d32++; - } - - -#endif -} -*/ - -static -int gf_w32_split_init(gf_t *gf) -{ - gf_internal_t *h; - struct gf_split_2_32_lazy_data *ld2; - struct gf_split_4_32_lazy_data *ld4; - struct gf_split_8_8_data *d8; - uint32_t p, basep; - int i, j, exp; - - h = (gf_internal_t *) gf->scratch; - - /* Defaults */ - gf->multiply_region.w32 = gf_w32_multiply_region_from_single; - gf->multiply.w32 = gf_w32_shift_multiply; - gf->inverse.w32 = gf_w32_euclid; - - if (h->arg1 == 8 && h->arg2 == 8) { - gf->multiply.w32 = gf_w32_split_8_8_multiply; - gf->multiply_region.w32 = gf_w32_split_8_8_multiply_region; - d8 = (struct gf_split_8_8_data *) h->private; - basep = 1; - for (exp = 0; exp < 7; exp++) { - for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0; - for (i = 0; i < 256; i++) d8->tables[exp][i][0] = 0; - d8->tables[exp][1][1] = basep; - for (i = 2; i < 256; i++) { - if (i&1) { - p = d8->tables[exp][i^1][1]; - d8->tables[exp][i][1] = p ^ basep; - } else { - p = d8->tables[exp][i>>1][1]; - d8->tables[exp][i][1] = GF_MULTBY_TWO(p); - } - } - for (i = 1; i < 256; i++) { - p = d8->tables[exp][i][1]; - for (j = 1; j < 256; j++) { - if (j&1) { - d8->tables[exp][i][j] = d8->tables[exp][i][j^1] ^ p; - } else { - d8->tables[exp][i][j] = GF_MULTBY_TWO(d8->tables[exp][i][j>>1]); - } - } - } - for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep); - } - } - if ((h->arg1 == 2 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 2)) { - ld2 = (struct gf_split_2_32_lazy_data *) h->private; - ld2->last_value = 0; - if (h->region_type & GF_REGION_SSE) { - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region; - } else { - gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; - } - } - if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4)) { - ld4 = (struct gf_split_4_32_lazy_data *) h->private; - ld4->last_value = 0; - if (h->region_type & GF_REGION_SSE) { - if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; - } - } else { - gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; - } - } - return 1; -} - -static -gf_val_32_t -gf_w32_composite_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) -{ - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - uint16_t b0 = b & 0x0000ffff; - uint16_t b1 = (b & 0xffff0000) >> 16; - uint16_t a0 = a & 0x0000ffff; - uint16_t a1 = (a & 0xffff0000) >> 16; - uint16_t a1b1; - - a1b1 = base_gf->multiply.w16(base_gf, a1, b1); - - return ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); -} - -/* - * Composite field division trick (explained in 2007 tech report) - * - * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 - * - * let c = b^-1 - * - * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) - * - * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 - * - * let d = b1c1 and d+1 = b0c0 - * - * solve s*b1c1+b1c0+b0c1 = 0 - * - * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 - * - * c0 = (d+1)b0^-1 - * c1 = d*b1^-1 - * - * a / b = a * c - */ -static -gf_val_32_t -gf_w32_composite_inverse(gf_t *gf, gf_val_32_t a) -{ - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - uint16_t a0 = a & 0x0000ffff; - uint16_t a1 = (a & 0xffff0000) >> 16; - uint16_t c0, c1, d, tmp; - uint32_t c; - uint16_t a0inv, a1inv; - - if (a0 == 0) { - a1inv = base_gf->inverse.w16(base_gf, a1); - c0 = base_gf->multiply.w16(base_gf, a1inv, GF_S_GF_16_2); - c1 = a1inv; - } else if (a1 == 0) { - c0 = base_gf->inverse.w16(base_gf, a0); - c1 = 0; - } else { - a1inv = base_gf->inverse.w16(base_gf, a1); - a0inv = base_gf->inverse.w16(base_gf, a0); - - d = base_gf->multiply.w16(base_gf, a1, a0inv); - - tmp = (base_gf->multiply.w16(base_gf, a1, a0inv) ^ base_gf->multiply.w16(base_gf, a0, a1inv) ^ GF_S_GF_16_2); - tmp = base_gf->inverse.w16(base_gf, tmp); - - d = base_gf->multiply.w16(base_gf, d, tmp); - - c0 = base_gf->multiply.w16(base_gf, (d^1), a0inv); - c1 = base_gf->multiply.w16(base_gf, d, a1inv); - } - - c = c0 | (c1 << 16); - - return c; -} - -static -gf_val_32_t -gf_w32_composite_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) -{ - gf_val_32_t binv; - - binv = gf_w32_composite_inverse(gf, b); - - return gf_w32_composite_multiply(gf, a, binv); -} - -static -void -gf_w32_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ - unsigned long uls, uld; - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - int i=0; - struct gf_w16_logtable_data * ltd; - uint16_t b0 = val & 0x0000ffff; - uint16_t b1 = (val & 0xffff0000) >> 16; - uint32_t *s32 = (uint32_t *) src; - uint32_t *d32 = (uint32_t *) dest; - uint16_t a0, a1, a1b1; - int num_syms = bytes >> 2; - int sym_divisible = bytes % 4; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); - if (sym_divisible) { - gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - ltd = (struct gf_w16_logtable_data *) h->private; - - if (xor) { - for (i = 0;i < num_syms; i++) { - a0 = s32[i] & 0x0000ffff; - a1 = (s32[i] & 0xffff0000) >> 16; - a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; - - d32[i] ^= ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | - ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ - ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); - - } - } else { - for (i = 0;i < num_syms; i++) { - a0 = s32[i] & 0x0000ffff; - a1 = (s32[i] & 0xffff0000) >> 16; - a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; - - d32[i] = ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | - ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ - ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); - } - } -} - -static -void -gf_w32_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ - unsigned long uls, uld; - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - int i=0; - struct gf_w16_logtable_data * ltd; - uint16_t b0 = val & 0x0000ffff; - uint16_t b1 = (val & 0xffff0000) >> 16; - uint32_t *s32 = (uint32_t *) src; - uint32_t *d32 = (uint32_t *) dest; - uint16_t a0, a1, a1b1; - int num_syms = bytes >> 2; - int sym_divisible = bytes % 4; - - uls = (unsigned long) src; - uld = (unsigned long) dest; - if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); - if (sym_divisible) { - gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); - } - - if (val == 0) { - if (xor) return; - bzero(dest, bytes); - return; - } - - ltd = (struct gf_w16_logtable_data *) h->private; - - if (xor) { - for (i = 0;i < num_syms; i++) { - a0 = s32[i] & 0x0000ffff; - a1 = (s32[i] & 0xffff0000) >> 16; - a1b1 = base_gf->multiply.w16(base_gf, a1, b1); - - d32[i] ^= ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | - ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); - - } - } else { - for (i = 0;i < num_syms; i++) { - a0 = s32[i] & 0x0000ffff; - a1 = (s32[i] & 0xffff0000) >> 16; - a1b1 = base_gf->multiply.w16(base_gf, a1, b1); - - d32[i] = ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | - ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); - } - } -} - - - -static -void -gf_w32_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) -{ - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - gf_val_16_t val0 = val & 0x0000ffff; - gf_val_16_t val1 = (val & 0xffff0000) >> 16; - int sub_reg_size = bytes / 2; - - if (bytes % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1); - if (sub_reg_size % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1); - - if (!xor) { - memset(dest, 0, bytes); - } - - base_gf->multiply_region.w16(base_gf, src, dest, val0, sub_reg_size, xor); - base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1); - base_gf->multiply_region.w16(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor); - base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1); - base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w16(base_gf, GF_S_GF_16_2, val1), sub_reg_size, 1); -} - -static -int gf_w32_composite_init(gf_t *gf) -{ - struct gf_w16_logtable_data *ltd; - gf_internal_t *h = (gf_internal_t *) gf->scratch; - gf_t *base_gf = h->base_gf; - gf_val_32_t a, b; - uint64_t prim_poly = ((gf_internal_t *) base_gf->scratch)->prim_poly; - int i; - - ltd = (struct gf_w16_logtable_data *) h->private; - - ltd->log_tbl[0] = 0; - - bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); - - ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_BASE_FIELD_SIZE * 2]); - - b = 1; - for (i = 0; i < GF_BASE_FIELD_GROUP_SIZE; i++) { - ltd->log_tbl[b] = (gf_val_16_t)i; - ltd->antilog_tbl[i] = (gf_val_16_t)b; - ltd->antilog_tbl[i+GF_BASE_FIELD_GROUP_SIZE] = (gf_val_16_t)b; - b <<= 1; - if (b & GF_BASE_FIELD_SIZE) { - b = b ^ prim_poly; - } - } - ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ - ltd->inv_tbl[1] = 1; - for (i = 2; i < GF_BASE_FIELD_SIZE; i++) { - ltd->inv_tbl[i] = ltd->antilog_tbl[GF_BASE_FIELD_GROUP_SIZE-ltd->log_tbl[i]]; - } - - if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt; - } else { - if (h->region_type & GF_REGION_SINGLE_TABLE) { - gf->multiply_region.w32 = gf_w32_composite_multiply_region_table; - } else { - gf->multiply_region.w32 = gf_w32_composite_multiply_region; - } - } - - gf->multiply.w32 = gf_w32_composite_multiply; - gf->divide.w32 = gf_w32_composite_divide; - gf->inverse.w32 = gf_w32_composite_inverse; - - return 1; -} - -int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) -{ - int ss; - - ss = (GF_REGION_SSE | GF_REGION_NOSSE); - switch(mult_type) - { - case GF_MULT_SPLIT_TABLE: - if (arg1 == 8 && arg2 == 8){ - if (region_type != GF_REGION_DEFAULT) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_split_8_8_data) + 64; - } - if ((arg1 == 2 && arg2 == 32) || (arg2 == 2 && arg1 == 32)) { - region_type &= (~GF_REGION_LAZY); - if ((region_type & ss) == ss) return -1; - if ((region_type | ss) != ss) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64; - } - if ((arg1 == 4 && arg2 == 32) || (arg2 == 4 && arg1 == 32)) { - region_type &= (~GF_REGION_LAZY); - if (region_type & GF_REGION_ALTMAP) { - region_type &= (~GF_REGION_ALTMAP); - if ((region_type & ss) == ss) return -1; - if ((region_type | ss) != ss) return -1; - return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64; - } else return -1; - } - return -1; - case GF_MULT_DEFAULT: - case GF_MULT_SHIFT: - if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; - return sizeof(gf_internal_t); - break; - case GF_MULT_COMPOSITE: - if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; - if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1; - if (arg1 == 2 && arg2 == 16 || arg2 == 2 && arg1 == 16) { - return sizeof(gf_internal_t) + sizeof(struct gf_w16_logtable_data) + 64; - } else { - return -1; - } - default: - return -1; - } -} - -int gf_w32_init(gf_t *gf) -{ - gf_internal_t *h; - - h = (gf_internal_t *) gf->scratch; - if (h->prim_poly == 0) h->prim_poly = 0x400007; - - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = NULL; - - switch(h->mult_type) { - case GF_MULT_DEFAULT: - case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break; - case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break; - case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break; - default: return 0; - } - if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w32_divide_from_inverse; - gf->inverse.w32 = gf_w32_euclid; - } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w32_divide_from_inverse; - gf->inverse.w32 = gf_w32_matrix; - } - - if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w32_divide_from_inverse; - } - if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { - gf->inverse.w32 = gf_w32_inverse_from_divide; - } - return 1; -} diff --git a/junk-w32-single-time.c b/junk-w32-single-time.c deleted file mode 100644 index ab406b7..0000000 --- a/junk-w32-single-time.c +++ /dev/null @@ -1,16 +0,0 @@ -echo "SHIFT" `gf_time 32 M 0 10240 10240 SHIFT - - | tail -n 1` -echo "GROUP 2 4" `gf_time 32 M 0 10240 10240 GROUP 2 4 - - | tail -n 1` -echo "GROUP 3 4" `gf_time 32 M 0 10240 10240 GROUP 3 4 - - | tail -n 1` -echo "GROUP 4 4" `gf_time 32 M 0 10240 10240 GROUP 4 4 - - | tail -n 1` -echo "GROUP 2 8" `gf_time 32 M 0 10240 10240 GROUP 2 8 - - | tail -n 1` -echo "GROUP 3 8" `gf_time 32 M 0 10240 10240 GROUP 3 8 - - | tail -n 1` -echo "GROUP 4 8" `gf_time 32 M 0 10240 10240 GROUP 4 8 - - | tail -n 1` -echo "GROUP 2 2" `gf_time 32 M 0 10240 10240 GROUP 2 2 - - | tail -n 1` -echo "GROUP 3 3" `gf_time 32 M 0 10240 10240 GROUP 3 3 - - | tail -n 1` -echo "BYTWO_p" `gf_time 32 M 0 10240 10240 BYTWO_p - - | tail -n 1` -echo "BYTWO_b" `gf_time 32 M 0 10240 10240 BYTWO_b - - | tail -n 1` -echo "SPLIT 32 2" `gf_time 32 M 0 10240 10240 SPLIT 32 2 - - | tail -n 1` -echo "SPLIT 32 4" `gf_time 32 M 0 10240 10240 SPLIT 32 4 - - | tail -n 1` -echo "SPLIT 32 8" `gf_time 32 M 0 10240 10240 SPLIT 32 8 - - | tail -n 1` -echo "SPLIT 8 8" `gf_time 32 M 0 10240 10240 SPLIT 8 8 - - | tail -n 1` -echo "COMPOSITE 2 16 -" `gf_time 32 M 0 10240 10240 COMPOSITE 2 16 - - - | tail -n 1` diff --git a/junk-w4-out.txt b/junk-w4-out.txt deleted file mode 100644 index 60efcdc..0000000 --- a/junk-w4-out.txt +++ /dev/null @@ -1,60 +0,0 @@ -Seed: 1345648646 -Buffer-Const,s!=d,xor=0: 1.005451 s 971.268 MB/s -Buffer-Const,s!=d,xor=1: 1.029715 s 948.382 MB/s -Buffer-Const,s==d,xor=0: 0.989556 s 986.869 MB/s -Buffer-Const,s==d,xor=1: 1.026105 s 951.718 MB/s -BYTWO_p -Seed: 1345648655 -Buffer-Const,s!=d,xor=0: 0.603574 s 1617.966 MB/s -Buffer-Const,s!=d,xor=1: 0.612757 s 1593.720 MB/s -Buffer-Const,s==d,xor=0: 0.599630 s 1628.609 MB/s -Buffer-Const,s==d,xor=1: 0.622749 s 1568.149 MB/s -BYTWO_p SSE -Seed: 1345648662 -Buffer-Const,s!=d,xor=0: 0.487348 s 2003.831 MB/s -Buffer-Const,s!=d,xor=1: 0.488745 s 1998.100 MB/s -Buffer-Const,s==d,xor=0: 0.470528 s 2075.463 MB/s -Buffer-Const,s==d,xor=1: 0.480067 s 2034.223 MB/s -BYTWO_b -Seed: 1345648669 -Buffer-Const,s!=d,xor=0: 0.359088 s 2719.564 MB/s -Buffer-Const,s!=d,xor=1: 0.365816 s 2669.543 MB/s -Buffer-Const,s==d,xor=0: 0.361701 s 2699.920 MB/s -Buffer-Const,s==d,xor=1: 0.354540 s 2754.449 MB/s -BYTWO_b SSE -Seed: 1345648689 -Buffer-Const,s!=d,xor=0: 2.036338 s 479.568 MB/s -Buffer-Const,s!=d,xor=1: 2.237701 s 436.413 MB/s -Buffer-Const,s==d,xor=0: 2.048971 s 476.611 MB/s -Buffer-Const,s==d,xor=1: 2.229312 s 438.056 MB/s -TABLE SINGLE -Seed: 1345648703 -Buffer-Const,s!=d,xor=0: 1.074082 s 909.207 MB/s -Buffer-Const,s!=d,xor=1: 1.083797 s 901.057 MB/s -Buffer-Const,s==d,xor=0: 1.077001 s 906.743 MB/s -Buffer-Const,s==d,xor=1: 1.079369 s 904.753 MB/s -TABLE DOUBLE -Seed: 1345648712 -Buffer-Const,s!=d,xor=0: 0.743830 s 1312.884 MB/s -Buffer-Const,s!=d,xor=1: 0.760719 s 1283.736 MB/s -Buffer-Const,s==d,xor=0: 0.708908 s 1377.559 MB/s -Buffer-Const,s==d,xor=1: 0.727896 s 1341.624 MB/s -TABLE QUAD -Seed: 1345648720 -Buffer-Const,s!=d,xor=0: 0.898810 s 1086.506 MB/s -Buffer-Const,s!=d,xor=1: 0.876269 s 1114.455 MB/s -Buffer-Const,s==d,xor=0: 0.872698 s 1119.015 MB/s -Buffer-Const,s==d,xor=1: 0.873175 s 1118.404 MB/s -TABLE QUAD,LAZY -Seed: 1345648729 -Buffer-Const,s!=d,xor=0: 0.143798 s 6791.205 MB/s -Buffer-Const,s!=d,xor=1: 0.151166 s 6460.201 MB/s -Buffer-Const,s==d,xor=0: 0.123824 s 7886.721 MB/s -Buffer-Const,s==d,xor=1: 0.123538 s 7904.940 MB/s -TABLE SINGLE,SSE -Seed: 1345648748 -Buffer-Const,s!=d,xor=0: 4.562493 s 214.041 MB/s -Buffer-Const,s!=d,xor=1: 5.116838 s 190.853 MB/s -Buffer-Const,s==d,xor=0: 4.533105 s 215.429 MB/s -Buffer-Const,s==d,xor=1: 5.053730 s 193.236 MB/s -LOG diff --git a/junk-w4-timing-out.txt b/junk-w4-timing-out.txt deleted file mode 100644 index 6daadd4..0000000 --- a/junk-w4-timing-out.txt +++ /dev/null @@ -1,792 +0,0 @@ -Seed: 1352748099 -Buffer-Const,s!=d,xor=0: 0.608121 s 210.484 MB/s -Buffer-Const,s!=d,xor=1: 0.692329 s 184.883 MB/s -1024 131072 4 LOG - - -Seed: 1352748102 -Buffer-Const,s!=d,xor=0: 0.699226 s 183.060 MB/s -Buffer-Const,s!=d,xor=1: 0.687310 s 186.233 MB/s -2048 65536 4 LOG - - -Seed: 1352748106 -Buffer-Const,s!=d,xor=0: 0.604397 s 211.781 MB/s -Buffer-Const,s!=d,xor=1: 0.682591 s 187.521 MB/s -4096 32768 4 LOG - - -Seed: 1352748109 -Buffer-Const,s!=d,xor=0: 0.602384 s 212.489 MB/s -Buffer-Const,s!=d,xor=1: 0.678849 s 188.555 MB/s -8192 16384 4 LOG - - -Seed: 1352748112 -Buffer-Const,s!=d,xor=0: 0.602103 s 212.588 MB/s -Buffer-Const,s!=d,xor=1: 0.688450 s 185.925 MB/s -16384 8192 4 LOG - - -Seed: 1352748115 -Buffer-Const,s!=d,xor=0: 0.598464 s 213.881 MB/s -Buffer-Const,s!=d,xor=1: 0.676076 s 189.328 MB/s -32768 4096 4 LOG - - -Seed: 1352748119 -Buffer-Const,s!=d,xor=0: 0.611499 s 209.322 MB/s -Buffer-Const,s!=d,xor=1: 0.693351 s 184.611 MB/s -65536 2048 4 LOG - - -Seed: 1352748122 -Buffer-Const,s!=d,xor=0: 0.609786 s 209.910 MB/s -Buffer-Const,s!=d,xor=1: 0.689794 s 185.563 MB/s -131072 1024 4 LOG - - -Seed: 1352748125 -Buffer-Const,s!=d,xor=0: 0.619027 s 206.776 MB/s -Buffer-Const,s!=d,xor=1: 0.703627 s 181.915 MB/s -262144 512 4 LOG - - -Seed: 1352748129 -Buffer-Const,s!=d,xor=0: 0.605785 s 211.296 MB/s -Buffer-Const,s!=d,xor=1: 0.696728 s 183.716 MB/s -524288 256 4 LOG - - -Seed: 1352748132 -Buffer-Const,s!=d,xor=0: 0.591555 s 216.379 MB/s -Buffer-Const,s!=d,xor=1: 0.666735 s 191.980 MB/s -1048576 128 4 LOG - - -Seed: 1352748135 -Buffer-Const,s!=d,xor=0: 0.623167 s 205.403 MB/s -Buffer-Const,s!=d,xor=1: 0.675010 s 189.627 MB/s -2097152 64 4 LOG - - -Seed: 1352748138 -Buffer-Const,s!=d,xor=0: 0.572467 s 223.594 MB/s -Buffer-Const,s!=d,xor=1: 0.733714 s 174.455 MB/s -4194304 32 4 LOG - - -Seed: 1352748142 -Buffer-Const,s!=d,xor=0: 0.617676 s 207.228 MB/s -Buffer-Const,s!=d,xor=1: 0.742744 s 172.334 MB/s -8388608 16 4 LOG - - -Seed: 1352748145 -Buffer-Const,s!=d,xor=0: 0.579833 s 220.753 MB/s -Buffer-Const,s!=d,xor=1: 0.736355 s 173.829 MB/s -16777216 8 4 LOG - - -Seed: 1352748148 -Buffer-Const,s!=d,xor=0: 0.682980 s 187.414 MB/s -Buffer-Const,s!=d,xor=1: 0.738846 s 173.243 MB/s -33554432 4 4 LOG - - -Seed: 1352748152 -Buffer-Const,s!=d,xor=0: 0.692141 s 184.933 MB/s -Buffer-Const,s!=d,xor=1: 0.725968 s 176.316 MB/s -67108864 2 4 LOG - - -Seed: 1352748155 -Buffer-Const,s!=d,xor=0: 0.737346 s 173.596 MB/s -Buffer-Const,s!=d,xor=1: 0.725769 s 176.365 MB/s -134217728 1 4 LOG - - -Seed: 1352748159 -Buffer-Const,s!=d,xor=0: 0.252694 s 506.541 MB/s -Buffer-Const,s!=d,xor=1: 0.280102 s 456.976 MB/s -1024 131072 4 TABLE SINGLE - -Seed: 1352748160 -Buffer-Const,s!=d,xor=0: 0.246866 s 518.501 MB/s -Buffer-Const,s!=d,xor=1: 0.276830 s 462.377 MB/s -2048 65536 4 TABLE SINGLE - -Seed: 1352748162 -Buffer-Const,s!=d,xor=0: 0.246874 s 518.482 MB/s -Buffer-Const,s!=d,xor=1: 0.274016 s 467.125 MB/s -4096 32768 4 TABLE SINGLE - -Seed: 1352748164 -Buffer-Const,s!=d,xor=0: 0.247869 s 516.402 MB/s -Buffer-Const,s!=d,xor=1: 0.271679 s 471.144 MB/s -8192 16384 4 TABLE SINGLE - -Seed: 1352748166 -Buffer-Const,s!=d,xor=0: 0.244581 s 523.345 MB/s -Buffer-Const,s!=d,xor=1: 0.270779 s 472.710 MB/s -16384 8192 4 TABLE SINGLE - -Seed: 1352748167 -Buffer-Const,s!=d,xor=0: 0.256167 s 499.675 MB/s -Buffer-Const,s!=d,xor=1: 0.278188 s 460.121 MB/s -32768 4096 4 TABLE SINGLE - -Seed: 1352748169 -Buffer-Const,s!=d,xor=0: 0.248786 s 514.498 MB/s -Buffer-Const,s!=d,xor=1: 0.305109 s 419.522 MB/s -65536 2048 4 TABLE SINGLE - -Seed: 1352748171 -Buffer-Const,s!=d,xor=0: 0.249003 s 514.050 MB/s -Buffer-Const,s!=d,xor=1: 0.276043 s 463.696 MB/s -131072 1024 4 TABLE SINGLE - -Seed: 1352748173 -Buffer-Const,s!=d,xor=0: 0.249019 s 514.016 MB/s -Buffer-Const,s!=d,xor=1: 0.278464 s 459.665 MB/s -262144 512 4 TABLE SINGLE - -Seed: 1352748174 -Buffer-Const,s!=d,xor=0: 0.257905 s 496.308 MB/s -Buffer-Const,s!=d,xor=1: 0.266241 s 480.767 MB/s -524288 256 4 TABLE SINGLE - -Seed: 1352748176 -Buffer-Const,s!=d,xor=0: 0.254655 s 502.641 MB/s -Buffer-Const,s!=d,xor=1: 0.267730 s 478.093 MB/s -1048576 128 4 TABLE SINGLE - -Seed: 1352748178 -Buffer-Const,s!=d,xor=0: 0.264532 s 483.874 MB/s -Buffer-Const,s!=d,xor=1: 0.270533 s 473.140 MB/s -2097152 64 4 TABLE SINGLE - -Seed: 1352748180 -Buffer-Const,s!=d,xor=0: 0.249658 s 512.702 MB/s -Buffer-Const,s!=d,xor=1: 0.265106 s 482.826 MB/s -4194304 32 4 TABLE SINGLE - -Seed: 1352748181 -Buffer-Const,s!=d,xor=0: 0.244030 s 524.527 MB/s -Buffer-Const,s!=d,xor=1: 0.301052 s 425.176 MB/s -8388608 16 4 TABLE SINGLE - -Seed: 1352748183 -Buffer-Const,s!=d,xor=0: 0.263009 s 486.676 MB/s -Buffer-Const,s!=d,xor=1: 0.270075 s 473.943 MB/s -16777216 8 4 TABLE SINGLE - -Seed: 1352748185 -Buffer-Const,s!=d,xor=0: 0.318133 s 402.348 MB/s -Buffer-Const,s!=d,xor=1: 0.315726 s 405.415 MB/s -33554432 4 4 TABLE SINGLE - -Seed: 1352748187 -Buffer-Const,s!=d,xor=0: 0.329082 s 388.961 MB/s -Buffer-Const,s!=d,xor=1: 0.303774 s 421.366 MB/s -67108864 2 4 TABLE SINGLE - -Seed: 1352748189 -Buffer-Const,s!=d,xor=0: 0.373282 s 342.904 MB/s -Buffer-Const,s!=d,xor=1: 0.299255 s 427.729 MB/s -134217728 1 4 TABLE SINGLE - -Seed: 1352748191 -Buffer-Const,s!=d,xor=0: 0.026432 s 4842.652 MB/s -Buffer-Const,s!=d,xor=1: 0.028027 s 4566.976 MB/s -1024 131072 4 TABLE SINGLE,SSE - -Seed: 1352748192 -Buffer-Const,s!=d,xor=0: 0.020923 s 6117.629 MB/s -Buffer-Const,s!=d,xor=1: 0.021753 s 5884.226 MB/s -2048 65536 4 TABLE SINGLE,SSE - -Seed: 1352748193 -Buffer-Const,s!=d,xor=0: 0.017533 s 7300.592 MB/s -Buffer-Const,s!=d,xor=1: 0.018308 s 6991.599 MB/s -4096 32768 4 TABLE SINGLE,SSE - -Seed: 1352748193 -Buffer-Const,s!=d,xor=0: 0.016224 s 7889.591 MB/s -Buffer-Const,s!=d,xor=1: 0.016537 s 7740.353 MB/s -8192 16384 4 TABLE SINGLE,SSE - -Seed: 1352748194 -Buffer-Const,s!=d,xor=0: 0.015627 s 8191.000 MB/s -Buffer-Const,s!=d,xor=1: 0.016160 s 7921.020 MB/s -16384 8192 4 TABLE SINGLE,SSE - -Seed: 1352748195 -Buffer-Const,s!=d,xor=0: 0.015679 s 8163.599 MB/s -Buffer-Const,s!=d,xor=1: 0.016548 s 7735.000 MB/s -32768 4096 4 TABLE SINGLE,SSE - -Seed: 1352748196 -Buffer-Const,s!=d,xor=0: 0.016351 s 7828.046 MB/s -Buffer-Const,s!=d,xor=1: 0.017147 s 7464.939 MB/s -65536 2048 4 TABLE SINGLE,SSE - -Seed: 1352748196 -Buffer-Const,s!=d,xor=0: 0.015204 s 8418.863 MB/s -Buffer-Const,s!=d,xor=1: 0.016621 s 7701.049 MB/s -131072 1024 4 TABLE SINGLE,SSE - -Seed: 1352748197 -Buffer-Const,s!=d,xor=0: 0.019366 s 6609.594 MB/s -Buffer-Const,s!=d,xor=1: 0.020611 s 6210.405 MB/s -262144 512 4 TABLE SINGLE,SSE - -Seed: 1352748198 -Buffer-Const,s!=d,xor=0: 0.019287 s 6636.721 MB/s -Buffer-Const,s!=d,xor=1: 0.020470 s 6253.155 MB/s -524288 256 4 TABLE SINGLE,SSE - -Seed: 1352748199 -Buffer-Const,s!=d,xor=0: 0.019210 s 6663.244 MB/s -Buffer-Const,s!=d,xor=1: 0.021175 s 6044.754 MB/s -1048576 128 4 TABLE SINGLE,SSE - -Seed: 1352748199 -Buffer-Const,s!=d,xor=0: 0.035533 s 3602.314 MB/s -Buffer-Const,s!=d,xor=1: 0.032351 s 3956.628 MB/s -2097152 64 4 TABLE SINGLE,SSE - -Seed: 1352748200 -Buffer-Const,s!=d,xor=0: 0.048733 s 2626.557 MB/s -Buffer-Const,s!=d,xor=1: 0.044163 s 2898.370 MB/s -4194304 32 4 TABLE SINGLE,SSE - -Seed: 1352748201 -Buffer-Const,s!=d,xor=0: 0.051737 s 2474.071 MB/s -Buffer-Const,s!=d,xor=1: 0.048826 s 2621.555 MB/s -8388608 16 4 TABLE SINGLE,SSE - -Seed: 1352748202 -Buffer-Const,s!=d,xor=0: 0.056330 s 2272.306 MB/s -Buffer-Const,s!=d,xor=1: 0.029557 s 4330.617 MB/s -16777216 8 4 TABLE SINGLE,SSE - -Seed: 1352748203 -Buffer-Const,s!=d,xor=0: 0.066551 s 1923.338 MB/s -Buffer-Const,s!=d,xor=1: 0.037378 s 3424.489 MB/s -33554432 4 4 TABLE SINGLE,SSE - -Seed: 1352748203 -Buffer-Const,s!=d,xor=0: 0.082171 s 1557.728 MB/s -Buffer-Const,s!=d,xor=1: 0.048228 s 2654.058 MB/s -67108864 2 4 TABLE SINGLE,SSE - -Seed: 1352748204 -Buffer-Const,s!=d,xor=0: 0.125187 s 1022.469 MB/s -Buffer-Const,s!=d,xor=1: 0.047497 s 2694.905 MB/s -134217728 1 4 TABLE SINGLE,SSE - -Seed: 1352748205 -Buffer-Const,s!=d,xor=0: 0.151542 s 844.651 MB/s -Buffer-Const,s!=d,xor=1: 0.153138 s 835.847 MB/s -1024 131072 4 TABLE DOUBLE - -Seed: 1352748207 -Buffer-Const,s!=d,xor=0: 0.146267 s 875.111 MB/s -Buffer-Const,s!=d,xor=1: 0.150025 s 853.189 MB/s -2048 65536 4 TABLE DOUBLE - -Seed: 1352748208 -Buffer-Const,s!=d,xor=0: 0.145038 s 882.529 MB/s -Buffer-Const,s!=d,xor=1: 0.146365 s 874.525 MB/s -4096 32768 4 TABLE DOUBLE - -Seed: 1352748209 -Buffer-Const,s!=d,xor=0: 0.142601 s 897.608 MB/s -Buffer-Const,s!=d,xor=1: 0.144650 s 884.893 MB/s -8192 16384 4 TABLE DOUBLE - -Seed: 1352748211 -Buffer-Const,s!=d,xor=0: 0.141861 s 902.293 MB/s -Buffer-Const,s!=d,xor=1: 0.142722 s 896.848 MB/s -16384 8192 4 TABLE DOUBLE - -Seed: 1352748212 -Buffer-Const,s!=d,xor=0: 0.140131 s 913.433 MB/s -Buffer-Const,s!=d,xor=1: 0.143035 s 894.888 MB/s -32768 4096 4 TABLE DOUBLE - -Seed: 1352748213 -Buffer-Const,s!=d,xor=0: 0.141368 s 905.436 MB/s -Buffer-Const,s!=d,xor=1: 0.142083 s 900.879 MB/s -65536 2048 4 TABLE DOUBLE - -Seed: 1352748214 -Buffer-Const,s!=d,xor=0: 0.144412 s 886.351 MB/s -Buffer-Const,s!=d,xor=1: 0.145837 s 877.693 MB/s -131072 1024 4 TABLE DOUBLE - -Seed: 1352748216 -Buffer-Const,s!=d,xor=0: 0.141466 s 904.810 MB/s -Buffer-Const,s!=d,xor=1: 0.146338 s 874.686 MB/s -262144 512 4 TABLE DOUBLE - -Seed: 1352748217 -Buffer-Const,s!=d,xor=0: 0.141775 s 902.837 MB/s -Buffer-Const,s!=d,xor=1: 0.143733 s 890.543 MB/s -524288 256 4 TABLE DOUBLE - -Seed: 1352748218 -Buffer-Const,s!=d,xor=0: 0.144309 s 886.984 MB/s -Buffer-Const,s!=d,xor=1: 0.145978 s 876.843 MB/s -1048576 128 4 TABLE DOUBLE - -Seed: 1352748219 -Buffer-Const,s!=d,xor=0: 0.145523 s 879.584 MB/s -Buffer-Const,s!=d,xor=1: 0.152104 s 841.530 MB/s -2097152 64 4 TABLE DOUBLE - -Seed: 1352748221 -Buffer-Const,s!=d,xor=0: 0.150421 s 850.944 MB/s -Buffer-Const,s!=d,xor=1: 0.154586 s 828.018 MB/s -4194304 32 4 TABLE DOUBLE - -Seed: 1352748222 -Buffer-Const,s!=d,xor=0: 0.151304 s 845.978 MB/s -Buffer-Const,s!=d,xor=1: 0.151530 s 844.720 MB/s -8388608 16 4 TABLE DOUBLE - -Seed: 1352748223 -Buffer-Const,s!=d,xor=0: 0.160126 s 799.369 MB/s -Buffer-Const,s!=d,xor=1: 0.151316 s 845.910 MB/s -16777216 8 4 TABLE DOUBLE - -Seed: 1352748224 -Buffer-Const,s!=d,xor=0: 0.167688 s 763.323 MB/s -Buffer-Const,s!=d,xor=1: 0.152321 s 840.331 MB/s -33554432 4 4 TABLE DOUBLE - -Seed: 1352748226 -Buffer-Const,s!=d,xor=0: 0.194515 s 658.047 MB/s -Buffer-Const,s!=d,xor=1: 0.149023 s 858.929 MB/s -67108864 2 4 TABLE DOUBLE - -Seed: 1352748227 -Buffer-Const,s!=d,xor=0: 0.237898 s 538.046 MB/s -Buffer-Const,s!=d,xor=1: 0.148526 s 861.802 MB/s -134217728 1 4 TABLE DOUBLE - -Seed: 1352748229 -Buffer-Const,s!=d,xor=0: 0.151483 s 844.979 MB/s -Buffer-Const,s!=d,xor=1: 0.153012 s 836.535 MB/s -1024 131072 4 TABLE DOUBLE - -Seed: 1352748230 -Buffer-Const,s!=d,xor=0: 0.146577 s 873.259 MB/s -Buffer-Const,s!=d,xor=1: 0.146274 s 875.070 MB/s -2048 65536 4 TABLE DOUBLE - -Seed: 1352748231 -Buffer-Const,s!=d,xor=0: 0.145069 s 882.341 MB/s -Buffer-Const,s!=d,xor=1: 0.143911 s 889.436 MB/s -4096 32768 4 TABLE DOUBLE - -Seed: 1352748233 -Buffer-Const,s!=d,xor=0: 0.143011 s 895.035 MB/s -Buffer-Const,s!=d,xor=1: 0.142096 s 900.798 MB/s -8192 16384 4 TABLE DOUBLE - -Seed: 1352748234 -Buffer-Const,s!=d,xor=0: 0.142743 s 896.719 MB/s -Buffer-Const,s!=d,xor=1: 0.142004 s 901.383 MB/s -16384 8192 4 TABLE DOUBLE - -Seed: 1352748235 -Buffer-Const,s!=d,xor=0: 0.141290 s 905.940 MB/s -Buffer-Const,s!=d,xor=1: 0.142891 s 895.785 MB/s -32768 4096 4 TABLE DOUBLE - -Seed: 1352748236 -Buffer-Const,s!=d,xor=0: 0.141509 s 904.534 MB/s -Buffer-Const,s!=d,xor=1: 0.142357 s 899.150 MB/s -65536 2048 4 TABLE DOUBLE - -Seed: 1352748237 -Buffer-Const,s!=d,xor=0: 0.141353 s 905.532 MB/s -Buffer-Const,s!=d,xor=1: 0.147224 s 869.422 MB/s -131072 1024 4 TABLE DOUBLE - -Seed: 1352748239 -Buffer-Const,s!=d,xor=0: 0.142758 s 896.623 MB/s -Buffer-Const,s!=d,xor=1: 0.144537 s 885.585 MB/s -262144 512 4 TABLE DOUBLE - -Seed: 1352748240 -Buffer-Const,s!=d,xor=0: 0.141772 s 902.858 MB/s -Buffer-Const,s!=d,xor=1: 0.145832 s 877.723 MB/s -524288 256 4 TABLE DOUBLE - -Seed: 1352748241 -Buffer-Const,s!=d,xor=0: 0.142111 s 900.705 MB/s -Buffer-Const,s!=d,xor=1: 0.143957 s 889.155 MB/s -1048576 128 4 TABLE DOUBLE - -Seed: 1352748242 -Buffer-Const,s!=d,xor=0: 0.144863 s 883.596 MB/s -Buffer-Const,s!=d,xor=1: 0.148948 s 859.359 MB/s -2097152 64 4 TABLE DOUBLE - -Seed: 1352748244 -Buffer-Const,s!=d,xor=0: 0.150453 s 850.766 MB/s -Buffer-Const,s!=d,xor=1: 0.151897 s 842.677 MB/s -4194304 32 4 TABLE DOUBLE - -Seed: 1352748245 -Buffer-Const,s!=d,xor=0: 0.152495 s 839.371 MB/s -Buffer-Const,s!=d,xor=1: 0.153424 s 834.289 MB/s -8388608 16 4 TABLE DOUBLE - -Seed: 1352748246 -Buffer-Const,s!=d,xor=0: 0.159227 s 803.886 MB/s -Buffer-Const,s!=d,xor=1: 0.151101 s 847.118 MB/s -16777216 8 4 TABLE DOUBLE - -Seed: 1352748248 -Buffer-Const,s!=d,xor=0: 0.167903 s 762.344 MB/s -Buffer-Const,s!=d,xor=1: 0.152000 s 842.106 MB/s -33554432 4 4 TABLE DOUBLE - -Seed: 1352748249 -Buffer-Const,s!=d,xor=0: 0.193370 s 661.943 MB/s -Buffer-Const,s!=d,xor=1: 0.153193 s 835.547 MB/s -67108864 2 4 TABLE DOUBLE - -Seed: 1352748250 -Buffer-Const,s!=d,xor=0: 0.241834 s 529.288 MB/s -Buffer-Const,s!=d,xor=1: 0.150811 s 848.745 MB/s -134217728 1 4 TABLE DOUBLE - -Seed: 1352748252 -Buffer-Const,s!=d,xor=0: 0.158047 s 809.887 MB/s -Buffer-Const,s!=d,xor=1: 0.156660 s 817.057 MB/s -1024 131072 4 TABLE QUAD - -Seed: 1352748253 -Buffer-Const,s!=d,xor=0: 0.141239 s 906.264 MB/s -Buffer-Const,s!=d,xor=1: 0.146382 s 874.422 MB/s -2048 65536 4 TABLE QUAD - -Seed: 1352748254 -Buffer-Const,s!=d,xor=0: 0.134986 s 948.245 MB/s -Buffer-Const,s!=d,xor=1: 0.140656 s 910.023 MB/s -4096 32768 4 TABLE QUAD - -Seed: 1352748256 -Buffer-Const,s!=d,xor=0: 0.153383 s 834.514 MB/s -Buffer-Const,s!=d,xor=1: 0.128968 s 992.498 MB/s -8192 16384 4 TABLE QUAD - -Seed: 1352748257 -Buffer-Const,s!=d,xor=0: 0.120985 s 1057.984 MB/s -Buffer-Const,s!=d,xor=1: 0.121486 s 1053.618 MB/s -16384 8192 4 TABLE QUAD - -Seed: 1352748258 -Buffer-Const,s!=d,xor=0: 0.113212 s 1130.626 MB/s -Buffer-Const,s!=d,xor=1: 0.116994 s 1094.076 MB/s -32768 4096 4 TABLE QUAD - -Seed: 1352748259 -Buffer-Const,s!=d,xor=0: 0.106910 s 1197.266 MB/s -Buffer-Const,s!=d,xor=1: 0.109951 s 1164.152 MB/s -65536 2048 4 TABLE QUAD - -Seed: 1352748260 -Buffer-Const,s!=d,xor=0: 0.106585 s 1200.916 MB/s -Buffer-Const,s!=d,xor=1: 0.119656 s 1069.735 MB/s -131072 1024 4 TABLE QUAD - -Seed: 1352748261 -Buffer-Const,s!=d,xor=0: 0.108813 s 1176.332 MB/s -Buffer-Const,s!=d,xor=1: 0.109021 s 1174.081 MB/s -262144 512 4 TABLE QUAD - -Seed: 1352748263 -Buffer-Const,s!=d,xor=0: 0.103341 s 1238.614 MB/s -Buffer-Const,s!=d,xor=1: 0.108952 s 1174.826 MB/s -524288 256 4 TABLE QUAD - -Seed: 1352748264 -Buffer-Const,s!=d,xor=0: 0.105469 s 1213.627 MB/s -Buffer-Const,s!=d,xor=1: 0.110848 s 1154.735 MB/s -1048576 128 4 TABLE QUAD - -Seed: 1352748265 -Buffer-Const,s!=d,xor=0: 0.105542 s 1212.785 MB/s -Buffer-Const,s!=d,xor=1: 0.108646 s 1178.134 MB/s -2097152 64 4 TABLE QUAD - -Seed: 1352748266 -Buffer-Const,s!=d,xor=0: 0.106677 s 1199.889 MB/s -Buffer-Const,s!=d,xor=1: 0.112022 s 1142.631 MB/s -4194304 32 4 TABLE QUAD - -Seed: 1352748267 -Buffer-Const,s!=d,xor=0: 0.110966 s 1153.507 MB/s -Buffer-Const,s!=d,xor=1: 0.100766 s 1270.264 MB/s -8388608 16 4 TABLE QUAD - -Seed: 1352748268 -Buffer-Const,s!=d,xor=0: 0.108207 s 1182.915 MB/s -Buffer-Const,s!=d,xor=1: 0.113488 s 1127.871 MB/s -16777216 8 4 TABLE QUAD - -Seed: 1352748269 -Buffer-Const,s!=d,xor=0: 0.129142 s 991.157 MB/s -Buffer-Const,s!=d,xor=1: 0.110923 s 1153.953 MB/s -33554432 4 4 TABLE QUAD - -Seed: 1352748270 -Buffer-Const,s!=d,xor=0: 0.156426 s 818.279 MB/s -Buffer-Const,s!=d,xor=1: 0.110093 s 1162.652 MB/s -67108864 2 4 TABLE QUAD - -Seed: 1352748272 -Buffer-Const,s!=d,xor=0: 0.203508 s 628.967 MB/s -Buffer-Const,s!=d,xor=1: 0.111907 s 1143.807 MB/s -134217728 1 4 TABLE QUAD - -Seed: 1352748273 -Buffer-Const,s!=d,xor=0: 8.741033 s 14.644 MB/s -Buffer-Const,s!=d,xor=1: 8.972750 s 14.265 MB/s -1024 131072 4 TABLE QUAD,LAZY - -Seed: 1352748309 -Buffer-Const,s!=d,xor=0: 4.387740 s 29.172 MB/s -Buffer-Const,s!=d,xor=1: 4.401799 s 29.079 MB/s -2048 65536 4 TABLE QUAD,LAZY - -Seed: 1352748327 -Buffer-Const,s!=d,xor=0: 2.255454 s 56.751 MB/s -Buffer-Const,s!=d,xor=1: 2.243299 s 57.059 MB/s -4096 32768 4 TABLE QUAD,LAZY - -Seed: 1352748337 -Buffer-Const,s!=d,xor=0: 1.166870 s 109.695 MB/s -Buffer-Const,s!=d,xor=1: 1.180004 s 108.474 MB/s -8192 16384 4 TABLE QUAD,LAZY - -Seed: 1352748342 -Buffer-Const,s!=d,xor=0: 0.661613 s 193.467 MB/s -Buffer-Const,s!=d,xor=1: 0.629827 s 203.230 MB/s -16384 8192 4 TABLE QUAD,LAZY - -Seed: 1352748345 -Buffer-Const,s!=d,xor=0: 0.364647 s 351.024 MB/s -Buffer-Const,s!=d,xor=1: 0.376395 s 340.069 MB/s -32768 4096 4 TABLE QUAD,LAZY - -Seed: 1352748348 -Buffer-Const,s!=d,xor=0: 0.226271 s 565.694 MB/s -Buffer-Const,s!=d,xor=1: 0.234560 s 545.704 MB/s -65536 2048 4 TABLE QUAD,LAZY - -Seed: 1352748349 -Buffer-Const,s!=d,xor=0: 0.160475 s 797.630 MB/s -Buffer-Const,s!=d,xor=1: 0.166329 s 769.561 MB/s -131072 1024 4 TABLE QUAD,LAZY - -Seed: 1352748351 -Buffer-Const,s!=d,xor=0: 0.130999 s 977.110 MB/s -Buffer-Const,s!=d,xor=1: 0.134676 s 950.431 MB/s -262144 512 4 TABLE QUAD,LAZY - -Seed: 1352748352 -Buffer-Const,s!=d,xor=0: 0.110626 s 1157.057 MB/s -Buffer-Const,s!=d,xor=1: 0.118067 s 1084.134 MB/s -524288 256 4 TABLE QUAD,LAZY - -Seed: 1352748353 -Buffer-Const,s!=d,xor=0: 0.105213 s 1216.581 MB/s -Buffer-Const,s!=d,xor=1: 0.109697 s 1166.854 MB/s -1048576 128 4 TABLE QUAD,LAZY - -Seed: 1352748354 -Buffer-Const,s!=d,xor=0: 0.107641 s 1189.138 MB/s -Buffer-Const,s!=d,xor=1: 0.108062 s 1184.502 MB/s -2097152 64 4 TABLE QUAD,LAZY - -Seed: 1352748355 -Buffer-Const,s!=d,xor=0: 0.103473 s 1237.035 MB/s -Buffer-Const,s!=d,xor=1: 0.098362 s 1301.310 MB/s -4194304 32 4 TABLE QUAD,LAZY - -Seed: 1352748356 -Buffer-Const,s!=d,xor=0: 0.107058 s 1195.616 MB/s -Buffer-Const,s!=d,xor=1: 0.097883 s 1307.687 MB/s -8388608 16 4 TABLE QUAD,LAZY - -Seed: 1352748357 -Buffer-Const,s!=d,xor=0: 0.116388 s 1099.769 MB/s -Buffer-Const,s!=d,xor=1: 0.098690 s 1296.990 MB/s -16777216 8 4 TABLE QUAD,LAZY - -Seed: 1352748358 -Buffer-Const,s!=d,xor=0: 0.129120 s 991.325 MB/s -Buffer-Const,s!=d,xor=1: 0.109833 s 1165.403 MB/s -33554432 4 4 TABLE QUAD,LAZY - -Seed: 1352748360 -Buffer-Const,s!=d,xor=0: 0.157534 s 812.524 MB/s -Buffer-Const,s!=d,xor=1: 0.114721 s 1115.750 MB/s -67108864 2 4 TABLE QUAD,LAZY - -Seed: 1352748361 -Buffer-Const,s!=d,xor=0: 0.205053 s 624.229 MB/s -Buffer-Const,s!=d,xor=1: 0.110099 s 1162.589 MB/s -134217728 1 4 TABLE QUAD,LAZY - -Seed: 1352748362 -Buffer-Const,s!=d,xor=0: 0.142388 s 898.955 MB/s -Buffer-Const,s!=d,xor=1: 0.146045 s 876.440 MB/s -1024 131072 4 BYTWO_p - - -Seed: 1352748363 -Buffer-Const,s!=d,xor=0: 0.135040 s 947.867 MB/s -Buffer-Const,s!=d,xor=1: 0.140142 s 913.360 MB/s -2048 65536 4 BYTWO_p - - -Seed: 1352748365 -Buffer-Const,s!=d,xor=0: 0.131358 s 974.437 MB/s -Buffer-Const,s!=d,xor=1: 0.137115 s 933.525 MB/s -4096 32768 4 BYTWO_p - - -Seed: 1352748366 -Buffer-Const,s!=d,xor=0: 0.129772 s 986.347 MB/s -Buffer-Const,s!=d,xor=1: 0.135098 s 947.462 MB/s -8192 16384 4 BYTWO_p - - -Seed: 1352748367 -Buffer-Const,s!=d,xor=0: 0.128670 s 994.795 MB/s -Buffer-Const,s!=d,xor=1: 0.133591 s 958.145 MB/s -16384 8192 4 BYTWO_p - - -Seed: 1352748368 -Buffer-Const,s!=d,xor=0: 0.130064 s 984.129 MB/s -Buffer-Const,s!=d,xor=1: 0.135170 s 946.959 MB/s -32768 4096 4 BYTWO_p - - -Seed: 1352748369 -Buffer-Const,s!=d,xor=0: 0.129942 s 985.052 MB/s -Buffer-Const,s!=d,xor=1: 0.134780 s 949.695 MB/s -65536 2048 4 BYTWO_p - - -Seed: 1352748371 -Buffer-Const,s!=d,xor=0: 0.130649 s 979.725 MB/s -Buffer-Const,s!=d,xor=1: 0.134556 s 951.280 MB/s -131072 1024 4 BYTWO_p - - -Seed: 1352748372 -Buffer-Const,s!=d,xor=0: 0.129390 s 989.255 MB/s -Buffer-Const,s!=d,xor=1: 0.134418 s 952.257 MB/s -262144 512 4 BYTWO_p - - -Seed: 1352748373 -Buffer-Const,s!=d,xor=0: 0.130153 s 983.455 MB/s -Buffer-Const,s!=d,xor=1: 0.137027 s 934.126 MB/s -524288 256 4 BYTWO_p - - -Seed: 1352748374 -Buffer-Const,s!=d,xor=0: 0.128065 s 999.493 MB/s -Buffer-Const,s!=d,xor=1: 0.136548 s 937.402 MB/s -1048576 128 4 BYTWO_p - - -Seed: 1352748375 -Buffer-Const,s!=d,xor=0: 0.137841 s 928.608 MB/s -Buffer-Const,s!=d,xor=1: 0.149983 s 853.428 MB/s -2097152 64 4 BYTWO_p - - -Seed: 1352748377 -Buffer-Const,s!=d,xor=0: 0.143009 s 895.049 MB/s -Buffer-Const,s!=d,xor=1: 0.151799 s 843.218 MB/s -4194304 32 4 BYTWO_p - - -Seed: 1352748378 -Buffer-Const,s!=d,xor=0: 0.148001 s 864.859 MB/s -Buffer-Const,s!=d,xor=1: 0.150979 s 847.802 MB/s -8388608 16 4 BYTWO_p - - -Seed: 1352748379 -Buffer-Const,s!=d,xor=0: 0.153637 s 833.133 MB/s -Buffer-Const,s!=d,xor=1: 0.133152 s 961.307 MB/s -16777216 8 4 BYTWO_p - - -Seed: 1352748380 -Buffer-Const,s!=d,xor=0: 0.164125 s 779.894 MB/s -Buffer-Const,s!=d,xor=1: 0.150620 s 849.821 MB/s -33554432 4 4 BYTWO_p - - -Seed: 1352748382 -Buffer-Const,s!=d,xor=0: 0.188526 s 678.952 MB/s -Buffer-Const,s!=d,xor=1: 0.153114 s 835.979 MB/s -67108864 2 4 BYTWO_p - - -Seed: 1352748383 -Buffer-Const,s!=d,xor=0: 0.235626 s 543.234 MB/s -Buffer-Const,s!=d,xor=1: 0.158839 s 805.847 MB/s -134217728 1 4 BYTWO_p - - -Seed: 1352748385 -Buffer-Const,s!=d,xor=0: 0.076323 s 1677.087 MB/s -Buffer-Const,s!=d,xor=1: 0.077654 s 1648.345 MB/s -1024 131072 4 BYTWO_b - - -Seed: 1352748386 -Buffer-Const,s!=d,xor=0: 0.068027 s 1881.605 MB/s -Buffer-Const,s!=d,xor=1: 0.070778 s 1808.462 MB/s -2048 65536 4 BYTWO_b - - -Seed: 1352748387 -Buffer-Const,s!=d,xor=0: 0.065722 s 1947.591 MB/s -Buffer-Const,s!=d,xor=1: 0.068535 s 1867.669 MB/s -4096 32768 4 BYTWO_b - - -Seed: 1352748388 -Buffer-Const,s!=d,xor=0: 0.063732 s 2008.398 MB/s -Buffer-Const,s!=d,xor=1: 0.066054 s 1937.805 MB/s -8192 16384 4 BYTWO_b - - -Seed: 1352748389 -Buffer-Const,s!=d,xor=0: 0.062660 s 2042.779 MB/s -Buffer-Const,s!=d,xor=1: 0.065213 s 1962.793 MB/s -16384 8192 4 BYTWO_b - - -Seed: 1352748390 -Buffer-Const,s!=d,xor=0: 0.062758 s 2039.566 MB/s -Buffer-Const,s!=d,xor=1: 0.066957 s 1911.668 MB/s -32768 4096 4 BYTWO_b - - -Seed: 1352748390 -Buffer-Const,s!=d,xor=0: 0.063058 s 2029.865 MB/s -Buffer-Const,s!=d,xor=1: 0.065829 s 1944.424 MB/s -65536 2048 4 BYTWO_b - - -Seed: 1352748391 -Buffer-Const,s!=d,xor=0: 0.065844 s 1943.994 MB/s -Buffer-Const,s!=d,xor=1: 0.065374 s 1957.968 MB/s -131072 1024 4 BYTWO_b - - -Seed: 1352748392 -Buffer-Const,s!=d,xor=0: 0.062168 s 2058.949 MB/s -Buffer-Const,s!=d,xor=1: 0.068710 s 1862.906 MB/s -262144 512 4 BYTWO_b - - -Seed: 1352748393 -Buffer-Const,s!=d,xor=0: 0.062623 s 2043.984 MB/s -Buffer-Const,s!=d,xor=1: 0.066550 s 1923.379 MB/s -524288 256 4 BYTWO_b - - -Seed: 1352748394 -Buffer-Const,s!=d,xor=0: 0.064571 s 1982.317 MB/s -Buffer-Const,s!=d,xor=1: 0.061325 s 2087.246 MB/s -1048576 128 4 BYTWO_b - - -Seed: 1352748395 -Buffer-Const,s!=d,xor=0: 0.070771 s 1808.657 MB/s -Buffer-Const,s!=d,xor=1: 0.072981 s 1753.878 MB/s -2097152 64 4 BYTWO_b - - -Seed: 1352748396 -Buffer-Const,s!=d,xor=0: 0.078018 s 1640.643 MB/s -Buffer-Const,s!=d,xor=1: 0.072307 s 1770.227 MB/s -4194304 32 4 BYTWO_b - - -Seed: 1352748397 -Buffer-Const,s!=d,xor=0: 0.079478 s 1610.508 MB/s -Buffer-Const,s!=d,xor=1: 0.073757 s 1735.424 MB/s -8388608 16 4 BYTWO_b - - -Seed: 1352748398 -Buffer-Const,s!=d,xor=0: 0.085826 s 1491.383 MB/s -Buffer-Const,s!=d,xor=1: 0.087615 s 1460.945 MB/s -16777216 8 4 BYTWO_b - - -Seed: 1352748399 -Buffer-Const,s!=d,xor=0: 0.081822 s 1564.373 MB/s -Buffer-Const,s!=d,xor=1: 0.083410 s 1534.583 MB/s -33554432 4 4 BYTWO_b - - -Seed: 1352748400 -Buffer-Const,s!=d,xor=0: 0.101873 s 1256.467 MB/s -Buffer-Const,s!=d,xor=1: 0.074412 s 1720.150 MB/s -67108864 2 4 BYTWO_b - - -Seed: 1352748401 -Buffer-Const,s!=d,xor=0: 0.188405 s 679.387 MB/s -Buffer-Const,s!=d,xor=1: 0.053904 s 2374.589 MB/s -134217728 1 4 BYTWO_b - - -Seed: 1352748403 -Buffer-Const,s!=d,xor=0: 0.092518 s 1383.520 MB/s -Buffer-Const,s!=d,xor=1: 0.097347 s 1314.877 MB/s -1024 131072 4 BYTWO_p SSE - -Seed: 1352748404 -Buffer-Const,s!=d,xor=0: 0.086226 s 1484.463 MB/s -Buffer-Const,s!=d,xor=1: 0.092092 s 1389.910 MB/s -2048 65536 4 BYTWO_p SSE - -Seed: 1352748405 -Buffer-Const,s!=d,xor=0: 0.082721 s 1547.370 MB/s -Buffer-Const,s!=d,xor=1: 0.088092 s 1453.025 MB/s -4096 32768 4 BYTWO_p SSE - -Seed: 1352748406 -Buffer-Const,s!=d,xor=0: 0.081612 s 1568.395 MB/s -Buffer-Const,s!=d,xor=1: 0.086144 s 1485.885 MB/s -8192 16384 4 BYTWO_p SSE - -Seed: 1352748407 -Buffer-Const,s!=d,xor=0: 0.080819 s 1583.783 MB/s -Buffer-Const,s!=d,xor=1: 0.085448 s 1497.982 MB/s -16384 8192 4 BYTWO_p SSE - -Seed: 1352748408 -Buffer-Const,s!=d,xor=0: 0.080971 s 1580.804 MB/s -Buffer-Const,s!=d,xor=1: 0.086504 s 1479.709 MB/s -32768 4096 4 BYTWO_p SSE - -Seed: 1352748409 -Buffer-Const,s!=d,xor=0: 0.080746 s 1585.214 MB/s -Buffer-Const,s!=d,xor=1: 0.085679 s 1493.943 MB/s -65536 2048 4 BYTWO_p SSE - -Seed: 1352748410 -Buffer-Const,s!=d,xor=0: 0.081038 s 1579.511 MB/s -Buffer-Const,s!=d,xor=1: 0.086381 s 1481.804 MB/s -131072 1024 4 BYTWO_p SSE - -Seed: 1352748411 -Buffer-Const,s!=d,xor=0: 0.079807 s 1603.873 MB/s -Buffer-Const,s!=d,xor=1: 0.085420 s 1498.484 MB/s -262144 512 4 BYTWO_p SSE - -Seed: 1352748412 -Buffer-Const,s!=d,xor=0: 0.080044 s 1599.115 MB/s -Buffer-Const,s!=d,xor=1: 0.083843 s 1526.654 MB/s -524288 256 4 BYTWO_p SSE - -Seed: 1352748413 -Buffer-Const,s!=d,xor=0: 0.082954 s 1543.016 MB/s -Buffer-Const,s!=d,xor=1: 0.086807 s 1474.535 MB/s -1048576 128 4 BYTWO_p SSE - -Seed: 1352748414 -Buffer-Const,s!=d,xor=0: 0.090553 s 1413.536 MB/s -Buffer-Const,s!=d,xor=1: 0.092115 s 1389.565 MB/s -2097152 64 4 BYTWO_p SSE - -Seed: 1352748415 -Buffer-Const,s!=d,xor=0: 0.087072 s 1470.054 MB/s -Buffer-Const,s!=d,xor=1: 0.093465 s 1369.492 MB/s -4194304 32 4 BYTWO_p SSE - -Seed: 1352748416 -Buffer-Const,s!=d,xor=0: 0.097724 s 1309.812 MB/s -Buffer-Const,s!=d,xor=1: 0.090922 s 1407.795 MB/s -8388608 16 4 BYTWO_p SSE - -Seed: 1352748417 -Buffer-Const,s!=d,xor=0: 0.104649 s 1223.136 MB/s -Buffer-Const,s!=d,xor=1: 0.084963 s 1506.532 MB/s -16777216 8 4 BYTWO_p SSE - -Seed: 1352748418 -Buffer-Const,s!=d,xor=0: 0.112079 s 1142.050 MB/s -Buffer-Const,s!=d,xor=1: 0.096727 s 1323.313 MB/s -33554432 4 4 BYTWO_p SSE - -Seed: 1352748419 -Buffer-Const,s!=d,xor=0: 0.136256 s 939.408 MB/s -Buffer-Const,s!=d,xor=1: 0.103244 s 1239.781 MB/s -67108864 2 4 BYTWO_p SSE - -Seed: 1352748420 -Buffer-Const,s!=d,xor=0: 0.181231 s 706.281 MB/s -Buffer-Const,s!=d,xor=1: 0.092887 s 1378.016 MB/s -134217728 1 4 BYTWO_p SSE - -Seed: 1352748422 -Buffer-Const,s!=d,xor=0: 0.107760 s 1187.825 MB/s -Buffer-Const,s!=d,xor=1: 0.065748 s 1946.828 MB/s -1024 131072 4 BYTWO_b SSE - -Seed: 1352748423 -Buffer-Const,s!=d,xor=0: 0.104705 s 1222.484 MB/s -Buffer-Const,s!=d,xor=1: 0.058541 s 2186.508 MB/s -2048 65536 4 BYTWO_b SSE - -Seed: 1352748424 -Buffer-Const,s!=d,xor=0: 0.098082 s 1305.026 MB/s -Buffer-Const,s!=d,xor=1: 0.053539 s 2390.768 MB/s -4096 32768 4 BYTWO_b SSE - -Seed: 1352748425 -Buffer-Const,s!=d,xor=0: 0.094147 s 1359.576 MB/s -Buffer-Const,s!=d,xor=1: 0.051867 s 2467.839 MB/s -8192 16384 4 BYTWO_b SSE - -Seed: 1352748426 -Buffer-Const,s!=d,xor=0: 0.092755 s 1379.975 MB/s -Buffer-Const,s!=d,xor=1: 0.049600 s 2580.651 MB/s -16384 8192 4 BYTWO_b SSE - -Seed: 1352748427 -Buffer-Const,s!=d,xor=0: 0.093161 s 1373.971 MB/s -Buffer-Const,s!=d,xor=1: 0.048734 s 2626.480 MB/s -32768 4096 4 BYTWO_b SSE - -Seed: 1352748428 -Buffer-Const,s!=d,xor=0: 0.092071 s 1390.227 MB/s -Buffer-Const,s!=d,xor=1: 0.048645 s 2631.282 MB/s -65536 2048 4 BYTWO_b SSE - -Seed: 1352748429 -Buffer-Const,s!=d,xor=0: 0.093282 s 1372.191 MB/s -Buffer-Const,s!=d,xor=1: 0.047374 s 2701.903 MB/s -131072 1024 4 BYTWO_b SSE - -Seed: 1352748430 -Buffer-Const,s!=d,xor=0: 0.094085 s 1360.479 MB/s -Buffer-Const,s!=d,xor=1: 0.050752 s 2522.072 MB/s -262144 512 4 BYTWO_b SSE - -Seed: 1352748431 -Buffer-Const,s!=d,xor=0: 0.099099 s 1291.639 MB/s -Buffer-Const,s!=d,xor=1: 0.046550 s 2749.729 MB/s -524288 256 4 BYTWO_b SSE - -Seed: 1352748431 -Buffer-Const,s!=d,xor=0: 0.093943 s 1362.530 MB/s -Buffer-Const,s!=d,xor=1: 0.050178 s 2550.940 MB/s -1048576 128 4 BYTWO_b SSE - -Seed: 1352748432 -Buffer-Const,s!=d,xor=0: 0.121096 s 1057.011 MB/s -Buffer-Const,s!=d,xor=1: 0.055513 s 2305.770 MB/s -2097152 64 4 BYTWO_b SSE - -Seed: 1352748433 -Buffer-Const,s!=d,xor=0: 0.109734 s 1166.456 MB/s -Buffer-Const,s!=d,xor=1: 0.057743 s 2216.716 MB/s -4194304 32 4 BYTWO_b SSE - -Seed: 1352748434 -Buffer-Const,s!=d,xor=0: 0.117161 s 1092.513 MB/s -Buffer-Const,s!=d,xor=1: 0.057568 s 2223.464 MB/s -8388608 16 4 BYTWO_b SSE - -Seed: 1352748436 -Buffer-Const,s!=d,xor=0: 0.102332 s 1250.832 MB/s -Buffer-Const,s!=d,xor=1: 0.061185 s 2092.004 MB/s -16777216 8 4 BYTWO_b SSE - -Seed: 1352748437 -Buffer-Const,s!=d,xor=0: 0.173641 s 737.153 MB/s -Buffer-Const,s!=d,xor=1: 0.054822 s 2334.830 MB/s -33554432 4 4 BYTWO_b SSE - -Seed: 1352748438 -Buffer-Const,s!=d,xor=0: 0.130181 s 983.246 MB/s -Buffer-Const,s!=d,xor=1: 0.051398 s 2490.367 MB/s -67108864 2 4 BYTWO_b SSE - -Seed: 1352748439 -Buffer-Const,s!=d,xor=0: 0.150805 s 848.778 MB/s -Buffer-Const,s!=d,xor=1: 0.000005 s 2330.524 MB/s -134217728 1 4 BYTWO_b SSE - diff --git a/junk-w4-timing-tests.sh b/junk-w4-timing-tests.sh deleted file mode 100644 index 8ed8c08..0000000 --- a/junk-w4-timing-tests.sh +++ /dev/null @@ -1,11 +0,0 @@ -sh tmp-time-test.sh 4 LOG - - -sh tmp-time-test.sh 4 TABLE SINGLE - -sh tmp-time-test.sh 4 TABLE SINGLE,SSE - -sh tmp-time-test.sh 4 TABLE DOUBLE - -sh tmp-time-test.sh 4 TABLE DOUBLE - -sh tmp-time-test.sh 4 TABLE QUAD - -sh tmp-time-test.sh 4 TABLE QUAD,LAZY - -sh tmp-time-test.sh 4 BYTWO_p - - -sh tmp-time-test.sh 4 BYTWO_b - - -sh tmp-time-test.sh 4 BYTWO_p SSE - -sh tmp-time-test.sh 4 BYTWO_b SSE - diff --git a/junk-w4-timing.jgr b/junk-w4-timing.jgr deleted file mode 100644 index 9123257..0000000 --- a/junk-w4-timing.jgr +++ /dev/null @@ -1,11 +0,0 @@ -newgraph -xaxis size 4 min 0 no_auto_hash_labels - hash_labels hjl vjc rotate -90 fontsize 11 - -shell : junk-pick-best-output < junk-w4-timing-out.txt | sort -nr | sed 's/.............//' | awk '{ print "hash_label at ", ++l, ":", $0 }' - -yaxis size 1 min 0 label : MB/s - -newcurve marktype xbar cfill 1 1 0 marksize 1 pts -shell : junk-pick-best-output < junk-w4-timing-out.txt | sort -nr | awk '{ print $1 }' | cat -n - diff --git a/junk-w4.jgr b/junk-w4.jgr deleted file mode 100644 index e4c4a82..0000000 --- a/junk-w4.jgr +++ /dev/null @@ -1,6 +0,0 @@ -newgraph -xaxis size 4 min 0 no_auto_hash_labels - hash_labels hjl vjc rotate -90 fontsize 11 -yaxis size 1 min 0 label : MB/s - -shell : awk -f junk-proc.awk < junk-w4-out.txt diff --git a/junk-w8-timing-out.txt b/junk-w8-timing-out.txt deleted file mode 100644 index cf542be..0000000 --- a/junk-w8-timing-out.txt +++ /dev/null @@ -1,936 +0,0 @@ -Seed: 1352746852 -Buffer-Const,s!=d,xor=0: 0.205907 s 621.640 MB/s -Buffer-Const,s!=d,xor=1: 0.252565 s 506.800 MB/s -1024 131072 8 LOG - - -Seed: 1352746854 -Buffer-Const,s!=d,xor=0: 0.206410 s 620.126 MB/s -Buffer-Const,s!=d,xor=1: 0.251469 s 509.008 MB/s -2048 65536 8 LOG - - -Seed: 1352746856 -Buffer-Const,s!=d,xor=0: 0.209941 s 609.695 MB/s -Buffer-Const,s!=d,xor=1: 0.255838 s 500.316 MB/s -4096 32768 8 LOG - - -Seed: 1352746857 -Buffer-Const,s!=d,xor=0: 0.206109 s 621.030 MB/s -Buffer-Const,s!=d,xor=1: 0.262056 s 488.445 MB/s -8192 16384 8 LOG - - -Seed: 1352746859 -Buffer-Const,s!=d,xor=0: 0.201892 s 634.001 MB/s -Buffer-Const,s!=d,xor=1: 0.250816 s 510.335 MB/s -16384 8192 8 LOG - - -Seed: 1352746860 -Buffer-Const,s!=d,xor=0: 0.201995 s 633.679 MB/s -Buffer-Const,s!=d,xor=1: 0.254832 s 502.292 MB/s -32768 4096 8 LOG - - -Seed: 1352746862 -Buffer-Const,s!=d,xor=0: 0.203099 s 630.236 MB/s -Buffer-Const,s!=d,xor=1: 0.255779 s 500.431 MB/s -65536 2048 8 LOG - - -Seed: 1352746864 -Buffer-Const,s!=d,xor=0: 0.200691 s 637.796 MB/s -Buffer-Const,s!=d,xor=1: 0.256675 s 498.685 MB/s -131072 1024 8 LOG - - -Seed: 1352746865 -Buffer-Const,s!=d,xor=0: 0.201240 s 636.057 MB/s -Buffer-Const,s!=d,xor=1: 0.255231 s 501.506 MB/s -262144 512 8 LOG - - -Seed: 1352746867 -Buffer-Const,s!=d,xor=0: 0.202006 s 633.645 MB/s -Buffer-Const,s!=d,xor=1: 0.251845 s 508.250 MB/s -524288 256 8 LOG - - -Seed: 1352746868 -Buffer-Const,s!=d,xor=0: 0.203552 s 628.830 MB/s -Buffer-Const,s!=d,xor=1: 0.255775 s 500.440 MB/s -1048576 128 8 LOG - - -Seed: 1352746870 -Buffer-Const,s!=d,xor=0: 0.206480 s 619.915 MB/s -Buffer-Const,s!=d,xor=1: 0.256771 s 498.498 MB/s -2097152 64 8 LOG - - -Seed: 1352746872 -Buffer-Const,s!=d,xor=0: 0.210690 s 607.528 MB/s -Buffer-Const,s!=d,xor=1: 0.260851 s 490.701 MB/s -4194304 32 8 LOG - - -Seed: 1352746873 -Buffer-Const,s!=d,xor=0: 0.212292 s 602.944 MB/s -Buffer-Const,s!=d,xor=1: 0.263464 s 485.834 MB/s -8388608 16 8 LOG - - -Seed: 1352746875 -Buffer-Const,s!=d,xor=0: 0.217703 s 587.957 MB/s -Buffer-Const,s!=d,xor=1: 0.260255 s 491.826 MB/s -16777216 8 8 LOG - - -Seed: 1352746876 -Buffer-Const,s!=d,xor=0: 0.229996 s 556.531 MB/s -Buffer-Const,s!=d,xor=1: 0.268077 s 477.475 MB/s -33554432 4 8 LOG - - -Seed: 1352746878 -Buffer-Const,s!=d,xor=0: 0.255076 s 501.811 MB/s -Buffer-Const,s!=d,xor=1: 0.268757 s 476.266 MB/s -67108864 2 8 LOG - - -Seed: 1352746880 -Buffer-Const,s!=d,xor=0: 0.299095 s 427.958 MB/s -Buffer-Const,s!=d,xor=1: 0.271954 s 470.668 MB/s -134217728 1 8 LOG - - -Seed: 1352746882 -Buffer-Const,s!=d,xor=0: 0.198089 s 646.175 MB/s -Buffer-Const,s!=d,xor=1: 0.199934 s 640.212 MB/s -1024 131072 8 LOG_ZERO - - -Seed: 1352746883 -Buffer-Const,s!=d,xor=0: 0.191693 s 667.733 MB/s -Buffer-Const,s!=d,xor=1: 0.195976 s 653.142 MB/s -2048 65536 8 LOG_ZERO - - -Seed: 1352746885 -Buffer-Const,s!=d,xor=0: 0.190896 s 670.524 MB/s -Buffer-Const,s!=d,xor=1: 0.194985 s 656.459 MB/s -4096 32768 8 LOG_ZERO - - -Seed: 1352746886 -Buffer-Const,s!=d,xor=0: 0.190779 s 670.933 MB/s -Buffer-Const,s!=d,xor=1: 0.195833 s 653.617 MB/s -8192 16384 8 LOG_ZERO - - -Seed: 1352746887 -Buffer-Const,s!=d,xor=0: 0.188468 s 679.159 MB/s -Buffer-Const,s!=d,xor=1: 0.192885 s 663.608 MB/s -16384 8192 8 LOG_ZERO - - -Seed: 1352746889 -Buffer-Const,s!=d,xor=0: 0.187547 s 682.497 MB/s -Buffer-Const,s!=d,xor=1: 0.193131 s 662.763 MB/s -32768 4096 8 LOG_ZERO - - -Seed: 1352746890 -Buffer-Const,s!=d,xor=0: 0.185810 s 688.875 MB/s -Buffer-Const,s!=d,xor=1: 0.192531 s 664.829 MB/s -65536 2048 8 LOG_ZERO - - -Seed: 1352746892 -Buffer-Const,s!=d,xor=0: 0.186486 s 686.379 MB/s -Buffer-Const,s!=d,xor=1: 0.192416 s 665.226 MB/s -131072 1024 8 LOG_ZERO - - -Seed: 1352746893 -Buffer-Const,s!=d,xor=0: 0.187854 s 681.379 MB/s -Buffer-Const,s!=d,xor=1: 0.193211 s 662.488 MB/s -262144 512 8 LOG_ZERO - - -Seed: 1352746895 -Buffer-Const,s!=d,xor=0: 0.186622 s 685.880 MB/s -Buffer-Const,s!=d,xor=1: 0.193951 s 659.961 MB/s -524288 256 8 LOG_ZERO - - -Seed: 1352746896 -Buffer-Const,s!=d,xor=0: 0.193502 s 661.492 MB/s -Buffer-Const,s!=d,xor=1: 0.194600 s 657.760 MB/s -1048576 128 8 LOG_ZERO - - -Seed: 1352746897 -Buffer-Const,s!=d,xor=0: 0.191789 s 667.400 MB/s -Buffer-Const,s!=d,xor=1: 0.206557 s 619.683 MB/s -2097152 64 8 LOG_ZERO - - -Seed: 1352746899 -Buffer-Const,s!=d,xor=0: 0.216762 s 590.509 MB/s -Buffer-Const,s!=d,xor=1: 0.220943 s 579.334 MB/s -4194304 32 8 LOG_ZERO - - -Seed: 1352746901 -Buffer-Const,s!=d,xor=0: 0.212998 s 600.944 MB/s -Buffer-Const,s!=d,xor=1: 0.229660 s 557.346 MB/s -8388608 16 8 LOG_ZERO - - -Seed: 1352746902 -Buffer-Const,s!=d,xor=0: 0.225217 s 568.340 MB/s -Buffer-Const,s!=d,xor=1: 0.208174 s 614.871 MB/s -16777216 8 8 LOG_ZERO - - -Seed: 1352746904 -Buffer-Const,s!=d,xor=0: 0.215686 s 593.456 MB/s -Buffer-Const,s!=d,xor=1: 0.204155 s 626.975 MB/s -33554432 4 8 LOG_ZERO - - -Seed: 1352746905 -Buffer-Const,s!=d,xor=0: 0.250863 s 510.239 MB/s -Buffer-Const,s!=d,xor=1: 0.200680 s 637.832 MB/s -67108864 2 8 LOG_ZERO - - -Seed: 1352746907 -Buffer-Const,s!=d,xor=0: 0.285895 s 447.717 MB/s -Buffer-Const,s!=d,xor=1: 0.201105 s 636.484 MB/s -134217728 1 8 LOG_ZERO - - -Seed: 1352746909 -Buffer-Const,s!=d,xor=0: 0.154129 s 830.473 MB/s -Buffer-Const,s!=d,xor=1: 0.200737 s 637.650 MB/s -1024 131072 8 TABLE - - -Seed: 1352746910 -Buffer-Const,s!=d,xor=0: 0.150785 s 848.888 MB/s -Buffer-Const,s!=d,xor=1: 0.199187 s 642.614 MB/s -2048 65536 8 TABLE - - -Seed: 1352746911 -Buffer-Const,s!=d,xor=0: 0.149158 s 858.153 MB/s -Buffer-Const,s!=d,xor=1: 0.196224 s 652.316 MB/s -4096 32768 8 TABLE - - -Seed: 1352746913 -Buffer-Const,s!=d,xor=0: 0.147988 s 864.936 MB/s -Buffer-Const,s!=d,xor=1: 0.195025 s 656.325 MB/s -8192 16384 8 TABLE - - -Seed: 1352746914 -Buffer-Const,s!=d,xor=0: 0.146994 s 870.786 MB/s -Buffer-Const,s!=d,xor=1: 0.193489 s 661.536 MB/s -16384 8192 8 TABLE - - -Seed: 1352746915 -Buffer-Const,s!=d,xor=0: 0.151192 s 846.606 MB/s -Buffer-Const,s!=d,xor=1: 0.196197 s 652.405 MB/s -32768 4096 8 TABLE - - -Seed: 1352746917 -Buffer-Const,s!=d,xor=0: 0.149436 s 856.553 MB/s -Buffer-Const,s!=d,xor=1: 0.194907 s 656.724 MB/s -65536 2048 8 TABLE - - -Seed: 1352746918 -Buffer-Const,s!=d,xor=0: 0.150252 s 851.900 MB/s -Buffer-Const,s!=d,xor=1: 0.196657 s 650.878 MB/s -131072 1024 8 TABLE - - -Seed: 1352746920 -Buffer-Const,s!=d,xor=0: 0.152423 s 839.767 MB/s -Buffer-Const,s!=d,xor=1: 0.196896 s 650.090 MB/s -262144 512 8 TABLE - - -Seed: 1352746921 -Buffer-Const,s!=d,xor=0: 0.149577 s 855.748 MB/s -Buffer-Const,s!=d,xor=1: 0.196668 s 650.843 MB/s -524288 256 8 TABLE - - -Seed: 1352746922 -Buffer-Const,s!=d,xor=0: 0.151604 s 844.307 MB/s -Buffer-Const,s!=d,xor=1: 0.198012 s 646.425 MB/s -1048576 128 8 TABLE - - -Seed: 1352746924 -Buffer-Const,s!=d,xor=0: 0.155570 s 822.779 MB/s -Buffer-Const,s!=d,xor=1: 0.195111 s 656.036 MB/s -2097152 64 8 TABLE - - -Seed: 1352746925 -Buffer-Const,s!=d,xor=0: 0.159052 s 804.766 MB/s -Buffer-Const,s!=d,xor=1: 0.204684 s 625.353 MB/s -4194304 32 8 TABLE - - -Seed: 1352746926 -Buffer-Const,s!=d,xor=0: 0.163852 s 781.193 MB/s -Buffer-Const,s!=d,xor=1: 0.204403 s 626.215 MB/s -8388608 16 8 TABLE - - -Seed: 1352746928 -Buffer-Const,s!=d,xor=0: 0.174190 s 734.832 MB/s -Buffer-Const,s!=d,xor=1: 0.202681 s 631.535 MB/s -16777216 8 8 TABLE - - -Seed: 1352746929 -Buffer-Const,s!=d,xor=0: 0.184380 s 694.218 MB/s -Buffer-Const,s!=d,xor=1: 0.204282 s 626.585 MB/s -33554432 4 8 TABLE - - -Seed: 1352746931 -Buffer-Const,s!=d,xor=0: 0.204508 s 625.892 MB/s -Buffer-Const,s!=d,xor=1: 0.207667 s 616.371 MB/s -67108864 2 8 TABLE - - -Seed: 1352746932 -Buffer-Const,s!=d,xor=0: 0.252662 s 506.606 MB/s -Buffer-Const,s!=d,xor=1: 0.208596 s 613.626 MB/s -134217728 1 8 TABLE - - -Seed: 1352746934 -Buffer-Const,s!=d,xor=0: 0.870799 s 146.991 MB/s -Buffer-Const,s!=d,xor=1: 0.888333 s 144.090 MB/s -1024 131072 8 TABLE DOUBLE - -Seed: 1352746938 -Buffer-Const,s!=d,xor=0: 0.808797 s 158.260 MB/s -Buffer-Const,s!=d,xor=1: 0.812444 s 157.549 MB/s -2048 65536 8 TABLE DOUBLE - -Seed: 1352746942 -Buffer-Const,s!=d,xor=0: 0.724551 s 176.661 MB/s -Buffer-Const,s!=d,xor=1: 0.733140 s 174.591 MB/s -4096 32768 8 TABLE DOUBLE - -Seed: 1352746946 -Buffer-Const,s!=d,xor=0: 0.622008 s 205.785 MB/s -Buffer-Const,s!=d,xor=1: 0.636914 s 200.969 MB/s -8192 16384 8 TABLE DOUBLE - -Seed: 1352746949 -Buffer-Const,s!=d,xor=0: 0.454528 s 281.611 MB/s -Buffer-Const,s!=d,xor=1: 0.467266 s 273.934 MB/s -16384 8192 8 TABLE DOUBLE - -Seed: 1352746952 -Buffer-Const,s!=d,xor=0: 0.285370 s 448.541 MB/s -Buffer-Const,s!=d,xor=1: 0.292051 s 438.279 MB/s -32768 4096 8 TABLE DOUBLE - -Seed: 1352746954 -Buffer-Const,s!=d,xor=0: 0.193707 s 660.791 MB/s -Buffer-Const,s!=d,xor=1: 0.202114 s 633.307 MB/s -65536 2048 8 TABLE DOUBLE - -Seed: 1352746955 -Buffer-Const,s!=d,xor=0: 0.147023 s 870.614 MB/s -Buffer-Const,s!=d,xor=1: 0.151774 s 843.360 MB/s -131072 1024 8 TABLE DOUBLE - -Seed: 1352746957 -Buffer-Const,s!=d,xor=0: 0.127245 s 1005.930 MB/s -Buffer-Const,s!=d,xor=1: 0.130981 s 977.243 MB/s -262144 512 8 TABLE DOUBLE - -Seed: 1352746958 -Buffer-Const,s!=d,xor=0: 0.112772 s 1135.034 MB/s -Buffer-Const,s!=d,xor=1: 0.117758 s 1086.972 MB/s -524288 256 8 TABLE DOUBLE - -Seed: 1352746959 -Buffer-Const,s!=d,xor=0: 0.106724 s 1199.355 MB/s -Buffer-Const,s!=d,xor=1: 0.110677 s 1156.521 MB/s -1048576 128 8 TABLE DOUBLE - -Seed: 1352746960 -Buffer-Const,s!=d,xor=0: 0.109126 s 1172.960 MB/s -Buffer-Const,s!=d,xor=1: 0.115353 s 1109.641 MB/s -2097152 64 8 TABLE DOUBLE - -Seed: 1352746962 -Buffer-Const,s!=d,xor=0: 0.111492 s 1148.063 MB/s -Buffer-Const,s!=d,xor=1: 0.114936 s 1113.660 MB/s -4194304 32 8 TABLE DOUBLE - -Seed: 1352746963 -Buffer-Const,s!=d,xor=0: 0.114727 s 1115.694 MB/s -Buffer-Const,s!=d,xor=1: 0.112702 s 1135.740 MB/s -8388608 16 8 TABLE DOUBLE - -Seed: 1352746964 -Buffer-Const,s!=d,xor=0: 0.122290 s 1046.691 MB/s -Buffer-Const,s!=d,xor=1: 0.112557 s 1137.205 MB/s -16777216 8 8 TABLE DOUBLE - -Seed: 1352746965 -Buffer-Const,s!=d,xor=0: 0.130774 s 978.789 MB/s -Buffer-Const,s!=d,xor=1: 0.115443 s 1108.772 MB/s -33554432 4 8 TABLE DOUBLE - -Seed: 1352746966 -Buffer-Const,s!=d,xor=0: 0.152678 s 838.367 MB/s -Buffer-Const,s!=d,xor=1: 0.112051 s 1142.337 MB/s -67108864 2 8 TABLE DOUBLE - -Seed: 1352746968 -Buffer-Const,s!=d,xor=0: 0.199972 s 640.090 MB/s -Buffer-Const,s!=d,xor=1: 0.111309 s 1149.951 MB/s -134217728 1 8 TABLE DOUBLE - -Seed: 1352746969 -Buffer-Const,s!=d,xor=0: 12.353054 s 10.362 MB/s -Buffer-Const,s!=d,xor=1: 12.311798 s 10.397 MB/s -1024 131072 8 TABLE DOUBLE,LAZY - -Seed: 1352747019 -Buffer-Const,s!=d,xor=0: 6.245450 s 20.495 MB/s -Buffer-Const,s!=d,xor=1: 6.251623 s 20.475 MB/s -2048 65536 8 TABLE DOUBLE,LAZY - -Seed: 1352747045 -Buffer-Const,s!=d,xor=0: 3.157618 s 40.537 MB/s -Buffer-Const,s!=d,xor=1: 3.147050 s 40.673 MB/s -4096 32768 8 TABLE DOUBLE,LAZY - -Seed: 1352747058 -Buffer-Const,s!=d,xor=0: 1.631175 s 78.471 MB/s -Buffer-Const,s!=d,xor=1: 1.657020 s 77.247 MB/s -8192 16384 8 TABLE DOUBLE,LAZY - -Seed: 1352747065 -Buffer-Const,s!=d,xor=0: 0.860207 s 148.801 MB/s -Buffer-Const,s!=d,xor=1: 0.874988 s 146.288 MB/s -16384 8192 8 TABLE DOUBLE,LAZY - -Seed: 1352747069 -Buffer-Const,s!=d,xor=0: 0.478988 s 267.230 MB/s -Buffer-Const,s!=d,xor=1: 0.485077 s 263.876 MB/s -32768 4096 8 TABLE DOUBLE,LAZY - -Seed: 1352747072 -Buffer-Const,s!=d,xor=0: 0.291041 s 439.800 MB/s -Buffer-Const,s!=d,xor=1: 0.294611 s 434.472 MB/s -65536 2048 8 TABLE DOUBLE,LAZY - -Seed: 1352747074 -Buffer-Const,s!=d,xor=0: 0.195826 s 653.643 MB/s -Buffer-Const,s!=d,xor=1: 0.201743 s 634.472 MB/s -131072 1024 8 TABLE DOUBLE,LAZY - -Seed: 1352747075 -Buffer-Const,s!=d,xor=0: 0.148775 s 860.359 MB/s -Buffer-Const,s!=d,xor=1: 0.153898 s 831.717 MB/s -262144 512 8 TABLE DOUBLE,LAZY - -Seed: 1352747077 -Buffer-Const,s!=d,xor=0: 0.128037 s 999.707 MB/s -Buffer-Const,s!=d,xor=1: 0.130179 s 983.260 MB/s -524288 256 8 TABLE DOUBLE,LAZY - -Seed: 1352747078 -Buffer-Const,s!=d,xor=0: 0.112728 s 1135.473 MB/s -Buffer-Const,s!=d,xor=1: 0.119275 s 1073.152 MB/s -1048576 128 8 TABLE DOUBLE,LAZY - -Seed: 1352747079 -Buffer-Const,s!=d,xor=0: 0.113098 s 1131.763 MB/s -Buffer-Const,s!=d,xor=1: 0.117425 s 1090.056 MB/s -2097152 64 8 TABLE DOUBLE,LAZY - -Seed: 1352747080 -Buffer-Const,s!=d,xor=0: 0.113271 s 1130.033 MB/s -Buffer-Const,s!=d,xor=1: 0.116355 s 1100.082 MB/s -4194304 32 8 TABLE DOUBLE,LAZY - -Seed: 1352747081 -Buffer-Const,s!=d,xor=0: 0.109173 s 1172.448 MB/s -Buffer-Const,s!=d,xor=1: 0.114466 s 1118.239 MB/s -8388608 16 8 TABLE DOUBLE,LAZY - -Seed: 1352747082 -Buffer-Const,s!=d,xor=0: 0.120238 s 1064.555 MB/s -Buffer-Const,s!=d,xor=1: 0.113906 s 1123.737 MB/s -16777216 8 8 TABLE DOUBLE,LAZY - -Seed: 1352747084 -Buffer-Const,s!=d,xor=0: 0.127838 s 1001.266 MB/s -Buffer-Const,s!=d,xor=1: 0.112099 s 1141.846 MB/s -33554432 4 8 TABLE DOUBLE,LAZY - -Seed: 1352747085 -Buffer-Const,s!=d,xor=0: 0.154731 s 827.243 MB/s -Buffer-Const,s!=d,xor=1: 0.111025 s 1152.893 MB/s -67108864 2 8 TABLE DOUBLE,LAZY - -Seed: 1352747086 -Buffer-Const,s!=d,xor=0: 0.202618 s 631.730 MB/s -Buffer-Const,s!=d,xor=1: 0.110840 s 1154.819 MB/s -134217728 1 8 TABLE DOUBLE,LAZY - -Seed: 1352747087 -Buffer-Const,s!=d,xor=0: 0.400666 s 319.468 MB/s -Buffer-Const,s!=d,xor=1: 0.408545 s 313.307 MB/s -1024 131072 8 BYTWO_p - - -Seed: 1352747090 -Buffer-Const,s!=d,xor=0: 0.393822 s 325.020 MB/s -Buffer-Const,s!=d,xor=1: 0.400213 s 319.829 MB/s -2048 65536 8 BYTWO_p - - -Seed: 1352747092 -Buffer-Const,s!=d,xor=0: 0.388415 s 329.545 MB/s -Buffer-Const,s!=d,xor=1: 0.396545 s 322.788 MB/s -4096 32768 8 BYTWO_p - - -Seed: 1352747094 -Buffer-Const,s!=d,xor=0: 0.389005 s 329.044 MB/s -Buffer-Const,s!=d,xor=1: 0.395450 s 323.682 MB/s -8192 16384 8 BYTWO_p - - -Seed: 1352747096 -Buffer-Const,s!=d,xor=0: 0.385698 s 331.866 MB/s -Buffer-Const,s!=d,xor=1: 0.395319 s 323.789 MB/s -16384 8192 8 BYTWO_p - - -Seed: 1352747099 -Buffer-Const,s!=d,xor=0: 0.385273 s 332.232 MB/s -Buffer-Const,s!=d,xor=1: 0.396203 s 323.067 MB/s -32768 4096 8 BYTWO_p - - -Seed: 1352747101 -Buffer-Const,s!=d,xor=0: 0.387427 s 330.385 MB/s -Buffer-Const,s!=d,xor=1: 0.394610 s 324.371 MB/s -65536 2048 8 BYTWO_p - - -Seed: 1352747103 -Buffer-Const,s!=d,xor=0: 0.389866 s 328.318 MB/s -Buffer-Const,s!=d,xor=1: 0.398012 s 321.598 MB/s -131072 1024 8 BYTWO_p - - -Seed: 1352747105 -Buffer-Const,s!=d,xor=0: 0.389453 s 328.666 MB/s -Buffer-Const,s!=d,xor=1: 0.397982 s 321.622 MB/s -262144 512 8 BYTWO_p - - -Seed: 1352747108 -Buffer-Const,s!=d,xor=0: 0.388304 s 329.638 MB/s -Buffer-Const,s!=d,xor=1: 0.399512 s 320.391 MB/s -524288 256 8 BYTWO_p - - -Seed: 1352747110 -Buffer-Const,s!=d,xor=0: 0.390699 s 327.618 MB/s -Buffer-Const,s!=d,xor=1: 0.407622 s 314.016 MB/s -1048576 128 8 BYTWO_p - - -Seed: 1352747112 -Buffer-Const,s!=d,xor=0: 0.398830 s 320.939 MB/s -Buffer-Const,s!=d,xor=1: 0.401909 s 318.480 MB/s -2097152 64 8 BYTWO_p - - -Seed: 1352747114 -Buffer-Const,s!=d,xor=0: 0.402605 s 317.930 MB/s -Buffer-Const,s!=d,xor=1: 0.410941 s 311.480 MB/s -4194304 32 8 BYTWO_p - - -Seed: 1352747117 -Buffer-Const,s!=d,xor=0: 0.404638 s 316.332 MB/s -Buffer-Const,s!=d,xor=1: 0.406369 s 314.984 MB/s -8388608 16 8 BYTWO_p - - -Seed: 1352747119 -Buffer-Const,s!=d,xor=0: 0.412950 s 309.965 MB/s -Buffer-Const,s!=d,xor=1: 0.411819 s 310.816 MB/s -16777216 8 8 BYTWO_p - - -Seed: 1352747121 -Buffer-Const,s!=d,xor=0: 0.417898 s 306.295 MB/s -Buffer-Const,s!=d,xor=1: 0.412159 s 310.560 MB/s -33554432 4 8 BYTWO_p - - -Seed: 1352747124 -Buffer-Const,s!=d,xor=0: 0.444945 s 287.676 MB/s -Buffer-Const,s!=d,xor=1: 0.404381 s 316.533 MB/s -67108864 2 8 BYTWO_p - - -Seed: 1352747126 -Buffer-Const,s!=d,xor=0: 0.494330 s 258.936 MB/s -Buffer-Const,s!=d,xor=1: 0.412325 s 310.435 MB/s -134217728 1 8 BYTWO_p - - -Seed: 1352747129 -Buffer-Const,s!=d,xor=0: 0.306549 s 417.552 MB/s -Buffer-Const,s!=d,xor=1: 0.309033 s 414.195 MB/s -1024 131072 8 BYTWO_b - - -Seed: 1352747131 -Buffer-Const,s!=d,xor=0: 0.297702 s 429.961 MB/s -Buffer-Const,s!=d,xor=1: 0.297253 s 430.609 MB/s -2048 65536 8 BYTWO_b - - -Seed: 1352747132 -Buffer-Const,s!=d,xor=0: 0.293193 s 436.572 MB/s -Buffer-Const,s!=d,xor=1: 0.293018 s 436.833 MB/s -4096 32768 8 BYTWO_b - - -Seed: 1352747134 -Buffer-Const,s!=d,xor=0: 0.294984 s 433.922 MB/s -Buffer-Const,s!=d,xor=1: 0.290863 s 440.070 MB/s -8192 16384 8 BYTWO_b - - -Seed: 1352747136 -Buffer-Const,s!=d,xor=0: 0.288896 s 443.067 MB/s -Buffer-Const,s!=d,xor=1: 0.288462 s 443.732 MB/s -16384 8192 8 BYTWO_b - - -Seed: 1352747138 -Buffer-Const,s!=d,xor=0: 0.290112 s 441.208 MB/s -Buffer-Const,s!=d,xor=1: 0.288533 s 443.623 MB/s -32768 4096 8 BYTWO_b - - -Seed: 1352747140 -Buffer-Const,s!=d,xor=0: 0.288124 s 444.253 MB/s -Buffer-Const,s!=d,xor=1: 0.286360 s 446.989 MB/s -65536 2048 8 BYTWO_b - - -Seed: 1352747142 -Buffer-Const,s!=d,xor=0: 0.292166 s 438.106 MB/s -Buffer-Const,s!=d,xor=1: 0.288037 s 444.388 MB/s -131072 1024 8 BYTWO_b - - -Seed: 1352747143 -Buffer-Const,s!=d,xor=0: 0.295804 s 432.719 MB/s -Buffer-Const,s!=d,xor=1: 0.292226 s 438.017 MB/s -262144 512 8 BYTWO_b - - -Seed: 1352747145 -Buffer-Const,s!=d,xor=0: 0.284928 s 449.236 MB/s -Buffer-Const,s!=d,xor=1: 0.286746 s 446.388 MB/s -524288 256 8 BYTWO_b - - -Seed: 1352747147 -Buffer-Const,s!=d,xor=0: 0.295747 s 432.803 MB/s -Buffer-Const,s!=d,xor=1: 0.291578 s 438.990 MB/s -1048576 128 8 BYTWO_b - - -Seed: 1352747149 -Buffer-Const,s!=d,xor=0: 0.300418 s 426.073 MB/s -Buffer-Const,s!=d,xor=1: 0.283470 s 451.547 MB/s -2097152 64 8 BYTWO_b - - -Seed: 1352747151 -Buffer-Const,s!=d,xor=0: 0.310105 s 412.764 MB/s -Buffer-Const,s!=d,xor=1: 0.306506 s 417.610 MB/s -4194304 32 8 BYTWO_b - - -Seed: 1352747153 -Buffer-Const,s!=d,xor=0: 0.303049 s 422.373 MB/s -Buffer-Const,s!=d,xor=1: 0.294477 s 434.669 MB/s -8388608 16 8 BYTWO_b - - -Seed: 1352747155 -Buffer-Const,s!=d,xor=0: 0.318920 s 401.354 MB/s -Buffer-Const,s!=d,xor=1: 0.292649 s 437.384 MB/s -16777216 8 8 BYTWO_b - - -Seed: 1352747157 -Buffer-Const,s!=d,xor=0: 0.369239 s 346.659 MB/s -Buffer-Const,s!=d,xor=1: 0.299009 s 428.081 MB/s -33554432 4 8 BYTWO_b - - -Seed: 1352747159 -Buffer-Const,s!=d,xor=0: 0.370332 s 345.636 MB/s -Buffer-Const,s!=d,xor=1: 0.292907 s 436.999 MB/s -67108864 2 8 BYTWO_b - - -Seed: 1352747161 -Buffer-Const,s!=d,xor=0: 0.437750 s 292.404 MB/s -Buffer-Const,s!=d,xor=1: 0.303224 s 422.130 MB/s -134217728 1 8 BYTWO_b - - -Seed: 1352747163 -Buffer-Const,s!=d,xor=0: 0.199102 s 642.888 MB/s -Buffer-Const,s!=d,xor=1: 0.198709 s 644.159 MB/s -1024 131072 8 BYTWO_p SSE - -Seed: 1352747164 -Buffer-Const,s!=d,xor=0: 0.188358 s 679.558 MB/s -Buffer-Const,s!=d,xor=1: 0.190699 s 671.215 MB/s -2048 65536 8 BYTWO_p SSE - -Seed: 1352747166 -Buffer-Const,s!=d,xor=0: 0.184177 s 694.985 MB/s -Buffer-Const,s!=d,xor=1: 0.186848 s 685.049 MB/s -4096 32768 8 BYTWO_p SSE - -Seed: 1352747167 -Buffer-Const,s!=d,xor=0: 0.189242 s 676.384 MB/s -Buffer-Const,s!=d,xor=1: 0.186107 s 687.776 MB/s -8192 16384 8 BYTWO_p SSE - -Seed: 1352747169 -Buffer-Const,s!=d,xor=0: 0.179632 s 712.566 MB/s -Buffer-Const,s!=d,xor=1: 0.182739 s 700.454 MB/s -16384 8192 8 BYTWO_p SSE - -Seed: 1352747170 -Buffer-Const,s!=d,xor=0: 0.199486 s 641.648 MB/s -Buffer-Const,s!=d,xor=1: 0.187585 s 682.357 MB/s -32768 4096 8 BYTWO_p SSE - -Seed: 1352747172 -Buffer-Const,s!=d,xor=0: 0.181719 s 704.385 MB/s -Buffer-Const,s!=d,xor=1: 0.183744 s 696.620 MB/s -65536 2048 8 BYTWO_p SSE - -Seed: 1352747173 -Buffer-Const,s!=d,xor=0: 0.179243 s 714.114 MB/s -Buffer-Const,s!=d,xor=1: 0.181455 s 705.409 MB/s -131072 1024 8 BYTWO_p SSE - -Seed: 1352747174 -Buffer-Const,s!=d,xor=0: 0.178887 s 715.536 MB/s -Buffer-Const,s!=d,xor=1: 0.180799 s 707.969 MB/s -262144 512 8 BYTWO_p SSE - -Seed: 1352747176 -Buffer-Const,s!=d,xor=0: 0.180232 s 710.196 MB/s -Buffer-Const,s!=d,xor=1: 0.180657 s 708.523 MB/s -524288 256 8 BYTWO_p SSE - -Seed: 1352747177 -Buffer-Const,s!=d,xor=0: 0.180044 s 710.938 MB/s -Buffer-Const,s!=d,xor=1: 0.183542 s 697.386 MB/s -1048576 128 8 BYTWO_p SSE - -Seed: 1352747179 -Buffer-Const,s!=d,xor=0: 0.188030 s 680.743 MB/s -Buffer-Const,s!=d,xor=1: 0.189776 s 674.480 MB/s -2097152 64 8 BYTWO_p SSE - -Seed: 1352747180 -Buffer-Const,s!=d,xor=0: 0.188869 s 677.718 MB/s -Buffer-Const,s!=d,xor=1: 0.199248 s 642.415 MB/s -4194304 32 8 BYTWO_p SSE - -Seed: 1352747181 -Buffer-Const,s!=d,xor=0: 0.191749 s 667.538 MB/s -Buffer-Const,s!=d,xor=1: 0.188193 s 680.153 MB/s -8388608 16 8 BYTWO_p SSE - -Seed: 1352747183 -Buffer-Const,s!=d,xor=0: 0.200427 s 638.638 MB/s -Buffer-Const,s!=d,xor=1: 0.189489 s 675.501 MB/s -16777216 8 8 BYTWO_p SSE - -Seed: 1352747184 -Buffer-Const,s!=d,xor=0: 0.206467 s 619.954 MB/s -Buffer-Const,s!=d,xor=1: 0.195798 s 653.735 MB/s -33554432 4 8 BYTWO_p SSE - -Seed: 1352747186 -Buffer-Const,s!=d,xor=0: 0.226630 s 564.797 MB/s -Buffer-Const,s!=d,xor=1: 0.189382 s 675.883 MB/s -67108864 2 8 BYTWO_p SSE - -Seed: 1352747187 -Buffer-Const,s!=d,xor=0: 0.279772 s 457.515 MB/s -Buffer-Const,s!=d,xor=1: 0.196061 s 652.858 MB/s -134217728 1 8 BYTWO_p SSE - -Seed: 1352747189 -Buffer-Const,s!=d,xor=0: 0.148536 s 861.741 MB/s -Buffer-Const,s!=d,xor=1: 0.276922 s 462.224 MB/s -1024 131072 8 BYTWO_b SSE - -Seed: 1352747191 -Buffer-Const,s!=d,xor=0: 0.137811 s 928.805 MB/s -Buffer-Const,s!=d,xor=1: 0.268928 s 475.964 MB/s -2048 65536 8 BYTWO_b SSE - -Seed: 1352747192 -Buffer-Const,s!=d,xor=0: 0.132821 s 963.706 MB/s -Buffer-Const,s!=d,xor=1: 0.265851 s 481.474 MB/s -4096 32768 8 BYTWO_b SSE - -Seed: 1352747194 -Buffer-Const,s!=d,xor=0: 0.131842 s 970.862 MB/s -Buffer-Const,s!=d,xor=1: 0.263387 s 485.977 MB/s -8192 16384 8 BYTWO_b SSE - -Seed: 1352747195 -Buffer-Const,s!=d,xor=0: 0.131891 s 970.495 MB/s -Buffer-Const,s!=d,xor=1: 0.260863 s 490.680 MB/s -16384 8192 8 BYTWO_b SSE - -Seed: 1352747197 -Buffer-Const,s!=d,xor=0: 0.128815 s 993.670 MB/s -Buffer-Const,s!=d,xor=1: 0.260589 s 491.196 MB/s -32768 4096 8 BYTWO_b SSE - -Seed: 1352747198 -Buffer-Const,s!=d,xor=0: 0.127239 s 1005.979 MB/s -Buffer-Const,s!=d,xor=1: 0.261076 s 490.278 MB/s -65536 2048 8 BYTWO_b SSE - -Seed: 1352747200 -Buffer-Const,s!=d,xor=0: 0.127946 s 1000.421 MB/s -Buffer-Const,s!=d,xor=1: 0.266347 s 480.576 MB/s -131072 1024 8 BYTWO_b SSE - -Seed: 1352747201 -Buffer-Const,s!=d,xor=0: 0.129641 s 987.340 MB/s -Buffer-Const,s!=d,xor=1: 0.261065 s 490.299 MB/s -262144 512 8 BYTWO_b SSE - -Seed: 1352747202 -Buffer-Const,s!=d,xor=0: 0.131109 s 976.285 MB/s -Buffer-Const,s!=d,xor=1: 0.259368 s 493.507 MB/s -524288 256 8 BYTWO_b SSE - -Seed: 1352747204 -Buffer-Const,s!=d,xor=0: 0.130358 s 981.911 MB/s -Buffer-Const,s!=d,xor=1: 0.268218 s 477.224 MB/s -1048576 128 8 BYTWO_b SSE - -Seed: 1352747205 -Buffer-Const,s!=d,xor=0: 0.135308 s 945.990 MB/s -Buffer-Const,s!=d,xor=1: 0.282554 s 453.011 MB/s -2097152 64 8 BYTWO_b SSE - -Seed: 1352747207 -Buffer-Const,s!=d,xor=0: 0.141210 s 906.454 MB/s -Buffer-Const,s!=d,xor=1: 0.284272 s 450.272 MB/s -4194304 32 8 BYTWO_b SSE - -Seed: 1352747208 -Buffer-Const,s!=d,xor=0: 0.150900 s 848.245 MB/s -Buffer-Const,s!=d,xor=1: 0.291628 s 438.916 MB/s -8388608 16 8 BYTWO_b SSE - -Seed: 1352747210 -Buffer-Const,s!=d,xor=0: 0.147792 s 866.084 MB/s -Buffer-Const,s!=d,xor=1: 0.278963 s 458.842 MB/s -16777216 8 8 BYTWO_b SSE - -Seed: 1352747211 -Buffer-Const,s!=d,xor=0: 0.154891 s 826.390 MB/s -Buffer-Const,s!=d,xor=1: 0.176620 s 724.721 MB/s -33554432 4 8 BYTWO_b SSE - -Seed: 1352747213 -Buffer-Const,s!=d,xor=0: 0.193885 s 660.186 MB/s -Buffer-Const,s!=d,xor=1: 0.268795 s 476.199 MB/s -67108864 2 8 BYTWO_b SSE - -Seed: 1352747214 -Buffer-Const,s!=d,xor=0: 0.204667 s 625.407 MB/s -Buffer-Const,s!=d,xor=1: 0.269170 s 475.536 MB/s -134217728 1 8 BYTWO_b SSE - -Seed: 1352747216 -Buffer-Const,s!=d,xor=0: 1.940300 s 65.969 MB/s -Buffer-Const,s!=d,xor=1: 2.143284 s 59.721 MB/s -1024 131072 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747225 -Buffer-Const,s!=d,xor=0: 1.923481 s 66.546 MB/s -Buffer-Const,s!=d,xor=1: 2.147470 s 59.605 MB/s -2048 65536 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747234 -Buffer-Const,s!=d,xor=0: 1.916270 s 66.796 MB/s -Buffer-Const,s!=d,xor=1: 2.139770 s 59.820 MB/s -4096 32768 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747243 -Buffer-Const,s!=d,xor=0: 1.938715 s 66.023 MB/s -Buffer-Const,s!=d,xor=1: 2.137380 s 59.886 MB/s -8192 16384 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747252 -Buffer-Const,s!=d,xor=0: 1.922527 s 66.579 MB/s -Buffer-Const,s!=d,xor=1: 2.148529 s 59.576 MB/s -16384 8192 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747261 -Buffer-Const,s!=d,xor=0: 1.929218 s 66.348 MB/s -Buffer-Const,s!=d,xor=1: 2.138858 s 59.845 MB/s -32768 4096 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747270 -Buffer-Const,s!=d,xor=0: 1.921590 s 66.612 MB/s -Buffer-Const,s!=d,xor=1: 2.137566 s 59.881 MB/s -65536 2048 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747278 -Buffer-Const,s!=d,xor=0: 1.932345 s 66.241 MB/s -Buffer-Const,s!=d,xor=1: 2.130586 s 60.077 MB/s -131072 1024 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747287 -Buffer-Const,s!=d,xor=0: 1.944353 s 65.832 MB/s -Buffer-Const,s!=d,xor=1: 2.126287 s 60.199 MB/s -262144 512 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747296 -Buffer-Const,s!=d,xor=0: 1.921692 s 66.608 MB/s -Buffer-Const,s!=d,xor=1: 2.128691 s 60.131 MB/s -524288 256 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747305 -Buffer-Const,s!=d,xor=0: 1.883663 s 67.953 MB/s -Buffer-Const,s!=d,xor=1: 2.149924 s 59.537 MB/s -1048576 128 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747314 -Buffer-Const,s!=d,xor=0: 1.957364 s 65.394 MB/s -Buffer-Const,s!=d,xor=1: 2.167789 s 59.046 MB/s -2097152 64 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747323 -Buffer-Const,s!=d,xor=0: 1.958212 s 65.366 MB/s -Buffer-Const,s!=d,xor=1: 2.159558 s 59.271 MB/s -4194304 32 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747332 -Buffer-Const,s!=d,xor=0: 1.958506 s 65.356 MB/s -Buffer-Const,s!=d,xor=1: 2.019473 s 63.383 MB/s -8388608 16 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747341 -Buffer-Const,s!=d,xor=0: 1.949758 s 65.649 MB/s -Buffer-Const,s!=d,xor=1: 2.165875 s 59.099 MB/s -16777216 8 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747349 -Buffer-Const,s!=d,xor=0: 1.964626 s 65.152 MB/s -Buffer-Const,s!=d,xor=1: 2.151822 s 59.484 MB/s -33554432 4 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747358 -Buffer-Const,s!=d,xor=0: 2.045733 s 62.569 MB/s -Buffer-Const,s!=d,xor=1: 2.177383 s 58.786 MB/s -67108864 2 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747367 -Buffer-Const,s!=d,xor=0: 2.055240 s 62.280 MB/s -Buffer-Const,s!=d,xor=1: 2.190975 s 58.421 MB/s -134217728 1 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -Seed: 1352747377 -Buffer-Const,s!=d,xor=0: 0.080290 s 1594.215 MB/s -Buffer-Const,s!=d,xor=1: 0.082083 s 1559.402 MB/s -1024 131072 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747378 -Buffer-Const,s!=d,xor=0: 0.059030 s 2168.378 MB/s -Buffer-Const,s!=d,xor=1: 0.064752 s 1976.763 MB/s -2048 65536 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747379 -Buffer-Const,s!=d,xor=0: 0.050239 s 2547.829 MB/s -Buffer-Const,s!=d,xor=1: 0.050503 s 2534.526 MB/s -4096 32768 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747379 -Buffer-Const,s!=d,xor=0: 0.044825 s 2855.560 MB/s -Buffer-Const,s!=d,xor=1: 0.045130 s 2836.220 MB/s -8192 16384 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747380 -Buffer-Const,s!=d,xor=0: 0.042018 s 3046.301 MB/s -Buffer-Const,s!=d,xor=1: 0.042297 s 3026.210 MB/s -16384 8192 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747381 -Buffer-Const,s!=d,xor=0: 0.040955 s 3125.413 MB/s -Buffer-Const,s!=d,xor=1: 0.041454 s 3087.754 MB/s -32768 4096 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747382 -Buffer-Const,s!=d,xor=0: 0.040984 s 3123.195 MB/s -Buffer-Const,s!=d,xor=1: 0.041577 s 3078.635 MB/s -65536 2048 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747383 -Buffer-Const,s!=d,xor=0: 0.041093 s 3114.859 MB/s -Buffer-Const,s!=d,xor=1: 0.042611 s 3003.911 MB/s -131072 1024 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747384 -Buffer-Const,s!=d,xor=0: 0.047338 s 2703.972 MB/s -Buffer-Const,s!=d,xor=1: 0.049673 s 2576.836 MB/s -262144 512 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747385 -Buffer-Const,s!=d,xor=0: 0.049656 s 2577.739 MB/s -Buffer-Const,s!=d,xor=1: 0.050634 s 2527.950 MB/s -524288 256 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747386 -Buffer-Const,s!=d,xor=0: 0.049906 s 2564.833 MB/s -Buffer-Const,s!=d,xor=1: 0.051381 s 2491.188 MB/s -1048576 128 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747386 -Buffer-Const,s!=d,xor=0: 0.075184 s 1702.487 MB/s -Buffer-Const,s!=d,xor=1: 0.070414 s 1817.825 MB/s -2097152 64 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747387 -Buffer-Const,s!=d,xor=0: 0.108748 s 1177.034 MB/s -Buffer-Const,s!=d,xor=1: 0.111286 s 1150.190 MB/s -4194304 32 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747388 -Buffer-Const,s!=d,xor=0: 0.117474 s 1089.600 MB/s -Buffer-Const,s!=d,xor=1: 0.114860 s 1114.400 MB/s -8388608 16 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747389 -Buffer-Const,s!=d,xor=0: 0.126348 s 1013.075 MB/s -Buffer-Const,s!=d,xor=1: 0.109330 s 1170.768 MB/s -16777216 8 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747391 -Buffer-Const,s!=d,xor=0: 0.123002 s 1040.635 MB/s -Buffer-Const,s!=d,xor=1: 0.110046 s 1163.148 MB/s -33554432 4 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747392 -Buffer-Const,s!=d,xor=0: 0.159381 s 803.107 MB/s -Buffer-Const,s!=d,xor=1: 0.120685 s 1060.611 MB/s -67108864 2 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747393 -Buffer-Const,s!=d,xor=0: 0.196446 s 651.578 MB/s -Buffer-Const,s!=d,xor=1: 0.121685 s 1051.896 MB/s -134217728 1 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - -Seed: 1352747520 -Buffer-Const,s!=d,xor=0: 0.244784 s 522.910 MB/s -Buffer-Const,s!=d,xor=1: 0.259940 s 492.421 MB/s -1024 131072 8 SPLIT 8 4 NOSSE - -Seed: 1352747522 -Buffer-Const,s!=d,xor=0: 0.243595 s 525.463 MB/s -Buffer-Const,s!=d,xor=1: 0.253145 s 505.640 MB/s -2048 65536 8 SPLIT 8 4 NOSSE - -Seed: 1352747523 -Buffer-Const,s!=d,xor=0: 0.240463 s 532.307 MB/s -Buffer-Const,s!=d,xor=1: 0.251567 s 508.811 MB/s -4096 32768 8 SPLIT 8 4 NOSSE - -Seed: 1352747525 -Buffer-Const,s!=d,xor=0: 0.240079 s 533.157 MB/s -Buffer-Const,s!=d,xor=1: 0.255671 s 500.643 MB/s -8192 16384 8 SPLIT 8 4 NOSSE - -Seed: 1352747527 -Buffer-Const,s!=d,xor=0: 0.242857 s 527.059 MB/s -Buffer-Const,s!=d,xor=1: 0.251837 s 508.264 MB/s -16384 8192 8 SPLIT 8 4 NOSSE - -Seed: 1352747528 -Buffer-Const,s!=d,xor=0: 0.240757 s 531.657 MB/s -Buffer-Const,s!=d,xor=1: 0.253888 s 504.160 MB/s -32768 4096 8 SPLIT 8 4 NOSSE - -Seed: 1352747530 -Buffer-Const,s!=d,xor=0: 0.240586 s 532.034 MB/s -Buffer-Const,s!=d,xor=1: 0.256642 s 498.749 MB/s -65536 2048 8 SPLIT 8 4 NOSSE - -Seed: 1352747532 -Buffer-Const,s!=d,xor=0: 0.238570 s 536.529 MB/s -Buffer-Const,s!=d,xor=1: 0.254111 s 503.717 MB/s -131072 1024 8 SPLIT 8 4 NOSSE - -Seed: 1352747533 -Buffer-Const,s!=d,xor=0: 0.237666 s 538.572 MB/s -Buffer-Const,s!=d,xor=1: 0.254334 s 503.275 MB/s -262144 512 8 SPLIT 8 4 NOSSE - -Seed: 1352747535 -Buffer-Const,s!=d,xor=0: 0.244512 s 523.491 MB/s -Buffer-Const,s!=d,xor=1: 0.255911 s 500.174 MB/s -524288 256 8 SPLIT 8 4 NOSSE - -Seed: 1352747537 -Buffer-Const,s!=d,xor=0: 0.242439 s 527.968 MB/s -Buffer-Const,s!=d,xor=1: 0.255622 s 500.740 MB/s -1048576 128 8 SPLIT 8 4 NOSSE - -Seed: 1352747538 -Buffer-Const,s!=d,xor=0: 0.248633 s 514.815 MB/s -Buffer-Const,s!=d,xor=1: 0.257451 s 497.181 MB/s -2097152 64 8 SPLIT 8 4 NOSSE - -Seed: 1352747540 -Buffer-Const,s!=d,xor=0: 0.241531 s 529.952 MB/s -Buffer-Const,s!=d,xor=1: 0.264452 s 484.020 MB/s -4194304 32 8 SPLIT 8 4 NOSSE - -Seed: 1352747542 -Buffer-Const,s!=d,xor=0: 0.255533 s 500.914 MB/s -Buffer-Const,s!=d,xor=1: 0.248849 s 514.368 MB/s -8388608 16 8 SPLIT 8 4 NOSSE - -Seed: 1352747543 -Buffer-Const,s!=d,xor=0: 0.259687 s 492.902 MB/s -Buffer-Const,s!=d,xor=1: 0.264417 s 484.084 MB/s -16777216 8 8 SPLIT 8 4 NOSSE - -Seed: 1352747545 -Buffer-Const,s!=d,xor=0: 0.267928 s 477.740 MB/s -Buffer-Const,s!=d,xor=1: 0.269417 s 475.100 MB/s -33554432 4 8 SPLIT 8 4 NOSSE - -Seed: 1352747547 -Buffer-Const,s!=d,xor=0: 0.295526 s 433.126 MB/s -Buffer-Const,s!=d,xor=1: 0.270747 s 472.766 MB/s -67108864 2 8 SPLIT 8 4 NOSSE - -Seed: 1352747549 -Buffer-Const,s!=d,xor=0: 0.342706 s 373.498 MB/s -Buffer-Const,s!=d,xor=1: 0.266642 s 480.045 MB/s -134217728 1 8 SPLIT 8 4 NOSSE - -Seed: 1352747551 -Buffer-Const,s!=d,xor=0: 0.027748 s 4612.927 MB/s -Buffer-Const,s!=d,xor=1: 0.028090 s 4556.704 MB/s -1024 131072 8 SPLIT 8 4 SSE - -Seed: 1352747552 -Buffer-Const,s!=d,xor=0: 0.023128 s 5534.409 MB/s -Buffer-Const,s!=d,xor=1: 0.023134 s 5533.040 MB/s -2048 65536 8 SPLIT 8 4 SSE - -Seed: 1352747552 -Buffer-Const,s!=d,xor=0: 0.019114 s 6696.740 MB/s -Buffer-Const,s!=d,xor=1: 0.019763 s 6476.596 MB/s -4096 32768 8 SPLIT 8 4 SSE - -Seed: 1352747553 -Buffer-Const,s!=d,xor=0: 0.017541 s 7297.119 MB/s -Buffer-Const,s!=d,xor=1: 0.018266 s 7007.661 MB/s -8192 16384 8 SPLIT 8 4 SSE - -Seed: 1352747554 -Buffer-Const,s!=d,xor=0: 0.017010 s 7524.892 MB/s -Buffer-Const,s!=d,xor=1: 0.017399 s 7356.613 MB/s -16384 8192 8 SPLIT 8 4 SSE - -Seed: 1352747555 -Buffer-Const,s!=d,xor=0: 0.016979 s 7538.522 MB/s -Buffer-Const,s!=d,xor=1: 0.017508 s 7311.130 MB/s -32768 4096 8 SPLIT 8 4 SSE - -Seed: 1352747555 -Buffer-Const,s!=d,xor=0: 0.016780 s 7628.283 MB/s -Buffer-Const,s!=d,xor=1: 0.017439 s 7340.018 MB/s -65536 2048 8 SPLIT 8 4 SSE - -Seed: 1352747556 -Buffer-Const,s!=d,xor=0: 0.017527 s 7302.876 MB/s -Buffer-Const,s!=d,xor=1: 0.018656 s 6861.145 MB/s -131072 1024 8 SPLIT 8 4 SSE - -Seed: 1352747557 -Buffer-Const,s!=d,xor=0: 0.020679 s 6189.855 MB/s -Buffer-Const,s!=d,xor=1: 0.022183 s 5770.138 MB/s -262144 512 8 SPLIT 8 4 SSE - -Seed: 1352747558 -Buffer-Const,s!=d,xor=0: 0.020437 s 6263.296 MB/s -Buffer-Const,s!=d,xor=1: 0.021715 s 5894.434 MB/s -524288 256 8 SPLIT 8 4 SSE - -Seed: 1352747558 -Buffer-Const,s!=d,xor=0: 0.020800 s 6153.883 MB/s -Buffer-Const,s!=d,xor=1: 0.021934 s 5835.617 MB/s -1048576 128 8 SPLIT 8 4 SSE - -Seed: 1352747559 -Buffer-Const,s!=d,xor=0: 0.035634 s 3592.095 MB/s -Buffer-Const,s!=d,xor=1: 0.036323 s 3523.977 MB/s -2097152 64 8 SPLIT 8 4 SSE - -Seed: 1352747560 -Buffer-Const,s!=d,xor=0: 0.050565 s 2531.419 MB/s -Buffer-Const,s!=d,xor=1: 0.048358 s 2646.914 MB/s -4194304 32 8 SPLIT 8 4 SSE - -Seed: 1352747561 -Buffer-Const,s!=d,xor=0: 0.053646 s 2386.008 MB/s -Buffer-Const,s!=d,xor=1: 0.047063 s 2719.766 MB/s -8388608 16 8 SPLIT 8 4 SSE - -Seed: 1352747562 -Buffer-Const,s!=d,xor=0: 0.055658 s 2299.775 MB/s -Buffer-Const,s!=d,xor=1: 0.047532 s 2692.918 MB/s -16777216 8 8 SPLIT 8 4 SSE - -Seed: 1352747563 -Buffer-Const,s!=d,xor=0: 0.064355 s 1988.963 MB/s -Buffer-Const,s!=d,xor=1: 0.047547 s 2692.067 MB/s -33554432 4 8 SPLIT 8 4 SSE - -Seed: 1352747563 -Buffer-Const,s!=d,xor=0: 0.084876 s 1508.086 MB/s -Buffer-Const,s!=d,xor=1: 0.048017 s 2665.721 MB/s -67108864 2 8 SPLIT 8 4 SSE - -Seed: 1352747564 -Buffer-Const,s!=d,xor=0: 0.121661 s 1052.104 MB/s -Buffer-Const,s!=d,xor=1: 0.047558 s 2691.447 MB/s -134217728 1 8 SPLIT 8 4 SSE - diff --git a/junk-w8-timing-tests.sh b/junk-w8-timing-tests.sh deleted file mode 100644 index 6b78dab..0000000 --- a/junk-w8-timing-tests.sh +++ /dev/null @@ -1,13 +0,0 @@ -sh tmp-time-test.sh 8 LOG - - -sh tmp-time-test.sh 8 LOG_ZERO - - -sh tmp-time-test.sh 8 TABLE - - -sh tmp-time-test.sh 8 TABLE DOUBLE - -sh tmp-time-test.sh 8 TABLE DOUBLE,LAZY - -sh tmp-time-test.sh 8 BYTWO_p - - -sh tmp-time-test.sh 8 BYTWO_b - - -sh tmp-time-test.sh 8 BYTWO_p SSE - -sh tmp-time-test.sh 8 BYTWO_b SSE - -sh tmp-time-test.sh 8 SPLIT 8 4 NOSSE - -sh tmp-time-test.sh 8 SPLIT 8 4 SSE - -sh tmp-time-test.sh 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - -sh tmp-time-test.sh 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - diff --git a/junk-w8-timing.jgr b/junk-w8-timing.jgr deleted file mode 100644 index 0245111..0000000 --- a/junk-w8-timing.jgr +++ /dev/null @@ -1,11 +0,0 @@ -newgraph -xaxis size 4 min 0 no_auto_hash_labels - hash_labels hjl vjc rotate -90 fontsize 11 - -shell : junk-pick-best-output < junk-w8-timing-out.txt | sort -nr | sed 's/.............//' | awk '{ print "hash_label at ", ++l, ":", $0 }' - -yaxis size 1 min 0 label : MB/s - -newcurve marktype xbar cfill 1 1 0 marksize 1 pts -shell : junk-pick-best-output < junk-w8-timing-out.txt | sort -nr | awk '{ print $1 }' | cat -n - @@ -1,18 +0,0 @@ -/* - * Multiplies four and five in GF(2^4). - */ - -#include <stdio.h> -#include <stdint.h> -#include <stdlib.h> - -#include "gf_complete.h" - -main() -{ - gf_t gf; - - gf_init_easy(&gf, 4); - printf("%d\n", gf.multiply.w32(&gf, 5, 4)); - exit(0); -} diff --git a/junk.ps b/junk.ps deleted file mode 100644 index f8e80c7..0000000 --- a/junk.ps +++ /dev/null @@ -1,199 +0,0 @@ -%!PS-Adobe-2.0 EPSF-1.2 -%%Page: 1 1 -%%BoundingBox: -40 -93 292 73 -%%EndComments -180.000000 406.000000 translate -1 setlinecap 1 setlinejoin -0.700 setlinewidth -0.00 setgray - -/Jrnd { exch cvi exch cvi dup 3 1 roll idiv mul } def -/JDEdict 8 dict def -JDEdict /mtrx matrix put -/JDE { - JDEdict begin - /yrad exch def - /xrad exch def - /savematrix mtrx currentmatrix def - xrad yrad scale - 0 0 1 0 360 arc - savematrix setmatrix - end -} def -/JSTR { - gsave 1 eq { gsave 1 setgray fill grestore } if - exch neg exch neg translate - clip - rotate - 4 dict begin - pathbbox /&top exch def - /&right exch def - /&bottom exch def - &right sub /&width exch def - newpath - currentlinewidth mul round dup - &bottom exch Jrnd exch &top - 4 -1 roll currentlinewidth mul setlinewidth - { &right exch moveto &width 0 rlineto stroke } for - end - grestore - newpath -} bind def - gsave /Times-Roman findfont 9.000000 scalefont setfont -0.000000 0.000000 translate -0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 288.000000 0.000000 lineto stroke -newpath 0.000000 0.000000 moveto 0.000000 -5.000000 lineto stroke -newpath 28.799999 0.000000 moveto 28.799999 -2.000000 lineto stroke -newpath 57.599998 0.000000 moveto 57.599998 -5.000000 lineto stroke -newpath 86.399994 0.000000 moveto 86.399994 -2.000000 lineto stroke -newpath 115.199997 0.000000 moveto 115.199997 -5.000000 lineto stroke -newpath 144.000000 0.000000 moveto 144.000000 -2.000000 lineto stroke -newpath 172.799988 0.000000 moveto 172.799988 -5.000000 lineto stroke -newpath 201.599991 0.000000 moveto 201.599991 -2.000000 lineto stroke -newpath 230.399994 0.000000 moveto 230.399994 -5.000000 lineto stroke -newpath 259.199982 0.000000 moveto 259.199982 -2.000000 lineto stroke -newpath 288.000000 0.000000 moveto 288.000000 -5.000000 lineto stroke -/Times-Roman findfont 11.000000 scalefont setfont -gsave 28.799999 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE SINGLE,SSE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 57.599998 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_b SSE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 86.399994 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_b) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 115.199997 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_p SSE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 144.000000 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE QUAD,LAZY) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 172.799988 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE QUAD) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 201.599991 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (BYTWO_p) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 230.399994 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE DOUBLE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 259.199982 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (TABLE SINGLE) dup stringwidth pop pop 0 0 moveto -show -grestore -gsave 288.000000 -8.000000 translate -90.000000 rotate -0 -3.300000 translate (LOG) dup stringwidth pop pop 0 0 moveto -show -grestore - grestore -0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 0.000000 72.000000 lineto stroke -newpath 0.000000 0.000000 moveto -5.000000 0.000000 lineto stroke -newpath 0.000000 8.552223 moveto -2.000000 8.552223 lineto stroke -newpath 0.000000 17.104446 moveto -5.000000 17.104446 lineto stroke -newpath 0.000000 25.656670 moveto -2.000000 25.656670 lineto stroke -newpath 0.000000 34.208893 moveto -5.000000 34.208893 lineto stroke -newpath 0.000000 42.761116 moveto -2.000000 42.761116 lineto stroke -newpath 0.000000 51.313339 moveto -5.000000 51.313339 lineto stroke -newpath 0.000000 59.865562 moveto -2.000000 59.865562 lineto stroke -newpath 0.000000 68.417786 moveto -5.000000 68.417786 lineto stroke -/Times-Roman findfont 9.000000 scalefont setfont -gsave -8.000000 0.000000 translate 0.000000 rotate -0 -2.700000 translate (0) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 17.104446 translate 0.000000 rotate -0 -2.700000 translate (2000) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 34.208893 translate 0.000000 rotate -0 -2.700000 translate (4000) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 51.313339 translate 0.000000 rotate -0 -2.700000 translate (6000) dup stringwidth pop neg 0 moveto -show -grestore -gsave -8.000000 68.417786 translate 0.000000 rotate -0 -2.700000 translate (8000) dup stringwidth pop neg 0 moveto -show -grestore -/Times-Bold findfont 10.000000 scalefont setfont -gsave -33.279999 36.000000 translate 90.000000 rotate -0 0.000000 translate (MB/s) dup stringwidth pop 2 div neg 0 moveto -show -grestore - grestore - gsave - gsave gsave 28.799999 72.000000 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -72.000000 lineto - 14.400000 -72.000000 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 57.599998 23.516296 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -23.516296 lineto - 14.400000 -23.516296 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 86.399994 20.308016 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -20.308016 lineto - 14.400000 -20.308016 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 115.199997 13.716681 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -13.716681 lineto - 14.400000 -13.716681 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 144.000000 11.183632 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -11.183632 lineto - 14.400000 -11.183632 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 172.799988 10.863582 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -10.863582 lineto - 14.400000 -10.863582 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 201.599991 8.547887 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -8.547887 lineto - 14.400000 -8.547887 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 230.399994 7.811883 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -7.811883 lineto - 14.400000 -7.811883 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 259.199982 4.485872 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -4.485872 lineto - 14.400000 -4.485872 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore gsave 288.000000 1.912226 translate 0.000000 rotate - newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto - -14.400000 -1.912226 lineto - 14.400000 -1.912226 lineto -closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore -stroke - grestore grestore - grestore --0.000000 -0.000000 translate - grestore showpage diff --git a/junk.sh b/junk.sh deleted file mode 100644 index 8e62234..0000000 --- a/junk.sh +++ /dev/null @@ -1,14 +0,0 @@ -gf_time 4 R -1 1024000 1000 - ; echo '-' -gf_time 4 R -1 1024000 1000 SHIFT - - ; echo 'SHIFT - -' -gf_time 4 R -1 1024000 1000 BYTWO_p - - ; echo 'BYTWO_p - -' -gf_time 4 R -1 1024000 1000 BYTWO_p SSE - ; echo 'BYTWO_p SSE -' -gf_time 4 R -1 1024000 1000 BYTWO_b - - ; echo 'BYTWO_b - -' -gf_time 4 R -1 1024000 1000 BYTWO_b SSE - ; echo 'BYTWO_b SSE -' -gf_time 4 R -1 1024000 1000 TABLE - - ; echo 'TABLE - -' -gf_time 4 R -1 1024000 1000 TABLE SINGLE - ; echo 'TABLE SINGLE -' -gf_time 4 R -1 1024000 1000 TABLE DOUBLE - ; echo 'TABLE DOUBLE -' -gf_time 4 R -1 1024000 1000 TABLE QUAD - ; echo 'TABLE QUAD -' -gf_time 4 R -1 1024000 1000 TABLE QUAD,LAZY - ; echo 'TABLE QUAD,LAZY -' -gf_time 4 R -1 1024000 1000 TABLE SINGLE,SSE - ; echo 'TABLE SINGLE,SSE -' -gf_time 4 R -1 1024000 1000 TABLE SINGLE,NOSSE - ; echo 'TABLE SINGLE,NOSSE -' -gf_time 4 R -1 1024000 1000 LOG - - ; echo 'LOG - -' diff --git a/junk_gf_unit.c b/junk_gf_unit.c deleted file mode 100644 index 3ae45a3..0000000 --- a/junk_gf_unit.c +++ /dev/null @@ -1,957 +0,0 @@ -/* - * gf_unit.c - * - * Performs unit testing for gf arithmetic - */ - -#include <stdio.h> -#include <getopt.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <time.h> - -#include "gf.h" -#include "gf_int.h" -#include "gf_method.h" -#include "gf_rand.h" - -#define REGION_SIZE (65536) - -static -uint8_t get_alt_map_2w8(int offset, uint8_t *buf, int region_size) -{ - uint8_t symbol = 0; - int bit_off = offset % 2; - - if (bit_off == 0) { - symbol = buf[offset / 2] & 0x0f | ((buf[(offset / 2)+region_size] & 0x0f) << 4); - } else { - symbol = ((buf[offset / 2] & 0xf0) >> 4) | (buf[(offset / 2)+region_size] & 0xf0); - } - - return symbol; -} - -static -uint16_t get_alt_map_2w16(int offset, uint8_t *buf, int region_size) -{ - uint16_t symbol = 0; - - symbol = buf[offset] | (buf[offset+region_size] << 8); - - return symbol; -} - -static -uint32_t get_alt_map_2w32(int offset, uint8_t *buf, int region_size) -{ - uint32_t symbol = 0; - uint16_t buf_a = buf[offset] | (buf[offset + 1] << 8); - uint16_t buf_b = buf[offset + region_size] | (buf[offset + region_size + 1] << 8); - - symbol = buf_a | (buf_b << 16); - - return symbol; -} - -static -void test_alt_map() -{ - uint8_t* buf = (uint8_t*)malloc(sizeof(uint8_t)*REGION_SIZE); - int i=0; - uint8_t c=1, next_c; - - for (i=0; i < REGION_SIZE/2;i++) { - if (c == 255) c = 1; - buf[i] = c; - buf[i+(REGION_SIZE/2)] = c; - c++; - } - - - c = 1; - for (i=0; i < REGION_SIZE;i++) { - uint8_t sym_w8 = get_alt_map_2w8(i, buf, REGION_SIZE/2); - uint8_t c_val = ((i % 2) == 0) ? (c & 0x0f) : ((c & 0xf0) >> 4); - uint8_t exp_sym_w8 = c_val | c_val << 4; - - if (exp_sym_w8 != sym_w8) { - fprintf(stderr, "Alt mapping failure (w=8,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w8, sym_w8); - exit(1); - } - - if ((i % 2) == 1) { - c++; - } - if (c == 255) { - c = 1; - } - } - - c = 1; - - for (i=0; i < REGION_SIZE/2;i++) { - uint16_t sym_w16 = get_alt_map_2w16(i, buf, REGION_SIZE/2); - uint16_t exp_sym_w16 = c | c << 8; - - if (exp_sym_w16 != sym_w16) { - fprintf(stderr, "Alt mapping failure (w=16,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w16, sym_w16); - exit(1); - } - - c++; - if (c == 255) { - c = 1; - } - } - - c = 1; - next_c = 2; - - for (i=0; i < REGION_SIZE/4;i++) { - uint32_t sym_w32 = get_alt_map_2w32(i, buf, REGION_SIZE/2); - uint32_t exp_sym_w32 = c | (next_c << 8) | c << 16 | (next_c << 24); - - if (exp_sym_w32 != sym_w32) { - fprintf(stderr, "Alt mapping failure (w=32,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w32, sym_w32); - exit(1); - } - c++; - next_c++; - if (c == 255) { - c = 1; - next_c = 2; - } else if (c == 254) { - next_c = 1; - } - } - -} - -void fill_random_region(void *reg, int size) -{ - uint32_t *r; - int i; - - r = (uint32_t *) reg; - for (i = 0; i < size/sizeof(uint32_t); i++) { - r[i] = MOA_Random_32(); - } -} - -void problem(char *s) -{ - fprintf(stderr, "Unit test failed.\n"); - fprintf(stderr, "%s\n", s); - exit(1); -} - -void usage(char *s) -{ - fprintf(stderr, "usage: gf_unit w tests seed [method] - does unit testing in GF(2^w)\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "Legal w are: 4, 8, 16, 32, 64 and 128\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "Tests may be any combination of:\n"); - fprintf(stderr, " A: All\n"); - fprintf(stderr, " S: Single operations (multiplication/division)\n"); - fprintf(stderr, " R: Region operations\n"); - fprintf(stderr, " V: Verbose Output\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "Use -1 for time(0) as a seed.\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "For method specification, type gf_methods\n"); - fprintf(stderr, "\n"); - if (s != NULL) fprintf(stderr, "%s\n", s); - exit(1); -} - -int main(int argc, char **argv) -{ - int w, i, j, verbose, single, region, xor, off, size, sindex, eindex, tested, top; - uint32_t a, b, c, d, ai, da, bi, mask; - uint64_t a64, b64, c64, d64; - uint64_t a128[2], b128[2], c128[2], d128[2], e128[2]; - gf_t gf, gf_def; - uint8_t *r8b, *r8c, *r8d; - uint16_t *r16b, *r16c, *r16d; - uint32_t *r32b, *r32c, *r32d; - uint64_t *r64b, *r64c, *r64d; - uint64_t *r128b, *r128c, *r128d; - time_t t0; - gf_internal_t *h; - - if (argc < 4) usage(NULL); - if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n"); - if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n"); - if (t0 == -1) t0 = time(0); - MOA_Seed(t0); - - if (w > 32 && w != 64 && w != 128) usage("Bad w"); - - if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("Bad Method"); - - for (i = 0; i < strlen(argv[2]); i++) { - if (strchr("ASRV", argv[2][i]) == NULL) usage("Bad test\n"); - } - - h = (gf_internal_t *) gf.scratch; - if (w <= 32) { - mask = 0; - for (i = 0; i < w; i++) mask |= (1 << i); - } - - verbose = (strchr(argv[2], 'V') != NULL); - single = (strchr(argv[2], 'S') != NULL || strchr(argv[2], 'A') != NULL); - region = (strchr(argv[2], 'R') != NULL || strchr(argv[2], 'A') != NULL); - - if (((h->region_type & GF_REGION_ALTMAP) != 0) && (h->mult_type == GF_MULT_COMPOSITE)) { - test_alt_map(); - } - - if (!gf_init_easy(&gf_def, w, GF_MULT_DEFAULT)) problem("No default for this value of w"); - - if (verbose) printf("Seed: %ld\n", t0); - - if (single) { - - if (w <= 32) { - if (gf.multiply.w32 == NULL) problem("No multiplication operation defined."); - if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } - if (w <= 10) { - top = (1 << w)*(1 << w); - } else { - top = 1000000; - } - for (i = 0; i < top; i++) { - if (w <= 10) { - a = i % (1 << w); - b = i >> w; - } else if (i < 10) { - a = 0; - b = MOA_Random_W(w, 1); - } else if (i < 20) { - b = 0; - a = MOA_Random_W(w, 1); - } else if (i < 30) { - a = 1; - b = MOA_Random_W(w, 1); - } else if (i < 40) { - b = 1; - a = MOA_Random_W(w, 1); - } else { - a = MOA_Random_W(w, 1); - b = MOA_Random_W(w, 1); - } - - c = gf.multiply.w32(&gf, a, b); - tested = 0; - - /* If this is not composite, then first test against the default: */ - - if (h->mult_type != GF_MULT_COMPOSITE) { - tested = 1; - d = gf_def.multiply.w32(&gf_def, a, b); - - if (c != d) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" The default returned %x\n", d); - exit(1); - } - } - - /* Now, we also need to double-check, in case the default is wanky, and when - we're performing composite operations. Start with 0 and 1: */ - - if (a == 0 || b == 0 || a == 1 || b == 1) { - tested = 1; - if (((a == 0 || b == 0) && c != 0) || - (a == 1 && c != b) || - (b == 1 && c != a)) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x, which is clearly wrong.\n", a, b, c); - exit(1); - } - - /* If division or inverses are defined, let's test all combinations to make sure - that the operations are consistent with each other. */ - - } else { - if ((c & mask) != c) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x, which is too big.\n", a, b, c); - exit(1); - } - - } - if (gf.inverse.w32 != NULL && (a != 0 || b != 0)) { - tested = 1; - if (a != 0) { - ai = gf.inverse.w32(&gf, a); - - if (gf.multiply.w32(&gf, c, ai) != b) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", - a, ai, c, ai, gf.multiply.w32(&gf, c, ai)); - exit(1); - } - } - if (b != 0) { - bi = gf.inverse.w32(&gf, b); - if (gf.multiply.w32(&gf, c, bi) != a) { - printf("Error in single multiplication (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", - b, bi, c, bi, gf.multiply.w32(&gf, c, bi)); - exit(1); - } - } - } - if (gf.divide.w32 != NULL && (a != 0 || b != 0)) { - tested = 1; - - if (a != 0) { - ai = gf.divide.w32(&gf, c, a); - - if (ai != b) { - printf("Error in single multiplication/division (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" gf.divide.w32() of %x and %x returned %x\n", c, a, ai); - exit(1); - } - } - if (b != 0) { - bi = gf.divide.w32(&gf, c, b); - - if (bi != a) { - printf("Error in single multiplication/division (all numbers in hex):\n\n"); - printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); - printf(" gf.divide.w32() of %x and %x returned %x\n", c, b, bi); - exit(1); - } - } - } - - if (!tested) problem("There is no way to test multiplication.\n"); - } - - } else if (w == 64) { - if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } - if (gf.multiply.w64 == NULL) problem("No multiplication operation defined."); - for (i = 0; i < 1000; i++) { - for (j = 0; j < 1000; j++) { - a64 = MOA_Random_64(); - b64 = MOA_Random_64(); - c64 = gf.multiply.w64(&gf, a64, b64); - if ((a64 == 0 || b64 == 0) && c64 != 0) problem("Single Multiplication by zero Failed"); - if (a64 != 0 && b64 != 0) { - d64 = (gf.divide.w64 == NULL) ? gf_def.divide.w64(&gf_def, c64, b64) : gf.divide.w64(&gf, c64, b64); - if (d64 != a64) { - printf("0x%llx * 0x%llx =? 0x%llx (check-a: 0x%llx)\n", a64, b64, c64, d64); - problem("Single multiplication/division failed"); - } - } - } - } - if (gf.inverse.w64 == NULL) { - printf("No inverse defined for this method.\n"); - } else { - if (verbose) { printf("Testing Inversions.\n"); fflush(stdout); } - for (i = 0; i < 1000; i++) { - do { a64 = MOA_Random_64(); } while (a64 == 0); - b64 = gf.inverse.w64(&gf, a64); - if (gf.multiply.w64(&gf, a64, b64) != 1) problem("Inversion failed.\n"); - } - } - } else if (w == 128) { - if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } - if (gf.multiply.w128 == NULL) problem("No multiplication operation defined."); - for (i = 0; i < 500; i++) { - for (j = 0; j < 500; j++) { - MOA_Random_128(a128); - MOA_Random_128(b128); - gf.multiply.w128(&gf, a128, b128, c128); - if ((GF_W128_IS_ZERO(a128) && GF_W128_IS_ZERO(b128)) && !(GF_W128_IS_ZERO(c128))) problem("Single Multiplication by zero Failed"); - if (!GF_W128_IS_ZERO(a128) && !GF_W128_IS_ZERO(b128)) { - gf.divide.w128 == NULL ? gf_def.divide.w128(&gf_def, c128, b128, d128) : gf.divide.w128(&gf, c128, b128, d128); - if (!GF_W128_EQUAL(a128, d128)) { - printf("0x%llx 0x%llx * 0x%llx 0x%llx =? 0x%llx 0x%llx (check-a: 0x%llx 0x%llx)\n", a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]); - problem("Single multiplication/division failed"); - } - } - } - } - if (gf.inverse.w128 == NULL) { - printf("No inverse defined for this method.\n"); - } else { - if (verbose) { printf("Testing Inversions.\n"); fflush(stdout); } - for (i = 0; i < 1000; i++) { - do { MOA_Random_128(a128); } while (GF_W128_IS_ZERO(a128)); - gf.inverse.w128(&gf, a128, b128); - gf.multiply.w128(&gf, a128, b128, c128); - if (!(c128[0] == 0 && c128[1] == 1)) problem("Inversion failed.\n"); - } - } - - } else { - problem("Value of w not implemented yet"); - } - } - - if (region) { - - if (w == 4) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r8b = (uint8_t *) malloc(REGION_SIZE); - r8c = (uint8_t *) malloc(REGION_SIZE); - r8d = (uint8_t *) malloc(REGION_SIZE); - fill_random_region(r8b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 16; a++) { - fill_random_region(r8c, REGION_SIZE); - memcpy(r8d, r8c, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - b = (r8b[i] >> 4); - c = (r8c[i] >> 4); - d = (r8d[i] >> 4); - if (!xor && gf.multiply.w32(&gf, a, b) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); - printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" %d %d %d %d\n", a, b, c, d); - printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - b = (r8b[i] & 0xf); - c = (r8c[i] & 0xf); - d = (r8d[i] & 0xf); - if (!xor && gf.multiply.w32(&gf, a, b) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); - printf("i=%d. 0x%x 0x%x 0x%x 0x%x\n", i, a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); - printf(" (%d * %d ^ %d) should equal %d - equals %d\n", - a, b, d, (gf.multiply.w32(&gf, a, b) ^ d), c); - printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 16; a++) { - fill_random_region(r8b, REGION_SIZE); - memcpy(r8d, r8b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - b = (r8b[i] >> 4); - d = (r8d[i] >> 4); - if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { - printf("i=%d. %d %d %d\n", i, a, b, d); - printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - b = (r8b[i] & 0xf); - d = (r8d[i] & 0xf); - if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { - printf("%d %d %d\n", a, b, d); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r8b); - free(r8c); - free(r8d); - } - } else if (w == 8) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r8b = (uint8_t *) malloc(REGION_SIZE); - r8c = (uint8_t *) malloc(REGION_SIZE); - r8d = (uint8_t *) malloc(REGION_SIZE); - fill_random_region(r8b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 256; a++) { - fill_random_region(r8c, REGION_SIZE); - memcpy(r8d, r8c, REGION_SIZE); - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - sindex = 0; - eindex = REGION_SIZE; - } else { - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - } - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - b = get_alt_map_2w8(i, (uint8_t*)r8b, REGION_SIZE / 2); - c = get_alt_map_2w8(i, (uint8_t*)r8c, REGION_SIZE / 2); - d = get_alt_map_2w8(i, (uint8_t*)r8d, REGION_SIZE / 2); - } else { - b = r8b[i]; - c = r8c[i]; - d = r8d[i]; - } - if (!xor && gf.multiply.w32(&gf, a, b) != c) { - printf("i=%d. %d %d %d %d\n", i, a, b, c, d); - printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); - printf("%llx. Sindex: %d\n", r8b+i, sindex); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { - printf("i=%d. %d %d %d %d\n", i, a, b, c, d); - printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - continue; - } - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (a = 0; a < 256; a++) { - fill_random_region(r8b, REGION_SIZE); - memcpy(r8d, r8b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint8_t); - gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); - for (i = sindex; i < eindex; i++) { - b = r8b[i]; - d = r8d[i]; - if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { - printf("i=%d. %d %d %d\n", i, a, b, d); - printf("i=%d. %d %d %d\n", i, a, r8b[i], r8d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r8b); - free(r8c); - free(r8d); - } - } else if (w == 16) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r16b = (uint16_t *) malloc(REGION_SIZE); - r16c = (uint16_t *) malloc(REGION_SIZE); - r16d = (uint16_t *) malloc(REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - fill_random_region(r16b, REGION_SIZE); - a = MOA_Random_W(w, 0); - fill_random_region(r16c, REGION_SIZE); - memcpy(r16d, r16c, REGION_SIZE); - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - sindex = 0; - eindex = REGION_SIZE / sizeof(uint16_t); - } else { - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); - } - size = (eindex-sindex)*sizeof(uint16_t); - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16c+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), ai, size, xor); - } - - for (i = sindex; i < eindex; i++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - b = get_alt_map_2w16(i, (uint8_t*)r16b, size / 2); - c = get_alt_map_2w16(i, (uint8_t*)r16c, size / 2); - d = get_alt_map_2w16(i, (uint8_t*)r16d, size / 2); - } else { - b = r16b[i]; - c = r16c[i]; - d = r16d[i]; - } - if (!xor && d != b) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); - printf("%d is the inverse of %d\n", ai, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && b != 0) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf(" b should equal 0, but it doesn't. Probe into it.\n"); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - continue; - } - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a = MOA_Random_W(w, 0); - fill_random_region(r16b, REGION_SIZE); - memcpy(r16d, r16b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint16_t); - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, 0); - } - - for (i = sindex; i < eindex; i++) { - b = r16b[i]; - c = r16c[i]; - d = r16d[i]; - if (!xor && (d != b)) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We did d=b; b = ba; b = b(a^-1).\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - printf("b = %d. d = %d. a = %d\n", b, d, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && d != b) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); - printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r16b); - free(r16c); - free(r16d); - } - } else if (w == 32) { - if (gf.multiply_region.w32 == NULL) { - printf("No multiply_region.\n"); - } else { - r32b = (uint32_t *) malloc(REGION_SIZE); - r32c = (uint32_t *) malloc(REGION_SIZE); - r32d = (uint32_t *) malloc(REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a = MOA_Random_32(); - fill_random_region(r32b, REGION_SIZE); - fill_random_region(r32c, REGION_SIZE); - memcpy(r32d, r32c, REGION_SIZE); - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - sindex = 0; - eindex = REGION_SIZE / sizeof(uint32_t); - } else { - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); - } - size = (eindex-sindex)*sizeof(uint32_t); - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32c+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), ai, size, xor); - } - for (i = sindex; i < eindex; i++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - b = get_alt_map_2w32(i, (uint8_t*)r32b, size / 2); - c = get_alt_map_2w32(i, (uint8_t*)r32c, size / 2); - d = get_alt_map_2w32(i, (uint8_t*)r32d, size / 2); - i++; - } else { - b = r32b[i]; - c = r32c[i]; - d = r32d[i]; - } - if (!xor && d != b) { - printf("i=%d. Addresses: b: 0x%lx\n", i, (unsigned long) (r32b+i)); - printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); - printf("%d is the inverse of %d\n", ai, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && b != 0) { - printf("i=%d. Addresses: b: 0x%lx c: 0x%lx d: 0x%lx\n", i, - (unsigned long) (r32b+i), (unsigned long) (r32c+i), (unsigned long) (r32d+i)); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf(" b should equal 0, but it doesn't. Probe into it.\n"); - printf("a: %8x b: %8x c: %8x, d: %8x\n", a, b, c, d); - problem("Failed buffer-constant, xor=1"); - } - - } - } - } - for (xor = 0; xor < 2; xor++) { - if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && - (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { - continue; - } - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a = MOA_Random_32(); - fill_random_region(r32b, REGION_SIZE); - memcpy(r32d, r32b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint32_t); - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), a, size, xor); - ai = gf.inverse.w32(&gf, a); - if (!xor) { - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, xor); - } else { - gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), 1, size, xor); - gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, 0); - } - - for (i = sindex; i < eindex; i++) { - b = r32b[i]; - c = r32c[i]; - d = r32d[i]; - if (!xor && (d != b)) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i)); - printf("We did d=b; b = ba; b = b(a^-1).\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - printf("b = %d. d = %d. a = %d\n", b, d, a); - problem("Failed buffer-constant, xor=0"); - } - if (xor && d != b) { - printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i)); - printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); - printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); - printf("So, b should equal d, but it doesn't. Look into it.\n"); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r32b); - free(r32c); - free(r32d); - } - } else if (w == 64) { - if (gf.multiply_region.w64 == NULL) { - printf("No multiply_region.\n"); - } else { - r64b = (uint64_t *) malloc(REGION_SIZE); - r64c = (uint64_t *) malloc(REGION_SIZE); - r64d = (uint64_t *) malloc(REGION_SIZE); - fill_random_region(r64b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a64 = MOA_Random_64(); - fill_random_region(r64c, REGION_SIZE); - memcpy(r64d, r64c, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint64_t); - gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64c+sindex), a64, size, xor); - for (i = sindex; i < eindex; i++) { - b64 = r64b[i]; - c64 = r64c[i]; - d64 = r64d[i]; - if (!xor && gf.multiply.w64(&gf, a64, b64) != c64) { - printf("i=%d. 0x%llx 0x%llx 0x%llx should be 0x%llx\n", i, a64, b64, c64, - gf.multiply.w64(&gf, a64, b64)); - printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && (gf.multiply.w64(&gf, a64, b64) ^ d64) != c64) { - printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, b64, c64, d64); - printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i], r64d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - a64 = MOA_Random_64(); - fill_random_region(r64b, REGION_SIZE); - memcpy(r64d, r64b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint64_t); - gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64b+sindex), a64, size, xor); - for (i = sindex; i < eindex; i++) { - b64 = r64b[i]; - d64 = r64d[i]; - if (!xor && gf.multiply.w64(&gf, a64, d64) != b64) problem("Failed buffer-constant, xor=0"); - if (xor && (gf.multiply.w64(&gf, a64, d64) ^ d64) != b64) { - printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, b64, d64); - printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64d[i]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r64b); - free(r64c); - free(r64d); - } - } else if (w == 128) { - if (gf.multiply_region.w128 == NULL) { - printf("No multiply_region.\n"); - } else { - r128b = (uint64_t *) malloc(REGION_SIZE); - r128c = (uint64_t *) malloc(REGION_SIZE); - r128d = (uint64_t *) malloc(REGION_SIZE); - fill_random_region(r128b, REGION_SIZE); - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src != dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - MOA_Random_128(a128); - fill_random_region(r128c, REGION_SIZE); - memcpy(r128d, r128c, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); - size = (eindex-sindex)*sizeof(uint64_t)*2; - gf.multiply_region.w128(&gf, (void *) (r128b+sindex*2), (void *) (r128c+sindex*2), a128, size, xor); - for (i = sindex; i < eindex; i++) { - b128[0] = r128b[2*i]; - b128[1] = r128b[2*i+1]; - c128[0] = r128c[2*i]; - c128[1] = r128c[2*i+1]; - d128[0] = r128d[2*i]; - d128[1] = r128d[2*i+1]; - gf.multiply.w128(&gf, a128, b128, e128); - if (xor) { - e128[0] ^= d128[0]; - e128[1] ^= d128[1]; - } - if (!xor && !GF_W128_EQUAL(c128, e128)) { - printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx should be 0x%llx%llx\n", - i, a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], e128[0], e128[1]); - problem("Failed buffer-constant, xor=0"); - } - if (xor && !GF_W128_EQUAL(e128, c128)) { - printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx 0x%llx%llx\n", i, - a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]); - problem("Failed buffer-constant, xor=1"); - } - } - } - } - for (xor = 0; xor < 2; xor++) { - if (verbose) { - printf("Testing buffer-constant, src == dest, xor = %d\n", xor); - fflush(stdout); - } - for (j = 0; j < 1000; j++) { - MOA_Random_128(a128); - fill_random_region(r128b, REGION_SIZE); - memcpy(r128d, r128b, REGION_SIZE); - sindex = MOA_Random_W(3, 1); - sindex = 0; - eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); - eindex = REGION_SIZE/(2*sizeof(uint64_t)); - size = (eindex-sindex)*sizeof(uint64_t)*2; - gf.multiply_region.w128(&gf, (void *) (r128b+sindex), (void *) (r128b+sindex), a128, size, xor); - for (i = sindex; i < eindex; i++) { - b128[0] = r128b[2*i]; - b128[1] = r128b[2*i + 1]; - d128[0] = r128d[2*i]; - d128[1] = r128d[2*i + 1]; - gf.multiply.w128(&gf, a128, d128, e128); - if (xor) { - e128[0] ^= d128[0]; - e128[1] ^= d128[1]; - } - if (!xor && !GF_W128_EQUAL(b128, e128)) problem("Failed buffer-constant, xor=0"); - if (xor && !GF_W128_EQUAL(b128, e128)) { - problem("Failed buffer-constant, xor=1"); - } - } - } - } - free(r128b); - free(r128c); - free(r128d); - } - } - } - exit(0); -} |