summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jim Plank <plank@cs.utk.edu> 2013-12-29 16:51:37 -0500
committer Jim Plank <plank@cs.utk.edu> 2013-12-29 16:51:37 -0500
commit 88bb18c651e48c527de540564471dff34fdc0d9c (patch)
tree 28d405b659cb07308d3ab362ab997ccd5a6b4c44
parent 8eec6d46764dde8f11090d6bb0ff9111d535c793 (diff)
download gf-complete-88bb18c651e48c527de540564471dff34fdc0d9c.tar.gz
Added time_tool.sh to tools for quick timing.
Modified gf_methods to be a little more flexible.
-rw-r--r-- flag_tester/README.txt 10
-rw-r--r-- flag_tester/flag_test.c 120
-rw-r--r-- flag_tester/intel_cpu_capabilities.h 43
-rw-r--r-- flag_tester/pclmul_test.c 40
-rw-r--r-- flag_tester/pclmul_test.txt 8
-rw-r--r-- flag_tester/sse2_test.txt 30
-rw-r--r-- flag_tester/sse4_test.txt 35
-rw-r--r-- flag_tester/sse_test.c 142
-rw-r--r-- flag_tester/ssse3_test.txt 31
-rw-r--r-- flag_tester/whats_my_sse.c 43
-rwxr-xr-x flag_tester/which_compile_flags.sh 19
-rw-r--r-- tools/gf_methods.c 242
-rw-r--r-- tools/time_tool.sh 95
13 files changed, 256 insertions, 602 deletions
diff --git a/flag_tester/README.txt b/flag_tester/README.txt
deleted file mode 100644
index 19101ff..0000000
--- a/flag_tester/README.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Run which_compile_flags.sh and it will print out the compile flags to use in
- GNUmakefile. By default, this script uses "cc" as its compiler but you can
- pass in the name of your compiler as an argument.
-
-EXAMPLE: "./which_compile_flags.sh clang"
-
-This script will run "clang" in the above example so be warned that if you type
-something like "rm" for that argument, you get what you asked for. Also, make
-sure that the compiler that you pass to which_compile_flags.sh is the same as
-the compiler in GNUmakefile.
diff --git a/flag_tester/flag_test.c b/flag_tester/flag_test.c
deleted file mode 100644
index cecf472..0000000
--- a/flag_tester/flag_test.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * flag_test.c - copied from whats_my_sse.c to output proper compile
- * flags for the GNUmakefile
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "intel_cpu_capabilities.h"
-
-void usage()
-{
- fprintf(stderr, "usage: flag_test <compiler name>\n");
- exit(EXIT_FAILURE);
-}
-
-int main(int argc, char **argv)
-{
- //make sure to extend these buffers if more flags are added to this program
- char cflags[1000], ldflags[1000], buf[1000];
- FILE *file;
- char sse_found = 0;
-
- if(argc != 2)
- usage();
-
- sprintf(cflags, "CFLAGS = -O3");
- sprintf(ldflags, "LDFLAGS = -O3");
-
- if(cpu_has_feature(CPU_CAP_SSE42))
- {
- sprintf(buf, "%s sse_test.c -o sse4 -msse4 -DSSE4 2> /dev/null", argv[1]);
- system(buf);
- if(file = fopen("sse4", "r"))
- {
- fclose(file);
-
- //run program and compare to the included output
- system("./sse4 > temp.txt 2> /dev/null");
- system("diff sse4_test.txt temp.txt > diff.txt 2> /dev/null");
- file = fopen("diff.txt", "r");
- if(fgetc(file) == EOF)
- {
- strcat(cflags, " -msse4 -DINTEL_SSE4");
- strcat(ldflags, " -msse4");
- sse_found = 1;
- }
- fclose(file);
- }
- }
-
- if(cpu_has_feature(CPU_CAP_SSSE3) && !sse_found)
- {
- sprintf(buf, "%s sse_test.c -o ssse3 -mssse3 -DSSSE3 2> /dev/null", argv[1]);
- system(buf);
- if(file = fopen("ssse3", "r"))
- {
- fclose(file);
-
- //run program and compare to the included output
- system("./ssse3 > temp.txt 2> /dev/null");
- system("diff ssse3_test.txt temp.txt > diff.txt 2> /dev/null");
- file = fopen("diff.txt", "r");
- if(fgetc(file) == EOF)
- {
- strcat(cflags, " -mssse3 -DINTEL_SSSE3");
- strcat(ldflags, " -mssse3");
- sse_found = 1;
- }
- fclose(file);
- }
- }
-
- if(cpu_has_feature(CPU_CAP_SSE2) && !sse_found)
- {
- sprintf(buf, "%s sse_test.c -o sse2 -msse2 -DSSE2 2> /dev/null", argv[1]);
- system(buf);
- if(file = fopen("sse2", "r"))
- {
- fclose(file);
-
- //run program and compare to the included output
- system("./sse2 > temp.txt 2> /dev/null");
- system("diff sse2_test.txt temp.txt > diff.txt 2> /dev/null");
- file = fopen("diff.txt", "r");
- if(fgetc(file) == EOF)
- {
- strcat(cflags, " -msse2 -DINTEL_SSE2");
- strcat(ldflags, " -msse2");
- sse_found = 1;
- }
- fclose(file);
- }
- }
-
- if(cpu_has_feature(CPU_CAP_PCLMULQDQ) && sse_found)
- {
- sprintf(buf, "%s pclmul_test.c -o pclmul -maes -mpclmul 2> /dev/null"
- , argv[1]);
- system(buf);
- if(file = fopen("pclmul", "r"))
- {
- fclose(file);
-
- //run program and compare to the included output
- system("./pclmul > temp.txt 2> /dev/null");
- system("diff pclmul_test.txt temp.txt > diff.txt 2> /dev/null");
- file = fopen("diff.txt", "r");
- if(fgetc(file) == EOF)
- {
- strcat(cflags, " -maes -mpclmul -DINTEL_PCLMUL");
- strcat(ldflags, " -maes -mpclmul");
- }
- fclose(file);
- }
- }
-
- printf("%s\n%s\n", cflags, ldflags);
-}
diff --git a/flag_tester/intel_cpu_capabilities.h b/flag_tester/intel_cpu_capabilities.h
deleted file mode 100644
index 6d1bbeb..0000000
--- a/flag_tester/intel_cpu_capabilities.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Routines to figure out what an Intel CPU's capabilities are.
- */
-
-#pragma once
-
-#include <stdint.h>
-
-/* Words in CPE_INFO */
-#define CPU_CPE_INFO 0x1000
-#define CPU_CAP_MMX (CPU_CPE_INFO | 23)
-#define CPU_CAP_SSE (CPU_CPE_INFO | 25)
-#define CPU_CAP_SSE2 (CPU_CPE_INFO | 26)
-
-/* Words in CPSSE */
-#define CPU_CPSSE 0x2000
-#define CPU_CAP_SSE3 (CPU_CPSSE | 0)
-#define CPU_CAP_PCLMULQDQ (CPU_CPSSE | 1)
-#define CPU_CAP_SSSE3 (CPU_CPSSE | 9)
-#define CPU_CAP_SSE41 (CPU_CPSSE | 19)
-#define CPU_CAP_SSE42 (CPU_CPSSE | 20)
-#define CPU_CAP_AVX (CPU_CPSSE | 28)
-
-#define cpuid(func,ax,bx,cx,dx)\
- __asm__ __volatile__ ("cpuid":\
- "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
-
-int
-cpu_has_feature (unsigned which)
-{
- uint32_t cpeinfo;
- uint32_t cpsse;
- uint32_t a, b;
-
- cpuid(1, a, b, cpsse, cpeinfo);
- if (which & CPU_CPE_INFO) {
- return (!! ((cpeinfo >> (which & 0xff)) & 0x1) );
- } else if (which & CPU_CPSSE) {
- return (!! ((cpsse >> (which & 0xff)) & 0x1) );
- } else {
- return (0);
- }
-}
diff --git a/flag_tester/pclmul_test.c b/flag_tester/pclmul_test.c
deleted file mode 100644
index bdae184..0000000
--- a/flag_tester/pclmul_test.c
+++ /dev/null
@@ -1,40 +0,0 @@
-#include <wmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
-
-
-int main()
-{
- uint64_t answer;
- uint32_t pp;
- __m128i a, b, c;
-
- a = _mm_set1_epi8(0x0D);
- b = _mm_set_epi32(0,0,0,0x0A);
- pp = 0x13;
- MM_PRINT8("a", a);
- MM_PRINT8("b", b);
-
- c = _mm_clmulepi64_si128(a, b, 0);
- MM_PRINT8("a clm b", c);
-
- a = _mm_set1_epi8(0xf0);
- MM_PRINT8("a", a);
- b = _mm_and_si128(a, c);
- b = _mm_srli_epi64(b, 4);
- MM_PRINT8("shifted", b);
-
-
- a = _mm_set_epi32(0,0,0,pp);
- MM_PRINT8("PP", a);
-
- b = _mm_clmulepi64_si128(a, b, 0);
- MM_PRINT8("PP clm over", b);
-
- c = _mm_xor_si128(c,b);
- MM_PRINT8("Answer", c);
- //answer = _mm_extract_epi64(c, 0);
- //printf("%llx\n", answer);
-}
diff --git a/flag_tester/pclmul_test.txt b/flag_tester/pclmul_test.txt
deleted file mode 100644
index 6102f94..0000000
--- a/flag_tester/pclmul_test.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-a 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d
-b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a
-a clm b 00 00 00 00 00 00 00 00 72 72 72 72 72 72 72 72
-a f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
-shifted 00 00 00 00 00 00 00 00 07 07 07 07 07 07 07 07
-PP 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 13
-PP clm over 00 00 00 00 00 00 00 00 79 79 79 79 79 79 79 79
-Answer 00 00 00 00 00 00 00 00 0b 0b 0b 0b 0b 0b 0b 0b
diff --git a/flag_tester/sse2_test.txt b/flag_tester/sse2_test.txt
deleted file mode 100644
index f79b6e0..0000000
--- a/flag_tester/sse2_test.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
-a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
-b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
-c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
-d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
-a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
-d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
-d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
-d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
-d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
-d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
-d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
-d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
-d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
-d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
-d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
-d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
-b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
-d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
-d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
diff --git a/flag_tester/sse4_test.txt b/flag_tester/sse4_test.txt
deleted file mode 100644
index 3f6d7ec..0000000
--- a/flag_tester/sse4_test.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
-a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
-b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
-c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
-d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
-a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
-d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
-d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
-d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
-d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
-d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
-d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
-d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
-d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
-d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
-d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
-d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
-b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
-d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
-d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-d insert32 @ 2 00 00 00 00 ab cd 12 34 00 00 00 00 00 00 00 00
-extract_epi32 @ 2: abcd1234
-d insert64 @ 0 00 00 00 00 ab cd 12 34 fe dc ba 12 91 82 73 64
-extract_epi64 @ 0: fedcba1291827364
-c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
-a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
diff --git a/flag_tester/sse_test.c b/flag_tester/sse_test.c
deleted file mode 100644
index e40cf25..0000000
--- a/flag_tester/sse_test.c
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifdef SSE4
-#define SSSE3
-#include <nmmintrin.h>
-#endif
-
-#ifdef SSSE3
-#define SSE2
-#include <tmmintrin.h>
-#endif
-
-#ifdef SSE2
-#include <emmintrin.h>
-#endif
-
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
-
-int main()
-{
- uint32_t u32;
- uint64_t u64;
- uint8_t *ui8 = malloc(20), i;
- __m128i a, b, c, d;
-
- for(i=0; i < 20; i++)
- ui8[i] = i;
-
- a = _mm_load_si128( (__m128i *) ui8 );
- b = _mm_loadu_si128( (__m128i *) (ui8+1));
- c = _mm_loadu_si128( (__m128i *) (ui8+2));
- d = _mm_loadu_si128( (__m128i *) (ui8+3));
-
- MM_PRINT8("a", a);
- MM_PRINT8("b", b);
- MM_PRINT8("c", c);
- MM_PRINT8("d", d);
-
- a = _mm_slli_epi16(a, 2);
- b = _mm_slli_epi32(b, 2);
- c = _mm_slli_epi64(c, 2);
- d = _mm_slli_si128(d, 2);
-
- MM_PRINT8("a sl16", a);
- MM_PRINT8("b sl32", b);
- MM_PRINT8("c sl64", c);
- MM_PRINT8("d sl128", d);
-
- a = _mm_srli_epi16(a, 2);
- b = _mm_srli_epi32(b, 2);
- c = _mm_srli_epi64(c, 2);
- d = _mm_srli_si128(d, 2);
-
- MM_PRINT8("a sr16", a);
- MM_PRINT8("b sr32", b);
- MM_PRINT8("c sr64", c);
- MM_PRINT8("d sr128", d);
-
- d = _mm_xor_si128(a, b);
- MM_PRINT8("d = a^b", d);
-
- d = _mm_sub_epi8(a, b);
- MM_PRINT8("d = a-b epi8", d);
-
- d = _mm_sub_epi16(a, b);
- MM_PRINT8("d = a-b epi16", d);
-
- d = _mm_sub_epi32(a, b);
- MM_PRINT8("d = a-b epi32", d);
-
- d = _mm_sub_epi64(a, b);
- MM_PRINT8("d = a-b epi64", d);
-
- d = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- MM_PRINT8("d set_epi8", d);
-
- d = _mm_set_epi32(0x12345678, 0x9abcdef0, 0x12345678, 0x9abcdef0);
- MM_PRINT8("d set_epi32", d);
-
- d = _mm_set1_epi64x(0xF0F0F0F0F0F0F0F0ULL);
- MM_PRINT8("d set1_epi64", d);
-
- d = _mm_set1_epi32(0xe2e2e2e2);
- MM_PRINT8("d set1_epi32", d);
-
- d = _mm_set1_epi16(0xaff3);
- MM_PRINT8("d set1_epi16", d);
-
- d = _mm_set1_epi8(0xc5);
- MM_PRINT8("d set1_epi8", d);
-
- d = _mm_packus_epi16(d, d);
- MM_PRINT8("d packus_epi16(d,d)", d);
-
- c = _mm_unpackhi_epi8(a, d);
- MM_PRINT8("c unpackhi(a,d)", c);
-
- b = _mm_unpacklo_epi8(c, a);
- MM_PRINT8("b unpacklo(c,a)", b);
-
- d = _mm_and_si128(d, b);
- MM_PRINT8("d and(d,b)", d);
-
- _mm_store_si128( (__m128i *) ui8, a);
- printf("a stored to mem: ");
- for(i=0; i < 16; i++)
- printf("%u ", ui8[i]);
- printf("\n");
-
- d = _mm_setzero_si128();
- MM_PRINT8("d setzero", d);
-
- u32 = 0xABCD1234;
- u64 = 0xFEDCBA1291827364ULL;
-
- #ifdef SSE4
- d = _mm_insert_epi32(d, u32, 2);
- MM_PRINT8("d insert32 @ 2", d);
-
- u32 = 0;
- u32 = _mm_extract_epi32(d, 2);
- printf("extract_epi32 @ 2: %x\n", u32);
-
- d = _mm_insert_epi64(d, u64, 0);
- MM_PRINT8("d insert64 @ 0", d);
-
- u64 = 0;
- u64 = _mm_extract_epi64(d, 0);
- printf("extract_epi64 @ 0: %" PRIx64 "\n", u64);
- #endif
-
- c = _mm_set1_epi8(5);
- MM_PRINT8("c", c);
-
- #ifdef SSSE3
- a = _mm_shuffle_epi8(b, c);
- MM_PRINT8("a shuffle(b, c)", a);
- #endif
-
-}
diff --git a/flag_tester/ssse3_test.txt b/flag_tester/ssse3_test.txt
deleted file mode 100644
index 17bee1a..0000000
--- a/flag_tester/ssse3_test.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
-a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
-b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
-c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
-d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
-a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
-d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
-d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
-d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
-d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
-d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
-d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
-d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
-d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
-d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
-d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
-d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
-d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
-b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
-d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
-d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
-a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
diff --git a/flag_tester/whats_my_sse.c b/flag_tester/whats_my_sse.c
deleted file mode 100644
index 8c9192c..0000000
--- a/flag_tester/whats_my_sse.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * whats_my_sse.c - lifted from Jens Gregor -- thanks Jens
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "intel_cpu_capabilities.h"
-
-struct {
- unsigned feature_code;
- const char * feature_name;
-} features[] = {
- {CPU_CAP_MMX, "MMX"},
- {CPU_CAP_SSE, "SSE"},
- {CPU_CAP_SSE2, "SSE2"},
- {CPU_CAP_SSE3, "SSE3"},
- {CPU_CAP_SSSE3, "SSSE3"},
- {CPU_CAP_SSE41, "SSE4.1"},
- {CPU_CAP_SSE42, "SSE4.2"},
- {CPU_CAP_PCLMULQDQ, "PCLMULQDQ"},
- {CPU_CAP_AVX, "AVX"},
- {0, NULL},
-};
-
-
-int main()
-{
- unsigned i;
-
- printf ("CPU has the following instruction set features enabled: " );
- for (i = 0; features[i].feature_name != NULL; ++i) {
- if (cpu_has_feature (features[i].feature_code)) {
- printf ("%s ", features[i].feature_name);
- }
- }
- printf ("\nCPU is missing the following instruction set features: ");
- for (i = 0; features[i].feature_name != NULL; ++i) {
- if (! cpu_has_feature (features[i].feature_code)) {
- printf ("%s ", features[i].feature_name);
- }
- }
- printf ("\n");
-}
diff --git a/flag_tester/which_compile_flags.sh b/flag_tester/which_compile_flags.sh
deleted file mode 100755
index f39c609..0000000
--- a/flag_tester/which_compile_flags.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-if [ -n "$1" ]; then
- CC=$1
-else
- CC=cc
-fi
-
-$CC flag_test.c -o flag_test 2> /dev/null
-if [ -e "flag_test" ]; then
- OUTPUT=`./flag_test $CC 2> /dev/null`
- if [ -n "$OUTPUT" ]; then
- echo "$OUTPUT"
- else
- printf "CFLAGS = -O3\nLDFLAGS = -O3\n"
- fi
-else
- printf "$CC failed to compile flag_test.c\n"
-fi
-
-rm sse4 sse2 ssse3 pclmul diff.txt flag_test temp.txt 2> /dev/null
diff --git a/tools/gf_methods.c b/tools/gf_methods.c
index 90ddd15..298c1ec 100644
--- a/tools/gf_methods.c
+++ b/tools/gf_methods.c
@@ -17,109 +17,189 @@
#include "gf_method.h"
#include "gf_int.h"
+#define BNMULTS (7)
+static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
+ "TABLE", "LOG", "SPLIT4", "SPLIT88", "COMPOSITE" };
-#define NMULTS (16)
-static char *mults[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
+/* Exhaustive (-A) list of multiplication methods.
+   NMULTS must equal the number of initializers below: main() walks
+   mults[0..nmults-1] and hands each entry to strcmp(), so a surplus,
+   zero-initialized (NULL) slot at the end would be dereferenced. */
+#define NMULTS (15)
+static char *MULTS[NMULTS] = { "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };
+/* Make sure CAUCHY is last */
+
#define NREGIONS (7)
-static char *regions[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE",
+static char *REGIONS[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE",
"ALTMAP", "CAUCHY" };
+#define BNREGIONS (4)
+static char *BREGIONS[BNREGIONS] = { "DOUBLE", "QUAD", "ALTMAP", "CAUCHY" };
+
#define NDIVS (2)
static char *divides[NDIVS] = { "MATRIX", "EUCLID" };
+/* Print the gf_methods command-line synopsis to stderr and exit with status 1.
+ *
+ * s: optional extra diagnostic printed after the synopsis (preceded by a
+ *    blank line); pass NULL to print the synopsis only.
+ *
+ * Never returns.
+ */
+void usage(char *s)
+{
+ /* The first flag group accepts only B, A, D and C (see the switch in main). */
+ fprintf(stderr, "usage: gf_methods w -BADC -LUMDRB\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " w can be 1-32, 64, 128\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " -B lists basic methods that are useful\n");
+ fprintf(stderr, " -A does a nearly exhaustive listing\n");
+ fprintf(stderr, " -D adds EUCLID and MATRIX division\n");
+ fprintf(stderr, " -C adds CAUCHY when possible\n");
+ fprintf(stderr, " Combinations are fine.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " -L Simply lists methods\n");
+ fprintf(stderr, " -U Produces calls to gf_unit\n");
+ fprintf(stderr, " -M Produces calls to time_tool.sh for single multiplications\n");
+ fprintf(stderr, " -D Produces calls to time_tool.sh for single divisions\n");
+ fprintf(stderr, " -R Produces calls to time_tool.sh for region multiplications\n");
+ fprintf(stderr, " -B Produces calls to time_tool.sh for the fastest region multiplications\n");
+ /* The second flag must be exactly one letter, so none of these combine. */
+ fprintf(stderr, " Cannot combine L, U, M, D, R, B.\n");
+ if (s != NULL) {
+ fprintf(stderr, "\n");
+ fprintf(stderr, "%s\n", s);
+ }
+ exit(1);
+}
int main(int argc, char *argv[])
{
- int m, r, d, w, i, sa, j, k, reset;
- char *gf_argv[50];
+ int m, r, d, w, i, sa, j, k, reset, ok;
+ int nregions;
+ int nmults;
+ char **regions;
+ char **mults;
+ int exhaustive = 0;
+ int divide = 0;
+ int cauchy = 0;
+ int listing;
+ char *gf_argv[50], *x;
gf_t gf;
char divs[200], ks[10], ls[10];
- char * w_str = "w=%d:";
+ char * w_str;
- if (argc == 2) {
- if (!strcmp (argv[1], "-U")) {
- w_str = "%d A -1";
+ if (argc != 4) usage(NULL);
+ w = atoi(argv[1]);
+ ok = (w >= 1 && w <= 32);
+ if (w == 64) ok = 1;
+ if (w == 128) ok = 1;
+ if (!ok) usage("Bad w");
+
+ if (argv[2][0] != '-' || argv[3][0] != '-' || strlen(argv[2]) == 1 || strlen(argv[3]) != 2) {
+ usage(NULL);
+ }
+ for (i = 1; argv[2][i] != '\0'; i++) {
+ switch(argv[2][i]) {
+ case 'B': exhaustive = 0; break;
+ case 'A': exhaustive = 1; break;
+ case 'D': divide = 1; break;
+ case 'C': cauchy = 1; break;
+ default: usage("Bad -BADC");
}
}
- for (i = 2; i < 8; i++) {
- w = (1 << i);
- gf_argv[0] = "-";
- if (create_gf_from_argv(&gf, w, 1, gf_argv, 0) > 0) {
- printf(w_str, w);
- printf(" - \n");
- gf_free(&gf, 1);
- } else if (_gf_errno == GF_E_DEFAULT) {
- fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2);
- exit(1);
+ if (strchr("LUMDRB", argv[3][1]) == NULL) { usage("Bad -LUMDRB"); }
+ listing = argv[3][1];
+
+ if (listing == 'U') {
+ w_str = "../test/gf_unit %d A -1";
+ } else if (listing == 'L') {
+ w_str = "w=%d:";
+ } else {
+ w_str = strdup("sh time_tool.sh X %d");
+ x = strchr(w_str, 'X');
+ *x = listing;
+ }
+
+ gf_argv[0] = "-";
+ if (create_gf_from_argv(&gf, w, 1, gf_argv, 0) > 0) {
+ printf(w_str, w);
+ printf(" - \n");
+ gf_free(&gf, 1);
+ } else if (_gf_errno == GF_E_DEFAULT) {
+ fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2);
+ exit(1);
+ }
+
+ nregions = (exhaustive) ? NREGIONS : BNREGIONS;
+ if (!cauchy) nregions--;
+ regions = (exhaustive) ? REGIONS : BREGIONS;
+ mults = (exhaustive) ? MULTS : BMULTS;
+ nmults = (exhaustive) ? NMULTS : BNMULTS;
+
+
+ for (m = 0; m < nmults; m++) {
+ sa = 0;
+ gf_argv[sa++] = "-m";
+ if (strcmp(mults[m], "GROUP44") == 0) {
+ gf_argv[sa++] = "GROUP";
+ gf_argv[sa++] = "4";
+ gf_argv[sa++] = "4";
+ } else if (strcmp(mults[m], "GROUP48") == 0) {
+ gf_argv[sa++] = "GROUP";
+ gf_argv[sa++] = "4";
+ gf_argv[sa++] = "8";
+ } else if (strcmp(mults[m], "SPLIT2") == 0) {
+ gf_argv[sa++] = "SPLIT";
+ sprintf(ls, "%d", w);
+ gf_argv[sa++] = ls;
+ gf_argv[sa++] = "2";
+ } else if (strcmp(mults[m], "SPLIT4") == 0) {
+ gf_argv[sa++] = "SPLIT";
+ sprintf(ls, "%d", w);
+ gf_argv[sa++] = ls;
+ gf_argv[sa++] = "4";
+ } else if (strcmp(mults[m], "SPLIT8") == 0) {
+ gf_argv[sa++] = "SPLIT";
+ sprintf(ls, "%d", w);
+ gf_argv[sa++] = ls;
+ gf_argv[sa++] = "8";
+ } else if (strcmp(mults[m], "SPLIT16") == 0) {
+ gf_argv[sa++] = "SPLIT";
+ sprintf(ls, "%d", w);
+ gf_argv[sa++] = ls;
+ gf_argv[sa++] = "16";
+ } else if (strcmp(mults[m], "SPLIT88") == 0) {
+ gf_argv[sa++] = "SPLIT";
+ gf_argv[sa++] = "8";
+ gf_argv[sa++] = "8";
+ } else if (strcmp(mults[m], "COMPOSITE") == 0) {
+ gf_argv[sa++] = "COMPOSITE";
+ gf_argv[sa++] = "2";
+ gf_argv[sa++] = "-";
+ } else {
+ gf_argv[sa++] = mults[m];
}
+ reset = sa;
- for (m = 0; m < NMULTS; m++) {
- sa = 0;
- gf_argv[sa++] = "-m";
- if (strcmp(mults[m], "GROUP44") == 0) {
- gf_argv[sa++] = "GROUP";
- gf_argv[sa++] = "4";
- gf_argv[sa++] = "4";
- } else if (strcmp(mults[m], "GROUP48") == 0) {
- gf_argv[sa++] = "GROUP";
- gf_argv[sa++] = "4";
- gf_argv[sa++] = "8";
- } else if (strcmp(mults[m], "SPLIT2") == 0) {
- gf_argv[sa++] = "SPLIT";
- sprintf(ls, "%d", w);
- gf_argv[sa++] = ls;
- gf_argv[sa++] = "2";
- } else if (strcmp(mults[m], "SPLIT4") == 0) {
- gf_argv[sa++] = "SPLIT";
- sprintf(ls, "%d", w);
- gf_argv[sa++] = ls;
- gf_argv[sa++] = "4";
- } else if (strcmp(mults[m], "SPLIT8") == 0) {
- gf_argv[sa++] = "SPLIT";
- sprintf(ls, "%d", w);
- gf_argv[sa++] = ls;
- gf_argv[sa++] = "8";
- } else if (strcmp(mults[m], "SPLIT16") == 0) {
- gf_argv[sa++] = "SPLIT";
- sprintf(ls, "%d", w);
- gf_argv[sa++] = ls;
- gf_argv[sa++] = "16";
- } else if (strcmp(mults[m], "SPLIT88") == 0) {
- gf_argv[sa++] = "SPLIT";
- gf_argv[sa++] = "8";
- gf_argv[sa++] = "8";
- } else if (strcmp(mults[m], "COMPOSITE") == 0) {
- gf_argv[sa++] = "COMPOSITE";
- gf_argv[sa++] = "2";
- gf_argv[sa++] = "-";
- } else {
- gf_argv[sa++] = mults[m];
- }
- reset = sa;
- for (r = 0; r < (1 << NREGIONS); r++) {
- sa = reset;
- for (k = 0; k < NREGIONS; k++) {
- if (r & 1 << k) {
- gf_argv[sa++] = "-r";
- gf_argv[sa++] = regions[k];
- }
- }
- gf_argv[sa++] = "-";
- if (create_gf_from_argv(&gf, w, sa, gf_argv, 0) > 0) {
- printf(w_str, w);
- for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]);
- printf("\n");
- gf_free(&gf, 1);
- } else if (_gf_errno == GF_E_DEFAULT) {
- fprintf(stderr, "Unlabeled failed method: w=%d:", w);
- for (j = 0; j < sa; j++) fprintf(stderr, " %s", gf_argv[j]);
- fprintf(stderr, "\n");
- exit(1);
+ for (r = 0; r < (1 << nregions); r++) {
+ sa = reset;
+ for (k = 0; k < nregions; k++) {
+ if (r & (1 << k)) {
+ gf_argv[sa++] = "-r";
+ gf_argv[sa++] = regions[k];
}
- sa--;
+ }
+ gf_argv[sa++] = "-";
+ /*
+ printf("Hmmmm. %s", gf_argv[0]);
+ for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]);
+ printf("\n"); */
+
+ if (create_gf_from_argv(&gf, w, sa, gf_argv, 0) > 0) {
+ printf(w_str, w);
+ for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]);
+ printf("\n");
+ gf_free(&gf, 1);
+ } else if (_gf_errno == GF_E_DEFAULT) {
+ fprintf(stderr, "Unlabeled failed method: w=%d:", w);
+ for (j = 0; j < sa; j++) fprintf(stderr, " %s", gf_argv[j]);
+ fprintf(stderr, "\n");
+ exit(1);
+ }
+ sa--;
+ if (divide) {
for (d = 0; d < NDIVS; d++) {
gf_argv[sa++] = "-d";
gf_argv[sa++] = divides[d];
diff --git a/tools/time_tool.sh b/tools/time_tool.sh
new file mode 100644
index 0000000..d2aa591
--- /dev/null
+++ b/tools/time_tool.sh
@@ -0,0 +1,95 @@
+#!/bin/sh
+#
+# time_tool.sh - Shell script to test various timings.
+# This is a rough tester -- its job is to work quickly rather than precisely.
+# (Jim Plank)
+#
+# usage: sh time_tool.sh M|D|R|B w method
+#   M|D|R|B  which test to run (multiplication, division, regions, best region)
+#   w        word size handed to ./gf_time
+#   method   one or more words forwarded verbatim to ./gf_time
+
+# At least three arguments are needed: the test letter, w, and a method word.
+if [ $# -lt 3 ]; then
+ echo 'usage sh time_tool.sh M|D|R|B w method' >&2
+ exit 1
+fi
+
+# Split the command line: test letter, word size, then everything else is the
+# method specification passed through to ./gf_time.
+op=$1
+w=$2
+
+shift ; shift
+
+method="$*"
+
+# Accept exactly one of M, D, R, B.  A quoted case pattern is used instead of
+# an unquoted "[ ... -a ... ]" chain so that an empty or multi-word first
+# argument is rejected here rather than breaking the test expression.
+case "$op" in
+ M|D|R|B)
+ ;;
+ *)
+ echo 'usage sh time_tool.sh M|D|R|B w method' >&2
+ echo 'You have to specify a test: ' >&2
+ echo ' M=Multiplication' >&2
+ echo ' D=Division' >&2
+ echo ' R=Regions' >&2
+ echo ' B=Best-Region' >&2
+ exit 1
+ ;;
+esac
+
+# First, use a 16K buffer to test the performance of single multiplies.
+
+# fac is w with every factor of two divided out (the loop halves n while it is
+# non-zero and even).  A result of 0 means w evaluated to zero, which is
+# rejected as a bad w.
+fac=`echo $w | awk '{ n = $1; while (n != 0 && n%2==0) n /= 2; print n }'`
+if [ $fac -eq 0 ]; then
+ echo 'usage sh time_tool.sh M|D|R|B w method' >&2
+ echo 'Bad w' >&2
+ exit 1
+fi
+
+# Scale the 16K base size by fac.
+# NOTE(review): presumably this keeps the buffer a whole number of w-bit
+# words for word sizes that are not powers of two -- confirm against gf_time.
+bsize=16384
+bsize=`echo $bsize $fac | awk '{ print $1 * $2 }'`
+
+# Probe run: execute one multiplication test with the requested method and
+# count the lines of combined stdout/stderr.  More than two lines is treated
+# as gf_time rejecting the method.  Both diagnostics go to stderr, matching
+# the other error paths in this script.
+if [ `./gf_time $w M -1 $bsize 1 $method 2>&1 | wc | awk '{ print $1 }'` -gt 2 ]; then
+ echo 'usage sh time_tool.sh M|D|R|B w method' >&2
+ echo "Bad method" >&2
+ exit 1
+fi
+
+# Single-operation timing (op is M or D).  gf_time is run with the selected
+# op, starting at one iteration.  Field 4 of its output, multiplied by 1000 and
+# truncated to an integer, is the stopping measure; field 8 is the value that
+# ends up printed under the "speed (MB/s)" label.
+# NOTE(review): fields 4 and 8 are presumably elapsed seconds and MB/s --
+# confirm against gf_time's output format.
+if [ $op = M -o $op = D ]; then
+ iter=1
+ c1=`./gf_time $w $op -1 $bsize $iter $method`
+ t=`echo $c1 | awk '{ printf "%d\n", $4*1000 }'`
+ s=`echo $c1 | awk '{ print $8 }'`
+ bs=$s
+
+ # Double the iteration count until the scaled field-4 value reaches 1.
+ # bs is refreshed from s *before* each re-run, so once the loop ends it holds
+ # the speed from the run preceding the last one (or from the only run when
+ # the loop body never executes).
+ # NOTE(review): confirm that reporting the next-to-last run is intended.
+ while [ $t -lt 1 ]; do
+ bs=$s
+ iter=`echo $iter | awk '{ print $1*2 }'`
+ c1=`./gf_time $w $op -1 $bsize $iter $method`
+ t=`echo $c1 | awk '{ printf "%d\n", $4*1000 }'`
+ s=`echo $c1 | awk '{ print $8 }'`
+ done
+
+ echo "$op speed (MB/s): " $bs " W-Method:" $w $method
+ exit 0
+fi
+
+# Region timing (reached only when op is R or B, since M and D exit above).
+# Start again from the 16K base size scaled by fac.
+bsize=16384
+bsize=`echo $bsize $fac | awk '{ print $1 * $2 }'`
+
+# Sweep buffer sizes, doubling each pass while bsize <= 4194304, and remember
+# the largest speed seen in best.
+best=0
+while [ $bsize -le 4194304 ]; do
+ # Same scheme as the single-operation test, but gf_time is run with op G and
+ # the values are taken from fields 6 and 10 of its output.
+ # NOTE(review): field positions presumably differ because of G's output
+ # layout -- confirm against gf_time.
+ iter=1
+ c1=`./gf_time $w G -1 $bsize $iter $method`
+ t=`echo $c1 | awk '{ printf "%d\n", $6*1000 }'`
+ s=`echo $c1 | awk '{ print $10 }'`
+ bs=$s
+
+ # Double the iteration count until the scaled field-6 value reaches 1; bs is
+ # updated from s before each re-run.
+ while [ $t -lt 1 ]; do
+ bs=$s
+ iter=`echo $iter | awk '{ print $1*2 }'`
+ c1=`./gf_time $w G -1 $bsize $iter $method`
+ t=`echo $c1 | awk '{ printf "%d\n", $6*1000 }'`
+ s=`echo $c1 | awk '{ print $10 }'`
+ done
+ # Human-readable size label: K below 1048576 bytes, M from there on.
+ if [ $bsize -lt 1048576 ]; then
+ str=`echo $bsize | awk '{ printf "%3dK\n", $1/1024 }'`
+ else
+ str=`echo $bsize | awk '{ printf "%3dM\n", $1/1024/1024 }'`
+ fi
+ # Per-size lines are printed only for R; B prints just the final summary.
+ if [ $op = R ]; then
+ echo "Region Buffer-Size: $str (MB/s): " $bs " W-Method:" $w $method
+ fi
+ best=`echo $best $bs | awk '{ print ($1 > $2) ? $1 : $2 }'`
+ bsize=`echo $bsize | awk '{ print $1 * 2 }'`
+done
+echo "Region Best (MB/s): "$best " W-Method:" $w $method