diff options
author | Jim Plank <plank@cs.utk.edu> | 2013-12-29 16:51:37 -0500 |
---|---|---|
committer | Jim Plank <plank@cs.utk.edu> | 2013-12-29 16:51:37 -0500 |
commit | 88bb18c651e48c527de540564471dff34fdc0d9c (patch) | |
tree | 28d405b659cb07308d3ab362ab997ccd5a6b4c44 | |
parent | 8eec6d46764dde8f11090d6bb0ff9111d535c793 (diff) | |
download | gf-complete-88bb18c651e48c527de540564471dff34fdc0d9c.tar.gz |
Added time_tool.sh to tools for quick timing.
Modified gf_methods to be a little more flexible.
-rw-r--r-- | flag_tester/README.txt | 10 | ||||
-rw-r--r-- | flag_tester/flag_test.c | 120 | ||||
-rw-r--r-- | flag_tester/intel_cpu_capabilities.h | 43 | ||||
-rw-r--r-- | flag_tester/pclmul_test.c | 40 | ||||
-rw-r--r-- | flag_tester/pclmul_test.txt | 8 | ||||
-rw-r--r-- | flag_tester/sse2_test.txt | 30 | ||||
-rw-r--r-- | flag_tester/sse4_test.txt | 35 | ||||
-rw-r--r-- | flag_tester/sse_test.c | 142 | ||||
-rw-r--r-- | flag_tester/ssse3_test.txt | 31 | ||||
-rw-r--r-- | flag_tester/whats_my_sse.c | 43 | ||||
-rwxr-xr-x | flag_tester/which_compile_flags.sh | 19 | ||||
-rw-r--r-- | tools/gf_methods.c | 242 | ||||
-rw-r--r-- | tools/time_tool.sh | 95 |
13 files changed, 256 insertions, 602 deletions
diff --git a/flag_tester/README.txt b/flag_tester/README.txt deleted file mode 100644 index 19101ff..0000000 --- a/flag_tester/README.txt +++ /dev/null @@ -1,10 +0,0 @@ -Run which_compile_flags.sh and it will print out the compile flags to use in - GNUmakefile. By default, this script uses "cc" as its compiler but you can - pass in the name of your compiler as an argument. - -EXAMPLE: "./which_compile_flags.sh clang" - -This script will run "clang" in the above example so be warned that if you type -something like "rm" for that argument, you get what you asked for. Also, make -sure that the compiler that you pass to which_compile_flags.sh is the same as -the compiler in GNUmakefile. diff --git a/flag_tester/flag_test.c b/flag_tester/flag_test.c deleted file mode 100644 index cecf472..0000000 --- a/flag_tester/flag_test.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * flag_test.c - copied from whats_my_sse.c to output proper compile - * flags for the GNUmakefile - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "intel_cpu_capabilities.h" - -void usage() -{ - fprintf(stderr, "usage: flag_test <compiler name>\n"); - exit(EXIT_FAILURE); -} - -int main(int argc, char **argv) -{ - //make sure to extend these buffers if more flags are added to this program - char cflags[1000], ldflags[1000], buf[1000]; - FILE *file; - char sse_found = 0; - - if(argc != 2) - usage(); - - sprintf(cflags, "CFLAGS = -O3"); - sprintf(ldflags, "LDFLAGS = -O3"); - - if(cpu_has_feature(CPU_CAP_SSE42)) - { - sprintf(buf, "%s sse_test.c -o sse4 -msse4 -DSSE4 2> /dev/null", argv[1]); - system(buf); - if(file = fopen("sse4", "r")) - { - fclose(file); - - //run program and compare to the included output - system("./sse4 > temp.txt 2> /dev/null"); - system("diff sse4_test.txt temp.txt > diff.txt 2> /dev/null"); - file = fopen("diff.txt", "r"); - if(fgetc(file) == EOF) - { - strcat(cflags, " -msse4 -DINTEL_SSE4"); - strcat(ldflags, " -msse4"); - sse_found = 1; - } - fclose(file); - } - } - - if(cpu_has_feature(CPU_CAP_SSSE3) && !sse_found) - { - sprintf(buf, "%s sse_test.c -o ssse3 -mssse3 -DSSSE3 2> /dev/null", argv[1]); - system(buf); - if(file = fopen("ssse3", "r")) - { - fclose(file); - - //run program and compare to the included output - system("./ssse3 > temp.txt 2> /dev/null"); - system("diff ssse3_test.txt temp.txt > diff.txt 2> /dev/null"); - file = fopen("diff.txt", "r"); - if(fgetc(file) == EOF) - { - strcat(cflags, " -mssse3 -DINTEL_SSSE3"); - strcat(ldflags, " -mssse3"); - sse_found = 1; - } - fclose(file); - } - } - - if(cpu_has_feature(CPU_CAP_SSE2) && !sse_found) - { - sprintf(buf, "%s sse_test.c -o sse2 -msse2 -DSSE2 2> /dev/null", argv[1]); - system(buf); - if(file = fopen("sse2", "r")) - { - fclose(file); - - //run program and compare to the included output - system("./sse2 > temp.txt 2> /dev/null"); - system("diff sse2_test.txt temp.txt > diff.txt 2> /dev/null"); - file = fopen("diff.txt", "r"); - if(fgetc(file) == EOF) - { - strcat(cflags, " -msse2 -DINTEL_SSE2"); - strcat(ldflags, " -msse2"); - sse_found = 1; - } - fclose(file); - } - } - - if(cpu_has_feature(CPU_CAP_PCLMULQDQ) && sse_found) - { - sprintf(buf, "%s pclmul_test.c -o pclmul -maes -mpclmul 2> /dev/null" - , argv[1]); - system(buf); - if(file = fopen("pclmul", "r")) - { - fclose(file); - - //run program and compare to the included output - system("./pclmul > temp.txt 2> /dev/null"); - system("diff pclmul_test.txt temp.txt > diff.txt 2> /dev/null"); - file = fopen("diff.txt", "r"); - if(fgetc(file) == EOF) - { - strcat(cflags, " -maes -mpclmul -DINTEL_PCLMUL"); - strcat(ldflags, " -maes -mpclmul"); - } - fclose(file); - } - } - - printf("%s\n%s\n", cflags, ldflags); -} diff --git a/flag_tester/intel_cpu_capabilities.h b/flag_tester/intel_cpu_capabilities.h deleted file mode 100644 index 6d1bbeb..0000000 --- a/flag_tester/intel_cpu_capabilities.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Routines to figure out what an Intel CPU's capabilities are. - */ - -#pragma once - -#include <stdint.h> - -/* Words in CPE_INFO */ -#define CPU_CPE_INFO 0x1000 -#define CPU_CAP_MMX (CPU_CPE_INFO | 23) -#define CPU_CAP_SSE (CPU_CPE_INFO | 25) -#define CPU_CAP_SSE2 (CPU_CPE_INFO | 26) - -/* Words in CPSSE */ -#define CPU_CPSSE 0x2000 -#define CPU_CAP_SSE3 (CPU_CPSSE | 0) -#define CPU_CAP_PCLMULQDQ (CPU_CPSSE | 1) -#define CPU_CAP_SSSE3 (CPU_CPSSE | 9) -#define CPU_CAP_SSE41 (CPU_CPSSE | 19) -#define CPU_CAP_SSE42 (CPU_CPSSE | 20) -#define CPU_CAP_AVX (CPU_CPSSE | 28) - -#define cpuid(func,ax,bx,cx,dx)\ - __asm__ __volatile__ ("cpuid":\ - "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func)); - -int -cpu_has_feature (unsigned which) -{ - uint32_t cpeinfo; - uint32_t cpsse; - uint32_t a, b; - - cpuid(1, a, b, cpsse, cpeinfo); - if (which & CPU_CPE_INFO) { - return (!! ((cpeinfo >> (which & 0xff)) & 0x1) ); - } else if (which & CPU_CPSSE) { - return (!! ((cpsse >> (which & 0xff)) & 0x1) ); - } else { - return (0); - } -} diff --git a/flag_tester/pclmul_test.c b/flag_tester/pclmul_test.c deleted file mode 100644 index bdae184..0000000 --- a/flag_tester/pclmul_test.c +++ /dev/null @@ -1,40 +0,0 @@ -#include <wmmintrin.h> -#include <stdint.h> -#include <stdio.h> - -#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); } - - -int main() -{ - uint64_t answer; - uint32_t pp; - __m128i a, b, c; - - a = _mm_set1_epi8(0x0D); - b = _mm_set_epi32(0,0,0,0x0A); - pp = 0x13; - MM_PRINT8("a", a); - MM_PRINT8("b", b); - - c = _mm_clmulepi64_si128(a, b, 0); - MM_PRINT8("a clm b", c); - - a = _mm_set1_epi8(0xf0); - MM_PRINT8("a", a); - b = _mm_and_si128(a, c); - b = _mm_srli_epi64(b, 4); - MM_PRINT8("shifted", b); - - - a = _mm_set_epi32(0,0,0,pp); - MM_PRINT8("PP", a); - - b = _mm_clmulepi64_si128(a, b, 0); - MM_PRINT8("PP clm over", b); - - c = _mm_xor_si128(c,b); - MM_PRINT8("Answer", c); - //answer = _mm_extract_epi64(c, 0); - //printf("%llx\n", answer); -} diff --git a/flag_tester/pclmul_test.txt b/flag_tester/pclmul_test.txt deleted file mode 100644 index 6102f94..0000000 --- a/flag_tester/pclmul_test.txt +++ /dev/null @@ -1,8 +0,0 @@ -a 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d -b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a -a clm b 00 00 00 00 00 00 00 00 72 72 72 72 72 72 72 72 -a f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 -shifted 00 00 00 00 00 00 00 00 07 07 07 07 07 07 07 07 -PP 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 13 -PP clm over 00 00 00 00 00 00 00 00 79 79 79 79 79 79 79 79 -Answer 00 00 00 00 00 00 00 00 0b 0b 0b 0b 0b 0b 0b 0b diff --git a/flag_tester/sse2_test.txt b/flag_tester/sse2_test.txt deleted file mode 100644 index f79b6e0..0000000 --- a/flag_tester/sse2_test.txt +++ /dev/null @@ -1,30 +0,0 @@ -a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 -c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 -d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 -a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00 -b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 -c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 -d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00 -a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 -c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 -d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 -d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01 -d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff -d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff -d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff -d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff -d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0 -d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 -d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 -d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3 -d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 -d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08 -b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08 -d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 diff --git a/flag_tester/sse4_test.txt b/flag_tester/sse4_test.txt deleted file mode 100644 index 3f6d7ec..0000000 --- a/flag_tester/sse4_test.txt +++ /dev/null @@ -1,35 +0,0 @@ -a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 -c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 -d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 -a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00 -b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 -c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 -d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00 -a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 -c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 -d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 -d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01 -d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff -d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff -d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff -d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff -d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0 -d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 -d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 -d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3 -d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 -d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08 -b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08 -d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -d insert32 @ 2 00 00 00 00 ab cd 12 34 00 00 00 00 00 00 00 00 -extract_epi32 @ 2: abcd1234 -d insert64 @ 0 00 00 00 00 ab cd 12 34 fe dc ba 12 91 82 73 64 -extract_epi64 @ 0: fedcba1291827364 -c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 -a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 diff --git a/flag_tester/sse_test.c b/flag_tester/sse_test.c deleted file mode 100644 index e40cf25..0000000 --- a/flag_tester/sse_test.c +++ /dev/null @@ -1,142 +0,0 @@ -#ifdef SSE4 -#define SSSE3 -#include <nmmintrin.h> -#endif - -#ifdef SSSE3 -#define SSE2 -#include <tmmintrin.h> -#endif - -#ifdef SSE2 -#include <emmintrin.h> -#endif - -#include <stdio.h> -#include <stdint.h> -#include <inttypes.h> - -#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); } - -int main() -{ - uint32_t u32; - uint64_t u64; - uint8_t *ui8 = malloc(20), i; - __m128i a, b, c, d; - - for(i=0; i < 20; i++) - ui8[i] = i; - - a = _mm_load_si128( (__m128i *) ui8 ); - b = _mm_loadu_si128( (__m128i *) (ui8+1)); - c = _mm_loadu_si128( (__m128i *) (ui8+2)); - d = _mm_loadu_si128( (__m128i *) (ui8+3)); - - MM_PRINT8("a", a); - MM_PRINT8("b", b); - MM_PRINT8("c", c); - MM_PRINT8("d", d); - - a = _mm_slli_epi16(a, 2); - b = _mm_slli_epi32(b, 2); - c = _mm_slli_epi64(c, 2); - d = _mm_slli_si128(d, 2); - - MM_PRINT8("a sl16", a); - MM_PRINT8("b sl32", b); - MM_PRINT8("c sl64", c); - MM_PRINT8("d sl128", d); - - a = _mm_srli_epi16(a, 2); - b = _mm_srli_epi32(b, 2); - c = _mm_srli_epi64(c, 2); - d = _mm_srli_si128(d, 2); - - MM_PRINT8("a sr16", a); - MM_PRINT8("b sr32", b); - MM_PRINT8("c sr64", c); - MM_PRINT8("d sr128", d); - - d = _mm_xor_si128(a, b); - MM_PRINT8("d = a^b", d); - - d = _mm_sub_epi8(a, b); - MM_PRINT8("d = a-b epi8", d); - - d = _mm_sub_epi16(a, b); - MM_PRINT8("d = a-b epi16", d); - - d = _mm_sub_epi32(a, b); - MM_PRINT8("d = a-b epi32", d); - - d = _mm_sub_epi64(a, b); - MM_PRINT8("d = a-b epi64", d); - - d = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - MM_PRINT8("d set_epi8", d); - - d = _mm_set_epi32(0x12345678, 0x9abcdef0, 0x12345678, 0x9abcdef0); - MM_PRINT8("d set_epi32", d); - - d = _mm_set1_epi64x(0xF0F0F0F0F0F0F0F0ULL); - MM_PRINT8("d set1_epi64", d); - - d = _mm_set1_epi32(0xe2e2e2e2); - MM_PRINT8("d set1_epi32", d); - - d = _mm_set1_epi16(0xaff3); - MM_PRINT8("d set1_epi16", d); - - d = _mm_set1_epi8(0xc5); - MM_PRINT8("d set1_epi8", d); - - d = _mm_packus_epi16(d, d); - MM_PRINT8("d packus_epi16(d,d)", d); - - c = _mm_unpackhi_epi8(a, d); - MM_PRINT8("c unpackhi(a,d)", c); - - b = _mm_unpacklo_epi8(c, a); - MM_PRINT8("b unpacklo(c,a)", b); - - d = _mm_and_si128(d, b); - MM_PRINT8("d and(d,b)", d); - - _mm_store_si128( (__m128i *) ui8, a); - printf("a stored to mem: "); - for(i=0; i < 16; i++) - printf("%u ", ui8[i]); - printf("\n"); - - d = _mm_setzero_si128(); - MM_PRINT8("d setzero", d); - - u32 = 0xABCD1234; - u64 = 0xFEDCBA1291827364ULL; - - #ifdef SSE4 - d = _mm_insert_epi32(d, u32, 2); - MM_PRINT8("d insert32 @ 2", d); - - u32 = 0; - u32 = _mm_extract_epi32(d, 2); - printf("extract_epi32 @ 2: %x\n", u32); - - d = _mm_insert_epi64(d, u64, 0); - MM_PRINT8("d insert64 @ 0", d); - - u64 = 0; - u64 = _mm_extract_epi64(d, 0); - printf("extract_epi64 @ 0: %" PRIx64 "\n", u64); - #endif - - c = _mm_set1_epi8(5); - MM_PRINT8("c", c); - - #ifdef SSSE3 - a = _mm_shuffle_epi8(b, c); - MM_PRINT8("a shuffle(b, c)", a); - #endif - -} diff --git a/flag_tester/ssse3_test.txt b/flag_tester/ssse3_test.txt deleted file mode 100644 index 17bee1a..0000000 --- a/flag_tester/ssse3_test.txt +++ /dev/null @@ -1,31 +0,0 @@ -a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 -c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 -d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 -a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00 -b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 -c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 -d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00 -a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 -c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 -d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 -d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01 -d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff -d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff -d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff -d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff -d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0 -d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 -d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 -d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3 -d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 -d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08 -b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08 -d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 -a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 diff --git a/flag_tester/whats_my_sse.c b/flag_tester/whats_my_sse.c deleted file mode 100644 index 8c9192c..0000000 --- a/flag_tester/whats_my_sse.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * whats_my_sse.c - lifted from Jens Gregor -- thanks Jens - */ - -#include <stdio.h> -#include <stdlib.h> -#include "intel_cpu_capabilities.h" - -struct { - unsigned feature_code; - const char * feature_name; -} features[] = { - {CPU_CAP_MMX, "MMX"}, - {CPU_CAP_SSE, "SSE"}, - {CPU_CAP_SSE2, "SSE2"}, - {CPU_CAP_SSE3, "SSE3"}, - {CPU_CAP_SSSE3, "SSSE3"}, - {CPU_CAP_SSE41, "SSE4.1"}, - {CPU_CAP_SSE42, "SSE4.2"}, - {CPU_CAP_PCLMULQDQ, "PCLMULQDQ"}, - {CPU_CAP_AVX, "AVX"}, - {0, NULL}, -}; - - -int main() -{ - unsigned i; - - printf ("CPU has the following instruction set features enabled: " ); - for (i = 0; features[i].feature_name != NULL; ++i) { - if (cpu_has_feature (features[i].feature_code)) { - printf ("%s ", features[i].feature_name); - } - } - printf ("\nCPU is missing the following instruction set features: "); - for (i = 0; features[i].feature_name != NULL; ++i) { - if (! cpu_has_feature (features[i].feature_code)) { - printf ("%s ", features[i].feature_name); - } - } - printf ("\n"); -} diff --git a/flag_tester/which_compile_flags.sh b/flag_tester/which_compile_flags.sh deleted file mode 100755 index f39c609..0000000 --- a/flag_tester/which_compile_flags.sh +++ /dev/null @@ -1,19 +0,0 @@ -if [ -n "$1" ]; then - CC=$1 -else - CC=cc -fi - -$CC flag_test.c -o flag_test 2> /dev/null -if [ -e "flag_test" ]; then - OUTPUT=`./flag_test $CC 2> /dev/null` - if [ -n "$OUTPUT" ]; then - echo "$OUTPUT" - else - printf "CFLAGS = -O3\nLDFLAGS = -O3\n" - fi -else - printf "$CC failed to compile flag_test.c\n" -fi - -rm sse4 sse2 ssse3 pclmul diff.txt flag_test temp.txt 2> /dev/null diff --git a/tools/gf_methods.c b/tools/gf_methods.c index 90ddd15..298c1ec 100644 --- a/tools/gf_methods.c +++ b/tools/gf_methods.c @@ -17,109 +17,189 @@ #include "gf_method.h" #include "gf_int.h" +#define BNMULTS (7) +static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48", + "TABLE", "LOG", "SPLIT4", "SPLIT88", "COMPOSITE" }; #define NMULTS (16) -static char *mults[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b", +static char *MULTS[NMULTS] = { "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b", "TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" }; +/* Make sure CAUCHY is last */ + #define NREGIONS (7) -static char *regions[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE", +static char *REGIONS[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE", "ALTMAP", "CAUCHY" }; +#define BNREGIONS (4) +static char *BREGIONS[BNREGIONS] = { "DOUBLE", "QUAD", "ALTMAP", "CAUCHY" }; + #define NDIVS (2) static char *divides[NDIVS] = { "MATRIX", "EUCLID" }; +void usage(char *s) +{ + fprintf(stderr, "usage: gf_methods w -BADCM -LUMDRB\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " w can be 1-32, 64, 128\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -B lists basic methods that are useful\n"); + fprintf(stderr, " -A does a nearly exhaustive listing\n"); + fprintf(stderr, " -D adds EUCLID and MATRIX division\n"); + fprintf(stderr, " -C adds CAUCHY when possible\n"); + fprintf(stderr, " Combinations are fine.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -L Simply lists methods\n"); + fprintf(stderr, " -U Produces calls to gf_unit\n"); + fprintf(stderr, " -M Produces calls to time_tool.sh for single multiplications\n"); + fprintf(stderr, " -D Produces calls to time_tool.sh for single divisions\n"); + fprintf(stderr, " -R Produces calls to time_tool.sh for region multiplications\n"); + fprintf(stderr, " -B Produces calls to time_tool.sh for the fastest region multiplications\n"); + fprintf(stderr, " Cannot combine L, U, T.\n"); + if (s != NULL) { + fprintf(stderr, "\n"); + fprintf(stderr, "%s\n", s); + } + exit(1); +} int main(int argc, char *argv[]) { - int m, r, d, w, i, sa, j, k, reset; - char *gf_argv[50]; + int m, r, d, w, i, sa, j, k, reset, ok; + int nregions; + int nmults; + char **regions; + char **mults; + int exhaustive = 0; + int divide = 0; + int cauchy = 0; + int listing; + char *gf_argv[50], *x; gf_t gf; char divs[200], ks[10], ls[10]; - char * w_str = "w=%d:"; + char * w_str; - if (argc == 2) { - if (!strcmp (argv[1], "-U")) { - w_str = "%d A -1"; + if (argc != 4) usage(NULL); + w = atoi(argv[1]); + ok = (w >= 1 && w <= 32); + if (w == 64) ok = 1; + if (w == 128) ok = 1; + if (!ok) usage("Bad w"); + + if (argv[2][0] != '-' || argv[3][0] != '-' || strlen(argv[2]) == 1 || strlen(argv[3]) != 2) { + usage(NULL); + } + for (i = 1; argv[2][i] != '\0'; i++) { + switch(argv[2][i]) { + case 'B': exhaustive = 0; break; + case 'A': exhaustive = 1; break; + case 'D': divide = 1; break; + case 'C': cauchy = 1; break; + default: usage("Bad -BADC"); } } - for (i = 2; i < 8; i++) { - w = (1 << i); - gf_argv[0] = "-"; - if (create_gf_from_argv(&gf, w, 1, gf_argv, 0) > 0) { - printf(w_str, w); - printf(" - \n"); - gf_free(&gf, 1); - } else if (_gf_errno == GF_E_DEFAULT) { - fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2); - exit(1); + if (strchr("LUMDRB", argv[3][1]) == NULL) { usage("Bad -LUMDRB"); } + listing = argv[3][1]; + + if (listing == 'U') { + w_str = "../test/gf_unit %d A -1"; + } else if (listing == 'L') { + w_str = "w=%d:"; + } else { + w_str = strdup("sh time_tool.sh X %d"); + x = strchr(w_str, 'X'); + *x = listing; + } + + gf_argv[0] = "-"; + if (create_gf_from_argv(&gf, w, 1, gf_argv, 0) > 0) { + printf(w_str, w); + printf(" - \n"); + gf_free(&gf, 1); + } else if (_gf_errno == GF_E_DEFAULT) { + fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2); + exit(1); + } + + nregions = (exhaustive) ? NREGIONS : BNREGIONS; + if (!cauchy) nregions--; + regions = (exhaustive) ? REGIONS : BREGIONS; + mults = (exhaustive) ? MULTS : BMULTS; + nmults = (exhaustive) ? NMULTS : BNMULTS; + + + for (m = 0; m < nmults; m++) { + sa = 0; + gf_argv[sa++] = "-m"; + if (strcmp(mults[m], "GROUP44") == 0) { + gf_argv[sa++] = "GROUP"; + gf_argv[sa++] = "4"; + gf_argv[sa++] = "4"; + } else if (strcmp(mults[m], "GROUP48") == 0) { + gf_argv[sa++] = "GROUP"; + gf_argv[sa++] = "4"; + gf_argv[sa++] = "8"; + } else if (strcmp(mults[m], "SPLIT2") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "2"; + } else if (strcmp(mults[m], "SPLIT4") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "4"; + } else if (strcmp(mults[m], "SPLIT8") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "8"; + } else if (strcmp(mults[m], "SPLIT16") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "16"; + } else if (strcmp(mults[m], "SPLIT88") == 0) { + gf_argv[sa++] = "SPLIT"; + gf_argv[sa++] = "8"; + gf_argv[sa++] = "8"; + } else if (strcmp(mults[m], "COMPOSITE") == 0) { + gf_argv[sa++] = "COMPOSITE"; + gf_argv[sa++] = "2"; + gf_argv[sa++] = "-"; + } else { + gf_argv[sa++] = mults[m]; } + reset = sa; - for (m = 0; m < NMULTS; m++) { - sa = 0; - gf_argv[sa++] = "-m"; - if (strcmp(mults[m], "GROUP44") == 0) { - gf_argv[sa++] = "GROUP"; - gf_argv[sa++] = "4"; - gf_argv[sa++] = "4"; - } else if (strcmp(mults[m], "GROUP48") == 0) { - gf_argv[sa++] = "GROUP"; - gf_argv[sa++] = "4"; - gf_argv[sa++] = "8"; - } else if (strcmp(mults[m], "SPLIT2") == 0) { - gf_argv[sa++] = "SPLIT"; - sprintf(ls, "%d", w); - gf_argv[sa++] = ls; - gf_argv[sa++] = "2"; - } else if (strcmp(mults[m], "SPLIT4") == 0) { - gf_argv[sa++] = "SPLIT"; - sprintf(ls, "%d", w); - gf_argv[sa++] = ls; - gf_argv[sa++] = "4"; - } else if (strcmp(mults[m], "SPLIT8") == 0) { - gf_argv[sa++] = "SPLIT"; - sprintf(ls, "%d", w); - gf_argv[sa++] = ls; - gf_argv[sa++] = "8"; - } else if (strcmp(mults[m], "SPLIT16") == 0) { - gf_argv[sa++] = "SPLIT"; - sprintf(ls, "%d", w); - gf_argv[sa++] = ls; - gf_argv[sa++] = "16"; - } else if (strcmp(mults[m], "SPLIT88") == 0) { - gf_argv[sa++] = "SPLIT"; - gf_argv[sa++] = "8"; - gf_argv[sa++] = "8"; - } else if (strcmp(mults[m], "COMPOSITE") == 0) { - gf_argv[sa++] = "COMPOSITE"; - gf_argv[sa++] = "2"; - gf_argv[sa++] = "-"; - } else { - gf_argv[sa++] = mults[m]; - } - reset = sa; - for (r = 0; r < (1 << NREGIONS); r++) { - sa = reset; - for (k = 0; k < NREGIONS; k++) { - if (r & 1 << k) { - gf_argv[sa++] = "-r"; - gf_argv[sa++] = regions[k]; - } - } - gf_argv[sa++] = "-"; - if (create_gf_from_argv(&gf, w, sa, gf_argv, 0) > 0) { - printf(w_str, w); - for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); - printf("\n"); - gf_free(&gf, 1); - } else if (_gf_errno == GF_E_DEFAULT) { - fprintf(stderr, "Unlabeled failed method: w=%d:", w); - for (j = 0; j < sa; j++) fprintf(stderr, " %s", gf_argv[j]); - fprintf(stderr, "\n"); - exit(1); + for (r = 0; r < (1 << nregions); r++) { + sa = reset; + for (k = 0; k < nregions; k++) { + if (r & (1 << k)) { + gf_argv[sa++] = "-r"; + gf_argv[sa++] = regions[k]; } - sa--; + } + gf_argv[sa++] = "-"; + /* + printf("Hmmmm. %s", gf_argv[0]); + for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); + printf("\n"); */ + + if (create_gf_from_argv(&gf, w, sa, gf_argv, 0) > 0) { + printf(w_str, w); + for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); + printf("\n"); + gf_free(&gf, 1); + } else if (_gf_errno == GF_E_DEFAULT) { + fprintf(stderr, "Unlabeled failed method: w=%d:", w); + for (j = 0; j < sa; j++) fprintf(stderr, " %s", gf_argv[j]); + fprintf(stderr, "\n"); + exit(1); + } + sa--; + if (divide) { for (d = 0; d < NDIVS; d++) { gf_argv[sa++] = "-d"; gf_argv[sa++] = divides[d]; diff --git a/tools/time_tool.sh b/tools/time_tool.sh new file mode 100644 index 0000000..d2aa591 --- /dev/null +++ b/tools/time_tool.sh @@ -0,0 +1,95 @@ +# time_tool.sh - Shell script to test various timings. +# This is a rough tester -- its job is to work quickly rather than precisely. +# (Jim Plank) + +#!/bin/sh + +if [ $# -lt 3 ]; then + echo 'usage sh time_tool.sh M|D|R|B w method' >&2 + exit 1 +fi + +op=$1 +w=$2 + +shift ; shift + +method="$*" + +if [ $op != M -a $op != D -a $op != R -a $op != B ]; then + echo 'usage sh time_tool.sh M|D|R|B w method' >&2 + echo 'You have to specify a test: ' >&2 + echo ' M=Multiplication' >&2 + echo ' D=Division' >&2 + echo ' R=Regions' >&2 + echo ' B=Best-Region' >&2 + exit 1 +fi + +# First, use a 16K buffer to test the performance of single multiplies. + +fac=`echo $w | awk '{ n = $1; while (n != 0 && n%2==0) n /= 2; print n }'` +if [ $fac -eq 0 ]; then + echo 'usage sh time_tool.sh M|D|R|B w method' >&2 + echo 'Bad w' >&2 + exit 1 +fi + +bsize=16384 +bsize=`echo $bsize $fac | awk '{ print $1 * $2 }'` + +if [ `./gf_time $w M -1 $bsize 1 $method 2>&1 | wc | awk '{ print $1 }'` -gt 2 ]; then + echo 'usage sh time_tool.sh w method' >&2 + echo "Bad method" + exit 1 +fi + +if [ $op = M -o $op = D ]; then + iter=1 + c1=`./gf_time $w $op -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $4*1000 }'` + s=`echo $c1 | awk '{ print $8 }'` + bs=$s + + while [ $t -lt 1 ]; do + bs=$s + iter=`echo $iter | awk '{ print $1*2 }'` + c1=`./gf_time $w $op -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $4*1000 }'` + s=`echo $c1 | awk '{ print $8 }'` + done + + echo "$op speed (MB/s): " $bs " W-Method:" $w $method + exit 0 +fi + +bsize=16384 +bsize=`echo $bsize $fac | awk '{ print $1 * $2 }'` + +best=0 +while [ $bsize -le 4194304 ]; do + iter=1 + c1=`./gf_time $w G -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $6*1000 }'` + s=`echo $c1 | awk '{ print $10 }'` + bs=$s + + while [ $t -lt 1 ]; do + bs=$s + iter=`echo $iter | awk '{ print $1*2 }'` + c1=`./gf_time $w G -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $6*1000 }'` + s=`echo $c1 | awk '{ print $10 }'` + done + if [ $bsize -lt 1048576 ]; then + str=`echo $bsize | awk '{ printf "%3dK\n", $1/1024 }'` + else + str=`echo $bsize | awk '{ printf "%3dM\n", $1/1024/1024 }'` + fi + if [ $op = R ]; then + echo "Region Buffer-Size: $str (MB/s): " $bs " W-Method:" $w $method + fi + best=`echo $best $bs | awk '{ print ($1 > $2) ? $1 : $2 }'` + bsize=`echo $bsize | awk '{ print $1 * 2 }'` +done +echo "Region Best (MB/s): "$best " W-Method:" $w $method |