diff options
author | David Schleef <ds@entropywave.com> | 2010-07-10 19:24:27 -0700 |
---|---|---|
committer | David Schleef <ds@entropywave.com> | 2010-07-10 19:27:30 -0700 |
commit | 7cbbc53adac6c928966ca99d6f7476bfa9e66957 (patch) | |
tree | 85e8c305b3da2fffaaa9464b07bce6f37810ef1d | |
parent | eb89bd7baeeba0d001f2db1ac4cf8b4bae7d4a98 (diff) | |
download | orc-7cbbc53adac6c928966ca99d6f7476bfa9e66957.tar.gz |
Add cache information
-rw-r--r-- | orc/orccpu-x86.c | 267 | ||||
-rw-r--r-- | orc/orcprogram-sse.c | 5 | ||||
-rw-r--r-- | orc/orcprogram.c | 19 | ||||
-rw-r--r-- | orc/orcprogram.h | 6 | ||||
-rw-r--r-- | testsuite/memcpy_speed.c | 13 | ||||
-rw-r--r-- | tools/orc-bugreport.c | 8 |
6 files changed, 269 insertions, 49 deletions
diff --git a/orc/orccpu-x86.c b/orc/orccpu-x86.c index 072156c..a63e8db 100644 --- a/orc/orccpu-x86.c +++ b/orc/orccpu-x86.c @@ -59,6 +59,9 @@ #define USE_I386_CPUID #endif +int orc_x86_family_id; +int orc_x86_model_id; +int orc_x86_stepping; #ifdef USE_I386_CPUINFO static unsigned int @@ -178,29 +181,31 @@ get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint3 #ifdef __i386__ static void -get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d) +get_cpuid (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b, + orc_uint32 *c, orc_uint32 *d) { + *a = op; + *c = init_ecx; __asm__ ( " pushl %%ebx\n" " cpuid\n" " mov %%ebx, %%esi\n" " popl %%ebx\n" - : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) - : "0" (op)); + : "+a" (*a), "=S" (*b), "+c" (*c), "=d" (*d)); } #endif #ifdef __amd64__ static void -get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d) +get_cpuid (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b, + orc_uint32 *c, orc_uint32 *d) { + *a = op; + *c = init_ecx; __asm__ ( - " pushq %%rbx\n" " cpuid\n" - " mov %%ebx, %%esi\n" - " popq %%rbx\n" - : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) - : "0" (op)); + : "+a" (*a), "=b" (*b), "+c" (*c), "=d" (*d)); + //ORC_ERROR("cpuid %08x %08x -> %08x %08x %08x %08x", op, init_ecx, *a, *b, *c, *d); } #endif @@ -214,12 +219,96 @@ test_cpuid (void *ignored) } #endif +struct desc_struct { + int desc; + int level; + int size; +}; +struct desc_struct cache_descriptors[] = { + { 0x0a, 1, 8*1024 }, + { 0x0c, 1, 16*1024 }, + { 0x0d, 1, 16*1024 }, + { 0x0e, 1, 24*1024 }, + { 0x21, 2, 256*1024 }, + { 0x22, 3, 512*1024 }, + { 0x23, 3, 1024*1024 }, + { 0x25, 3, 2*1024*1024 }, + { 0x29, 3, 4*1024*1024 }, + { 0x2c, 1, 32*1024 }, + { 0x41, 2, 128*1024 }, + { 0x42, 2, 256*1024 }, + { 0x43, 2, 512*1024 }, + { 0x44, 2, 1*1024*1024 }, + { 0x45, 2, 2*1024*1024 }, + { 0x46, 3, 4*1024*1024 }, + { 0x47, 3, 8*1024*1024 }, + { 0x48, 
2, 3*1024*1024 }, + { 0x49, 2, 4*1024*1024 }, /* special case */ + { 0x4a, 3, 6*1024*1024 }, + { 0x4b, 3, 8*1024*1024 }, + { 0x4c, 3, 12*1024*1024 }, + { 0x4d, 3, 16*1024*1024 }, + { 0x4e, 2, 6*1024*1024 }, + { 0x60, 1, 16*1024 }, + { 0x66, 1, 8*1024 }, + { 0x67, 1, 16*1024 }, + { 0x68, 1, 32*1024 }, + { 0x78, 2, 1*1024*1024 }, + { 0x79, 2, 128*1024 }, + { 0x7a, 2, 256*1024 }, + { 0x7b, 2, 512*1024 }, + { 0x7c, 2, 1*1024*1024 }, + { 0x7d, 2, 2*1024*1024 }, + { 0x7f, 2, 512*1024 }, + { 0x80, 2, 512*1024 }, + { 0x82, 2, 256*1024 }, + { 0x83, 2, 512*1024 }, + { 0x84, 2, 1*1024*1024 }, + { 0x85, 2, 2*1024*1024 }, + { 0x86, 2, 512*1024 }, + { 0x87, 2, 1*1024*1024 }, + { 0xe4, 3, 8*1024*1024 } +}; + +static void +handle_cache_descriptor (unsigned int desc) +{ + int i; + + if (desc == 0) return; + + /* special case */ + if (desc == 0x49 && orc_x86_family_id == 0xf && orc_x86_model_id == 0x6) { + ORC_DEBUG("level %d size %d", 3, 4*1024*1024); + _orc_data_cache_size_level3 = 4*1024*1024; + return; + } + + for(i=0;i<sizeof(cache_descriptors)/sizeof(cache_descriptors[0]);i++){ + if (desc == cache_descriptors[i].desc) { + ORC_DEBUG("level %d size %d", cache_descriptors[i].level, + cache_descriptors[i].size); + switch (cache_descriptors[i].level) { + case 1: + _orc_data_cache_size_level1 = cache_descriptors[i].size; + break; + case 2: + _orc_data_cache_size_level2 = cache_descriptors[i].size; + break; + case 3: + _orc_data_cache_size_level3 = cache_descriptors[i].size; + break; + } + } + } +} + static unsigned int orc_sse_detect_cpuid (void) { orc_uint32 eax, ebx, ecx, edx; orc_uint32 level; - char vendor[13] = { 0 }; + orc_uint32 vendor; unsigned int sse_flags = 0; #if 0 int ret; @@ -233,42 +322,125 @@ orc_sse_detect_cpuid (void) } #endif - get_cpuid (0x00000000, &level, (orc_uint32 *)(vendor+0), - (orc_uint32 *)(vendor+8), (orc_uint32 *)(vendor+4)); + get_cpuid (0x00000000, 0, &level, &vendor, &ecx, &edx); - ORC_DEBUG("cpuid %d %s", level, vendor); + ORC_DEBUG("cpuid %d %08x 
%08x %08x", level, vendor, ecx, edx); - if (level < 1) { - return 0; - } + if (level >= 1) { + get_cpuid (0x00000001, 0, &eax, &ebx, &ecx, &edx); + + /* generic flags */ + if (edx & (1<<26)) { + sse_flags |= ORC_TARGET_SSE_SSE2; + } + if (ecx & (1<<0)) { + sse_flags |= ORC_TARGET_SSE_SSE3; + } + if (ecx & (1<<9)) { + sse_flags |= ORC_TARGET_SSE_SSSE3; + } + if (ecx & (1<<19)) { + sse_flags |= ORC_TARGET_SSE_SSE4_1; + } + if (ecx & (1<<20)) { + sse_flags |= ORC_TARGET_SSE_SSE4_2; + } - get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); + if (vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) { + int family_id = (eax>>8)&0xf; + int model_id = (eax>>4)&0xf; + int ext_family_id = (eax>>20)&0xff; + int ext_model_id = (eax>>16)&0xf; -#if 0 - if (edx & (1<<4)) { - _orc_profile_stamp = orc_profile_stamp_rdtsc; - } -#endif + if (family_id == 0xf) { + orc_x86_family_id = family_id + ext_family_id; + } else { + orc_x86_family_id = family_id; + } - /* Intel flags */ - if (edx & (1<<26)) { - sse_flags |= ORC_TARGET_SSE_SSE2; - } - if (ecx & (1<<0)) { - sse_flags |= ORC_TARGET_SSE_SSE3; - } - if (ecx & (1<<9)) { - sse_flags |= ORC_TARGET_SSE_SSSE3; + if (family_id == 0x6 || family_id == 0xf) { /* Intel: extended model is the high nibble, valid for families 6 and 15 */ + orc_x86_model_id = (ext_model_id<<4) + model_id; + } else { + orc_x86_model_id = model_id; + } + + orc_x86_stepping = eax&0xf; + + ORC_DEBUG("family_id %d model_id %d stepping %d", + orc_x86_family_id, orc_x86_model_id, orc_x86_stepping); + + } } - if (ecx & (1<<19)) { - sse_flags |= ORC_TARGET_SSE_SSE4_1; + + if (level >= 2 && vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) { + get_cpuid (0x00000002, 0, &eax, &ebx, &ecx, &edx); + + if ((eax&0x80000000) == 0) { + handle_cache_descriptor ((eax>>8)&0xff); + handle_cache_descriptor ((eax>>16)&0xff); + handle_cache_descriptor ((eax>>24)&0xff); + } + if ((ebx&0x80000000) == 0) { + handle_cache_descriptor (ebx&0xff); + handle_cache_descriptor ((ebx>>8)&0xff); + handle_cache_descriptor ((ebx>>16)&0xff); + handle_cache_descriptor ((ebx>>24)&0xff); + } + if 
((ecx&0x80000000) == 0) { + handle_cache_descriptor (ecx&0xff); + handle_cache_descriptor ((ecx>>8)&0xff); + handle_cache_descriptor ((ecx>>16)&0xff); + handle_cache_descriptor ((ecx>>24)&0xff); + } + if ((edx&0x80000000) == 0) { + handle_cache_descriptor (edx&0xff); + handle_cache_descriptor ((edx>>8)&0xff); + handle_cache_descriptor ((edx>>16)&0xff); + handle_cache_descriptor ((edx>>24)&0xff); + } } - if (ecx & (1<<20)) { - sse_flags |= ORC_TARGET_SSE_SSE4_2; + + if (level >= 4 && vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) { + int i; + for(i=0;i<10;i++){ + int type; + int level; + int l; + int p; + int w; + int s; + + get_cpuid (0x00000004, i, &eax, &ebx, &ecx, &edx); + type = eax&0xf; + if (type == 0) break; + + level = (eax>>5)&0x7; + l = ((ebx>>0)&0xfff)+1; + p = ((ebx>>12)&0x3ff)+1; + w = ((ebx>>22)&0x3ff)+1; + s = ecx + 1; + + ORC_DEBUG("type %d level %d line size %d partitions %d ways %d sets %d", + type, level, l, p, w, s); + if (type == 1 || type == 3) { + switch (level) { + case 1: + _orc_data_cache_size_level1 = l*p*w*s; + break; + case 2: + _orc_data_cache_size_level2 = l*p*w*s; + break; + case 3: + _orc_data_cache_size_level3 = l*p*w*s; + break; + } + } + } + } - if (memcmp (vendor, "AuthenticAMD", 12) == 0) { - get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); + if (level >= 1 && vendor == (('A'<<0)|('u'<<8)|('t'<<16)|('h'<<24))) { + get_cpuid (0x80000001, 0, &eax, &ebx, &ecx, &edx); /* AMD flags */ if (ecx & (1<<6)) { @@ -278,18 +450,17 @@ orc_sse_detect_cpuid (void) sse_flags |= ORC_TARGET_SSE_SSE5; } -#if 0 - get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx); - - ORC_INFO("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size", + get_cpuid (0x80000005, 0, &eax, &ebx, &ecx, &edx); + _orc_data_cache_size_level1 = ((ecx>>24)&0xff) * 1024; + ORC_DEBUG ("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size", (ecx>>24)&0xff, (ecx>>16)&0xff, (ecx>>8)&0xff, ecx&0xff); - ORC_INFO("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size", + 
ORC_DEBUG ("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size", (edx>>24)&0xff, (edx>>16)&0xff, (edx>>8)&0xff, edx&0xff); - get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx); - ORC_INFO("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size", + get_cpuid (0x80000006, 0, &eax, &ebx, &ecx, &edx); + _orc_data_cache_size_level2 = ((ecx>>16)&0xffff) * 1024; + ORC_DEBUG ("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size", (ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff); -#endif } if (orc_compiler_flag_check ("-sse2")) { @@ -325,7 +496,7 @@ orc_mmx_detect_cpuid (void) char vendor[13] = { 0 }; unsigned int mmx_flags = 0; - get_cpuid (0x00000000, &level, (orc_uint32 *)(vendor+0), + get_cpuid (0x00000000, 0, &level, (orc_uint32 *)(vendor+0), (orc_uint32 *)(vendor+8), (orc_uint32 *)(vendor+4)); ORC_DEBUG("cpuid %d %s", level, vendor); @@ -334,7 +505,7 @@ orc_mmx_detect_cpuid (void) return 0; } - get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); + get_cpuid (0x00000001, 0, &eax, &ebx, &ecx, &edx); /* Intel flags */ if (edx & (1<<23)) { @@ -345,7 +516,7 @@ orc_mmx_detect_cpuid (void) } if (memcmp (vendor, "AuthenticAMD", 12) == 0) { - get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); + get_cpuid (0x80000001, 0, &eax, &ebx, &ecx, &edx); /* AMD flags */ if (edx & (1<<22)) { diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index c225296..52d308e 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -54,6 +54,11 @@ static int _orc_compiler_flag_debug; void orc_sse_init (void) { +#if defined(HAVE_AMD64) || defined(HAVE_I386) + /* initializes cache information */ + orc_sse_get_cpu_flags (); +#endif + #if defined(HAVE_I386) if (!(orc_sse_get_cpu_flags () & ORC_TARGET_SSE_SSE2)) { sse_target.executable = FALSE; diff --git a/orc/orcprogram.c b/orc/orcprogram.c index 470b76e..2daf0bb 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -700,3 +700,22 @@ orc_program_get_max_accumulator_size (OrcProgram *program) return max; } +int 
_orc_data_cache_size_level1; +int _orc_data_cache_size_level2; +int _orc_data_cache_size_level3; + +void +orc_get_data_cache_sizes (int *level1, int *level2, int *level3) +{ + if (level1) { + *level1 = _orc_data_cache_size_level1; + } + if (level2) { + *level2 = _orc_data_cache_size_level2; + } + if (level3) { + *level3 = _orc_data_cache_size_level3; + } + +} + diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 403d486..15b314b 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -595,10 +595,16 @@ int orc_program_get_max_var_size (OrcProgram *program); int orc_program_get_max_array_size (OrcProgram *program); int orc_program_get_max_accumulator_size (OrcProgram *program); +void orc_get_data_cache_sizes (int *level1, int *level2, int *level3); + #ifdef ORC_ENABLE_UNSTABLE_API int orc_compiler_flag_check (const char *flag); +extern int _orc_data_cache_size_level1; +extern int _orc_data_cache_size_level2; +extern int _orc_data_cache_size_level3; + #endif #endif diff --git a/testsuite/memcpy_speed.c b/testsuite/memcpy_speed.c index 6fb19fc..b3b5f43 100644 --- a/testsuite/memcpy_speed.c +++ b/testsuite/memcpy_speed.c @@ -42,6 +42,8 @@ main(int argc, char *argv[]) double cpufreq; int unalign; OrcProgram *p; + int level1, level2, level3; + int max; //const uint8_t zero = 0; orc_init (); @@ -82,7 +84,16 @@ main(int argc, char *argv[]) result = orc_program_compile (p); } - for(i=0;i<160;i++){ + orc_get_data_cache_sizes (&level1, &level2, &level3); + if (level3 > 0) { + max = (log(level3)/M_LN2 - 6.0) * 10 + 20; + } else if (level2 > 0) { /* no L3 size reported: derive the sweep limit from L2 */ + max = (log(level2)/M_LN2 - 6.0) * 10 + 20; + } else { + max = 200; + } + + for(i=0;i<max;i++){ double x = i*0.1 + 6.0; int size = pow(2.0, x); diff --git a/tools/orc-bugreport.c b/tools/orc-bugreport.c index dddbd87..300c738 100644 --- a/tools/orc-bugreport.c +++ b/tools/orc-bugreport.c @@ -59,6 +59,14 @@ main (int argc, char *argv[]) printf("Orc " VERSION " - integrated testing tool\n"); + { + int level1, level2, level3; + 
orc_get_data_cache_sizes(&level1, &level2, &level3); + printf("L1 cache: %d\n", level1); + printf("L2 cache: %d\n", level2); + printf("L3 cache: %d\n", level3); + } + if (filename) { int n; int ret; |