summary refs log tree commit diff
diff options
context:
space:
mode:
author David Schleef <ds@entropywave.com> 2010-07-10 19:24:27 -0700
committer David Schleef <ds@entropywave.com> 2010-07-10 19:27:30 -0700
commit 7cbbc53adac6c928966ca99d6f7476bfa9e66957 (patch)
tree 85e8c305b3da2fffaaa9464b07bce6f37810ef1d
parent eb89bd7baeeba0d001f2db1ac4cf8b4bae7d4a98 (diff)
download orc-7cbbc53adac6c928966ca99d6f7476bfa9e66957.tar.gz
Add cache information
-rw-r--r-- orc/orccpu-x86.c 267
-rw-r--r-- orc/orcprogram-sse.c 5
-rw-r--r-- orc/orcprogram.c 19
-rw-r--r-- orc/orcprogram.h 6
-rw-r--r-- testsuite/memcpy_speed.c 13
-rw-r--r-- tools/orc-bugreport.c 8
6 files changed, 269 insertions, 49 deletions
diff --git a/orc/orccpu-x86.c b/orc/orccpu-x86.c
index 072156c..a63e8db 100644
--- a/orc/orccpu-x86.c
+++ b/orc/orccpu-x86.c
@@ -59,6 +59,9 @@
#define USE_I386_CPUID
#endif
+int orc_x86_family_id;
+int orc_x86_model_id;
+int orc_x86_stepping;
#ifdef USE_I386_CPUINFO
static unsigned int
@@ -178,29 +181,31 @@ get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint3
#ifdef __i386__
static void
-get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d)
+get_cpuid (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b,
+ orc_uint32 *c, orc_uint32 *d)
{
+ *a = op;
+ *c = init_ecx;
__asm__ (
" pushl %%ebx\n"
" cpuid\n"
" mov %%ebx, %%esi\n"
" popl %%ebx\n"
- : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
- : "0" (op));
+ : "+a" (*a), "=S" (*b), "+c" (*c), "=d" (*d));
}
#endif
#ifdef __amd64__
static void
-get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d)
+get_cpuid (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b,
+ orc_uint32 *c, orc_uint32 *d)
{
+ *a = op;
+ *c = init_ecx;
__asm__ (
- " pushq %%rbx\n"
" cpuid\n"
- " mov %%ebx, %%esi\n"
- " popq %%rbx\n"
- : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
- : "0" (op));
+ : "+a" (*a), "=b" (*b), "+c" (*c), "=d" (*d));
+ //ORC_ERROR("cpuid %08x %08x -> %08x %08x %08x %08x", op, init_ecx, *a, *b, *c, *d);
}
#endif
@@ -214,12 +219,96 @@ test_cpuid (void *ignored)
}
#endif
+/* One row of the cache descriptor lookup table that
+ * handle_cache_descriptor() searches. */
+struct desc_struct {
+  int desc;   /* descriptor byte to match */
+  int level;  /* cache level (1, 2 or 3) whose size variable gets updated */
+  int size;   /* cache size in bytes */
+};
+
+/* Maps descriptor bytes returned by CPUID function 2 to a cache level and
+ * size.  The table is only read by handle_cache_descriptor() in this file,
+ * so it is kept file-private and read-only. */
+static const struct desc_struct cache_descriptors[] = {
+  { 0x0a, 1, 8*1024 },
+  { 0x0c, 1, 16*1024 },
+  { 0x0d, 1, 16*1024 },
+  { 0x0e, 1, 24*1024 },
+  { 0x21, 2, 256*1024 },
+  { 0x22, 3, 512*1024 },
+  { 0x23, 3, 1024*1024 },
+  { 0x25, 3, 2*1024*1024 },
+  { 0x29, 3, 4*1024*1024 },
+  { 0x2c, 1, 32*1024 },
+  { 0x41, 2, 128*1024 },
+  { 0x42, 2, 256*1024 },
+  { 0x43, 2, 512*1024 },
+  { 0x44, 2, 1*1024*1024 },
+  { 0x45, 2, 2*1024*1024 },
+  { 0x46, 3, 4*1024*1024 },
+  { 0x47, 3, 8*1024*1024 },
+  { 0x48, 2, 3*1024*1024 },
+  { 0x49, 2, 4*1024*1024 }, /* special case */
+  { 0x4a, 3, 6*1024*1024 },
+  { 0x4b, 3, 8*1024*1024 },
+  { 0x4c, 3, 12*1024*1024 },
+  { 0x4d, 3, 16*1024*1024 },
+  { 0x4e, 2, 6*1024*1024 },
+  { 0x60, 1, 16*1024 },
+  { 0x66, 1, 8*1024 },
+  { 0x67, 1, 16*1024 },
+  { 0x68, 1, 32*1024 },
+  { 0x78, 2, 1*1024*1024 },
+  { 0x79, 2, 128*1024 },
+  { 0x7a, 2, 256*1024 },
+  { 0x7b, 2, 512*1024 },
+  { 0x7c, 2, 1*1024*1024 },
+  { 0x7d, 2, 2*1024*1024 },
+  { 0x7f, 2, 512*1024 },
+  { 0x80, 2, 512*1024 },
+  { 0x82, 2, 256*1024 },
+  { 0x83, 2, 512*1024 },
+  { 0x84, 2, 1*1024*1024 },
+  { 0x85, 2, 2*1024*1024 },
+  { 0x86, 2, 512*1024 },
+  { 0x87, 2, 1*1024*1024 },
+  { 0xe4, 3, 8*1024*1024 }
+};
+
+/*
+ * handle_cache_descriptor:
+ * @desc: one descriptor byte taken from a register returned by CPUID
+ *   function 2
+ *
+ * Looks @desc up in cache_descriptors[] and, on a match, stores the entry's
+ * size in the _orc_data_cache_size_level{1,2,3} variable selected by the
+ * entry's level.  A zero byte, or a byte that is not in the table, leaves
+ * the cache size variables untouched.
+ */
+static void
+handle_cache_descriptor (unsigned int desc)
+{
+  unsigned int i;
+
+  if (desc == 0) return;
+
+  /* special case: the table lists 0x49 as a 4 MB level 2 cache, but for
+   * family 0xf, model 0x6 it is recorded as a 4 MB level 3 cache */
+  if (desc == 0x49 && orc_x86_family_id == 0xf && orc_x86_model_id == 0x6) {
+    ORC_DEBUG("level %d size %d", 3, 4*1024*1024);
+    _orc_data_cache_size_level3 = 4*1024*1024;
+    return;
+  }
+
+  for(i=0;i<sizeof(cache_descriptors)/sizeof(cache_descriptors[0]);i++){
+    if (desc != (unsigned int)cache_descriptors[i].desc) continue;
+
+    ORC_DEBUG("level %d size %d", cache_descriptors[i].level,
+        cache_descriptors[i].size);
+    switch (cache_descriptors[i].level) {
+      case 1:
+        _orc_data_cache_size_level1 = cache_descriptors[i].size;
+        break;
+      case 2:
+        _orc_data_cache_size_level2 = cache_descriptors[i].size;
+        break;
+      case 3:
+        _orc_data_cache_size_level3 = cache_descriptors[i].size;
+        break;
+      default:
+        break;
+    }
+    /* every descriptor value appears only once in the table, so there is
+     * nothing left to find after the first match */
+    break;
+  }
+}
+
static unsigned int
orc_sse_detect_cpuid (void)
{
orc_uint32 eax, ebx, ecx, edx;
orc_uint32 level;
- char vendor[13] = { 0 };
+ orc_uint32 vendor;
unsigned int sse_flags = 0;
#if 0
int ret;
@@ -233,42 +322,125 @@ orc_sse_detect_cpuid (void)
}
#endif
- get_cpuid (0x00000000, &level, (orc_uint32 *)(vendor+0),
- (orc_uint32 *)(vendor+8), (orc_uint32 *)(vendor+4));
+ get_cpuid (0x00000000, 0, &level, &vendor, &ecx, &edx);
- ORC_DEBUG("cpuid %d %s", level, vendor);
+ ORC_DEBUG("cpuid %d %08x %08x %08x", level, vendor, ecx, edx);
- if (level < 1) {
- return 0;
- }
+ if (level >= 1) {
+ get_cpuid (0x00000001, 0, &eax, &ebx, &ecx, &edx);
+
+ /* generic flags */
+ if (edx & (1<<26)) {
+ sse_flags |= ORC_TARGET_SSE_SSE2;
+ }
+ if (ecx & (1<<0)) {
+ sse_flags |= ORC_TARGET_SSE_SSE3;
+ }
+ if (ecx & (1<<9)) {
+ sse_flags |= ORC_TARGET_SSE_SSSE3;
+ }
+ if (ecx & (1<<19)) {
+ sse_flags |= ORC_TARGET_SSE_SSE4_1;
+ }
+ if (ecx & (1<<20)) {
+ sse_flags |= ORC_TARGET_SSE_SSE4_2;
+ }
- get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
+ if (vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) {
+ int family_id = (eax>>8)&0xf;
+ int model_id = (eax>>4)&0xf;
+ int ext_family_id = (eax>>20)&0xff;
+ int ext_model_id = (eax>>16)&0xf;
-#if 0
- if (edx & (1<<4)) {
- _orc_profile_stamp = orc_profile_stamp_rdtsc;
- }
-#endif
+ if (family_id == 0xf) {
+ orc_x86_family_id = family_id + ext_family_id;
+ } else {
+ orc_x86_family_id = family_id;
+ }
- /* Intel flags */
- if (edx & (1<<26)) {
- sse_flags |= ORC_TARGET_SSE_SSE2;
- }
- if (ecx & (1<<0)) {
- sse_flags |= ORC_TARGET_SSE_SSE3;
- }
- if (ecx & (1<<9)) {
- sse_flags |= ORC_TARGET_SSE_SSSE3;
+ if (model_id == 0xf) {
+ orc_x86_model_id = model_id + ext_model_id;
+ } else {
+ orc_x86_model_id = model_id;
+ }
+
+ orc_x86_stepping = eax&0xf;
+
+ ORC_DEBUG("family_id %d model_id %d stepping %d",
+ orc_x86_family_id, orc_x86_model_id, orc_x86_stepping);
+
+ }
}
- if (ecx & (1<<19)) {
- sse_flags |= ORC_TARGET_SSE_SSE4_1;
+
+ if (level >= 2 && vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) {
+ get_cpuid (0x00000002, 0, &eax, &ebx, &ecx, &edx);
+
+ if ((eax&0x80000000) == 0) {
+ handle_cache_descriptor ((eax>>8)&0xff);
+ handle_cache_descriptor ((eax>>16)&0xff);
+ handle_cache_descriptor ((eax>>24)&0xff);
+ }
+ if ((ebx&0x80000000) == 0) {
+ handle_cache_descriptor (ebx&0xff);
+ handle_cache_descriptor ((ebx>>8)&0xff);
+ handle_cache_descriptor ((ebx>>16)&0xff);
+ handle_cache_descriptor ((ebx>>24)&0xff);
+ }
+ if ((ecx&0x80000000) == 0) {
+ handle_cache_descriptor (ecx&0xff);
+ handle_cache_descriptor ((ecx>>8)&0xff);
+ handle_cache_descriptor ((ecx>>16)&0xff);
+ handle_cache_descriptor ((ecx>>24)&0xff);
+ }
+ if ((edx&0x80000000) == 0) {
+ handle_cache_descriptor (edx&0xff);
+ handle_cache_descriptor ((edx>>8)&0xff);
+ handle_cache_descriptor ((edx>>16)&0xff);
+ handle_cache_descriptor ((edx>>24)&0xff);
+ }
}
- if (ecx & (1<<20)) {
- sse_flags |= ORC_TARGET_SSE_SSE4_2;
+
+ if (level >= 4 && vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) {
+ int i;
+ for(i=0;i<10;i++){
+ int type;
+ int level;
+ int l;
+ int p;
+ int w;
+ int s;
+
+ get_cpuid (0x00000004, i, &eax, &ebx, &ecx, &edx);
+ type = eax&0xf;
+ if (type == 0) break;
+
+ level = (eax>>5)&0x7;
+ l = ((ebx>>0)&0xfff)+1;
+ p = ((ebx>>12)&0x3ff)+1;
+ w = ((ebx>>22)&0x3ff)+1;
+ s = ecx + 1;
+
+ ORC_DEBUG("type %d level %d line size %d partitions %d ways %d sets %d",
+ type, level, l, p, w, s);
+ if (type == 1 || type == 3) {
+ switch (level) {
+ case 1:
+ _orc_data_cache_size_level1 = l*p*w*s;
+ break;
+ case 2:
+ _orc_data_cache_size_level2 = l*p*w*s;
+ break;
+ case 3:
+ _orc_data_cache_size_level3 = l*p*w*s;
+ break;
+ }
+ }
+ }
+
}
- if (memcmp (vendor, "AuthenticAMD", 12) == 0) {
- get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
+ if (level >= 1 && vendor == (('A'<<0)|('u'<<8)|('t'<<16)|('h'<<24))) {
+ get_cpuid (0x80000001, 0, &eax, &ebx, &ecx, &edx);
/* AMD flags */
if (ecx & (1<<6)) {
@@ -278,18 +450,17 @@ orc_sse_detect_cpuid (void)
sse_flags |= ORC_TARGET_SSE_SSE5;
}
-#if 0
- get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx);
-
- ORC_INFO("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
+ get_cpuid (0x80000005, 0, &eax, &ebx, &ecx, &edx);
+ _orc_data_cache_size_level1 = ((ecx>>24)&0xff) * 1024;
+ ORC_DEBUG ("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
(ecx>>24)&0xff, (ecx>>16)&0xff, (ecx>>8)&0xff, ecx&0xff);
- ORC_INFO("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
+ ORC_DEBUG ("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
(edx>>24)&0xff, (edx>>16)&0xff, (edx>>8)&0xff, edx&0xff);
- get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx);
- ORC_INFO("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size",
+ get_cpuid (0x80000006, 0, &eax, &ebx, &ecx, &edx);
+ _orc_data_cache_size_level2 = ((ecx>>16)&0xffff) * 1024;
+ ORC_DEBUG ("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size",
(ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff);
-#endif
}
if (orc_compiler_flag_check ("-sse2")) {
@@ -325,7 +496,7 @@ orc_mmx_detect_cpuid (void)
char vendor[13] = { 0 };
unsigned int mmx_flags = 0;
- get_cpuid (0x00000000, &level, (orc_uint32 *)(vendor+0),
+ get_cpuid (0x00000000, 0, &level, (orc_uint32 *)(vendor+0),
(orc_uint32 *)(vendor+8), (orc_uint32 *)(vendor+4));
ORC_DEBUG("cpuid %d %s", level, vendor);
@@ -334,7 +505,7 @@ orc_mmx_detect_cpuid (void)
return 0;
}
- get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
+ get_cpuid (0x00000001, 0, &eax, &ebx, &ecx, &edx);
/* Intel flags */
if (edx & (1<<23)) {
@@ -345,7 +516,7 @@ orc_mmx_detect_cpuid (void)
}
if (memcmp (vendor, "AuthenticAMD", 12) == 0) {
- get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
+ get_cpuid (0x80000001, 0, &eax, &ebx, &ecx, &edx);
/* AMD flags */
if (edx & (1<<22)) {
diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c
index c225296..52d308e 100644
--- a/orc/orcprogram-sse.c
+++ b/orc/orcprogram-sse.c
@@ -54,6 +54,11 @@ static int _orc_compiler_flag_debug;
void
orc_sse_init (void)
{
+#if defined(HAVE_AMD64) || defined(HAVE_I386)
+ /* initializes cache information */
+ orc_sse_get_cpu_flags ();
+#endif
+
#if defined(HAVE_I386)
if (!(orc_sse_get_cpu_flags () & ORC_TARGET_SSE_SSE2)) {
sse_target.executable = FALSE;
diff --git a/orc/orcprogram.c b/orc/orcprogram.c
index 470b76e..2daf0bb 100644
--- a/orc/orcprogram.c
+++ b/orc/orcprogram.c
@@ -700,3 +700,22 @@ orc_program_get_max_accumulator_size (OrcProgram *program)
return max;
}
+int _orc_data_cache_size_level1;
+int _orc_data_cache_size_level2;
+int _orc_data_cache_size_level3;
+
+/**
+ * orc_get_data_cache_sizes:
+ * @level1: location to store the level 1 data cache size, or NULL
+ * @level2: location to store the level 2 data cache size, or NULL
+ * @level3: location to store the level 3 data cache size, or NULL
+ *
+ * Copies the current values of the _orc_data_cache_size_level{1,2,3}
+ * variables into the supplied locations.  Any of the pointers may be
+ * NULL, in which case that level is skipped.  A level for which no size
+ * has been recorded reads back as 0.
+ */
+void
+orc_get_data_cache_sizes (int *level1, int *level2, int *level3)
+{
+  if (level1) { *level1 = _orc_data_cache_size_level1; }
+  if (level2) { *level2 = _orc_data_cache_size_level2; }
+  if (level3) { *level3 = _orc_data_cache_size_level3; }
+}
+
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 403d486..15b314b 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -595,10 +595,16 @@ int orc_program_get_max_var_size (OrcProgram *program);
int orc_program_get_max_array_size (OrcProgram *program);
int orc_program_get_max_accumulator_size (OrcProgram *program);
+void orc_get_data_cache_sizes (int *level1, int *level2, int *level3);
+
#ifdef ORC_ENABLE_UNSTABLE_API
int orc_compiler_flag_check (const char *flag);
+extern int _orc_data_cache_size_level1;
+extern int _orc_data_cache_size_level2;
+extern int _orc_data_cache_size_level3;
+
#endif
#endif
diff --git a/testsuite/memcpy_speed.c b/testsuite/memcpy_speed.c
index 6fb19fc..b3b5f43 100644
--- a/testsuite/memcpy_speed.c
+++ b/testsuite/memcpy_speed.c
@@ -42,6 +42,8 @@ main(int argc, char *argv[])
double cpufreq;
int unalign;
OrcProgram *p;
+ int level1, level2, level3;
+ int max;
//const uint8_t zero = 0;
orc_init ();
@@ -82,7 +84,16 @@ main(int argc, char *argv[])
result = orc_program_compile (p);
}
- for(i=0;i<160;i++){
+  orc_get_data_cache_sizes (&level1, &level2, &level3);
+  /* Iteration i copies 2^(6 + i/10) bytes, so (log2(cache) - 6) * 10 is the
+   * step at which the copy size reaches the cache size; 20 more steps run
+   * up to four times that size.  Use the largest cache level that was
+   * detected, and fall back to a fixed count when none is known. */
+  if (level3 > 0) {
+    max = (log(level3)/M_LN2 - 6.0) * 10 + 20;
+  } else if (level2 > 0) {
+    max = (log(level2)/M_LN2 - 6.0) * 10 + 20;
+  } else {
+    max = 200;
+  }
+
+ for(i=0;i<max;i++){
double x = i*0.1 + 6.0;
int size = pow(2.0, x);
diff --git a/tools/orc-bugreport.c b/tools/orc-bugreport.c
index dddbd87..300c738 100644
--- a/tools/orc-bugreport.c
+++ b/tools/orc-bugreport.c
@@ -59,6 +59,14 @@ main (int argc, char *argv[])
printf("Orc " VERSION " - integrated testing tool\n");
+ {
+ int level1, level2, level3;
+ orc_get_data_cache_sizes(&level1, &level2, &level3);
+ printf("L1 cache: %d\n", level1);
+ printf("L2 cache: %d\n", level2);
+ printf("L3 cache: %d\n", level3);
+ }
+
if (filename) {
int n;
int ret;