summary | refs | log | tree | commit | diff
path: root/rts/Papi.c
diff options
context:
space:
mode:
author: simonmar@microsoft.com <unknown> 2007-11-20 13:36:35 +0000
committer: simonmar@microsoft.com <unknown> 2007-11-20 13:36:35 +0000
commit: 8db56c8606e6c0e89a87d34c3f67124f1e8b988e (patch)
tree: 8ee9883969f75dbaa1fcd6d9842f79d5adc56805 /rts/Papi.c
parent: 510d17388f2c1cde5bbcef8083a57444438614de (diff)
download: haskell-8db56c8606e6c0e89a87d34c3f67124f1e8b988e.tar.gz
improvements to PAPI support
- major (multithreaded) GC is measured separately from minor GC - events to measure can now be specified on the command line, e.g. prog +RTS -a+PAPI_TOT_CYC
Diffstat (limited to 'rts/Papi.c')
-rw-r--r-- rts/Papi.c 177
1 files changed, 96 insertions, 81 deletions
diff --git a/rts/Papi.c b/rts/Papi.c
index d442718fa9..d95e26c8e7 100644
--- a/rts/Papi.c
+++ b/rts/Papi.c
@@ -30,7 +30,7 @@ static Mutex papi_counter_mutex;
struct _papi_events {
int event_code;
- char * event_name;
+ const char * event_name;
};
/* Beware, these counters are Opteron specific
@@ -75,45 +75,54 @@ int papi_error;
/* Arbitrary, to avoid using malloc */
#define MAX_PAPI_EVENTS 10
-int n_papi_events = 0;
+static nat n_papi_events = 0;
/* Events counted during GC and Mutator execution */
/* There's a trailing comma, do all C compilers accept that? */
static struct _papi_events papi_events[MAX_PAPI_EVENTS];
long_long MutatorCounters[MAX_PAPI_EVENTS];
-long_long GCCounters[MAX_PAPI_EVENTS];
+long_long GC0Counters[MAX_PAPI_EVENTS];
+long_long GC1Counters[MAX_PAPI_EVENTS];
long_long start_mutator_cycles;
-long_long start_gc_cycles;
long_long mutator_cycles;
-long_long gc_cycles;
+long_long start_gc_cycles;
+long_long gc0_cycles;
+long_long gc1_cycles;
static long_long papi_counter(long_long values[],int event);
static void papi_add_events(int EventSet);
+static nat max_hardware_counters = 2;
+
/* If you want to add events to count, extend the
* init_countable_events and papi_report functions.
* Be aware that your processor can count only a limited number
* of events simultaneously; you can turn on multiplexing
* to increase that number, though.
*/
+static void papi_add_event(const char *name, int code)
+{
+ if (n_papi_events >= max_hardware_counters) {
+ errorBelch("too many PAPI events for this CPU (max: %d)",
+ max_hardware_counters);
+ stg_exit(EXIT_FAILURE);
+ }
+ papi_events[n_papi_events].event_code = code;
+ papi_events[n_papi_events].event_name = name;
+ n_papi_events++;
+}
+
static void
init_countable_events(void)
{
-#define PAPI_ADD_EVENT(EVENT) \
- { \
- if (n_papi_events >= MAX_PAPI_EVENTS) { \
- barf("too many PAPI events"); \
- } \
- papi_events[n_papi_events].event_code = EVENT; \
- papi_events[n_papi_events].event_name = #EVENT; \
- n_papi_events++; \
- }
+ max_hardware_counters = PAPI_num_counters();
+
+#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT)
- PAPI_ADD_EVENT(PAPI_TOT_INS);
if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
PAPI_ADD_EVENT(FR_BR);
PAPI_ADD_EVENT(FR_BR_MIS);
@@ -133,8 +142,35 @@ init_countable_events(void)
PAPI_ADD_EVENT(DC_L2_REFILL_MOES);
PAPI_ADD_EVENT(DC_SYS_REFILL_MOES);
PAPI_ADD_EVENT(FR_BR_MIS);
+ } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) {
+ nat i;
+ char *name;
+ int code;
+ for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) {
+ name = RtsFlags.PapiFlags.userEvents[i];
+ PAPI_CHECK(PAPI_event_name_to_code(name, &code))
+ papi_add_event(name, code);
+ }
} else {
- PAPI_ADD_EVENT(PAPI_STL_ICY);
+ // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses
+ // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads
+ // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses
+ // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses
+ // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses
+ // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses
+ // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses
+ // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses
+ // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes
+ // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads
+ // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes
+ // PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads
+ // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line
+ // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses
+ PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions
+ PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total cycles
+ // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line
+ // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource
+
}
// We might also consider:
@@ -143,90 +179,58 @@ init_countable_events(void)
};
-static char temp[BIG_STRING_LEN];
-
-static void
-papi_mut_cycles(void)
-{
- ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/);
- statsPrintf(" (MUT_CYCLES) : %s\n",temp);
-}
-
static void
-papi_gc_cycles(void)
+papi_report_event(const char *name, ullong value)
{
- ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/);
- statsPrintf(" (GC_CYCLES) : %s\n",temp);
+ static char temp[BIG_STRING_LEN];
+ ullong_format_string(value,temp,rtsTrue/*commas*/);
+ statsPrintf(" %15s %15s\n", name, temp);
}
/* This function reports counters for GC and mutator */
static void
-papi_report(long_long PapiCounters[])
+papi_report(long_long counters[])
{
-
-/* Report the value of a counter */
-#define PAPI_REPORT(EVENTSET,EVENT) \
- { \
- ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \
- statsPrintf(" (" #EVENT ") : %s\n",temp); \
- }
+ nat i;
/* Report the value of a counter as a percentage of another counter */
#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \
- statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \
- papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
+ statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \
+ papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
- /* I need to improve formatting aesthetics */
- PAPI_REPORT(PapiCounters,PAPI_TOT_INS);
-
- if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
- PAPI_REPORT(PapiCounters,FR_BR);
- PAPI_REPORT(PapiCounters,FR_BR_MIS);
- PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR);
- PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR);
+ for (i = 0; i < n_papi_events; i++)
+ {
+ papi_report_event(papi_events[i].event_name, counters[i]);
}
- else if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
- PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS);
- PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR);
- //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC);
- PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS);
- //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC);
+ if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
+ PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR);
+ PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR);
}
else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
- PAPI_REPORT(PapiCounters,PAPI_L1_DCA);
- PAPI_REPORT(PapiCounters,PAPI_L1_DCM);
- PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA);
+ PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA);
}
else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
- PAPI_REPORT(PapiCounters,PAPI_L2_DCA);
- PAPI_REPORT(PapiCounters,PAPI_L2_DCM);
- PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA);
+ PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA);
}
-
- else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
- PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES);
- PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES);
- PAPI_REPORT(PapiCounters,FR_BR_MIS);
- }
-
- else {
- PAPI_REPORT(PapiCounters,PAPI_STL_ICY);
- }
}
void
papi_stats_report (void)
{
- statsPrintf(" -- CPU Mutator counters --\n");
- papi_mut_cycles();
+ statsPrintf(" Mutator CPU counters\n");
+ papi_report_event("CYCLES", mutator_cycles);
papi_report(MutatorCounters);
- statsPrintf("\n -- CPU GC counters --\n");
- papi_gc_cycles();
- papi_report(GCCounters);
+ statsPrintf("\n GC(0) CPU counters\n");
+ papi_report_event("CYCLES", gc0_cycles);
+ papi_report(GC0Counters);
+
+ statsPrintf("\n GC(1) CPU counters\n");
+ papi_report_event("CYCLES", gc1_cycles);
+ papi_report(GC1Counters);
}
void
@@ -273,7 +277,7 @@ papi_init (void)
static long_long
papi_counter(long_long values[],int event)
{
- int i;
+ nat i;
for(i=0;i<n_papi_events;i++) {
if(papi_events[i].event_code==event) {
return values[i];
@@ -288,7 +292,7 @@ papi_counter(long_long values[],int event)
static void
papi_add_events(int EventSet)
{
- int i;
+ nat i;
for(i=0;i<n_papi_events;i++) {
if((papi_error=PAPI_add_event(EventSet,
papi_events[i].event_code))
@@ -334,18 +338,29 @@ papi_start_gc_count(void)
}
void
-papi_stop_gc_count(void)
+papi_stop_gc0_count(void)
+{
+ ACQUIRE_LOCK(&papi_counter_mutex);
+ PAPI_CHECK( PAPI_accum(GCEvents,GC0Counters));
+ PAPI_CHECK( PAPI_stop(GCEvents,NULL));
+ gc0_cycles += PAPI_cycles() - start_gc_cycles;
+ RELEASE_LOCK(&papi_counter_mutex);
+}
+
+
+void
+papi_stop_gc1_count(void)
{
ACQUIRE_LOCK(&papi_counter_mutex);
- PAPI_CHECK( PAPI_accum(GCEvents,GCCounters));
+ PAPI_CHECK( PAPI_accum(GCEvents,GC1Counters));
PAPI_CHECK( PAPI_stop(GCEvents,NULL));
- gc_cycles += PAPI_cycles() - start_gc_cycles;
+ gc1_cycles += PAPI_cycles() - start_gc_cycles;
RELEASE_LOCK(&papi_counter_mutex);
}
void
-papi_thread_start_gc_count(int event_set)
+papi_thread_start_gc1_count(int event_set)
{
ACQUIRE_LOCK(&papi_counter_mutex);
PAPI_CHECK( PAPI_start(event_set));
@@ -353,10 +368,10 @@ papi_thread_start_gc_count(int event_set)
}
void
-papi_thread_stop_gc_count(int event_set)
+papi_thread_stop_gc1_count(int event_set)
{
ACQUIRE_LOCK(&papi_counter_mutex);
- PAPI_CHECK( PAPI_accum(event_set,GCCounters));
+ PAPI_CHECK( PAPI_accum(event_set,GC1Counters));
PAPI_CHECK( PAPI_stop(event_set,NULL));
RELEASE_LOCK(&papi_counter_mutex);
}