diff options
author | simonmar@microsoft.com <unknown> | 2007-11-20 13:36:35 +0000 |
---|---|---|
committer | simonmar@microsoft.com <unknown> | 2007-11-20 13:36:35 +0000 |
commit | 8db56c8606e6c0e89a87d34c3f67124f1e8b988e (patch) | |
tree | 8ee9883969f75dbaa1fcd6d9842f79d5adc56805 /rts/Papi.c | |
parent | 510d17388f2c1cde5bbcef8083a57444438614de (diff) | |
download | haskell-8db56c8606e6c0e89a87d34c3f67124f1e8b988e.tar.gz |
improvements to PAPI support
- major (multithreaded) GC is measured separately from minor GC
- events to measure can now be specified on the command line, e.g.
prog +RTS -a+PAPI_TOT_CYC
Diffstat (limited to 'rts/Papi.c')
-rw-r--r-- | rts/Papi.c | 177 |
1 files changed, 96 insertions, 81 deletions
diff --git a/rts/Papi.c b/rts/Papi.c index d442718fa9..d95e26c8e7 100644 --- a/rts/Papi.c +++ b/rts/Papi.c @@ -30,7 +30,7 @@ static Mutex papi_counter_mutex; struct _papi_events { int event_code; - char * event_name; + const char * event_name; }; /* Beware, these counters are Opteron specific @@ -75,45 +75,54 @@ int papi_error; /* Arbitrary, to avoid using malloc */ #define MAX_PAPI_EVENTS 10 -int n_papi_events = 0; +static nat n_papi_events = 0; /* Events counted during GC and Mutator execution */ /* There's a trailing comma, do all C compilers accept that? */ static struct _papi_events papi_events[MAX_PAPI_EVENTS]; long_long MutatorCounters[MAX_PAPI_EVENTS]; -long_long GCCounters[MAX_PAPI_EVENTS]; +long_long GC0Counters[MAX_PAPI_EVENTS]; +long_long GC1Counters[MAX_PAPI_EVENTS]; long_long start_mutator_cycles; -long_long start_gc_cycles; long_long mutator_cycles; -long_long gc_cycles; +long_long start_gc_cycles; +long_long gc0_cycles; +long_long gc1_cycles; static long_long papi_counter(long_long values[],int event); static void papi_add_events(int EventSet); +static nat max_hardware_counters = 2; + /* If you want to add events to count, extend the * init_countable_events and the papi_report function. * Be aware that your processor can count a limited number * of events simultaneously, you can turn on multiplexing * to increase that number, though. 
*/ +static void papi_add_event(const char *name, int code) +{ + if (n_papi_events >= max_hardware_counters) { + errorBelch("too many PAPI events for this CPU (max: %d)", + max_hardware_counters); + stg_exit(EXIT_FAILURE); + } + papi_events[n_papi_events].event_code = code; + papi_events[n_papi_events].event_name = name; + n_papi_events++; +} + static void init_countable_events(void) { -#define PAPI_ADD_EVENT(EVENT) \ - { \ - if (n_papi_events >= MAX_PAPI_EVENTS) { \ - barf("too many PAPI events"); \ - } \ - papi_events[n_papi_events].event_code = EVENT; \ - papi_events[n_papi_events].event_name = #EVENT; \ - n_papi_events++; \ - } + max_hardware_counters = PAPI_num_counters(); + +#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT) - PAPI_ADD_EVENT(PAPI_TOT_INS); if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { PAPI_ADD_EVENT(FR_BR); PAPI_ADD_EVENT(FR_BR_MIS); @@ -133,8 +142,35 @@ init_countable_events(void) PAPI_ADD_EVENT(DC_L2_REFILL_MOES); PAPI_ADD_EVENT(DC_SYS_REFILL_MOES); PAPI_ADD_EVENT(FR_BR_MIS); + } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) { + nat i; + char *name; + int code; + for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) { + name = RtsFlags.PapiFlags.userEvents[i]; + PAPI_CHECK(PAPI_event_name_to_code(name, &code)) + papi_add_event(name, code); + } } else { - PAPI_ADD_EVENT(PAPI_STL_ICY); + // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses + // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads + // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses + // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses + // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses + // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses + // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses + // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses + // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes + // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads + // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes + // 
PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads + // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line + // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses + PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions + PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total cycles + // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line + // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource + } // We might also consider: @@ -143,90 +179,58 @@ init_countable_events(void) }; -static char temp[BIG_STRING_LEN]; - -static void -papi_mut_cycles(void) -{ - ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/); - statsPrintf(" (MUT_CYCLES) : %s\n",temp); -} - static void -papi_gc_cycles(void) +papi_report_event(const char *name, ullong value) { - ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/); - statsPrintf(" (GC_CYCLES) : %s\n",temp); + static char temp[BIG_STRING_LEN]; + ullong_format_string(value,temp,rtsTrue/*commas*/); + statsPrintf(" %15s %15s\n", name, temp); } /* This function reports counters for GC and mutator */ static void -papi_report(long_long PapiCounters[]) +papi_report(long_long counters[]) { - -/* Report the value of a counter */ -#define PAPI_REPORT(EVENTSET,EVENT) \ - { \ - ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \ - statsPrintf(" (" #EVENT ") : %s\n",temp); \ - } + nat i; /* Report the value of a counter as a percentage of another counter */ #define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ - statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \ - papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) + statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \ + papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) - /* I need to improve formatting aesthetics */ - PAPI_REPORT(PapiCounters,PAPI_TOT_INS); - - if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { - PAPI_REPORT(PapiCounters,FR_BR); - PAPI_REPORT(PapiCounters,FR_BR_MIS); - 
PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR); - PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR); + for (i = 0; i < n_papi_events; i++) + { + papi_report_event(papi_events[i].event_name, counters[i]); } - else if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS); - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR); - //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC); - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS); - //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC); + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR); + PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR); } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { - PAPI_REPORT(PapiCounters,PAPI_L1_DCA); - PAPI_REPORT(PapiCounters,PAPI_L1_DCM); - PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA); + PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA); } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { - PAPI_REPORT(PapiCounters,PAPI_L2_DCA); - PAPI_REPORT(PapiCounters,PAPI_L2_DCM); - PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA); + PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA); } - - else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { - PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES); - PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES); - PAPI_REPORT(PapiCounters,FR_BR_MIS); - } - - else { - PAPI_REPORT(PapiCounters,PAPI_STL_ICY); - } } void papi_stats_report (void) { - statsPrintf(" -- CPU Mutator counters --\n"); - papi_mut_cycles(); + statsPrintf(" Mutator CPU counters\n"); + papi_report_event("CYCLES", mutator_cycles); papi_report(MutatorCounters); - statsPrintf("\n -- CPU GC counters --\n"); - papi_gc_cycles(); - papi_report(GCCounters); + statsPrintf("\n GC(0) CPU counters\n"); + papi_report_event("CYCLES", gc0_cycles); + papi_report(GC0Counters); + + statsPrintf("\n GC(1) CPU counters\n"); + papi_report_event("CYCLES", 
gc1_cycles); + papi_report(GC1Counters); } void @@ -273,7 +277,7 @@ papi_init (void) static long_long papi_counter(long_long values[],int event) { - int i; + nat i; for(i=0;i<n_papi_events;i++) { if(papi_events[i].event_code==event) { return values[i]; @@ -288,7 +292,7 @@ papi_counter(long_long values[],int event) static void papi_add_events(int EventSet) { - int i; + nat i; for(i=0;i<n_papi_events;i++) { if((papi_error=PAPI_add_event(EventSet, papi_events[i].event_code)) @@ -334,18 +338,29 @@ papi_start_gc_count(void) } void -papi_stop_gc_count(void) +papi_stop_gc0_count(void) +{ + ACQUIRE_LOCK(&papi_counter_mutex); + PAPI_CHECK( PAPI_accum(GCEvents,GC0Counters)); + PAPI_CHECK( PAPI_stop(GCEvents,NULL)); + gc0_cycles += PAPI_cycles() - start_gc_cycles; + RELEASE_LOCK(&papi_counter_mutex); +} + + +void +papi_stop_gc1_count(void) { ACQUIRE_LOCK(&papi_counter_mutex); - PAPI_CHECK( PAPI_accum(GCEvents,GCCounters)); + PAPI_CHECK( PAPI_accum(GCEvents,GC1Counters)); PAPI_CHECK( PAPI_stop(GCEvents,NULL)); - gc_cycles += PAPI_cycles() - start_gc_cycles; + gc1_cycles += PAPI_cycles() - start_gc_cycles; RELEASE_LOCK(&papi_counter_mutex); } void -papi_thread_start_gc_count(int event_set) +papi_thread_start_gc1_count(int event_set) { ACQUIRE_LOCK(&papi_counter_mutex); PAPI_CHECK( PAPI_start(event_set)); @@ -353,10 +368,10 @@ papi_thread_start_gc_count(int event_set) } void -papi_thread_stop_gc_count(int event_set) +papi_thread_stop_gc1_count(int event_set) { ACQUIRE_LOCK(&papi_counter_mutex); - PAPI_CHECK( PAPI_accum(event_set,GCCounters)); + PAPI_CHECK( PAPI_accum(event_set,GC1Counters)); PAPI_CHECK( PAPI_stop(event_set,NULL)); RELEASE_LOCK(&papi_counter_mutex); } |