diff options
author | Simon Marlow <simonmar@microsoft.com> | 2007-10-31 16:30:15 +0000 |
---|---|---|
committer | Simon Marlow <simonmar@microsoft.com> | 2007-10-31 16:30:15 +0000 |
commit | a84385fa752d8fa800a2155cce95f47c68b74e2b (patch) | |
tree | 1b3ad17775a99b9149fbfec56fd8e68760abfe13 /rts | |
parent | bf4d6a58a1079b944ffc4bdd41d57403a7727046 (diff) | |
download | haskell-a84385fa752d8fa800a2155cce95f47c68b74e2b.tar.gz |
Refactor PAPI support, and add profiling of multithreaded GC
Diffstat (limited to 'rts')
-rw-r--r-- | rts/Papi.c | 194 |
-rw-r--r-- | rts/Papi.h | 34 |
-rw-r--r-- | rts/RtsStartup.c | 22 |
-rw-r--r-- | rts/Stats.c | 13 |
-rw-r--r-- | rts/sm/GC.c | 18 |
-rw-r--r-- | rts/sm/GC.h | 4 |
6 files changed, 168 insertions, 117 deletions
diff --git a/rts/Papi.c b/rts/Papi.c index 78cf9b1052..8e8900fe96 100644 --- a/rts/Papi.c +++ b/rts/Papi.c @@ -14,38 +14,25 @@ #ifdef USE_PAPI /* ugly */ +#include <papi.h> + #include "Papi.h" #include "Rts.h" #include "RtsUtils.h" #include "Stats.h" #include "RtsFlags.h" +#include "OSThreads.h" +// used to protect the aggregated counters +#ifdef THREADED_RTS +static Mutex papi_counter_mutex; +#endif struct _papi_events { int event_code; char * event_name; }; -#define PAPI_ADD_EVENT(EVENT) \ - { \ - ASSERT(n_papi_events<MAX_PAPI_EVENTS); \ - papi_events[n_papi_events].event_code = EVENT; \ - papi_events[n_papi_events].event_name = #EVENT; \ - n_papi_events++; \ - } - -/* Report the value of a counter */ -#define PAPI_REPORT(EVENTSET,EVENT) \ - { \ - ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \ - statsPrintf(" (" #EVENT ") : %s\n",temp); \ - } - -/* Report the value of a counter as a percentage of another counter */ -#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ - statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \ - papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) - /* Beware, these counters are Opteron specific * I obtained the numbers using the papi_avail * and papi_native_avail utilities. 
@@ -65,12 +52,16 @@ struct _papi_events { #define DC_L2_REFILL_MOES 0x40001e1b #define DC_SYS_REFILL_MOES 0x40001e1c -/* Number of counted events, computed from size of papi_events */ -#define N_PAPI_EVENTS n_papi_events - /* This is bad, it should be in a header */ #define BIG_STRING_LEN 512 + +#define PAPI_CHECK(CALL) \ + if((papi_error=(CALL)) != PAPI_OK) { \ + debugBelch("PAPI function failed in module %s at line %d with error code %d\n", \ + __FILE__,__LINE__,papi_error); \ + } + /* While PAPI reporting is going on this flag is on */ int papi_is_reporting; @@ -100,6 +91,9 @@ long_long gc_cycles; +static long_long papi_counter(long_long values[],int event); +static void papi_add_events(int EventSet); + /* If you want to add events to count, extend the * init_countable_events and the papi_report function. * Be aware that your processor can count a limited number @@ -109,31 +103,43 @@ long_long gc_cycles; static void init_countable_events(void) { +#define PAPI_ADD_EVENT(EVENT) \ + { \ + if (n_papi_events >= MAX_PAPI_EVENTS) { \ + barf("too many PAPI events"); \ + } \ + papi_events[n_papi_events].event_code = EVENT; \ + papi_events[n_papi_events].event_name = #EVENT; \ + n_papi_events++; \ + } + PAPI_ADD_EVENT(PAPI_TOT_INS); - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { PAPI_ADD_EVENT(FR_BR); PAPI_ADD_EVENT(FR_BR_MIS); /* Docs are wrong? 
Opteron does not count indirect branch misses exclusively */ PAPI_ADD_EVENT(FR_BR_MISCOMPARE); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { PAPI_ADD_EVENT(FR_DISPATCH_STALLS); PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR); PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { PAPI_ADD_EVENT(PAPI_L1_DCA); PAPI_ADD_EVENT(PAPI_L1_DCM); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { PAPI_ADD_EVENT(PAPI_L2_DCA); PAPI_ADD_EVENT(PAPI_L2_DCM); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { PAPI_ADD_EVENT(DC_L2_REFILL_MOES); PAPI_ADD_EVENT(DC_SYS_REFILL_MOES); PAPI_ADD_EVENT(FR_BR_MIS); + } else { + PAPI_ADD_EVENT(PAPI_STL_ICY); } + + // We might also consider: + // PAPI_BR_MSP Conditional branch instructions mispredicted + // PAPI_RES_STL Cycles stalled on any resource }; @@ -154,21 +160,33 @@ papi_gc_cycles() } /* This function reports counters for GC and mutator */ -void +static void papi_report(long_long PapiCounters[]) { - /* I need to improve formatting aesthetics */ +/* Report the value of a counter */ +#define PAPI_REPORT(EVENTSET,EVENT) \ + { \ + ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \ + statsPrintf(" (" #EVENT ") : %s\n",temp); \ + } + +/* Report the value of a counter as a percentage of another counter */ +#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ + statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \ + papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) + + /* I need to improve formatting aesthetics */ PAPI_REPORT(PapiCounters,PAPI_TOT_INS); - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { 
PAPI_REPORT(PapiCounters,FR_BR); PAPI_REPORT(PapiCounters,FR_BR_MIS); PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR); PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { + else if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS); PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR); //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC); @@ -176,50 +194,87 @@ papi_report(long_long PapiCounters[]) //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { PAPI_REPORT(PapiCounters,PAPI_L1_DCA); PAPI_REPORT(PapiCounters,PAPI_L1_DCM); PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { PAPI_REPORT(PapiCounters,PAPI_L2_DCA); PAPI_REPORT(PapiCounters,PAPI_L2_DCM); PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES); PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES); PAPI_REPORT(PapiCounters,FR_BR_MIS); } + else { + PAPI_REPORT(PapiCounters,PAPI_STL_ICY); + } } - +void +papi_stats_report (void) +{ + statsPrintf(" -- CPU Mutator counters --\n"); + papi_mut_cycles(); + papi_report(MutatorCounters); + + statsPrintf("\n -- CPU GC counters --\n"); + papi_gc_cycles(); + papi_report(GCCounters); +} + +void +papi_init_eventset (int *event_set) +{ + PAPI_register_thread(); + PAPI_CHECK( PAPI_create_eventset(event_set)); + papi_add_events(*event_set); +} void -papi_init_eventsets(void) +papi_init (void) { + /* Initialise the performance tracking library */ + int ver; + if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { + if (ver > 0) 
{ + errorBelch("PAPI_library_init: wrong version: %x", ver); + stg_exit(EXIT_FAILURE); + } else { + sysErrorBelch("PAPI_library_init"); + stg_exit(EXIT_FAILURE); + } + } - init_countable_events(); +#ifdef THREADED_RTS + { + int err; + if ((err = PAPI_thread_init(osThreadId)) < 0) { + barf("PAPI_thread_init: %d",err); + } - /* One event set for the mutator and another for the GC */ - PAPI_CHECK( PAPI_create_eventset(&MutatorEvents)); - PAPI_CHECK( PAPI_create_eventset(&GCEvents)); + initMutex(&papi_counter_mutex); + } +#endif - /* Both sets contain the same events */ - papi_add_events(MutatorEvents); - papi_add_events(GCEvents); + init_countable_events(); + papi_init_eventset(&MutatorEvents); + papi_init_eventset(&GCEvents); } /* Extract the value corresponding to an event */ -long_long +static long_long papi_counter(long_long values[],int event) { int i; - for(i=0;i<N_PAPI_EVENTS;i++) { + for(i=0;i<n_papi_events;i++) { if(papi_events[i].event_code==event) { return values[i]; } @@ -230,11 +285,11 @@ papi_counter(long_long values[],int event) } /* Add the events of papi_events into an event set */ -void +static void papi_add_events(int EventSet) { int i; - for(i=0;i<N_PAPI_EVENTS;i++) { + for(i=0;i<n_papi_events;i++) { if((papi_error=PAPI_add_event(EventSet, papi_events[i].event_code)) != PAPI_OK) @@ -253,32 +308,57 @@ papi_add_events(int EventSet) void papi_start_mutator_count(void) { + ACQUIRE_LOCK(&papi_counter_mutex); PAPI_CHECK( PAPI_start(MutatorEvents)); start_mutator_cycles = PAPI_cycles(); + RELEASE_LOCK(&papi_counter_mutex); } void papi_stop_mutator_count(void) { + ACQUIRE_LOCK(&papi_counter_mutex); mutator_cycles += PAPI_cycles() - start_mutator_cycles; PAPI_CHECK( PAPI_accum(MutatorEvents,MutatorCounters)); PAPI_CHECK( PAPI_stop(MutatorEvents,NULL)); + RELEASE_LOCK(&papi_counter_mutex); } void papi_start_gc_count(void) { - PAPI_CHECK( PAPI_start(GCEvents)); - start_gc_cycles = PAPI_cycles(); + ACQUIRE_LOCK(&papi_counter_mutex); + PAPI_CHECK( 
PAPI_start(GCEvents)); + start_gc_cycles = PAPI_cycles(); + RELEASE_LOCK(&papi_counter_mutex); } void papi_stop_gc_count(void) { - gc_cycles += PAPI_cycles() - start_gc_cycles; - PAPI_CHECK( PAPI_accum(GCEvents,GCCounters)); - PAPI_CHECK( PAPI_stop(GCEvents,NULL)); + ACQUIRE_LOCK(&papi_counter_mutex); + PAPI_CHECK( PAPI_accum(GCEvents,GCCounters)); + PAPI_CHECK( PAPI_stop(GCEvents,NULL)); + gc_cycles += PAPI_cycles() - start_gc_cycles; + RELEASE_LOCK(&papi_counter_mutex); } +void +papi_thread_start_gc_count(int event_set) +{ + ACQUIRE_LOCK(&papi_counter_mutex); + PAPI_CHECK( PAPI_start(event_set)); + RELEASE_LOCK(&papi_counter_mutex); +} + +void +papi_thread_stop_gc_count(int event_set) +{ + ACQUIRE_LOCK(&papi_counter_mutex); + PAPI_CHECK( PAPI_accum(event_set,GCCounters)); + PAPI_CHECK( PAPI_stop(event_set,NULL)); + RELEASE_LOCK(&papi_counter_mutex); +} + #endif /* USE_PAPI */ diff --git a/rts/Papi.h b/rts/Papi.h index 835eea6473..eaabdf5495 100644 --- a/rts/Papi.h +++ b/rts/Papi.h @@ -5,41 +5,21 @@ * * ---------------------------------------------------------------------------*/ - -#include <papi.h> - - - -#define PAPI_CHECK(CALL) \ - if((papi_error=(CALL)) != PAPI_OK) { \ - debugBelch("PAPI function failed in module %s at line %d with error code %d\n", \ - __FILE__,__LINE__,papi_error); \ - } - /* Check the error value of a PAPI call, reporting an error, if needed */ extern int papi_error; /* While PAPI reporting is going on this flag is on */ extern int papi_is_reporting; -/* Event sets and counter arrays for GC and mutator */ - -extern int MutatorEvents; -extern int GCEvents; - -extern long_long MutatorCounters[]; -extern long_long GCCounters[]; - -long_long papi_counter(long_long values[],int event); -void papi_report(long_long PapiCounters[]); -void papi_mut_cycles(void); -void papi_gc_cycles(void); -void papi_add_events(int EventSet); - -void papi_init_eventsets(void); +void papi_stats_report(void); +void papi_init_eventset(int * event_set); +void 
papi_init(void); void papi_start_mutator_count(void); void papi_stop_mutator_count(void); void papi_start_gc_count(void); void papi_stop_gc_count(void); - +// for multithreaded GC, each sub-thread uses these functions to count +// events and aggregate them into the main GC counters. +void papi_thread_start_gc_count(int event_set); +void papi_thread_stop_gc_count(int event_set); diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index 774de72e38..d1025a3a8b 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -167,28 +167,8 @@ hs_init(int *argc, char **argv[]) argv++; argc--; #endif - /* Initialise the performance tracking library */ #ifdef USE_PAPI - { - int ver; - if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { - if (ver > 0) { - errorBelch("PAPI_library_init: wrong version: %x", ver); - stg_exit(EXIT_FAILURE); - } else { - sysErrorBelch("PAPI_library_init"); - stg_exit(EXIT_FAILURE); - } - } - } -#ifdef THREADED_RTS - { - int err; - if ((err = PAPI_thread_init(osThreadId)) < 0) { - barf("PAPI_thread_init: %d",err); - } - } -#endif + papi_init(); #endif /* Set the RTS flags to default values. */ diff --git a/rts/Stats.c b/rts/Stats.c index 1b0abaa4e4..1127b81566 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -171,8 +171,6 @@ stat_endInit(void) InitElapsedTime = elapsed - ElapsedTimeStart; } #if USE_PAPI - papi_init_eventsets(); - /* We start counting events for the mutator * when garbage collection starts * we switch to the GC event set. */ @@ -606,16 +604,7 @@ stat_exit(int alloc) */ #if USE_PAPI - /* PAPI reporting, should put somewhere else? 
- * Note that the cycles are counted _after_ the initialization of the RTS -- AR */ - - statsPrintf(" -- CPU Mutator counters --\n"); - papi_mut_cycles(); - papi_report(MutatorCounters); - - statsPrintf("\n -- CPU GC counters --\n"); - papi_gc_cycles(); - papi_report(GCCounters); + papi_stats_report(); #endif } diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 2fc3f4dd9b..f248a75a39 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -40,6 +40,7 @@ #include "RetainerProfile.h" #include "RaiseAsync.h" #include "Sparks.h" +#include "Papi.h" #include "GC.h" #include "Compact.h" @@ -873,6 +874,10 @@ alloc_gc_thread (gc_thread *t, int n) init_gc_thread(t); +#ifdef USE_PAPI + t->papi_events = -1; +#endif + t->steps = stgMallocBytes(RtsFlags.GcFlags.generations * sizeof(step_workspace *), "initialise_gc_thread"); @@ -1011,7 +1016,20 @@ gc_thread_mainloop (void) gct->wakeup = rtsFalse; if (gct->exit) break; +#ifdef USE_PAPI + // start performance counters in this thread... + if (gct->papi_events == -1) { + papi_init_eventset(&gct->papi_events); + } + papi_thread_start_gc_count(gct->papi_events); +#endif + gc_thread_work(); + +#ifdef USE_PAPI + // count events in this thread towards the GC totals + papi_thread_stop_gc_count(gct->papi_events); +#endif } } #endif diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 488a2db8cf..5d7924ea6b 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -142,6 +142,10 @@ typedef struct gc_thread_ { lnat thunk_selector_depth; // ummm.... not used as of now +#ifdef USE_PAPI + int papi_events; +#endif + } gc_thread; extern nat N; |