diff options
-rw-r--r-- | ghc/ghc.mk | 7 | ||||
-rw-r--r-- | ghc/hschooks.c | 4 | ||||
-rw-r--r-- | includes/Rts.h | 30 | ||||
-rw-r--r-- | includes/rts/Flags.h | 21 | ||||
-rw-r--r-- | includes/rts/storage/TSO.h | 6 | ||||
-rw-r--r-- | rts/GetTime.h | 14 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 13 | ||||
-rw-r--r-- | rts/ProfHeap.c | 3 | ||||
-rw-r--r-- | rts/ProfHeap.h | 4 | ||||
-rw-r--r-- | rts/Profiling.c | 8 | ||||
-rw-r--r-- | rts/Proftimer.c | 6 | ||||
-rw-r--r-- | rts/RtsFlags.c | 76 | ||||
-rw-r--r-- | rts/Schedule.c | 2 | ||||
-rw-r--r-- | rts/Stats.c | 162 | ||||
-rw-r--r-- | rts/Stats.h | 6 | ||||
-rw-r--r-- | rts/Task.c | 6 | ||||
-rw-r--r-- | rts/Task.h | 14 | ||||
-rw-r--r-- | rts/Threads.c | 12 | ||||
-rw-r--r-- | rts/Ticker.h | 2 | ||||
-rw-r--r-- | rts/eventlog/EventLog.c | 2 | ||||
-rw-r--r-- | rts/posix/GetTime.c | 38 | ||||
-rw-r--r-- | rts/posix/Itimer.c | 124 | ||||
-rw-r--r-- | rts/posix/Itimer.h | 2 | ||||
-rw-r--r-- | rts/posix/Select.c | 39 | ||||
-rw-r--r-- | rts/posix/Select.h | 8 | ||||
-rw-r--r-- | rts/sm/GCThread.h | 6 | ||||
-rw-r--r-- | rts/win32/GetTime.c | 31 | ||||
-rw-r--r-- | rts/win32/Ticker.c | 178 | ||||
-rw-r--r-- | rules/build-prog.mk | 4 |
29 files changed, 381 insertions, 447 deletions
diff --git a/ghc/ghc.mk b/ghc/ghc.mk index 38b3016b30..2af90bed28 100644 --- a/ghc/ghc.mk +++ b/ghc/ghc.mk @@ -26,8 +26,11 @@ ghc_stage1_MORE_HC_OPTS = $(GhcStage1HcOpts) ghc_stage2_MORE_HC_OPTS = $(GhcStage2HcOpts) ghc_stage3_MORE_HC_OPTS = $(GhcStage3HcOpts) -ghc_stage2_CC_OPTS = -Iincludes -ghc_stage3_CC_OPTS = -Iincludes +# We need __GLASGOW_HASKELL__ in hschooks.c, so we have to build C +# sources with GHC: +ghc_stage1_UseGhcForCC = YES +ghc_stage2_UseGhcForCC = YES +ghc_stage3_UseGhcForCC = YES ghc_stage1_C_FILES_NODEPS = ghc/hschooks.c diff --git a/ghc/hschooks.c b/ghc/hschooks.c index 18679281e0..037d4e18be 100644 --- a/ghc/hschooks.c +++ b/ghc/hschooks.c @@ -23,7 +23,11 @@ defaultsHook (void) // See #3408: the default idle GC time of 0.3s is too short on // Windows where we receive console events once per second or so. +#if __GLASGOW_HASKELL__ >= 703 + RtsFlags.GcFlags.idleGCDelayTime = SecondsToTime(5); +#else RtsFlags.GcFlags.idleGCDelayTime = 5*1000; +#endif } void diff --git a/includes/Rts.h b/includes/Rts.h index 5caba59dbe..45c09f8fb7 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -155,6 +155,36 @@ void _assertFail(const char *filename, unsigned int linenum) #endif /* ----------------------------------------------------------------------------- + Time values in the RTS + -------------------------------------------------------------------------- */ + +// For most time values in the RTS we use a fixed resolution of nanoseconds, +// normalising the time we get from platform-dependent APIs to this +// resolution. +#define TIME_RESOLUTION 1000000000 +typedef StgInt64 Time; + +#if TIME_RESOLUTION == 1000000000 +// I'm being lazy, but it's awkward to define fully general versions of these +#define TimeToUS(t) (t / 1000) +#define TimeToNS(t) (t) +#define USToTime(t) ((Time)(t) * 1000) +#define NSToTime(t) ((Time)(t)) +#else +#error Fix TimeToNS(), TimeToUS() etc. +#endif + +#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) +#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) + +// Use instead of SecondsToTime() when we have a floating-point +// seconds value, to avoid truncating it. +INLINE_HEADER Time fsecondsToTime (double t) +{ + return (Time)(t * TIME_RESOLUTION); +} + +/* ----------------------------------------------------------------------------- Include everything STG-ish -------------------------------------------------------------------------- */ diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index 2d1516f586..439b261fd8 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -52,7 +52,7 @@ struct GC_FLAGS { rtsBool ringBell; rtsBool frontpanel; - int idleGCDelayTime; /* in milliseconds */ + Time idleGCDelayTime; /* units: TIME_RESOLUTION */ StgWord heapBase; /* address to ask the OS for memory */ }; @@ -99,8 +99,8 @@ struct PROFILING_FLAGS { # define HEAP_BY_CLOSURE_TYPE 8 - nat profileInterval; /* delta between samples (in ms) */ - nat profileIntervalTicks; /* delta between samples (in 'ticks') */ + Time heapProfileInterval; /* time between samples */ + nat heapProfileIntervalTicks; /* ticks between samples (derived) */ rtsBool includeTSOs; @@ -135,12 +135,21 @@ struct TRACE_FLAGS { }; struct CONCURRENT_FLAGS { - int ctxtSwitchTime; /* in milliseconds */ - int ctxtSwitchTicks; /* derived */ + Time ctxtSwitchTime; /* units: TIME_RESOLUTION */ + int ctxtSwitchTicks; /* derived */ }; +/* + * The tickInterval is the time interval between "ticks", ie. + * timer signals (see Timer.{c,h}). It is the frequency at + * which we sample CCCS for profiling. + * + * It is changed by the +RTS -V<secs> flag. + */ +#define DEFAULT_TICK_INTERVAL USToTime(10000) + struct MISC_FLAGS { - int tickInterval; /* in milliseconds */ + Time tickInterval; /* units: TIME_RESOLUTION */ rtsBool install_signal_handlers; rtsBool machineReadable; StgWord linkerMemBase; /* address to ask the OS for memory diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h index 04e673fb12..20c6ebf4f2 100644 --- a/includes/rts/storage/TSO.h +++ b/includes/rts/storage/TSO.h @@ -54,7 +54,13 @@ typedef union { #if defined(mingw32_HOST_OS) StgAsyncIOResult *async_result; #endif +#if !defined(THREADED_RTS) StgWord target; + // Only for the non-threaded RTS: the target time for a thread + // blocked in threadDelay, in units of 10ms. This is a + // compromise: we don't want to take up much space in the TSO. If + // you want better resolution for threadDelay, use -threaded. +#endif } StgTSOBlockInfo; diff --git a/rts/GetTime.h b/rts/GetTime.h index b8d402db7c..86c5511df9 100644 --- a/rts/GetTime.h +++ b/rts/GetTime.h @@ -11,16 +11,10 @@ #include "BeginPrivate.h" -// We'll use a fixed resolution of usec for now. The machine -// dependent implementation may have a different resolution, but we'll -// normalise to this for the machine independent interface. -#define TICKS_PER_SECOND 1000000 -typedef StgInt64 Ticks; - -Ticks getProcessCPUTime (void); -Ticks getThreadCPUTime (void); -Ticks getProcessElapsedTime (void); -void getProcessTimes (Ticks *user, Ticks *elapsed); +Time getProcessCPUTime (void); +Time getThreadCPUTime (void); +Time getProcessElapsedTime (void); +void getProcessTimes (Time *user, Time *elapsed); /* Get the current date and time. Uses seconds since the Unix epoch, plus nanoseconds diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 85920932c9..8836d3bfe6 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1785,16 +1785,13 @@ stg_delayzh #else + W_ time; - W_ divisor; (time) = foreign "C" getourtimeofday() [R1]; - divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags)); - if (divisor == 0) { - divisor = 50; - } - divisor = divisor * 1000; - target = ((R1 + divisor - 1) / divisor) /* divide rounding up */ - + time + 1; /* Add 1 as getourtimeofday rounds down */ + // getourtimeofday() returns a value in units of 10ms + // R1 is in microseconds, we need to (/ 10000), rounding up + target = time + 1 + (R1 + 10000-1) / 10000; + StgTSO_block_info(CurrentTSO) = target; /* Insert the new thread in the sleeping queue. */ diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 56c44519fb..302d1d7997 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -1070,8 +1070,7 @@ heapCensusChain( Census *census, bdescr *bd ) } } -void -heapCensus( Ticks t ) +void heapCensus (Time t) { nat g, n; Census *census; diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h index cf09c59231..b3bed903b5 100644 --- a/rts/ProfHeap.h +++ b/rts/ProfHeap.h @@ -9,11 +9,9 @@ #ifndef PROFHEAP_H #define PROFHEAP_H -#include "GetTime.h" // for Ticks - #include "BeginPrivate.h" -void heapCensus (Ticks t); +void heapCensus (Time t); nat initHeapProfiling (void); void endHeapProfiling (void); rtsBool strMatchesSelector (char* str, char* sel); diff --git a/rts/Profiling.c b/rts/Profiling.c index 38191ff4bd..c393c8fa83 100644 --- a/rts/Profiling.c +++ b/rts/Profiling.c @@ -801,11 +801,11 @@ reportCCSProfiling( void ) fprintf(prof_file, " %s", prog_argv[count]); fprintf(prof_file, "\n\n"); - fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n", - (double) total_prof_ticks * - (double) RtsFlags.MiscFlags.tickInterval / 1000, + fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us)\n", + ((double) total_prof_ticks * + (double) RtsFlags.MiscFlags.tickInterval) / TIME_RESOLUTION, (unsigned long) total_prof_ticks, - (int) RtsFlags.MiscFlags.tickInterval); + (int) TimeToUS(RtsFlags.MiscFlags.tickInterval)); fprintf(prof_file, "\ttotal alloc = %11s bytes", showStgWord64(total_alloc * sizeof(W_), diff --git a/rts/Proftimer.c b/rts/Proftimer.c index 82838184b7..76d7679000 100644 --- a/rts/Proftimer.c +++ b/rts/Proftimer.c @@ -50,7 +50,7 @@ void startHeapProfTimer( void ) { if (RtsFlags.ProfFlags.doHeapProfile && - RtsFlags.ProfFlags.profileIntervalTicks > 0) { + RtsFlags.ProfFlags.heapProfileIntervalTicks > 0) { do_heap_prof_ticks = rtsTrue; } } @@ -60,7 +60,7 @@ initProfTimer( void ) { performHeapProfile = rtsFalse; - ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks; + ticks_to_heap_profile = RtsFlags.ProfFlags.heapProfileIntervalTicks; startHeapProfTimer(); } @@ -80,7 +80,7 @@ handleProfTick(void) if (do_heap_prof_ticks) { ticks_to_heap_profile--; if (ticks_to_heap_profile <= 0) { - ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks; + ticks_to_heap_profile = RtsFlags.ProfFlags.heapProfileIntervalTicks; performHeapProfile = rtsTrue; } } diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index d8bcf1c915..3e3290dd3d 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -113,7 +113,7 @@ void initRtsFlagsDefaults(void) #ifdef RTS_GTK_FRONTPANEL RtsFlags.GcFlags.frontpanel = rtsFalse; #endif - RtsFlags.GcFlags.idleGCDelayTime = 300; /* millisecs */ + RtsFlags.GcFlags.idleGCDelayTime = USToTime(300000); // 300ms #if osf3_HOST_OS /* ToDo: Perhaps by adjusting this value we can make linking without @@ -150,7 +150,7 @@ void initRtsFlagsDefaults(void) #endif /* PROFILING */ RtsFlags.ProfFlags.doHeapProfile = rtsFalse; - RtsFlags.ProfFlags.profileInterval = 100; + RtsFlags.ProfFlags. heapProfileInterval = USToTime(100000); // 100ms #ifdef PROFILING RtsFlags.ProfFlags.includeTSOs = rtsFalse; @@ -176,8 +176,13 @@ void initRtsFlagsDefaults(void) RtsFlags.TraceFlags.user = rtsFalse; #endif - RtsFlags.MiscFlags.tickInterval = 20; /* In milliseconds */ - RtsFlags.ConcFlags.ctxtSwitchTime = 20; /* In milliseconds */ +#ifdef PROFILING + // When profiling we want a lot more ticks + RtsFlags.MiscFlags.tickInterval = USToTime(1000); // 1ms +#else + RtsFlags.MiscFlags.tickInterval = DEFAULT_TICK_INTERVAL; +#endif + RtsFlags.ConcFlags.ctxtSwitchTime = USToTime(20000); // 20ms RtsFlags.MiscFlags.install_signal_handlers = rtsTrue; RtsFlags.MiscFlags.machineReadable = rtsFalse; @@ -312,9 +317,9 @@ usage_text[] = { #if !defined(PROFILING) "", -" -hT Heap residency profile (output file <program>.hp)", +" -h Heap residency profile (output file <program>.hp)", #endif -" -i<sec> Time between heap samples (seconds, default: 0.1)", +" -i<sec> Time between heap profile samples (seconds, default: 0.1)", "", #if defined(TICKY_TICKY) " -r<file> Produce ticky-ticky statistics (with -rstderr for stderr)", @@ -322,10 +327,15 @@ usage_text[] = { #endif " -C<secs> Context-switch interval in seconds.", " 0 or no argument means switch as often as possible.", -" Default: 0.02 sec; resolution is set by -V below.", -" -V<secs> Master tick interval in seconds (0 == disable timer).", -" This sets the resolution for -C and the profile timer -i.", " Default: 0.02 sec.", +" -V<secs> Master tick interval in seconds (0 == disable timer).", +" This sets the resolution for -C and the heap profile timer -i,", +" and is the frequence of time profile samples.", +#ifdef PROFILING +" Default: 0.001 sec.", +#else +" Default: 0.01 sec.", +#endif "", #if defined(DEBUG) " -Ds DEBUG: scheduler", @@ -884,11 +894,8 @@ error = rtsTrue; if (rts_argv[arg][2] == '\0') { /* use default */ } else { - I_ cst; /* tmp */ - - /* Convert to millisecs */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.GcFlags.idleGCDelayTime = cst; + RtsFlags.GcFlags.idleGCDelayTime = + fsecondsToTime(atof(rts_argv[arg]+2)); } break; @@ -1090,12 +1097,9 @@ error = rtsTrue; if (rts_argv[arg][2] == '\0') { /* use default */ } else { - I_ cst; /* tmp */ - - /* Convert to milliseconds */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.ProfFlags.profileInterval = cst; - } + RtsFlags.ProfFlags.heapProfileInterval = + fsecondsToTime(atof(rts_argv[arg]+2)); + } break; /* =========== CONCURRENT ========================= */ @@ -1104,12 +1108,9 @@ error = rtsTrue; if (rts_argv[arg][2] == '\0') RtsFlags.ConcFlags.ctxtSwitchTime = 0; else { - I_ cst; /* tmp */ - - /* Convert to milliseconds */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.ConcFlags.ctxtSwitchTime = cst; - } + RtsFlags.ConcFlags.ctxtSwitchTime = + fsecondsToTime(atof(rts_argv[arg]+2)); + } break; case 'V': /* master tick interval */ @@ -1118,11 +1119,8 @@ error = rtsTrue; // turns off ticks completely RtsFlags.MiscFlags.tickInterval = 0; } else { - I_ cst; /* tmp */ - - /* Convert to milliseconds */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.MiscFlags.tickInterval = cst; + RtsFlags.MiscFlags.tickInterval = + fsecondsToTime(atof(rts_argv[arg]+2)); } break; @@ -1358,14 +1356,14 @@ error = rtsTrue; static void normaliseRtsOpts (void) { if (RtsFlags.MiscFlags.tickInterval < 0) { - RtsFlags.MiscFlags.tickInterval = 50; + RtsFlags.MiscFlags.tickInterval = DEFAULT_TICK_INTERVAL; } // If the master timer is disabled, turn off the other timers. if (RtsFlags.MiscFlags.tickInterval == 0) { RtsFlags.ConcFlags.ctxtSwitchTime = 0; RtsFlags.GcFlags.idleGCDelayTime = 0; - RtsFlags.ProfFlags.profileInterval = 0; + RtsFlags.ProfFlags.heapProfileInterval = 0; } // Determine what tick interval we should use for the RTS timer @@ -1383,9 +1381,9 @@ static void normaliseRtsOpts (void) RtsFlags.MiscFlags.tickInterval); } - if (RtsFlags.ProfFlags.profileInterval > 0) { + if (RtsFlags.ProfFlags.heapProfileInterval > 0) { RtsFlags.MiscFlags.tickInterval = - stg_min(RtsFlags.ProfFlags.profileInterval, + stg_min(RtsFlags.ProfFlags.heapProfileInterval, RtsFlags.MiscFlags.tickInterval); } @@ -1397,12 +1395,12 @@ static void normaliseRtsOpts (void) RtsFlags.ConcFlags.ctxtSwitchTicks = 0; } - if (RtsFlags.ProfFlags.profileInterval > 0) { - RtsFlags.ProfFlags.profileIntervalTicks = - RtsFlags.ProfFlags.profileInterval / + if (RtsFlags.ProfFlags.heapProfileInterval > 0) { + RtsFlags.ProfFlags.heapProfileIntervalTicks = + RtsFlags.ProfFlags.heapProfileInterval / RtsFlags.MiscFlags.tickInterval; } else { - RtsFlags.ProfFlags.profileIntervalTicks = 0; + RtsFlags.ProfFlags.heapProfileIntervalTicks = 0; } if (RtsFlags.GcFlags.stkChunkBufferSize > diff --git a/rts/Schedule.c b/rts/Schedule.c index 4f18209b9e..8c305008ae 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -1304,7 +1304,7 @@ scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED ) // When we have +RTS -i0 and we're heap profiling, do a census at // every GC. This lets us get repeatable runs for debugging. if (performHeapProfile || - (RtsFlags.ProfFlags.profileInterval==0 && + (RtsFlags.ProfFlags.heapProfileInterval==0 && RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) { return rtsTrue; } else { diff --git a/rts/Stats.c b/rts/Stats.c index 23cb4bffaa..9c68364717 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -26,15 +26,15 @@ /* huh? */ #define BIG_STRING_LEN 512 -#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND) +#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) -static Ticks +static Time start_init_cpu, start_init_elapsed, end_init_cpu, end_init_elapsed, start_exit_cpu, start_exit_elapsed, end_exit_cpu, end_exit_elapsed; -static Ticks GC_tot_cpu = 0; +static Time GC_tot_cpu = 0; static StgWord64 GC_tot_alloc = 0; static StgWord64 GC_tot_copied = 0; @@ -43,11 +43,11 @@ static StgWord64 GC_par_max_copied = 0; static StgWord64 GC_par_avg_copied = 0; #ifdef PROFILING -static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time -static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time +static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time +static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time -static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time -static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time +static Time HC_start_time, HC_tot_time = 0; // heap census prof user time +static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #endif #ifdef PROFILING @@ -66,9 +66,9 @@ static lnat max_slop = 0; static lnat GC_end_faults = 0; -static Ticks *GC_coll_cpu = NULL; -static Ticks *GC_coll_elapsed = NULL; -static Ticks *GC_coll_max_pause = NULL; +static Time *GC_coll_cpu = NULL; +static Time *GC_coll_elapsed = NULL; +static Time *GC_coll_max_pause = NULL; static void statsFlush( void ); static void statsClose( void ); @@ -77,7 +77,7 @@ static void statsClose( void ); Current elapsed time ------------------------------------------------------------------------- */ -Ticks stat_getElapsedTime(void) +Time stat_getElapsedTime(void) { return getProcessElapsedTime() - start_init_elapsed; } @@ -87,9 +87,9 @@ Ticks stat_getElapsedTime(void) ------------------------------------------------------------------------ */ double -mut_user_time_until( Ticks t ) +mut_user_time_until( Time t ) { - return TICK_TO_DBL(t - GC_tot_cpu); + return TimeToSecondsDbl(t - GC_tot_cpu); // heapCensus() time is included in GC_tot_cpu, so we don't need // to subtract it here. } @@ -97,7 +97,7 @@ mut_user_time_until( Ticks t ) double mut_user_time( void ) { - Ticks cpu; + Time cpu; cpu = getProcessCPUTime(); return mut_user_time_until(cpu); } @@ -110,13 +110,13 @@ mut_user_time( void ) double mut_user_time_during_RP( void ) { - return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time); + return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time); } double mut_user_time_during_heap_census( void ) { - return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time); + return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time); } #endif /* PROFILING */ @@ -177,16 +177,16 @@ initStats1 (void) statsPrintf(" bytes bytes bytes user elap user elap\n"); } GC_coll_cpu = - (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + (Time *)stgMallocBytes( + sizeof(Time)*RtsFlags.GcFlags.generations, "initStats"); GC_coll_elapsed = - (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + (Time *)stgMallocBytes( + sizeof(Time)*RtsFlags.GcFlags.generations, "initStats"); GC_coll_max_pause = - (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + (Time *)stgMallocBytes( + sizeof(Time)*RtsFlags.GcFlags.generations, "initStats"); for (i = 0; i < RtsFlags.GcFlags.generations; i++) { GC_coll_cpu[i] = 0; @@ -299,7 +299,7 @@ stat_gcWorkerThreadStart (gc_thread *gct) void stat_gcWorkerThreadDone (gc_thread *gct) { - Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed; + Time thread_cpu, elapsed, gc_cpu, gc_elapsed; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { @@ -326,7 +326,7 @@ stat_endGC (gc_thread *gct, RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time { - Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed; + Time cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed; getProcessTimes(&cpu, &elapsed); gc_elapsed = elapsed - gct->gc_start_elapsed; @@ -344,10 +344,10 @@ stat_endGC (gc_thread *gct, alloc*sizeof(W_), copied*sizeof(W_), live*sizeof(W_)); statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n", - TICK_TO_DBL(gc_cpu), - TICK_TO_DBL(gc_elapsed), - TICK_TO_DBL(cpu), - TICK_TO_DBL(elapsed - start_init_elapsed), + TimeToSecondsDbl(gc_cpu), + TimeToSecondsDbl(gc_elapsed), + TimeToSecondsDbl(cpu), + TimeToSecondsDbl(elapsed - start_init_elapsed), faults - gct->gc_start_faults, gct->gc_start_faults - GC_end_faults, gen); @@ -405,7 +405,7 @@ stat_endGC (gc_thread *gct, void stat_startRP(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); RP_start_time = user; @@ -427,7 +427,7 @@ stat_endRP( #endif double averageNumVisit) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); RP_tot_time += user - RP_start_time; @@ -450,7 +450,7 @@ stat_endRP( void stat_startHeapCensus(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); HC_start_time = user; @@ -465,7 +465,7 @@ stat_startHeapCensus(void) void stat_endHeapCensus(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); HC_tot_time += user - HC_start_time; @@ -516,27 +516,27 @@ StgInt TOTAL_CALLS=1; statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) -static inline Ticks get_init_cpu(void) { return end_init_cpu - start_init_cpu; } -static inline Ticks get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } +static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; } +static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } void stat_exit(int alloc) { generation *gen; - Ticks gc_cpu = 0; - Ticks gc_elapsed = 0; - Ticks init_cpu = 0; - Ticks init_elapsed = 0; - Ticks mut_cpu = 0; - Ticks mut_elapsed = 0; - Ticks exit_cpu = 0; - Ticks exit_elapsed = 0; + Time gc_cpu = 0; + Time gc_elapsed = 0; + Time init_cpu = 0; + Time init_elapsed = 0; + Time mut_cpu = 0; + Time mut_elapsed = 0; + Time exit_cpu = 0; + Time exit_elapsed = 0; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { char temp[BIG_STRING_LEN]; - Ticks tot_cpu; - Ticks tot_elapsed; + Time tot_cpu; + Time tot_elapsed; nat i, g, total_collections = 0; getProcessTimes( &tot_cpu, &tot_elapsed ); @@ -611,10 +611,10 @@ stat_exit(int alloc) gen->no, gen->collections, gen->par_collections, - TICK_TO_DBL(GC_coll_cpu[g]), - TICK_TO_DBL(GC_coll_elapsed[g]), - gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections), - TICK_TO_DBL(GC_coll_max_pause[g])); + TimeToSecondsDbl(GC_coll_cpu[g]), + TimeToSecondsDbl(GC_coll_elapsed[g]), + gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections), + TimeToSecondsDbl(GC_coll_max_pause[g])); } #if defined(THREADED_RTS) @@ -639,10 +639,10 @@ stat_exit(int alloc) statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n", i, (task->worker) ? "(worker)" : "(bound)", - TICK_TO_DBL(task->mut_time), - TICK_TO_DBL(task->mut_etime), - TICK_TO_DBL(task->gc_time), - TICK_TO_DBL(task->gc_etime)); + TimeToSecondsDbl(task->mut_time), + TimeToSecondsDbl(task->mut_etime), + TimeToSecondsDbl(task->gc_time), + TimeToSecondsDbl(task->gc_etime)); } } @@ -668,27 +668,27 @@ stat_exit(int alloc) #endif statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed)); + TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed)); statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed)); + TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed)); statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed)); + TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed)); #ifdef PROFILING statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time)); + TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time)); statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time)); + TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time)); #endif statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed)); + TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed)); statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n", - TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed)); + TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed)); #ifndef THREADED_RTS statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n", - TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu), - TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed)); + TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu), + TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed)); #endif if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0) @@ -696,19 +696,19 @@ stat_exit(int alloc) else showStgWord64( (StgWord64)((GC_tot_alloc*sizeof(W_))/ - TICK_TO_DBL(tot_cpu - GC_tot_cpu - + TimeToSecondsDbl(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time))), temp, rtsTrue/*commas*/); statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp); statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n", - TICK_TO_DBL(tot_cpu - GC_tot_cpu - + TimeToSecondsDbl(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 - / TICK_TO_DBL(tot_cpu), - TICK_TO_DBL(tot_cpu - GC_tot_cpu - + / TimeToSecondsDbl(tot_cpu), + TimeToSecondsDbl(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 - / TICK_TO_DBL(tot_elapsed)); + / TimeToSecondsDbl(tot_elapsed)); /* TICK_PRINT(1); @@ -764,9 +764,9 @@ stat_exit(int alloc) max_residency*sizeof(W_), residency_samples, (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)), - TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed), - TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed), - TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed)); + TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed), + TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed), + TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed)); } statsFlush(); @@ -865,10 +865,10 @@ extern void getGCStats( GCStats *s ) { nat total_collections = 0; nat g; - Ticks gc_cpu = 0; - Ticks gc_elapsed = 0; - Ticks current_elapsed = 0; - Ticks current_cpu = 0; + Time gc_cpu = 0; + Time gc_elapsed = 0; + Time current_elapsed = 0; + Time current_cpu = 0; getProcessTimes(¤t_cpu, ¤t_elapsed); @@ -892,16 +892,16 @@ extern void getGCStats( GCStats *s ) s->current_bytes_used = current_residency*(StgWord64)sizeof(W_); s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_); /* - s->init_cpu_seconds = TICK_TO_DBL(get_init_cpu()); - s->init_wall_seconds = TICK_TO_DBL(get_init_elapsed()); + s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu()); + s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed()); */ - s->mutator_cpu_seconds = TICK_TO_DBL(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); - s->mutator_wall_seconds = TICK_TO_DBL(current_elapsed- end_init_elapsed - gc_elapsed); - s->gc_cpu_seconds = TICK_TO_DBL(gc_cpu); - s->gc_wall_seconds = TICK_TO_DBL(gc_elapsed); + s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); + s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed); + s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu); + s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed); /* EZY: Being consistent with incremental output, but maybe should also discount init */ - s->cpu_seconds = TICK_TO_DBL(current_cpu); - s->wall_seconds = TICK_TO_DBL(current_elapsed - end_init_elapsed); + s->cpu_seconds = TimeToSecondsDbl(current_cpu); + s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed); s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_); s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_); } diff --git a/rts/Stats.h b/rts/Stats.h index f0060bdf4a..83b2cb6998 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -49,7 +49,7 @@ void stat_workerStop(void); void initStats0(void); void initStats1(void); -double mut_user_time_until(Ticks t); +double mut_user_time_until(Time t); double mut_user_time(void); #ifdef PROFILING @@ -59,8 +59,8 @@ double mut_user_time_during_heap_census(void); void statDescribeGens( void ); -Ticks stat_getElapsedGCTime(void); -Ticks stat_getElapsedTime(void); +Time stat_getElapsedGCTime(void); +Time stat_getElapsedTime(void); /* Only exported for Papi.c */ void statsPrintf( char *s, ... ) diff --git a/rts/Task.c b/rts/Task.c index 9e8214899c..d72d8a9085 100644 --- a/rts/Task.c +++ b/rts/Task.c @@ -165,7 +165,7 @@ static Task* newTask (rtsBool worker) { #if defined(THREADED_RTS) - Ticks currentElapsedTime, currentUserTime; + Time currentElapsedTime, currentUserTime; #endif Task *task; @@ -329,7 +329,7 @@ void taskTimeStamp (Task *task USED_IF_THREADS) { #if defined(THREADED_RTS) - Ticks currentElapsedTime, currentUserTime; + Time currentElapsedTime, currentUserTime; currentUserTime = getThreadCPUTime(); currentElapsedTime = getProcessElapsedTime(); @@ -347,7 +347,7 @@ taskTimeStamp (Task *task USED_IF_THREADS) } void -taskDoneGC (Task *task, Ticks cpu_time, Ticks elapsed_time) +taskDoneGC (Task *task, Time cpu_time, Time elapsed_time) { task->gc_time += cpu_time; task->gc_etime += elapsed_time; diff --git a/rts/Task.h b/rts/Task.h index 4000a045d4..386e003d28 100644 --- a/rts/Task.h +++ b/rts/Task.h @@ -149,12 +149,12 @@ typedef struct Task_ { // really want separate stats for each call in a nested chain of // foreign->haskell->foreign->haskell calls, but we'll get a // separate Task for each of the haskell calls. - Ticks elapsedtimestart; - Ticks muttimestart; - Ticks mut_time; - Ticks mut_etime; - Ticks gc_time; - Ticks gc_etime; + Time elapsedtimestart; + Time muttimestart; + Time mut_time; + Time mut_etime; + Time gc_time; + Time gc_etime; // Links tasks on the returning_tasks queue of a Capability, and // on spare_workers. @@ -208,7 +208,7 @@ void workerTaskStop (Task *task); void taskTimeStamp (Task *task); // The current Task has finished a GC, record the amount of time spent. -void taskDoneGC (Task *task, Ticks cpu_time, Ticks elapsed_time); +void taskDoneGC (Task *task, Time cpu_time, Time elapsed_time); // Put the task back on the free list, mark it stopped. Used by // forkProcess(). diff --git a/rts/Threads.c b/rts/Threads.c index 3e1c5cff0b..7e660d63f6 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -701,20 +701,22 @@ void printThreadBlockage(StgTSO *tso) { switch (tso->why_blocked) { +#if defined(mingw32_HOST_OS) + case BlockedOnDoProc: + debugBelch("is blocked on proc (request: %u)", tso->block_info.async_result->reqID); + break; +#endif +#if !defined(THREADED_RTS) case BlockedOnRead: debugBelch("is blocked on read from fd %d", (int)(tso->block_info.fd)); break; case BlockedOnWrite: debugBelch("is blocked on write to fd %d", (int)(tso->block_info.fd)); break; -#if defined(mingw32_HOST_OS) - case BlockedOnDoProc: - debugBelch("is blocked on proc (request: %u)", tso->block_info.async_result->reqID); - break; -#endif case BlockedOnDelay: debugBelch("is blocked until %ld", (long)(tso->block_info.target)); break; +#endif case BlockedOnMVar: debugBelch("is blocked on an MVar @ %p", tso->block_info.closure); break; diff --git a/rts/Ticker.h b/rts/Ticker.h index 5804501da5..685a79e5d2 100644 --- a/rts/Ticker.h +++ b/rts/Ticker.h @@ -13,7 +13,7 @@ typedef void (*TickProc)(int); -void initTicker (nat ms, TickProc handle_tick); +void initTicker (Time interval, TickProc handle_tick); void startTicker (void); void stopTicker (void); void exitTicker (rtsBool wait); diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c index 2e2209d2d3..88fc64010d 100644 --- a/rts/eventlog/EventLog.c +++ b/rts/eventlog/EventLog.c @@ -149,7 +149,7 @@ static inline void postBuf(EventsBuf *eb, StgWord8 *buf, nat size) } static inline StgWord64 time_ns(void) -{ return stat_getElapsedTime() * (1000000000LL/TICKS_PER_SECOND); } +{ return TimeToNS(stat_getElapsedTime()); } static inline void postEventTypeNum(EventsBuf *eb, EventTypeNum etNum) { postWord16(eb, etNum); } diff --git a/rts/posix/GetTime.c b/rts/posix/GetTime.c index eab7177fe5..c31b319af4 100644 --- a/rts/posix/GetTime.c +++ b/rts/posix/GetTime.c @@ -44,7 +44,7 @@ // we'll implement getProcessCPUTime() and getProcessElapsedTime() // separately, using getrusage() and gettimeofday() respectively -Ticks getProcessCPUTime(void) +Time getProcessCPUTime(void) { #if !defined(BE_CONSERVATIVE) && defined(HAVE_CLOCK_GETTIME) && defined (_SC_CPUTIME) && defined(CLOCK_PROCESS_CPUTIME_ID) && defined(HAVE_SYSCONF) static int checked_sysconf = 0; @@ -59,8 +59,7 @@ Ticks getProcessCPUTime(void) int res; res = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); if (res == 0) { - return ((Ticks)ts.tv_sec * TICKS_PER_SECOND + - ((Ticks)ts.tv_nsec * TICKS_PER_SECOND) / 1000000000); + return SecondsToTime(ts.tv_sec) + NSToTime(ts.tv_nsec); } } #endif @@ -69,20 +68,18 @@ Ticks getProcessCPUTime(void) { struct rusage t; getrusage(RUSAGE_SELF, &t); - return ((Ticks)t.ru_utime.tv_sec * TICKS_PER_SECOND + - ((Ticks)t.ru_utime.tv_usec * TICKS_PER_SECOND)/1000000); + return SecondsToTime(t.ru_utime.tv_sec) + USToTime(t.ru_utime.tv_usec); } } -Ticks getProcessElapsedTime(void) +Time getProcessElapsedTime(void) { struct timeval tv; gettimeofday(&tv, (struct timezone *) NULL); - return ((Ticks)tv.tv_sec * TICKS_PER_SECOND + - ((Ticks)tv.tv_usec * TICKS_PER_SECOND)/1000000); + return SecondsToTime(tv.tv_sec) + USToTime(tv.tv_usec); } -void getProcessTimes(Ticks *user, Ticks *elapsed) +void getProcessTimes(Time *user, Time *elapsed) { *user = getProcessCPUTime(); *elapsed = getProcessElapsedTime(); @@ -92,29 +89,29 @@ void getProcessTimes(Ticks *user, Ticks *elapsed) // we'll use the old times() API. -Ticks getProcessCPUTime(void) +Time getProcessCPUTime(void) { #if !defined(THREADED_RTS) && USE_PAPI long long usec; if ((usec = PAPI_get_virt_usec()) < 0) { barf("PAPI_get_virt_usec: %lld", usec); } - return ((usec * TICKS_PER_SECOND) / 1000000); + return USToTime(usec); #else - Ticks user, elapsed; + Time user, elapsed; getProcessTimes(&user,&elapsed); return user; #endif } -Ticks getProcessElapsedTime(void) +Time getProcessElapsedTime(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes(&user,&elapsed); return elapsed; } -void getProcessTimes(Ticks *user, Ticks *elapsed) +void getProcessTimes(Time *user, Time *elapsed) { static nat ClockFreq = 0; @@ -141,20 +138,20 @@ void getProcessTimes(Ticks *user, Ticks *elapsed) struct tms t; clock_t r = times(&t); - *user = (((Ticks)t.tms_utime * TICKS_PER_SECOND) / ClockFreq); - *elapsed = (((Ticks)r * TICKS_PER_SECOND) / ClockFreq); + *user = SecondsToTime(t.tms_utime) / ClockFreq; + *elapsed = SecondsToTime(r) / ClockFreq; } #endif // HAVE_TIMES -Ticks getThreadCPUTime(void) +Time getThreadCPUTime(void) { #if USE_PAPI long long usec; if ((usec = PAPI_get_virt_usec()) < 0) { barf("PAPI_get_virt_usec: %lld", usec); } - return ((usec * TICKS_PER_SECOND) / 1000000); + return USToTime(usec); #elif !defined(BE_CONSERVATIVE) && defined(HAVE_CLOCK_GETTIME) && defined (_SC_THREAD_CPUTIME) && defined(CLOCK_THREAD_CPUTIME_ID) && defined(HAVE_SYSCONF) { @@ -172,8 +169,7 @@ Ticks getThreadCPUTime(void) int res; res = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); if (res == 0) { - return ((Ticks)ts.tv_sec * TICKS_PER_SECOND + - ((Ticks)ts.tv_nsec * TICKS_PER_SECOND) / 1000000000); + return SecondsToTime(ts.tv_sec) + NSToTime(ts.tv_nsec); } } } diff --git a/rts/posix/Itimer.c b/rts/posix/Itimer.c index e46bb12546..ece54910c2 100644 --- a/rts/posix/Itimer.c +++ b/rts/posix/Itimer.c @@ -44,68 +44,52 @@ #include <string.h> -/* Major bogosity: - * - * In the threaded RTS, we can't set the virtual timer because the - * thread which has the virtual timer might be sitting waiting for a - * capability, and the virtual timer only ticks in CPU time. +/* + * We use a realtime timer by default. I found this much more + * reliable than a CPU timer: * - * So, possible solutions: + * Experiments with different frequences: using + * CLOCK_REALTIME/CLOCK_MONOTONIC on Linux 2.6.32, + * 1000us has <1% impact on runtime + * 100us has ~2% impact on runtime + * 10us has ~40% impact on runtime * - * (1) tick in realtime. Not very good, because this ticker is used for - * profiling, and this will give us unreliable time profiling - * results. + * using CLOCK_PROCESS_CPUTIME_ID on Linux 2.6.32, + * I cannot get it to tick faster than 10ms (10000us) + * which isn't great for profiling. * - * (2) save/restore the virtual timer around excursions into STG land. - * Sounds great, but I tried it and the resolution of the virtual timer - * isn't good enough (on Linux) - most of our excursions fall - * within the timer's resolution and we never make any progress. - * - * (3) have a virtual timer in every OS thread. Might be reasonable, - * because most of the time there is only ever one of these - * threads running, so it approximates a single virtual timer. - * But still quite bogus (and I got crashes when I tried this). + * In the threaded RTS, we can't tick in CPU time because the thread + * which has the virtual timer might be idle, so the tick would never + * fire. Therfore we used to tick in realtime in the threaded RTS and + * in CPU time otherwise, but now we always tick in realtime, for + * several reasons: * - * For now, we're using (1), but this needs a better solution. --SDM + * - resolution (see above) + * - consistency (-threaded is the same as normal) + * - more consistency: Windows only has a realtime timer + * + * Note we want to use CLOCK_MONOTONIC rather than CLOCK_REALTIME, + * because the latter may jump around (NTP adjustments, leap seconds + * etc.). */ #if defined(USE_TIMER_CREATE) - # define ITIMER_SIGNAL SIGVTALRM -# ifdef THREADED_RTS -# define TIMER_FLAVOUR CLOCK_REALTIME -# else -# define TIMER_FLAVOUR CLOCK_PROCESS_CPUTIME_ID -# endif - #elif defined(HAVE_SETITIMER) - -# if defined(THREADED_RTS) || !defined(HAVE_SETITIMER_VIRTUAL) -// Oh dear, we have to use SIGALRM if there's no timer_create and -// we're using the THREADED_RTS. This leads to problems, see bug #850. -// We also use it if we don't have a virtual timer (trac #2883). -# define ITIMER_SIGNAL SIGALRM -# define ITIMER_FLAVOUR ITIMER_REAL -# else -# define ITIMER_SIGNAL SIGVTALRM -# define ITIMER_FLAVOUR ITIMER_VIRTUAL -# endif - +# define ITIMER_SIGNAL SIGALRM + // Using SIGALRM can leads to problems, see #850. But we have no + // option if timer_create() is not available. #else - # error No way to set an interval timer. - #endif #if defined(USE_TIMER_CREATE) static timer_t timer; #endif -static nat itimer_interval = 50; +static Time itimer_interval = DEFAULT_TICK_INTERVAL; -static -void -install_vtalrm_handler(TickProc handle_tick) +static void install_vtalrm_handler(TickProc handle_tick) { struct sigaction action; @@ -132,32 +116,35 @@ install_vtalrm_handler(TickProc handle_tick) } void -initTicker (nat ms, TickProc handle_tick) +initTicker (Time interval, TickProc handle_tick) { - install_vtalrm_handler(handle_tick); - -#if !defined(THREADED_RTS) - timestamp = getourtimeofday(); -#endif - - itimer_interval = ms; + itimer_interval = interval; #if defined(USE_TIMER_CREATE) { struct sigevent ev; + clockid_t clock; - // Keep programs like valgrind happy + // Keep programs like valgrind happy memset(&ev, 0, sizeof(ev)); ev.sigev_notify = SIGEV_SIGNAL; ev.sigev_signo = ITIMER_SIGNAL; - if (timer_create(TIMER_FLAVOUR, &ev, &timer) != 0) { +#if defined(CLOCK_MONOTONIC) + clock = CLOCK_MONOTONIC; +#else + clock = CLOCK_REALTIME; +#endif + + if (timer_create(clock, &ev, &timer) != 0) { sysErrorBelch("timer_create"); stg_exit(EXIT_FAILURE); } } #endif + + install_vtalrm_handler(handle_tick); } void @@ -167,8 +154,8 @@ startTicker(void) { struct itimerspec it; - it.it_value.tv_sec = itimer_interval / 1000; - it.it_value.tv_nsec = (itimer_interval % 1000) * 1000000; + it.it_value.tv_sec = TimeToSeconds(itimer_interval); + it.it_value.tv_nsec = TimeToNS(itimer_interval); it.it_interval = it.it_value; if (timer_settime(timer, 0, &it, NULL) != 0) { @@ -180,11 +167,11 @@ startTicker(void) { struct itimerval it; - it.it_value.tv_sec = itimer_interval / 1000; - it.it_value.tv_usec = (itimer_interval % 1000) * 1000; + it.it_value.tv_sec = TimeToSeconds(itimer_interval); + it.it_value.tv_usec = TimeToUS(itimer_interval); it.it_interval = it.it_value; - if (setitimer(ITIMER_FLAVOUR, &it, NULL) != 0) { + if (setitimer(ITIMER_REAL, &it, NULL) != 0) { sysErrorBelch("setitimer"); stg_exit(EXIT_FAILURE); } @@ -213,7 +200,7 @@ stopTicker(void) it.it_value.tv_usec = 0; it.it_interval = it.it_value; - if (setitimer(ITIMER_FLAVOUR, &it, NULL) != 0) { + if (setitimer(ITIMER_REAL, &it, NULL) != 0) { sysErrorBelch("setitimer"); stg_exit(EXIT_FAILURE); } @@ -229,23 +216,6 @@ exitTicker (rtsBool wait STG_UNUSED) #endif } -/* gettimeofday() takes around 1us on our 500MHz PIII. Since we're - * only calling it 50 times/s, it shouldn't have any great impact. - */ -lnat -getourtimeofday(void) -{ - struct timeval tv; - nat interval; - interval = RtsFlags.MiscFlags.tickInterval; - if (interval == 0) { interval = 50; } - gettimeofday(&tv, (struct timezone *) NULL); - - // Avoid overflow when we multiply seconds by 1000. See #2848 - return (lnat)((StgWord64)tv.tv_sec * 1000 / interval + - (StgWord64)tv.tv_usec / (interval * 1000)); -} - int rtsTimerSignal(void) { diff --git a/rts/posix/Itimer.h b/rts/posix/Itimer.h index b67c8c442e..7996da7c94 100644 --- a/rts/posix/Itimer.h +++ b/rts/posix/Itimer.h @@ -9,6 +9,4 @@ #ifndef ITIMER_H #define ITIMER_H -RTS_PRIVATE lnat getourtimeofday ( void ); - #endif /* ITIMER_H */ diff --git a/rts/posix/Select.c b/rts/posix/Select.c index 3c87fbdc70..45737ce0cc 100644 --- a/rts/posix/Select.c +++ b/rts/posix/Select.c @@ -16,6 +16,7 @@ #include "Capability.h" #include "Select.h" #include "AwaitEvent.h" +#include "Stats.h" # ifdef HAVE_SYS_SELECT_H # include <sys/select.h> @@ -37,13 +38,24 @@ #endif #if !defined(THREADED_RTS) -/* last timestamp */ -lnat timestamp = 0; /* * The threaded RTS uses an IO-manager thread in Haskell instead (see GHC.Conc) */ +#define LowResTimeToTime(t) (USToTime((t) * 10000)) + +/* + * Return the time since the program started, in LowResTime, + * rounded down. + * + * This is only used by posix/Select.c. It should probably go away. + */ +LowResTime getourtimeofday(void) +{ + return TimeToUS(stat_getElapsedTime()) / 10000; +} + /* There's a clever trick here to avoid problems when the time wraps * around. Since our maximum delay is smaller than 31 bits of ticks * (it's actually 31 bits of microseconds), we can safely check @@ -55,15 +67,14 @@ lnat timestamp = 0; * if this is true, then our time has expired. * (idea due to Andy Gill). */ -static rtsBool -wakeUpSleepingThreads(lnat ticks) +static rtsBool wakeUpSleepingThreads (LowResTime now) { StgTSO *tso; rtsBool flag = rtsFalse; while (sleeping_queue != END_TSO_QUEUE) { tso = sleeping_queue; - if (((long)ticks - (long)tso->block_info.target) < 0) { + if (((long)now - (long)tso->block_info.target) < 0) { break; } sleeping_queue = tso->_link; @@ -108,7 +119,8 @@ awaitEvent(rtsBool wait) rtsBool select_succeeded = rtsTrue; rtsBool unblock_all = rtsFalse; struct timeval tv; - lnat min, ticks; + Time min; + LowResTime now; tv.tv_sec = 0; tv.tv_usec = 0; @@ -128,18 +140,17 @@ awaitEvent(rtsBool wait) */ do { - ticks = timestamp = getourtimeofday(); - if (wakeUpSleepingThreads(ticks)) { + now = getourtimeofday(); + if (wakeUpSleepingThreads(now)) { return; } if (!wait) { min = 0; } else if (sleeping_queue != END_TSO_QUEUE) { - min = (sleeping_queue->block_info.target - ticks) - * RtsFlags.MiscFlags.tickInterval * 1000; + min = LowResTimeToTime(sleeping_queue->block_info.target - now); } else { - min = 0x7ffffff; + min = (Time)-1; } /* @@ -185,8 +196,8 @@ awaitEvent(rtsBool wait) /* Check for any interesting events */ - tv.tv_sec = min / 1000000; - tv.tv_usec = min % 1000000; + tv.tv_sec = TimeToSeconds(min); + tv.tv_usec = TimeToUS(min) % 1000000; while ((numFound = select(maxfd+1, &rfd, &wfd, NULL, &tv)) < 0) { if (errno != EINTR) { @@ -236,7 +247,7 @@ awaitEvent(rtsBool wait) /* check for threads that need waking up */ - wakeUpSleepingThreads(getourtimeofday()); + wakeUpSleepingThreads(getourtimeofday()); /* If new runnable threads have arrived, stop waiting for * I/O and run them. diff --git a/rts/posix/Select.h b/rts/posix/Select.h index e92a4bc889..15fa00ac66 100644 --- a/rts/posix/Select.h +++ b/rts/posix/Select.h @@ -9,9 +9,9 @@ #ifndef POSIX_SELECT_H #define POSIX_SELECT_H -#if !defined(THREADED_RTS) -/* In Select.c */ -extern lnat timestamp; -#endif +// An absolute time value in units of 10ms. +typedef StgWord LowResTime; + +RTS_PRIVATE LowResTime getourtimeofday ( void ); #endif /* POSIX_SELECT_H */ diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index e42a3a1239..b4f325631f 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -181,9 +181,9 @@ typedef struct gc_thread_ { lnat no_work; lnat scav_find_work; - Ticks gc_start_cpu; // process CPU time - Ticks gc_start_elapsed; // process elapsed time - Ticks gc_start_thread_cpu; // thread CPU time + Time gc_start_cpu; // process CPU time + Time gc_start_elapsed; // process elapsed time + Time gc_start_thread_cpu; // thread CPU time lnat gc_start_faults; // ------------------- diff --git a/rts/win32/GetTime.c b/rts/win32/GetTime.c index 13fb5ab22d..9a322bf0a5 100644 --- a/rts/win32/GetTime.c +++ b/rts/win32/GetTime.c @@ -15,26 +15,26 @@ # include <time.h> #endif -#define HNS_PER_SEC 10000000LL /* FILETIMES are in units of 100ns */ /* Convert FILETIMEs into secs */ -static INLINE_ME Ticks -fileTimeToTicks(FILETIME ft) +static INLINE_ME Time +fileTimeToRtsTime(FILETIME ft) { - Ticks t; - t = ((Ticks)ft.dwHighDateTime << 32) | ft.dwLowDateTime; - t = (t * TICKS_PER_SECOND) / HNS_PER_SEC; + Time t; + t = ((Time)ft.dwHighDateTime << 32) | ft.dwLowDateTime; + t = NSToTime(t * 100); + /* FILETIMES are in units of 100ns */ return t; } void -getProcessTimes(Ticks *user, Ticks *elapsed) +getProcessTimes(Time *user, Time *elapsed) { *user = getProcessCPUTime(); *elapsed = getProcessElapsedTime(); } -Ticks +Time getProcessCPUTime(void) { FILETIME creationTime, exitTime, userTime, kernelTime = {0,0}; @@ -44,14 +44,14 @@ getProcessCPUTime(void) return 0; } - return fileTimeToTicks(userTime); + return fileTimeToRtsTime(userTime); } // getProcessElapsedTime relies on QueryPerformanceFrequency // which should be available on any Windows computer thay you // would want to run Haskell on. Satnam Singh, 5 July 2010. -Ticks +Time getProcessElapsedTime(void) { // frequency represents the number of ticks per second @@ -73,13 +73,14 @@ getProcessElapsedTime(void) // Get the tick count. QueryPerformanceCounter(&system_time) ; - // Return the tick count as a millisecond value. + // Return the tick count as a Time value. // Using double to compute the intermediate value, because a 64-bit - // int would overflow when multiplied by TICKS_PER_SECOND in about 81 days. - return (Ticks)((TICKS_PER_SECOND * (double)system_time.QuadPart) / (double)frequency.QuadPart) ; + // int would overflow when multiplied by TICK_RESOLUTION in about 81 days. + return fsecondsToTime((double)system_time.QuadPart / + (double)frequency.QuadPart) ; } -Ticks +Time getThreadCPUTime(void) { FILETIME creationTime, exitTime, userTime, kernelTime = {0,0}; @@ -89,7 +90,7 @@ getThreadCPUTime(void) return 0; } - return fileTimeToTicks(userTime); + return fileTimeToRtsTime(userTime); } void diff --git a/rts/win32/Ticker.c b/rts/win32/Ticker.c index 1c45482651..d54fa4680f 100644 --- a/rts/win32/Ticker.c +++ b/rts/win32/Ticker.c @@ -2,166 +2,80 @@ * RTS periodic timers. * */ +#define _WIN32_WINNT 0x0500 + #include "Rts.h" #include "Ticker.h" #include <windows.h> #include <stdio.h> #include <process.h> -/* - * Provide a timer service for the RTS, periodically - * notifying it that a number of 'ticks' has passed. - * - */ - -/* To signal pause or shutdown of the timer service, we use a local - * event which the timer thread listens to. - */ -static HANDLE hStopEvent = INVALID_HANDLE_VALUE; -static HANDLE tickThread = INVALID_HANDLE_VALUE; - -static TickProc tickProc = NULL; +static TickProc tick_proc = NULL; +static HANDLE timer_queue = NULL; +static HANDLE timer = NULL; +static Time tick_interval = 0; -static enum { TickerGo, TickerPause, TickerExit } ticker_state; - -/* - * Ticking is done by a separate thread which periodically - * wakes up to handle a tick. - * - * This is the portable way of providing a timer service under - * Win32; features like waitable timers or timer queues are only - * supported by a subset of the Win32 platforms (notably not - * under Win9x.) - * - */ -static -unsigned -WINAPI -TimerProc(PVOID param) +static VOID CALLBACK tick_callback( + PVOID lpParameter STG_UNUSED, + BOOLEAN TimerOrWaitFired STG_UNUSED + ) { - int ms = (int)param; - DWORD waitRes = 0; - - /* interpret a < 0 timeout period as 'instantaneous' */ - if (ms < 0) ms = 0; - - while (1) { - switch (ticker_state) { - case TickerGo: - waitRes = WaitForSingleObject(hStopEvent, ms); - break; - case TickerPause: - waitRes = WaitForSingleObject(hStopEvent, INFINITE); - break; - case TickerExit: - /* event has become signalled */ - tickProc = NULL; - CloseHandle(hStopEvent); - hStopEvent = INVALID_HANDLE_VALUE; - return 0; - } - - switch (waitRes) { - case WAIT_OBJECT_0: - /* event has become signalled */ - ResetEvent(hStopEvent); - continue; - case WAIT_TIMEOUT: - /* tick */ - tickProc(0); - break; - case WAIT_FAILED: - sysErrorBelch("TimerProc: WaitForSingleObject failed"); - break; - default: - errorBelch("TimerProc: unexpected result %lu\n", waitRes); - break; - } - } - return 0; + tick_proc(0); } +// We use the CreateTimerQueue() API which has been around since +// Windows 2000. Apparently it gives bad results before Windows 7, +// though: http://www.virtualdub.org/blog/pivot/entry.php?id=272 +// +// Even with the improvements in Windows 7, this timer isn't going to +// be very useful for profiling with a max usable resolution of +// 15ms. Unfortunately we don't have anything better. void -initTicker (nat ms, TickProc handle_tick) +initTicker (Time interval, TickProc handle_tick) { - unsigned threadId; - /* 'hStopEvent' is a manual-reset event that's signalled upon - * shutdown of timer service (=> timer thread.) - */ - hStopEvent = CreateEvent ( NULL, - TRUE, - FALSE, - NULL); - if (hStopEvent == INVALID_HANDLE_VALUE) { - sysErrorBelch("CreateEvent"); - stg_exit(EXIT_FAILURE); - } - tickProc = handle_tick; - ticker_state = TickerPause; - tickThread = (HANDLE)(long)_beginthreadex( NULL, - 0, - TimerProc, - (LPVOID)ms, - 0, - &threadId); + tick_interval = interval; + tick_proc = handle_tick; - if (tickThread == 0) { - sysErrorBelch("_beginthreadex"); - stg_exit(EXIT_FAILURE); - } + timer_queue = CreateTimerQueue(); + if (timer_queue == NULL) { + sysErrorBelch("CreateTimerQueue"); + stg_exit(EXIT_FAILURE); + } } void startTicker(void) { - ticker_state = TickerGo; - SetEvent(hStopEvent); + BOOL r; + + r = CreateTimerQueueTimer(&timer, + timer_queue, + tick_callback, + 0, + 0, + TimeToUS(tick_interval) / 1000, // ms + WT_EXECUTEINTIMERTHREAD); + if (r == 0) { + sysErrorBelch("CreateTimerQueueTimer"); + stg_exit(EXIT_FAILURE); + } } void stopTicker(void) { - ticker_state = TickerPause; - SetEvent(hStopEvent); + if (timer_queue != NULL && timer != NULL) { + DeleteTimerQueueTimer(timer_queue, timer, NULL); + timer = NULL; + } } void exitTicker (rtsBool wait) { - // We must wait for the ticker thread to terminate, since if we - // are in a DLL that is about to be unloaded, the ticker thread - // cannot be allowed to return to a missing DLL. - - if (hStopEvent != INVALID_HANDLE_VALUE && - tickThread != INVALID_HANDLE_VALUE) { - DWORD exitCode; - ticker_state = TickerExit; - SetEvent(hStopEvent); - while (wait) { - // See #3748: - // - // when the RTS is compiled into a DLL (wait==rtsTrue), - // the ticker thread must stop before we exit, or chaos - // will ensue. We can't kill it, because it may be - // holding a lock. - // - // When not compiled into a DLL, we wait for - // the thread out of courtesy, but give up after 200ms if - // it still hasn't stopped. - WaitForSingleObject(tickThread, 200); - if (!GetExitCodeThread(tickThread, &exitCode)) { - return; - } - CloseHandle(tickThread); - if (exitCode != STILL_ACTIVE) { - tickThread = INVALID_HANDLE_VALUE; - if ( hStopEvent != INVALID_HANDLE_VALUE ) { - CloseHandle(hStopEvent); - hStopEvent = INVALID_HANDLE_VALUE; - } - return; - } - } + if (timer_queue != NULL) { + DeleteTimerQueueEx(timer_queue, wait ? INVALID_HANDLE_VALUE : NULL); + timer_queue = NULL; } } diff --git a/rules/build-prog.mk b/rules/build-prog.mk index 99093d3fee..1f43169ce3 100644 --- a/rules/build-prog.mk +++ b/rules/build-prog.mk @@ -117,8 +117,12 @@ ifeq "$3" "0" # worry about where the RTS header files are $(call c-suffix-rules,$1,$2,v,YES) else +ifeq "$$($1_$2_UseGhcForCC)" "YES" +$(call c-suffix-rules,$1,$2,v,YES) +else $(call c-suffix-rules,$1,$2,v,NO) endif +endif $(call hs-suffix-rules,$1,$2,v) $$(foreach dir,$$($1_$2_HS_SRC_DIRS),\ |