diff options
author | Simon Marlow <marlowsd@gmail.com> | 2011-11-25 13:11:39 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2011-11-25 16:11:36 +0000 |
commit | 6b1098511aaabd2c9503ee7be6da1944466f9cb4 (patch) | |
tree | 83b3001603c7e6a5cfb3ac04adbb99c40504942e | |
parent | 18aae18503442276e14a47eabf4786bc7210662e (diff) | |
download | haskell-6b1098511aaabd2c9503ee7be6da1944466f9cb4.tar.gz |
Time handling overhaul
Terminology cleanup: the type "Ticks" has been renamed "Time", which
is an StgWord64 in units of TIME_RESOLUTION (currently nanoseconds).
The terminology "tick" is now used consistently to mean the interval
between timer signals.
The ticker now always ticks in realtime (actually CLOCK_MONOTONIC if
we have it). Before it used CPU time in the non-threaded RTS and
realtime in the threaded RTS, but I've discovered that the CPU timer
has terrible resolution (at least on Linux) and isn't much use for
profiling. So now we always use realtime. This should also fix
The default tick interval is now 10ms, except when profiling where we
drop it to 1ms. This gives more accurate profiles without affecting
runtime too much (<1%).
Lots of cleanups - the resolution of Time is now in one place
only (Rts.h) rather than having calculations that depend on the
resolution scattered all over the RTS. I hope I found them all.
-rw-r--r-- | ghc/ghc.mk | 7 | ||||
-rw-r--r-- | ghc/hschooks.c | 4 | ||||
-rw-r--r-- | includes/Rts.h | 30 | ||||
-rw-r--r-- | includes/rts/Flags.h | 21 | ||||
-rw-r--r-- | includes/rts/storage/TSO.h | 6 | ||||
-rw-r--r-- | rts/GetTime.h | 14 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 13 | ||||
-rw-r--r-- | rts/ProfHeap.c | 3 | ||||
-rw-r--r-- | rts/ProfHeap.h | 4 | ||||
-rw-r--r-- | rts/Profiling.c | 8 | ||||
-rw-r--r-- | rts/Proftimer.c | 6 | ||||
-rw-r--r-- | rts/RtsFlags.c | 76 | ||||
-rw-r--r-- | rts/Schedule.c | 2 | ||||
-rw-r--r-- | rts/Stats.c | 162 | ||||
-rw-r--r-- | rts/Stats.h | 6 | ||||
-rw-r--r-- | rts/Task.c | 6 | ||||
-rw-r--r-- | rts/Task.h | 14 | ||||
-rw-r--r-- | rts/Threads.c | 12 | ||||
-rw-r--r-- | rts/Ticker.h | 2 | ||||
-rw-r--r-- | rts/eventlog/EventLog.c | 2 | ||||
-rw-r--r-- | rts/posix/GetTime.c | 38 | ||||
-rw-r--r-- | rts/posix/Itimer.c | 124 | ||||
-rw-r--r-- | rts/posix/Itimer.h | 2 | ||||
-rw-r--r-- | rts/posix/Select.c | 39 | ||||
-rw-r--r-- | rts/posix/Select.h | 8 | ||||
-rw-r--r-- | rts/sm/GCThread.h | 6 | ||||
-rw-r--r-- | rts/win32/GetTime.c | 31 | ||||
-rw-r--r-- | rts/win32/Ticker.c | 178 | ||||
-rw-r--r-- | rules/build-prog.mk | 4 |
29 files changed, 381 insertions, 447 deletions
diff --git a/ghc/ghc.mk b/ghc/ghc.mk index 38b3016b30..2af90bed28 100644 --- a/ghc/ghc.mk +++ b/ghc/ghc.mk @@ -26,8 +26,11 @@ ghc_stage1_MORE_HC_OPTS = $(GhcStage1HcOpts) ghc_stage2_MORE_HC_OPTS = $(GhcStage2HcOpts) ghc_stage3_MORE_HC_OPTS = $(GhcStage3HcOpts) -ghc_stage2_CC_OPTS = -Iincludes -ghc_stage3_CC_OPTS = -Iincludes +# We need __GLASGOW_HASKELL__ in hschooks.c, so we have to build C +# sources with GHC: +ghc_stage1_UseGhcForCC = YES +ghc_stage2_UseGhcForCC = YES +ghc_stage3_UseGhcForCC = YES ghc_stage1_C_FILES_NODEPS = ghc/hschooks.c diff --git a/ghc/hschooks.c b/ghc/hschooks.c index 18679281e0..037d4e18be 100644 --- a/ghc/hschooks.c +++ b/ghc/hschooks.c @@ -23,7 +23,11 @@ defaultsHook (void) // See #3408: the default idle GC time of 0.3s is too short on // Windows where we receive console events once per second or so. +#if __GLASGOW_HASKELL__ >= 703 + RtsFlags.GcFlags.idleGCDelayTime = SecondsToTime(5); +#else RtsFlags.GcFlags.idleGCDelayTime = 5*1000; +#endif } void diff --git a/includes/Rts.h b/includes/Rts.h index 5caba59dbe..45c09f8fb7 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -155,6 +155,36 @@ void _assertFail(const char *filename, unsigned int linenum) #endif /* ----------------------------------------------------------------------------- + Time values in the RTS + -------------------------------------------------------------------------- */ + +// For most time values in the RTS we use a fixed resolution of nanoseconds, +// normalising the time we get from platform-dependent APIs to this +// resolution. +#define TIME_RESOLUTION 1000000000 +typedef StgInt64 Time; + +#if TIME_RESOLUTION == 1000000000 +// I'm being lazy, but it's awkward to define fully general versions of these +#define TimeToUS(t) (t / 1000) +#define TimeToNS(t) (t) +#define USToTime(t) ((Time)(t) * 1000) +#define NSToTime(t) ((Time)(t)) +#else +#error Fix TimeToNS(), TimeToUS() etc. +#endif + +#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) +#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) + +// Use instead of SecondsToTime() when we have a floating-point +// seconds value, to avoid truncating it. +INLINE_HEADER Time fsecondsToTime (double t) +{ + return (Time)(t * TIME_RESOLUTION); +} + +/* ----------------------------------------------------------------------------- Include everything STG-ish -------------------------------------------------------------------------- */ diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index 2d1516f586..439b261fd8 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -52,7 +52,7 @@ struct GC_FLAGS { rtsBool ringBell; rtsBool frontpanel; - int idleGCDelayTime; /* in milliseconds */ + Time idleGCDelayTime; /* units: TIME_RESOLUTION */ StgWord heapBase; /* address to ask the OS for memory */ }; @@ -99,8 +99,8 @@ struct PROFILING_FLAGS { # define HEAP_BY_CLOSURE_TYPE 8 - nat profileInterval; /* delta between samples (in ms) */ - nat profileIntervalTicks; /* delta between samples (in 'ticks') */ + Time heapProfileInterval; /* time between samples */ + nat heapProfileIntervalTicks; /* ticks between samples (derived) */ rtsBool includeTSOs; @@ -135,12 +135,21 @@ struct TRACE_FLAGS { }; struct CONCURRENT_FLAGS { - int ctxtSwitchTime; /* in milliseconds */ - int ctxtSwitchTicks; /* derived */ + Time ctxtSwitchTime; /* units: TIME_RESOLUTION */ + int ctxtSwitchTicks; /* derived */ }; +/* + * The tickInterval is the time interval between "ticks", ie. + * timer signals (see Timer.{c,h}). It is the frequency at + * which we sample CCCS for profiling. + * + * It is changed by the +RTS -V<secs> flag. + */ +#define DEFAULT_TICK_INTERVAL USToTime(10000) + struct MISC_FLAGS { - int tickInterval; /* in milliseconds */ + Time tickInterval; /* units: TIME_RESOLUTION */ rtsBool install_signal_handlers; rtsBool machineReadable; StgWord linkerMemBase; /* address to ask the OS for memory diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h index 04e673fb12..20c6ebf4f2 100644 --- a/includes/rts/storage/TSO.h +++ b/includes/rts/storage/TSO.h @@ -54,7 +54,13 @@ typedef union { #if defined(mingw32_HOST_OS) StgAsyncIOResult *async_result; #endif +#if !defined(THREADED_RTS) StgWord target; + // Only for the non-threaded RTS: the target time for a thread + // blocked in threadDelay, in units of 10ms. This is a + // compromise: we don't want to take up much space in the TSO. If + // you want better resolution for threadDelay, use -threaded. +#endif } StgTSOBlockInfo; diff --git a/rts/GetTime.h b/rts/GetTime.h index b8d402db7c..86c5511df9 100644 --- a/rts/GetTime.h +++ b/rts/GetTime.h @@ -11,16 +11,10 @@ #include "BeginPrivate.h" -// We'll use a fixed resolution of usec for now. The machine -// dependent implementation may have a different resolution, but we'll -// normalise to this for the machine independent interface. -#define TICKS_PER_SECOND 1000000 -typedef StgInt64 Ticks; - -Ticks getProcessCPUTime (void); -Ticks getThreadCPUTime (void); -Ticks getProcessElapsedTime (void); -void getProcessTimes (Ticks *user, Ticks *elapsed); +Time getProcessCPUTime (void); +Time getThreadCPUTime (void); +Time getProcessElapsedTime (void); +void getProcessTimes (Time *user, Time *elapsed); /* Get the current date and time. Uses seconds since the Unix epoch, plus nanoseconds diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 85920932c9..8836d3bfe6 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1785,16 +1785,13 @@ stg_delayzh #else + W_ time; - W_ divisor; (time) = foreign "C" getourtimeofday() [R1]; - divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags)); - if (divisor == 0) { - divisor = 50; - } - divisor = divisor * 1000; - target = ((R1 + divisor - 1) / divisor) /* divide rounding up */ - + time + 1; /* Add 1 as getourtimeofday rounds down */ + // getourtimeofday() returns a value in units of 10ms + // R1 is in microseconds, we need to (/ 10000), rounding up + target = time + 1 + (R1 + 10000-1) / 10000; + StgTSO_block_info(CurrentTSO) = target; /* Insert the new thread in the sleeping queue. */ diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 56c44519fb..302d1d7997 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -1070,8 +1070,7 @@ heapCensusChain( Census *census, bdescr *bd ) } } -void -heapCensus( Ticks t ) +void heapCensus (Time t) { nat g, n; Census *census; diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h index cf09c59231..b3bed903b5 100644 --- a/rts/ProfHeap.h +++ b/rts/ProfHeap.h @@ -9,11 +9,9 @@ #ifndef PROFHEAP_H #define PROFHEAP_H -#include "GetTime.h" // for Ticks - #include "BeginPrivate.h" -void heapCensus (Ticks t); +void heapCensus (Time t); nat initHeapProfiling (void); void endHeapProfiling (void); rtsBool strMatchesSelector (char* str, char* sel); diff --git a/rts/Profiling.c b/rts/Profiling.c index 38191ff4bd..c393c8fa83 100644 --- a/rts/Profiling.c +++ b/rts/Profiling.c @@ -801,11 +801,11 @@ reportCCSProfiling( void ) fprintf(prof_file, " %s", prog_argv[count]); fprintf(prof_file, "\n\n"); - fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n", - (double) total_prof_ticks * - (double) RtsFlags.MiscFlags.tickInterval / 1000, + fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us)\n", + ((double) total_prof_ticks * + (double) RtsFlags.MiscFlags.tickInterval) / TIME_RESOLUTION, (unsigned long) total_prof_ticks, - (int) RtsFlags.MiscFlags.tickInterval); + (int) TimeToUS(RtsFlags.MiscFlags.tickInterval)); fprintf(prof_file, "\ttotal alloc = %11s bytes", showStgWord64(total_alloc * sizeof(W_), diff --git a/rts/Proftimer.c b/rts/Proftimer.c index 82838184b7..76d7679000 100644 --- a/rts/Proftimer.c +++ b/rts/Proftimer.c @@ -50,7 +50,7 @@ void startHeapProfTimer( void ) { if (RtsFlags.ProfFlags.doHeapProfile && - RtsFlags.ProfFlags.profileIntervalTicks > 0) { + RtsFlags.ProfFlags.heapProfileIntervalTicks > 0) { do_heap_prof_ticks = rtsTrue; } } @@ -60,7 +60,7 @@ initProfTimer( void ) { performHeapProfile = rtsFalse; - ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks; + ticks_to_heap_profile = RtsFlags.ProfFlags.heapProfileIntervalTicks; startHeapProfTimer(); } @@ -80,7 +80,7 @@ handleProfTick(void) if (do_heap_prof_ticks) { ticks_to_heap_profile--; if (ticks_to_heap_profile <= 0) { - ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks; + ticks_to_heap_profile = RtsFlags.ProfFlags.heapProfileIntervalTicks; performHeapProfile = rtsTrue; } } diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index d8bcf1c915..3e3290dd3d 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -113,7 +113,7 @@ void initRtsFlagsDefaults(void) #ifdef RTS_GTK_FRONTPANEL RtsFlags.GcFlags.frontpanel = rtsFalse; #endif - RtsFlags.GcFlags.idleGCDelayTime = 300; /* millisecs */ + RtsFlags.GcFlags.idleGCDelayTime = USToTime(300000); // 300ms #if osf3_HOST_OS /* ToDo: Perhaps by adjusting this value we can make linking without @@ -150,7 +150,7 @@ void initRtsFlagsDefaults(void) #endif /* PROFILING */ RtsFlags.ProfFlags.doHeapProfile = rtsFalse; - RtsFlags.ProfFlags.profileInterval = 100; + RtsFlags.ProfFlags. heapProfileInterval = USToTime(100000); // 100ms #ifdef PROFILING RtsFlags.ProfFlags.includeTSOs = rtsFalse; @@ -176,8 +176,13 @@ void initRtsFlagsDefaults(void) RtsFlags.TraceFlags.user = rtsFalse; #endif - RtsFlags.MiscFlags.tickInterval = 20; /* In milliseconds */ - RtsFlags.ConcFlags.ctxtSwitchTime = 20; /* In milliseconds */ +#ifdef PROFILING + // When profiling we want a lot more ticks + RtsFlags.MiscFlags.tickInterval = USToTime(1000); // 1ms +#else + RtsFlags.MiscFlags.tickInterval = DEFAULT_TICK_INTERVAL; +#endif + RtsFlags.ConcFlags.ctxtSwitchTime = USToTime(20000); // 20ms RtsFlags.MiscFlags.install_signal_handlers = rtsTrue; RtsFlags.MiscFlags.machineReadable = rtsFalse; @@ -312,9 +317,9 @@ usage_text[] = { #if !defined(PROFILING) "", -" -hT Heap residency profile (output file <program>.hp)", +" -h Heap residency profile (output file <program>.hp)", #endif -" -i<sec> Time between heap samples (seconds, default: 0.1)", +" -i<sec> Time between heap profile samples (seconds, default: 0.1)", "", #if defined(TICKY_TICKY) " -r<file> Produce ticky-ticky statistics (with -rstderr for stderr)", @@ -322,10 +327,15 @@ usage_text[] = { #endif " -C<secs> Context-switch interval in seconds.", " 0 or no argument means switch as often as possible.", -" Default: 0.02 sec; resolution is set by -V below.", -" -V<secs> Master tick interval in seconds (0 == disable timer).", -" This sets the resolution for -C and the profile timer -i.", " Default: 0.02 sec.", +" -V<secs> Master tick interval in seconds (0 == disable timer).", +" This sets the resolution for -C and the heap profile timer -i,", +" and is the frequence of time profile samples.", +#ifdef PROFILING +" Default: 0.001 sec.", +#else +" Default: 0.01 sec.", +#endif "", #if defined(DEBUG) " -Ds DEBUG: scheduler", @@ -884,11 +894,8 @@ error = rtsTrue; if (rts_argv[arg][2] == '\0') { /* use default */ } else { - I_ cst; /* tmp */ - - /* Convert to millisecs */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.GcFlags.idleGCDelayTime = cst; + RtsFlags.GcFlags.idleGCDelayTime = + fsecondsToTime(atof(rts_argv[arg]+2)); } break; @@ -1090,12 +1097,9 @@ error = rtsTrue; if (rts_argv[arg][2] == '\0') { /* use default */ } else { - I_ cst; /* tmp */ - - /* Convert to milliseconds */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.ProfFlags.profileInterval = cst; - } + RtsFlags.ProfFlags.heapProfileInterval = + fsecondsToTime(atof(rts_argv[arg]+2)); + } break; /* =========== CONCURRENT ========================= */ @@ -1104,12 +1108,9 @@ error = rtsTrue; if (rts_argv[arg][2] == '\0') RtsFlags.ConcFlags.ctxtSwitchTime = 0; else { - I_ cst; /* tmp */ - - /* Convert to milliseconds */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.ConcFlags.ctxtSwitchTime = cst; - } + RtsFlags.ConcFlags.ctxtSwitchTime = + fsecondsToTime(atof(rts_argv[arg]+2)); + } break; case 'V': /* master tick interval */ @@ -1118,11 +1119,8 @@ error = rtsTrue; // turns off ticks completely RtsFlags.MiscFlags.tickInterval = 0; } else { - I_ cst; /* tmp */ - - /* Convert to milliseconds */ - cst = (I_) ((atof(rts_argv[arg]+2) * 1000)); - RtsFlags.MiscFlags.tickInterval = cst; + RtsFlags.MiscFlags.tickInterval = + fsecondsToTime(atof(rts_argv[arg]+2)); } break; @@ -1358,14 +1356,14 @@ error = rtsTrue; static void normaliseRtsOpts (void) { if (RtsFlags.MiscFlags.tickInterval < 0) { - RtsFlags.MiscFlags.tickInterval = 50; + RtsFlags.MiscFlags.tickInterval = DEFAULT_TICK_INTERVAL; } // If the master timer is disabled, turn off the other timers. if (RtsFlags.MiscFlags.tickInterval == 0) { RtsFlags.ConcFlags.ctxtSwitchTime = 0; RtsFlags.GcFlags.idleGCDelayTime = 0; - RtsFlags.ProfFlags.profileInterval = 0; + RtsFlags.ProfFlags.heapProfileInterval = 0; } // Determine what tick interval we should use for the RTS timer @@ -1383,9 +1381,9 @@ static void normaliseRtsOpts (void) RtsFlags.MiscFlags.tickInterval); } - if (RtsFlags.ProfFlags.profileInterval > 0) { + if (RtsFlags.ProfFlags.heapProfileInterval > 0) { RtsFlags.MiscFlags.tickInterval = - stg_min(RtsFlags.ProfFlags.profileInterval, + stg_min(RtsFlags.ProfFlags.heapProfileInterval, RtsFlags.MiscFlags.tickInterval); } @@ -1397,12 +1395,12 @@ static void normaliseRtsOpts (void) RtsFlags.ConcFlags.ctxtSwitchTicks = 0; } - if (RtsFlags.ProfFlags.profileInterval > 0) { - RtsFlags.ProfFlags.profileIntervalTicks = - RtsFlags.ProfFlags.profileInterval / + if (RtsFlags.ProfFlags.heapProfileInterval > 0) { + RtsFlags.ProfFlags.heapProfileIntervalTicks = + RtsFlags.ProfFlags.heapProfileInterval / RtsFlags.MiscFlags.tickInterval; } else { - RtsFlags.ProfFlags.profileIntervalTicks = 0; + RtsFlags.ProfFlags.heapProfileIntervalTicks = 0; } if (RtsFlags.GcFlags.stkChunkBufferSize > diff --git a/rts/Schedule.c b/rts/Schedule.c index 4f18209b9e..8c305008ae 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -1304,7 +1304,7 @@ scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED ) // When we have +RTS -i0 and we're heap profiling, do a census at // every GC. This lets us get repeatable runs for debugging. if (performHeapProfile || - (RtsFlags.ProfFlags.profileInterval==0 && + (RtsFlags.ProfFlags.heapProfileInterval==0 && RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) { return rtsTrue; } else { diff --git a/rts/Stats.c b/rts/Stats.c index 23cb4bffaa..9c68364717 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -26,15 +26,15 @@ /* huh? */ #define BIG_STRING_LEN 512 -#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND) +#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) -static Ticks +static Time start_init_cpu, start_init_elapsed, end_init_cpu, end_init_elapsed, start_exit_cpu, start_exit_elapsed, end_exit_cpu, end_exit_elapsed; -static Ticks GC_tot_cpu = 0; +static Time GC_tot_cpu = 0; static StgWord64 GC_tot_alloc = 0; static StgWord64 GC_tot_copied = 0; @@ -43,11 +43,11 @@ static StgWord64 GC_par_max_copied = 0; static StgWord64 GC_par_avg_copied = 0; #ifdef PROFILING -static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time -static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time +static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time +static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time -static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time -static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time +static Time HC_start_time, HC_tot_time = 0; // heap census prof user time +static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #endif #ifdef PROFILING @@ -66,9 +66,9 @@ static lnat max_slop = 0; static lnat GC_end_faults = 0; -static Ticks *GC_coll_cpu = NULL; -static Ticks *GC_coll_elapsed = NULL; -static Ticks *GC_coll_max_pause = NULL; +static Time *GC_coll_cpu = NULL; +static Time *GC_coll_elapsed = NULL; +static Time *GC_coll_max_pause = NULL; static void statsFlush( void ); static void statsClose( void ); @@ -77,7 +77,7 @@ static void statsClose( void ); Current elapsed time ------------------------------------------------------------------------- */ -Ticks stat_getElapsedTime(void) +Time stat_getElapsedTime(void) { return getProcessElapsedTime() - start_init_elapsed; } @@ -87,9 +87,9 @@ Ticks stat_getElapsedTime(void) ------------------------------------------------------------------------ */ double -mut_user_time_until( Ticks t ) +mut_user_time_until( Time t ) { - return TICK_TO_DBL(t - GC_tot_cpu); + return TimeToSecondsDbl(t - GC_tot_cpu); // heapCensus() time is included in GC_tot_cpu, so we don't need // to subtract it here. } @@ -97,7 +97,7 @@ mut_user_time_until( Ticks t ) double mut_user_time( void ) { - Ticks cpu; + Time cpu; cpu = getProcessCPUTime(); return mut_user_time_until(cpu); } @@ -110,13 +110,13 @@ mut_user_time( void ) double mut_user_time_during_RP( void ) { - return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time); + return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time); } double mut_user_time_during_heap_census( void ) { - return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time); + return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time); } #endif /* PROFILING */ @@ -177,16 +177,16 @@ initStats1 (void) statsPrintf(" bytes bytes bytes user elap user elap\n"); } GC_coll_cpu = - (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + (Time *)stgMallocBytes( + sizeof(Time)*RtsFlags.GcFlags.generations, "initStats"); GC_coll_elapsed = - (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + (Time *)stgMallocBytes( + sizeof(Time)*RtsFlags.GcFlags.generations, "initStats"); GC_coll_max_pause = - (Ticks *)stgMallocBytes( - sizeof(Ticks)*RtsFlags.GcFlags.generations, + (Time *)stgMallocBytes( + sizeof(Time)*RtsFlags.GcFlags.generations, "initStats"); for (i = 0; i < RtsFlags.GcFlags.generations; i++) { GC_coll_cpu[i] = 0; @@ -299,7 +299,7 @@ stat_gcWorkerThreadStart (gc_thread *gct) void stat_gcWorkerThreadDone (gc_thread *gct) { - Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed; + Time thread_cpu, elapsed, gc_cpu, gc_elapsed; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { @@ -326,7 +326,7 @@ stat_endGC (gc_thread *gct, RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time { - Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed; + Time cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed; getProcessTimes(&cpu, &elapsed); gc_elapsed = elapsed - gct->gc_start_elapsed; @@ -344,10 +344,10 @@ stat_endGC (gc_thread *gct, alloc*sizeof(W_), copied*sizeof(W_), live*sizeof(W_)); statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n", - TICK_TO_DBL(gc_cpu), - TICK_TO_DBL(gc_elapsed), - TICK_TO_DBL(cpu), - TICK_TO_DBL(elapsed - start_init_elapsed), + TimeToSecondsDbl(gc_cpu), + TimeToSecondsDbl(gc_elapsed), + TimeToSecondsDbl(cpu), + TimeToSecondsDbl(elapsed - start_init_elapsed), faults - gct->gc_start_faults, gct->gc_start_faults - GC_end_faults, gen); @@ -405,7 +405,7 @@ stat_endGC (gc_thread *gct, void stat_startRP(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); RP_start_time = user; @@ -427,7 +427,7 @@ stat_endRP( #endif double averageNumVisit) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); RP_tot_time += user - RP_start_time; @@ -450,7 +450,7 @@ stat_endRP( void stat_startHeapCensus(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); HC_start_time = user; @@ -465,7 +465,7 @@ stat_startHeapCensus(void) void stat_endHeapCensus(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes( &user, &elapsed ); HC_tot_time += user - HC_start_time; @@ -516,27 +516,27 @@ StgInt TOTAL_CALLS=1; statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) -static inline Ticks get_init_cpu(void) { return end_init_cpu - start_init_cpu; } -static inline Ticks get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } +static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; } +static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } void stat_exit(int alloc) { generation *gen; - Ticks gc_cpu = 0; - Ticks gc_elapsed = 0; - Ticks init_cpu = 0; - Ticks init_elapsed = 0; - Ticks mut_cpu = 0; - Ticks mut_elapsed = 0; - Ticks exit_cpu = 0; - Ticks exit_elapsed = 0; + Time gc_cpu = 0; + Time gc_elapsed = 0; + Time init_cpu = 0; + Time init_elapsed = 0; + Time mut_cpu = 0; + Time mut_elapsed = 0; + Time exit_cpu = 0; + Time exit_elapsed = 0; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { char temp[BIG_STRING_LEN]; - Ticks tot_cpu; - Ticks tot_elapsed; + Time tot_cpu; + Time tot_elapsed; nat i, g, total_collections = 0; getProcessTimes( &tot_cpu, &tot_elapsed ); @@ -611,10 +611,10 @@ stat_exit(int alloc) gen->no, gen->collections, gen->par_collections, - TICK_TO_DBL(GC_coll_cpu[g]), - TICK_TO_DBL(GC_coll_elapsed[g]), - gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections), - TICK_TO_DBL(GC_coll_max_pause[g])); + TimeToSecondsDbl(GC_coll_cpu[g]), + TimeToSecondsDbl(GC_coll_elapsed[g]), + gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections), + TimeToSecondsDbl(GC_coll_max_pause[g])); } #if defined(THREADED_RTS) @@ -639,10 +639,10 @@ stat_exit(int alloc) statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n", i, (task->worker) ? "(worker)" : "(bound)", - TICK_TO_DBL(task->mut_time), - TICK_TO_DBL(task->mut_etime), - TICK_TO_DBL(task->gc_time), - TICK_TO_DBL(task->gc_etime)); + TimeToSecondsDbl(task->mut_time), + TimeToSecondsDbl(task->mut_etime), + TimeToSecondsDbl(task->gc_time), + TimeToSecondsDbl(task->gc_etime)); } } @@ -668,27 +668,27 @@ stat_exit(int alloc) #endif statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed)); + TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed)); statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed)); + TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed)); statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed)); + TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed)); #ifdef PROFILING statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time)); + TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time)); statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time)); + TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time)); #endif statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed)); + TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed)); statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n", - TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed)); + TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed)); #ifndef THREADED_RTS statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n", - TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu), - TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed)); + TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu), + TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed)); #endif if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0) @@ -696,19 +696,19 @@ stat_exit(int alloc) else showStgWord64( (StgWord64)((GC_tot_alloc*sizeof(W_))/ - TICK_TO_DBL(tot_cpu - GC_tot_cpu - + TimeToSecondsDbl(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time))), temp, rtsTrue/*commas*/); statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp); statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n", - TICK_TO_DBL(tot_cpu - GC_tot_cpu - + TimeToSecondsDbl(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 - / TICK_TO_DBL(tot_cpu), - TICK_TO_DBL(tot_cpu - GC_tot_cpu - + / TimeToSecondsDbl(tot_cpu), + TimeToSecondsDbl(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 - / TICK_TO_DBL(tot_elapsed)); + / TimeToSecondsDbl(tot_elapsed)); /* TICK_PRINT(1); @@ -764,9 +764,9 @@ stat_exit(int alloc) max_residency*sizeof(W_), residency_samples, (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)), - TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed), - TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed), - TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed)); + TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed), + TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed), + TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed)); } statsFlush(); @@ -865,10 +865,10 @@ extern void getGCStats( GCStats *s ) { nat total_collections = 0; nat g; - Ticks gc_cpu = 0; - Ticks gc_elapsed = 0; - Ticks current_elapsed = 0; - Ticks current_cpu = 0; + Time gc_cpu = 0; + Time gc_elapsed = 0; + Time current_elapsed = 0; + Time current_cpu = 0; getProcessTimes(¤t_cpu, ¤t_elapsed); @@ -892,16 +892,16 @@ extern void getGCStats( GCStats *s ) s->current_bytes_used = current_residency*(StgWord64)sizeof(W_); s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_); /* - s->init_cpu_seconds = TICK_TO_DBL(get_init_cpu()); - s->init_wall_seconds = TICK_TO_DBL(get_init_elapsed()); + s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu()); + s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed()); */ - s->mutator_cpu_seconds = TICK_TO_DBL(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); - s->mutator_wall_seconds = TICK_TO_DBL(current_elapsed- end_init_elapsed - gc_elapsed); - s->gc_cpu_seconds = TICK_TO_DBL(gc_cpu); - s->gc_wall_seconds = TICK_TO_DBL(gc_elapsed); + s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); + s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed); + s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu); + s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed); /* EZY: Being consistent with incremental output, but maybe should also discount init */ - s->cpu_seconds = TICK_TO_DBL(current_cpu); - s->wall_seconds = TICK_TO_DBL(current_elapsed - end_init_elapsed); + s->cpu_seconds = TimeToSecondsDbl(current_cpu); + s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed); s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_); s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_); } diff --git a/rts/Stats.h b/rts/Stats.h index f0060bdf4a..83b2cb6998 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -49,7 +49,7 @@ void stat_workerStop(void); void initStats0(void); void initStats1(void); -double mut_user_time_until(Ticks t); +double mut_user_time_until(Time t); double mut_user_time(void); #ifdef PROFILING @@ -59,8 +59,8 @@ double mut_user_time_during_heap_census(void); void statDescribeGens( void ); -Ticks stat_getElapsedGCTime(void); -Ticks stat_getElapsedTime(void); +Time stat_getElapsedGCTime(void); +Time stat_getElapsedTime(void); /* Only exported for Papi.c */ void statsPrintf( char *s, ... ) diff --git a/rts/Task.c b/rts/Task.c index 9e8214899c..d72d8a9085 100644 --- a/rts/Task.c +++ b/rts/Task.c @@ -165,7 +165,7 @@ static Task* newTask (rtsBool worker) { #if defined(THREADED_RTS) - Ticks currentElapsedTime, currentUserTime; + Time currentElapsedTime, currentUserTime; #endif Task *task; @@ -329,7 +329,7 @@ void taskTimeStamp (Task *task USED_IF_THREADS) { #if defined(THREADED_RTS) - Ticks currentElapsedTime, currentUserTime; + Time currentElapsedTime, currentUserTime; currentUserTime = getThreadCPUTime(); currentElapsedTime = getProcessElapsedTime(); @@ -347,7 +347,7 @@ taskTimeStamp (Task *task USED_IF_THREADS) } void -taskDoneGC (Task *task, Ticks cpu_time, Ticks elapsed_time) +taskDoneGC (Task *task, Time cpu_time, Time elapsed_time) { task->gc_time += cpu_time; task->gc_etime += elapsed_time; diff --git a/rts/Task.h b/rts/Task.h index 4000a045d4..386e003d28 100644 --- a/rts/Task.h +++ b/rts/Task.h @@ -149,12 +149,12 @@ typedef struct Task_ { // really want separate stats for each call in a nested chain of // foreign->haskell->foreign->haskell calls, but we'll get a // separate Task for each of the haskell calls. - Ticks elapsedtimestart; - Ticks muttimestart; - Ticks mut_time; - Ticks mut_etime; - Ticks gc_time; - Ticks gc_etime; + Time elapsedtimestart; + Time muttimestart; + Time mut_time; + Time mut_etime; + Time gc_time; + Time gc_etime; // Links tasks on the returning_tasks queue of a Capability, and // on spare_workers. @@ -208,7 +208,7 @@ void workerTaskStop (Task *task); void taskTimeStamp (Task *task); // The current Task has finished a GC, record the amount of time spent. -void taskDoneGC (Task *task, Ticks cpu_time, Ticks elapsed_time); +void taskDoneGC (Task *task, Time cpu_time, Time elapsed_time); // Put the task back on the free list, mark it stopped. Used by // forkProcess(). diff --git a/rts/Threads.c b/rts/Threads.c index 3e1c5cff0b..7e660d63f6 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -701,20 +701,22 @@ void printThreadBlockage(StgTSO *tso) { switch (tso->why_blocked) { +#if defined(mingw32_HOST_OS) + case BlockedOnDoProc: + debugBelch("is blocked on proc (request: %u)", tso->block_info.async_result->reqID); + break; +#endif +#if !defined(THREADED_RTS) case BlockedOnRead: debugBelch("is blocked on read from fd %d", (int)(tso->block_info.fd)); break; case BlockedOnWrite: debugBelch("is blocked on write to fd %d", (int)(tso->block_info.fd)); break; -#if defined(mingw32_HOST_OS) - case BlockedOnDoProc: - debugBelch("is blocked on proc (request: %u)", tso->block_info.async_result->reqID); - break; -#endif case BlockedOnDelay: debugBelch("is blocked until %ld", (long)(tso->block_info.target)); break; +#endif case BlockedOnMVar: debugBelch("is blocked on an MVar @ %p", tso->block_info.closure); break; diff --git a/rts/Ticker.h b/rts/Ticker.h index 5804501da5..685a79e5d2 100644 --- a/rts/Ticker.h +++ b/rts/Ticker.h @@ -13,7 +13,7 @@ typedef void (*TickProc)(int); -void initTicker (nat ms, TickProc handle_tick); +void initTicker (Time interval, TickProc handle_tick); void startTicker (void); void stopTicker (void); void exitTicker (rtsBool wait); diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c index 2e2209d2d3..88fc64010d 100644 --- a/rts/eventlog/EventLog.c +++ b/rts/eventlog/EventLog.c @@ -149,7 +149,7 @@ static inline void postBuf(EventsBuf *eb, StgWord8 *buf, nat size) } static inline StgWord64 time_ns(void) -{ return stat_getElapsedTime() * (1000000000LL/TICKS_PER_SECOND); } +{ return TimeToNS(stat_getElapsedTime()); } static inline void postEventTypeNum(EventsBuf *eb, EventTypeNum etNum) { postWord16(eb, etNum); } diff --git a/rts/posix/GetTime.c b/rts/posix/GetTime.c index eab7177fe5..c31b319af4 100644 --- a/rts/posix/GetTime.c +++ b/rts/posix/GetTime.c @@ -44,7 +44,7 @@ // we'll implement getProcessCPUTime() and getProcessElapsedTime() // separately, using getrusage() and gettimeofday() respectively -Ticks getProcessCPUTime(void) +Time getProcessCPUTime(void) { #if !defined(BE_CONSERVATIVE) && defined(HAVE_CLOCK_GETTIME) && defined (_SC_CPUTIME) && defined(CLOCK_PROCESS_CPUTIME_ID) && defined(HAVE_SYSCONF) static int checked_sysconf = 0; @@ -59,8 +59,7 @@ Ticks getProcessCPUTime(void) int res; res = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); if (res == 0) { - return ((Ticks)ts.tv_sec * TICKS_PER_SECOND + - ((Ticks)ts.tv_nsec * TICKS_PER_SECOND) / 1000000000); + return SecondsToTime(ts.tv_sec) + NSToTime(ts.tv_nsec); } } #endif @@ -69,20 +68,18 @@ Ticks getProcessCPUTime(void) { struct rusage t; getrusage(RUSAGE_SELF, &t); - return ((Ticks)t.ru_utime.tv_sec * TICKS_PER_SECOND + - ((Ticks)t.ru_utime.tv_usec * TICKS_PER_SECOND)/1000000); + return SecondsToTime(t.ru_utime.tv_sec) + USToTime(t.ru_utime.tv_usec); } } -Ticks getProcessElapsedTime(void) +Time getProcessElapsedTime(void) { struct timeval tv; gettimeofday(&tv, (struct timezone *) NULL); - return ((Ticks)tv.tv_sec * TICKS_PER_SECOND + - ((Ticks)tv.tv_usec * TICKS_PER_SECOND)/1000000); + return SecondsToTime(tv.tv_sec) + USToTime(tv.tv_usec); } -void getProcessTimes(Ticks *user, Ticks *elapsed) +void getProcessTimes(Time *user, Time *elapsed) { *user = getProcessCPUTime(); *elapsed = getProcessElapsedTime(); @@ -92,29 +89,29 @@ void getProcessTimes(Ticks *user, Ticks *elapsed) // we'll use the old times() API. -Ticks getProcessCPUTime(void) +Time getProcessCPUTime(void) { #if !defined(THREADED_RTS) && USE_PAPI long long usec; if ((usec = PAPI_get_virt_usec()) < 0) { barf("PAPI_get_virt_usec: %lld", usec); } - return ((usec * TICKS_PER_SECOND) / 1000000); + return USToTime(usec); #else - Ticks user, elapsed; + Time user, elapsed; getProcessTimes(&user,&elapsed); return user; #endif } -Ticks getProcessElapsedTime(void) +Time getProcessElapsedTime(void) { - Ticks user, elapsed; + Time user, elapsed; getProcessTimes(&user,&elapsed); return elapsed; } -void getProcessTimes(Ticks *user, Ticks *elapsed) +void getProcessTimes(Time *user, Time *elapsed) { static nat ClockFreq = 0; @@ -141,20 +138,20 @@ void getProcessTimes(Ticks *user, Ticks *elapsed) struct tms t; clock_t r = times(&t); - *user = (((Ticks)t.tms_utime * TICKS_PER_SECOND) / ClockFreq); - *elapsed = (((Ticks)r * TICKS_PER_SECOND) / ClockFreq); + *user = SecondsToTime(t.tms_utime) / ClockFreq; + *elapsed = SecondsToTime(r) / ClockFreq; } #endif // HAVE_TIMES -Ticks getThreadCPUTime(void) +Time getThreadCPUTime(void) { #if USE_PAPI long long usec; if ((usec = PAPI_get_virt_usec()) < 0) { barf("PAPI_get_virt_usec: %lld", usec); } - return ((usec * TICKS_PER_SECOND) / 1000000); + return USToTime(usec); #elif !defined(BE_CONSERVATIVE) && defined(HAVE_CLOCK_GETTIME) && defined (_SC_THREAD_CPUTIME) && defined(CLOCK_THREAD_CPUTIME_ID) && defined(HAVE_SYSCONF) { @@ -172,8 +169,7 @@ Ticks getThreadCPUTime(void) int res; res = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); if (res == 0) { - return ((Ticks)ts.tv_sec * TICKS_PER_SECOND + - ((Ticks)ts.tv_nsec * TICKS_PER_SECOND) / 1000000000); + return SecondsToTime(ts.tv_sec) + NSToTime(ts.tv_nsec); } } } diff --git a/rts/posix/Itimer.c b/rts/posix/Itimer.c index e46bb12546..ece54910c2 100644 --- a/rts/posix/Itimer.c +++ b/rts/posix/Itimer.c @@ -44,68 +44,52 @@ #include <string.h> -/* Major bogosity: - * - * In the threaded RTS, we can't set the virtual timer because the - * thread which has the virtual timer might be sitting waiting for a - * capability, and the virtual timer only ticks in CPU time. +/* + * We use a realtime timer by default. I found this much more + * reliable than a CPU timer: * - * So, possible solutions: + * Experiments with different frequences: using + * CLOCK_REALTIME/CLOCK_MONOTONIC on Linux 2.6.32, + * 1000us has <1% impact on runtime + * 100us has ~2% impact on runtime + * 10us has ~40% impact on runtime * - * (1) tick in realtime. Not very good, because this ticker is used for - * profiling, and this will give us unreliable time profiling - * results. + * using CLOCK_PROCESS_CPUTIME_ID on Linux 2.6.32, + * I cannot get it to tick faster than 10ms (10000us) + * which isn't great for profiling. * - * (2) save/restore the virtual timer around excursions into STG land. - * Sounds great, but I tried it and the resolution of the virtual timer - * isn't good enough (on Linux) - most of our excursions fall - * within the timer's resolution and we never make any progress. - * - * (3) have a virtual timer in every OS thread. Might be reasonable, - * because most of the time there is only ever one of these - * threads running, so it approximates a single virtual timer. - * But still quite bogus (and I got crashes when I tried this). + * In the threaded RTS, we can't tick in CPU time because the thread + * which has the virtual timer might be idle, so the tick would never + * fire. Therfore we used to tick in realtime in the threaded RTS and + * in CPU time otherwise, but now we always tick in realtime, for + * several reasons: * - * For now, we're using (1), but this needs a better solution. --SDM + * - resolution (see above) + * - consistency (-threaded is the same as normal) + * - more consistency: Windows only has a realtime timer + * + * Note we want to use CLOCK_MONOTONIC rather than CLOCK_REALTIME, + * because the latter may jump around (NTP adjustments, leap seconds + * etc.). */ #if defined(USE_TIMER_CREATE) - # define ITIMER_SIGNAL SIGVTALRM -# ifdef THREADED_RTS -# define TIMER_FLAVOUR CLOCK_REALTIME -# else -# define TIMER_FLAVOUR CLOCK_PROCESS_CPUTIME_ID -# endif - #elif defined(HAVE_SETITIMER) - -# if defined(THREADED_RTS) || !defined(HAVE_SETITIMER_VIRTUAL) -// Oh dear, we have to use SIGALRM if there's no timer_create and -// we're using the THREADED_RTS. This leads to problems, see bug #850. -// We also use it if we don't have a virtual timer (trac #2883). -# define ITIMER_SIGNAL SIGALRM -# define ITIMER_FLAVOUR ITIMER_REAL -# else -# define ITIMER_SIGNAL SIGVTALRM -# define ITIMER_FLAVOUR ITIMER_VIRTUAL -# endif - +# define ITIMER_SIGNAL SIGALRM + // Using SIGALRM can leads to problems, see #850. But we have no + // option if timer_create() is not available. #else - # error No way to set an interval timer. - #endif #if defined(USE_TIMER_CREATE) static timer_t timer; #endif -static nat itimer_interval = 50; +static Time itimer_interval = DEFAULT_TICK_INTERVAL; -static -void -install_vtalrm_handler(TickProc handle_tick) +static void install_vtalrm_handler(TickProc handle_tick) { struct sigaction action; @@ -132,32 +116,35 @@ install_vtalrm_handler(TickProc handle_tick) } void -initTicker (nat ms, TickProc handle_tick) +initTicker (Time interval, TickProc handle_tick) { - install_vtalrm_handler(handle_tick); - -#if !defined(THREADED_RTS) - timestamp = getourtimeofday(); -#endif - - itimer_interval = ms; + itimer_interval = interval; #if defined(USE_TIMER_CREATE) { struct sigevent ev; + clockid_t clock; - // Keep programs like valgrind happy + // Keep programs like valgrind happy memset(&ev, 0, sizeof(ev)); ev.sigev_notify = SIGEV_SIGNAL; ev.sigev_signo = ITIMER_SIGNAL; - if (timer_create(TIMER_FLAVOUR, &ev, &timer) != 0) { +#if defined(CLOCK_MONOTONIC) + clock = CLOCK_MONOTONIC; +#else + clock = CLOCK_REALTIME; +#endif + + if (timer_create(clock, &ev, &timer) != 0) { sysErrorBelch("timer_create"); stg_exit(EXIT_FAILURE); } } #endif + + install_vtalrm_handler(handle_tick); } void @@ -167,8 +154,8 @@ startTicker(void) { struct itimerspec it; - it.it_value.tv_sec = itimer_interval / 1000; - it.it_value.tv_nsec = (itimer_interval % 1000) * 1000000; + it.it_value.tv_sec = TimeToSeconds(itimer_interval); + it.it_value.tv_nsec = TimeToNS(itimer_interval); it.it_interval = it.it_value; if (timer_settime(timer, 0, &it, NULL) != 0) { @@ -180,11 +167,11 @@ startTicker(void) { struct itimerval it; - it.it_value.tv_sec = itimer_interval / 1000; - it.it_value.tv_usec = (itimer_interval % 1000) * 1000; + it.it_value.tv_sec = TimeToSeconds(itimer_interval); + it.it_value.tv_usec = TimeToUS(itimer_interval); it.it_interval = it.it_value; - if (setitimer(ITIMER_FLAVOUR, &it, NULL) != 0) { + if (setitimer(ITIMER_REAL, &it, NULL) != 0) { sysErrorBelch("setitimer"); stg_exit(EXIT_FAILURE); } @@ -213,7 +200,7 @@ stopTicker(void) it.it_value.tv_usec = 0; it.it_interval = it.it_value; - if (setitimer(ITIMER_FLAVOUR, &it, NULL) != 0) { + if (setitimer(ITIMER_REAL, &it, NULL) != 0) { sysErrorBelch("setitimer"); stg_exit(EXIT_FAILURE); } @@ -229,23 +216,6 @@ exitTicker (rtsBool wait STG_UNUSED) #endif } -/* gettimeofday() takes around 1us on our 500MHz PIII. Since we're - * only calling it 50 times/s, it shouldn't have any great impact. - */ -lnat -getourtimeofday(void) -{ - struct timeval tv; - nat interval; - interval = RtsFlags.MiscFlags.tickInterval; - if (interval == 0) { interval = 50; } - gettimeofday(&tv, (struct timezone *) NULL); - - // Avoid overflow when we multiply seconds by 1000. See #2848 - return (lnat)((StgWord64)tv.tv_sec * 1000 / interval + - (StgWord64)tv.tv_usec / (interval * 1000)); -} - int rtsTimerSignal(void) { diff --git a/rts/posix/Itimer.h b/rts/posix/Itimer.h index b67c8c442e..7996da7c94 100644 --- a/rts/posix/Itimer.h +++ b/rts/posix/Itimer.h @@ -9,6 +9,4 @@ #ifndef ITIMER_H #define ITIMER_H -RTS_PRIVATE lnat getourtimeofday ( void ); - #endif /* ITIMER_H */ diff --git a/rts/posix/Select.c b/rts/posix/Select.c index 3c87fbdc70..45737ce0cc 100644 --- a/rts/posix/Select.c +++ b/rts/posix/Select.c @@ -16,6 +16,7 @@ #include "Capability.h" #include "Select.h" #include "AwaitEvent.h" +#include "Stats.h" # ifdef HAVE_SYS_SELECT_H # include <sys/select.h> @@ -37,13 +38,24 @@ #endif #if !defined(THREADED_RTS) -/* last timestamp */ -lnat timestamp = 0; /* * The threaded RTS uses an IO-manager thread in Haskell instead (see GHC.Conc) */ +#define LowResTimeToTime(t) (USToTime((t) * 10000)) + +/* + * Return the time since the program started, in LowResTime, + * rounded down. + * + * This is only used by posix/Select.c. It should probably go away. + */ +LowResTime getourtimeofday(void) +{ + return TimeToUS(stat_getElapsedTime()) / 10000; +} + /* There's a clever trick here to avoid problems when the time wraps * around. Since our maximum delay is smaller than 31 bits of ticks * (it's actually 31 bits of microseconds), we can safely check @@ -55,15 +67,14 @@ lnat timestamp = 0; * if this is true, then our time has expired. * (idea due to Andy Gill). */ -static rtsBool -wakeUpSleepingThreads(lnat ticks) +static rtsBool wakeUpSleepingThreads (LowResTime now) { StgTSO *tso; rtsBool flag = rtsFalse; while (sleeping_queue != END_TSO_QUEUE) { tso = sleeping_queue; - if (((long)ticks - (long)tso->block_info.target) < 0) { + if (((long)now - (long)tso->block_info.target) < 0) { break; } sleeping_queue = tso->_link; @@ -108,7 +119,8 @@ awaitEvent(rtsBool wait) rtsBool select_succeeded = rtsTrue; rtsBool unblock_all = rtsFalse; struct timeval tv; - lnat min, ticks; + Time min; + LowResTime now; tv.tv_sec = 0; tv.tv_usec = 0; @@ -128,18 +140,17 @@ awaitEvent(rtsBool wait) */ do { - ticks = timestamp = getourtimeofday(); - if (wakeUpSleepingThreads(ticks)) { + now = getourtimeofday(); + if (wakeUpSleepingThreads(now)) { return; } if (!wait) { min = 0; } else if (sleeping_queue != END_TSO_QUEUE) { - min = (sleeping_queue->block_info.target - ticks) - * RtsFlags.MiscFlags.tickInterval * 1000; + min = LowResTimeToTime(sleeping_queue->block_info.target - now); } else { - min = 0x7ffffff; + min = (Time)-1; } /* @@ -185,8 +196,8 @@ awaitEvent(rtsBool wait) /* Check for any interesting events */ - tv.tv_sec = min / 1000000; - tv.tv_usec = min % 1000000; + tv.tv_sec = TimeToSeconds(min); + tv.tv_usec = TimeToUS(min) % 1000000; while ((numFound = select(maxfd+1, &rfd, &wfd, NULL, &tv)) < 0) { if (errno != EINTR) { @@ -236,7 +247,7 @@ awaitEvent(rtsBool wait) /* check for threads that need waking up */ - wakeUpSleepingThreads(getourtimeofday()); + wakeUpSleepingThreads(getourtimeofday()); /* If new runnable threads have arrived, stop waiting for * I/O and run them. diff --git a/rts/posix/Select.h b/rts/posix/Select.h index e92a4bc889..15fa00ac66 100644 --- a/rts/posix/Select.h +++ b/rts/posix/Select.h @@ -9,9 +9,9 @@ #ifndef POSIX_SELECT_H #define POSIX_SELECT_H -#if !defined(THREADED_RTS) -/* In Select.c */ -extern lnat timestamp; -#endif +// An absolute time value in units of 10ms. +typedef StgWord LowResTime; + +RTS_PRIVATE LowResTime getourtimeofday ( void ); #endif /* POSIX_SELECT_H */ diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index e42a3a1239..b4f325631f 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -181,9 +181,9 @@ typedef struct gc_thread_ { lnat no_work; lnat scav_find_work; - Ticks gc_start_cpu; // process CPU time - Ticks gc_start_elapsed; // process elapsed time - Ticks gc_start_thread_cpu; // thread CPU time + Time gc_start_cpu; // process CPU time + Time gc_start_elapsed; // process elapsed time + Time gc_start_thread_cpu; // thread CPU time lnat gc_start_faults; // ------------------- diff --git a/rts/win32/GetTime.c b/rts/win32/GetTime.c index 13fb5ab22d..9a322bf0a5 100644 --- a/rts/win32/GetTime.c +++ b/rts/win32/GetTime.c @@ -15,26 +15,26 @@ # include <time.h> #endif -#define HNS_PER_SEC 10000000LL /* FILETIMES are in units of 100ns */ /* Convert FILETIMEs into secs */ -static INLINE_ME Ticks -fileTimeToTicks(FILETIME ft) +static INLINE_ME Time +fileTimeToRtsTime(FILETIME ft) { - Ticks t; - t = ((Ticks)ft.dwHighDateTime << 32) | ft.dwLowDateTime; - t = (t * TICKS_PER_SECOND) / HNS_PER_SEC; + Time t; + t = ((Time)ft.dwHighDateTime << 32) | ft.dwLowDateTime; + t = NSToTime(t * 100); + /* FILETIMES are in units of 100ns */ return t; } void -getProcessTimes(Ticks *user, Ticks *elapsed) +getProcessTimes(Time *user, Time *elapsed) { *user = getProcessCPUTime(); *elapsed = getProcessElapsedTime(); } -Ticks +Time getProcessCPUTime(void) { FILETIME creationTime, exitTime, userTime, kernelTime = {0,0}; @@ -44,14 +44,14 @@ getProcessCPUTime(void) return 0; } - return fileTimeToTicks(userTime); + return fileTimeToRtsTime(userTime); } // getProcessElapsedTime relies on QueryPerformanceFrequency // which should be available on any Windows computer thay you // would want to run Haskell on. Satnam Singh, 5 July 2010. -Ticks +Time getProcessElapsedTime(void) { // frequency represents the number of ticks per second @@ -73,13 +73,14 @@ getProcessElapsedTime(void) // Get the tick count. QueryPerformanceCounter(&system_time) ; - // Return the tick count as a millisecond value. + // Return the tick count as a Time value. // Using double to compute the intermediate value, because a 64-bit - // int would overflow when multiplied by TICKS_PER_SECOND in about 81 days. - return (Ticks)((TICKS_PER_SECOND * (double)system_time.QuadPart) / (double)frequency.QuadPart) ; + // int would overflow when multiplied by TICK_RESOLUTION in about 81 days. + return fsecondsToTime((double)system_time.QuadPart / + (double)frequency.QuadPart) ; } -Ticks +Time getThreadCPUTime(void) { FILETIME creationTime, exitTime, userTime, kernelTime = {0,0}; @@ -89,7 +90,7 @@ getThreadCPUTime(void) return 0; } - return fileTimeToTicks(userTime); + return fileTimeToRtsTime(userTime); } void diff --git a/rts/win32/Ticker.c b/rts/win32/Ticker.c index 1c45482651..d54fa4680f 100644 --- a/rts/win32/Ticker.c +++ b/rts/win32/Ticker.c @@ -2,166 +2,80 @@ * RTS periodic timers. * */ +#define _WIN32_WINNT 0x0500 + #include "Rts.h" #include "Ticker.h" #include <windows.h> #include <stdio.h> #include <process.h> -/* - * Provide a timer service for the RTS, periodically - * notifying it that a number of 'ticks' has passed. - * - */ - -/* To signal pause or shutdown of the timer service, we use a local - * event which the timer thread listens to. - */ -static HANDLE hStopEvent = INVALID_HANDLE_VALUE; -static HANDLE tickThread = INVALID_HANDLE_VALUE; - -static TickProc tickProc = NULL; +static TickProc tick_proc = NULL; +static HANDLE timer_queue = NULL; +static HANDLE timer = NULL; +static Time tick_interval = 0; -static enum { TickerGo, TickerPause, TickerExit } ticker_state; - -/* - * Ticking is done by a separate thread which periodically - * wakes up to handle a tick. - * - * This is the portable way of providing a timer service under - * Win32; features like waitable timers or timer queues are only - * supported by a subset of the Win32 platforms (notably not - * under Win9x.) - * - */ -static -unsigned -WINAPI -TimerProc(PVOID param) +static VOID CALLBACK tick_callback( + PVOID lpParameter STG_UNUSED, + BOOLEAN TimerOrWaitFired STG_UNUSED + ) { - int ms = (int)param; - DWORD waitRes = 0; - - /* interpret a < 0 timeout period as 'instantaneous' */ - if (ms < 0) ms = 0; - - while (1) { - switch (ticker_state) { - case TickerGo: - waitRes = WaitForSingleObject(hStopEvent, ms); - break; - case TickerPause: - waitRes = WaitForSingleObject(hStopEvent, INFINITE); - break; - case TickerExit: - /* event has become signalled */ - tickProc = NULL; - CloseHandle(hStopEvent); - hStopEvent = INVALID_HANDLE_VALUE; - return 0; - } - - switch (waitRes) { - case WAIT_OBJECT_0: - /* event has become signalled */ - ResetEvent(hStopEvent); - continue; - case WAIT_TIMEOUT: - /* tick */ - tickProc(0); - break; - case WAIT_FAILED: - sysErrorBelch("TimerProc: WaitForSingleObject failed"); - break; - default: - errorBelch("TimerProc: unexpected result %lu\n", waitRes); - break; - } - } - return 0; + tick_proc(0); } +// We use the CreateTimerQueue() API which has been around since +// Windows 2000. Apparently it gives bad results before Windows 7, +// though: http://www.virtualdub.org/blog/pivot/entry.php?id=272 +// +// Even with the improvements in Windows 7, this timer isn't going to +// be very useful for profiling with a max usable resolution of +// 15ms. Unfortunately we don't have anything better. void -initTicker (nat ms, TickProc handle_tick) +initTicker (Time interval, TickProc handle_tick) { - unsigned threadId; - /* 'hStopEvent' is a manual-reset event that's signalled upon - * shutdown of timer service (=> timer thread.) - */ - hStopEvent = CreateEvent ( NULL, - TRUE, - FALSE, - NULL); - if (hStopEvent == INVALID_HANDLE_VALUE) { - sysErrorBelch("CreateEvent"); - stg_exit(EXIT_FAILURE); - } - tickProc = handle_tick; - ticker_state = TickerPause; - tickThread = (HANDLE)(long)_beginthreadex( NULL, - 0, - TimerProc, - (LPVOID)ms, - 0, - &threadId); + tick_interval = interval; + tick_proc = handle_tick; - if (tickThread == 0) { - sysErrorBelch("_beginthreadex"); - stg_exit(EXIT_FAILURE); - } + timer_queue = CreateTimerQueue(); + if (timer_queue == NULL) { + sysErrorBelch("CreateTimerQueue"); + stg_exit(EXIT_FAILURE); + } } void startTicker(void) { - ticker_state = TickerGo; - SetEvent(hStopEvent); + BOOL r; + + r = CreateTimerQueueTimer(&timer, + timer_queue, + tick_callback, + 0, + 0, + TimeToUS(tick_interval) / 1000, // ms + WT_EXECUTEINTIMERTHREAD); + if (r == 0) { + sysErrorBelch("CreateTimerQueueTimer"); + stg_exit(EXIT_FAILURE); + } } void stopTicker(void) { - ticker_state = TickerPause; - SetEvent(hStopEvent); + if (timer_queue != NULL && timer != NULL) { + DeleteTimerQueueTimer(timer_queue, timer, NULL); + timer = NULL; + } } void exitTicker (rtsBool wait) { - // We must wait for the ticker thread to terminate, since if we - // are in a DLL that is about to be unloaded, the ticker thread - // cannot be allowed to return to a missing DLL. - - if (hStopEvent != INVALID_HANDLE_VALUE && - tickThread != INVALID_HANDLE_VALUE) { - DWORD exitCode; - ticker_state = TickerExit; - SetEvent(hStopEvent); - while (wait) { - // See #3748: - // - // when the RTS is compiled into a DLL (wait==rtsTrue), - // the ticker thread must stop before we exit, or chaos - // will ensue. We can't kill it, because it may be - // holding a lock. - // - // When not compiled into a DLL, we wait for - // the thread out of courtesy, but give up after 200ms if - // it still hasn't stopped. - WaitForSingleObject(tickThread, 200); - if (!GetExitCodeThread(tickThread, &exitCode)) { - return; - } - CloseHandle(tickThread); - if (exitCode != STILL_ACTIVE) { - tickThread = INVALID_HANDLE_VALUE; - if ( hStopEvent != INVALID_HANDLE_VALUE ) { - CloseHandle(hStopEvent); - hStopEvent = INVALID_HANDLE_VALUE; - } - return; - } - } + if (timer_queue != NULL) { + DeleteTimerQueueEx(timer_queue, wait ? INVALID_HANDLE_VALUE : NULL); + timer_queue = NULL; } } diff --git a/rules/build-prog.mk b/rules/build-prog.mk index 99093d3fee..1f43169ce3 100644 --- a/rules/build-prog.mk +++ b/rules/build-prog.mk @@ -117,8 +117,12 @@ ifeq "$3" "0" # worry about where the RTS header files are $(call c-suffix-rules,$1,$2,v,YES) else +ifeq "$$($1_$2_UseGhcForCC)" "YES" +$(call c-suffix-rules,$1,$2,v,YES) +else $(call c-suffix-rules,$1,$2,v,NO) endif +endif $(call hs-suffix-rules,$1,$2,v) $$(foreach dir,$$($1_$2_HS_SRC_DIRS),\ |