summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2012-03-02 10:53:34 +0000
committerSimon Marlow <marlowsd@gmail.com>2012-03-02 11:44:17 +0000
commit085c7fe5d4ea6e7b59f944d46ecfeba3755a315b (patch)
tree3de7e3e6306dbf9a8869862266a35a2d0d75d11f /rts
parent27d7d930ff8741f980245da1b895ceaa5294e257 (diff)
downloadhaskell-085c7fe5d4ea6e7b59f944d46ecfeba3755a315b.tar.gz
Drop the per-task timing stats, give a summary only (#5897)
We were keeping around the Task struct (216 bytes) for every worker we ever created, even though we only keep a maximum of 6 workers per Capability. These Task structs accumulate and cause a space leak in programs that do lots of safe FFI calls; this patch frees the Task struct as soon as a worker exits. One reason we were keeping the Task structs around is because we print out per-Task timing stats in +RTS -s, but that isn't terribly useful. What is sometimes useful is knowing how *many* Tasks there were. So now I'm printing a single-line summary, this is for the program in TASKS: 2001 (1 bound, 31 peak workers (2000 total), using -N1) So although we created 2k tasks overall, there were only 31 workers active at any one time (which is exactly what we expect: the program makes 30 safe FFI calls concurrently). This also gives an indication of how many capabilities were being used, which is handy if you use +RTS -N without an explicit number.
Diffstat (limited to 'rts')
-rw-r--r--rts/RtsMain.c3
-rw-r--r--rts/Stats.c42
-rw-r--r--rts/Task.c100
-rw-r--r--rts/Task.h30
-rw-r--r--rts/posix/OSThreads.c1
-rw-r--r--rts/sm/Compact.c2
6 files changed, 73 insertions, 105 deletions
diff --git a/rts/RtsMain.c b/rts/RtsMain.c
index 2084435f16..e89445db25 100644
--- a/rts/RtsMain.c
+++ b/rts/RtsMain.c
@@ -62,8 +62,7 @@ static void real_main(void)
Capability *cap = rts_lock();
rts_evalLazyIO(&cap,progmain_closure, NULL);
status = rts_getSchedStatus(cap);
- taskTimeStamp(myTask());
- rts_unlock(cap);
+ rts_unlock(cap);
}
/* check the status of the entire Haskell computation */
diff --git a/rts/Stats.c b/rts/Stats.c
index 83c43f0bdd..2c7c35d533 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -287,18 +287,29 @@ stat_startGC (gc_thread *gct)
}
void
-stat_gcWorkerThreadStart (gc_thread *gct)
+stat_gcWorkerThreadStart (gc_thread *gct STG_UNUSED)
{
+#if 0
+ /*
+ * We dont' collect per-thread GC stats any more, but this code
+ * could be used to do that if we want to in the future:
+ */
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
{
getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
gct->gc_start_thread_cpu = getThreadCPUTime();
}
+#endif
}
void
-stat_gcWorkerThreadDone (gc_thread *gct)
+stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
{
+#if 0
+ /*
+ * We dont' collect per-thread GC stats any more, but this code
+ * could be used to do that if we want to in the future:
+ */
Time thread_cpu, elapsed, gc_cpu, gc_elapsed;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
@@ -311,6 +322,7 @@ stat_gcWorkerThreadDone (gc_thread *gct)
taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
}
+#endif
}
/* -----------------------------------------------------------------------------
@@ -326,17 +338,13 @@ stat_endGC (gc_thread *gct,
RtsFlags.ProfFlags.doHeapProfile)
// heap profiling needs GC_tot_time
{
- Time cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;
+ Time cpu, elapsed, gc_cpu, gc_elapsed;
getProcessTimes(&cpu, &elapsed);
gc_elapsed = elapsed - gct->gc_start_elapsed;
- thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;
-
gc_cpu = cpu - gct->gc_start_cpu;
- taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);
-
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
nat faults = getPageFaults();
@@ -629,22 +637,10 @@ stat_exit(int alloc)
statsPrintf("\n");
#if defined(THREADED_RTS)
- {
- nat i;
- Task *task;
- statsPrintf(" MUT time (elapsed) GC time (elapsed)\n");
- for (i = 0, task = all_tasks;
- task != NULL;
- i++, task = task->all_link) {
- statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n",
- i,
- (task->worker) ? "(worker)" : "(bound)",
- TimeToSecondsDbl(task->mut_time),
- TimeToSecondsDbl(task->mut_etime),
- TimeToSecondsDbl(task->gc_time),
- TimeToSecondsDbl(task->gc_etime));
- }
- }
+ statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
+ taskCount, taskCount - workerCount,
+ peakWorkerCount, workerCount,
+ n_capabilities);
statsPrintf("\n");
diff --git a/rts/Task.c b/rts/Task.c
index 36dd0a94b9..f4a37bf6ff 100644
--- a/rts/Task.c
+++ b/rts/Task.c
@@ -26,7 +26,12 @@
// Task lists and global counters.
// Locks required: all_tasks_mutex.
Task *all_tasks = NULL;
-static nat taskCount;
+
+nat taskCount;
+nat workerCount;
+nat currentWorkerCount;
+nat peakWorkerCount;
+
static int tasksInitialized = 0;
static void freeTask (Task *task);
@@ -64,8 +69,11 @@ void
initTaskManager (void)
{
if (!tasksInitialized) {
- taskCount = 0;
- tasksInitialized = 1;
+ taskCount = 0;
+ workerCount = 0;
+ currentWorkerCount = 0;
+ peakWorkerCount = 0;
+ tasksInitialized = 1;
#if defined(THREADED_RTS)
#if !defined(MYTASK_USE_TLV)
newThreadLocalKey(&currentTaskKey);
@@ -87,7 +95,7 @@ freeTaskManager (void)
ACQUIRE_LOCK(&all_tasks_mutex);
for (task = all_tasks; task != NULL; task = next) {
- next = task->all_link;
+ next = task->all_next;
if (task->stopped) {
freeTask(task);
} else {
@@ -164,9 +172,6 @@ freeTask (Task *task)
static Task*
newTask (rtsBool worker)
{
-#if defined(THREADED_RTS)
- Time currentElapsedTime, currentUserTime;
-#endif
Task *task;
#define ROUND_TO_CACHE_LINE(x) ((((x)+63) / 64) * 64)
@@ -186,26 +191,25 @@ newTask (rtsBool worker)
task->wakeup = rtsFalse;
#endif
-#if defined(THREADED_RTS)
- currentUserTime = getThreadCPUTime();
- currentElapsedTime = getProcessElapsedTime();
- task->mut_time = 0;
- task->mut_etime = 0;
- task->gc_time = 0;
- task->gc_etime = 0;
- task->muttimestart = currentUserTime;
- task->elapsedtimestart = currentElapsedTime;
-#endif
-
task->next = NULL;
ACQUIRE_LOCK(&all_tasks_mutex);
- task->all_link = all_tasks;
+ task->all_prev = NULL;
+ task->all_next = all_tasks;
+ if (all_tasks != NULL) {
+ all_tasks->all_prev = task;
+ }
all_tasks = task;
taskCount++;
-
+ if (worker) {
+ workerCount++;
+ currentWorkerCount++;
+ if (currentWorkerCount > peakWorkerCount) {
+ peakWorkerCount = currentWorkerCount;
+ }
+ }
RELEASE_LOCK(&all_tasks_mutex);
return task;
@@ -314,14 +318,15 @@ discardTasksExcept (Task *keep)
// Wipe the task list, except the current Task.
ACQUIRE_LOCK(&all_tasks_mutex);
for (task = all_tasks; task != NULL; task=next) {
- next = task->all_link;
+ next = task->all_next;
if (task != keep) {
debugTrace(DEBUG_sched, "discarding task %ld", (long)TASK_ID(task));
freeTask(task);
}
}
all_tasks = keep;
- keep->all_link = NULL;
+ keep->all_next = NULL;
+ keep->all_prev = NULL;
RELEASE_LOCK(&all_tasks_mutex);
}
@@ -337,7 +342,7 @@ void updateCapabilityRefs (void)
ACQUIRE_LOCK(&all_tasks_mutex);
- for (task = all_tasks; task != NULL; task=task->all_link) {
+ for (task = all_tasks; task != NULL; task=task->all_next) {
if (task->cap != NULL) {
task->cap = &capabilities[task->cap->no];
}
@@ -353,34 +358,6 @@ void updateCapabilityRefs (void)
}
-void
-taskTimeStamp (Task *task USED_IF_THREADS)
-{
-#if defined(THREADED_RTS)
- Time currentElapsedTime, currentUserTime;
-
- currentUserTime = getThreadCPUTime();
- currentElapsedTime = getProcessElapsedTime();
-
- task->mut_time =
- currentUserTime - task->muttimestart - task->gc_time;
- task->mut_etime =
- currentElapsedTime - task->elapsedtimestart - task->gc_etime;
-
- if (task->gc_time < 0) { task->gc_time = 0; }
- if (task->gc_etime < 0) { task->gc_etime = 0; }
- if (task->mut_time < 0) { task->mut_time = 0; }
- if (task->mut_etime < 0) { task->mut_etime = 0; }
-#endif
-}
-
-void
-taskDoneGC (Task *task, Time cpu_time, Time elapsed_time)
-{
- task->gc_time += cpu_time;
- task->gc_etime += elapsed_time;
-}
-
#if defined(THREADED_RTS)
void
@@ -391,9 +368,22 @@ workerTaskStop (Task *task)
ASSERT(task->id == id);
ASSERT(myTask() == task);
- task->cap = NULL;
- taskTimeStamp(task);
- task->stopped = rtsTrue;
+ ACQUIRE_LOCK(&all_tasks_mutex);
+
+ if (task->all_prev) {
+ task->all_prev->all_next = task->all_next;
+ } else {
+ all_tasks = task->all_next;
+ }
+ if (task->all_next) {
+ task->all_next->all_prev = task->all_prev;
+ }
+
+ currentWorkerCount--;
+
+ RELEASE_LOCK(&all_tasks_mutex);
+
+ freeTask(task);
}
#endif
@@ -491,7 +481,7 @@ void
printAllTasks(void)
{
Task *task;
- for (task = all_tasks; task != NULL; task = task->all_link) {
+ for (task = all_tasks; task != NULL; task = task->all_next) {
debugBelch("task %p is %s, ", taskId(task), task->stopped ? "stopped" : "alive");
if (!task->stopped) {
if (task->cap) {
diff --git a/rts/Task.h b/rts/Task.h
index 59a316bd81..ab47a07fc3 100644
--- a/rts/Task.h
+++ b/rts/Task.h
@@ -143,25 +143,13 @@ typedef struct Task_ {
// So that we can detect when a finalizer illegally calls back into Haskell
rtsBool running_finalizers;
- // Stats that we collect about this task
- // ToDo: we probably want to put this in a separate TaskStats
- // structure, so we can share it between multiple Tasks. We don't
- // really want separate stats for each call in a nested chain of
- // foreign->haskell->foreign->haskell calls, but we'll get a
- // separate Task for each of the haskell calls.
- Time elapsedtimestart;
- Time muttimestart;
- Time mut_time;
- Time mut_etime;
- Time gc_time;
- Time gc_etime;
-
// Links tasks on the returning_tasks queue of a Capability, and
// on spare_workers.
struct Task_ *next;
// Links tasks on the all_tasks list
- struct Task_ *all_link;
+ struct Task_ *all_next;
+ struct Task_ *all_prev;
} Task;
@@ -201,15 +189,6 @@ void boundTaskExiting (Task *task);
void workerTaskStop (Task *task);
#endif
-// Record the time spent in this Task.
-// This is called by workerTaskStop() but not by boundTaskExiting(),
-// because it would impose an extra overhead on call-in.
-//
-void taskTimeStamp (Task *task);
-
-// The current Task has finished a GC, record the amount of time spent.
-void taskDoneGC (Task *task, Time cpu_time, Time elapsed_time);
-
// Put the task back on the free list, mark it stopped. Used by
// forkProcess().
//
@@ -240,6 +219,11 @@ void interruptWorkerTask (Task *task);
//
void updateCapabilityRefs (void);
+// For stats
+extern nat taskCount;
+extern nat workerCount;
+extern nat peakWorkerCount;
+
// -----------------------------------------------------------------------------
// INLINE functions... private from here on down:
diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c
index b1be93d04b..c29454809f 100644
--- a/rts/posix/OSThreads.c
+++ b/rts/posix/OSThreads.c
@@ -198,7 +198,6 @@ forkOS_createThreadWrapper ( void * entry )
Capability *cap;
cap = rts_lock();
rts_evalStableIO(&cap, (HsStablePtr) entry, NULL);
- taskTimeStamp(myTask());
rts_unlock(cap);
return NULL;
}
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 4f71026dfa..987f78b497 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -979,7 +979,7 @@ compact(StgClosure *static_objects)
{
Task *task;
InCall *incall;
- for (task = all_tasks; task != NULL; task = task->all_link) {
+ for (task = all_tasks; task != NULL; task = task->all_next) {
for (incall = task->incall; incall != NULL;
incall = incall->prev_stack) {
if (incall->tso) {