summaryrefslogtreecommitdiff
path: root/rts/Capability.c
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2011-11-28 16:48:43 +0000
committerSimon Marlow <marlowsd@gmail.com>2011-11-29 12:21:18 +0000
commit50de6034343abc93a7b01daccff34121042c0e7c (patch)
tree24496a5fc6bc39c6baaa574608e53c5d76c169f6 /rts/Capability.c
parent1c2b838131134d44004dfdff18c302131478390d (diff)
downloadhaskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel programs. Main internal changes: - CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable struct. Thus every Capability has its own CCCS. - There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE". - There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures. This patch does enough to get it working, but I have cut one big corner: the cost-centre-stack data structure is still shared amongst all Capabilities, which means that multiple Capabilities will race when updating the "allocations" and "entries" fields of a CCS. Not only does this give unpredictable results, but it also runs very slowly due to cache line bouncing. It is strongly recommended that you use -fno-prof-count-entries to disable the "entries" count when profiling parallel programs. (I shall add a note to this effect to the docs.)
Diffstat (limited to 'rts/Capability.c')
-rw-r--r--rts/Capability.c21
1 files changed, 18 insertions, 3 deletions
diff --git a/rts/Capability.c b/rts/Capability.c
index 3b45dec360..fd9f64f147 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -46,7 +46,7 @@ volatile StgWord waiting_for_gc = 0;
/* Let foreign code get the current Capability -- assuming there is one!
* This is useful for unsafe foreign calls because they are called with
* the current Capability held, but they are not passed it. For example,
- * see see the integer-gmp package which calls allocateLocal() in its
+ * see the integer-gmp package which calls allocate() in its
* stgAllocForGMP() function (which gets called by gmp functions).
* */
Capability * rts_unsafeGetMyCapability (void)
@@ -265,6 +265,10 @@ initCapability( Capability *cap, nat i )
cap->context_switch = 0;
cap->pinned_object_block = NULL;
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_SYSTEM;
+#endif
+
traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i);
traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i);
#if defined(THREADED_RTS)
@@ -453,6 +457,9 @@ releaseCapability_ (Capability* cap,
}
}
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_IDLE;
+#endif
last_free_capability = cap;
debugTrace(DEBUG_sched, "freeing capability %d", cap->no);
}
@@ -604,6 +611,9 @@ waitForReturnCapability (Capability **pCap, Task *task)
}
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_SYSTEM;
+#endif
ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
@@ -676,14 +686,19 @@ yieldCapability (Capability** pCap, Task *task)
task->next = NULL;
cap->n_spare_workers--;
}
- cap->running_task = task;
+
+ cap->running_task = task;
RELEASE_LOCK(&cap->lock);
break;
}
- debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
+ debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
ASSERT(cap->running_task == task);
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_SYSTEM;
+#endif
+
*pCap = cap;
ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);