summary | refs | log | tree | commit | diff
path: root/includes
diff options
context:
space:
mode:
author: Simon Marlow <marlowsd@gmail.com> 2011-11-28 16:48:43 +0000
committer: Simon Marlow <marlowsd@gmail.com> 2011-11-29 12:21:18 +0000
commit: 50de6034343abc93a7b01daccff34121042c0e7c (patch)
tree: 24496a5fc6bc39c6baaa574608e53c5d76c169f6 /includes
parent: 1c2b838131134d44004dfdff18c302131478390d (diff)
download: haskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel programs. Main internal changes:

  - CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable struct. Thus every Capability has its own CCCS.

  - There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE".

  - There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures.

This patch does enough to get it working, but I have cut one big corner: the cost-centre-stack data structure is still shared amongst all Capabilities, which means that multiple Capabilities will race when updating the "allocations" and "entries" fields of a CCS. Not only does this give unpredictable results, but it runs very slowly due to cache line bouncing.

It is strongly recommended that you use -fno-prof-count-entries to disable the "entries" count when profiling parallel programs. (I shall add a note to this effect to the docs).
Diffstat (limited to 'includes')
-rw-r--r--  includes/Cmm.h                 2
-rw-r--r--  includes/RtsAPI.h              9
-rw-r--r--  includes/mkDerivedConstants.c  1
-rw-r--r--  includes/rts/prof/CCS.h        7
-rw-r--r--  includes/stg/MiscClosures.h    1
-rw-r--r--  includes/stg/Regs.h            1
6 files changed, 16 insertions, 5 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 641faa216e..11c02b4e3e 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -372,7 +372,7 @@
CCCS_ALLOC(bytes);
/* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
-#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), W_[CCCS])
+#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
#define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \
HP_CHK_GEN(alloc,liveness,reentry); \
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 329b1569ab..8d948f9b49 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -37,6 +37,15 @@ typedef struct StgClosure_ *HaskellObj;
*/
typedef struct Capability_ Capability;
+/*
+ * The public view of a Capability: we can be sure it starts with
+ * these two components (but it may have more private fields).
+ */
+typedef struct CapabilityPublic_ {
+ StgFunTable f;
+ StgRegTable r;
+} CapabilityPublic;
+
/* ----------------------------------------------------------------------------
RTS configuration settings, for passing to hs_init_ghc()
------------------------------------------------------------------------- */
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index b02b6c86f0..a2c9160e95 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -222,6 +222,7 @@ main(int argc, char *argv[])
field_offset(StgRegTable, rSpLim);
field_offset(StgRegTable, rHp);
field_offset(StgRegTable, rHpLim);
+ field_offset(StgRegTable, rCCCS);
field_offset(StgRegTable, rCurrentTSO);
field_offset(StgRegTable, rCurrentNursery);
field_offset(StgRegTable, rHpAlloc);
diff --git a/includes/rts/prof/CCS.h b/includes/rts/prof/CCS.h
index 4692d166b5..9737fc9c18 100644
--- a/includes/rts/prof/CCS.h
+++ b/includes/rts/prof/CCS.h
@@ -114,8 +114,6 @@ typedef struct _IndexTable {
Pre-defined cost centres and cost centre stacks
-------------------------------------------------------------------------- */
-extern CostCentreStack * RTS_VAR(CCCS); /* current CCS */
-
#if IN_STG_CODE
extern StgWord CC_MAIN[];
@@ -153,6 +151,9 @@ extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set
extern CostCentre CC_PINNED[];
extern CostCentreStack CCS_PINNED[]; // pinned memory
+extern CostCentre CC_IDLE[];
+extern CostCentreStack CCS_IDLE[]; // capability is idle
+
#endif /* IN_STG_CODE */
extern unsigned int RTS_VAR(CC_ID); // global ids
@@ -165,7 +166,7 @@ extern unsigned int RTS_VAR(era);
* ---------------------------------------------------------------------------*/
CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *);
-void enterFunCCS (CostCentreStack *);
+void enterFunCCS (StgRegTable *reg, CostCentreStack *);
/* -----------------------------------------------------------------------------
Registering CCs and CCSs
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 45dc8369c7..fcfdede2ff 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -488,7 +488,6 @@ extern StgWord RTS_VAR(stable_ptr_table);
// Profiling.c
extern unsigned int RTS_VAR(era);
-extern StgWord RTS_VAR(CCCS); /* current CCS */
extern unsigned int RTS_VAR(entering_PAP);
extern StgWord RTS_VAR(CC_LIST); /* registered CC list */
extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 1d0c00c491..b7f0abda7d 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -80,6 +80,7 @@ typedef struct StgRegTable_ {
StgPtr rSpLim;
StgPtr rHp;
StgPtr rHpLim;
+ struct _CostCentreStack * rCCCS; // current cost-centre-stack
struct StgTSO_ * rCurrentTSO;
struct nursery_ * rNursery;
struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */