diff options
author | Simon Marlow <marlowsd@gmail.com> | 2011-11-28 16:48:43 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2011-11-29 12:21:18 +0000 |
commit | 50de6034343abc93a7b01daccff34121042c0e7c (patch) | |
tree | 24496a5fc6bc39c6baaa574608e53c5d76c169f6 /includes | |
parent | 1c2b838131134d44004dfdff18c302131478390d (diff) | |
download | haskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz |
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel
programs. Main internal changes:
- CCCS is no longer a global variable; it is now another
pseudo-register in the StgRegTable struct. Thus every
Capability has its own CCCS.
- There is a new built-in CCS called "IDLE", which records ticks for
Capabilities in the idle state. If you profile a single-threaded
program with +RTS -N2, you'll see about 50% of time in "IDLE".
- There is appropriate locking in rts/Profiling.c to protect the
shared cost-centre-stack data structures.
This patch does enough to get it working, but I have cut one big corner:
the cost-centre-stack data structure is still shared amongst all
Capabilities, which means that multiple Capabilities will race when
updating the "allocations" and "entries" fields of a CCS. Not only
does this give unpredictable results, but it runs very slowly due to
cache line bouncing.
It is strongly recommended that you use -fno-prof-count-entries to
disable the "entries" count when profiling parallel programs. (I shall
add a note to this effect to the docs).
Diffstat (limited to 'includes')
-rw-r--r-- | includes/Cmm.h | 2 | ||||
-rw-r--r-- | includes/RtsAPI.h | 9 | ||||
-rw-r--r-- | includes/mkDerivedConstants.c | 1 | ||||
-rw-r--r-- | includes/rts/prof/CCS.h | 7 | ||||
-rw-r--r-- | includes/stg/MiscClosures.h | 1 | ||||
-rw-r--r-- | includes/stg/Regs.h | 1 |
6 files changed, 16 insertions, 5 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 641faa216e..11c02b4e3e 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -372,7 +372,7 @@
    CCCS_ALLOC(bytes);
 
 /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
-#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), W_[CCCS])
+#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
 
 #define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \
    HP_CHK_GEN(alloc,liveness,reentry); \
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 329b1569ab..8d948f9b49 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -37,6 +37,15 @@ typedef struct StgClosure_ *HaskellObj;
  */
 typedef struct Capability_ Capability;
 
+/*
+ * The public view of a Capability: we can be sure it starts with
+ * these two components (but it may have more private fields).
+ */
+typedef struct CapabilityPublic_ {
+    StgFunTable f;
+    StgRegTable r;
+} CapabilityPublic;
+
 /* ----------------------------------------------------------------------------
    RTS configuration settings, for passing to hs_init_ghc()
    ------------------------------------------------------------------------- */
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index b02b6c86f0..a2c9160e95 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -222,6 +222,7 @@ main(int argc, char *argv[])
     field_offset(StgRegTable, rSpLim);
     field_offset(StgRegTable, rHp);
     field_offset(StgRegTable, rHpLim);
+    field_offset(StgRegTable, rCCCS);
     field_offset(StgRegTable, rCurrentTSO);
     field_offset(StgRegTable, rCurrentNursery);
     field_offset(StgRegTable, rHpAlloc);
diff --git a/includes/rts/prof/CCS.h b/includes/rts/prof/CCS.h
index 4692d166b5..9737fc9c18 100644
--- a/includes/rts/prof/CCS.h
+++ b/includes/rts/prof/CCS.h
@@ -114,8 +114,6 @@ typedef struct _IndexTable {
    Pre-defined cost centres and cost centre stacks
    -------------------------------------------------------------------------- */
 
-extern CostCentreStack * RTS_VAR(CCCS); /* current CCS */
-
 #if IN_STG_CODE
 
 extern StgWord CC_MAIN[];
@@ -153,6 +151,9 @@ extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set
 extern CostCentre CC_PINNED[];
 extern CostCentreStack CCS_PINNED[]; // pinned memory
 
+extern CostCentre CC_IDLE[];
+extern CostCentreStack CCS_IDLE[]; // capability is idle
+
 #endif /* IN_STG_CODE */
 
 extern unsigned int RTS_VAR(CC_ID); // global ids
@@ -165,7 +166,7 @@ extern unsigned int RTS_VAR(era);
  * ---------------------------------------------------------------------------*/
 
 CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *);
-void enterFunCCS (CostCentreStack *);
+void enterFunCCS (StgRegTable *reg, CostCentreStack *);
 
 /* -----------------------------------------------------------------------------
    Registering CCs and CCSs
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 45dc8369c7..fcfdede2ff 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -488,7 +488,6 @@ extern StgWord RTS_VAR(stable_ptr_table);
 
 // Profiling.c
 extern unsigned int RTS_VAR(era);
-extern StgWord RTS_VAR(CCCS); /* current CCS */
 extern unsigned int RTS_VAR(entering_PAP);
 extern StgWord RTS_VAR(CC_LIST); /* registered CC list */
 extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 1d0c00c491..b7f0abda7d 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -80,6 +80,7 @@ typedef struct StgRegTable_ {
   StgPtr rSpLim;
   StgPtr rHp;
   StgPtr rHpLim;
+  struct _CostCentreStack * rCCCS; // current cost-centre-stack
   struct StgTSO_ * rCurrentTSO;
   struct nursery_ * rNursery;
   struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */