Diffstat (limited to 'rts')
-rw-r--r--  rts/Adjustor.c | 3
-rw-r--r--  rts/Apply.cmm | 7
-rw-r--r--  rts/Capability.c | 8
-rw-r--r--  rts/Capability.h | 3
-rw-r--r--  rts/CheckUnload.c | 8
-rw-r--r--  rts/ClosureFlags.c | 12
-rw-r--r--  rts/Compact.cmm | 47
-rw-r--r--  rts/Disassembler.c | 53
-rw-r--r--  rts/Exception.cmm | 10
-rw-r--r--  rts/GetTime.h | 1
-rw-r--r--  rts/Globals.c | 2
-rw-r--r--  rts/Hash.c | 23
-rw-r--r--  rts/Hash.h | 2
-rw-r--r--  rts/Heap.c | 225
-rw-r--r--  rts/Hpc.c | 5
-rw-r--r--  rts/HsFFI.c | 20
-rw-r--r--  rts/Interpreter.c | 97
-rw-r--r--  rts/LdvProfile.c | 13
-rw-r--r--  rts/Libdw.h | 3
-rw-r--r--  rts/Linker.c | 111
-rw-r--r--  rts/LinkerInternals.h | 45
-rw-r--r--  rts/Messages.c | 4
-rw-r--r--  rts/Messages.h | 4
-rw-r--r--  rts/PathUtils.c | 5
-rw-r--r--  rts/PathUtils.h | 2
-rw-r--r--  rts/PosixSource.h | 13
-rw-r--r--  rts/Prelude.h | 2
-rw-r--r--  rts/PrimOps.cmm | 399
-rw-r--r--  rts/Printer.c | 122
-rw-r--r--  rts/Printer.h | 1
-rw-r--r--  rts/ProfHeap.c | 18
-rw-r--r--  rts/Profiling.c | 24
-rw-r--r--  rts/RaiseAsync.c | 11
-rw-r--r--  rts/RetainerProfile.c | 258
-rw-r--r--  rts/RetainerProfile.h | 2
-rw-r--r--  rts/RetainerSet.c | 141
-rw-r--r--  rts/RetainerSet.h | 25
-rw-r--r--  rts/RtsAPI.c | 4
-rw-r--r--  rts/RtsFlags.c | 304
-rw-r--r--  rts/RtsFlags.h | 5
-rw-r--r--  rts/RtsMain.c | 24
-rw-r--r--  rts/RtsMessages.c | 47
-rw-r--r--  rts/RtsProbes.d | 7
-rw-r--r--  rts/RtsSignals.h | 8
-rw-r--r--  rts/RtsStartup.c | 47
-rw-r--r--  rts/RtsSymbolInfo.h | 2
-rw-r--r--  rts/RtsSymbols.c | 44
-rw-r--r--  rts/RtsUtils.c | 6
-rw-r--r--  rts/RtsUtils.h | 5
-rw-r--r--  rts/SMPClosureOps.h | 21
-rw-r--r--  rts/STM.c | 383
-rw-r--r--  rts/STM.h | 13
-rw-r--r--  rts/Schedule.c | 89
-rw-r--r--  rts/Schedule.h | 2
-rw-r--r--  rts/Sparks.c | 18
-rw-r--r--  rts/StableName.c | 349
-rw-r--r--  rts/StableName.h | 31
-rw-r--r--  rts/StablePtr.c (renamed from rts/Stable.c) | 339
-rw-r--r--  rts/StablePtr.h (renamed from rts/Stable.h) | 27
-rw-r--r--  rts/StaticPtrTable.c | 17
-rw-r--r--  rts/Stats.c | 980
-rw-r--r--  rts/Stats.h | 52
-rw-r--r--  rts/StgCRun.c | 251
-rw-r--r--  rts/StgMiscClosures.cmm | 174
-rw-r--r--  rts/StgStartup.cmm | 24
-rw-r--r--  rts/Task.c | 29
-rw-r--r--  rts/ThreadPaused.c | 13
-rw-r--r--  rts/ThreadPaused.h | 8
-rw-r--r--  rts/Threads.c | 34
-rw-r--r--  rts/TopHandler.c | 2
-rw-r--r--  rts/TopHandler.h | 1
-rw-r--r--  rts/Trace.c | 18
-rw-r--r--  rts/Trace.h | 9
-rw-r--r--  rts/Updates.cmm | 4
-rw-r--r--  rts/Weak.c | 124
-rw-r--r--  rts/Weak.h | 1
-rw-r--r--  rts/eventlog/EventLog.c | 140
-rw-r--r--  rts/eventlog/EventLog.h | 3
-rw-r--r--  rts/eventlog/EventLogWriter.c | 3
-rw-r--r--  rts/fs_rts.h | 15
-rw-r--r--  rts/ghc.mk | 112
-rw-r--r--  rts/hooks/Hooks.h | 2
-rw-r--r--  rts/hooks/LongGCSync.c | 41
-rw-r--r--  rts/linker/Elf.c | 51
-rw-r--r--  rts/linker/LoadArchive.c | 12
-rw-r--r--  rts/linker/PEi386.c | 1367
-rw-r--r--  rts/linker/PEi386.h | 43
-rw-r--r--  rts/linker/PEi386Types.h | 35
-rw-r--r--  rts/linker/SymbolExtras.c | 8
-rw-r--r--  rts/linker/elf_got.c | 3
-rw-r--r--  rts/linker/elf_reloc_aarch64.c | 2
-rw-r--r--  rts/linker/elf_util.c | 4
-rw-r--r--  rts/package.conf.in | 12
-rw-r--r--  rts/posix/OSMem.c | 169
-rw-r--r--  rts/posix/OSThreads.c | 3
-rw-r--r--  rts/posix/Select.c | 7
-rw-r--r--  rts/posix/Signals.c | 39
-rw-r--r--  rts/posix/itimer/Pthread.c | 25
-rw-r--r--  rts/rts.cabal.in | 477
-rw-r--r--  rts/sm/BlockAlloc.c | 15
-rw-r--r--  rts/sm/CNF.c | 16
-rw-r--r--  rts/sm/Compact.c | 26
-rw-r--r--  rts/sm/Compact.h | 12
-rw-r--r--  rts/sm/Evac.c | 130
-rw-r--r--  rts/sm/GC.c | 232
-rw-r--r--  rts/sm/GC.h | 10
-rw-r--r--  rts/sm/GCThread.h | 6
-rw-r--r--  rts/sm/GCUtils.c | 31
-rw-r--r--  rts/sm/GCUtils.h | 6
-rw-r--r--  rts/sm/MarkWeak.c | 17
-rw-r--r--  rts/sm/OSMem.h | 1
-rw-r--r--  rts/sm/Sanity.c | 23
-rw-r--r--  rts/sm/Scav.c | 217
-rw-r--r--  rts/sm/Storage.c | 126
-rw-r--r--  rts/win32/ConsoleHandler.c | 26
-rw-r--r--  rts/win32/GetTime.c | 5
-rw-r--r--  rts/win32/IOManager.c | 24
-rw-r--r--  rts/win32/OSMem.c | 30
-rw-r--r--  rts/win32/OSThreads.c | 6
-rw-r--r--  rts/win32/libHSbase.def | 1
-rw-r--r--  rts/win32/veh_excn.c | 259
-rw-r--r--  rts/win32/veh_excn.h | 5
-rw-r--r--  rts/xxhash.c | 888
-rw-r--r--  rts/xxhash.h | 293
124 files changed, 6964 insertions, 3259 deletions
diff --git a/rts/Adjustor.c b/rts/Adjustor.c
index f2b4355831..476d63140e 100644
--- a/rts/Adjustor.c
+++ b/rts/Adjustor.c
@@ -40,7 +40,7 @@ Haskell side.
#include "Rts.h"
#include "RtsUtils.h"
-#include "Stable.h"
+#include "StablePtr.h"
#if defined(USE_LIBFFI_FOR_ADJUSTORS)
#include "ffi.h"
@@ -357,6 +357,7 @@ static int totalArgumentSize(char *typeString)
sz += 2;
break;
}
+ /* fall through */
// everything else is one word.
default:
sz += 1;
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index ffcd0352a1..40f890d342 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -537,7 +537,7 @@ now have,
│ etc. │
Now consider what happens when TSO 1 runs its duplicate-computation check.
-Again, we start walking the stack from the top, where we we find the update
+Again, we start walking the stack from the top, where we find the update
frame updating THUNK A. We will lock this thunk, replacing it with a BLACKHOLE
owned by its TSO. We now have,
@@ -679,6 +679,11 @@ for:
R1 = StgAP_STACK_fun(ap);
+ // Because of eager blackholing the closure no longer has correct size so
+ // threadPaused() can't correctly zero the slop, so we do it here. See #15571
+ // and Note [zeroing slop].
+ OVERWRITING_CLOSURE_SIZE(ap, BYTES_TO_WDS(SIZEOF_StgThunkHeader) + 2 + Words);
+
ENTER_R1();
}
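
The comment added above motivates OVERWRITING_CLOSURE_SIZE: once the AP_STACK has been eagerly blackholed, threadPaused() can no longer work out the closure's true extent, so the slop is zeroed at this call site instead. As a rough, standalone sketch of what "zeroing slop" amounts to (the type and names below are stand-ins, not the RTS's own macro):

    #include <stddef.h>
    #include <stdint.h>

    typedef uintptr_t W_;   /* stand-in for the RTS word type */

    /* When a closure shrinks in place, the words between its new and old
       extent are dead "slop" and must not look like live heap data to
       the heap profiler or the sanity checker. */
    static void zero_slop(W_ *closure, size_t old_size_w, size_t new_size_w)
    {
        for (size_t i = new_size_w; i < old_size_w; i++) {
            closure[i] = 0;
        }
    }
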
diff --git a/rts/Capability.c b/rts/Capability.c
index 1023be87ec..74f7a295e9 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -10,7 +10,7 @@
 * STG execution, a pointer to the capability is kept in a
* register (BaseReg; actually it is a pointer to cap->r).
*
- * Only in an THREADED_RTS build will there be multiple capabilities,
+ * Only in a THREADED_RTS build will there be multiple capabilities,
* for non-threaded builds there is only one global capability, namely
* MainCapability.
*
@@ -298,7 +298,6 @@ initCapability (Capability *cap, uint32_t i)
cap->weak_ptr_list_hd = NULL;
cap->weak_ptr_list_tl = NULL;
cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE;
- cap->free_invariant_check_queues = END_INVARIANT_CHECK_QUEUE;
cap->free_trec_chunks = END_STM_CHUNK_LIST;
cap->free_trec_headers = NO_TREC;
cap->transaction_tokens = 0;
@@ -362,7 +361,7 @@ void initCapabilities (void)
}
n_numa_nodes = logical;
if (logical == 0) {
- barf("%s: available NUMA node set is empty");
+ barf("available NUMA node set is empty");
}
}
@@ -498,6 +497,9 @@ giveCapabilityToTask (Capability *cap USED_IF_DEBUG, Task *task)
*
* The current Task (cap->task) releases the Capability. The Capability is
* marked free, and if there is any work to do, an appropriate Task is woken up.
+ *
+ * N.B. May need to take all_tasks_mutex.
+ *
* ------------------------------------------------------------------------- */
#if defined(THREADED_RTS)
diff --git a/rts/Capability.h b/rts/Capability.h
index 5ab693e516..250ec2219c 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -109,7 +109,7 @@ struct Capability_ {
int interrupt;
// Total words allocated by this cap since rts start
- // See [Note allocation accounting] in Storage.c
+ // See Note [allocation accounting] in Storage.c
W_ total_allocated;
#if defined(THREADED_RTS)
@@ -154,7 +154,6 @@ struct Capability_ {
// Per-capability STM-related data
StgTVarWatchQueue *free_tvar_watch_queues;
- StgInvariantCheckQueue *free_invariant_check_queues;
StgTRecChunk *free_trec_chunks;
StgTRecHeader *free_trec_headers;
uint32_t transaction_tokens;
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c
index c3958e938e..fa4843d8e4 100644
--- a/rts/CheckUnload.c
+++ b/rts/CheckUnload.c
@@ -200,16 +200,16 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
prim = true;
size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
break;
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
prim = true;
size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
break;
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c
index 9bf301552d..f8dba8f326 100644
--- a/rts/ClosureFlags.c
+++ b/rts/ClosureFlags.c
@@ -66,8 +66,8 @@ StgWord16 closure_flags[] = {
[ARR_WORDS] = (_HNF| _NS| _UPT ),
[MUT_ARR_PTRS_CLEAN] = (_HNF| _NS| _MUT|_UPT ),
[MUT_ARR_PTRS_DIRTY] = (_HNF| _NS| _MUT|_UPT ),
- [MUT_ARR_PTRS_FROZEN0] = (_HNF| _NS| _MUT|_UPT ),
- [MUT_ARR_PTRS_FROZEN] = (_HNF| _NS| _UPT ),
+ [MUT_ARR_PTRS_FROZEN_DIRTY] = (_HNF| _NS| _MUT|_UPT ),
+ [MUT_ARR_PTRS_FROZEN_CLEAN] = (_HNF| _NS| _UPT ),
[MUT_VAR_CLEAN] = (_HNF| _NS| _MUT|_UPT ),
[MUT_VAR_DIRTY] = (_HNF| _NS| _MUT|_UPT ),
[WEAK] = (_HNF| _NS| _UPT ),
@@ -80,10 +80,10 @@ StgWord16 closure_flags[] = {
[CATCH_RETRY_FRAME] = ( _BTM ),
[CATCH_STM_FRAME] = ( _BTM ),
[WHITEHOLE] = ( 0 ),
- [SMALL_MUT_ARR_PTRS_CLEAN] = (_HNF| _NS| _MUT|_UPT ),
- [SMALL_MUT_ARR_PTRS_DIRTY] = (_HNF| _NS| _MUT|_UPT ),
- [SMALL_MUT_ARR_PTRS_FROZEN0] = (_HNF| _NS| _MUT|_UPT ),
- [SMALL_MUT_ARR_PTRS_FROZEN] = (_HNF| _NS| _UPT ),
+ [SMALL_MUT_ARR_PTRS_CLEAN] = (_HNF| _NS| _MUT|_UPT ),
+ [SMALL_MUT_ARR_PTRS_DIRTY] = (_HNF| _NS| _MUT|_UPT ),
+ [SMALL_MUT_ARR_PTRS_FROZEN_DIRTY] = (_HNF| _NS| _MUT|_UPT ),
+ [SMALL_MUT_ARR_PTRS_FROZEN_CLEAN] = (_HNF| _NS| _UPT ),
[COMPACT_NFDATA] = (_HNF| _NS ),
};
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index f20fdbf8bf..061646846d 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -24,7 +24,7 @@ import CLOSURE base_GHCziIOziException_cannotCompactPinned_closure;
hp = StgCompactNFData_hp(compact); \
if (hp + WDS(sizeW) <= StgCompactNFData_hpLim(compact)) { \
to = hp; \
- StgCompactNFData_hp(compact) = hp + WDS(sizeW); \
+ StgCompactNFData_hp(compact) = hp + WDS(sizeW); \
} else { \
("ptr" to) = ccall allocateForCompact( \
MyCapability() "ptr", compact "ptr", sizeW); \
@@ -154,8 +154,8 @@ eval:
}
case
- MUT_ARR_PTRS_FROZEN0,
- MUT_ARR_PTRS_FROZEN: {
+ MUT_ARR_PTRS_FROZEN_DIRTY,
+ MUT_ARR_PTRS_FROZEN_CLEAN: {
(should) = ccall shouldCompact(compact "ptr", p "ptr");
if (should == SHOULDCOMPACT_IN_CNF) { P_[pp] = p; return(); }
@@ -174,7 +174,7 @@ eval:
prim %memcpy(to + cards, p + cards , size - cards, 1);
i = 0;
loop0:
- if (i < ptrs) {
+ if (i < ptrs) ( likely: True ) {
W_ q;
q = to + SIZEOF_StgMutArrPtrs + WDS(i);
call stg_compactAddWorkerzh(
@@ -186,11 +186,31 @@ eval:
}
case
- SMALL_MUT_ARR_PTRS_FROZEN0,
- SMALL_MUT_ARR_PTRS_FROZEN: {
- // (P_ to) = allocateForCompact(cap, compact, size);
- // use prim memcpy
- ccall barf("stg_compactAddWorkerzh: TODO: SMALL_MUT_ARR_PTRS");
+ SMALL_MUT_ARR_PTRS_FROZEN_DIRTY,
+ SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: {
+
+ (should) = ccall shouldCompact(compact "ptr", p "ptr");
+ if (should == SHOULDCOMPACT_IN_CNF) { P_[pp] = p; return(); }
+
+ CHECK_HASH();
+
+ W_ i, ptrs;
+ ptrs = StgSmallMutArrPtrs_ptrs(p);
+ ALLOCATE(compact, BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + ptrs, p, to, tag);
+ P_[pp] = tag | to;
+ SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
+ StgSmallMutArrPtrs_ptrs(to) = ptrs;
+ i = 0;
+ loop1:
+ if (i < ptrs) ( likely: True ) {
+ W_ q;
+ q = to + SIZEOF_StgSmallMutArrPtrs + WDS(i);
+ call stg_compactAddWorkerzh(
+ compact, P_[p + SIZEOF_StgSmallMutArrPtrs + WDS(i)], q);
+ i = i + 1;
+ goto loop1;
+ }
+ return();
}
// Everything else we should copy and evaluate the components:
@@ -220,16 +240,16 @@ eval:
// First, copy the non-pointers
if (nptrs > 0) {
i = ptrs;
- loop1:
+ loop2:
StgClosure_payload(to,i) = StgClosure_payload(p,i);
i = i + 1;
- if (i < ptrs + nptrs) goto loop1;
+ if (i < ptrs + nptrs) ( likely: True ) goto loop2;
}
// Next, recursively compact and copy the pointers
if (ptrs == 0) { return(); }
i = 0;
- loop2:
+ loop3:
W_ q;
q = to + SIZEOF_StgHeader + OFFSET_StgClosure_payload + WDS(i);
// Tail-call the last one. This means we don't build up a deep
@@ -239,7 +259,7 @@ eval:
}
call stg_compactAddWorkerzh(compact, StgClosure_payload(p,i), q);
i = i + 1;
- goto loop2;
+ goto loop3;
}
// these might be static closures that we can avoid copying into
@@ -435,4 +455,3 @@ stg_compactFixupPointerszh ( W_ first_block, W_ root )
gcstr = str;
return (gcstr, ok);
}
-
diff --git a/rts/Disassembler.c b/rts/Disassembler.c
index e133e3a6ff..01d6c3b1d9 100644
--- a/rts/Disassembler.c
+++ b/rts/Disassembler.c
@@ -68,7 +68,11 @@ disInstr ( StgBCO *bco, int pc )
case bci_BRK_FUN:
debugBelch ("BRK_FUN " ); printPtr( ptrs[instrs[pc]] );
debugBelch (" %d ", instrs[pc+1]); printPtr( ptrs[instrs[pc+2]] );
- debugBelch(" %s\n", ((CostCentre*)(literals[instrs[pc+3]]))->label);
+ CostCentre* cc = (CostCentre*)literals[instrs[pc+3]];
+ if (cc) {
+ debugBelch(" %s", cc->label);
+ }
+ debugBelch("\n");
pc += 4;
break;
case bci_SWIZZLE:
@@ -94,11 +98,28 @@ disInstr ( StgBCO *bco, int pc )
debugBelch("PUSH_LLL %d %d %d\n", instrs[pc], instrs[pc+1],
instrs[pc+2] );
pc += 3; break;
+ case bci_PUSH8:
+ debugBelch("PUSH8 %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH16:
+ debugBelch("PUSH16 %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH32:
+ debugBelch("PUSH32 %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH8_W:
+ debugBelch("PUSH8_W %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH16_W:
+ debugBelch("PUSH16_W %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH32_W:
+ debugBelch("PUSH32_W %d\n", instrs[pc] );
+ pc += 1; break;
case bci_PUSH_G:
debugBelch("PUSH_G " ); printPtr( ptrs[instrs[pc]] );
debugBelch("\n" );
pc += 1; break;
-
case bci_PUSH_ALTS:
debugBelch("PUSH_ALTS " ); printPtr( ptrs[instrs[pc]] );
debugBelch("\n");
@@ -127,7 +148,33 @@ disInstr ( StgBCO *bco, int pc )
debugBelch("PUSH_ALTS_V " ); printPtr( ptrs[instrs[pc]] );
debugBelch("\n");
pc += 1; break;
-
+ case bci_PUSH_PAD8:
+ debugBelch("PUSH_PAD8\n");
+ pc += 1; break;
+ case bci_PUSH_PAD16:
+ debugBelch("PUSH_PAD16\n");
+ pc += 1; break;
+ case bci_PUSH_PAD32:
+ debugBelch("PUSH_PAD32\n");
+ pc += 1; break;
+ case bci_PUSH_UBX8:
+ debugBelch(
+ "PUSH_UBX8 0x%" FMT_Word8 " ",
+ (StgWord8) literals[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_UBX16:
+ debugBelch(
+ "PUSH_UBX16 0x%" FMT_Word16 " ",
+ (StgWord16) literals[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_UBX32:
+ debugBelch(
+ "PUSH_UBX32 0x%" FMT_Word32 " ",
+ (StgWord32) literals[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
case bci_PUSH_UBX:
debugBelch("PUSH_UBX ");
for (i = 0; i < instrs[pc+1]; i++)
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
index a27227d547..8ea94b19f2 100644
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -110,6 +110,7 @@ INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL, W_ info_ptr)
INFO_TABLE_RET(stg_maskAsyncExceptionszh_ret, RET_SMALL, W_ info_ptr)
return (P_ ret)
{
+ unwind Sp = Sp + WDS(1);
StgTSO_flags(CurrentTSO) =
%lobits32(
TO_W_(StgTSO_flags(CurrentTSO))
@@ -122,6 +123,7 @@ INFO_TABLE_RET(stg_maskAsyncExceptionszh_ret, RET_SMALL, W_ info_ptr)
INFO_TABLE_RET(stg_maskUninterruptiblezh_ret, RET_SMALL, W_ info_ptr)
return (P_ ret)
{
+ unwind Sp = Sp + WDS(1);
StgTSO_flags(CurrentTSO) =
%lobits32(
(TO_W_(StgTSO_flags(CurrentTSO))
@@ -430,6 +432,7 @@ section "data" {
INFO_TABLE_RET(stg_raise_ret, RET_SMALL, W_ info_ptr, P_ exception)
return (P_ ret)
{
+ unwind Sp = Sp + WDS(2);
W_[no_break_on_exception] = 1;
jump stg_raisezh (exception);
}
@@ -486,11 +489,6 @@ retry_pop_stack:
ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");
- if (outer != NO_TREC) {
- ccall stmAbortTransaction(MyCapability() "ptr", outer "ptr");
- ccall stmFreeAbortedTRec(MyCapability() "ptr", outer "ptr");
- }
-
StgTSO_trec(CurrentTSO) = NO_TREC;
if (r != 0) {
// Transaction was valid: continue searching for a catch frame
@@ -528,7 +526,7 @@ retry_pop_stack:
Sp(8) = exception;
Sp(7) = stg_raise_ret_info;
Sp(6) = exception;
- Sp(5) = ghczmprim_GHCziTypes_True_closure; // True <=> a breakpoint
+ Sp(5) = ghczmprim_GHCziTypes_True_closure; // True <=> an exception
Sp(4) = stg_ap_ppv_info;
Sp(3) = 0;
Sp(2) = stg_ap_n_info;
diff --git a/rts/GetTime.h b/rts/GetTime.h
index 719b45f8b9..97f499cd6c 100644
--- a/rts/GetTime.h
+++ b/rts/GetTime.h
@@ -13,7 +13,6 @@
void initializeTimer (void);
Time getProcessCPUTime (void);
-Time getProcessElapsedTime (void);
void getProcessTimes (Time *user, Time *elapsed);
/* Get the current date and time.
diff --git a/rts/Globals.c b/rts/Globals.c
index 66c17d0f96..c9980d9a3a 100644
--- a/rts/Globals.c
+++ b/rts/Globals.c
@@ -21,7 +21,7 @@
#include "Rts.h"
#include "Globals.h"
-#include "Stable.h"
+#include "StablePtr.h"
typedef enum {
GHCConcSignalSignalHandlerStore,
diff --git a/rts/Hash.c b/rts/Hash.c
index 8f32ac3076..658187b944 100644
--- a/rts/Hash.c
+++ b/rts/Hash.c
@@ -13,6 +13,7 @@
#include "Hash.h"
#include "RtsUtils.h"
+#include "xxhash.h"
#include <string.h>
@@ -76,20 +77,17 @@ hashWord(const HashTable *table, StgWord key)
}
int
-hashStr(const HashTable *table, char *key)
+hashStr(const HashTable *table, StgWord w)
{
- int h, bucket;
- char *s;
-
- s = key;
- for (h=0; *s; s++) {
- h *= 128;
- h += *s;
- h = h % 1048583; /* some random large prime */
- }
+ const char *key = (char*) w;
+#ifdef x86_64_HOST_ARCH
+ StgWord h = XXH64 (key, strlen(key), 1048583);
+#else
+ StgWord h = XXH32 (key, strlen(key), 1048583);
+#endif
/* Mod the size of the hash table (a power of 2) */
- bucket = h & table->mask1;
+ int bucket = h & table->mask1;
if (bucket < table->split) {
/* Mod the size of the expanded hash table (also a power of 2) */
@@ -443,8 +441,7 @@ allocHashTable(void)
HashTable *
allocStrHashTable(void)
{
- return allocHashTable_((HashFunction *)hashStr,
- (CompareFunction *)compareStr);
+ return allocHashTable_(hashStr, compareStr);
}
void
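
The new hashStr defers to xxHash (bundled in this patch as rts/xxhash.{c,h}), keying the 64-bit variant on x86_64_HOST_ARCH and masking the result into the power-of-two table exactly as before. A minimal sketch of the scheme, assuming xxhash.h is on the include path, with `mask` standing in for table->mask1 and a word-size test in place of the patch's architecture test:

    #include <stdint.h>
    #include <string.h>
    #include "xxhash.h"

    static int bucket_for(const char *key, uintptr_t mask /* table_size - 1 */)
    {
    #if UINTPTR_MAX > 0xffffffffUL
        uintptr_t h = XXH64(key, strlen(key), 1048583); /* seed kept from old code */
    #else
        uintptr_t h = XXH32(key, strlen(key), 1048583);
    #endif
        return (int)(h & mask);  /* mod the table size, a power of 2 */
    }
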
diff --git a/rts/Hash.h b/rts/Hash.h
index 1dde314d0e..be388fb62f 100644
--- a/rts/Hash.h
+++ b/rts/Hash.h
@@ -56,7 +56,7 @@ typedef int HashFunction(const HashTable *table, StgWord key);
typedef int CompareFunction(StgWord key1, StgWord key2);
HashTable * allocHashTable_(HashFunction *hash, CompareFunction *compare);
int hashWord(const HashTable *table, StgWord key);
-int hashStr(const HashTable *table, char *key);
+int hashStr(const HashTable *table, StgWord key);
/* Freeing hash tables
*/
diff --git a/rts/Heap.c b/rts/Heap.c
new file mode 100644
index 0000000000..dfd32aff0c
--- /dev/null
+++ b/rts/Heap.c
@@ -0,0 +1,225 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2006-2017
+ *
+ * Introspection into GHC's heap representation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "RtsAPI.h"
+
+#include "Capability.h"
+#include "Printer.h"
+
+StgWord heap_view_closureSize(StgClosure *closure) {
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(closure));
+ return closure_sizeW(closure);
+}
+
+static void
+heap_view_closure_ptrs_in_large_bitmap(StgClosure *ptrs[], StgWord *nptrs
+ , StgClosure **p, StgLargeBitmap *large_bitmap
+ , uint32_t size )
+{
+ uint32_t i, j, b;
+ StgWord bitmap;
+
+ b = 0;
+
+ for (i = 0; i < size; b++) {
+ bitmap = large_bitmap->bitmap[b];
+ j = stg_min(size-i, BITS_IN(W_));
+ i += j;
+ for (; j > 0; j--, p++) {
+ if ((bitmap & 1) == 0) {
+ ptrs[(*nptrs)++] = *p;
+ }
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+void heap_view_closure_ptrs_in_pap_payload(StgClosure *ptrs[], StgWord *nptrs
+ , StgClosure *fun, StgClosure **payload, StgWord size) {
+ StgWord bitmap;
+ const StgFunInfoTable *fun_info;
+
+ fun_info = get_fun_itbl(UNTAG_CLOSURE(fun));
+ // ASSERT(fun_info->i.type != PAP);
+ StgClosure **p = payload;
+
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ heap_view_closure_ptrs_in_large_bitmap(ptrs, nptrs, payload,
+ GET_FUN_LARGE_BITMAP(fun_info), size);
+ break;
+ case ARG_BCO:
+ heap_view_closure_ptrs_in_large_bitmap(ptrs, nptrs, payload,
+ BCO_BITMAP(fun), size);
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ ptrs[(*nptrs)++] = *p;
+ }
+ bitmap = bitmap >> 1;
+ p++;
+ size--;
+ }
+ break;
+ }
+}
+
+StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(closure));
+
+ StgWord size = heap_view_closureSize(closure);
+ StgWord nptrs = 0;
+ StgWord i;
+
+ // First collect all pointers here, with the comfortable memory bound
+ // of the whole closure. Afterwards we know how many pointers are in
+ // the closure and then we can allocate space on the heap and copy them
+ // there
+ StgClosure *ptrs[size];
+
+ StgClosure **end;
+ StgClosure **ptr;
+
+ const StgInfoTable *info = get_itbl(closure);
+
+ switch (info->type) {
+ case INVALID_OBJECT:
+ barf("Invalid Object");
+ break;
+
+ // No pointers
+ case ARR_WORDS:
+ break;
+
+ // Default layout
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR:
+
+
+ case PRIM:
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_1_1:
+ case FUN_2_0:
+ case FUN_0_2:
+ case FUN_STATIC:
+ end = closure->payload + info->layout.payload.ptrs;
+ for (ptr = closure->payload; ptr < end; ptr++) {
+ ptrs[nptrs++] = *ptr;
+ }
+ break;
+
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_1_1:
+ case THUNK_2_0:
+ case THUNK_0_2:
+ case THUNK_STATIC:
+ end = ((StgThunk *)closure)->payload + info->layout.payload.ptrs;
+ for (ptr = ((StgThunk *)closure)->payload; ptr < end; ptr++) {
+ ptrs[nptrs++] = *ptr;
+ }
+ break;
+
+ case THUNK_SELECTOR:
+ ptrs[nptrs++] = ((StgSelector *)closure)->selectee;
+ break;
+
+ case AP:
+ ptrs[nptrs++] = ((StgAP *)closure)->fun;
+ heap_view_closure_ptrs_in_pap_payload(ptrs, &nptrs,
+ ((StgAP *)closure)->fun,
+ ((StgAP *)closure)->payload,
+ ((StgAP *)closure)->n_args);
+ break;
+
+ case PAP:
+ ptrs[nptrs++] = ((StgPAP *)closure)->fun;
+ heap_view_closure_ptrs_in_pap_payload(ptrs, &nptrs,
+ ((StgPAP *)closure)->fun,
+ ((StgPAP *)closure)->payload,
+ ((StgPAP *)closure)->n_args);
+ break;
+
+ case AP_STACK:
+ ptrs[nptrs++] = ((StgAP_STACK *)closure)->fun;
+ /*
+ The payload is a stack, which consists of a mixture of pointers
+ and non-pointers. We can't simply pretend it's all pointers,
+ because that will cause crashes in the GC later. We could
+ traverse the stack and extract pointers and non-pointers, but that
+ would be complicated, so let's just ignore the payload for now.
+ See #15375.
+ */
+ break;
+
+ case BCO:
+ ptrs[nptrs++] = (StgClosure *)((StgBCO *)closure)->instrs;
+ ptrs[nptrs++] = (StgClosure *)((StgBCO *)closure)->literals;
+ ptrs[nptrs++] = (StgClosure *)((StgBCO *)closure)->ptrs;
+ break;
+
+ case IND:
+ case IND_STATIC:
+ case BLACKHOLE:
+ ptrs[nptrs++] = (StgClosure *)(((StgInd *)closure)->indirectee);
+ break;
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ for (i = 0; i < ((StgMutArrPtrs *)closure)->ptrs; ++i) {
+ ptrs[nptrs++] = ((StgMutArrPtrs *)closure)->payload[i];
+ }
+ break;
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ ptrs[nptrs++] = ((StgMutVar *)closure)->var;
+ break;
+ case MVAR_DIRTY:
+ case MVAR_CLEAN:
+ ptrs[nptrs++] = (StgClosure *)((StgMVar *)closure)->head;
+ ptrs[nptrs++] = (StgClosure *)((StgMVar *)closure)->tail;
+ ptrs[nptrs++] = ((StgMVar *)closure)->value;
+ break;
+
+ default:
+ fprintf(stderr,"closurePtrs: Cannot handle type %s yet\n",
+ closure_type_names[info->type]);
+ break;
+ }
+
+ size = nptrs + mutArrPtrsCardTableSize(nptrs);
+ StgMutArrPtrs *arr =
+ (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
+ TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), nptrs, 0);
+ SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, cap->r.rCCCS);
+ arr->ptrs = nptrs;
+ arr->size = size;
+
+ for (i = 0; i<nptrs; i++) {
+ arr->payload[i] = ptrs[i];
+ }
+
+ return arr;
+}
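
The bitmap walk in heap_view_closure_ptrs_in_large_bitmap relies on GHC's layout-bitmap convention: a 0 bit marks a pointer field and a 1 bit a non-pointer. A standalone sketch of the same loop, with stand-in types rather than the RTS's own:

    #include <stddef.h>
    #include <stdint.h>

    #define BITS_PER_WORD (8 * sizeof(uintptr_t))

    /* Collect the pointer fields of `payload` (`size` fields in total)
       into `out`, guided by a large layout bitmap; returns how many
       pointers were found. */
    static size_t collect_ptrs(void **out, void **payload, size_t size,
                               const uintptr_t *bitmap_words)
    {
        size_t nptrs = 0;
        for (size_t i = 0, b = 0; i < size; b++) {
            uintptr_t bitmap = bitmap_words[b];
            size_t j = (size - i < BITS_PER_WORD) ? size - i : BITS_PER_WORD;
            i += j;
            for (; j > 0; j--, payload++) {
                if ((bitmap & 1) == 0) {   /* 0 bit => pointer field */
                    out[nptrs++] = *payload;
                }
                bitmap >>= 1;
            }
        }
        return nptrs;
    }
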
diff --git a/rts/Hpc.c b/rts/Hpc.c
index 7575e34ce0..9ba9b04b61 100644
--- a/rts/Hpc.c
+++ b/rts/Hpc.c
@@ -13,6 +13,7 @@
#include <ctype.h>
#include <string.h>
#include <assert.h>
+#include <fs_rts.h>
#if defined(HAVE_SYS_TYPES_H)
#include <sys/types.h>
@@ -233,7 +234,7 @@ startupHpc(void)
sprintf(tixFilename, "%s.tix", prog_name);
}
- if (init_open(fopen(tixFilename,"r"))) {
+ if (init_open(__rts_fopen(tixFilename,"r"))) {
readTix();
}
}
@@ -387,7 +388,7 @@ exitHpc(void) {
// not clobber the .tix file.
if (hpc_pid == getpid()) {
- FILE *f = fopen(tixFilename,"w");
+ FILE *f = __rts_fopen(tixFilename,"w");
writeTix(f);
}
diff --git a/rts/HsFFI.c b/rts/HsFFI.c
index 8fae246111..e482932193 100644
--- a/rts/HsFFI.c
+++ b/rts/HsFFI.c
@@ -10,7 +10,7 @@
#include "HsFFI.h"
#include "Rts.h"
-#include "Stable.h"
+#include "StablePtr.h"
#include "Task.h"
// hs_init and hs_exit are defined in RtsStartup.c
@@ -28,14 +28,28 @@ hs_perform_gc(void)
performMajorGC();
}
+// Lock the stable pointer table
+void hs_lock_stable_ptr_table (void)
+{
+ stablePtrLock();
+}
+
+// Deprecated version of hs_lock_stable_ptr_table
void hs_lock_stable_tables (void)
{
- stableLock();
+ stablePtrLock();
+}
+
+// Unlock the stable pointer table
+void hs_unlock_stable_ptr_table (void)
+{
+ stablePtrUnlock();
}
+// Deprecated version of hs_unlock_stable_ptr_table
void hs_unlock_stable_tables (void)
{
- stableUnlock();
+ stablePtrUnlock();
}
void
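
From foreign code the renamed pair simply brackets any direct manipulation of the stable-pointer table; the old hs_lock_stable_tables/hs_unlock_stable_tables names remain as deprecated aliases so existing clients keep building. A hedged usage sketch (what happens inside the critical section is left hypothetical):

    #include "HsFFI.h"

    void with_stable_ptr_table_locked(void (*critical)(void))
    {
        hs_lock_stable_ptr_table();   /* was: hs_lock_stable_tables() */
        critical();                   /* hypothetical table manipulation */
        hs_unlock_stable_ptr_table();
    }
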
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index f3a6cb53b8..a3b179a4be 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -16,7 +16,7 @@
#include "Schedule.h"
#include "Updates.h"
#include "Prelude.h"
-#include "Stable.h"
+#include "StablePtr.h"
#include "Printer.h"
#include "Profiling.h"
#include "Disassembler.h"
@@ -289,7 +289,7 @@ static StgWord app_ptrs_itbl[] = {
};
HsStablePtr rts_breakpoint_io_action; // points to the IO action which is executed on a breakpoint
- // it is set in main/GHC.hs:runStmt
+ // it is set in main/GHC.hs:runStmt
Capability *
interpretBCO (Capability* cap)
@@ -429,7 +429,9 @@ eval_obj:
// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/HaskellExecution/PointerTagging
tagged_obj =
newEmptyPAP(cap,
- arity <= TAG_MASK ? obj + arity : obj,
+ arity <= TAG_MASK
+ ? (StgClosure *) ((intptr_t) obj + arity)
+ : obj,
arity);
}
#endif
@@ -1096,10 +1098,10 @@ run_BCO:
// Arrange the stack to call the breakpoint IO action, and
// continue execution of this BCO when the IO action returns.
//
- // ioAction :: Bool -- exception?
+ // ioAction :: Int# -- the breakpoint index
+ // -> Int# -- the module uniq
+ // -> Bool -- exception?
// -> HValue -- the AP_STACK, or exception
- // -> Int -- the breakpoint index (arg2)
- // -> Int -- the module uniq (arg3)
// -> IO ()
//
ioAction = (StgClosure *) deRefStablePtr (
@@ -1109,7 +1111,7 @@ run_BCO:
SpW(10) = (W_)obj;
SpW(9) = (W_)&stg_apply_interp_info;
SpW(8) = (W_)new_aps;
- SpW(7) = (W_)False_closure; // True <=> a breakpoint
+ SpW(7) = (W_)False_closure; // True <=> an exception
SpW(6) = (W_)&stg_ap_ppv_info;
SpW(5) = (W_)BCO_LIT(arg3_module_uniq);
SpW(4) = (W_)&stg_ap_n_info;
@@ -1179,6 +1181,48 @@ run_BCO:
goto nextInsn;
}
+ case bci_PUSH8: {
+ int off = BCO_NEXT;
+ Sp_subB(1);
+ *(StgWord8*)Sp = *(StgWord8*)(Sp_plusB(off+1));
+ goto nextInsn;
+ }
+
+ case bci_PUSH16: {
+ int off = BCO_NEXT;
+ Sp_subB(2);
+ *(StgWord16*)Sp = *(StgWord16*)(Sp_plusB(off+2));
+ goto nextInsn;
+ }
+
+ case bci_PUSH32: {
+ int off = BCO_NEXT;
+ Sp_subB(4);
+ *(StgWord32*)Sp = *(StgWord32*)(Sp_plusB(off+4));
+ goto nextInsn;
+ }
+
+ case bci_PUSH8_W: {
+ int off = BCO_NEXT;
+ *(StgWord*)(Sp_minusW(1)) = *(StgWord8*)(Sp_plusB(off));
+ Sp_subW(1);
+ goto nextInsn;
+ }
+
+ case bci_PUSH16_W: {
+ int off = BCO_NEXT;
+ *(StgWord*)(Sp_minusW(1)) = *(StgWord16*)(Sp_plusB(off));
+ Sp_subW(1);
+ goto nextInsn;
+ }
+
+ case bci_PUSH32_W: {
+ int off = BCO_NEXT;
+ *(StgWord*)(Sp_minusW(1)) = *(StgWord32*)(Sp_plusB(off));
+ Sp_subW(1);
+ goto nextInsn;
+ }
+
case bci_PUSH_G: {
int o1 = BCO_GET_LARGE_ARG;
SpW(-1) = BCO_PTR(o1);
@@ -1311,6 +1355,45 @@ run_BCO:
Sp_subW(1); SpW(0) = (W_)&stg_ap_pppppp_info;
goto nextInsn;
+ case bci_PUSH_PAD8: {
+ Sp_subB(1);
+ *(StgWord8*)Sp = 0;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_PAD16: {
+ Sp_subB(2);
+ *(StgWord16*)Sp = 0;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_PAD32: {
+ Sp_subB(4);
+ *(StgWord32*)Sp = 0;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_UBX8: {
+ int o_lit = BCO_GET_LARGE_ARG;
+ Sp_subB(1);
+ *(StgWord8*)Sp = *(StgWord8*)(literals+o_lit);
+ goto nextInsn;
+ }
+
+ case bci_PUSH_UBX16: {
+ int o_lit = BCO_GET_LARGE_ARG;
+ Sp_subB(2);
+ *(StgWord16*)Sp = *(StgWord16*)(literals+o_lit);
+ goto nextInsn;
+ }
+
+ case bci_PUSH_UBX32: {
+ int o_lit = BCO_GET_LARGE_ARG;
+ Sp_subB(4);
+ *(StgWord32*)Sp = *(StgWord32*)(literals+o_lit);
+ goto nextInsn;
+ }
+
case bci_PUSH_UBX: {
int i;
int o_lits = BCO_GET_LARGE_ARG;
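
The new bytecodes treat Sp as byte-addressable: PUSH_UBX8/16/32 push a sub-word literal at its natural width, PUSH_PAD8/16/32 push zero filler so later word accesses stay aligned, and the PUSH8_W/PUSH16_W/PUSH32_W forms widen a sub-word value to a full word. A standalone simulation of that stack discipline (plain C, not the interpreter's actual Sp macros):

    #include <stdint.h>
    #include <string.h>

    static uint8_t stack[64];
    static uint8_t *sp = stack + sizeof stack;  /* grows downward, like Sp */

    static void push_ubx16(uint16_t lit)        /* cf. bci_PUSH_UBX16 */
    {
        sp -= sizeof lit;
        memcpy(sp, &lit, sizeof lit);
    }

    static void push_pad16(void)                /* cf. bci_PUSH_PAD16 */
    {
        static const uint16_t zero = 0;
        sp -= sizeof zero;
        memcpy(sp, &zero, sizeof zero);
    }

    static void push16_w(uint16_t v)            /* cf. bci_PUSH16_W: widen */
    {
        uintptr_t w = v;
        sp -= sizeof w;
        memcpy(sp, &w, sizeof w);
    }
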
diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c
index 789941603a..608961e246 100644
--- a/rts/LdvProfile.c
+++ b/rts/LdvProfile.c
@@ -66,12 +66,12 @@ processHeapClosureForDead( const StgClosure *c )
case TVAR:
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
case ARR_WORDS:
case WEAK:
case MUT_VAR_CLEAN:
@@ -101,6 +101,7 @@ processHeapClosureForDead( const StgClosure *c )
case CONSTR_2_0:
case CONSTR_1_1:
case CONSTR_0_2:
+ case CONSTR_NOCAF:
case FUN:
case FUN_1_0:
case FUN_0_1:
@@ -130,7 +131,6 @@ processHeapClosureForDead( const StgClosure *c )
case IND_STATIC:
case FUN_STATIC:
case THUNK_STATIC:
- case CONSTR_NOCAF:
// stack objects
case UPDATE_FRAME:
case CATCH_FRAME:
@@ -139,6 +139,9 @@ processHeapClosureForDead( const StgClosure *c )
case RET_BCO:
case RET_SMALL:
case RET_BIG:
+ case CATCH_STM_FRAME:
+ case CATCH_RETRY_FRAME:
+ case ATOMICALLY_FRAME:
// others
case INVALID_OBJECT:
case COMPACT_NFDATA:
diff --git a/rts/Libdw.h b/rts/Libdw.h
index d737745c02..5fd1c9522c 100644
--- a/rts/Libdw.h
+++ b/rts/Libdw.h
@@ -23,9 +23,6 @@ LibdwSession *libdwInit(void);
/* Free a session */
void libdwFree(LibdwSession *session);
-/* Pretty-print a backtrace to std*/
-void libdwPrintBacktrace(LibdwSession *session, FILE *file, Backtrace *bt);
-
// Traverse backtrace in order of outer-most to inner-most frame
#define FOREACH_FRAME_INWARDS(pc, bt) \
BacktraceChunk *_chunk; \
diff --git a/rts/Linker.c b/rts/Linker.c
index 37007264db..a1de6a7a14 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -22,7 +22,7 @@
#include "StgPrimFloat.h" // for __int_encodeFloat etc.
#include "Proftimer.h"
#include "GetEnv.h"
-#include "Stable.h"
+#include "StablePtr.h"
#include "RtsSymbols.h"
#include "RtsSymbolInfo.h"
#include "Profiling.h"
@@ -48,6 +48,7 @@
#include <string.h>
#include <stdio.h>
#include <assert.h>
+#include <fs_rts.h>
#if defined(HAVE_SYS_STAT_H)
#include <sys/stat.h>
@@ -482,7 +483,7 @@ initLinker_ (int retain_cafs)
# endif /* RTLD_DEFAULT */
compileResult = regcomp(&re_invalid,
- "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
+ "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format)",
REG_EXTENDED);
if (compileResult != 0) {
barf("Compiling re_invalid failed");
@@ -515,6 +516,9 @@ initLinker_ (int retain_cafs)
void
exitLinker( void ) {
+#if defined(OBJFORMAT_PEi386)
+ exitLinker_PEi386();
+#endif
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
if (linker_init_done == 1) {
regfree(&re_invalid);
@@ -711,7 +715,7 @@ addDLL( pathchar *dll_name )
strncpy(line, (errmsg+(match[1].rm_so)),match_length);
line[match_length] = '\0'; // make sure string is null-terminated
IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
- if ((fp = fopen(line, "r")) == NULL) {
+ if ((fp = __rts_fopen(line, "r")) == NULL) {
return errmsg; // return original error if open fails
}
// try to find a GROUP or INPUT ( ... ) command
@@ -980,6 +984,21 @@ void ghci_enquire(SymbolAddr* addr)
}
#endif
+pathchar*
+resolveSymbolAddr (pathchar* buffer, int size,
+ SymbolAddr* symbol, uintptr_t* top)
+{
+#if defined(OBJFORMAT_PEi386)
+ return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
+#else /* OBJFORMAT_PEi386 */
+ (void)buffer;
+ (void)size;
+ (void)symbol;
+ (void)top;
+ return NULL;
+#endif /* OBJFORMAT_PEi386 */
+}
+
#if RTS_LINKER_USE_MMAP
//
// Returns NULL on failure.
@@ -1367,18 +1386,7 @@ preloadObjectFile (pathchar *path)
return NULL;
}
-# if defined(mingw32_HOST_OS)
-
- // TODO: We would like to use allocateExec here, but allocateExec
- // cannot currently allocate blocks large enough.
- image = allocateImageAndTrampolines(path, "itself", f, fileSize,
- HS_BOOL_FALSE);
- if (image == NULL) {
- fclose(f);
- return NULL;
- }
-
-# elif defined(darwin_HOST_OS)
+# if defined(darwin_HOST_OS)
// In a Mach-O .o file, all sections can and will be misaligned
// if the total size of the headers is not a multiple of the
@@ -1393,7 +1401,7 @@ preloadObjectFile (pathchar *path)
image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
image += misalignment;
-# else /* !defined(mingw32_HOST_OS) */
+# else /* !defined(darwin_HOST_OS) */
image = stgMallocBytes(fileSize, "loadObj(image)");
@@ -1489,6 +1497,34 @@ HsInt loadOc (ObjectCode* oc)
return r;
}
+ /* Note [loadOc orderings]
+ ocAllocateSymbolExtras has only two prerequisites: it must run after
+ preloadObjectFile and ocVerify. Neither has changed. On most targets
+ allocating the extras is independent of parsing the section data, so the
+ order between these two never mattered.
+
+ On Windows, when we have an import library we (for now, as we don't honor
+ the lazy loading semantics of the library and instead GHCi is already
+ lazy) don't use the library after ocGetNames as it just populates the
+ symbol table. Allocating space for jump tables in ocAllocateSymbolExtras
+ would just be a waste then as we'll be stopping further processing of the
+ library in the next few steps. */
+
+ /* build the symbol list for this image */
+# if defined(OBJFORMAT_ELF)
+ r = ocGetNames_ELF ( oc );
+# elif defined(OBJFORMAT_PEi386)
+ r = ocGetNames_PEi386 ( oc );
+# elif defined(OBJFORMAT_MACHO)
+ r = ocGetNames_MachO ( oc );
+# else
+ barf("loadObj: no getNames method");
+# endif
+ if (!r) {
+ IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
+ return r;
+ }
+
#if defined(NEED_SYMBOL_EXTRAS)
# if defined(OBJFORMAT_MACHO)
r = ocAllocateSymbolExtras_MachO ( oc );
@@ -1509,21 +1545,6 @@ HsInt loadOc (ObjectCode* oc)
# endif
#endif
- /* build the symbol list for this image */
-# if defined(OBJFORMAT_ELF)
- r = ocGetNames_ELF ( oc );
-# elif defined(OBJFORMAT_PEi386)
- r = ocGetNames_PEi386 ( oc );
-# elif defined(OBJFORMAT_MACHO)
- r = ocGetNames_MachO ( oc );
-# else
- barf("loadObj: no getNames method");
-# endif
- if (!r) {
- IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
- return r;
- }
-
/* loaded, but not resolved yet, ensure the OC is in a consistent state */
setOcInitialStatus( oc );
IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
@@ -1708,6 +1729,30 @@ HsInt purgeObj (pathchar *path)
return r;
}
+static OStatus getObjectLoadStatus_ (pathchar *path)
+{
+ ObjectCode *o;
+ for (o = objects; o; o = o->next) {
+ if (0 == pathcmp(o->fileName, path)) {
+ return o->status;
+ }
+ }
+ for (o = unloaded_objects; o; o = o->next) {
+ if (0 == pathcmp(o->fileName, path)) {
+ return o->status;
+ }
+ }
+ return OBJECT_NOT_LOADED;
+}
+
+OStatus getObjectLoadStatus (pathchar *path)
+{
+ ACQUIRE_LOCK(&linker_mutex);
+ OStatus r = getObjectLoadStatus_(path);
+ RELEASE_LOCK(&linker_mutex);
+ return r;
+}
+
/* -----------------------------------------------------------------------------
* Sanity checking. For each ObjectCode, maintain a list of address ranges
* which may be prodded during relocation, and abort if we try and write
@@ -1769,7 +1814,9 @@ addSection (Section *s, SectionKind kind, SectionAlloc alloc,
s->mapped_start = mapped_start; /* start of mmap() block */
s->mapped_size = mapped_size; /* size of mmap() block */
- s->info = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
+ if (!s->info)
+ s->info
+ = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
"addSection(SectionFormatInfo)");
IF_DEBUG(linker,
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index 05fa7701d5..04d873ca99 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -20,19 +20,10 @@
typedef void SymbolAddr;
typedef char SymbolName;
-/* See Linker.c Note [runtime-linker-phases] */
-typedef enum {
- OBJECT_LOADED,
- OBJECT_NEEDED,
- OBJECT_RESOLVED,
- OBJECT_UNLOADED,
- OBJECT_DONT_RESOLVE
-} OStatus;
-
/* Indication of section kinds for loaded objects. Needed by
the GC for deciding whether or not a pointer on the stack
is a code pointer.
- See Note [BFD import libraries].
+ See Note [BFD import library].
*/
typedef
enum { /* Section is code or readonly. e.g. .text or .r(o)data. */
@@ -41,8 +32,12 @@ typedef
SECTIONKIND_RWDATA,
/* Static initializer section. e.g. .ctors. */
SECTIONKIND_INIT_ARRAY,
+ /* Static finalizer section. e.g. .dtors. */
+ SECTIONKIND_FINIT_ARRAY,
/* We don't know what the section is and don't care. */
SECTIONKIND_OTHER,
+ /* Section contains debug information. e.g. .debug$. */
+ SECTIONKIND_DEBUG,
/* Section belongs to an import section group. e.g. .idata$. */
SECTIONKIND_IMPORT,
/* Section defines an import library entry, e.g. idata$7. */
@@ -55,7 +50,7 @@ typedef
enum { SECTION_NOMEM,
SECTION_M32,
SECTION_MMAP,
- SECTION_MALLOC,
+ SECTION_MALLOC
}
SectionAlloc;
@@ -273,6 +268,9 @@ SymbolAddr* lookupSymbol_ (SymbolName* lbl);
extern /*Str*/HashTable *symhash;
+pathchar*
+resolveSymbolAddr (pathchar* buffer, int size,
+ SymbolAddr* symbol, uintptr_t* top);
/*************************************************
* Various bits of configuration
@@ -302,28 +300,6 @@ ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
int misalignment
);
-#if defined(mingw32_HOST_OS)
-/* We use myindex to calculate array addresses, rather than
- simply doing the normal subscript thing. That's because
- some of the above structs have sizes which are not
- a whole number of words. GCC rounds their sizes up to a
- whole number of words, which means that the address calcs
- arising from using normal C indexing or pointer arithmetic
- are just plain wrong. Sigh.
-*/
-INLINE_HEADER unsigned char *
-myindex ( int scale, void* base, int index )
-{
- return
- ((unsigned char*)base) + scale * index;
-}
-
-// Defined in linker/PEi386.c
-char *cstring_from_section_name(
- unsigned char* name,
- unsigned char* strtab);
-#endif /* mingw32_HOST_OS */
-
/* MAP_ANONYMOUS is MAP_ANON on some systems,
e.g. OS X (before Sierra), OpenBSD etc */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -340,8 +316,7 @@ char *cstring_from_section_name(
# include "linker/ElfTypes.h"
#elif defined (mingw32_HOST_OS)
# define OBJFORMAT_PEi386
-struct SectionFormatInfo { void* placeholder; };
-struct ObjectCodeFormatInfo { void* placeholder; };
+# include "linker/PEi386Types.h"
#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
# define OBJFORMAT_MACHO
# include "linker/MachOTypes.h"
diff --git a/rts/Messages.c b/rts/Messages.c
index 8fab314bc4..2b13b6306c 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -129,6 +129,9 @@ loop:
}
else if (i == &stg_WHITEHOLE_info)
{
+#if defined(PROF_SPIN)
+ ++whitehole_executeMessage_spin;
+#endif
goto loop;
}
else
@@ -200,6 +203,7 @@ loop:
// just been replaced with an IND by another thread in
// updateThunk(). In which case, if we read the indirectee
// again we should get the value.
+ // See Note [BLACKHOLE pointing to IND] in sm/Evac.c
goto loop;
}
diff --git a/rts/Messages.h b/rts/Messages.h
index e60f19dc1d..18371564c4 100644
--- a/rts/Messages.h
+++ b/rts/Messages.h
@@ -31,3 +31,7 @@ doneWithMsgThrowTo (MessageThrowTo *m)
}
#include "EndPrivate.h"
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_executeMessage_spin;
+#endif
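
Under PROF_SPIN this counter records how often executeMessage had to spin on a WHITEHOLE before retrying. A sketch of the same contention-counting idea using C11 atomics (the RTS itself uses a plain volatile StgWord64 incremented in the spin loop):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t whitehole_spin_count;

    static void on_whitehole_seen(void)
    {
        /* relaxed ordering suffices: this is a statistic, not a lock */
        atomic_fetch_add_explicit(&whitehole_spin_count, 1,
                                  memory_order_relaxed);
        /* ...then retry, as executeMessage's `goto loop` does */
    }
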
diff --git a/rts/PathUtils.c b/rts/PathUtils.c
index 1b0b729f07..def3f7e329 100644
--- a/rts/PathUtils.c
+++ b/rts/PathUtils.c
@@ -7,6 +7,11 @@
#include <libgen.h>
#include <ctype.h>
+#if defined(mingw32_HOST_OS)
+/* Using Secure APIs */
+#define MINGW_HAS_SECURE_API 1
+#include <wchar.h>
+#endif
pathchar* pathdup(pathchar *path)
{
diff --git a/rts/PathUtils.h b/rts/PathUtils.h
index 152606a7b0..0b35b214e0 100644
--- a/rts/PathUtils.h
+++ b/rts/PathUtils.h
@@ -14,7 +14,7 @@
#if defined(mingw32_HOST_OS)
#define pathcmp wcscmp
#define pathlen wcslen
-#define pathopen _wfopen
+#define pathopen __rts_fwopen
#define pathstat _wstat
#define struct_stat struct _stat
#define open wopen
diff --git a/rts/PosixSource.h b/rts/PosixSource.h
index edee5fa94e..13fd7b0ff5 100644
--- a/rts/PosixSource.h
+++ b/rts/PosixSource.h
@@ -36,16 +36,3 @@
#define _POSIX_C_SOURCE 200809L
#define _XOPEN_SOURCE 700
#endif
-
-#if defined(mingw32_HOST_OS)
-/* Without this gcc will warn about %ull and the like since some msvcrt versions
- do not support them. See
- https://sourceforge.net/p/mingw-w64/mailman/message/28557333/
-
- Note that this is implied by _POSIX_C_SOURCE in the msys2 toolchain that we
- now use. However, we retain this explicit #define to preserve the ability to
- bootstrap GHC with compilers still using msys (e.g. GHC 7.10.1 and 7.10.2).
- This can be removed in for GHC 8.4. See #12951.
- */
-#define __USE_MINGW_ANSI_STDIO 1
-#endif
diff --git a/rts/Prelude.h b/rts/Prelude.h
index 74b6b08227..6e5bf03bd6 100644
--- a/rts/Prelude.h
+++ b/rts/Prelude.h
@@ -45,6 +45,7 @@ PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactPinned_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactMutable_closure);
PRELUDE_CLOSURE(base_ControlziExceptionziBase_nonTermination_closure);
PRELUDE_CLOSURE(base_ControlziExceptionziBase_nestedAtomically_closure);
+PRELUDE_CLOSURE(base_ControlziExceptionziBase_absentSumFieldError_closure);
PRELUDE_CLOSURE(base_GHCziEventziThread_blockedOnBadFD_closure);
PRELUDE_CLOSURE(base_GHCziConcziSync_runSparks_closure);
@@ -99,6 +100,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info);
#define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure)
#define nestedAtomically_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nestedAtomically_closure)
#define blockedOnBadFD_closure DLL_IMPORT_DATA_REF(base_GHCziEventziThread_blockedOnBadFD_closure)
+#define absentSumFieldError_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_absentSumFieldError_closure)
#define Czh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Czh_con_info)
#define Izh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Izh_con_info)
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 3d4bea433d..a5d8553e94 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -30,6 +30,7 @@ import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import CLOSURE base_ControlziExceptionziBase_nestedAtomically_closure;
+import CLOSURE base_GHCziIOziException_heapOverflow_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import CLOSURE ghczmprim_GHCziTypes_False_closure;
@@ -62,7 +63,10 @@ stg_newByteArrayzh ( W_ n )
payload_words = ROUNDUP_BYTES_TO_WDS(n);
words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
- ("ptr" p) = ccall allocate(MyCapability() "ptr",words);
+ ("ptr" p) = ccall allocateMightFail(MyCapability() "ptr", words);
+ if (p == NULL) {
+ jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+ }
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
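
Where allocate() aborts the program when the heap limit is hit, allocateMightFail() reports failure to its caller, letting the primop raise heapOverflow as an ordinary, catchable Haskell exception. A standalone analog of that calling convention (all names illustrative):

    #include <stddef.h>
    #include <stdlib.h>

    #define HEAP_LIMIT_WORDS (1u << 20)
    static size_t heap_used_w = 0;

    static void *allocate_might_fail(size_t words)
    {
        if (heap_used_w + words > HEAP_LIMIT_WORDS)
            return NULL;               /* report failure, don't abort */
        heap_used_w += words;
        return malloc(words * sizeof(void *));
    }

    static void raise_heap_overflow(void) { /* cf. stg_raisezh(...) */ }

    static void *new_byte_array(size_t words)
    {
        void *p = allocate_might_fail(words);
        if (p == NULL)
            raise_heap_overflow();     /* becomes a catchable exception */
        return p;
    }
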
@@ -92,6 +96,9 @@ stg_newPinnedByteArrayzh ( W_ n )
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
+ if (p == NULL) {
+ jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+ }
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
@@ -130,6 +137,9 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
+ if (p == NULL) {
+ jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+ }
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
@@ -150,8 +160,9 @@ stg_isByteArrayPinnedzh ( gcptr ba )
// Pinned byte arrays live in blocks with the BF_PINNED flag set.
// We also consider BF_LARGE objects to be immovable. See #13894.
// See the comment in Storage.c:allocatePinned.
+ // We also consider BF_COMPACT objects to be immovable. See #14900.
flags = TO_W_(bdescr_flags(bd));
- return (flags & (BF_PINNED | BF_LARGE) != 0);
+ return (flags & (BF_PINNED | BF_LARGE | BF_COMPACT) != 0);
}
stg_isMutableByteArrayPinnedzh ( gcptr mba )
@@ -240,17 +251,20 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
// number of words.
size = n + mutArrPtrsCardWords(n);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
- ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
+ ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
+ if (arr == NULL) {
+ jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+ }
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
- // Initialise all elements of the the array with the value in R2
+ // Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgMutArrPtrs;
for:
- if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
+ if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
W_[p] = init;
p = p + WDS(1);
goto for;
@@ -261,28 +275,15 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
stg_unsafeThawArrayzh ( gcptr arr )
{
- // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
- //
- // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
- // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave
- // it on the mutable list for the GC to remove (removing something from
- // the mutable list is not easy).
- //
- // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
- // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
- // to indicate that it is still on the mutable list.
- //
- // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
- // either it is on a mut_list, or it isn't. We adopt the convention that
- // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
- // and MUT_ARR_PTRS_FROZEN otherwise. In fact it wouldn't matter if
- // we put it on the mutable list more than once, but it would get scavenged
- // multiple times during GC, which would be unnecessarily slow.
- //
- if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
+ // A MUT_ARR_PTRS always lives on a mut_list, but a MUT_ARR_PTRS_FROZEN
+ // doesn't. To decide whether to add the thawed array to a mut_list we check
+ // the info table. MUT_ARR_PTRS_FROZEN_DIRTY means it's already on a
+ // mut_list so no need to add it again. MUT_ARR_PTRS_FROZEN_CLEAN means it's
+ // not and we should add it to a mut_list.
+ if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
+ // must be done after SET_INFO, because it ASSERTs closure_MUTABLE():
recordMutable(arr);
- // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
return (arr);
} else {
SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
@@ -312,7 +313,7 @@ stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
{
- cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+ cloneArray(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
@@ -323,7 +324,7 @@ stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
// We have to escape the "z" in the name.
stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
{
- cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+ cloneArray(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
@@ -366,7 +367,10 @@ stg_newArrayArrayzh ( W_ n /* words */ )
// number of words.
size = n + mutArrPtrsCardWords(n);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
- ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
+ ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
+ if (arr == NULL) {
+ jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+ }
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
@@ -376,7 +380,7 @@ stg_newArrayArrayzh ( W_ n /* words */ )
// Initialise all elements of the array with a pointer to the new array
p = arr + SIZEOF_StgMutArrPtrs;
for:
- if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
+ if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
W_[p] = arr;
p = p + WDS(1);
goto for;
@@ -398,16 +402,19 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
again: MAYBE_GC(again);
words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
- ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
+ ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
+ if (arr == NULL) {
+ jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+ }
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgSmallMutArrPtrs_ptrs(arr) = n;
- // Initialise all elements of the the array with the value in R2
+ // Initialise all elements of the array with the value in R2
p = arr + SIZEOF_StgSmallMutArrPtrs;
for:
- if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
+ if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) {
W_[p] = init;
p = p + WDS(1);
goto for;
@@ -419,7 +426,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
stg_unsafeThawSmallArrayzh ( gcptr arr )
{
// See stg_unsafeThawArrayzh
- if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
+ if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
recordMutable(arr);
// must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
@@ -432,7 +439,7 @@ stg_unsafeThawSmallArrayzh ( gcptr arr )
stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
- cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+ cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
@@ -443,7 +450,7 @@ stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
// We have to escape the "z" in the name.
stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
{
- cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+ cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
}
stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
@@ -557,9 +564,9 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
#endif
}
-stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
+stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f )
{
- W_ z, x, y, r, h;
+ W_ z, x, y, h;
/* If x is the current contents of the MutVar#, then
We want to make the new contents point to
@@ -568,13 +575,12 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
and the return value is
- (sel_1 (f x))
+ (# x, (f x) #)
obviously we can share (f x).
z = [stg_ap_2 f x] (max (HS + 2) MIN_UPD_SIZE)
y = [stg_sel_0 z] (max (HS + 1) MIN_UPD_SIZE)
- r = [stg_sel_1 z] (max (HS + 1) MIN_UPD_SIZE)
*/
#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 1
@@ -593,7 +599,7 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif
-#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
+#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE)
HP_CHK_GEN_TICKY(SIZE);
@@ -611,13 +617,6 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
LDV_RECORD_CREATE(y);
StgThunk_payload(y,0) = z;
- TICK_ALLOC_THUNK_1();
- CCCS_ALLOC(THUNK_1_SIZE);
- r = y - THUNK_1_SIZE;
- SET_HDR(r, stg_sel_1_upd_info, CCCS);
- LDV_RECORD_CREATE(r);
- StgThunk_payload(r,0) = z;
-
retry:
x = StgMutVar_var(mv);
StgThunk_payload(z,1) = x;
@@ -632,15 +631,66 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
}
- return (r);
+ return (x,z);
+}
+
+stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f )
+{
+ W_ z, x, h;
+
+ /* If x is the current contents of the MutVar#, then
+ We want to make the new contents point to
+
+ (f x)
+
+ and the return value is
+
+ (# x, (f x) #)
+
+ obviously we can share (f x).
+
+ z = [stg_ap_2 f x] (max (HS + 2) MIN_UPD_SIZE)
+ */
+
+#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 2
+#define THUNK_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
+#define TICK_ALLOC_THUNK() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
+#else
+#define THUNK_SIZE (SIZEOF_StgThunkHeader + WDS(2))
+#define TICK_ALLOC_THUNK() TICK_ALLOC_UP_THK(WDS(2),0)
+#endif
+
+ HP_CHK_GEN_TICKY(THUNK_SIZE);
+
+ TICK_ALLOC_THUNK();
+ CCCS_ALLOC(THUNK_SIZE);
+ z = Hp - THUNK_SIZE + WDS(1);
+ SET_HDR(z, stg_ap_2_upd_info, CCCS);
+ LDV_RECORD_CREATE(z);
+ StgThunk_payload(z,0) = f;
+
+ retry:
+ x = StgMutVar_var(mv);
+ StgThunk_payload(z,1) = x;
+#if defined(THREADED_RTS)
+ (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, z);
+ if (h != x) { goto retry; }
+#else
+ StgMutVar_var(mv) = z;
+#endif
+
+ if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
+ ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
+ }
+
+ return (x,z);
}
+
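
Both stg_atomicModifyMutVar2zh and stg_atomicModifyMutVarzuzh follow the same lock-free shape: allocate the thunk z = [stg_ap_2 f x] once, then compare-and-swap the MutVar from x to z, re-linking the freshly read x into the thunk on each failed attempt. The same loop in C11-atomics terms (stand-in types, not the RTS's):

    #include <stdatomic.h>

    struct thunk { void *fun; void *arg; };  /* stand-in for [stg_ap_2 f x] */

    /* Returns the old contents x; the pair (# x, z #) is what the primop
       hands back to Haskell. */
    static void *atomic_modify(_Atomic(void *) *var, struct thunk *z)
    {
        void *x = atomic_load(var);
        do {
            z->arg = x;   /* StgThunk_payload(z,1) = x */
        } while (!atomic_compare_exchange_weak(var, &x, (void *)z));
        return x;
    }
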
/* -----------------------------------------------------------------------------
Weak Pointer Primitives
-------------------------------------------------------------------------- */
-STRING(stg_weak_msg,"New weak pointer at %p\n")
-
stg_mkWeakzh ( gcptr key,
gcptr value,
gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
@@ -663,7 +713,7 @@ stg_mkWeakzh ( gcptr key,
Capability_weak_ptr_list_tl(MyCapability()) = w;
}
- IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));
+ IF_DEBUG(weak, ccall debugBelch("New weak pointer at %p\n",w));
return (w);
}
@@ -673,8 +723,6 @@ stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
}
-STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")
-
stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer
W_ ptr,
W_ flag, // has environment (0 or 1)
@@ -708,7 +756,7 @@ stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer
recordMutable(w);
- IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));
+ IF_DEBUG(weak, ccall debugBelch("Adding a finalizer to %p\n",w));
return (1);
}
@@ -924,11 +972,6 @@ stg_yieldzh ()
jump stg_yield_noregs();
}
-stg_myThreadIdzh ()
-{
- return (CurrentTSO);
-}
-
stg_labelThreadzh ( gcptr threadid, W_ addr )
{
#if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
@@ -1004,6 +1047,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
alt_code))
return (P_ ret)
{
+ unwind Sp = Sp + SIZEOF_StgCatchRetryFrame;
W_ r;
gcptr trec, outer, arg;
@@ -1041,11 +1085,10 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
// Atomically frame ------------------------------------------------------------
// This must match StgAtomicallyFrame in Closures.h
-#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,next,result) \
+#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,result) \
w_ info_ptr, \
PROF_HDR_FIELDS(w_,p1,p2) \
p_ code, \
- p_ next, \
p_ result
@@ -1054,67 +1097,36 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
ATOMICALLY_FRAME_FIELDS(W_,P_,
info_ptr, p1, p2,
code,
- next_invariant,
frame_result))
return (P_ result) // value returned to the frame
{
W_ valid;
- gcptr trec, outer, next_invariant, q;
+ gcptr trec, outer, q;
trec = StgTSO_trec(CurrentTSO);
outer = StgTRecHeader_enclosing_trec(trec);
- if (outer == NO_TREC) {
- /* First time back at the atomically frame -- pick up invariants */
- ("ptr" next_invariant) =
- ccall stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr");
- frame_result = result;
+ /* Back at the atomically frame */
+ frame_result = result;
+ /* try to commit */
+ (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
+ if (valid != 0) {
+ /* Transaction was valid: commit succeeded */
+ StgTSO_trec(CurrentTSO) = NO_TREC;
+ return (frame_result);
} else {
- /* Second/subsequent time back at the atomically frame -- abort the
- * tx that's checking the invariant and move on to the next one */
- StgTSO_trec(CurrentTSO) = outer;
- StgInvariantCheckQueue_my_execution(next_invariant) = trec;
- ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
- /* Don't free trec -- it's linked from q and will be stashed in the
- * invariant if we eventually commit. */
- next_invariant =
- StgInvariantCheckQueue_next_queue_entry(next_invariant);
- trec = outer;
- }
-
- if (next_invariant != END_INVARIANT_CHECK_QUEUE) {
- /* We can't commit yet: another invariant to check */
- ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", trec "ptr");
+ /* Transaction was not valid: try again */
+ ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr",
+ NO_TREC "ptr");
StgTSO_trec(CurrentTSO) = trec;
- q = StgInvariantCheckQueue_invariant(next_invariant);
+
jump stg_ap_v_fast
+ // push the StgAtomicallyFrame again: the code generator is
+ // clever enough to only assign the fields that have changed.
(ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
- code,next_invariant,frame_result))
- (StgAtomicInvariant_code(q));
-
- } else {
-
- /* We've got no more invariants to check, try to commit */
- (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
- if (valid != 0) {
- /* Transaction was valid: commit succeeded */
- StgTSO_trec(CurrentTSO) = NO_TREC;
- return (frame_result);
- } else {
- /* Transaction was not valid: try again */
- ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr",
- NO_TREC "ptr");
- StgTSO_trec(CurrentTSO) = trec;
- next_invariant = END_INVARIANT_CHECK_QUEUE;
-
- jump stg_ap_v_fast
- // push the StgAtomicallyFrame again: the code generator is
- // clever enough to only assign the fields that have changed.
- (ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
- code,next_invariant,frame_result))
- (code);
- }
+ code,frame_result))
+ (code);
}
}
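[Editor's note: with invariant checking gone, the frame's return path reduces to a single commit-or-retry loop. A hedged C-style sketch of the control flow (run_stm is an invented stand-in for re-entering the transaction code; stmStartTransaction and stmCommitTransaction are the real STM.c entry points):

    void *atomically_sketch(Capability *cap, StgClosure *code)
    {
        for (;;) {
            StgTRecHeader *trec = stmStartTransaction(cap, NO_TREC);
            void *result = run_stm(cap, code, trec);  /* invented helper */
            if (stmCommitTransaction(cap, trec)) {
                return result;             /* valid: commit succeeded */
            }
            /* not valid: a conflicting commit happened elsewhere, so
             * re-execute 'code' from scratch with a fresh trec */
        }
    }
]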
@@ -1124,7 +1136,6 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
ATOMICALLY_FRAME_FIELDS(W_,P_,
info_ptr, p1, p2,
code,
- next_invariant,
frame_result))
return (/* no return values */)
{
@@ -1136,7 +1147,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
/* Previous attempt is still valid: no point trying again yet */
jump stg_block_noregs
(ATOMICALLY_FRAME_FIELDS(,,info_ptr, p1, p2,
- code,next_invariant,frame_result))
+ code,frame_result))
();
} else {
/* Previous attempt is no longer valid: try again */
@@ -1146,7 +1157,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
// change the frame header to stg_atomically_frame_info
jump stg_ap_v_fast
(ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, p1, p2,
- code,next_invariant,frame_result))
+ code,frame_result))
(code);
}
}
@@ -1197,7 +1208,7 @@ stg_atomicallyzh (P_ stm)
{
P_ old_trec;
P_ new_trec;
- P_ code, next_invariant, frame_result;
+ P_ code, frame_result;
// stmStartTransaction may allocate
MAYBE_GC_P(stg_atomicallyzh, stm);
@@ -1212,7 +1223,6 @@ stg_atomicallyzh (P_ stm)
}
code = stm;
- next_invariant = END_INVARIANT_CHECK_QUEUE;
frame_result = NO_TREC;
/* Start the memory transaction */
@@ -1221,7 +1231,7 @@ stg_atomicallyzh (P_ stm)
jump stg_ap_v_fast
(ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, CCCS, 0,
- code,next_invariant,frame_result))
+ code,frame_result))
(stm);
}
@@ -1324,16 +1334,6 @@ retry_pop_stack:
// We've reached the ATOMICALLY_FRAME: attempt to wait
ASSERT(frame_type == ATOMICALLY_FRAME);
- if (outer != NO_TREC) {
- // We called retry while checking invariants, so abort the current
- // invariant check (merging its TVar accesses into the parents read
- // set so we'll wait on them)
- ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
- ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");
- trec = outer;
- StgTSO_trec(CurrentTSO) = trec;
- outer = StgTRecHeader_enclosing_trec(trec);
- }
ASSERT(outer == NO_TREC);
(r) = ccall stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr");
@@ -1353,20 +1353,6 @@ retry_pop_stack:
}
}
-stg_checkzh (P_ closure /* STM a */)
-{
- W_ trec;
-
- MAYBE_GC_P (stg_checkzh, closure);
-
- trec = StgTSO_trec(CurrentTSO);
- ccall stmAddInvariantToCheck(MyCapability() "ptr",
- trec "ptr",
- closure "ptr");
- return ();
-}
-
-
stg_newTVarzh (P_ init)
{
W_ tv;
@@ -1741,7 +1727,7 @@ loop:
ccall tryWakeupThread(MyCapability() "ptr", tso);
- // If it was an readMVar, then we can still do work,
+ // If it was a readMVar, then we can still do work,
// so loop back. (XXX: This could take a while)
if (why_blocked == BlockedOnMVarRead) {
q = StgMVarTSOQueue_link(q);
@@ -1822,7 +1808,7 @@ loop:
ccall tryWakeupThread(MyCapability() "ptr", tso);
- // If it was an readMVar, then we can still do work,
+ // If it was a readMVar, then we can still do work,
// so loop back. (XXX: This could take a while)
if (why_blocked == BlockedOnMVarRead) {
q = StgMVarTSOQueue_link(q);
@@ -2004,70 +1990,44 @@ stg_mkApUpd0zh ( P_ bco )
stg_unpackClosurezh ( P_ closure )
{
- W_ clos, info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
- clos = UNTAG(closure);
- info = %GET_STD_INFO(clos);
-
- // Some closures have non-standard layout, so we omit those here.
- W_ type;
- type = TO_W_(%INFO_TYPE(info));
- switch [0 .. N_CLOSURE_TYPES] type {
- case THUNK_SELECTOR : {
- ptrs = 1;
- nptrs = 0;
- goto out;
- }
- case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
- THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
- ptrs = 0;
- nptrs = 0;
- goto out;
- }
- default: {
- ptrs = TO_W_(%INFO_PTRS(info));
- nptrs = TO_W_(%INFO_NPTRS(info));
- goto out;
- }}
-
-out:
- W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
- nptrs_arr_sz = SIZEOF_StgArrBytes + WDS(nptrs);
- ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
- ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
-
- ALLOC_PRIM_P (ptrs_arr_sz + nptrs_arr_sz, stg_unpackClosurezh, closure);
-
- ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
- nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
-
- SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, CCCS);
- StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
- StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
+ W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
+ info = %GET_STD_INFO(UNTAG(closure));
- p = 0;
+ ptrs = TO_W_(%INFO_PTRS(info));
+ nptrs = TO_W_(%INFO_NPTRS(info));
-write_ptrs:
- if(p < ptrs) {
- W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
- p = p + 1;
- goto write_ptrs;
- }
- /* We can leave the card table uninitialised, since the array is
- allocated in the nursery. The GC will fill it in if/when the array
- is promoted. */
+ W_ clos;
+ clos = UNTAG(closure);
- SET_HDR(nptrs_arr, stg_ARR_WORDS_info, CCCS);
- StgArrBytes_bytes(nptrs_arr) = WDS(nptrs);
- p = 0;
+ W_ len;
+ // The array returned is the raw data for the entire closure.
+ // Its length depends on the closure type and its ptrs/non-ptrs counts.
+ (len) = foreign "C" heap_view_closureSize(clos "ptr");
+
+ W_ ptrs_arr_sz, ptrs_arr_cards, dat_arr_sz;
+ dat_arr_sz = SIZEOF_StgArrBytes + WDS(len);
-write_nptrs:
- if(p < nptrs) {
- W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
+ ALLOC_PRIM_P (dat_arr_sz, stg_unpackClosurezh, closure);
+
+ dat_arr = Hp - dat_arr_sz + WDS(1);
+
+ SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS);
+ StgArrBytes_bytes(dat_arr) = WDS(len);
+ p = 0;
+for:
+ if(p < len) {
+ W_[BYTE_ARR_CTS(dat_arr) + WDS(p)] = W_[clos + WDS(p)];
p = p + 1;
- goto write_nptrs;
+ goto for;
}
- return (info, ptrs_arr, nptrs_arr);
+ W_ ptrArray;
+
+ // Follow the pointers
+ ("ptr" ptrArray) = foreign "C" heap_view_closurePtrs(MyCapability() "ptr", clos "ptr");
+
+ return (info, dat_arr, ptrArray);
}
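[Editor's note: the rewritten primop no longer interprets the closure layout itself: it asks the new rts/Heap.c helpers for the closure's total size in words, copies those words verbatim, and recovers the pointer fields separately. In outline, as a hedged C sketch (the real code allocates the result on the Haskell heap rather than with stgMallocBytes):

    StgWord  len = heap_view_closureSize(clos);   /* whole closure, in words */
    StgWord *dat = stgMallocBytes(len * sizeof(StgWord), "unpack sketch");
    for (StgWord i = 0; i < len; i++) {
        dat[i] = ((StgWord *)clos)[i];            /* raw copy, header included */
    }
    /* pointer fields come back as a separate array from
     * heap_view_closurePtrs(cap, clos), as in the Cmm above */
]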
/* -----------------------------------------------------------------------------
@@ -2118,8 +2078,6 @@ stg_waitWritezh ( W_ fd )
#endif
}
-
-STRING(stg_delayzh_malloc_str, "stg_delayzh")
stg_delayzh ( W_ us_delay )
{
#if defined(mingw32_HOST_OS)
@@ -2140,7 +2098,7 @@ stg_delayzh ( W_ us_delay )
/* could probably allocate this on the heap instead */
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
- stg_delayzh_malloc_str);
+ "stg_delayzh");
(reqID) = ccall addDelayRequest(us_delay);
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
@@ -2185,7 +2143,6 @@ while:
#if defined(mingw32_HOST_OS)
-STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )
{
W_ ares;
@@ -2200,7 +2157,7 @@ stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )
/* could probably allocate this on the heap instead */
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
- stg_asyncReadzh_malloc_str);
+ "stg_asyncReadzh");
(reqID) = ccall addIORequest(fd, 0/*FALSE*/,is_sock,len,buf "ptr");
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
@@ -2211,7 +2168,6 @@ stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )
#endif
}
-STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
{
W_ ares;
@@ -2225,7 +2181,7 @@ stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
- stg_asyncWritezh_malloc_str);
+ "stg_asyncWritezh");
(reqID) = ccall addIORequest(fd, 1/*TRUE*/,is_sock,len,buf "ptr");
StgAsyncIOResult_reqID(ares) = reqID;
@@ -2237,7 +2193,6 @@ stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
#endif
}
-STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
stg_asyncDoProczh ( W_ proc, W_ param )
{
W_ ares;
@@ -2252,7 +2207,7 @@ stg_asyncDoProczh ( W_ proc, W_ param )
/* could probably allocate this on the heap instead */
("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
- stg_asyncDoProczh_malloc_str);
+ "stg_asyncDoProczh");
(reqID) = ccall addDoProcRequest(proc "ptr",param "ptr");
StgAsyncIOResult_reqID(ares) = reqID;
StgAsyncIOResult_len(ares) = 0;
@@ -2358,7 +2313,7 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */
stg_getApStackValzh ( P_ ap_stack, W_ offset )
{
- if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
+ if (%INFO_PTR(UNTAG(ap_stack)) == stg_AP_STACK_info) {
return (1,StgAP_STACK_payload(ap_stack,offset));
} else {
return (0,ap_stack);
@@ -2450,6 +2405,14 @@ stg_traceEventzh ( W_ msg )
return ();
}
+stg_traceBinaryEventzh ( W_ msg, W_ len )
+{
+#if defined(TRACING) || defined(DEBUG)
+ ccall traceUserBinaryMsg(MyCapability() "ptr", msg "ptr", len);
+#endif
+ return ();
+}
+
// Same code as stg_traceEventzh above but a different kind of event
// Before changing this code, read the comments in the impl above
stg_traceMarkerzh ( W_ msg )
@@ -2475,3 +2438,23 @@ stg_traceMarkerzh ( W_ msg )
return ();
}
+
+stg_getThreadAllocationCounterzh ()
+{
+ // Account for the allocation in the current block
+ W_ offset;
+ offset = Hp - bdescr_start(CurrentNursery);
+ return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset));
+}
+
+stg_setThreadAllocationCounterzh ( I64 counter )
+{
+ // Allocation in the current block will be subtracted by
+ // getThreadAllocationCounter#, so we have to offset any existing
+ // allocation here. See also openNursery/closeNursery in
+ // compiler/codeGen/StgCmmForeign.hs.
+ W_ offset;
+ offset = Hp - bdescr_start(CurrentNursery);
+ StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset);
+ return ();
+}
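[Editor's note: both primops must account for allocation performed in the current nursery block, which is visible only as the distance Hp has advanced from the block start; alloc_limit is brought up to date only by closeNursery. The arithmetic, as a hedged sketch with invented local names:

    int64_t pending = hp - block_start;      /* allocated but not yet counted  */
    int64_t counter = alloc_limit - pending; /* getThreadAllocationCounter#    */
    alloc_limit     = new_counter + pending; /* setThreadAllocationCounter#    */
]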
diff --git a/rts/Printer.c b/rts/Printer.c
index f077814ba9..7f7e83cb8b 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -13,6 +13,7 @@
#include "rts/Bytecodes.h" /* for InstrPtr */
#include "sm/Storage.h"
+#include "sm/GCThread.h"
#include "Hash.h"
#include "Printer.h"
#include "RtsUtils.h"
@@ -23,6 +24,8 @@
#include <string.h>
+void findPtr(P_ p, int follow);
+
#if defined(DEBUG)
#include "Disassembler.h"
@@ -308,8 +311,8 @@ printClosure( const StgClosure *obj )
debugBelch("MUT_ARR_PTRS_DIRTY(size=%" FMT_Word ")\n", (W_)((StgMutArrPtrs *)obj)->ptrs);
break;
- case MUT_ARR_PTRS_FROZEN:
- debugBelch("MUT_ARR_PTRS_FROZEN(size=%" FMT_Word ")\n", (W_)((StgMutArrPtrs *)obj)->ptrs);
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ debugBelch("MUT_ARR_PTRS_FROZEN_CLEAN(size=%" FMT_Word ")\n", (W_)((StgMutArrPtrs *)obj)->ptrs);
break;
case SMALL_MUT_ARR_PTRS_CLEAN:
@@ -322,8 +325,8 @@ printClosure( const StgClosure *obj )
(W_)((StgSmallMutArrPtrs *)obj)->ptrs);
break;
- case SMALL_MUT_ARR_PTRS_FROZEN:
- debugBelch("SMALL_MUT_ARR_PTRS_FROZEN(size=%" FMT_Word ")\n",
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ debugBelch("SMALL_MUT_ARR_PTRS_FROZEN_CLEAN(size=%" FMT_Word ")\n",
(W_)((StgSmallMutArrPtrs *)obj)->ptrs);
break;
@@ -331,7 +334,29 @@ printClosure( const StgClosure *obj )
case MVAR_DIRTY:
{
StgMVar* mv = (StgMVar*)obj;
- debugBelch("MVAR(head=%p, tail=%p, value=%p)\n", mv->head, mv->tail, mv->value);
+
+ debugBelch("MVAR(head=");
+ if ((StgClosure*)mv->head == &stg_END_TSO_QUEUE_closure) {
+ debugBelch("END_TSO_QUEUE");
+ } else {
+ debugBelch("%p", mv->head);
+ }
+
+ debugBelch(", tail=");
+ if ((StgClosure*)mv->tail == &stg_END_TSO_QUEUE_closure) {
+ debugBelch("END_TSO_QUEUE");
+ } else {
+ debugBelch("%p", mv->tail);
+ }
+
+ debugBelch(", value=");
+ if ((StgClosure*)mv->value == &stg_END_TSO_QUEUE_closure) {
+ debugBelch("END_TSO_QUEUE");
+ } else {
+ debugBelch("%p", mv->value);
+ }
+ debugBelch(")\n");
+
break;
}
@@ -358,7 +383,7 @@ printClosure( const StgClosure *obj )
case WEAK:
debugBelch("WEAK(");
- debugBelch(" key=%p value=%p finalizer=%p",
+ debugBelch("key=%p value=%p finalizer=%p",
(StgPtr)(((StgWeak*)obj)->key),
(StgPtr)(((StgWeak*)obj)->value),
(StgPtr)(((StgWeak*)obj)->finalizer));
@@ -373,7 +398,7 @@ printClosure( const StgClosure *obj )
break;
case STACK:
- debugBelch("STACK");
+ debugBelch("STACK\n");
break;
#if 0
@@ -387,9 +412,12 @@ printClosure( const StgClosure *obj )
case COMPACT_NFDATA:
debugBelch("COMPACT_NFDATA(size=%" FMT_Word ")\n",
- (W_)((StgCompactNFData *)obj)->totalW * sizeof(W_));
+ (W_)((StgCompactNFData *)obj)->totalW * (W_)sizeof(W_));
break;
+ case TREC_CHUNK:
+ debugBelch("TREC_CHUNK\n");
+ break;
default:
//barf("printClosure %d",get_itbl(obj)->type);
@@ -400,6 +428,21 @@ printClosure( const StgClosure *obj )
}
}
+void
+printMutableList(bdescr *bd)
+{
+ StgPtr p;
+
+ debugBelch("mutable list %p: ", bd);
+
+ for (; bd != NULL; bd = bd->link) {
+ for (p = bd->start; p < bd->free; p++) {
+ debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p));
+ }
+ }
+ debugBelch("\n");
+}
+
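[Editor's note: like printClosure and printTSO, printMutableList is meant to be invoked from a debugger; for example, something along the lines of `call printMutableList(capabilities[0]->mut_lists[1])` from gdb — the exact expression depends on the build.]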
// If you know you have an UPDATE_FRAME, but want to know exactly which.
const char *info_update_frame(const StgClosure *closure)
{
@@ -419,13 +462,6 @@ const char *info_update_frame(const StgClosure *closure)
}
}
-/*
-void printGraph( StgClosure *obj )
-{
- printClosure(obj);
-}
-*/
-
static void
printSmallBitmap( StgPtr spBottom, StgPtr payload, StgWord bitmap,
uint32_t size )
@@ -520,6 +556,18 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
debugBelch("stg_ap_ppppp_info\n" );
} else if (c == (StgWord)&stg_ap_pppppp_info) {
debugBelch("stg_ap_pppppp_info\n" );
+ } else if (c == (StgWord)&stg_ret_v_info) {
+ debugBelch("stg_ret_v_info\n" );
+ } else if (c == (StgWord)&stg_ret_p_info) {
+ debugBelch("stg_ret_p_info\n" );
+ } else if (c == (StgWord)&stg_ret_n_info) {
+ debugBelch("stg_ret_n_info\n" );
+ } else if (c == (StgWord)&stg_ret_f_info) {
+ debugBelch("stg_ret_f_info\n" );
+ } else if (c == (StgWord)&stg_ret_d_info) {
+ debugBelch("stg_ret_d_info\n" );
+ } else if (c == (StgWord)&stg_ret_l_info) {
+ debugBelch("stg_ret_l_info\n" );
#if defined(PROFILING)
} else if (c == (StgWord)&stg_restore_cccs_info) {
debugBelch("stg_restore_cccs_info\n" );
@@ -590,10 +638,14 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
}
}
+static void printStack( StgStack *stack )
+{
+ printStackChunk( stack->sp, stack->stack + stack->stack_size );
+}
+
void printTSO( StgTSO *tso )
{
- printStackChunk( tso->stackobj->sp,
- tso->stackobj->stack+tso->stackobj->stack_size);
+ printStack( tso->stackobj );
}
/* --------------------------------------------------------------------------
@@ -725,8 +777,6 @@ extern void DEBUG_LoadSymbols( const char *name STG_UNUSED )
#endif /* USING_LIBBFD */
-void findPtr(P_ p, int); /* keep gcc -Wall happy */
-
int searched = 0;
static int
@@ -776,11 +826,16 @@ findPtr(P_ p, int follow)
int i = 0;
searched = 0;
+#if 0
+ // We can't search the nursery, because we don't know which blocks
+ // contain valid data: the bd->free pointers in the nursery are only
+ // reset just before a block is used.
for (n = 0; n < n_capabilities; n++) {
bd = nurseries[i].blocks;
i = findPtrBlocks(p,bd,arr,arr_size,i);
if (i >= arr_size) return;
}
+#endif
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
bd = generations[g].blocks;
@@ -788,6 +843,13 @@ findPtr(P_ p, int follow)
bd = generations[g].large_objects;
i = findPtrBlocks(p,bd,arr,arr_size,i);
if (i >= arr_size) return;
+ for (n = 0; n < n_capabilities; n++) {
+ i = findPtrBlocks(p, gc_threads[n]->gens[g].part_list,
+ arr, arr_size, i);
+ i = findPtrBlocks(p, gc_threads[n]->gens[g].todo_bd,
+ arr, arr_size, i);
+ }
+ if (i >= arr_size) return;
}
if (follow && i == 1) {
debugBelch("-->\n");
@@ -814,13 +876,19 @@ void printObj( StgClosure *obj )
debugBelch("obj 0x%p (enable -DDEBUG for more info) " , obj );
}
-
+void findPtr(P_ p, int follow)
+{
+ // we're printing the arguments just to silence the unused parameter warning
+ debugBelch("recompile your program with -debug in order to run ");
+ debugBelch("findPtr(0x%p, %d)\n", p, follow);
+}
#endif /* DEBUG */
/* -----------------------------------------------------------------------------
Closure types
- NOTE: must be kept in sync with the closure types in includes/ClosureTypes.h
+ NOTE: must be kept in sync with the closure types in
+ includes/rts/storage/ClosureTypes.h
-------------------------------------------------------------------------- */
const char *closure_type_names[] = {
@@ -869,8 +937,8 @@ const char *closure_type_names[] = {
[ARR_WORDS] = "ARR_WORDS",
[MUT_ARR_PTRS_CLEAN] = "MUT_ARR_PTRS_CLEAN",
[MUT_ARR_PTRS_DIRTY] = "MUT_ARR_PTRS_DIRTY",
- [MUT_ARR_PTRS_FROZEN0] = "MUT_ARR_PTRS_FROZEN0",
- [MUT_ARR_PTRS_FROZEN] = "MUT_ARR_PTRS_FROZEN",
+ [MUT_ARR_PTRS_FROZEN_DIRTY] = "MUT_ARR_PTRS_FROZEN_DIRTY",
+ [MUT_ARR_PTRS_FROZEN_CLEAN] = "MUT_ARR_PTRS_FROZEN_CLEAN",
[MUT_VAR_CLEAN] = "MUT_VAR_CLEAN",
[MUT_VAR_DIRTY] = "MUT_VAR_DIRTY",
[WEAK] = "WEAK",
@@ -883,9 +951,17 @@ const char *closure_type_names[] = {
[CATCH_RETRY_FRAME] = "CATCH_RETRY_FRAME",
[CATCH_STM_FRAME] = "CATCH_STM_FRAME",
[WHITEHOLE] = "WHITEHOLE",
+ [SMALL_MUT_ARR_PTRS_CLEAN] = "SMALL_MUT_ARR_PTRS_CLEAN",
+ [SMALL_MUT_ARR_PTRS_DIRTY] = "SMALL_MUT_ARR_PTRS_DIRTY",
+ [SMALL_MUT_ARR_PTRS_FROZEN_DIRTY] = "SMALL_MUT_ARR_PTRS_FROZEN_DIRTY",
+ [SMALL_MUT_ARR_PTRS_FROZEN_CLEAN] = "SMALL_MUT_ARR_PTRS_FROZEN_CLEAN",
[COMPACT_NFDATA] = "COMPACT_NFDATA"
};
+#if N_CLOSURE_TYPES != 64
+#error Closure types changed: update Printer.c!
+#endif
+
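[Editor's note: this compile-time guard works because N_CLOSURE_TYPES is a preprocessor constant. A C11 equivalent, for illustration, would be:

    _Static_assert(N_CLOSURE_TYPES == 64,
                   "Closure types changed: update Printer.c!");
]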
const char *
info_type(const StgClosure *closure){
return closure_type_names[get_itbl(closure)->type];
diff --git a/rts/Printer.h b/rts/Printer.h
index dd268bef42..d2eaf010c6 100644
--- a/rts/Printer.h
+++ b/rts/Printer.h
@@ -24,6 +24,7 @@ const char * info_update_frame ( const StgClosure *closure );
extern void printClosure ( const StgClosure *obj );
extern void printStackChunk ( StgPtr sp, StgPtr spLim );
extern void printTSO ( StgTSO *tso );
+extern void printMutableList( bdescr *bd );
extern void DEBUG_LoadSymbols( const char *name );
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 43801b8944..de3d2b6aa5 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -23,6 +23,7 @@
#include "Trace.h"
#include "sm/GCThread.h"
+#include <fs_rts.h>
#include <string.h>
/* -----------------------------------------------------------------------------
@@ -126,8 +127,8 @@ closureIdentity( const StgClosure *p )
return retainerSetOf(p);
else
return NULL;
+#endif
-#else
case HEAP_BY_CLOSURE_TYPE:
{
const StgInfoTable *info;
@@ -146,7 +147,6 @@ closureIdentity( const StgClosure *p )
}
}
-#endif
default:
barf("closureIdentity");
}
@@ -171,7 +171,7 @@ doingRetainerProfiling( void )
}
#endif /* PROFILING */
-// Precesses a closure 'c' being destroyed whose size is 'size'.
+// Processes a closure 'c' being destroyed whose size is 'size'.
// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
// such as TSO; they should not be involved in computing dragNew or voidNew.
//
@@ -340,7 +340,7 @@ void initProfiling (void)
sprintf(hp_filename, "%s.hp", prog);
/* open the log file */
- if ((hp_file = fopen(hp_filename, "w")) == NULL) {
+ if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
debugBelch("Can't open profiling report file %s\n",
hp_filename);
RtsFlags.ProfFlags.doHeapProfile = 0;
@@ -814,7 +814,6 @@ dumpCensus( Census *census )
if (count == 0) continue;
-#if !defined(PROFILING)
switch (RtsFlags.ProfFlags.doHeapProfile) {
case HEAP_BY_CLOSURE_TYPE:
fprintf(hp_file, "%s", (char *)ctr->identity);
@@ -822,7 +821,6 @@ dumpCensus( Census *census )
count * sizeof(W_));
break;
}
-#endif
#if defined(PROFILING)
switch (RtsFlags.ProfFlags.doHeapProfile) {
@@ -1095,16 +1093,16 @@ heapCensusChain( Census *census, bdescr *bd )
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
prim = true;
size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
break;
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
prim = true;
size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
break;
diff --git a/rts/Profiling.c b/rts/Profiling.c
index 9523572887..9f1a442951 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -22,6 +22,7 @@
#include "Printer.h"
#include "Capability.h"
+#include <fs_rts.h>
#include <string.h>
#if defined(DEBUG)
@@ -264,7 +265,7 @@ initProfilingLogFile(void)
sprintf(prof_filename, "%s.prof", stem);
/* open the log file */
- if ((prof_file = fopen(prof_filename, "w")) == NULL) {
+ if ((prof_file = __rts_fopen(prof_filename, "w")) == NULL) {
debugBelch("Can't open profiling report file %s\n", prof_filename);
RtsFlags.CcFlags.doCostCentres = 0;
// Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
@@ -281,7 +282,7 @@ initProfilingLogFile(void)
sprintf(hp_filename, "%s.hp", stem);
/* open the log file */
- if ((hp_file = fopen(hp_filename, "w")) == NULL) {
+ if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
debugBelch("Can't open profiling report file %s\n",
hp_filename);
RtsFlags.ProfFlags.doHeapProfile = 0;
@@ -307,6 +308,25 @@ endProfiling ( void )
}
}
+
+/*
+ These are used in the C stubs produced by the code generator
+ to register cost centres and cost-centre stacks.
+ */
+void registerCcList(CostCentre **cc_list)
+{
+ for (CostCentre **i = cc_list; *i != NULL; i++) {
+ REGISTER_CC(*i);
+ }
+}
+
+void registerCcsList(CostCentreStack **cc_list)
+{
+ for (CostCentreStack **i = cc_list; *i != NULL; i++) {
+ REGISTER_CCS(*i);
+ }
+}
+
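[Editor's note: a hedged illustration of the shape of generated stub this enables, with invented names — each module can now emit one NULL-terminated table and a single call instead of one REGISTER_CC per centre:

    static CostCentre      *my_ccs[]  = { &cc_main, &cc_go, NULL };  /* invented */
    static CostCentreStack *my_ccss[] = { &ccs_main, NULL };         /* invented */

    static void register_prof_info(void)   /* invented stub name */
    {
        registerCcList(my_ccs);
        registerCcsList(my_ccss);
    }
]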
/* -----------------------------------------------------------------------------
Set CCCS when entering a function.
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 6f1ab79691..b08acc4078 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -108,7 +108,7 @@ suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
yourself using throwTo, the exception would actually immediately
be delivered. This is because throwTo itself is considered an
interruptible point, so the exception is always deliverable. Thus,
- ordinarily, we never end up with a message to onesself in the
+ ordinarily, we never end up with a message to oneself in the
blocked_exceptions queue.
- In the case of a StackOverflow, we don't actually care about the
@@ -416,21 +416,12 @@ check_target:
}
case BlockedOnSTM:
- lockTSO(target);
- // Unblocking BlockedOnSTM threads requires the TSO to be
- // locked; see STM.c:unpark_tso().
- if (target->why_blocked != BlockedOnSTM) {
- unlockTSO(target);
- goto retry;
- }
if ((target->flags & TSO_BLOCKEX) &&
((target->flags & TSO_INTERRUPTIBLE) == 0)) {
blockedThrowTo(cap,target,msg);
- unlockTSO(target);
return THROWTO_BLOCKED;
} else {
raiseAsync(cap, target, msg->exception, false, NULL);
- unlockTSO(target);
return THROWTO_SUCCESS;
}
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 6ca09fc43e..23f46e0714 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -30,9 +30,22 @@
#include "Stats.h"
#include "ProfHeap.h"
#include "Apply.h"
-#include "Stable.h" /* markStableTables */
+#include "StablePtr.h" /* markStablePtrTable */
+#include "StableName.h" /* rememberOldStableNameAddresses */
#include "sm/Storage.h" // for END_OF_STATIC_LIST
+/* Note [What is a retainer?]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+The definition of what sorts of things are counted as retainers is a bit hard to
+pin down. Intuitively, we want to identify closures which will help the user
+identify memory leaks due to thunks. In practice we also end up lumping mutable
+objects in this group for reasons that have been lost to time.
+
+The definition of retainer is implemented in isRetainer(), defined later in this
+file.
+*/
+
+
/*
Note: what to change in order to plug-in a new retainer profiling scheme?
(1) type retainer in ../includes/StgRetainerProf.h
@@ -108,7 +121,6 @@ typedef enum {
posTypeStep,
posTypePtrs,
posTypeSRT,
- posTypeLargeSRT,
} nextPosType;
typedef union {
@@ -125,16 +137,8 @@ typedef union {
// SRT
struct {
- StgClosure **srt;
- StgWord srt_bitmap;
+ StgClosure *srt;
} srt;
-
- // Large SRT
- struct {
- StgLargeSRT *srt;
- StgWord offset;
- } large_srt;
-
} nextPos;
typedef struct {
@@ -267,7 +271,6 @@ isEmptyRetainerStack( void )
/* -----------------------------------------------------------------------------
* Returns size of stack
* -------------------------------------------------------------------------- */
-#if defined(DEBUG)
W_
retainerStackBlocks( void )
{
@@ -279,7 +282,6 @@ retainerStackBlocks( void )
return res;
}
-#endif
/* -----------------------------------------------------------------------------
* Returns true if stackTop is at the stack boundary of the current stack,
@@ -324,28 +326,22 @@ find_ptrs( stackPos *info )
static INLINE void
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
{
- if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
- info->type = posTypeLargeSRT;
- info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
- info->next.large_srt.offset = 0;
+ info->type = posTypeSRT;
+ if (infoTable->i.srt) {
+ info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
} else {
- info->type = posTypeSRT;
- info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
- info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
+ info->next.srt.srt = NULL;
}
}
static INLINE void
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
{
- if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
- info->type = posTypeLargeSRT;
- info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
- info->next.large_srt.offset = 0;
+ info->type = posTypeSRT;
+ if (infoTable->i.srt) {
+ info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
} else {
- info->type = posTypeSRT;
- info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
- info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
+ info->next.srt.srt = NULL;
}
}
@@ -356,57 +352,10 @@ static INLINE StgClosure *
find_srt( stackPos *info )
{
StgClosure *c;
- StgWord bitmap;
-
if (info->type == posTypeSRT) {
- // Small SRT bitmap
- bitmap = info->next.srt.srt_bitmap;
- while (bitmap != 0) {
- if ((bitmap & 1) != 0) {
-#if defined(COMPILING_WINDOWS_DLL)
- if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
- c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
- else
- c = *(info->next.srt.srt);
-#else
- c = *(info->next.srt.srt);
-#endif
- bitmap = bitmap >> 1;
- info->next.srt.srt++;
- info->next.srt.srt_bitmap = bitmap;
- return c;
- }
- bitmap = bitmap >> 1;
- info->next.srt.srt++;
- }
- // bitmap is now zero...
- return NULL;
- }
- else {
- // Large SRT bitmap
- uint32_t i = info->next.large_srt.offset;
- StgWord bitmap;
-
- // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
- bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
- bitmap = bitmap >> (i % BITS_IN(StgWord));
- while (i < info->next.large_srt.srt->l.size) {
- if ((bitmap & 1) != 0) {
- c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
- i++;
- info->next.large_srt.offset = i;
- return c;
- }
- i++;
- if (i % BITS_IN(W_) == 0) {
- bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
- } else {
- bitmap = bitmap >> 1;
- }
- }
- // reached the end of this bitmap.
- info->next.large_srt.offset = i;
- return NULL;
+ c = info->next.srt.srt;
+ info->next.srt.srt = NULL;
+ return c;
}
}
@@ -414,7 +363,7 @@ find_srt( stackPos *info )
* push() pushes a stackElement representing the next child of *c
* onto the traverse stack. If *c has no child, *first_child is set
* to NULL and nothing is pushed onto the stack. If *c has only one
- * child, *c_chlid is set to that child and nothing is pushed onto
+ * child, *c_child is set to that child and nothing is pushed onto
* the stack. If *c has more than two children, *first_child is set
* to the first child and a stackElement representing the second
* child is pushed onto the stack.
@@ -518,8 +467,8 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
// StgMutArrPtr.ptrs, no SRT
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
(StgPtr)(((StgMutArrPtrs *)c)->payload));
*first_child = find_ptrs(&se.info);
@@ -530,8 +479,8 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
// StgMutArrPtr.ptrs, no SRT
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
(StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
*first_child = find_ptrs(&se.info);
@@ -540,6 +489,7 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
break;
// layout.payload.ptrs, SRT
+ case FUN_STATIC:
case FUN: // *c is a heap object.
case FUN_2_0:
init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
@@ -574,9 +524,7 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
init_srt_thunk(&se.info, get_thunk_itbl(c));
break;
- case FUN_STATIC: // *c is a heap object.
- ASSERT(get_itbl(c)->srt_bitmap != 0);
- case FUN_0_1:
+ case FUN_0_1: // *c is a heap object.
case FUN_0_2:
fun_srt_only:
init_srt_fun(&se.info, get_fun_itbl(c));
@@ -587,7 +535,7 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
// SRT only
case THUNK_STATIC:
- ASSERT(get_itbl(c)->srt_bitmap != 0);
+ ASSERT(get_itbl(c)->srt != 0);
case THUNK_0_1:
case THUNK_0_2:
thunk_srt_only:
@@ -621,7 +569,7 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
case IND:
case INVALID_OBJECT:
default:
- barf("Invalid object *c in push()");
+ barf("Invalid object *c in push(): %d", get_itbl(c)->type);
return;
}
@@ -861,8 +809,12 @@ pop( StgClosure **c, StgClosure **cp, retainer *r )
// StgMutArrPtr.ptrs, no SRT
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
*c = find_ptrs(&se->info);
if (*c == NULL) {
popOff();
@@ -874,6 +826,7 @@ pop( StgClosure **c, StgClosure **cp, retainer *r )
// layout.payload.ptrs, SRT
case FUN: // always a heap object
+ case FUN_STATIC:
case FUN_2_0:
if (se->info.type == posTypePtrs) {
*c = find_ptrs(&se->info);
@@ -902,7 +855,6 @@ pop( StgClosure **c, StgClosure **cp, retainer *r )
// SRT
do_srt:
case THUNK_STATIC:
- case FUN_STATIC:
case FUN_0_1:
case FUN_0_2:
case THUNK_0_1:
@@ -949,7 +901,7 @@ pop( StgClosure **c, StgClosure **cp, retainer *r )
case IND:
case INVALID_OBJECT:
default:
- barf("Invalid object *c in pop()");
+ barf("Invalid object *c in pop(): %d", get_itbl(se->c)->type);
return;
}
} while (true);
@@ -1022,6 +974,9 @@ isRetainer( StgClosure *c )
case MUT_VAR_DIRTY:
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case BLOCKING_QUEUE:
// thunks are retainers.
case THUNK:
@@ -1069,17 +1024,21 @@ isRetainer( StgClosure *c )
// closures. See trac #3956 for a program that hit this error.
case IND_STATIC:
case BLACKHOLE:
+ case WHITEHOLE:
// static objects
case FUN_STATIC:
// misc
case PRIM:
case BCO:
case ARR_WORDS:
+ case COMPACT_NFDATA:
// STM
case TREC_CHUNK:
// immutable arrays
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
return false;
//
@@ -1089,11 +1048,15 @@ isRetainer( StgClosure *c )
// legal objects during retainer profiling.
case UPDATE_FRAME:
case CATCH_FRAME:
+ case CATCH_RETRY_FRAME:
+ case CATCH_STM_FRAME:
case UNDERFLOW_FRAME:
+ case ATOMICALLY_FRAME:
case STOP_FRAME:
case RET_BCO:
case RET_SMALL:
case RET_BIG:
+ case RET_FUN:
// other cases
case IND:
case INVALID_OBJECT:
@@ -1122,16 +1085,7 @@ getRetainerFrom( StgClosure *c )
{
ASSERT(isRetainer(c));
-#if defined(RETAINER_SCHEME_INFO)
- // Retainer scheme 1: retainer = info table
- return get_itbl(c);
-#elif defined(RETAINER_SCHEME_CCS)
- // Retainer scheme 2: retainer = cost centre stack
return c->header.prof.ccs;
-#elif defined(RETAINER_SCHEME_CC)
- // Retainer scheme 3: retainer = cost centre
- return c->header.prof.ccs->cc;
-#endif
}
/* -----------------------------------------------------------------------------
@@ -1193,69 +1147,6 @@ retain_small_bitmap (StgPtr p, uint32_t size, StgWord bitmap,
}
/* -----------------------------------------------------------------------------
- * Call retainClosure for each of the closures in an SRT.
- * ------------------------------------------------------------------------- */
-
-static void
-retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
-{
- uint32_t i, b, size;
- StgWord bitmap;
- StgClosure **p;
-
- b = 0;
- p = (StgClosure **)srt->srt;
- size = srt->l.size;
- bitmap = srt->l.bitmap[b];
- for (i = 0; i < size; ) {
- if ((bitmap & 1) != 0) {
- retainClosure((StgClosure *)*p, c, c_child_r);
- }
- i++;
- p++;
- if (i % BITS_IN(W_) == 0) {
- b++;
- bitmap = srt->l.bitmap[b];
- } else {
- bitmap = bitmap >> 1;
- }
- }
-}
-
-static INLINE void
-retainSRT (StgClosure **srt, uint32_t srt_bitmap, StgClosure *c,
- retainer c_child_r)
-{
- uint32_t bitmap;
- StgClosure **p;
-
- bitmap = srt_bitmap;
- p = srt;
-
- if (bitmap == (StgHalfWord)(-1)) {
- retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
- return;
- }
-
- while (bitmap != 0) {
- if ((bitmap & 1) != 0) {
-#if defined(COMPILING_WINDOWS_DLL)
- if ( (unsigned long)(*srt) & 0x1 ) {
- retainClosure(* (StgClosure**) ((unsigned long) (*srt) & ~0x1),
- c, c_child_r);
- } else {
- retainClosure(*srt,c,c_child_r);
- }
-#else
- retainClosure(*srt,c,c_child_r);
-#endif
- }
- p++;
- bitmap = bitmap >> 1;
- }
-}
-
-/* -----------------------------------------------------------------------------
* Process all the objects in the stack chunk from stackStart to stackEnd
* with *c and *c_child_r being their parent and their most recent retainer,
* respectively. Treat stackOptionalFun as another child of *c if it is
@@ -1327,7 +1218,9 @@ retainStack( StgClosure *c, retainer c_child_r,
p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
follow_srt:
- retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
+ if (info->i.srt) {
+ retainClosure(GET_SRT(info),c,c_child_r);
+ }
continue;
case RET_BCO: {
@@ -1559,8 +1452,7 @@ inner_loop:
// all static objects after major garbage collections.
goto loop;
case THUNK_STATIC:
- case FUN_STATIC:
- if (get_itbl(c)->srt_bitmap == 0) {
+ if (get_itbl(c)->srt == 0) {
// No need to compute the retainer set; no dynamic objects
// are reachable from *c.
//
@@ -1587,6 +1479,14 @@ inner_loop:
// reachable static objects.
goto loop;
}
+ case FUN_STATIC: {
+ StgInfoTable *info = get_itbl(c);
+ if (info->srt == 0 && info->layout.payload.ptrs == 0) {
+ goto loop;
+ } else {
+ break;
+ }
+ }
default:
break;
}
@@ -1683,6 +1583,15 @@ inner_loop:
goto loop;
}
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)c;
+ retainClosure((StgClosure*) bq->link, c, c_child_r);
+ retainClosure((StgClosure*) bq->bh, c, c_child_r);
+ retainClosure((StgClosure*) bq->owner, c, c_child_r);
+ goto loop;
+ }
+
case PAP:
{
StgPAP *pap = (StgPAP *)c;
@@ -1755,11 +1664,11 @@ static void
computeRetainerSet( void )
{
StgWeak *weak;
- RetainerSet *rtl;
uint32_t g, n;
StgPtr ml;
bdescr *bd;
#if defined(DEBUG_RETAINER)
+ RetainerSet *rtl;
RetainerSet tmpRetainerSet;
#endif
@@ -1784,7 +1693,9 @@ computeRetainerSet( void )
}
// Consider roots from the stable ptr table.
- markStableTables(retainRoot, NULL);
+ markStablePtrTable(retainRoot, NULL);
+ // Remember old stable name addresses.
+ rememberOldStableNameAddresses ();
// The following code resets the rs field of each unvisited mutable
// object (computing sumOfNewCostExtra and updating costArray[] when
@@ -1801,9 +1712,9 @@ computeRetainerSet( void )
for (ml = bd->start; ml < bd->free; ml++) {
maybeInitRetainerSet((StgClosure *)*ml);
- rtl = retainerSetOf((StgClosure *)*ml);
#if defined(DEBUG_RETAINER)
+ rtl = retainerSetOf((StgClosure *)*ml);
if (rtl == NULL) {
// first visit to *ml
// This is a violation of the interface rule!
@@ -1886,9 +1797,6 @@ resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
p = (StgClosure*)*THUNK_STATIC_LINK(p);
break;
case FUN_STATIC:
- maybeInitRetainerSet(p);
- p = (StgClosure*)*FUN_STATIC_LINK(p);
- break;
case CONSTR:
case CONSTR_1_0:
case CONSTR_2_0:
@@ -2056,7 +1964,7 @@ retainerProfile(void)
#if defined(DEBUG_RETAINER)
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
- ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
+ (HEAP_ALLOCED(r))) && \
((StgWord)(*(StgPtr)r)!=(StgWord)0xaaaaaaaaaaaaaaaaULL))
static uint32_t
diff --git a/rts/RetainerProfile.h b/rts/RetainerProfile.h
index 6882a2a58a..bc11cc7e80 100644
--- a/rts/RetainerProfile.h
+++ b/rts/RetainerProfile.h
@@ -41,9 +41,7 @@ retainerSetOf( const StgClosure *c )
}
// Used by Storage.c:memInventory()
-#if defined(DEBUG)
extern W_ retainerStackBlocks ( void );
-#endif
#include "EndPrivate.h"
diff --git a/rts/RetainerSet.c b/rts/RetainerSet.c
index 58080970e9..59103ddf61 100644
--- a/rts/RetainerSet.c
+++ b/rts/RetainerSet.c
@@ -218,82 +218,17 @@ addElement(retainer r, RetainerSet *rs)
* printRetainer() prints the full information on a given retainer,
* not a retainer set.
* -------------------------------------------------------------------------- */
-#if defined(RETAINER_SCHEME_INFO)
-// Retainer scheme 1: retainer = info table
-static void
-printRetainer(FILE *f, retainer itbl)
-{
- fprintf(f, "%s[%s]", GET_PROF_DESC(itbl), itbl->prof.closure_type);
-}
-#elif defined(RETAINER_SCHEME_CCS)
-// Retainer scheme 2: retainer = cost centre stack
static void
printRetainer(FILE *f, retainer ccs)
{
fprintCCS(f, ccs);
}
-#elif defined(RETAINER_SCHEME_CC)
-// Retainer scheme 3: retainer = cost centre
-static void
-printRetainer(FILE *f, retainer cc)
-{
- fprintf(f,"%s.%s", cc->module, cc->label);
-}
-#endif
/* -----------------------------------------------------------------------------
* printRetainerSetShort() should always display the same output for
* a given retainer set regardless of the time of invocation.
* -------------------------------------------------------------------------- */
#if defined(SECOND_APPROACH)
-#if defined(RETAINER_SCHEME_INFO)
-// Retainer scheme 1: retainer = info table
-void
-printRetainerSetShort(FILE *f, RetainerSet *rs, uint32_t max_length)
-{
- char tmp[max_length + 1];
- int size;
- uint32_t j;
-
- ASSERT(rs->id < 0);
-
- tmp[max_length] = '\0';
-
- // No blank characters are allowed.
- sprintf(tmp + 0, "(%d)", -(rs->id));
- size = strlen(tmp);
- ASSERT(size < max_length);
-
- for (j = 0; j < rs->num; j++) {
- if (j < rs->num - 1) {
- strncpy(tmp + size, GET_PROF_DESC(rs->element[j]), max_length - size);
- size = strlen(tmp);
- if (size == max_length)
- break;
- strncpy(tmp + size, ",", max_length - size);
- size = strlen(tmp);
- if (size == max_length)
- break;
- }
- else {
- strncpy(tmp + size, GET_PROF_DESC(rs->element[j]), max_length - size);
- // size = strlen(tmp);
- }
- }
- fprintf(f, tmp);
-}
-#elif defined(RETAINER_SCHEME_CC)
-// Retainer scheme 3: retainer = cost centre
-void
-printRetainerSetShort(FILE *f, RetainerSet *rs, uint32_t max_length)
-{
- char tmp[max_length + 1];
- int size;
- uint32_t j;
-
-}
-#elif defined(RETAINER_SCHEME_CCS)
-// Retainer scheme 2: retainer = cost centre stack
void
printRetainerSetShort(FILE *f, RetainerSet *rs, uint32_t max_length)
{
@@ -328,82 +263,6 @@ printRetainerSetShort(FILE *f, RetainerSet *rs, uint32_t max_length)
}
fputs(tmp, f);
}
-#elif defined(RETAINER_SCHEME_CC)
-// Retainer scheme 3: retainer = cost centre
-static void
-printRetainerSetShort(FILE *f, retainerSet *rs, uint32_t max_length)
-{
- char tmp[max_length + 1];
- int size;
- uint32_t j;
-
- ASSERT(rs->id < 0);
-
- tmp[max_length] = '\0';
-
- // No blank characters are allowed.
- sprintf(tmp + 0, "(%d)", -(rs->id));
- size = strlen(tmp);
- ASSERT(size < max_length);
-
- for (j = 0; j < rs->num; j++) {
- if (j < rs->num - 1) {
- strncpy(tmp + size, rs->element[j]->label,
- max_length - size);
- size = strlen(tmp);
- if (size == max_length)
- break;
- strncpy(tmp + size, ",", max_length - size);
- size = strlen(tmp);
- if (size == max_length)
- break;
- }
- else {
- strncpy(tmp + size, rs->element[j]->label,
- max_length - size);
- // size = strlen(tmp);
- }
- }
- fprintf(f, tmp);
-/*
- #define DOT_NUMBER 3
- // 1. 32 > max_length + 1 (1 for '\0')
- // 2. (max_length - DOT_NUMBER ) characters should be enough for
- // printing one natural number (plus '(' and ')').
- char tmp[32];
- int size, ts;
- uint32_t j;
-
- ASSERT(rs->id < 0);
-
- // No blank characters are allowed.
- sprintf(tmp + 0, "(%d)", -(rs->id));
- size = strlen(tmp);
- ASSERT(size < max_length - DOT_NUMBER);
-
- for (j = 0; j < rs->num; j++) {
- ts = strlen(rs->element[j]->label);
- if (j < rs->num - 1) {
- if (size + ts + 1 > max_length - DOT_NUMBER) {
- sprintf(tmp + size, "...");
- break;
- }
- sprintf(tmp + size, "%s,", rs->element[j]->label);
- size += ts + 1;
- }
- else {
- if (size + ts > max_length - DOT_NUMBER) {
- sprintf(tmp + size, "...");
- break;
- }
- sprintf(tmp + size, "%s", rs->element[j]->label);
- size += ts;
- }
- }
- fprintf(f, tmp);
-*/
-}
-#endif /* RETAINER_SCHEME_CC */
#endif /* SECOND_APPROACH */
/* -----------------------------------------------------------------------------
diff --git a/rts/RetainerSet.h b/rts/RetainerSet.h
index 52f12dc155..2f9aeea644 100644
--- a/rts/RetainerSet.h
+++ b/rts/RetainerSet.h
@@ -25,34 +25,13 @@
its retainer identity because its location may change during garbage
collections.
2. Type 'retainer' must come with comparison operations as well as
- an equality operation. That it, <, >, and == must be supported -
+ an equality operation. That is, <, >, and == must be supported -
this is necessary to store retainers in a sorted order in retainer sets.
Therefore, you cannot use a huge structure type as 'retainer', for instance.
-
- We illustrate three possibilities of defining 'retainer identity'.
- Choose one of the following three compiler directives:
-
- Retainer scheme 1 (RETAINER_SCHEME_INFO) : retainer = info table
- Retainer scheme 2 (RETAINER_SCHEME_CCS) : retainer = cost centre stack
- Retainer scheme 3 (RETAINER_SCHEME_CC) : retainer = cost centre
*/
-// #define RETAINER_SCHEME_INFO
-#define RETAINER_SCHEME_CCS
-// #define RETAINER_SCHEME_CC
-
-#if defined(RETAINER_SCHEME_INFO)
-struct _StgInfoTable;
-typedef struct _StgInfoTable *retainer;
-#endif
-#if defined(RETAINER_SCHEME_CCS)
typedef CostCentreStack *retainer;
-#endif
-
-#if defined(RETAINER_SCHEME_CC)
-typedef CostCentre *retainer;
-#endif
/*
Type 'retainerSet' defines an abstract datatype for sets of retainers.
@@ -67,7 +46,7 @@ typedef struct _RetainerSet {
struct _RetainerSet *link; // link to the next retainer set in the bucket
int id; // unique id of this retainer set (used when printing)
// Its absolute value is interpreted as its true id; if id is
- // negative, it indicates that this retainer set has had a postive
+ // negative, it indicates that this retainer set has had a positive
// cost after some retainer profiling.
retainer element[0]; // elements of this retainer set
// do not put anything below here!
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index 533c0c41d4..9396dccc07 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -15,7 +15,7 @@
#include "Prelude.h"
#include "Schedule.h"
#include "Capability.h"
-#include "Stable.h"
+#include "StablePtr.h"
#include "Threads.h"
#include "Weak.h"
@@ -367,7 +367,7 @@ rts_getBool (HaskellObj p)
const StgInfoTable *info;
info = get_itbl((const StgClosure *)UNTAG_CONST_CLOSURE(p));
- if (info->srt_bitmap == 0) { // srt_bitmap is the constructor tag
+ if (info->srt == 0) { // srt is the constructor tag
return 0;
} else {
return 1;
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7b10d2a67d..6a72e67859 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -31,6 +31,8 @@
#include <sys/types.h>
#endif
+#include <fs_rts.h>
+
// Flag Structure
RTS_FLAGS RtsFlags;
@@ -46,12 +48,11 @@ int rts_argc = 0; /* ditto */
char **rts_argv = NULL;
int rts_argv_size = 0;
#if defined(mingw32_HOST_OS)
-// On Windows, we want to use GetCommandLineW rather than argc/argv,
-// but we need to mutate the command line arguments for withProgName and
-// friends. The System.Environment module achieves that using this bit of
-// shared state:
-int win32_prog_argc = 0;
-wchar_t **win32_prog_argv = NULL;
+// On Windows hs_main uses GetCommandLineW to get Unicode arguments and
+// passes them along UTF8 encoded as argv. We store them here in order to
+// free them on exit.
+int win32_full_utf8_argc = 0;
+char** win32_utf8_argv = NULL;
#endif
// The global rtsConfig, set from the RtsConfig supplied by the call
@@ -70,7 +71,9 @@ const RtsConfig defaultRtsConfig = {
.stackOverflowHook = StackOverflowHook,
.outOfHeapHook = OutOfHeapHook,
.mallocFailHook = MallocFailHook,
- .gcDoneHook = NULL
+ .gcDoneHook = NULL,
+ .longGCSync = LongGCSync,
+ .longGCSyncEnd = LongGCSyncEnd
};
/*
@@ -111,6 +114,9 @@ static void read_trace_flags(const char *arg);
static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__);
+#if defined(mingw32_HOST_OS)
+static char** win32_full_utf8_argv;
+#endif
static char * copyArg (char *arg);
static char ** copyArgv (int argc, char *argv[]);
static void freeArgv (int argc, char *argv[]);
@@ -163,6 +169,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.numa = false;
RtsFlags.GcFlags.numaMask = 1;
RtsFlags.GcFlags.ringBell = false;
+ RtsFlags.GcFlags.longGCSync = 0; /* detection turned off */
RtsFlags.DebugFlags.scheduler = false;
RtsFlags.DebugFlags.interpreter = false;
@@ -183,11 +190,12 @@ void initRtsFlagsDefaults(void)
RtsFlags.DebugFlags.compact = false;
#if defined(PROFILING)
- RtsFlags.CcFlags.doCostCentres = 0;
+ RtsFlags.CcFlags.doCostCentres = COST_CENTRES_NONE;
+ RtsFlags.CcFlags.outputFileNameStem = NULL;
#endif /* PROFILING */
RtsFlags.ProfFlags.doHeapProfile = false;
- RtsFlags.ProfFlags. heapProfileInterval = USToTime(100000); // 100ms
+ RtsFlags.ProfFlags.heapProfileInterval = USToTime(100000); // 100ms
#if defined(PROFILING)
RtsFlags.ProfFlags.includeTSOs = false;
@@ -222,8 +230,12 @@ void initRtsFlagsDefaults(void)
RtsFlags.ConcFlags.ctxtSwitchTime = USToTime(20000); // 20ms
RtsFlags.MiscFlags.install_signal_handlers = true;
- RtsFlags.MiscFlags.machineReadable = false;
- RtsFlags.MiscFlags.linkerMemBase = 0;
+ RtsFlags.MiscFlags.install_seh_handlers = true;
+ RtsFlags.MiscFlags.generate_stack_trace = true;
+ RtsFlags.MiscFlags.generate_dump_file = false;
+ RtsFlags.MiscFlags.machineReadable = false;
+ RtsFlags.MiscFlags.internalCounters = false;
+ RtsFlags.MiscFlags.linkerMemBase = 0;
#if defined(THREADED_RTS)
RtsFlags.ParFlags.nCapabilities = 1;
@@ -268,7 +280,7 @@ usage_text[] = {
" -kc<size> Sets the stack chunk size (default 32k)",
" -kb<size> Sets the stack chunk buffer size (default 1k)",
"",
-" -A<size> Sets the minimum allocation area size (default 512k) Egs: -A1m -A10k",
+" -A<size> Sets the minimum allocation area size (default 1m) Egs: -A20m -A10k",
" -AL<size> Sets the amount of large-object memory that can be allocated",
" before a GC is triggered (default: the value of -A)",
" -n<size> Allocation area chunk size (0 = disabled, default: 0)",
@@ -332,6 +344,8 @@ usage_text[] = {
"",
" -xc Show current cost centre stack on raising an exception",
#endif /* PROFILING */
+"",
+" -hT Produce a heap profile grouped by closure type"
#if defined(TRACING)
"",
@@ -368,7 +382,7 @@ usage_text[] = {
" Default: 0.02 sec.",
" -V<secs> Master tick interval in seconds (0 == disable timer).",
" This sets the resolution for -C and the heap profile timer -i,",
-" and is the frequence of time profile samples.",
+" and is the frequency of time profile samples.",
#if defined(PROFILING)
" Default: 0.001 sec.",
#else
@@ -423,6 +437,18 @@ usage_text[] = {
#endif
" --install-signal-handlers=<yes|no>",
" Install signal handlers (default: yes)",
+#if defined(mingw32_HOST_OS)
+" --install-seh-handlers=<yes|no>",
+" Install exception handlers (default: yes)",
+" --generate-crash-dumps",
+" Generate Windows crash dumps, requires exception handlers",
+" to be installed. Implies --install-signal-handlers=yes.",
+" (default: no)",
+" --generate-stack-traces=<yes|no>",
+" Generate a stack trace when your application encounters a",
+" fatal error. When symbols are available an attempt will be",
+" made to resolve addresses to names. (default: yes)",
+#endif
#if defined(THREADED_RTS)
" -e<n> Maximum number of outstanding local sparks (default: 4096)",
#endif
@@ -446,6 +472,66 @@ usage_text[] = {
0
};
+/**
+Note [Windows Unicode Arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On Windows argv is usually encoded in the current code page, which might not
+support Unicode.
+
+Instead of ignoring the arguments to hs_init we expect them to be UTF-8
+encoded when coming from a custom main function. In the regular hs_main we
+get the Unicode arguments from the Windows API and pass them along UTF-8
+encoded instead.
+
+This reduces the special casing of arguments in later parts of the RTS and
+base libraries to dealing with slash differences, and to using UTF-8 instead
+of the current locale on Windows when decoding arguments.
+
+*/
+
+#if defined(mingw32_HOST_OS)
+// Allocate a buffer and return the string UTF-8 encoded.
+char* lpcwstrToUTF8(const wchar_t* utf16_str)
+{
+ // Check the UTF-8 encoded size first
+ int res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, NULL, 0,
+ NULL, NULL);
+ if (res == 0) {
+ return NULL;
+ }
+ char* buffer = (char*) stgMallocBytes((size_t)res, "getUTF8Args 2");
+ res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buffer, res,
+ NULL, NULL);
+ return buffer;
+}
+
+char** getUTF8Args(int* argc)
+{
+ LPCWSTR cmdLine = GetCommandLineW();
+ LPWSTR* argvw = CommandLineToArgvW(cmdLine, argc);
+
+ // We create two argument arrays: one which is later permuted by the RTS
+ // in place of the main argv, and another used to free the allocated
+ // memory later.
+ char** argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+ win32_full_utf8_argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+
+ for (int i = 0; i < *argc; i++)
+ {
+ argv[i] = lpcwstrToUTF8(argvw[i]);
+ }
+ argv[*argc] = NULL;
+ memcpy(win32_full_utf8_argv, argv, sizeof(char*) * (*argc + 1));
+
+ LocalFree(argvw);
+ win32_utf8_argv = argv;
+ win32_full_utf8_argc = *argc;
+ return argv;
+}
+#endif
+
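[Editor's note: under this convention a custom Windows main is expected to hand hs_init UTF-8 encoded arguments; a minimal hedged sketch:

    #include "HsFFI.h"   /* hs_init / hs_exit */

    int main(void)
    {
        int argc;
        char **argv = getUTF8Args(&argc); /* UTF-8 copies of the wide argv */
        hs_init(&argc, &argv);
        /* ... call into Haskell ... */
        hs_exit();
        return 0;
    }
]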
STATIC_INLINE bool strequal(const char *a, const char * b)
{
return(strcmp(a, b) == 0);
@@ -514,12 +600,8 @@ static void errorRtsOptsDisabled(const char *s)
- rtsConfig (global) contains the supplied RtsConfig
- On Windows getArgs ignores argv and instead takes the arguments directly
- from the WinAPI and removes any which would have been parsed by the RTS.
-
- If the handling of which arguments are passed to the Haskell side changes
- these changes have to be synchronized with getArgs in base. See #13287 and
- Note [Ignore hs_init argv] in System.Environment.
+ On Windows argv is assumed to be utf8 encoded for unicode compatibility.
+ See Note [Windows Unicode Arguments]
-------------------------------------------------------------------------- */
@@ -557,6 +639,8 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config)
// process arguments from the GHCRTS environment variable next
// (arguments from the command line override these).
+ // If we ignore all non-builtin rtsOpts we skip these.
+ if(rtsConfig.rts_opts_enabled != RtsOptsIgnoreAll)
{
char *ghc_rts = getenv("GHCRTS");
@@ -573,33 +657,44 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config)
}
}
- // Split arguments (argv) into PGM (argv) and RTS (rts_argv) parts
- // argv[0] must be PGM argument -- leave in argv
- //
- for (mode = PGM; arg < total_arg; arg++) {
- // The '--RTS' argument disables all future +RTS ... -RTS processing.
- if (strequal("--RTS", argv[arg])) {
- arg++;
- break;
- }
- // The '--' argument is passed through to the program, but
- // disables all further +RTS ... -RTS processing.
- else if (strequal("--", argv[arg])) {
- break;
- }
- else if (strequal("+RTS", argv[arg])) {
- mode = RTS;
- }
- else if (strequal("-RTS", argv[arg])) {
- mode = PGM;
- }
- else if (mode == RTS) {
- appendRtsArg(copyArg(argv[arg]));
- }
- else {
- argv[(*argc)++] = argv[arg];
+
+ // If we ignore all command-line rtsOpts we skip processing of argv by
+ // the RTS completely.
+ if(!(rtsConfig.rts_opts_enabled == RtsOptsIgnoreAll ||
+ rtsConfig.rts_opts_enabled == RtsOptsIgnore)
+ )
+ {
+ // Split arguments (argv) into PGM (argv) and RTS (rts_argv) parts
+ // argv[0] must be PGM argument -- leave in argv
+ //
+ for (mode = PGM; arg < total_arg; arg++) {
+ // The '--RTS' argument disables all future
+ // +RTS ... -RTS processing.
+ if (strequal("--RTS", argv[arg])) {
+ arg++;
+ break;
+ }
+ // The '--' argument is passed through to the program, but
+ // disables all further +RTS ... -RTS processing.
+ else if (strequal("--", argv[arg])) {
+ break;
+ }
+ else if (strequal("+RTS", argv[arg])) {
+ mode = RTS;
+ }
+ else if (strequal("-RTS", argv[arg])) {
+ mode = PGM;
+ }
+ else if (mode == RTS) {
+ appendRtsArg(copyArg(argv[arg]));
+ }
+ else {
+ argv[(*argc)++] = argv[arg];
+ }
}
+
}
+
// process remaining program arguments
for (; arg < total_arg; arg++) {
argv[(*argc)++] = argv[arg];
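[Editor's note: for illustration, given `prog a +RTS -N2 -RTS b --RTS +RTS -s`, the splitter leaves `prog a b +RTS -s` in argv and hands `-N2` to rts_argv; with rts_opts_enabled set to RtsOptsIgnoreAll the loop is skipped entirely and argv is passed through untouched.]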
@@ -768,19 +863,55 @@ error = true;
OPTION_UNSAFE;
RtsFlags.MiscFlags.install_signal_handlers = false;
}
+ else if (strequal("install-seh-handlers=yes",
+ &rts_argv[arg][2])) {
+ OPTION_UNSAFE;
+ RtsFlags.MiscFlags.install_seh_handlers = true;
+ }
+ else if (strequal("install-seh-handlers=no",
+ &rts_argv[arg][2])) {
+ OPTION_UNSAFE;
+ RtsFlags.MiscFlags.install_seh_handlers = false;
+ }
+ else if (strequal("generate-stack-traces=yes",
+ &rts_argv[arg][2])) {
+ OPTION_UNSAFE;
+ RtsFlags.MiscFlags.generate_stack_trace = true;
+ }
+ else if (strequal("generate-stack-traces=no",
+ &rts_argv[arg][2])) {
+ OPTION_UNSAFE;
+ RtsFlags.MiscFlags.generate_stack_trace = false;
+ }
+ else if (strequal("generate-crash-dumps",
+ &rts_argv[arg][2])) {
+ OPTION_UNSAFE;
+ RtsFlags.MiscFlags.generate_dump_file = true;
+ }
else if (strequal("machine-readable",
&rts_argv[arg][2])) {
OPTION_UNSAFE;
RtsFlags.MiscFlags.machineReadable = true;
}
+ else if (strequal("internal-counters",
+ &rts_argv[arg][2])) {
+ OPTION_SAFE;
+ RtsFlags.MiscFlags.internalCounters = true;
+ }
else if (strequal("info",
&rts_argv[arg][2])) {
OPTION_SAFE;
- printRtsInfo();
+ printRtsInfo(rtsConfig);
stg_exit(0);
}
#if defined(THREADED_RTS)
else if (!strncmp("numa", &rts_argv[arg][2], 4)) {
+ if (!osBuiltWithNumaSupport()) {
+ errorBelch("%s: This GHC build was compiled without NUMA support.",
+ rts_argv[arg]);
+ error = true;
+ break;
+ }
OPTION_SAFE;
StgWord mask;
if (rts_argv[arg][6] == '=') {
@@ -825,6 +956,16 @@ error = true;
}
}
#endif
+ else if (!strncmp("long-gc-sync=", &rts_argv[arg][2], 13)) {
+ OPTION_SAFE;
+ if (rts_argv[arg][15] == '\0') {
+ /* use default */
+ } else {
+ RtsFlags.GcFlags.longGCSync =
+ fsecondsToTime(atof(rts_argv[arg]+15));
+ }
+ break;
+ }
else {
OPTION_SAFE;
errorBelch("unknown RTS option: %s",rts_argv[arg]);
@@ -890,7 +1031,7 @@ error = true;
case 'K':
OPTION_UNSAFE;
RtsFlags.GcFlags.maxStkSize =
- decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX)
+ decodeSize(rts_argv[arg], 2, 0, HS_WORD_MAX)
/ sizeof(W_);
break;
@@ -1072,6 +1213,14 @@ error = true;
case 'j':
RtsFlags.CcFlags.doCostCentres = COST_CENTRES_JSON;
break;
+ case 'o':
+ if (rts_argv[arg][3] == '\0') {
+ errorBelch("flag -po expects an argument");
+ error = true;
+ break;
+ }
+ RtsFlags.CcFlags.outputFileNameStem = rts_argv[arg]+3;
+ break;
case '\0':
if (rts_argv[arg][1] == 'P') {
RtsFlags.CcFlags.doCostCentres = COST_CENTRES_VERBOSE;
@@ -1156,11 +1305,7 @@ error = true;
OPTION_SAFE;
THREADED_BUILD_ONLY(
if (rts_argv[arg][2] == '\0') {
-#if defined(PROFILING)
- RtsFlags.ParFlags.nCapabilities = 1;
-#else
RtsFlags.ParFlags.nCapabilities = getNumberOfProcessors();
-#endif
} else {
int nCapabilities;
OPTION_SAFE; /* but see extra checks below... */
@@ -1513,6 +1658,11 @@ static void normaliseRtsOpts (void)
RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
}
}
+
+ // We can't generate crash dumps without the SEH handlers installed.
+ if (RtsFlags.MiscFlags.generate_dump_file) {
+ RtsFlags.MiscFlags.install_seh_handlers = true;
+ }
}
static void errorUsage (void)
@@ -1557,7 +1707,7 @@ openStatsFile (char *filename, // filename, or NULL
f = NULL; /* NULL means use debugBelch */
} else {
if (*filename != '\0') { /* stats file specified */
- f = fopen(filename,"w");
+ f = __rts_fopen (filename,"w");
} else {
if (filename_fmt == NULL) {
errorBelch("Invalid stats filename format (NULL)\n");
@@ -1565,8 +1715,9 @@ openStatsFile (char *filename, // filename, or NULL
}
/* default <program>.<ext> */
char stats_filename[STATS_FILENAME_MAXLEN];
- sprintf(stats_filename, filename_fmt, prog_name);
- f = fopen(stats_filename,"w");
+ snprintf(stats_filename, STATS_FILENAME_MAXLEN, filename_fmt,
+ prog_name);
+ f = __rts_fopen (stats_filename,"w");
}
if (f == NULL) {
errorBelch("Can't open stats file %s\n", filename);
@@ -1810,6 +1961,9 @@ static bool read_heap_profiling_flag(const char *arg)
case 'b':
RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV;
break;
+ case 'T':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CLOSURE_TYPE;
+ break;
}
break;
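
The new 'T' case maps `+RTS -hT` onto HEAP_BY_CLOSURE_TYPE, a heap-census mode keyed on closure type rather than cost centre. The resulting .hp file keeps the usual shape, roughly like this (sample values invented):

    JOB "a.out +RTS -hT"
    DATE "..."
    SAMPLE_UNIT "seconds"
    VALUE_UNIT "bytes"
    BEGIN_SAMPLE 0.001998
      ARR_WORDS   5976
      THUNK       672
      CONSTR      96
    END_SAMPLE 0.001998
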
@@ -2040,48 +2194,18 @@ void freeWin32ProgArgv (void);
void
freeWin32ProgArgv (void)
{
- int i;
-
- if (win32_prog_argv != NULL) {
- for (i = 0; i < win32_prog_argc; i++) {
- stgFree(win32_prog_argv[i]);
- }
- stgFree(win32_prog_argv);
+ if(win32_utf8_argv == NULL) {
+ return;
+ }
+ else
+ {
+ freeArgv(win32_full_utf8_argc, win32_full_utf8_argv);
+ stgFree(win32_utf8_argv);
}
- win32_prog_argc = 0;
- win32_prog_argv = NULL;
-}
-void
-getWin32ProgArgv(int *argc, wchar_t **argv[])
-{
- *argc = win32_prog_argc;
- *argv = win32_prog_argv;
}
-void
-setWin32ProgArgv(int argc, wchar_t *argv[])
-{
- int i;
-
- freeWin32ProgArgv();
-
- win32_prog_argc = argc;
- if (argv == NULL) {
- win32_prog_argv = NULL;
- return;
- }
-
- win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *),
- "setWin32ProgArgv 1");
- for (i = 0; i < argc; i++) {
- win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t),
- "setWin32ProgArgv 2");
- wcscpy(win32_prog_argv[i], argv[i]);
- }
- win32_prog_argv[argc] = NULL;
-}
#endif
/* ----------------------------------------------------------------------------
diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h
index 71ad219d29..c36c64a63b 100644
--- a/rts/RtsFlags.h
+++ b/rts/RtsFlags.h
@@ -13,6 +13,11 @@
/* Routines that operate-on/to-do-with RTS flags: */
+#if defined(mingw32_HOST_OS)
+// The returned buffer has to be freed with stgFree().
+char* lpcwstrToUTF8(const wchar_t* utf16_str);
+char** getUTF8Args(int* argc);
+#endif
void initRtsFlagsDefaults (void);
void setupRtsFlags (int *argc, char *argv[], RtsConfig rtsConfig);
void freeRtsArgs (void);
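
lpcwstrToUTF8 and getUTF8Args are only declared here; their definitions live in rts/RtsFlags.c and are not shown in this patch. A conversion with this contract would follow the usual two-call WideCharToMultiByte idiom; the sketch below is an assumption about its shape, not the actual definition:

    #include <windows.h>

    /* Sketch only; stgMallocBytes is the RTS allocator, so the result
     * must be released with stgFree(), as the comment above says. */
    char* lpcwstrToUTF8(const wchar_t* utf16_str)
    {
        if (utf16_str == NULL) return NULL;
        /* First call computes the required size in bytes, NUL included. */
        int len = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1,
                                      NULL, 0, NULL, NULL);
        char *buf = stgMallocBytes(len, "lpcwstrToUTF8");
        WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buf, len, NULL, NULL);
        return buf;
    }
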
diff --git a/rts/RtsMain.c b/rts/RtsMain.c
index d9f05576a0..21b8577cca 100644
--- a/rts/RtsMain.c
+++ b/rts/RtsMain.c
@@ -13,6 +13,7 @@
#include "RtsAPI.h"
#include "RtsUtils.h"
+#include "RtsFlags.h"
#include "Prelude.h"
#include "Task.h"
#include "Excn.h"
@@ -23,37 +24,44 @@
// Hack: we assume that we're building a batch-mode system unless
// INTERPRETER is set
-
+
#if !defined(INTERPRETER) /* Hack */
// The rts entry point from a compiled program using a Haskell main
// function. This gets called from a tiny main function generated by
// GHC and linked into each compiled Haskell program that uses a
// Haskell main function.
-//
+//
// We expect the caller to pass ZCMain_main_closure for
// main_closure. The reason we cannot refer to this symbol directly
// is because we're inside the rts and we do not know for sure that
// we'll be using a Haskell main function.
-//
+//
// NOTE: This function is marked as _noreturn_ in Main.h
int hs_main ( int argc, char *argv[], // program args
StgClosure *main_closure, // closure for Main.main
RtsConfig rts_config) // RTS configuration
-
-{
- BEGIN_WINDOWS_VEH_HANDLER
+{
int exit_status;
SchedulerStatus status;
+ // See Note [Windows Unicode Arguments] in rts/RtsFlags.c
+ #if defined(mingw32_HOST_OS)
+ {
+ argv = getUTF8Args(&argc);
+ }
+ #endif
+
hs_init_ghc(&argc, &argv, rts_config);
+ BEGIN_WINDOWS_VEH_HANDLER
+
// kick off the computation by creating the main thread with a pointer
// to mainIO_closure representing the computation of the overall program;
// then enter the scheduler with this thread and off we go;
- //
+ //
// in a parallel setup, where we have many instances of this code
// running on different PEs, we should do this only for the main PE
// (IAmMainThread is set in startupHaskell)
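
For context, the "tiny main function generated by GHC" mentioned above is, modulo version-specific RtsConfig fields, shaped like this sketch:

    #include "Rts.h"

    extern StgClosure ZCMain_main_closure;   /* Z-encoded Main.main */

    int main(int argc, char *argv[])
    {
        RtsConfig __conf = defaultRtsConfig;
        __conf.rts_opts_enabled = RtsOptsSafeOnly;
        return hs_main(argc, argv, &ZCMain_main_closure, __conf);
    }
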
@@ -89,6 +97,6 @@ int hs_main ( int argc, char *argv[], // program args
END_WINDOWS_VEH_HANDLER
shutdownHaskellAndExit(exit_status, 0 /* !fastExit */);
- // No code beyond this point. Dead code elimination will remove it
+ // No code beyond this point. Dead code elimination will remove it
}
# endif /* BATCH_MODE */
diff --git a/rts/RtsMessages.c b/rts/RtsMessages.c
index 0859e5093b..053805e763 100644
--- a/rts/RtsMessages.c
+++ b/rts/RtsMessages.c
@@ -8,6 +8,7 @@
#include "PosixSource.h"
#include "Rts.h"
+#include "RtsUtils.h"
#include "eventlog/EventLog.h"
@@ -21,6 +22,7 @@
#if defined(HAVE_WINDOWS_H)
#include <windows.h>
+#include <fcntl.h>
#endif
/* -----------------------------------------------------------------------------
@@ -131,13 +133,12 @@ isGUIApp(void)
}
#endif
-#define xstr(s) str(s)
-#define str(s) #s
-
void GNU_ATTRIBUTE(__noreturn__)
rtsFatalInternalErrorFn(const char *s, va_list ap)
{
-#if defined (mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
+ /* Ensure we're in text mode so newlines get encoded properly. */
+ int mode = _setmode (_fileno(stderr), _O_TEXT);
if (isGUIApp())
{
char title[BUFSIZE], message[BUFSIZE];
@@ -163,7 +164,7 @@ rtsFatalInternalErrorFn(const char *s, va_list ap)
vfprintf(stderr, s, ap);
#if USE_LIBDW
fprintf(stderr, "\n");
- fprintf(stderr, "Stack trace:");
+ fprintf(stderr, "Stack trace:\n");
LibdwSession *session = libdwInit();
Backtrace *bt = libdwGetBacktrace(session);
libdwPrintBacktrace(session, stderr, bt);
@@ -174,6 +175,9 @@ rtsFatalInternalErrorFn(const char *s, va_list ap)
fprintf(stderr, " Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug\n");
fflush(stderr);
}
+#if defined(mingw32_HOST_OS)
+ _setmode (_fileno(stderr), mode);
+#endif
#if defined(TRACING)
if (RtsFlags.TraceFlags.tracing == TRACE_EVENTLOG) endEventLogging();
@@ -186,7 +190,9 @@ rtsFatalInternalErrorFn(const char *s, va_list ap)
void
rtsErrorMsgFn(const char *s, va_list ap)
{
-#if defined (mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
+ /* Ensure we're in text mode so newlines get encoded properly. */
+ int mode = _setmode (_fileno(stderr), _O_TEXT);
if (isGUIApp())
{
char buf[BUFSIZE];
@@ -211,6 +217,9 @@ rtsErrorMsgFn(const char *s, va_list ap)
vfprintf(stderr, s, ap);
fprintf(stderr, "\n");
}
+#if defined(mingw32_HOST_OS)
+ _setmode (_fileno(stderr), mode);
+#endif
}
void
@@ -218,7 +227,9 @@ rtsSysErrorMsgFn(const char *s, va_list ap)
{
char *syserr;
-#if defined (mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
+ /* Ensure we're in text mode so newlines get encoded properly. */
+ int mode = _setmode (_fileno(stderr), _O_TEXT);
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
@@ -257,7 +268,7 @@ rtsSysErrorMsgFn(const char *s, va_list ap)
}
vfprintf(stderr, s, ap);
if (syserr) {
-#if defined (mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
// Win32 error messages have a terminating \n
fprintf(stderr, ": %s", syserr);
#else
@@ -268,15 +279,18 @@ rtsSysErrorMsgFn(const char *s, va_list ap)
}
}
-#if defined (mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
if (syserr) LocalFree(syserr);
+ _setmode (_fileno(stderr), mode);
#endif
}
void
rtsDebugMsgFn(const char *s, va_list ap)
{
-#if defined (mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
+ /* Ensure we're in text mode so newlines get encoded properly. */
+ int mode = _setmode (_fileno(stderr), _O_TEXT);
if (isGUIApp())
{
char buf[BUFSIZE];
@@ -294,4 +308,17 @@ rtsDebugMsgFn(const char *s, va_list ap)
vfprintf(stderr, s, ap);
fflush(stderr);
}
+#if defined(mingw32_HOST_OS)
+ _setmode (_fileno(stderr), mode);
+#endif
+}
+
+
+// Used in stg_badAlignment_entry defined in StgStartup.cmm.
+void rtsBadAlignmentBarf(void) GNUC3_ATTRIBUTE(__noreturn__);
+
+void
+rtsBadAlignmentBarf(void)
+{
+ barf("Encountered incorrectly aligned pointer. This can't be good.");
}
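
Each message function above now brackets its stderr writes with the same save/restore pair. In isolation the pattern looks like this (hypothetical helper, Windows-only):

    #include <stdio.h>
    #include <io.h>
    #include <fcntl.h>

    static void report(const char *msg)
    {
        /* Switch stderr to text mode so '\n' is emitted as "\r\n",
         * remembering whatever mode the stream was in before. */
        int mode = _setmode(_fileno(stderr), _O_TEXT);
        fprintf(stderr, "%s\n", msg);
        fflush(stderr);
        _setmode(_fileno(stderr), mode);  /* restore the caller's mode */
    }
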
diff --git a/rts/RtsProbes.d b/rts/RtsProbes.d
index 277a494632..efbe653e0b 100644
--- a/rts/RtsProbes.d
+++ b/rts/RtsProbes.d
@@ -12,6 +12,13 @@
# endif
#endif
+#if defined(__FreeBSD__)
+/* we need this otherwise dtrace complains about redeclared int types
+ * TODO: find a better way to do this
+ */
+#define _INTTYPES_H_
+#endif
+
#include "HsFFI.h"
#include "rts/EventLogFormat.h"
diff --git a/rts/RtsSignals.h b/rts/RtsSignals.h
index d4176034e3..b0add6727d 100644
--- a/rts/RtsSignals.h
+++ b/rts/RtsSignals.h
@@ -51,14 +51,6 @@ void freeSignalHandlers(void);
*/
void awaitUserSignals(void);
-/*
- * Function: markSignalHandlers()
- *
- * Evacuate the handler queue. _Assumes_ that console event delivery
- * has already been blocked.
- */
-void markSignalHandlers (evac_fn evac, void *user);
-
#include "EndPrivate.h"
#endif /* RTS_USER_SIGNALS */
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index 71a842d0a9..5e5aef3505 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -26,7 +26,8 @@
#include "ThreadLabels.h"
#include "sm/BlockAlloc.h"
#include "Trace.h"
-#include "Stable.h"
+#include "StableName.h"
+#include "StablePtr.h"
#include "StaticPtrTable.h"
#include "Hash.h"
#include "Profiling.h"
@@ -179,7 +180,33 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
if (argc == NULL || argv == NULL) {
// Use a default for argc & argv if either is not supplied
int my_argc = 1;
+ #if defined(mingw32_HOST_OS)
+ // Retry larger buffer sizes on error, up to about the NTFS length limit.
+ wchar_t* pathBuf;
+ char *my_argv[2] = { NULL, NULL };
+ for(DWORD maxLength = MAX_PATH; maxLength <= 33280; maxLength *= 2)
+ {
+ pathBuf = (wchar_t*) stgMallocBytes(sizeof(wchar_t) * maxLength,
+ "hs_init_ghc: GetModuleFileName");
+ DWORD pathLength = GetModuleFileNameW(NULL, pathBuf, maxLength);
+ if(GetLastError() == ERROR_INSUFFICIENT_BUFFER || pathLength == 0) {
+ stgFree(pathBuf);
+ pathBuf = NULL;
+ } else {
+ break;
+ }
+ }
+ if(pathBuf == NULL) {
+ my_argv[0] = "<unknown>";
+ } else {
+ my_argv[0] = lpcwstrToUTF8(pathBuf);
+ stgFree(pathBuf);
+ }
+
+
+ #else
char *my_argv[] = { "<unknown>", NULL };
+ #endif
setFullProgArgv(my_argc,my_argv);
setupRtsFlags(&my_argc, my_argv, rts_config);
} else {
@@ -211,16 +238,24 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
/* Trace some basic information about the process */
traceWallClockTime();
traceOSProcessInfo();
+ flushTrace();
/* initialize the storage manager */
initStorage();
/* initialise the stable pointer table */
- initStableTables();
+ initStablePtrTable();
+
+ /* initialise the stable name table */
+ initStableNameTable();
/* Add some GC roots for things in the base package that the RTS
* knows about. We don't know whether these turn out to be CAFs
* or refer to CAFs, but we have to assume that they might.
+ *
+ * Because these stable pointers will retain any CAF references in
+ * these closures, the `Id`s of these closures can safely be marked
+ * as non-CAFFY in the compiler.
*/
getStablePtr((StgPtr)runIO_closure);
getStablePtr((StgPtr)runNonIO_closure);
@@ -239,6 +274,9 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
getStablePtr((StgPtr)cannotCompactPinned_closure);
getStablePtr((StgPtr)cannotCompactMutable_closure);
getStablePtr((StgPtr)nestedAtomically_closure);
+ getStablePtr((StgPtr)absentSumFieldError_closure);
+ // The `Id` for this closure is marked as non-CAFFY;
+ // see Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore.
getStablePtr((StgPtr)runSparks_closure);
getStablePtr((StgPtr)ensureIOManagerIsRunning_closure);
@@ -417,7 +455,10 @@ hs_exit_(bool wait_foreign)
exitTopHandler();
/* free the stable pointer table */
- exitStableTables();
+ exitStablePtrTable();
+
+ /* free the stable name table */
+ exitStableNameTable();
#if defined(DEBUG)
/* free the thread label table */
diff --git a/rts/RtsSymbolInfo.h b/rts/RtsSymbolInfo.h
index 9873ff3481..4c9b24519b 100644
--- a/rts/RtsSymbolInfo.h
+++ b/rts/RtsSymbolInfo.h
@@ -11,7 +11,7 @@
#include "LinkerInternals.h"
#include <stdbool.h>
-/* See Note [BFD Import libraries]. */
+/* See Note [BFD import library]. */
typedef enum _SymbolKind {
KIND_NORMAL = 0x01,
KIND_WEAK = 0x02,
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 11b1437f77..5091c90dad 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -97,8 +97,6 @@
SymI_HasProto(stg_asyncReadzh) \
SymI_HasProto(stg_asyncWritezh) \
SymI_HasProto(stg_asyncDoProczh) \
- SymI_HasProto(getWin32ProgArgv) \
- SymI_HasProto(setWin32ProgArgv) \
SymI_HasProto(rts_InstallConsoleEvent) \
SymI_HasProto(rts_ConsoleHandlerDone) \
SymI_HasProto(atexit) \
@@ -108,6 +106,7 @@
RTS_WIN64_ONLY(SymI_HasProto(__imp__environ)) \
RTS_WIN32_ONLY(SymI_HasProto(_imp___iob)) \
RTS_WIN64_ONLY(SymI_HasProto(__iob_func)) \
+ RTS_WIN64_ONLY(SymI_HasProto(__mingw_vsnwprintf)) \
/* see Note [Symbols for MinGW's printf] */ \
SymI_HasProto(_lock_file) \
SymI_HasProto(_unlock_file)
@@ -497,6 +496,8 @@
SymI_HasProto(enterFunCCS) \
SymI_HasProto(pushCostCentre) \
SymI_HasProto(mkCostCentre) \
+ SymI_HasProto(registerCcList) \
+ SymI_HasProto(registerCcsList) \
SymI_HasProto(era)
#else
#define RTS_PROF_SYMBOLS /* empty */
@@ -557,7 +558,6 @@
SymI_HasProto(stg_catchzh) \
SymI_HasProto(stg_catchRetryzh) \
SymI_HasProto(stg_catchSTMzh) \
- SymI_HasProto(stg_checkzh) \
SymI_HasProto(stg_clearCCSzh) \
SymI_HasProto(stg_compactAddWithSharingzh) \
SymI_HasProto(stg_compactAddzh) \
@@ -605,6 +605,7 @@
SymI_HasProto(getFullProgArgv) \
SymI_HasProto(setFullProgArgv) \
SymI_HasProto(freeFullProgArgv) \
+ SymI_HasProto(getProcessElapsedTime) \
SymI_HasProto(getStablePtr) \
SymI_HasProto(foreignExportStablePtr) \
SymI_HasProto(hs_init) \
@@ -614,6 +615,8 @@
SymI_HasProto(hs_exit_nowait) \
SymI_HasProto(hs_set_argv) \
SymI_HasProto(hs_perform_gc) \
+ SymI_HasProto(hs_lock_stable_ptr_table) \
+ SymI_HasProto(hs_unlock_stable_ptr_table) \
SymI_HasProto(hs_lock_stable_tables) \
SymI_HasProto(hs_unlock_stable_tables) \
SymI_HasProto(hs_free_stable_ptr) \
@@ -640,7 +643,6 @@
SymI_HasProto(lookupSymbol) \
SymI_HasProto(stg_makeStablePtrzh) \
SymI_HasProto(stg_mkApUpd0zh) \
- SymI_HasProto(stg_myThreadIdzh) \
SymI_HasProto(stg_labelThreadzh) \
SymI_HasProto(stg_newArrayzh) \
SymI_HasProto(stg_copyArrayzh) \
@@ -669,7 +671,8 @@
SymI_HasProto(stg_newMutVarzh) \
SymI_HasProto(stg_newTVarzh) \
SymI_HasProto(stg_noDuplicatezh) \
- SymI_HasProto(stg_atomicModifyMutVarzh) \
+ SymI_HasProto(stg_atomicModifyMutVar2zh) \
+ SymI_HasProto(stg_atomicModifyMutVarzuzh) \
SymI_HasProto(stg_casMutVarzh) \
SymI_HasProto(stg_newPinnedByteArrayzh) \
SymI_HasProto(stg_newAlignedPinnedByteArrayzh) \
@@ -743,8 +746,6 @@
SymI_HasProto(rts_isProfiled) \
SymI_HasProto(rts_isDynamic) \
SymI_HasProto(rts_setInCallCapability) \
- SymI_HasProto(rts_getThreadAllocationCounter) \
- SymI_HasProto(rts_setThreadAllocationCounter) \
SymI_HasProto(rts_enableThreadAllocationLimit) \
SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_setMainThread) \
@@ -769,14 +770,30 @@
SymI_HasProto(stg_IND_STATIC_info) \
SymI_HasProto(stg_ARR_WORDS_info) \
SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info) \
- SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_info) \
- SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN0_info) \
+ SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info) \
+ SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) \
SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_DIRTY_info) \
- SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_info) \
- SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) \
+ SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info) \
+ SymI_HasProto(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) \
SymI_HasProto(stg_MUT_VAR_CLEAN_info) \
SymI_HasProto(stg_MUT_VAR_DIRTY_info) \
SymI_HasProto(stg_WEAK_info) \
+ SymI_HasProto(stg_SRT_1_info) \
+ SymI_HasProto(stg_SRT_2_info) \
+ SymI_HasProto(stg_SRT_3_info) \
+ SymI_HasProto(stg_SRT_4_info) \
+ SymI_HasProto(stg_SRT_5_info) \
+ SymI_HasProto(stg_SRT_6_info) \
+ SymI_HasProto(stg_SRT_7_info) \
+ SymI_HasProto(stg_SRT_8_info) \
+ SymI_HasProto(stg_SRT_9_info) \
+ SymI_HasProto(stg_SRT_10_info) \
+ SymI_HasProto(stg_SRT_11_info) \
+ SymI_HasProto(stg_SRT_12_info) \
+ SymI_HasProto(stg_SRT_13_info) \
+ SymI_HasProto(stg_SRT_14_info) \
+ SymI_HasProto(stg_SRT_15_info) \
+ SymI_HasProto(stg_SRT_16_info) \
SymI_HasProto(stg_ap_v_info) \
SymI_HasProto(stg_ap_f_info) \
SymI_HasProto(stg_ap_d_info) \
@@ -868,6 +885,7 @@
SymI_HasProto(stg_waitWritezh) \
SymI_HasProto(stg_writeTVarzh) \
SymI_HasProto(stg_yieldzh) \
+ SymI_NeedsProto(stg_badAlignment_entry) \
SymI_NeedsProto(stg_interp_constr1_entry) \
SymI_NeedsProto(stg_interp_constr2_entry) \
SymI_NeedsProto(stg_interp_constr3_entry) \
@@ -894,6 +912,9 @@
SymI_HasProto(stg_traceCcszh) \
SymI_HasProto(stg_traceEventzh) \
SymI_HasProto(stg_traceMarkerzh) \
+ SymI_HasProto(stg_traceBinaryEventzh) \
+ SymI_HasProto(stg_getThreadAllocationCounterzh) \
+ SymI_HasProto(stg_setThreadAllocationCounterzh) \
SymI_HasProto(getMonotonicNSec) \
SymI_HasProto(lockFile) \
SymI_HasProto(unlockFile) \
@@ -911,6 +932,7 @@
SymI_HasProto(store_load_barrier) \
SymI_HasProto(load_load_barrier) \
SymI_HasProto(cas) \
+ SymI_HasProto(_assertFail) \
RTS_USER_SIGNALS_SYMBOLS \
RTS_INTCHAR_SYMBOLS
diff --git a/rts/RtsUtils.c b/rts/RtsUtils.c
index 5357dc635e..618815de76 100644
--- a/rts/RtsUtils.c
+++ b/rts/RtsUtils.c
@@ -275,7 +275,7 @@ int genericRaise(int sig) {
#endif
}
-static void mkRtsInfoPair(char *key, char *val) {
+static void mkRtsInfoPair(const char *key, const char *val) {
/* XXX should check for "s, \s etc in key and val */
printf(" ,(\"%s\", \"%s\")\n", key, val);
}
@@ -285,7 +285,7 @@ static void mkRtsInfoPair(char *key, char *val) {
#define TOSTRING2(x) #x
#define TOSTRING(x) TOSTRING2(x)
-void printRtsInfo(void) {
+void printRtsInfo(const RtsConfig rts_config) {
/* The first entry is just a hack to make it easy to get the
* commas right */
printf(" [(\"GHC RTS\", \"YES\")\n");
@@ -306,6 +306,8 @@ void printRtsInfo(void) {
mkRtsInfoPair("Word size", TOSTRING(WORD_SIZE_IN_BITS));
mkRtsInfoPair("Compiler unregisterised", GhcUnregisterised);
mkRtsInfoPair("Tables next to code", GhcEnableTablesNextToCode);
+ mkRtsInfoPair("Flag -with-rtsopts", /* See Trac #15261 */
+ rts_config.rts_opts != NULL ? rts_config.rts_opts : "");
printf(" ]\n");
}
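
With the new pair, `prog +RTS --info` also reports the -with-rtsopts string baked in at link time (see Trac #15261). The output stays a Haskell-readable association list, along these lines (values illustrative):

     [("GHC RTS", "YES")
     ,("GHC version", "8.6.1")
     ,("Word size", "64")
     ,("Compiler unregisterised", "NO")
     ,("Tables next to code", "YES")
     ,("Flag -with-rtsopts", "")
     ]
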
diff --git a/rts/RtsUtils.h b/rts/RtsUtils.h
index 8d880c6e19..49712c0d47 100644
--- a/rts/RtsUtils.h
+++ b/rts/RtsUtils.h
@@ -40,8 +40,11 @@ char *showStgWord64(StgWord64, char *, bool);
void heapCheckFail( void );
#endif
-void printRtsInfo(void);
+void printRtsInfo(const RtsConfig);
void checkFPUStack(void);
+#define xstr(s) str(s)
+#define str(s) #s
+
#include "EndPrivate.h"
diff --git a/rts/SMPClosureOps.h b/rts/SMPClosureOps.h
index 4ea1c55976..c73821a782 100644
--- a/rts/SMPClosureOps.h
+++ b/rts/SMPClosureOps.h
@@ -38,6 +38,11 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info);
#if defined(THREADED_RTS)
+#if defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_lockClosure_spin;
+extern volatile StgWord64 whitehole_lockClosure_yield;
+#endif
+
/* -----------------------------------------------------------------------------
* Locking/unlocking closures
*
@@ -56,7 +61,14 @@ EXTERN_INLINE StgInfoTable *reallyLockClosure(StgClosure *p)
do {
info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info);
if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
+#if defined(PROF_SPIN)
+ ++whitehole_lockClosure_spin;
+#endif
+ busy_wait_nop();
} while (++i < SPIN_COUNT);
+#if defined(PROF_SPIN)
+ ++whitehole_lockClosure_yield;
+#endif
yieldThread();
} while (1);
}
@@ -112,15 +124,6 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info)
p->header.info = info;
}
-// Handy specialised versions of lockClosure()/unlockClosure()
-INLINE_HEADER void lockTSO(StgTSO *tso);
-INLINE_HEADER void lockTSO(StgTSO *tso)
-{ lockClosure((StgClosure *)tso); }
-
-INLINE_HEADER void unlockTSO(StgTSO *tso);
-INLINE_HEADER void unlockTSO(StgTSO *tso)
-{ unlockClosure((StgClosure*)tso, (const StgInfoTable *)&stg_TSO_info); }
-
#endif /* CMINUSMINUS */
#include "EndPrivate.h"
diff --git a/rts/STM.c b/rts/STM.c
index 5c8fd4ff40..dc0b0ebb78 100644
--- a/rts/STM.c
+++ b/rts/STM.c
@@ -211,15 +211,6 @@ static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED,
TRACE("%p : %s", trec, (result == expected) ? "success" : "failure");
return (result == expected);
}
-
-static StgBool lock_inv(StgAtomicInvariant *inv STG_UNUSED) {
- // Nothing -- uniproc
- return true;
-}
-
-static void unlock_inv(StgAtomicInvariant *inv STG_UNUSED) {
- // Nothing -- uniproc
-}
#endif
#if defined(STM_CG_LOCK) /*........................................*/
@@ -272,15 +263,6 @@ static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED,
TRACE("%p : %d", result ? "success" : "failure");
return (result == expected);
}
-
-static StgBool lock_inv(StgAtomicInvariant *inv STG_UNUSED) {
- // Nothing -- protected by STM lock
- return true;
-}
-
-static void unlock_inv(StgAtomicInvariant *inv STG_UNUSED) {
- // Nothing -- protected by STM lock
-}
#endif
#if defined(STM_FG_LOCKS) /*...................................*/
@@ -332,32 +314,10 @@ static StgBool cond_lock_tvar(StgTRecHeader *trec,
TRACE("%p : %s", trec, result ? "success" : "failure");
return (result == expected);
}
-
-static StgBool lock_inv(StgAtomicInvariant *inv) {
- return (cas(&(inv -> lock), 0, 1) == 0);
-}
-
-static void unlock_inv(StgAtomicInvariant *inv) {
- ASSERT(inv -> lock == 1);
- inv -> lock = 0;
-}
#endif
/*......................................................................*/
-static StgBool watcher_is_tso(StgTVarWatchQueue *q) {
- StgClosure *c = q -> closure;
- const StgInfoTable *info = get_itbl(c);
- return (info -> type) == TSO;
-}
-
-static StgBool watcher_is_invariant(StgTVarWatchQueue *q) {
- StgClosure *c = q -> closure;
- return (c->header.info == &stg_ATOMIC_INVARIANT_info);
-}
-
-/*......................................................................*/
-
// Helper functions for thread blocking and unblocking
static void park_tso(StgTSO *tso) {
@@ -372,24 +332,24 @@ static void unpark_tso(Capability *cap, StgTSO *tso) {
// queues: it's up to the thread itself to remove it from the wait queues
// if it decides to do so when it is scheduled.
- // Unblocking a TSO from BlockedOnSTM is done under the TSO lock,
- // to avoid multiple CPUs unblocking the same TSO, and also to
- // synchronise with throwTo(). The first time the TSO is unblocked
- // we mark this fact by setting block_info.closure == STM_AWOKEN.
- // This way we can avoid sending further wakeup messages in the
- // future.
- lockTSO(tso);
- if (tso->why_blocked == BlockedOnSTM &&
- tso->block_info.closure == &stg_STM_AWOKEN_closure) {
- TRACE("unpark_tso already woken up tso=%p", tso);
- } else if (tso -> why_blocked == BlockedOnSTM) {
- TRACE("unpark_tso on tso=%p", tso);
- tso->block_info.closure = &stg_STM_AWOKEN_closure;
- tryWakeupThread(cap,tso);
- } else {
- TRACE("spurious unpark_tso on tso=%p", tso);
- }
- unlockTSO(tso);
+ // Only the capability that owns this TSO may unblock it. We can
+ // call tryWakeupThread() which will either unblock it directly if
+ // it belongs to this cap, or send a message to the owning cap
+ // otherwise.
+
+ // TODO: This sends multiple messages if we write to the same TVar multiple
+ // times and the owning cap hasn't yet woken up the thread and removed it
+ // from the TVar's watch list. We tried to optimise this in D4961, but that
+ // patch was incorrect and broke other things, see #15544 comment:17. See
+ // #15626 for the tracking ticket.
+
+ // Safety Note: we hold the TVar lock at this point, so we know
+ // that this thread is definitely still blocked, since the first
+ // thing a thread will do when it runs is remove itself from the
+ // TVar watch queues, and to do that it would need to lock the
+ // TVar.
+
+ tryWakeupThread(cap,tso);
}
static void unpark_waiters_on(Capability *cap, StgTVar *s) {
@@ -406,9 +366,7 @@ static void unpark_waiters_on(Capability *cap, StgTVar *s) {
for (;
q != END_STM_WATCH_QUEUE;
q = q -> prev_queue_entry) {
- if (watcher_is_tso(q)) {
unpark_tso(cap, (StgTSO *)(q -> closure));
- }
}
}
@@ -416,16 +374,6 @@ static void unpark_waiters_on(Capability *cap, StgTVar *s) {
// Helper functions for downstream allocation and initialization
-static StgInvariantCheckQueue *new_stg_invariant_check_queue(Capability *cap,
- StgAtomicInvariant *invariant) {
- StgInvariantCheckQueue *result;
- result = (StgInvariantCheckQueue *)allocate(cap, sizeofW(StgInvariantCheckQueue));
- SET_HDR (result, &stg_INVARIANT_CHECK_QUEUE_info, CCS_SYSTEM);
- result -> invariant = invariant;
- result -> my_execution = NO_TREC;
- return result;
-}
-
static StgTVarWatchQueue *new_stg_tvar_watch_queue(Capability *cap,
StgClosure *closure) {
StgTVarWatchQueue *result;
@@ -452,7 +400,6 @@ static StgTRecHeader *new_stg_trec_header(Capability *cap,
result -> enclosing_trec = enclosing_trec;
result -> current_chunk = new_stg_trec_chunk(cap);
- result -> invariants_to_check = END_INVARIANT_CHECK_QUEUE;
if (enclosing_trec == NO_TREC) {
result -> state = TREC_ACTIVE;
@@ -470,20 +417,6 @@ static StgTRecHeader *new_stg_trec_header(Capability *cap,
// Allocation / deallocation functions that retain per-capability lists
// of closures that can be re-used
-static StgInvariantCheckQueue *alloc_stg_invariant_check_queue(Capability *cap,
- StgAtomicInvariant *invariant) {
- StgInvariantCheckQueue *result = NULL;
- if (cap -> free_invariant_check_queues == END_INVARIANT_CHECK_QUEUE) {
- result = new_stg_invariant_check_queue(cap, invariant);
- } else {
- result = cap -> free_invariant_check_queues;
- result -> invariant = invariant;
- result -> my_execution = NO_TREC;
- cap -> free_invariant_check_queues = result -> next_queue_entry;
- }
- return result;
-}
-
static StgTVarWatchQueue *alloc_stg_tvar_watch_queue(Capability *cap,
StgClosure *closure) {
StgTVarWatchQueue *result = NULL;
@@ -536,7 +469,6 @@ static StgTRecHeader *alloc_stg_trec_header(Capability *cap,
cap -> free_trec_headers = result -> enclosing_trec;
result -> enclosing_trec = enclosing_trec;
result -> current_chunk -> next_entry_idx = 0;
- result -> invariants_to_check = END_INVARIANT_CHECK_QUEUE;
if (enclosing_trec == NO_TREC) {
result -> state = TREC_ACTIVE;
} else {
@@ -1111,202 +1043,8 @@ static TRecEntry *get_entry_for(StgTRecHeader *trec, StgTVar *tvar, StgTRecHeade
/*......................................................................*/
-/*
- * Add/remove links between an invariant TVars. The caller must have
- * locked the TVars involved and the invariant.
- */
-
-static void disconnect_invariant(Capability *cap,
- StgAtomicInvariant *inv) {
- StgTRecHeader *last_execution = inv -> last_execution;
-
- TRACE("unhooking last execution inv=%p trec=%p", inv, last_execution);
-
- FOR_EACH_ENTRY(last_execution, e, {
- StgTVar *s = e -> tvar;
- StgTVarWatchQueue *q = s -> first_watch_queue_entry;
- DEBUG_ONLY( StgBool found = false );
- TRACE(" looking for trec on tvar=%p", s);
- for (q = s -> first_watch_queue_entry;
- q != END_STM_WATCH_QUEUE;
- q = q -> next_queue_entry) {
- if (q -> closure == (StgClosure*)inv) {
- StgTVarWatchQueue *pq;
- StgTVarWatchQueue *nq;
- nq = q -> next_queue_entry;
- pq = q -> prev_queue_entry;
- if (nq != END_STM_WATCH_QUEUE) {
- nq -> prev_queue_entry = pq;
- }
- if (pq != END_STM_WATCH_QUEUE) {
- pq -> next_queue_entry = nq;
- } else {
- ASSERT(s -> first_watch_queue_entry == q);
- s -> first_watch_queue_entry = nq;
- dirty_TVAR(cap,s); // we modified first_watch_queue_entry
- }
- TRACE(" found it in watch queue entry %p", q);
- free_stg_tvar_watch_queue(cap, q);
- DEBUG_ONLY( found = true );
- break;
- }
- }
- ASSERT(found);
- });
- inv -> last_execution = NO_TREC;
-}
-
-static void connect_invariant_to_trec(Capability *cap,
- StgAtomicInvariant *inv,
- StgTRecHeader *my_execution) {
- TRACE("connecting execution inv=%p trec=%p", inv, my_execution);
-
- ASSERT(inv -> last_execution == NO_TREC);
-
- FOR_EACH_ENTRY(my_execution, e, {
- StgTVar *s = e -> tvar;
- StgTVarWatchQueue *q = alloc_stg_tvar_watch_queue(cap, (StgClosure*)inv);
- StgTVarWatchQueue *fq = s -> first_watch_queue_entry;
-
- // We leave "last_execution" holding the values that will be
- // in the heap after the transaction we're in the process
- // of committing has finished.
- TRecEntry *entry = get_entry_for(my_execution -> enclosing_trec, s, NULL);
- if (entry != NULL) {
- e -> expected_value = entry -> new_value;
- e -> new_value = entry -> new_value;
- }
-
- TRACE(" linking trec on tvar=%p value=%p q=%p", s, e -> expected_value, q);
- q -> next_queue_entry = fq;
- q -> prev_queue_entry = END_STM_WATCH_QUEUE;
- if (fq != END_STM_WATCH_QUEUE) {
- fq -> prev_queue_entry = q;
- }
- s -> first_watch_queue_entry = q;
- dirty_TVAR(cap,s); // we modified first_watch_queue_entry
- });
-
- inv -> last_execution = my_execution;
-}
-
-/*
- * Add a new invariant to the trec's list of invariants to check on commit
- */
-void stmAddInvariantToCheck(Capability *cap,
- StgTRecHeader *trec,
- StgClosure *code) {
- StgAtomicInvariant *invariant;
- StgInvariantCheckQueue *q;
- TRACE("%p : stmAddInvariantToCheck closure=%p", trec, code);
- ASSERT(trec != NO_TREC);
- ASSERT(trec -> state == TREC_ACTIVE ||
- trec -> state == TREC_CONDEMNED);
-
-
- // 1. Allocate an StgAtomicInvariant, set last_execution to NO_TREC
- // to signal that this is a new invariant in the current atomic block
-
- invariant = (StgAtomicInvariant *) allocate(cap, sizeofW(StgAtomicInvariant));
- TRACE("%p : stmAddInvariantToCheck allocated invariant=%p", trec, invariant);
- SET_HDR (invariant, &stg_ATOMIC_INVARIANT_info, CCS_SYSTEM);
- invariant -> code = code;
- invariant -> last_execution = NO_TREC;
- invariant -> lock = 0;
-
- // 2. Allocate an StgInvariantCheckQueue entry, link it to the current trec
-
- q = alloc_stg_invariant_check_queue(cap, invariant);
- TRACE("%p : stmAddInvariantToCheck allocated q=%p", trec, q);
- q -> invariant = invariant;
- q -> my_execution = NO_TREC;
- q -> next_queue_entry = trec -> invariants_to_check;
- trec -> invariants_to_check = q;
-
- TRACE("%p : stmAddInvariantToCheck done", trec);
-}
-
-/*
- * Fill in the trec's list of invariants that might be violated by the
- * current transaction.
- */
-
-StgInvariantCheckQueue *stmGetInvariantsToCheck(Capability *cap, StgTRecHeader *trec) {
- StgTRecChunk *c;
- TRACE("%p : stmGetInvariantsToCheck, head was %p",
- trec,
- trec -> invariants_to_check);
-
- ASSERT(trec != NO_TREC);
- ASSERT((trec -> state == TREC_ACTIVE) ||
- (trec -> state == TREC_WAITING) ||
- (trec -> state == TREC_CONDEMNED));
- ASSERT(trec -> enclosing_trec == NO_TREC);
-
- lock_stm(trec);
- c = trec -> current_chunk;
- while (c != END_STM_CHUNK_LIST) {
- unsigned int i;
- for (i = 0; i < c -> next_entry_idx; i ++) {
- TRecEntry *e = &(c -> entries[i]);
- if (entry_is_update(e)) {
- StgTVar *s = e -> tvar;
- StgClosure *old = lock_tvar(trec, s);
-
- // Pick up any invariants on the TVar being updated
- // by entry "e"
-
- StgTVarWatchQueue *q;
- TRACE("%p : checking for invariants on %p", trec, s);
- for (q = s -> first_watch_queue_entry;
- q != END_STM_WATCH_QUEUE;
- q = q -> next_queue_entry) {
- if (watcher_is_invariant(q)) {
- StgBool found = false;
- StgInvariantCheckQueue *q2;
- TRACE("%p : Touching invariant %p", trec, q -> closure);
- for (q2 = trec -> invariants_to_check;
- q2 != END_INVARIANT_CHECK_QUEUE;
- q2 = q2 -> next_queue_entry) {
- if (q2 -> invariant == (StgAtomicInvariant*)(q -> closure)) {
- TRACE("%p : Already found %p", trec, q -> closure);
- found = true;
- break;
- }
- }
-
- if (!found) {
- StgInvariantCheckQueue *q3;
- TRACE("%p : Not already found %p", trec, q -> closure);
- q3 = alloc_stg_invariant_check_queue(cap,
- (StgAtomicInvariant*) q -> closure);
- q3 -> next_queue_entry = trec -> invariants_to_check;
- trec -> invariants_to_check = q3;
- }
- }
- }
-
- unlock_tvar(cap, trec, s, old, false);
- }
- }
- c = c -> prev_chunk;
- }
-
- unlock_stm(trec);
-
- TRACE("%p : stmGetInvariantsToCheck, head now %p",
- trec,
- trec -> invariants_to_check);
-
- return (trec -> invariants_to_check);
-}
-
-/*......................................................................*/
-
StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
StgInt64 max_commits_at_start = max_commits;
- StgBool touched_invariants;
- StgBool use_read_phase;
TRACE("%p : stmCommitTransaction()", trec);
ASSERT(trec != NO_TREC);
@@ -1317,69 +1055,15 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
ASSERT((trec -> state == TREC_ACTIVE) ||
(trec -> state == TREC_CONDEMNED));
- // touched_invariants is true if we've written to a TVar with invariants
- // attached to it, or if we're trying to add a new invariant to the system.
-
- touched_invariants = (trec -> invariants_to_check != END_INVARIANT_CHECK_QUEUE);
-
- // If we have touched invariants then (i) lock the invariant, and (ii) add
- // the invariant's read set to our own. Step (i) is needed to serialize
- // concurrent transactions that attempt to make conflicting updates
- // to the invariant's trec (suppose it read from t1 and t2, and that one
- // concurrent transcation writes only to t1, and a second writes only to
- // t2). Step (ii) is needed so that both transactions will lock t1 and t2
- // to gain access to their wait lists (and hence be able to unhook the
- // invariant from both tvars).
-
- if (touched_invariants) {
- StgInvariantCheckQueue *q = trec -> invariants_to_check;
- TRACE("%p : locking invariants", trec);
- while (q != END_INVARIANT_CHECK_QUEUE) {
- StgTRecHeader *inv_old_trec;
- StgAtomicInvariant *inv;
- TRACE("%p : locking invariant %p", trec, q -> invariant);
- inv = q -> invariant;
- if (!lock_inv(inv)) {
- TRACE("%p : failed to lock %p", trec, inv);
- trec -> state = TREC_CONDEMNED;
- break;
- }
-
- inv_old_trec = inv -> last_execution;
- if (inv_old_trec != NO_TREC) {
- StgTRecChunk *c = inv_old_trec -> current_chunk;
- while (c != END_STM_CHUNK_LIST) {
- unsigned int i;
- for (i = 0; i < c -> next_entry_idx; i ++) {
- TRecEntry *e = &(c -> entries[i]);
- TRACE("%p : ensuring we lock TVars for %p", trec, e -> tvar);
- merge_read_into (cap, trec, e -> tvar, e -> expected_value);
- }
- c = c -> prev_chunk;
- }
- }
- q = q -> next_queue_entry;
- }
- TRACE("%p : finished locking invariants", trec);
- }
-
// Use a read-phase (i.e. don't lock TVars we've read but not updated) if
- // (i) the configuration lets us use a read phase, and (ii) we've not
- // touched or introduced any invariants.
- //
- // In principle we could extend the implementation to support a read-phase
- // and invariants, but it complicates the logic: the links between
- // invariants and TVars are managed by the TVar watch queues which are
- // protected by the TVar's locks.
-
- use_read_phase = ((config_use_read_phase) && (!touched_invariants));
+ // the configuration lets us use a read phase.
- bool result = validate_and_acquire_ownership(cap, trec, (!use_read_phase), true);
+ bool result = validate_and_acquire_ownership(cap, trec, (!config_use_read_phase), true);
if (result) {
// We now know that all the updated locations hold their expected values.
ASSERT(trec -> state == TREC_ACTIVE);
- if (use_read_phase) {
+ if (config_use_read_phase) {
StgInt64 max_commits_at_end;
StgInt64 max_concurrent_commits;
TRACE("%p : doing read check", trec);
@@ -1399,32 +1083,11 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
// at the end of the call to validate_and_acquire_ownership. This forms the
// linearization point of the commit.
- // 1. If we have touched or introduced any invariants then unhook them
- // from the TVars they depended on last time they were executed
- // and hook them on the TVars that they now depend on.
- if (touched_invariants) {
- StgInvariantCheckQueue *q = trec -> invariants_to_check;
- while (q != END_INVARIANT_CHECK_QUEUE) {
- StgAtomicInvariant *inv = q -> invariant;
- if (inv -> last_execution != NO_TREC) {
- disconnect_invariant(cap, inv);
- }
-
- TRACE("%p : hooking up new execution trec=%p", trec, q -> my_execution);
- connect_invariant_to_trec(cap, inv, q -> my_execution);
-
- TRACE("%p : unlocking invariant %p", trec, inv);
- unlock_inv(inv);
-
- q = q -> next_queue_entry;
- }
- }
-
- // 2. Make the updates required by the transaction
+ // Make the updates required by the transaction.
FOR_EACH_ENTRY(trec, e, {
StgTVar *s;
s = e -> tvar;
- if ((!use_read_phase) || (e -> new_value != e -> expected_value)) {
+ if ((!config_use_read_phase) || (e -> new_value != e -> expected_value)) {
// Either the entry is an update or we're not using a read phase:
// write the value back to the TVar, unlocking it if necessary.
diff --git a/rts/STM.h b/rts/STM.h
index 2484c2f991..3d32daace2 100644
--- a/rts/STM.h
+++ b/rts/STM.h
@@ -138,18 +138,6 @@ StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec);
*/
/*
- * Fill in the trec's list of invariants that might be violated by the current
- * transaction.
- */
-
-StgInvariantCheckQueue *stmGetInvariantsToCheck(Capability *cap,
- StgTRecHeader *trec);
-
-void stmAddInvariantToCheck(Capability *cap,
- StgTRecHeader *trec,
- StgClosure *code);
-
-/*
* Test whether the current transaction context is valid and, if so,
* commit its memory accesses to the heap. stmCommitTransaction must
* unblock any threads which are waiting on tvars that updates have
@@ -209,7 +197,6 @@ void stmWriteTVar(Capability *cap,
/* NULLs */
#define END_STM_WATCH_QUEUE ((StgTVarWatchQueue *)(void *)&stg_END_STM_WATCH_QUEUE_closure)
-#define END_INVARIANT_CHECK_QUEUE ((StgInvariantCheckQueue *)(void *)&stg_END_INVARIANT_CHECK_QUEUE_closure)
#define END_STM_CHUNK_LIST ((StgTRecChunk *)(void *)&stg_END_STM_CHUNK_LIST_closure)
#define NO_TREC ((StgTRecHeader *)(void *)&stg_NO_TREC_closure)
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 8002ac37dc..0444f0ca15 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -41,7 +41,8 @@
#include "Timer.h"
#include "ThreadPaused.h"
#include "Messages.h"
-#include "Stable.h"
+#include "StablePtr.h"
+#include "StableName.h"
#include "TopHandler.h"
#if defined(HAVE_SYS_TYPES_H)
@@ -67,7 +68,7 @@
* -------------------------------------------------------------------------- */
#if !defined(THREADED_RTS)
-// Blocked/sleeping thrads
+// Blocked/sleeping threads
StgTSO *blocked_queue_hd = NULL;
StgTSO *blocked_queue_tl = NULL;
StgTSO *sleeping_queue = NULL; // perhaps replace with a hash table?
@@ -151,11 +152,11 @@ static bool scheduleHandleThreadFinished( Capability *cap, Task *task,
static bool scheduleNeedHeapProfile(bool ready_to_gc);
static void scheduleDoGC(Capability **pcap, Task *task, bool force_major);
-static void deleteThread (Capability *cap, StgTSO *tso);
-static void deleteAllThreads (Capability *cap);
+static void deleteThread (StgTSO *tso);
+static void deleteAllThreads (void);
#if defined(FORKPROCESS_PRIMOP_SUPPORTED)
-static void deleteThread_(Capability *cap, StgTSO *tso);
+static void deleteThread_(StgTSO *tso);
#endif
/* ---------------------------------------------------------------------------
@@ -180,9 +181,6 @@ schedule (Capability *initialCapability, Task *task)
StgThreadReturnCode ret;
uint32_t prev_what_next;
bool ready_to_gc;
-#if defined(THREADED_RTS)
- bool first = true;
-#endif
cap = initialCapability;
@@ -271,7 +269,7 @@ schedule (Capability *initialCapability, Task *task)
}
break;
default:
- barf("sched_state: %d", sched_state);
+ barf("sched_state: %" FMT_Word, sched_state);
}
scheduleFindWork(&cap);
@@ -292,16 +290,6 @@ schedule (Capability *initialCapability, Task *task)
// as a result of a console event having been delivered.
#if defined(THREADED_RTS)
- if (first)
- {
- // XXX: ToDo
- // // don't yield the first time, we want a chance to run this
- // // thread for a bit, even if there are others banging at the
- // // door.
- // first = false;
- // ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
- }
-
scheduleYield(&cap,task);
if (emptyRunQueue(cap)) continue; // look for work again
@@ -360,7 +348,7 @@ schedule (Capability *initialCapability, Task *task)
// in a foreign call returns.
if (sched_state >= SCHED_INTERRUPTING &&
!(t->what_next == ThreadComplete || t->what_next == ThreadKilled)) {
- deleteThread(cap,t);
+ deleteThread(t);
}
// If this capability is disabled, migrate the thread away rather
@@ -679,7 +667,11 @@ scheduleYield (Capability **pcap, Task *task)
// otherwise yield (sleep), and keep yielding if necessary.
do {
- didGcLast = yieldCapability(&cap,task, !didGcLast);
+ if (doIdleGCWork(cap, false)) {
+ didGcLast = false;
+ } else {
+ didGcLast = yieldCapability(&cap,task, !didGcLast);
+ }
}
while (shouldYieldCapability(cap,task,didGcLast));
@@ -701,8 +693,6 @@ static void
schedulePushWork(Capability *cap USED_IF_THREADS,
Task *task USED_IF_THREADS)
{
- /* following code not for PARALLEL_HASKELL. I kept the call general,
- future GUM versions might use pushing in a distributed setup */
#if defined(THREADED_RTS)
Capability *free_caps[n_capabilities], *cap0;
@@ -1263,7 +1253,7 @@ scheduleHandleThreadBlocked( StgTSO *t
* -------------------------------------------------------------------------- */
static bool
-scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
+scheduleHandleThreadFinished (Capability *cap, Task *task, StgTSO *t)
{
/* Need to check whether this was a main thread, and if so,
* return with the return value.
@@ -1352,7 +1342,7 @@ scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
* -------------------------------------------------------------------------- */
static bool
-scheduleNeedHeapProfile( bool ready_to_gc STG_UNUSED )
+scheduleNeedHeapProfile( bool ready_to_gc )
{
// When we have +RTS -i0 and we're heap profiling, do a census at
// every GC. This lets us get repeatable runs for debugging.
@@ -1738,10 +1728,8 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
// they have stopped mutating and are standing by for GC.
waitForGcThreads(cap, idle_cap);
-#if defined(THREADED_RTS)
// Stable point where we can do a global check on our spark counters
ASSERT(checkSparkCountInvariant());
-#endif
}
#endif
@@ -1756,7 +1744,7 @@ delete_threads_and_gc:
* Checking for major_gc ensures that the last GC is major.
*/
if (sched_state == SCHED_INTERRUPTING && major_gc) {
- deleteAllThreads(cap);
+ deleteAllThreads();
#if defined(THREADED_RTS)
// Discard all the sparks from every Capability. Why?
// They'll probably be GC'd anyway since we've killed all the
@@ -1800,6 +1788,9 @@ delete_threads_and_gc:
}
#endif
+ // Do any remaining idle GC work from the previous GC
+ doIdleGCWork(cap, true /* all of it */);
+
#if defined(THREADED_RTS)
// reset pending_sync *before* GC, so that when the GC threads
// emerge they don't immediately re-enter the GC.
@@ -1809,6 +1800,11 @@ delete_threads_and_gc:
GarbageCollect(collect_gen, heap_census, 0, cap, NULL);
#endif
+ // If we're shutting down, don't leave any idle GC work to do.
+ if (sched_state == SCHED_SHUTTING_DOWN) {
+ doIdleGCWork(cap, true /* all of it */);
+ }
+
traceSparkCounters(cap);
switch (recent_activity) {
@@ -1920,13 +1916,6 @@ delete_threads_and_gc:
throwToSelf(cap, main_thread, heapOverflow_closure);
}
}
-#if defined(SPARKBALANCE)
- /* JB
- Once we are all together... this would be the place to balance all
- spark pools. No concurrent stealing or adding of new sparks can
- occur. Should be defined in Sparks.c. */
- balanceSparkPoolsCaps(n_capabilities, capabilities);
-#endif
#if defined(THREADED_RTS)
stgFree(idle_cap);
@@ -1976,7 +1965,8 @@ forkProcess(HsStablePtr *entry
// inconsistent state in the child. See also #1391.
ACQUIRE_LOCK(&sched_mutex);
ACQUIRE_LOCK(&sm_mutex);
- ACQUIRE_LOCK(&stable_mutex);
+ ACQUIRE_LOCK(&stable_ptr_mutex);
+ ACQUIRE_LOCK(&stable_name_mutex);
ACQUIRE_LOCK(&task->lock);
for (i=0; i < n_capabilities; i++) {
@@ -2001,18 +1991,20 @@ forkProcess(HsStablePtr *entry
RELEASE_LOCK(&sched_mutex);
RELEASE_LOCK(&sm_mutex);
- RELEASE_LOCK(&stable_mutex);
+ RELEASE_LOCK(&stable_ptr_mutex);
+ RELEASE_LOCK(&stable_name_mutex);
RELEASE_LOCK(&task->lock);
+#if defined(THREADED_RTS)
+ /* N.B. releaseCapability_ below may need to take all_tasks_mutex */
+ RELEASE_LOCK(&all_tasks_mutex);
+#endif
+
for (i=0; i < n_capabilities; i++) {
releaseCapability_(capabilities[i],false);
RELEASE_LOCK(&capabilities[i]->lock);
}
-#if defined(THREADED_RTS)
- RELEASE_LOCK(&all_tasks_mutex);
-#endif
-
boundTaskExiting(task);
// just return the pid
@@ -2023,7 +2015,8 @@ forkProcess(HsStablePtr *entry
#if defined(THREADED_RTS)
initMutex(&sched_mutex);
initMutex(&sm_mutex);
- initMutex(&stable_mutex);
+ initMutex(&stable_ptr_mutex);
+ initMutex(&stable_name_mutex);
initMutex(&task->lock);
for (i=0; i < n_capabilities; i++) {
@@ -2049,7 +2042,7 @@ forkProcess(HsStablePtr *entry
// don't allow threads to catch the ThreadKilled
// exception, but we do want to raiseAsync() because these
// threads may be evaluating thunks that we need later.
- deleteThread_(t->cap,t);
+ deleteThread_(t);
// stop the GC from updating the InCall to point to
// the TSO. This is only necessary because the
@@ -2273,7 +2266,7 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
* ------------------------------------------------------------------------- */
static void
-deleteAllThreads ( Capability *cap )
+deleteAllThreads (void)
{
// NOTE: only safe to call if we own all capabilities.
@@ -2284,7 +2277,7 @@ deleteAllThreads ( Capability *cap )
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
next = t->global_link;
- deleteThread(cap,t);
+ deleteThread(t);
}
}
@@ -2795,7 +2788,7 @@ void wakeUpRts(void)
-------------------------------------------------------------------------- */
static void
-deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
+deleteThread (StgTSO *tso)
{
// NOTE: must only be called on a TSO that we have exclusive
// access to, because we will call throwToSingleThreaded() below.
@@ -2810,7 +2803,7 @@ deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
#if defined(FORKPROCESS_PRIMOP_SUPPORTED)
static void
-deleteThread_(Capability *cap, StgTSO *tso)
+deleteThread_(StgTSO *tso)
{ // for forkProcess only:
// like deleteThread(), but we delete threads in foreign calls, too.
@@ -2819,7 +2812,7 @@ deleteThread_(Capability *cap, StgTSO *tso)
tso->what_next = ThreadKilled;
appendToRunQueue(tso->cap, tso);
} else {
- deleteThread(cap,tso);
+ deleteThread(tso);
}
}
#endif
diff --git a/rts/Schedule.h b/rts/Schedule.h
index 23a1a5b770..49e094bb89 100644
--- a/rts/Schedule.h
+++ b/rts/Schedule.h
@@ -65,7 +65,7 @@ extern volatile StgWord sched_state;
* The timer interrupt transitions ACTIVITY_YES into
* ACTIVITY_MAYBE_NO, waits for RtsFlags.GcFlags.idleGCDelayTime,
* and then:
- * - if idle GC is no, set ACTIVITY_INACTIVE and wakeUpRts()
+ * - if idle GC is on, set ACTIVITY_INACTIVE and wakeUpRts()
* - if idle GC is off, set ACTIVITY_DONE_GC and stopTimer()
*
* If the scheduler finds ACTIVITY_INACTIVE, then it sets
diff --git a/rts/Sparks.c b/rts/Sparks.c
index ecd3c38a17..bd5e120863 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -14,6 +14,7 @@
#include "Trace.h"
#include "Prelude.h"
#include "Sparks.h"
+#include "ThreadLabels.h"
#include "sm/HeapAlloc.h"
#if defined(THREADED_RTS)
@@ -43,7 +44,7 @@ createSparkThread (Capability *cap)
tso = createIOThread (cap, RtsFlags.GcFlags.initialStkSize,
(StgClosure *)runSparks_closure);
-
+ labelThread(cap, tso, "spark evaluator");
traceEventCreateSparkThread(cap, tso->id);
appendToRunQueue(cap,tso);
@@ -283,21 +284,6 @@ traverseSparkQueue (evac_fn evac, void *user, Capability *cap)
sparkPoolSize(pool), pool->bottom, pool->top);
}
-/* ----------------------------------------------------------------------------
- * balanceSparkPoolsCaps: takes an array of capabilities (usually: all
- * capabilities) and its size. Accesses all spark pools and equally
- * distributes the sparks among them.
- *
- * Could be called after GC, before Cap. release, from scheduler.
- * -------------------------------------------------------------------------- */
-void balanceSparkPoolsCaps(uint32_t n_caps, Capability caps[])
- GNUC3_ATTRIBUTE(__noreturn__);
-
-void balanceSparkPoolsCaps(uint32_t n_caps STG_UNUSED,
- Capability caps[] STG_UNUSED) {
- barf("not implemented");
-}
-
#else
StgInt
diff --git a/rts/StableName.c b/rts/StableName.c
new file mode 100644
index 0000000000..757eb59180
--- /dev/null
+++ b/rts/StableName.c
@@ -0,0 +1,349 @@
+/* -*- tab-width: 4 -*- */
+
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2002
+ *
+ * Stable names
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsAPI.h"
+
+#include "Hash.h"
+#include "RtsUtils.h"
+#include "Trace.h"
+#include "StableName.h"
+
+#include <string.h>
+
+snEntry *stable_name_table = NULL;
+static snEntry *stable_name_free = NULL;
+static unsigned int SNT_size = 0;
+#define INIT_SNT_SIZE 64
+
+#if defined(THREADED_RTS)
+Mutex stable_name_mutex;
+#endif
+
+static void enlargeStableNameTable(void);
+
+/*
+ * This hash table maps Haskell objects to stable names, so that every
+ * call to lookupStableName on a given object will return the same
+ * stable name.
+ */
+
+static HashTable *addrToStableHash = NULL;
+
+void
+stableNameLock(void)
+{
+ initStableNameTable();
+ ACQUIRE_LOCK(&stable_name_mutex);
+}
+
+void
+stableNameUnlock(void)
+{
+ RELEASE_LOCK(&stable_name_mutex);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialising the table
+ * -------------------------------------------------------------------------- */
+
+STATIC_INLINE void
+initSnEntryFreeList(snEntry *table, uint32_t n, snEntry *free)
+{
+ snEntry *p;
+ for (p = table + n - 1; p >= table; p--) {
+ p->addr = (P_)free;
+ p->old = NULL;
+ p->sn_obj = NULL;
+ free = p;
+ }
+ stable_name_free = table;
+}
+
+void
+initStableNameTable(void)
+{
+ if (SNT_size > 0) return;
+ SNT_size = INIT_SNT_SIZE;
+ stable_name_table = stgMallocBytes(SNT_size * sizeof(snEntry),
+ "initStableNameTable");
+ /* we don't use index 0 in the stable name table, because that
+ * would conflict with the hash table lookup operations which
+ * return NULL if an entry isn't found in the hash table.
+ */
+ initSnEntryFreeList(stable_name_table + 1,INIT_SNT_SIZE-1,NULL);
+ addrToStableHash = allocHashTable();
+
+#if defined(THREADED_RTS)
+ initMutex(&stable_name_mutex);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Enlarging the tables
+ * -------------------------------------------------------------------------- */
+
+static void
+enlargeStableNameTable(void)
+{
+ uint32_t old_SNT_size = SNT_size;
+
+ // 2nd and subsequent times
+ SNT_size *= 2;
+ stable_name_table =
+ stgReallocBytes(stable_name_table,
+ SNT_size * sizeof(snEntry),
+ "enlargeStableNameTable");
+
+ initSnEntryFreeList(stable_name_table + old_SNT_size, old_SNT_size, NULL);
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Freeing entries and tables
+ * -------------------------------------------------------------------------- */
+
+void
+exitStableNameTable(void)
+{
+ if (addrToStableHash)
+ freeHashTable(addrToStableHash, NULL);
+ addrToStableHash = NULL;
+
+ if (stable_name_table)
+ stgFree(stable_name_table);
+ stable_name_table = NULL;
+ SNT_size = 0;
+
+#if defined(THREADED_RTS)
+ closeMutex(&stable_name_mutex);
+#endif
+}
+
+STATIC_INLINE void
+freeSnEntry(snEntry *sn)
+{
+ ASSERT(sn->sn_obj == NULL);
+ removeHashTable(addrToStableHash, (W_)sn->old, NULL);
+ sn->addr = (P_)stable_name_free;
+ stable_name_free = sn;
+}
+
+/* -----------------------------------------------------------------------------
+ * Looking up
+ * -------------------------------------------------------------------------- */
+
+/*
+ * get at the real stuff...remove indirections.
+ */
+static StgClosure*
+removeIndirections (StgClosure* p)
+{
+ StgClosure* q;
+
+ while (1)
+ {
+ q = UNTAG_CLOSURE(p);
+
+ switch (get_itbl(q)->type) {
+ case IND:
+ case IND_STATIC:
+ p = ((StgInd *)q)->indirectee;
+ continue;
+
+ case BLACKHOLE:
+ p = ((StgInd *)q)->indirectee;
+ if (GET_CLOSURE_TAG(p) != 0) {
+ continue;
+ } else {
+ break;
+ }
+
+ default:
+ break;
+ }
+ return p;
+ }
+}
+
+StgWord
+lookupStableName (StgPtr p)
+{
+ stableNameLock();
+
+ if (stable_name_free == NULL) {
+ enlargeStableNameTable();
+ }
+
+ /* removing indirections increases the likelihood
+ * of finding a match in the stable name hash table.
+ */
+ p = (StgPtr)removeIndirections((StgClosure*)p);
+
+ // register the untagged pointer. This just makes things simpler.
+ p = (StgPtr)UNTAG_CLOSURE((StgClosure*)p);
+
+ StgWord sn = (StgWord)lookupHashTable(addrToStableHash,(W_)p);
+
+ if (sn != 0) {
+ ASSERT(stable_name_table[sn].addr == p);
+ debugTrace(DEBUG_stable, "cached stable name %ld at %p",sn,p);
+ stableNameUnlock();
+ return sn;
+ }
+
+ sn = stable_name_free - stable_name_table;
+ stable_name_free = (snEntry*)(stable_name_free->addr);
+ stable_name_table[sn].addr = p;
+ stable_name_table[sn].sn_obj = NULL;
+ /* debugTrace(DEBUG_stable, "new stable name %d at %p\n",sn,p); */
+
+ /* add the new stable name to the hash table */
+ insertHashTable(addrToStableHash, (W_)p, (void *)sn);
+
+ stableNameUnlock();
+
+ return sn;
+}
+
+/* -----------------------------------------------------------------------------
+ * Remember old stable name addresses
+ * -------------------------------------------------------------------------- */
+
+#define FOR_EACH_STABLE_NAME(p, CODE) \
+ do { \
+ snEntry *p; \
+ snEntry *__end_ptr = &stable_name_table[SNT_size]; \
+ for (p = stable_name_table + 1; p < __end_ptr; p++) { \
+ /* Internal pointers are free slots. */ \
+ /* If p->addr == NULL, it's a */ \
+ /* stable name where the object has been GC'd, but the */ \
+ /* StableName object (sn_obj) is still alive. */ \
+ if ((p->addr < (P_)stable_name_table || \
+ p->addr >= (P_)__end_ptr)) \
+ { \
+ /* NOTE: There is an ambiguity here if p->addr == NULL */ \
+ /* it is either the last item in the free list or it */ \
+ /* is a stable name whose pointee died. sn_obj == NULL */ \
+ /* disambiguates as last free list item. */ \
+ do { CODE } while(0); \
+ } \
+ } \
+ } while(0)
+
+void
+rememberOldStableNameAddresses(void)
+{
+ /* TODO: Only if !full GC */
+ FOR_EACH_STABLE_NAME(p, p->old = p->addr;);
+}
+
+/* -----------------------------------------------------------------------------
+ * Thread the stable name table for compacting GC.
+ *
+ * Here we must call the supplied evac function for each pointer into
+ * the heap from the stable name table, because the compacting
+ * collector may move the object it points to.
+ * -------------------------------------------------------------------------- */
+
+void
+threadStableNameTable( evac_fn evac, void *user )
+{
+ FOR_EACH_STABLE_NAME(p, {
+ if (p->sn_obj != NULL) {
+ evac(user, (StgClosure **)&p->sn_obj);
+ }
+ if (p->addr != NULL) {
+ evac(user, (StgClosure **)&p->addr);
+ }
+ });
+}
+
+/* -----------------------------------------------------------------------------
+ * Garbage collect any dead entries in the stable name table.
+ *
+ * A dead entry has a dead sn_obj: the StableName object itself is no
+ * longer alive. Only then do we re-use the stable name table entry.
+ * In particular, we can re-use stable name table entries for live
+ * heap objects, as long as the program has no StableName objects that
+ * refer to the entry.
+ * -------------------------------------------------------------------------- */
+
+void
+gcStableNameTable( void )
+{
+ FOR_EACH_STABLE_NAME(
+ p, {
+ // FOR_EACH_STABLE_NAME traverses free entries too, so
+ // check sn_obj
+ if (p->sn_obj != NULL) {
+ // Update the pointer to the StableName object, if there is one
+ p->sn_obj = isAlive(p->sn_obj);
+ if (p->sn_obj == NULL) {
+ // StableName object died
+ debugTrace(DEBUG_stable, "GC'd StableName %ld (addr=%p)",
+ (long)(p - stable_name_table), p->addr);
+ freeSnEntry(p);
+ } else if (p->addr != NULL) {
+ // sn_obj is alive, update pointee
+ p->addr = (StgPtr)isAlive((StgClosure *)p->addr);
+ if (p->addr == NULL) {
+ // Pointee died
+ debugTrace(DEBUG_stable, "GC'd pointee %ld",
+ (long)(p - stable_name_table));
+ }
+ }
+ }
+ });
+}
+
+/* -----------------------------------------------------------------------------
+ * Update the StableName hash table
+ *
+ * The boolean argument 'full' indicates that a major collection is
+ * being done, so we might as well throw away the hash table and build
+ * a new one. For a minor collection, we just re-hash the elements
+ * that changed.
+ * -------------------------------------------------------------------------- */
+
+void
+updateStableNameTable(bool full)
+{
+ if (full && addrToStableHash != NULL && 0 != keyCountHashTable(addrToStableHash)) {
+ freeHashTable(addrToStableHash,NULL);
+ addrToStableHash = allocHashTable();
+ }
+
+ if(full) {
+ FOR_EACH_STABLE_NAME(
+ p, {
+ if (p->addr != NULL) {
+ // Target still alive: re-hash this stable name
+ insertHashTable(addrToStableHash, (W_)p->addr, (void *)(p - stable_name_table));
+ }
+ });
+ } else {
+ FOR_EACH_STABLE_NAME(
+ p, {
+ if (p->addr != p->old) {
+ removeHashTable(addrToStableHash, (W_)p->old, NULL);
+ /* Movement happened: */
+ if (p->addr != NULL) {
+ insertHashTable(addrToStableHash, (W_)p->addr, (void *)(p - stable_name_table));
+ }
+ }
+ });
+ }
+}
diff --git a/rts/StableName.h b/rts/StableName.h
new file mode 100644
index 0000000000..6b5e551add
--- /dev/null
+++ b/rts/StableName.h
@@ -0,0 +1,31 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "sm/GC.h" // for evac_fn below
+
+#include "BeginPrivate.h"
+
+void initStableNameTable ( void );
+void exitStableNameTable ( void );
+StgWord lookupStableName ( StgPtr p );
+
+void rememberOldStableNameAddresses ( void );
+
+void threadStableNameTable ( evac_fn evac, void *user );
+void gcStableNameTable ( void );
+void updateStableNameTable ( bool full );
+
+void stableNameLock ( void );
+void stableNameUnlock ( void );
+
+#if defined(THREADED_RTS)
+// needed by Schedule.c:forkProcess()
+extern Mutex stable_name_mutex;
+#endif
+
+#include "EndPrivate.h"
diff --git a/rts/Stable.c b/rts/StablePtr.c
index ecf216a550..0f53ffcdc4 100644
--- a/rts/Stable.c
+++ b/rts/StablePtr.c
@@ -4,7 +4,7 @@
*
* (c) The GHC Team, 1998-2002
*
- * Stable names and stable pointers.
+ * Stable pointers
*
* ---------------------------------------------------------------------------*/
@@ -15,7 +15,7 @@
#include "Hash.h"
#include "RtsUtils.h"
#include "Trace.h"
-#include "Stable.h"
+#include "StablePtr.h"
#include <string.h>
@@ -88,11 +88,6 @@
http://ghc.haskell.org/trac/ghc/ticket/7670 for details.
*/
-snEntry *stable_name_table = NULL;
-static snEntry *stable_name_free = NULL;
-static unsigned int SNT_size = 0;
-#define INIT_SNT_SIZE 64
-
spEntry *stable_ptr_table = NULL;
static spEntry *stable_ptr_free = NULL;
static unsigned int SPT_size = 0;
@@ -116,56 +111,34 @@ static spEntry *old_SPTs[MAX_N_OLD_SPTS];
static uint32_t n_old_SPTs = 0;
#if defined(THREADED_RTS)
-Mutex stable_mutex;
+Mutex stable_ptr_mutex;
#endif
-static void enlargeStableNameTable(void);
static void enlargeStablePtrTable(void);
-/*
- * This hash table maps Haskell objects to stable names, so that every
- * call to lookupStableName on a given object will return the same
- * stable name.
- */
-
-static HashTable *addrToStableHash = NULL;
-
/* -----------------------------------------------------------------------------
* We must lock the StablePtr table during GC, to prevent simultaneous
* calls to freeStablePtr().
* -------------------------------------------------------------------------- */
void
-stableLock(void)
+stablePtrLock(void)
{
- initStableTables();
- ACQUIRE_LOCK(&stable_mutex);
+ initStablePtrTable();
+ ACQUIRE_LOCK(&stable_ptr_mutex);
}
void
-stableUnlock(void)
+stablePtrUnlock(void)
{
- RELEASE_LOCK(&stable_mutex);
+ RELEASE_LOCK(&stable_ptr_mutex);
}
/* -----------------------------------------------------------------------------
- * Initialising the tables
+ * Initialising the table
* -------------------------------------------------------------------------- */
STATIC_INLINE void
-initSnEntryFreeList(snEntry *table, uint32_t n, snEntry *free)
-{
- snEntry *p;
- for (p = table + n - 1; p >= table; p--) {
- p->addr = (P_)free;
- p->old = NULL;
- p->sn_obj = NULL;
- free = p;
- }
- stable_name_free = table;
-}
-
-STATIC_INLINE void
initSpEntryFreeList(spEntry *table, uint32_t n, spEntry *free)
{
spEntry *p;
@@ -177,49 +150,24 @@ initSpEntryFreeList(spEntry *table, uint32_t n, spEntry *free)
}
void
-initStableTables(void)
+initStablePtrTable(void)
{
- if (SNT_size > 0) return;
- SNT_size = INIT_SNT_SIZE;
- stable_name_table = stgMallocBytes(SNT_size * sizeof *stable_name_table,
- "initStableNameTable");
- /* we don't use index 0 in the stable name table, because that
- * would conflict with the hash table lookup operations which
- * return NULL if an entry isn't found in the hash table.
- */
- initSnEntryFreeList(stable_name_table + 1,INIT_SNT_SIZE-1,NULL);
- addrToStableHash = allocHashTable();
-
if (SPT_size > 0) return;
SPT_size = INIT_SPT_SIZE;
- stable_ptr_table = stgMallocBytes(SPT_size * sizeof *stable_ptr_table,
+ stable_ptr_table = stgMallocBytes(SPT_size * sizeof(spEntry),
"initStablePtrTable");
initSpEntryFreeList(stable_ptr_table,INIT_SPT_SIZE,NULL);
#if defined(THREADED_RTS)
- initMutex(&stable_mutex);
+ initMutex(&stable_ptr_mutex);
#endif
}
/* -----------------------------------------------------------------------------
- * Enlarging the tables
+ * Enlarging the table
* -------------------------------------------------------------------------- */
-static void
-enlargeStableNameTable(void)
-{
- uint32_t old_SNT_size = SNT_size;
-
- // 2nd and subsequent times
- SNT_size *= 2;
- stable_name_table =
- stgReallocBytes(stable_name_table,
- SNT_size * sizeof *stable_name_table,
- "enlargeStableNameTable");
-
- initSnEntryFreeList(stable_name_table + old_SNT_size, old_SNT_size, NULL);
-}
-
+// Must be holding stable_ptr_mutex
static void
enlargeStablePtrTable(void)
{
@@ -233,11 +181,11 @@ enlargeStablePtrTable(void)
* [Enlarging the stable pointer table].
*/
new_stable_ptr_table =
- stgMallocBytes(SPT_size * sizeof *stable_ptr_table,
+ stgMallocBytes(SPT_size * sizeof(spEntry),
"enlargeStablePtrTable");
memcpy(new_stable_ptr_table,
stable_ptr_table,
- old_SPT_size * sizeof *stable_ptr_table);
+ old_SPT_size * sizeof(spEntry));
ASSERT(n_old_SPTs < MAX_N_OLD_SPTS);
old_SPTs[n_old_SPTs++] = stable_ptr_table;
@@ -282,17 +230,8 @@ freeOldSPTs(void)
}
void
-exitStableTables(void)
+exitStablePtrTable(void)
{
- if (addrToStableHash)
- freeHashTable(addrToStableHash, NULL);
- addrToStableHash = NULL;
-
- if (stable_name_table)
- stgFree(stable_name_table);
- stable_name_table = NULL;
- SNT_size = 0;
-
if (stable_ptr_table)
stgFree(stable_ptr_table);
stable_ptr_table = NULL;
@@ -301,20 +240,11 @@ exitStableTables(void)
freeOldSPTs();
#if defined(THREADED_RTS)
- closeMutex(&stable_mutex);
+ closeMutex(&stable_ptr_mutex);
#endif
}
STATIC_INLINE void
-freeSnEntry(snEntry *sn)
-{
- ASSERT(sn->sn_obj == NULL);
- removeHashTable(addrToStableHash, (W_)sn->old, NULL);
- sn->addr = (P_)stable_name_free;
- stable_name_free = sn;
-}
-
-STATIC_INLINE void
freeSpEntry(spEntry *sp)
{
sp->addr = (P_)stable_ptr_free;
@@ -331,103 +261,26 @@ freeStablePtrUnsafe(StgStablePtr sp)
void
freeStablePtr(StgStablePtr sp)
{
- stableLock();
+ stablePtrLock();
freeStablePtrUnsafe(sp);
- stableUnlock();
+ stablePtrUnlock();
}
/* -----------------------------------------------------------------------------
* Looking up
* -------------------------------------------------------------------------- */
-/*
- * get at the real stuff...remove indirections.
- */
-static StgClosure*
-removeIndirections (StgClosure* p)
-{
- StgClosure* q;
-
- while (1)
- {
- q = UNTAG_CLOSURE(p);
-
- switch (get_itbl(q)->type) {
- case IND:
- case IND_STATIC:
- p = ((StgInd *)q)->indirectee;
- continue;
-
- case BLACKHOLE:
- p = ((StgInd *)q)->indirectee;
- if (GET_CLOSURE_TAG(p) != 0) {
- continue;
- } else {
- break;
- }
-
- default:
- break;
- }
- return p;
- }
-}
-
-StgWord
-lookupStableName (StgPtr p)
-{
- StgWord sn;
- const void* sn_tmp;
-
- stableLock();
-
- if (stable_name_free == NULL) {
- enlargeStableNameTable();
- }
-
- /* removing indirections increases the likelihood
- * of finding a match in the stable name hash table.
- */
- p = (StgPtr)removeIndirections((StgClosure*)p);
-
- // register the untagged pointer. This just makes things simpler.
- p = (StgPtr)UNTAG_CLOSURE((StgClosure*)p);
-
- sn_tmp = lookupHashTable(addrToStableHash,(W_)p);
- sn = (StgWord)sn_tmp;
-
- if (sn != 0) {
- ASSERT(stable_name_table[sn].addr == p);
- debugTrace(DEBUG_stable, "cached stable name %ld at %p",sn,p);
- stableUnlock();
- return sn;
- }
-
- sn = stable_name_free - stable_name_table;
- stable_name_free = (snEntry*)(stable_name_free->addr);
- stable_name_table[sn].addr = p;
- stable_name_table[sn].sn_obj = NULL;
- /* debugTrace(DEBUG_stable, "new stable name %d at %p\n",sn,p); */
-
- /* add the new stable name to the hash table */
- insertHashTable(addrToStableHash, (W_)p, (void *)sn);
-
- stableUnlock();
-
- return sn;
-}
-
StgStablePtr
getStablePtr(StgPtr p)
{
StgWord sp;
- stableLock();
+ stablePtrLock();
if (!stable_ptr_free) enlargeStablePtrTable();
sp = stable_ptr_free - stable_ptr_table;
stable_ptr_free = (spEntry*)(stable_ptr_free->addr);
stable_ptr_table[sp].addr = p;
- stableUnlock();
+ stablePtrUnlock();
return (StgStablePtr)(sp);
}
@@ -450,50 +303,15 @@ getStablePtr(StgPtr p)
} \
} while(0)
-#define FOR_EACH_STABLE_NAME(p, CODE) \
- do { \
- snEntry *p; \
- snEntry *__end_ptr = &stable_name_table[SNT_size]; \
- for (p = stable_name_table + 1; p < __end_ptr; p++) { \
- /* Internal pointers are free slots. */ \
- /* If p->addr == NULL, it's a */ \
- /* stable name where the object has been GC'd, but the */ \
- /* StableName object (sn_obj) is still alive. */ \
- if ((p->addr < (P_)stable_name_table || \
- p->addr >= (P_)__end_ptr)) \
- { \
- /* NOTE: There is an ambiguity here if p->addr == NULL */ \
- /* it is either the last item in the free list or it */ \
- /* is a stable name whose pointee died. sn_obj == NULL */ \
- /* disambiguates as last free list item. */ \
- do { CODE } while(0); \
- } \
- } \
- } while(0)
-
-STATIC_INLINE void
-markStablePtrTable(evac_fn evac, void *user)
-{
- FOR_EACH_STABLE_PTR(p, evac(user, (StgClosure **)&p->addr););
-}
-
-STATIC_INLINE void
-rememberOldStableNameAddresses(void)
-{
- /* TODO: Only if !full GC */
- FOR_EACH_STABLE_NAME(p, p->old = p->addr;);
-}
-
void
-markStableTables(evac_fn evac, void *user)
+markStablePtrTable(evac_fn evac, void *user)
{
/* Since no other thread can currently be dereferencing a stable pointer, it
* is safe to free the old versions of the table.
*/
freeOldSPTs();
- markStablePtrTable(evac, user);
- rememberOldStableNameAddresses();
+ FOR_EACH_STABLE_PTR(p, evac(user, (StgClosure **)&p->addr););
}
/* -----------------------------------------------------------------------------
@@ -504,115 +322,8 @@ markStableTables(evac_fn evac, void *user)
* collector may move the object it points to.
* -------------------------------------------------------------------------- */
-STATIC_INLINE void
-threadStableNameTable( evac_fn evac, void *user )
-{
- FOR_EACH_STABLE_NAME(p, {
- if (p->sn_obj != NULL) {
- evac(user, (StgClosure **)&p->sn_obj);
- }
- if (p->addr != NULL) {
- evac(user, (StgClosure **)&p->addr);
- }
- });
-}
-
-STATIC_INLINE void
+void
threadStablePtrTable( evac_fn evac, void *user )
{
FOR_EACH_STABLE_PTR(p, evac(user, (StgClosure **)&p->addr););
}
-
-void
-threadStableTables( evac_fn evac, void *user )
-{
- threadStableNameTable(evac, user);
- threadStablePtrTable(evac, user);
-}
-
-/* -----------------------------------------------------------------------------
- * Garbage collect any dead entries in the stable pointer table.
- *
- * A dead entry has:
- *
- * - a zero reference count
- * - a dead sn_obj
- *
- * Both of these conditions must be true in order to re-use the stable
- * name table entry. We can re-use stable name table entries for live
- * heap objects, as long as the program has no StableName objects that
- * refer to the entry.
- * -------------------------------------------------------------------------- */
-
-void
-gcStableTables( void )
-{
- FOR_EACH_STABLE_NAME(
- p, {
- // Update the pointer to the StableName object, if there is one
- if (p->sn_obj != NULL) {
- p->sn_obj = isAlive(p->sn_obj);
- if(p->sn_obj == NULL) {
- // StableName object died
- debugTrace(DEBUG_stable, "GC'd StableName %ld (addr=%p)",
- (long)(p - stable_name_table), p->addr);
- freeSnEntry(p);
- /* Can't "continue", so use goto */
- goto next_stable_name;
- }
- }
- /* If sn_obj became NULL, the object died, and addr is now
- * invalid. But if sn_obj was null, then the StableName
- * object may not have been created yet, while the pointee
- * already exists and must be updated to new location. */
- if (p->addr != NULL) {
- p->addr = (StgPtr)isAlive((StgClosure *)p->addr);
- if(p->addr == NULL) {
- // StableName pointee died
- debugTrace(DEBUG_stable, "GC'd pointee %ld",
- (long)(p - stable_name_table));
- }
- }
- next_stable_name:
- if (0) {}
- });
-}
-
-/* -----------------------------------------------------------------------------
- * Update the StableName hash table
- *
- * The boolean argument 'full' indicates that a major collection is
- * being done, so we might as well throw away the hash table and build
- * a new one. For a minor collection, we just re-hash the elements
- * that changed.
- * -------------------------------------------------------------------------- */
-
-void
-updateStableTables(bool full)
-{
- if (full && addrToStableHash != NULL && 0 != keyCountHashTable(addrToStableHash)) {
- freeHashTable(addrToStableHash,NULL);
- addrToStableHash = allocHashTable();
- }
-
- if(full) {
- FOR_EACH_STABLE_NAME(
- p, {
- if (p->addr != NULL) {
- // Target still alive, Re-hash this stable name
- insertHashTable(addrToStableHash, (W_)p->addr, (void *)(p - stable_name_table));
- }
- });
- } else {
- FOR_EACH_STABLE_NAME(
- p, {
- if (p->addr != p->old) {
- removeHashTable(addrToStableHash, (W_)p->old, NULL);
- /* Movement happened: */
- if (p->addr != NULL) {
- insertHashTable(addrToStableHash, (W_)p->addr, (void *)(p - stable_name_table));
- }
- }
- });
- }
-}
diff --git a/rts/Stable.h b/rts/StablePtr.h
index 399a2b3877..3fb305b47b 100644
--- a/rts/Stable.h
+++ b/rts/StablePtr.h
@@ -20,32 +20,27 @@
void freeStablePtr ( StgStablePtr sp );
-/* Use the "Unsafe" one after manually locking with stableLock/stableUnlock */
+/* Use the "Unsafe" one after only when manually locking and
+ unlocking with stablePtrLock/stablePtrUnlock */
void freeStablePtrUnsafe ( StgStablePtr sp );
-void initStableTables ( void );
-void exitStableTables ( void );
-StgWord lookupStableName ( StgPtr p );
+void initStablePtrTable ( void );
+void exitStablePtrTable ( void );
-/* Call given function on every stable ptr. markStableTables depends
+/* Call given function on every stable ptr. markStablePtrTable depends
* on the function updating its pointers in case the object is
- * moved. */
-/* TODO: This also remembers old stable name addresses, which isn't
- * necessary in some contexts markStableTables is called from.
- * Consider splitting it.
+ * moved.
*/
-void markStableTables ( evac_fn evac, void *user );
+void markStablePtrTable ( evac_fn evac, void *user );
-void threadStableTables ( evac_fn evac, void *user );
-void gcStableTables ( void );
-void updateStableTables ( bool full );
+void threadStablePtrTable ( evac_fn evac, void *user );
-void stableLock ( void );
-void stableUnlock ( void );
+void stablePtrLock ( void );
+void stablePtrUnlock ( void );
#if defined(THREADED_RTS)
// needed by Schedule.c:forkProcess()
-extern Mutex stable_mutex;
+extern Mutex stable_ptr_mutex;
#endif
#include "EndPrivate.h"
diff --git a/rts/StaticPtrTable.c b/rts/StaticPtrTable.c
index 5d2737a262..0b2244025e 100644
--- a/rts/StaticPtrTable.c
+++ b/rts/StaticPtrTable.c
@@ -12,7 +12,7 @@
#include "Rts.h"
#include "RtsUtils.h"
#include "Hash.h"
-#include "Stable.h"
+#include "StablePtr.h"
static HashTable * spt = NULL;
@@ -21,23 +21,24 @@ static Mutex spt_lock;
#endif
/// Hash function for the SPT.
-static int hashFingerprint(HashTable *table, StgWord64 key[2]) {
+static int hashFingerprint(const HashTable *table, StgWord key) {
+ const StgWord64* ptr = (StgWord64*) key;
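+ // 'key' is really a pointer to a 128-bit fingerprint (the StgWord64[2]
+ // passed to hs_spt_insert_stableptr); the generic HashTable interface
+ // hands it to us as a StgWord.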
// Take half of the key to compute the hash.
- return hashWord(table, (StgWord)key[1]);
+ return hashWord(table, *(ptr + 1));
}
/// Comparison function for the SPT.
-static int compareFingerprint(StgWord64 ptra[2], StgWord64 ptrb[2]) {
- return ptra[0] == ptrb[0] && ptra[1] == ptrb[1];
+static int compareFingerprint(StgWord a, StgWord b) {
+ const StgWord64* ptra = (StgWord64*) a;
+ const StgWord64* ptrb = (StgWord64*) b;
+ return *ptra == *ptrb && *(ptra + 1) == *(ptrb + 1);
}
void hs_spt_insert_stableptr(StgWord64 key[2], StgStablePtr *entry) {
// hs_spt_insert is called from constructor functions, so
// the SPT needs to be initialized here.
if (spt == NULL) {
- spt = allocHashTable_( (HashFunction *)hashFingerprint
- , (CompareFunction *)compareFingerprint
- );
+ spt = allocHashTable_(hashFingerprint, compareFingerprint);
#if defined(THREADED_RTS)
initMutex(&spt_lock);
#endif
diff --git a/rts/Stats.c b/rts/Stats.c
index 6a5f80130e..3d03d4931b 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -16,10 +16,16 @@
#include "Profiling.h"
#include "GetTime.h"
#include "sm/Storage.h"
-#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"
+// for spin/yield counters
+#include "sm/GC.h"
+#include "ThreadPaused.h"
+#include "Messages.h"
+
+#include <string.h> // for memset
+
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
static Time
@@ -43,6 +49,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#define PROF_VAL(x) 0
#endif
+#if defined(PROF_SPIN)
+volatile StgWord64 whitehole_lockClosure_spin = 0;
+volatile StgWord64 whitehole_lockClosure_yield = 0;
+volatile StgWord64 whitehole_threadPaused_spin = 0;
+volatile StgWord64 whitehole_executeMessage_spin = 0;
+#endif
+
//
// All the stats!
//
@@ -80,6 +93,9 @@ mut_user_time_until( Time t )
return TimeToSecondsDbl(t - stats.gc_cpu_ns);
// heapCensus() time is included in GC_tot_cpu, so we don't need
// to subtract it here.
+
+ // TODO: This seems wrong to me. Surely we should be subtracting
+ // (at least) start_init_cpu?
}
double
@@ -150,6 +166,15 @@ initStats0(void)
.par_copied_bytes = 0,
.cumulative_par_max_copied_bytes = 0,
.cumulative_par_balanced_copied_bytes = 0,
+ .gc_spin_spin = 0,
+ .gc_spin_yield = 0,
+ .mut_spin_spin = 0,
+ .mut_spin_yield = 0,
+ .any_work = 0,
+ .no_work = 0,
+ .scav_find_work = 0,
+ .init_cpu_ns = 0,
+ .init_elapsed_ns = 0,
.mutator_cpu_ns = 0,
.mutator_elapsed_ns = 0,
.gc_cpu_ns = 0,
@@ -221,6 +246,8 @@ void
stat_endInit(void)
{
getProcessTimes(&end_init_cpu, &end_init_elapsed);
+ stats.init_cpu_ns = end_init_cpu - start_init_cpu;
+ stats.init_elapsed_ns = end_init_elapsed - start_init_elapsed;
}
/* -----------------------------------------------------------------------------
@@ -283,10 +310,11 @@ stat_startGC (Capability *cap, gc_thread *gct)
-------------------------------------------------------------------------- */
void
-stat_endGC (Capability *cap, gc_thread *gct,
- W_ live, W_ copied, W_ slop, uint32_t gen,
- uint32_t par_n_threads, W_ par_max_copied,
- W_ par_balanced_copied)
+stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
+ uint32_t gen, uint32_t par_n_threads, W_ par_max_copied,
+ W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
+ W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ no_work,
+ W_ scav_find_work)
{
// -------------------------------------------------
// Collect all the stats about this GC in stats.gc. We always do this since
@@ -310,6 +338,26 @@ stat_endGC (Capability *cap, gc_thread *gct,
stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
stats.gc.par_balanced_copied_bytes = par_balanced_copied * sizeof(W_);
+ bool stats_enabled =
+ RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+ rtsConfig.gcDoneHook != NULL;
+
+ if (stats_enabled
+ || RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
+ {
+ // We only update the times when stats are explicitly enabled since
+ // getProcessTimes (e.g. requiring a system call) can be expensive on
+ // some platforms.
+ Time current_cpu, current_elapsed;
+ getProcessTimes(&current_cpu, &current_elapsed);
+ stats.cpu_ns = current_cpu - start_init_cpu;
+ stats.elapsed_ns = current_elapsed - start_init_elapsed;
+
+ stats.gc.sync_elapsed_ns =
+ gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
+ stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
+ stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
+ }
// -------------------------------------------------
// Update the cumulative stats
@@ -330,6 +378,13 @@ stat_endGC (Capability *cap, gc_thread *gct,
stats.gc.par_max_copied_bytes;
stats.cumulative_par_balanced_copied_bytes +=
stats.gc.par_balanced_copied_bytes;
+ stats.any_work += any_work;
+ stats.no_work += no_work;
+ stats.scav_find_work += scav_find_work;
+ stats.gc_spin_spin += gc_spin_spin;
+ stats.gc_spin_yield += gc_spin_yield;
+ stats.mut_spin_spin += mut_spin_spin;
+ stats.mut_spin_yield += mut_spin_yield;
}
stats.gc_cpu_ns += stats.gc.cpu_ns;
stats.gc_elapsed_ns += stats.gc.elapsed_ns;
@@ -354,23 +409,8 @@ stat_endGC (Capability *cap, gc_thread *gct,
// -------------------------------------------------
// Do the more expensive bits only when stats are enabled.
- if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
- rtsConfig.gcDoneHook != NULL ||
- RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
+ if (stats_enabled)
{
- // We only update the times when stats are explicitly enabled since
- // getProcessTimes (e.g. requiring a system call) can be expensive on
- // some platforms.
- Time current_cpu, current_elapsed;
- getProcessTimes(&current_cpu, &current_elapsed);
- stats.cpu_ns = current_cpu - start_init_cpu;
- stats.elapsed_ns = current_elapsed - start_init_elapsed;
-
- stats.gc.sync_elapsed_ns =
- gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
- stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
- stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
-
// -------------------------------------------------
// Emit events to the event log
@@ -387,8 +427,8 @@ stat_endGC (Capability *cap, gc_thread *gct,
stats.gc.copied_bytes,
stats.gc.slop_bytes,
/* current loss due to fragmentation */
- (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
- * BLOCK_SIZE,
+ (mblocks_allocated * BLOCKS_PER_MBLOCK
+ - n_alloc_blocks) * BLOCK_SIZE,
par_n_threads,
stats.gc.par_max_copied_bytes,
stats.gc.copied_bytes,
@@ -483,7 +523,8 @@ stat_endRP(
fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
#endif
- fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
+ fprintf(prof_file, "\tAverage number of visits per object = %f\n",
+ averageNumVisit);
}
#endif /* PROFILING */
@@ -541,7 +582,7 @@ StgInt TOTAL_CALLS=1;
#define REPORT(counter) \
{ \
showStgWord64(counter,temp,true/*commas*/); \
- statsPrintf(" (" #counter ") : %s\n",temp); \
+ statsPrintf(" (" #counter ") : %s\n",temp); \
}
/* Report the value of a counter as a percentage of another counter */
@@ -560,260 +601,647 @@ StgInt TOTAL_CALLS=1;
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
-STATIC_INLINE Time get_init_cpu(void) {
- return end_init_cpu - start_init_cpu;
-}
+/*
+Note [RTS Stats Reporting]
+==========================
+
+There are currently three reporting functions:
+ * report_summary:
+ Responsible for producing '+RTS -s' output.
+ Will report internal counters if the RTS flag --internal-counters is
+ passed. See [Internal Counters Stats]
+ * report_machine_readable:
+ Responsible for producing '+RTS -t --machine-readable' output.
+ * report_one_line:
+ Responsible for producing '+RTS -t' output.
+
+Stats are accumulated into the global variable 'stats' as the program runs, then
+in 'stat_exit' we do the following:
+ * Finalise 'stats'. This involves setting final running times and allocations
+ that have not yet been accounted for.
+ * Create an RTSSummaryStats. This contains all data for reports that is not
+ included in stats (because it does not make sense before the program has
+ completed) or in a global variable.
+ * Call the appropriate report_* function, passing the newly constructed
+ RTSSummaryStats.
+
+To ensure that the data in the different reports is kept consistent, the
+report_* functions should not do any calculation, except for unit changes and
+formatting. If you need to add a new calculated field, add it to
+RTSSummaryStats.
+
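+For example, adding a hypothetical calculated field 'gc_cpu_fraction'
+would touch three places (a sketch only, not actual code):
+
+    // 1. Add the field to RTSSummaryStats:
+    //        double gc_cpu_fraction;
+    // 2. Compute it once in stat_exit, when populating 'sum':
+    //        sum.gc_cpu_fraction = TimeToSecondsDbl(stats.gc_cpu_ns)
+    //                            / TimeToSecondsDbl(stats.cpu_ns);
+    // 3. Format it, and nothing more, in the report_* functions:
+    //        statsPrintf("GC CPU fraction %.2f\n", sum->gc_cpu_fraction);
+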
+*/
+
-STATIC_INLINE Time get_init_elapsed(void) {
- return end_init_elapsed - start_init_elapsed;
+static void init_RTSSummaryStats(RTSSummaryStats* sum)
+{
+ const size_t sizeof_gc_summary_stats =
+ RtsFlags.GcFlags.generations * sizeof(GenerationSummaryStats);
+
+ memset(sum, 0, sizeof(RTSSummaryStats));
+ sum->gc_summary_stats =
+ stgMallocBytes(sizeof_gc_summary_stats,
+ "alloc_RTSSummaryStats.gc_summary_stats");
+ memset(sum->gc_summary_stats, 0, sizeof_gc_summary_stats);
}
+static void free_RTSSummaryStats(RTSSummaryStats * sum)
+{
+ if (!sum) { return; }
+ if (sum->gc_summary_stats) {
+ stgFree(sum->gc_summary_stats);
+ sum->gc_summary_stats = NULL;
+ }
+}
-void
-stat_exit (void)
+static void report_summary(const RTSSummaryStats* sum)
{
- generation *gen;
- Time gc_cpu = 0;
- Time gc_elapsed = 0;
- Time init_cpu = 0;
- Time init_elapsed = 0;
- Time mut_cpu = 0;
- Time mut_elapsed = 0;
- Time exit_cpu = 0;
- Time exit_elapsed = 0;
- Time exit_gc_cpu = 0;
- Time exit_gc_elapsed = 0;
+ // We should do no calculation, other than unit changes and formatting, and
+ // we should not use any data from outside of globals, sum and stats
+ // here. See Note [RTS Stats Reporting]
+
+ uint32_t g;
+ char temp[512];
+ showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
+ statsPrintf("%16s bytes allocated in the heap\n", temp);
+
+ showStgWord64(stats.copied_bytes, temp, true/*commas*/);
+ statsPrintf("%16s bytes copied during GC\n", temp);
+
+ if ( stats.major_gcs > 0 ) {
+ showStgWord64(stats.max_live_bytes, temp, true/*commas*/);
+ statsPrintf("%16s bytes maximum residency (%" FMT_Word32
+ " sample(s))\n",
+ temp, stats.major_gcs);
+ }
- if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
+ showStgWord64(stats.max_slop_bytes, temp, true/*commas*/);
+ statsPrintf("%16s bytes maximum slop\n", temp);
+
+ statsPrintf("%16" FMT_Word64 " MB total memory in use (%"
+ FMT_Word64 " MB lost due to fragmentation)\n\n",
+ stats.max_mem_in_use_bytes / (1024 * 1024),
+ sum->fragmentation_bytes / (1024 * 1024));
+
+ /* Print garbage collections in each gen */
+ statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ const GenerationSummaryStats * gen_stats =
+ &sum->gc_summary_stats[g];
+ statsPrintf(" Gen %2d %5d colls"
+ ", %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
+ g, // REVIEWERS: this used to be gen->no;
+ // can this ever be different?
+ gen_stats->collections,
+ gen_stats->par_collections,
+ TimeToSecondsDbl(gen_stats->cpu_ns),
+ TimeToSecondsDbl(gen_stats->elapsed_ns),
+ TimeToSecondsDbl(gen_stats->avg_pause_ns),
+ TimeToSecondsDbl(gen_stats->max_pause_ns));
+ }
- char temp[512];
- Time tot_cpu;
- Time tot_elapsed;
- uint32_t g;
+ statsPrintf("\n");
- getProcessTimes( &tot_cpu, &tot_elapsed );
- tot_cpu -= start_init_cpu;
- tot_elapsed -= start_init_elapsed;
+#if defined(THREADED_RTS)
+ if (RtsFlags.ParFlags.parGcEnabled && sum->work_balance > 0) {
+ // See Note [Work Balance]
+ statsPrintf(" Parallel GC work balance: "
+ "%.2f%% (serial 0%%, perfect 100%%)\n\n",
+ sum->work_balance * 100);
+ }
- uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+ statsPrintf(" TASKS: %d "
+ "(%d bound, %d peak workers (%d total), using -N%d)\n\n",
+ taskCount, sum->bound_task_count,
+ peakWorkerCount, workerCount,
+ n_capabilities);
+
+ statsPrintf(" SPARKS: %" FMT_Word64
+ "(%" FMT_Word " converted, %" FMT_Word " overflowed, %"
+ FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
+ sum->sparks_count,
+ sum->sparks.converted, sum->sparks.overflowed,
+ sum->sparks.dud, sum->sparks.gcd,
+ sum->sparks.fizzled);
+#endif
- // allocated since the last GC
- stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
- stats.allocated_bytes = tot_alloc_bytes;
+ statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(stats.init_cpu_ns),
+ TimeToSecondsDbl(stats.init_elapsed_ns));
- /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
- if (tot_cpu <= 0) tot_cpu = 1;
- if (tot_elapsed <= 0) tot_elapsed = 1;
+ statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(stats.mutator_cpu_ns),
+ TimeToSecondsDbl(stats.mutator_elapsed_ns));
+ statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(stats.gc_cpu_ns),
+ TimeToSecondsDbl(stats.gc_elapsed_ns));
- if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
- statsPrintf("%9" FMT_Word " %9.9s %9.9s",
- (W_)stats.gc.allocated_bytes, "", "");
- statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
- }
+#if defined(PROFILING)
+ statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(sum->rp_cpu_ns),
+ TimeToSecondsDbl(sum->rp_elapsed_ns));
+ statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(sum->hc_cpu_ns),
+ TimeToSecondsDbl(sum->hc_elapsed_ns));
+#endif
+ statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(sum->exit_cpu_ns),
+ TimeToSecondsDbl(sum->exit_elapsed_ns));
+ statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
+ TimeToSecondsDbl(stats.cpu_ns),
+ TimeToSecondsDbl(stats.elapsed_ns));
+#if !defined(THREADED_RTS)
+ statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
+ sum->gc_cpu_percent * 100,
+ sum->gc_elapsed_percent * 100);
+#endif
+
+ showStgWord64(sum->alloc_rate, temp, true/*commas*/);
+
+ statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
+
+ statsPrintf(" Productivity %5.1f%% of total user, "
+ "%.1f%% of total elapsed\n\n",
+ sum->productivity_cpu_percent * 100,
+ sum->productivity_elapsed_percent * 100);
- // heapCensus() is called by the GC, so RP and HC time are
- // included in the GC stats. We therefore subtract them to
- // obtain the actual GC cpu time.
- gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time);
- gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time);
+ // See Note [Internal Counter Stats] for a description of the
+ // following counters. If you add a counter here, please remember
+ // to update the Note.
+ if (RtsFlags.MiscFlags.internalCounters) {
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ const int32_t col_width[] = {4, -30, 14, 14};
+ statsPrintf("Internal Counters:\n");
+ statsPrintf("%*s" "%*s" "%*s" "%*s" "\n"
+ , col_width[0], ""
+ , col_width[1], "SpinLock"
+ , col_width[2], "Spins"
+ , col_width[3], "Yields");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "gc_alloc_block_sync"
+ , col_width[2], gc_alloc_block_sync.spin
+ , col_width[3], gc_alloc_block_sync.yield);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "gc_spin"
+ , col_width[2], stats.gc_spin_spin
+ , col_width[3], stats.gc_spin_yield);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "mut_spin"
+ , col_width[2], stats.mut_spin_spin
+ , col_width[3], stats.mut_spin_yield);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_gc"
+ , col_width[2], whitehole_gc_spin
+ , col_width[3], "n/a");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_threadPaused"
+ , col_width[2], whitehole_threadPaused_spin
+ , col_width[3], "n/a");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_executeMessage"
+ , col_width[2], whitehole_executeMessage_spin
+ , col_width[3], "n/a");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_lockClosure"
+ , col_width[2], whitehole_lockClosure_spin
+ , col_width[3], whitehole_lockClosure_yield);
+ // waitForGcThreads isn't really spin-locking (see the function)
+ // but these numbers still seem useful.
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "waitForGcThreads"
+ , col_width[2], waitForGcThreads_spin
+ , col_width[3], waitForGcThreads_yield);
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ int prefix_length = 0;
+ statsPrintf("%*s" "gen[%" FMT_Word32 "%n",
+ col_width[0], "", g, &prefix_length);
+ prefix_length -= col_width[0];
+ int suffix_length = col_width[1] + prefix_length;
+ suffix_length =
+ suffix_length > 0 ? col_width[1] : suffix_length;
+
+ statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , suffix_length, "].sync"
+ , col_width[2], generations[g].sync.spin
+ , col_width[3], generations[g].sync.yield);
+ }
+ statsPrintf("\n");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "any_work"
+ , col_width[2], stats.any_work);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "no_work"
+ , col_width[2], stats.no_work);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "scav_find_work"
+ , col_width[2], stats.scav_find_work);
+#elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN
+ statsPrintf("Internal Counters require the RTS to be built "
+ "with PROF_SPIN"); // PROF_SPIN is not #defined here
+#else // THREADED_RTS
+ statsPrintf("Internal Counters require the threaded RTS");
+#endif
+ }
+}
- init_cpu = get_init_cpu();
- init_elapsed = get_init_elapsed();
+static void report_machine_readable (const RTSSummaryStats * sum)
+{
+ // We should do no calculation, other than unit changes and formatting, and
+ // we should not use any data from outside of globals, sum and stats
+ // here. See Note [RTS Stats Reporting]
+ uint32_t g;
+
+#define MR_STAT(field_name,format,value) \
+ statsPrintf(" ,(\"" field_name "\", \"%" format "\")\n", value)
+#define MR_STAT_GEN(gen,field_name,format,value) \
+ statsPrintf(" ,(\"gen_%" FMT_Word32 "_" field_name "\", \"%" \
+ format "\")\n", g, value)
+
+ // These first values are for backwards compatibility.
+ // Some of these fields are duplicated later with more machine-readable
+ // names, or to match the field names in RTSStats.
+
+ // we don't use the helper macro for the first field because its prefix
+ // is different
+ statsPrintf(" [(\"%s\", \"%" FMT_Word64 "\")\n", "bytes allocated",
+ stats.allocated_bytes);
+ MR_STAT("num_GCs", FMT_Word32, stats.gcs);
+ MR_STAT("average_bytes_used", FMT_Word64, sum->average_bytes_used);
+ MR_STAT("max_bytes_used", FMT_Word64, stats.max_live_bytes);
+ MR_STAT("num_byte_usage_samples", FMT_Word32, stats.major_gcs);
+ MR_STAT("peak_megabytes_allocated", FMT_Word64,
+ stats.max_mem_in_use_bytes / (1024 * 1024));
+
+ MR_STAT("init_cpu_seconds", "f", TimeToSecondsDbl(stats.init_cpu_ns));
+ MR_STAT("init_wall_seconds", "f", TimeToSecondsDbl(stats.init_elapsed_ns));
+ MR_STAT("mut_cpu_seconds", "f", TimeToSecondsDbl(stats.mutator_cpu_ns));
+ MR_STAT("mut_wall_seconds", "f",
+ TimeToSecondsDbl(stats.mutator_elapsed_ns));
+ MR_STAT("GC_cpu_seconds", "f", TimeToSecondsDbl(stats.gc_cpu_ns));
+ MR_STAT("GC_wall_seconds", "f", TimeToSecondsDbl(stats.gc_elapsed_ns));
+
+ // end backward compatibility
+
+ // First, the rest of the times
+
+ MR_STAT("exit_cpu_seconds", "f", TimeToSecondsDbl(sum->exit_cpu_ns));
+ MR_STAT("exit_wall_seconds", "f", TimeToSecondsDbl(sum->exit_elapsed_ns));
+#if defined(PROFILING)
+ MR_STAT("rp_cpu_seconds", "f", TimeToSecondsDbl(sum->rp_cpu_ns));
+ MR_STAT("rp_wall_seconds", "f", TimeToSecondsDbl(sum->rp_elapsed_ns));
+ MR_STAT("hc_cpu_seconds", "f", TimeToSecondsDbl(sum->hc_cpu_ns));
+ MR_STAT("hc_wall_seconds", "f", TimeToSecondsDbl(sum->hc_elapsed_ns));
+#endif
+ MR_STAT("total_cpu_seconds", "f", TimeToSecondsDbl(stats.cpu_ns));
+ MR_STAT("total_wall_seconds", "f",
+ TimeToSecondsDbl(stats.elapsed_ns));
+
+ // next, the remainder of the fields of RTSStats, except internal counters
+
+ // The first two are duplicates of those above, but have more machine
+ // readable names that match the field names in RTSStats.
+
+
+ // gcs has been done as num_GCs above
+ MR_STAT("major_gcs", FMT_Word32, stats.major_gcs);
+ MR_STAT("allocated_bytes", FMT_Word64, stats.allocated_bytes);
+ MR_STAT("max_live_bytes", FMT_Word64, stats.max_live_bytes);
+ MR_STAT("max_large_objects_bytes", FMT_Word64,
+ stats.max_large_objects_bytes);
+ MR_STAT("max_compact_bytes", FMT_Word64, stats.max_compact_bytes);
+ MR_STAT("max_slop_bytes", FMT_Word64, stats.max_slop_bytes);
+ // This duplicates peak_megabytes_allocated above, except for the unit
+ MR_STAT("max_mem_in_use_bytes", FMT_Word64, stats.max_mem_in_use_bytes);
+ MR_STAT("cumulative_live_bytes", FMT_Word64, stats.cumulative_live_bytes);
+ MR_STAT("copied_bytes", FMT_Word64, stats.copied_bytes);
+ MR_STAT("par_copied_bytes", FMT_Word64, stats.par_copied_bytes);
+ MR_STAT("cumulative_par_max_copied_bytes", FMT_Word64,
+ stats.cumulative_par_max_copied_bytes);
+ MR_STAT("cumulative_par_balanced_copied_bytes", FMT_Word64,
+ stats.cumulative_par_balanced_copied_bytes);
+
+ // next, the computed fields in RTSSummaryStats
+#if !defined(THREADED_RTS) // !THREADED_RTS
+ MR_STAT("gc_cpu_percent", "f", sum->gc_cpu_percent);
+ MR_STAT("gc_wall_percent", "f", sum->gc_cpu_percent);
+#endif
+ MR_STAT("fragmentation_bytes", FMT_Word64, sum->fragmentation_bytes);
+ // average_bytes_used is done above
+ MR_STAT("alloc_rate", FMT_Word64, sum->alloc_rate);
+ MR_STAT("productivity_cpu_percent", "f", sum->productivity_cpu_percent);
+ MR_STAT("productivity_wall_percent", "f",
+ sum->productivity_elapsed_percent);
- // We do a GC during the EXIT phase. We'll attribute the cost of that
- // to GC instead of EXIT, so carefully subtract it from the EXIT time.
- exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
- exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
- exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu;
- exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed;
+ // next, the THREADED_RTS fields in RTSSummaryStats
- mut_elapsed = start_exit_elapsed - end_init_elapsed -
- (gc_elapsed - exit_gc_elapsed);
+#if defined(THREADED_RTS)
+ MR_STAT("bound_task_count", FMT_Word32, sum->bound_task_count);
+ MR_STAT("sparks_count", FMT_Word64, sum->sparks_count);
+ MR_STAT("sparks_converted", FMT_Word, sum->sparks.converted);
+ MR_STAT("sparks_overflowed", FMT_Word, sum->sparks.overflowed);
+ MR_STAT("sparks_dud ", FMT_Word, sum->sparks.dud);
+ MR_STAT("sparks_gcd", FMT_Word, sum->sparks.gcd);
+ MR_STAT("sparks_fizzled", FMT_Word, sum->sparks.fizzled);
+ MR_STAT("work_balance", "f", sum->work_balance);
+
+ // next, globals (other than internal counters)
+ MR_STAT("n_capabilities", FMT_Word32, n_capabilities);
+ MR_STAT("task_count", FMT_Word32, taskCount);
+ MR_STAT("peak_worker_count", FMT_Word32, peakWorkerCount);
+ MR_STAT("worker_count", FMT_Word32, workerCount);
+
+ // next, internal counters
+#if defined(PROF_SPIN)
+ MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
+ MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
+ gc_alloc_block_sync.yield);
+ MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
+ MR_STAT("gc_spin_spin", FMT_Word64, stats.gc_spin_spin);
+ MR_STAT("gc_spin_yield", FMT_Word64, stats.gc_spin_yield);
+ MR_STAT("mut_spin_spin", FMT_Word64, stats.mut_spin_spin);
+ MR_STAT("mut_spin_yield", FMT_Word64, stats.mut_spin_yield);
+ MR_STAT("waitForGcThreads_spin", FMT_Word64, waitForGcThreads_spin);
+ MR_STAT("waitForGcThreads_yield", FMT_Word64,
+ waitForGcThreads_yield);
+ MR_STAT("whitehole_gc_spin", FMT_Word64, whitehole_gc_spin);
+ MR_STAT("whitehole_lockClosure_spin", FMT_Word64,
+ whitehole_lockClosure_spin);
+ MR_STAT("whitehole_lockClosure_yield", FMT_Word64,
+ whitehole_lockClosure_yield);
+ MR_STAT("whitehole_executeMessage_spin", FMT_Word64,
+ whitehole_executeMessage_spin);
+ MR_STAT("whitehole_threadPaused_spin", FMT_Word64,
+ whitehole_threadPaused_spin);
+ MR_STAT("any_work", FMT_Word64,
+ stats.any_work);
+ MR_STAT("no_work", FMT_Word64,
+ stats.no_work);
+ MR_STAT("scav_find_work", FMT_Word64,
+ stats.scav_find_work);
+#endif // PROF_SPIN
+#endif // THREADED_RTS
+
+ // finally, per-generation stats, named e.g. gen_0_collections for
+ // generation 0
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ const GenerationSummaryStats* gc_sum = &sum->gc_summary_stats[g];
+ MR_STAT_GEN(g, "collections", FMT_Word32, gc_sum->collections);
+ MR_STAT_GEN(g, "par_collections", FMT_Word32, gc_sum->par_collections);
+ MR_STAT_GEN(g, "cpu_seconds", "f", TimeToSecondsDbl(gc_sum->cpu_ns));
+ MR_STAT_GEN(g, "wall_seconds", "f",
+ TimeToSecondsDbl(gc_sum->elapsed_ns));
+ MR_STAT_GEN(g, "max_pause_seconds", "f",
+ TimeToSecondsDbl(gc_sum->max_pause_ns));
+ MR_STAT_GEN(g, "avg_pause_seconds", "f",
+ TimeToSecondsDbl(gc_sum->avg_pause_ns));
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ MR_STAT_GEN(g, "sync_spin", FMT_Word64, gc_sum->sync_spin);
+ MR_STAT_GEN(g, "sync_yield", FMT_Word64, gc_sum->sync_yield);
+#endif
+ }
- mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu)
- - PROF_VAL(RP_tot_time + HC_tot_time);
- if (mut_cpu < 0) { mut_cpu = 0; }
+ statsPrintf(" ]\n");
+}
- // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and
- // subtracting, so the parts should add up to the total exactly. Note
- // that tot_elapsed is captured a tiny bit later than end_exit_elapsed,
- // so we don't use it here.
- ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed
- == end_exit_elapsed - start_init_elapsed);
+static void report_one_line(const RTSSummaryStats * sum)
+{
+ // We should do no calculation, other than unit changes and formatting, and
+ // we should not use any data from outside of globals, sum and stats
+ // here. See Note [RTS Stats Reporting]
+ /* print the long long separately to avoid bugginess on mingwin (2001-07-02,
+ mingw-0.5) */
+ statsPrintf("<<ghc: %" FMT_Word64 " bytes, "
+ "%" FMT_Word32 " GCs, "
+ "%" FMT_Word64 "/%" FMT_Word64 " avg/max bytes residency "
+ "(%" FMT_Word32 " samples), "
+ "%" FMT_Word64 "M in use, "
+ "%.3f INIT (%.3f elapsed), "
+ "%.3f MUT (%.3f elapsed), "
+ "%.3f GC (%.3f elapsed) :ghc>>\n",
+ stats.allocated_bytes,
+ stats.gcs,
+ sum->average_bytes_used,
+ stats.max_live_bytes,
+ stats.major_gcs,
+ stats.max_mem_in_use_bytes / (1024 * 1024),
+ TimeToSecondsDbl(stats.init_cpu_ns),
+ TimeToSecondsDbl(stats.init_elapsed_ns),
+ TimeToSecondsDbl(stats.mutator_cpu_ns),
+ TimeToSecondsDbl(stats.mutator_elapsed_ns),
+ TimeToSecondsDbl(stats.gc_cpu_ns),
+ TimeToSecondsDbl(stats.gc_elapsed_ns));
+}
- if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
- showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
- statsPrintf("%16s bytes allocated in the heap\n", temp);
+void
+stat_exit (void)
+{
+ RTSSummaryStats sum;
+ uint32_t g;
- showStgWord64(stats.copied_bytes, temp, true/*commas*/);
- statsPrintf("%16s bytes copied during GC\n", temp);
+ init_RTSSummaryStats(&sum);
+ if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
+ // First we tidy the times in stats, and populate the times in sum.
+ // In particular, we adjust the gc_* time in stats to remove
+ // profiling times.
+ {
+ Time now_cpu_ns, now_elapsed_ns;
+ Time exit_gc_cpu = 0;
+ Time exit_gc_elapsed = 0;
+ Time prof_cpu = 0;
+ Time prof_elapsed = 0;
+
+ getProcessTimes( &now_cpu_ns, &now_elapsed_ns);
+
+ stats.cpu_ns = now_cpu_ns - start_init_cpu;
+ stats.elapsed_ns = now_elapsed_ns - start_init_elapsed;
+ /* avoid divide by zero if stats.cpu_ns is measured as 0.00
+ seconds -- SDM */
+ if (stats.cpu_ns <= 0) { stats.cpu_ns = 1; }
+ if (stats.elapsed_ns <= 0) { stats.elapsed_ns = 1; }
+
+ prof_cpu = PROF_VAL(RP_tot_time + HC_tot_time);
+ prof_elapsed = PROF_VAL(RPe_tot_time + HCe_tot_time);
+
+ // heapCensus() is called by the GC, so RP and HC time are
+ // included in the GC stats. We therefore subtract them to
+ // obtain the actual GC cpu time.
+ stats.gc_cpu_ns -= prof_cpu;
+ stats.gc_elapsed_ns -= prof_elapsed;
- if ( stats.major_gcs > 0 ) {
- showStgWord64(stats.max_live_bytes, temp, true/*commas*/);
- statsPrintf("%16s bytes maximum residency (%" FMT_Word32
- " sample(s))\n",
- temp, stats.major_gcs);
- }
+#if defined(PROFILING)
+ sum.rp_cpu_ns = RP_tot_time;
+ sum.rp_elapsed_ns = RPe_tot_time;
+ sum.hc_cpu_ns = HC_tot_time;
+ sum.hc_elapsed_ns = HCe_tot_time;
+#endif // PROFILING
+
+ // We do a GC during the EXIT phase. We'll attribute the cost of
+ // that to GC instead of EXIT, so carefully subtract it from the
+ // EXIT time.
+ exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
+ exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
+
+ sum.exit_cpu_ns = end_exit_cpu
+ - start_exit_cpu
+ - exit_gc_cpu;
+ sum.exit_elapsed_ns = end_exit_elapsed
+ - start_exit_elapsed
+ - exit_gc_elapsed;
+
+ stats.mutator_cpu_ns = start_exit_cpu
+ - end_init_cpu
+ - (stats.gc_cpu_ns - exit_gc_cpu)
+ - prof_cpu;
+ stats.mutator_elapsed_ns = start_exit_elapsed
+ - end_init_elapsed
+ - (stats.gc_elapsed_ns - exit_gc_elapsed)
+ - prof_elapsed;
+
+ if (stats.mutator_cpu_ns < 0) { stats.mutator_cpu_ns = 0; }
+
+ // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding
+ // and subtracting, so the parts should add up to the total exactly.
+ // Note that stats.elapsed_ns is captured a tiny bit later than
+ // end_exit_elapsed, so we don't use it here.
+ ASSERT(stats.init_elapsed_ns
+ + stats.mutator_elapsed_ns
+ + stats.gc_elapsed_ns
+ + sum.exit_elapsed_ns
+ == end_exit_elapsed - start_init_elapsed);
- showStgWord64(stats.max_slop_bytes, temp, true/*commas*/);
- statsPrintf("%16s bytes maximum slop\n", temp);
-
- statsPrintf("%16" FMT_SizeT " MB total memory in use (%"
- FMT_SizeT " MB lost due to fragmentation)\n\n",
- (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
- (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
-
- /* Print garbage collections in each gen */
- statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- gen = &generations[g];
- statsPrintf(" Gen %2d %5d colls, %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
- gen->no,
- gen->collections,
- gen->par_collections,
- TimeToSecondsDbl(GC_coll_cpu[g]),
- TimeToSecondsDbl(GC_coll_elapsed[g]),
- gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
- TimeToSecondsDbl(GC_coll_max_pause[g]));
- }
+ }
-#if defined(THREADED_RTS)
- if (RtsFlags.ParFlags.parGcEnabled && stats.par_copied_bytes > 0) {
- // See Note [Work Balance]
- statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
- 100 * (double)stats.cumulative_par_balanced_copied_bytes /
- (double)stats.par_copied_bytes);
+ // REVIEWERS: it's not clear to me why the following isn't done in
+ // stat_endGC of the last garbage collection.
+
+ // We account for the last garbage collection
+ {
+ uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+ stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+ stats.allocated_bytes = tot_alloc_bytes;
+ if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
+ statsPrintf("%9" FMT_Word " %9.9s %9.9s",
+ (W_)stats.gc.allocated_bytes, "", "");
+ statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
}
-#endif
- statsPrintf("\n");
+ }
-#if defined(THREADED_RTS)
- statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
- taskCount, taskCount - workerCount,
- peakWorkerCount, workerCount,
- n_capabilities);
-
- statsPrintf("\n");
-
- {
- uint32_t i;
- SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
- for (i = 0; i < n_capabilities; i++) {
- sparks.created += capabilities[i]->spark_stats.created;
- sparks.dud += capabilities[i]->spark_stats.dud;
- sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
- sparks.converted += capabilities[i]->spark_stats.converted;
- sparks.gcd += capabilities[i]->spark_stats.gcd;
- sparks.fizzled += capabilities[i]->spark_stats.fizzled;
- }
-
- statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
- sparks.created + sparks.dud + sparks.overflowed,
- sparks.converted, sparks.overflowed, sparks.dud,
- sparks.gcd, sparks.fizzled);
+ // We populate the remainder (non-time elements) of sum
+ {
+ #if defined(THREADED_RTS)
+ uint32_t i;
+ sum.bound_task_count = taskCount - workerCount;
+
+ for (i = 0; i < n_capabilities; i++) {
+ sum.sparks.created += capabilities[i]->spark_stats.created;
+ sum.sparks.dud += capabilities[i]->spark_stats.dud;
+ sum.sparks.overflowed+=
+ capabilities[i]->spark_stats.overflowed;
+ sum.sparks.converted +=
+ capabilities[i]->spark_stats.converted;
+ sum.sparks.gcd += capabilities[i]->spark_stats.gcd;
+ sum.sparks.fizzled += capabilities[i]->spark_stats.fizzled;
}
-#endif
-
- statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
- TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));
- statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
- TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
- statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
- TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
+ sum.sparks_count = sum.sparks.created
+ + sum.sparks.dud
+ + sum.sparks.overflowed;
-#if defined(PROFILING)
- statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
- TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
- statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
- TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
-#endif
- statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
- TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
- statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
- TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
-#if !defined(THREADED_RTS)
- statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
- TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
- TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
-#endif
-
- if (mut_cpu == 0) {
- showStgWord64(0, temp, true/*commas*/);
+ if (RtsFlags.ParFlags.parGcEnabled && stats.par_copied_bytes > 0) {
+ // See Note [Work Balance]
+ sum.work_balance =
+ (double)stats.cumulative_par_balanced_copied_bytes
+ / (double)stats.par_copied_bytes;
} else {
- showStgWord64(
- (StgWord64)((double)stats.allocated_bytes /
- TimeToSecondsDbl(mut_cpu)),
- temp, true/*commas*/);
+ sum.work_balance = 0;
}
- statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
-
- statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
- TimeToSecondsDbl(tot_cpu - gc_cpu -
- PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
- / TimeToSecondsDbl(tot_cpu),
- TimeToSecondsDbl(tot_elapsed - gc_elapsed -
- PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
- / TimeToSecondsDbl(tot_elapsed));
-#if defined(THREADED_RTS) && defined(PROF_SPIN)
- {
- uint32_t g;
-
- statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
- statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
- }
+ #else // THREADED_RTS
+ // Time is an integral type, so convert to double before
+ // dividing to avoid truncation to zero.
+ sum.gc_cpu_percent = TimeToSecondsDbl(stats.gc_cpu_ns)
+ / TimeToSecondsDbl(stats.cpu_ns);
+ sum.gc_elapsed_percent = TimeToSecondsDbl(stats.gc_elapsed_ns)
+ / TimeToSecondsDbl(stats.elapsed_ns);
+ #endif // THREADED_RTS
+
+ sum.fragmentation_bytes =
+ (uint64_t)(peak_mblocks_allocated
+ * BLOCKS_PER_MBLOCK
+ * BLOCK_SIZE_W
+ - hw_alloc_blocks * BLOCK_SIZE_W)
+ * (uint64_t)sizeof(W_); // BLOCK_SIZE_W is in words
+
+ sum.average_bytes_used = stats.major_gcs == 0 ? 0 :
+ stats.cumulative_live_bytes/stats.major_gcs;
+
+ sum.alloc_rate = stats.mutator_cpu_ns == 0 ? 0 :
+ (uint64_t)((double)stats.allocated_bytes
+ / TimeToSecondsDbl(stats.mutator_cpu_ns));
+
+ // REVIEWERS: These two values didn't used to include the exit times
+ sum.productivity_cpu_percent =
+ TimeToSecondsDbl(stats.cpu_ns
+ - stats.gc_cpu_ns
+ - sum.rp_cpu_ns
+ - sum.hc_cpu_ns
+ - stats.init_cpu_ns
+ - sum.exit_cpu_ns)
+ / TimeToSecondsDbl(stats.cpu_ns);
+
+ sum.productivity_elapsed_percent =
+ TimeToSecondsDbl(stats.elapsed_ns
+ - stats.gc_elapsed_ns
+ - sum.rp_elapsed_ns
+ - sum.hc_elapsed_ns
+ - stats.init_elapsed_ns
+ - sum.exit_elapsed_ns)
+ / TimeToSecondsDbl(stats.elapsed_ns);
+
+ for(g = 0; g < RtsFlags.GcFlags.generations; ++g) {
+ const generation* gen = &generations[g];
+ GenerationSummaryStats* gen_stats = &sum.gc_summary_stats[g];
+ gen_stats->collections = gen->collections;
+ gen_stats->par_collections = gen->par_collections;
+ gen_stats->cpu_ns = GC_coll_cpu[g];
+ gen_stats->elapsed_ns = GC_coll_elapsed[g];
+ gen_stats->max_pause_ns = GC_coll_max_pause[g];
+ gen_stats->avg_pause_ns = gen->collections == 0 ?
+ 0 : (GC_coll_elapsed[g] / gen->collections);
+ #if defined(THREADED_RTS) && defined(PROF_SPIN)
+ gen_stats->sync_spin = gen->sync.spin;
+ gen_stats->sync_yield = gen->sync.yield;
+ #endif // PROF_SPIN
}
-#endif
+ }
+
+ // Now we generate the report
+ if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
+ report_summary(&sum);
}
if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
- char *fmt;
- if (RtsFlags.MiscFlags.machineReadable) {
- fmt =
- " [(\"bytes allocated\", \"%" FMT_Word64 "\")\n"
- " ,(\"num_GCs\", \"%" FMT_Word32 "\")\n"
- " ,(\"average_bytes_used\", \"%" FMT_Word64 "\")\n"
- " ,(\"max_bytes_used\", \"%" FMT_Word64 "\")\n"
- " ,(\"num_byte_usage_samples\", \"%" FMT_Word32 "\")\n"
- " ,(\"peak_megabytes_allocated\", \"%" FMT_Word64 "\")\n"
- " ,(\"init_cpu_seconds\", \"%.3f\")\n"
- " ,(\"init_wall_seconds\", \"%.3f\")\n"
- " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
- " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
- " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
- " ,(\"GC_wall_seconds\", \"%.3f\")\n"
- " ]\n";
- }
- else {
- fmt =
- "<<ghc: %" FMT_Word64 " bytes, "
- "%" FMT_Word32 " GCs, "
- "%" FMT_Word64 "/%" FMT_Word64 " avg/max bytes residency (%" FMT_Word32 " samples), "
- "%" FMT_Word64 "M in use, "
- "%.3f INIT (%.3f elapsed), "
- "%.3f MUT (%.3f elapsed), "
- "%.3f GC (%.3f elapsed) :ghc>>\n";
- }
- /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
- statsPrintf(fmt,
- stats.allocated_bytes,
- stats.gcs,
- (uint64_t)
- (stats.major_gcs == 0 ? 0 :
- stats.cumulative_live_bytes/stats.major_gcs),
- stats.max_live_bytes,
- stats.major_gcs,
- (uint64_t) (peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
- TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
- TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
- TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
+ if (RtsFlags.MiscFlags.machineReadable) {
+ report_machine_readable(&sum);
+ }
+ else {
+ report_one_line(&sum);
+ }
}
+ free_RTSSummaryStats(&sum);
statsFlush();
statsClose();
}
@@ -910,6 +1338,68 @@ the number of gc threads is limited to the number of cores.
See #13830
*/
+/*
+Note [Internal Counter Stats]
+-----------------------------
+What do the counts at the end of a '+RTS -s --internal-counters' report mean?
+They are detailed below. Most of these counters are used by multiple threads
+with no attempt at synchronisation. This means that reported values may be
+lower than the true value and this becomes more likely and more severe as
+contention increases.
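+
+For example, two threads can both read the same counter value, add one, and
+store the same result back, losing one increment:
+
+    thread A: t = spin;       // reads 41
+    thread B: t = spin;       // also reads 41
+    thread A: spin = t + 1;   // writes 42
+    thread B: spin = t + 1;   // writes 42 again; one busy-spin goes uncounted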
+
+The first counters are for various SpinLock-like constructs in the RTS. See
+Spinlock.h for the definition of a SpinLock. We maintain up to two counters per
+SpinLock:
+* spin: The number of busy-spins over the length of the program.
+* yield: The number of times the SpinLock spun SPIN_COUNT times without success
+ and called yieldThread().
+Not all of these are actual SpinLocks, see the details below.
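+
+As a rough sketch (not the actual definition; see SpinLock.h), the acquire
+path that feeds these two counters looks like:
+
+    uint32_t i = 0;
+    while (cas(&lock, 0, 1) != 0) {   // contended: the lock is already held
+        counters.spin++;              // one busy-spin
+        if (++i == SPIN_COUNT) {
+            i = 0;
+            counters.yield++;         // spun SPIN_COUNT times without success
+            yieldThread();            // give up the CPU before retrying
+        }
+    }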
+
+Actual SpinLocks:
+* gc_alloc_block:
+ This SpinLock protects the block allocator and free list manager. See
+ BlockAlloc.c.
+* gc_spin and mut_spin:
+ These SpinLocks are used to herd gc worker threads during parallel garbage
+ collection. See gcWorkerThread, wakeup_gc_threads and releaseGCThreads.
+* gen[g].sync:
+ These SpinLocks, one per generation, protect the generations[g] data
+ structure during garbage collection.
+
+waitForGcThreads:
+ These counters are incremented while we wait for all threads to be ready
+ for a parallel garbage collection. We yield more than we spin in this case.
+
+In several places in the runtime we must take a lock on a closure. To do this,
+we replace its info table with stg_WHITEHOLE_info, spinning if it is already
+a white-hole. Sometimes we yieldThread() if we spin too long, sometimes we
+don't. We count these white-hole spins and include them in the SpinLocks table.
+If a particular loop does not yield, we put "n/a" in the table. The counters
+are named after the function containing the spinning loop, except that several
+loops in the garbage collector accumulate into whitehole_gc.
+TODO: Should these counters be more or less granular?
+
+white-hole spin counters:
+* whitehole_gc
+* whitehole_lockClosure
+* whitehole_executeMessage
+* whitehole_threadPaused
+
+We count the number of calls of several functions in the parallel garbage
+collector.
+
+Parallel garbage collector counters:
+* any_work:
+ A cheap function called whenever a gc_thread is ready for work. Does
+ not do any work.
+* no_work:
+ Incremented whenever any_work finds no work.
+* scav_find_work:
+  Called to do work when any_work returns true.
+
+*/
+
/* -----------------------------------------------------------------------------
stat_describe_gens
@@ -967,14 +1457,15 @@ statDescribeGens(void)
gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
- debugBelch("%8" FMT_Word " %8d %8d %9" FMT_Word " %9" FMT_Word "\n", gen_blocks, lge, compacts,
- gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
+ debugBelch("%8" FMT_Word " %8d %8d %9" FMT_Word " %9" FMT_Word "\n",
+ gen_blocks, lge, compacts, gen_live*(W_)sizeof(W_),
+ gen_slop*(W_)sizeof(W_));
tot_live += gen_live;
tot_slop += gen_slop;
}
debugBelch("----------------------------------------------------------------------\n");
debugBelch("%51s%9" FMT_Word " %9" FMT_Word "\n",
- "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
+ "",tot_live*(W_)sizeof(W_),tot_slop*(W_)sizeof(W_));
debugBelch("----------------------------------------------------------------------\n");
debugBelch("\n");
}
@@ -1005,8 +1496,7 @@ void getRTSStats( RTSStats *s )
s->cpu_ns = current_cpu - end_init_cpu;
s->elapsed_ns = current_elapsed - end_init_elapsed;
- s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns -
- PROF_VAL(RP_tot_time + HC_tot_time);
+ s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns;
s->mutator_elapsed_ns = current_elapsed - end_init_elapsed -
stats.gc_elapsed_ns;
}
diff --git a/rts/Stats.h b/rts/Stats.h
index 5d9cf04fa7..fbcca110e9 100644
--- a/rts/Stats.h
+++ b/rts/Stats.h
@@ -9,6 +9,8 @@
#pragma once
#include "GetTime.h"
+#include "sm/GC.h"
+#include "Sparks.h"
#include "BeginPrivate.h"
@@ -30,7 +32,10 @@ void stat_startGCSync(struct gc_thread_ *_gct);
void stat_startGC(Capability *cap, struct gc_thread_ *_gct);
void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live,
W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads,
- W_ par_max_copied, W_ par_balanced_copied);
+ W_ par_max_copied, W_ par_balanced_copied,
+ W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin,
+ W_ mut_spin_yield, W_ any_work, W_ no_work,
+ W_ scav_find_work);
#if defined(PROFILING)
void stat_startRP(void);
@@ -63,4 +68,49 @@ void statDescribeGens( void );
Time stat_getElapsedGCTime(void);
Time stat_getElapsedTime(void);
+typedef struct GenerationSummaryStats_ {
+ uint32_t collections;
+ uint32_t par_collections;
+ Time cpu_ns;
+ Time elapsed_ns;
+ Time max_pause_ns;
+ Time avg_pause_ns;
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ uint64_t sync_spin;
+ uint64_t sync_yield;
+#endif
+} GenerationSummaryStats;
+
+typedef struct RTSSummaryStats_ {
+ // These profiling times could potentially be in RTSStats. However, I'm not
+ // confident enough to do this now, since there is some logic depending on
+ // global state that I do not understand. (Or if I do understand it, it's
+ // wrong)
+ Time rp_cpu_ns;
+ Time rp_elapsed_ns;
+ Time hc_cpu_ns;
+ Time hc_elapsed_ns;
+
+ Time exit_cpu_ns;
+ Time exit_elapsed_ns;
+
+#if defined(THREADED_RTS)
+ uint32_t bound_task_count;
+ uint64_t sparks_count;
+ SparkCounters sparks;
+ double work_balance;
+#else // THREADED_RTS
+ double gc_cpu_percent;
+ double gc_elapsed_percent;
+#endif
+ uint64_t fragmentation_bytes;
+ uint64_t average_bytes_used; // This is not shown in the '+RTS -s' report
+ uint64_t alloc_rate;
+ double productivity_cpu_percent;
+ double productivity_elapsed_percent;
+
+ // one for each generation, 0 first
+ GenerationSummaryStats* gc_summary_stats;
+} RTSSummaryStats;
+
#include "EndPrivate.h"
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 2166249c2a..92b0696c2b 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -59,8 +59,8 @@
#include "StgRun.h"
#include "Capability.h"
-#if defined(DEBUG)
#include "RtsUtils.h"
+#if defined(DEBUG)
#include "Printer.h"
#endif
@@ -90,20 +90,18 @@ StgFunPtr StgReturn(void)
#else /* !USE_MINIINTERPRETER */
-#if defined(LEADING_UNDERSCORE)
-#define STG_RUN "_StgRun"
-#define STG_RETURN "_StgReturn"
-#else
-#define STG_RUN "StgRun"
-#define STG_RETURN "StgReturn"
-#endif
-
#if defined(mingw32_HOST_OS)
-// On windows the stack has to be allocated 4k at a time, otherwise
-// we get a segfault. The C compiler knows how to do this (it calls
-// _alloca()), so we make sure that we can allocate as much stack as
-// we need:
-StgWord8 *win32AllocStack(void)
+/*
+ * Note [Windows Stack allocations]
+ *
+ * On windows the stack has to be allocated 4k at a time, otherwise
+ * we get a segfault. The C compiler knows how to do this (it calls
+ * _alloca()), so we make sure that we can allocate as much stack as
+ * we need. However since we are doing a local stack allocation and the value
+ * isn't valid outside the frame, compilers are free to optimize this allocation
+ * and the corresponding stack check away. So to prevent that we request that
+ * this function never be optimized (See #14669). */
+STG_NO_OPTIMIZE StgWord8 *win32AllocStack(void)
{
StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
return stack;
@@ -230,7 +228,7 @@ StgRunIsImplementedInAssembler(void)
);
}
-#endif
+#endif // defined(i386_HOST_ARCH)
/* ----------------------------------------------------------------------------
x86-64 is almost the same as plain x86.
@@ -251,6 +249,121 @@ StgRunIsImplementedInAssembler(void)
#define STG_HIDDEN ".hidden "
#endif
+/*
+Note [Unwinding foreign exports on x86-64]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For foreign exports, that is Haskell functions exported as C functions when
+we unwind we have to unwind from Haskell code into C code. The current story
+is as follows:
+
+  * The Haskell stack always has a stg_stop_thread_info frame at the bottom
+ * We annotate stg_stop_thread_info to unwind the instruction pointer to a
+ label inside StgRun called StgRunJmp. It's the last instruction before the
+ code jumps into Haskell.
+  * StgRun - which is implemented in assembler - is annotated with some manual
+ unwinding information. It unwinds all the registers that it has saved
+ on the stack. This is important as rsp and rbp are often required for
+ getting to the next frame and the rest of the saved registers are useful
+ when inspecting locals in gdb.
+
+
+ Example x86-64 stack for an FFI call
+ from C into a Haskell function:
+
+
+ HASKELL HEAP
+ "ADDRESS SPACE"
+
+ +--------------------+ <------ rbp
+ | |
+ | |
+ | |
+ | |
+ | Haskell |
+ | evaluation stack |
+ | |
+ | |
+ |--------------------|
+ |stg_catch_frame_info|
+ |--------------------|
+ | stg_forceIO_info |
+ |--------------------|
+ |stg_stop_thread_info| -------
+ +--------------------+ |
+ ... |
+ (other heap objects) |
+ ... |
+ |
+ |
+ |
+ C STACK "ADDRESS SPACE" |
+ v
+ +-----------------------------+ <------ rsp
+ | |
+ | RESERVED_C_STACK_BYTES ~16k |
+ | |
+ |-----------------------------|
+ | rbx ||
+ |-----------------------------| \
+ | rbp | |
+ |-----------------------------| \
+ | r12 | |
+ |-----------------------------| \
+ | r13 | | STG_RUN_STACK_FRAME_SIZE
+ |-----------------------------| /
+ | r14 | |
+ |-----------------------------| /
+ | r15 | |
+ |-----------------------------|/
+ | rip saved by call StgRun |
+ | in schedule() |
+ +-----------------------------+
+ ...
+ schedule() stack frame
+
+
+ Lower addresses on the top
+
+One little snag in this approach is that the annotations accepted by the
+assembler are surprisingly unexpressive. I had to resort to a .cfi_escape
+and hand-assemble a DWARF expression. What made it worse was that big numbers
+are LEB128 encoded, which makes them variable in length, with the number of
+bytes depending on the magnitude.
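+
+As a concrete illustration (not taken from the code): LEB128 emits an integer
+seven bits at a time, least significant group first, setting the high bit of
+every byte except the last. For instance, encoding 20488:
+
+    20488 = 0b1'0100000'0001000
+    byte 1: 0b0001000 | 0x80 = 0x88   (low 7 bits, continuation bit set)
+    byte 2: 0b0100000 | 0x80 = 0xa0   (next 7 bits, continuation bit set)
+    byte 3: 0b0000001        = 0x01   (final byte, continuation bit clear)
+
+The RSP_DELTA encoding below implements exactly this scheme, with one
+preprocessor conditional per potential byte.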
+
+Here's an example stack generated this way:
+
+ Thread 1 "m" hit Breakpoint 1, Fib_zdfstableZZC0ZZCmainZZCFibZZCfib1_info () at Fib.hs:9
+ 9 fib a = return (a + 1)
+ #0 Fib_zdfstableZZC0ZZCmainZZCFibZZCfib1_info () at Fib.hs:9
+ #1 stg_catch_frame_info () at rts/Exception.cmm:372
+ #2 stg_forceIO_info () at rts/StgStartup.cmm:178
+ #3 stg_stop_thread_info () at rts/StgStartup.cmm:42
+ #4 0x00000000007048ab in StgRunIsImplementedInAssembler () at rts/StgCRun.c:255
+ #5 0x00000000006fcf42 in schedule (initialCapability=initialCapability@entry=0x8adac0 <MainCapability>, task=task@entry=0x8cf2a0) at rts/Schedule.c:451
+ #6 0x00000000006fe18e in scheduleWaitThread (tso=0x4200006388, ret=<optimized out>, pcap=0x7fffffffdac0) at rts/Schedule.c:2533
+ #7 0x000000000040a21e in hs_fib ()
+ #8 0x000000000040a083 in main (argc=1, argv=0x7fffffffdc48) at m.cpp:15
+
+(This is from patched gdb. See Note [Info Offset].)
+
+The previous approach was to encode the unwinding information for select
+registers in stg_stop_thread_info with Cmm annotations. The unfortunate thing
+about that approach was that it required introduction of an artificial MachSp
+register that wasn't meaningful outside unwinding. I discovered that to get
+stack unwinding working under -threaded runtime I also needed to unwind rbp
+which would require adding MachRbp. If we wanted to see saved locals in gdb,
+we'd have to add more. The core of the problem is that Cmm is architecture
+independent, while unwinding isn't.
+
+Note [Unwinding foreign imports]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For unwinding foreign imports, that is, C functions exposed as Haskell
+functions, no special handling is required.
+saved on the stack by the call instruction. Then we perform regular Haskell
+stack unwinding.
+*/
+
+
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
@@ -273,10 +386,61 @@ StgRunIsImplementedInAssembler(void)
"movq %%r14,32(%%rax)\n\t"
"movq %%r15,40(%%rax)\n\t"
#if defined(mingw32_HOST_OS)
+ /*
+ * Additional callee saved registers on Win64. This must match
+ * callClobberedRegisters in compiler/nativeGen/X86/Regs.hs as
+ * both represent the Win64 calling convention.
+ */
"movq %%rdi,48(%%rax)\n\t"
"movq %%rsi,56(%%rax)\n\t"
- "movq %%xmm6,64(%%rax)\n\t"
+ "movq %%xmm6, 64(%%rax)\n\t"
+ "movq %%xmm7, 72(%%rax)\n\t"
+ "movq %%xmm8, 80(%%rax)\n\t"
+ "movq %%xmm9, 88(%%rax)\n\t"
+ "movq %%xmm10, 96(%%rax)\n\t"
+ "movq %%xmm11,104(%%rax)\n\t"
+ "movq %%xmm12,112(%%rax)\n\t"
+ "movq %%xmm13,120(%%rax)\n\t"
+ "movq %%xmm14,128(%%rax)\n\t"
+ "movq %%xmm15,136(%%rax)\n\t"
+#endif
+
+ /*
+ * Let the unwinder know where we saved the registers
+ * See Note [Unwinding foreign exports on x86-64].
+ */
+ ".cfi_def_cfa rsp, 0\n\t"
+ ".cfi_offset rbx, %c2\n\t"
+ ".cfi_offset rbp, %c3\n\t"
+ ".cfi_offset r12, %c4\n\t"
+ ".cfi_offset r13, %c5\n\t"
+ ".cfi_offset r14, %c6\n\t"
+ ".cfi_offset r15, %c7\n\t"
+ ".cfi_offset rip, %c8\n\t"
+ ".cfi_escape " // DW_CFA_val_expression is not expressible otherwise
+ "0x16, " // DW_CFA_val_expression
+ "0x07, " // register num 7 - rsp
+ "0x04, " // block length
+ "0x77, " // DW_OP_breg7 - signed LEB128 offset from rsp
+#define RSP_DELTA (RESERVED_C_STACK_BYTES + STG_RUN_STACK_FRAME_SIZE + 8)
+ "%c9" // signed LEB128 encoded delta - byte 1
+#if (RSP_DELTA >> 7) > 0
+ ", %c10" // signed LEB128 encoded delta - byte 2
#endif
+
+#if (RSP_DELTA >> 14) > 0
+ ", %c11" // signed LEB128 encoded delta - byte 3
+#endif
+
+#if (RSP_DELTA >> 21) > 0
+ ", %c12" // signed LEB128 encoded delta - byte 4
+#endif
+
+#if (RSP_DELTA >> 28) > 0
+#error "RSP_DELTA too big"
+#endif
+ "\n\t"
+
/*
* Set BaseReg
*/
@@ -293,6 +457,17 @@ StgRunIsImplementedInAssembler(void)
#else
"movq %%rdi,%%rax\n\t"
#endif
+
+ STG_GLOBAL xstr(STG_RUN_JMP) "\n"
+#if !defined(mingw32_HOST_OS)
+ STG_HIDDEN xstr(STG_RUN_JMP) "\n"
+#endif
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ // If we have deadstripping enabled and a label is detected as unused
+    // If we have dead-stripping enabled and a label is detected as unused,
+    // the code gets nop'd out.
+#endif
+ xstr(STG_RUN_JMP) ":\n\t"
"jmp *%%rax\n\t"
".globl " STG_RETURN "\n"
@@ -311,16 +486,50 @@ StgRunIsImplementedInAssembler(void)
"movq 32(%%rsp),%%r14\n\t"
"movq 40(%%rsp),%%r15\n\t"
#if defined(mingw32_HOST_OS)
- "movq 48(%%rsp),%%rdi\n\t"
- "movq 56(%%rsp),%%rsi\n\t"
- "movq 64(%%rsp),%%xmm6\n\t"
+ "movq 48(%%rsp),%%rdi\n\t"
+ "movq 56(%%rsp),%%rsi\n\t"
+ "movq 64(%%rsp),%%xmm6\n\t"
+        "movq 72(%%rsp),%%xmm7\n\t"
+        "movq 80(%%rsp),%%xmm8\n\t"
+        "movq 88(%%rsp),%%xmm9\n\t"
+        "movq 96(%%rsp),%%xmm10\n\t"
+        "movq 104(%%rsp),%%xmm11\n\t"
+        "movq 112(%%rsp),%%xmm12\n\t"
+        "movq 120(%%rsp),%%xmm13\n\t"
+        "movq 128(%%rsp),%%xmm14\n\t"
+        "movq 136(%%rsp),%%xmm15\n\t"
#endif
"addq %1, %%rsp\n\t"
"retq"
:
: "i"(RESERVED_C_STACK_BYTES),
- "i"(STG_RUN_STACK_FRAME_SIZE /* stack frame size */)
+ "i"(STG_RUN_STACK_FRAME_SIZE /* stack frame size */),
+ "i"(RESERVED_C_STACK_BYTES /* rbx relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 8 /* rbp relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 16 /* r12 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 24 /* r13 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 32 /* r14 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 40 /* r15 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + STG_RUN_STACK_FRAME_SIZE
+ /* rip relative to cfa */),
+ "i"((RSP_DELTA & 127) | (128 * ((RSP_DELTA >> 7) > 0)))
+ /* signed LEB128-encoded delta from rsp - byte 1 */
+#if (RSP_DELTA >> 7) > 0
+ , "i"(((RSP_DELTA >> 7) & 127) | (128 * ((RSP_DELTA >> 14) > 0)))
+ /* signed LEB128-encoded delta from rsp - byte 2 */
+#endif
+
+#if (RSP_DELTA >> 14) > 0
+ , "i"(((RSP_DELTA >> 14) & 127) | (128 * ((RSP_DELTA >> 21) > 0)))
+ /* signed LEB128-encoded delta from rsp - byte 3 */
+#endif
+
+#if (RSP_DELTA >> 21) > 0
+ , "i"(((RSP_DELTA >> 21) & 127) | (128 * ((RSP_DELTA >> 28) > 0)))
+ /* signed LEB128-encoded delta from rsp - byte 4 */
+#endif
+#undef RSP_DELTA
);
/*
* See Note [Stack Alignment on X86]
@@ -766,7 +975,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
/*
* Save callee-saves registers on behalf of the STG code.
* Floating point registers only need the bottom 64 bits preserved.
- * We need to use the the names x16, x17, x29 and x30 instead of ip0
+ * We need to use the names x16, x17, x29 and x30 instead of ip0
 ip1, fp and lr because one of either clang or gcc doesn't understand
 the latter names.
*/
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 88371f2109..c3a83dd059 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -26,7 +26,7 @@ INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME,
W_ info_ptr, P_ unused)
/* no args => explicit stack */
{
- unwind UnwindReturnReg = return;
+ unwind Sp = W_[Sp + WDS(2)];
W_ new_tso;
W_ ret_off;
@@ -61,6 +61,7 @@ INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ info_ptr, W_ cccs)
INFO_TABLE_RET (stg_restore_cccs_eval, RET_SMALL, W_ info_ptr, W_ cccs)
return (P_ ret)
{
+ unwind Sp = Sp + WDS(2);
#if defined(PROFILING)
CCCS = cccs;
#endif
@@ -210,7 +211,7 @@ INFO_TABLE_RET( stg_apply_interp, RET_BCO )
Entry code for a BCO
------------------------------------------------------------------------- */
-INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO )
+INFO_TABLE_FUN( stg_BCO, 3, 0, BCO, "BCO", "BCO", ARG_BCO )
/* explicit stack */
{
/* entering a BCO means "apply it", same as a function */
@@ -301,6 +302,7 @@ retry:
// This could happen, if e.g. we got a BLOCKING_QUEUE that has
// just been replaced with an IND by another thread in
// wakeBlockingQueue().
+ // See Note [BLACKHOLE pointing to IND] in sm/Evac.c
goto retry;
}
@@ -352,11 +354,11 @@ INFO_TABLE(__stg_EAGER_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
}
INFO_TABLE(stg_BLOCKING_QUEUE_CLEAN,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKING_QUEUE")
-{ foreign "C" barf("BLOCKING_QUEUE_CLEAN object entered!") never returns; }
+{ foreign "C" barf("BLOCKING_QUEUE_CLEAN object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_BLOCKING_QUEUE_DIRTY,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKING_QUEUE")
-{ foreign "C" barf("BLOCKING_QUEUE_DIRTY object entered!") never returns; }
+{ foreign "C" barf("BLOCKING_QUEUE_DIRTY object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
@@ -374,16 +376,26 @@ loop:
// spin until the WHITEHOLE is updated
info = StgHeader_info(node);
if (info == stg_WHITEHOLE_info) {
+#if defined(PROF_SPIN)
+ W_[whitehole_lockClosure_spin] =
+ W_[whitehole_lockClosure_spin] + 1;
+#endif
i = i + 1;
if (i == SPIN_COUNT) {
i = 0;
+#if defined(PROF_SPIN)
+ W_[whitehole_lockClosure_yield] =
+ W_[whitehole_lockClosure_yield] + 1;
+#endif
ccall yieldThread();
}
+ // TODO: We should busy_wait_nop() here, but that's not currently
+ // defined in CMM.
goto loop;
}
jump %ENTRY_CODE(info) (node);
#else
- ccall barf("WHITEHOLE object entered!") never returns;
+ ccall barf("WHITEHOLE object (%p) entered!", R1) never returns;
#endif
}
@@ -394,10 +406,10 @@ loop:
------------------------------------------------------------------------- */
INFO_TABLE(stg_TSO, 0,0,TSO, "TSO", "TSO")
-{ foreign "C" barf("TSO object entered!") never returns; }
+{ foreign "C" barf("TSO object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_STACK, 0,0, STACK, "STACK", "STACK")
-{ foreign "C" barf("STACK object entered!") never returns; }
+{ foreign "C" barf("STACK object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
Weak pointers
@@ -408,7 +420,7 @@ INFO_TABLE(stg_STACK, 0,0, STACK, "STACK", "STACK")
------------------------------------------------------------------------- */
INFO_TABLE(stg_WEAK,1,4,WEAK,"WEAK","WEAK")
-{ foreign "C" barf("WEAK object entered!") never returns; }
+{ foreign "C" barf("WEAK object (%p) entered!", R1) never returns; }
/*
* It's important when turning an existing WEAK into a DEAD_WEAK
@@ -417,7 +429,7 @@ INFO_TABLE(stg_WEAK,1,4,WEAK,"WEAK","WEAK")
* DEAD_WEAK 5 non-pointer fields.
*/
INFO_TABLE_CONSTR(stg_DEAD_WEAK,0,5,0,CONSTR,"DEAD_WEAK","DEAD_WEAK")
-{ foreign "C" barf("DEAD_WEAK object entered!") never returns; }
+{ foreign "C" barf("DEAD_WEAK object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
C finalizer lists
@@ -426,7 +438,7 @@ INFO_TABLE_CONSTR(stg_DEAD_WEAK,0,5,0,CONSTR,"DEAD_WEAK","DEAD_WEAK")
------------------------------------------------------------------------- */
INFO_TABLE_CONSTR(stg_C_FINALIZER_LIST,1,4,0,CONSTR,"C_FINALIZER_LIST","C_FINALIZER_LIST")
-{ foreign "C" barf("C_FINALIZER_LIST object entered!") never returns; }
+{ foreign "C" barf("C_FINALIZER_LIST object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
NO_FINALIZER
@@ -436,7 +448,7 @@ INFO_TABLE_CONSTR(stg_C_FINALIZER_LIST,1,4,0,CONSTR,"C_FINALIZER_LIST","C_FINALI
------------------------------------------------------------------------- */
INFO_TABLE_CONSTR(stg_NO_FINALIZER,0,0,0,CONSTR_NOCAF,"NO_FINALIZER","NO_FINALIZER")
-{ foreign "C" barf("NO_FINALIZER object entered!") never returns; }
+{ foreign "C" barf("NO_FINALIZER object (%p) entered!", R1) never returns; }
CLOSURE(stg_NO_FINALIZER_closure,stg_NO_FINALIZER);
@@ -445,7 +457,7 @@ CLOSURE(stg_NO_FINALIZER_closure,stg_NO_FINALIZER);
------------------------------------------------------------------------- */
INFO_TABLE(stg_STABLE_NAME,0,1,PRIM,"STABLE_NAME","STABLE_NAME")
-{ foreign "C" barf("STABLE_NAME object entered!") never returns; }
+{ foreign "C" barf("STABLE_NAME object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
MVars
@@ -455,74 +467,116 @@ INFO_TABLE(stg_STABLE_NAME,0,1,PRIM,"STABLE_NAME","STABLE_NAME")
------------------------------------------------------------------------- */
INFO_TABLE(stg_MVAR_CLEAN,3,0,MVAR_CLEAN,"MVAR","MVAR")
-{ foreign "C" barf("MVAR object entered!") never returns; }
+{ foreign "C" barf("MVAR object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_MVAR_DIRTY,3,0,MVAR_DIRTY,"MVAR","MVAR")
-{ foreign "C" barf("MVAR object entered!") never returns; }
+{ foreign "C" barf("MVAR object (%p) entered!", R1) never returns; }
/* -----------------------------------------------------------------------------
STM
-------------------------------------------------------------------------- */
INFO_TABLE(stg_TVAR_CLEAN, 2, 1, TVAR, "TVAR", "TVAR")
-{ foreign "C" barf("TVAR_CLEAN object entered!") never returns; }
+{ foreign "C" barf("TVAR_CLEAN object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_TVAR_DIRTY, 2, 1, TVAR, "TVAR", "TVAR")
-{ foreign "C" barf("TVAR_DIRTY object entered!") never returns; }
+{ foreign "C" barf("TVAR_DIRTY object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_TVAR_WATCH_QUEUE, 3, 0, MUT_PRIM, "TVAR_WATCH_QUEUE", "TVAR_WATCH_QUEUE")
-{ foreign "C" barf("TVAR_WATCH_QUEUE object entered!") never returns; }
-
-INFO_TABLE(stg_ATOMIC_INVARIANT, 2, 1, MUT_PRIM, "ATOMIC_INVARIANT", "ATOMIC_INVARIANT")
-{ foreign "C" barf("ATOMIC_INVARIANT object entered!") never returns; }
-
-INFO_TABLE(stg_INVARIANT_CHECK_QUEUE, 3, 0, MUT_PRIM, "INVARIANT_CHECK_QUEUE", "INVARIANT_CHECK_QUEUE")
-{ foreign "C" barf("INVARIANT_CHECK_QUEUE object entered!") never returns; }
+{ foreign "C" barf("TVAR_WATCH_QUEUE object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_TREC_CHUNK, 0, 0, TREC_CHUNK, "TREC_CHUNK", "TREC_CHUNK")
-{ foreign "C" barf("TREC_CHUNK object entered!") never returns; }
+{ foreign "C" barf("TREC_CHUNK object (%p) entered!", R1) never returns; }
-INFO_TABLE(stg_TREC_HEADER, 3, 1, MUT_PRIM, "TREC_HEADER", "TREC_HEADER")
-{ foreign "C" barf("TREC_HEADER object entered!") never returns; }
+INFO_TABLE(stg_TREC_HEADER, 2, 1, MUT_PRIM, "TREC_HEADER", "TREC_HEADER")
+{ foreign "C" barf("TREC_HEADER object (%p) entered!", R1) never returns; }
INFO_TABLE_CONSTR(stg_END_STM_WATCH_QUEUE,0,0,0,CONSTR_NOCAF,"END_STM_WATCH_QUEUE","END_STM_WATCH_QUEUE")
-{ foreign "C" barf("END_STM_WATCH_QUEUE object entered!") never returns; }
-
-INFO_TABLE_CONSTR(stg_END_INVARIANT_CHECK_QUEUE,0,0,0,CONSTR_NOCAF,"END_INVARIANT_CHECK_QUEUE","END_INVARIANT_CHECK_QUEUE")
-{ foreign "C" barf("END_INVARIANT_CHECK_QUEUE object entered!") never returns; }
+{ foreign "C" barf("END_STM_WATCH_QUEUE object (%p) entered!", R1) never returns; }
INFO_TABLE_CONSTR(stg_END_STM_CHUNK_LIST,0,0,0,CONSTR_NOCAF,"END_STM_CHUNK_LIST","END_STM_CHUNK_LIST")
-{ foreign "C" barf("END_STM_CHUNK_LIST object entered!") never returns; }
+{ foreign "C" barf("END_STM_CHUNK_LIST object (%p) entered!", R1) never returns; }
INFO_TABLE_CONSTR(stg_NO_TREC,0,0,0,CONSTR_NOCAF,"NO_TREC","NO_TREC")
-{ foreign "C" barf("NO_TREC object entered!") never returns; }
+{ foreign "C" barf("NO_TREC object (%p) entered!", R1) never returns; }
CLOSURE(stg_END_STM_WATCH_QUEUE_closure,stg_END_STM_WATCH_QUEUE);
-CLOSURE(stg_END_INVARIANT_CHECK_QUEUE_closure,stg_END_INVARIANT_CHECK_QUEUE);
-
CLOSURE(stg_END_STM_CHUNK_LIST_closure,stg_END_STM_CHUNK_LIST);
CLOSURE(stg_NO_TREC_closure,stg_NO_TREC);
/* ----------------------------------------------------------------------------
- Messages
+ SRTs
+
+ See Note [SRTs] in compiler/cmm/CmmBuildInfoTable.hs
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE_CONSTR(stg_SRT_1, 1, 0, 0, CONSTR, "SRT_1", "SRT_1")
+{ foreign "C" barf("SRT_1 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_2, 2, 0, 0, CONSTR, "SRT_2", "SRT_2")
+{ foreign "C" barf("SRT_2 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_3, 3, 0, 0, CONSTR, "SRT_3", "SRT_3")
+{ foreign "C" barf("SRT_3 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_4, 4, 0, 0, CONSTR, "SRT_4", "SRT_4")
+{ foreign "C" barf("SRT_4 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_5, 5, 0, 0, CONSTR, "SRT_5", "SRT_5")
+{ foreign "C" barf("SRT_5 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_6, 6, 0, 0, CONSTR, "SRT_6", "SRT_6")
+{ foreign "C" barf("SRT_6 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_7, 7, 0, 0, CONSTR, "SRT_7", "SRT_7")
+{ foreign "C" barf("SRT_7 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_8, 8, 0, 0, CONSTR, "SRT_8", "SRT_8")
+{ foreign "C" barf("SRT_8 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_9, 9, 0, 0, CONSTR, "SRT_9", "SRT_9")
+{ foreign "C" barf("SRT_9 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_10, 10, 0, 0, CONSTR, "SRT_10", "SRT_10")
+{ foreign "C" barf("SRT_10 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_11, 11, 0, 0, CONSTR, "SRT_11", "SRT_11")
+{ foreign "C" barf("SRT_11 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_12, 12, 0, 0, CONSTR, "SRT_12", "SRT_12")
+{ foreign "C" barf("SRT_12 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_13, 13, 0, 0, CONSTR, "SRT_13", "SRT_13")
+{ foreign "C" barf("SRT_13 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_14, 14, 0, 0, CONSTR, "SRT_14", "SRT_14")
+{ foreign "C" barf("SRT_14 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_15, 15, 0, 0, CONSTR, "SRT_15", "SRT_15")
+{ foreign "C" barf("SRT_15 object (%p) entered!", R1) never returns; }
+
+INFO_TABLE_CONSTR(stg_SRT_16, 16, 0, 0, CONSTR, "SRT_16", "SRT_16")
+{ foreign "C" barf("SRT_16 object (%p) entered!", R1) never returns; }
+
+/* ----------------------------------------------------------------------------
+   Messages
------------------------------------------------------------------------- */
// PRIM rather than CONSTR, because PRIM objects cannot be duplicated by the GC.
INFO_TABLE_CONSTR(stg_MSG_TRY_WAKEUP,2,0,0,PRIM,"MSG_TRY_WAKEUP","MSG_TRY_WAKEUP")
-{ foreign "C" barf("MSG_TRY_WAKEUP object entered!") never returns; }
+{ foreign "C" barf("MSG_TRY_WAKEUP object (%p) entered!", R1) never returns; }
INFO_TABLE_CONSTR(stg_MSG_THROWTO,4,0,0,PRIM,"MSG_THROWTO","MSG_THROWTO")
-{ foreign "C" barf("MSG_THROWTO object entered!") never returns; }
+{ foreign "C" barf("MSG_THROWTO object (%p) entered!", R1) never returns; }
INFO_TABLE_CONSTR(stg_MSG_BLACKHOLE,3,0,0,PRIM,"MSG_BLACKHOLE","MSG_BLACKHOLE")
-{ foreign "C" barf("MSG_BLACKHOLE object entered!") never returns; }
+{ foreign "C" barf("MSG_BLACKHOLE object (%p) entered!", R1) never returns; }
// used to overwrite a MSG_THROWTO when the message has been used/revoked
INFO_TABLE_CONSTR(stg_MSG_NULL,1,0,0,PRIM,"MSG_NULL","MSG_NULL")
-{ foreign "C" barf("MSG_NULL object entered!") never returns; }
+{ foreign "C" barf("MSG_NULL object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
END_TSO_QUEUE
@@ -532,7 +586,7 @@ INFO_TABLE_CONSTR(stg_MSG_NULL,1,0,0,PRIM,"MSG_NULL","MSG_NULL")
------------------------------------------------------------------------- */
INFO_TABLE_CONSTR(stg_END_TSO_QUEUE,0,0,0,CONSTR_NOCAF,"END_TSO_QUEUE","END_TSO_QUEUE")
-{ foreign "C" barf("END_TSO_QUEUE object entered!") never returns; }
+{ foreign "C" barf("END_TSO_QUEUE object (%p) entered!", R1) never returns; }
CLOSURE(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE);
@@ -541,7 +595,7 @@ CLOSURE(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE);
------------------------------------------------------------------------- */
INFO_TABLE_CONSTR(stg_GCD_CAF,0,0,0,CONSTR_NOCAF,"GCD_CAF","GCD_CAF")
-{ foreign "C" barf("Evaluated a CAF that was GC'd!") never returns; }
+{ foreign "C" barf("Evaluated a CAF (%p) that was GC'd!", R1) never returns; }
/* ----------------------------------------------------------------------------
STM_AWOKEN
@@ -551,7 +605,7 @@ INFO_TABLE_CONSTR(stg_GCD_CAF,0,0,0,CONSTR_NOCAF,"GCD_CAF","GCD_CAF")
------------------------------------------------------------------------- */
INFO_TABLE_CONSTR(stg_STM_AWOKEN,0,0,0,CONSTR_NOCAF,"STM_AWOKEN","STM_AWOKEN")
-{ foreign "C" barf("STM_AWOKEN object entered!") never returns; }
+{ foreign "C" barf("STM_AWOKEN object (%p) entered!", R1) never returns; }
CLOSURE(stg_STM_AWOKEN_closure,stg_STM_AWOKEN);
@@ -571,40 +625,40 @@ CLOSURE(stg_STM_AWOKEN_closure,stg_STM_AWOKEN);
------------------------------------------------------------------------- */
INFO_TABLE(stg_ARR_WORDS, 0, 0, ARR_WORDS, "ARR_WORDS", "ARR_WORDS")
-{ foreign "C" barf("ARR_WORDS object entered!") never returns; }
+{ foreign "C" barf("ARR_WORDS object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_MUT_ARR_PTRS_CLEAN, 0, 0, MUT_ARR_PTRS_CLEAN, "MUT_ARR_PTRS_CLEAN", "MUT_ARR_PTRS_CLEAN")
-{ foreign "C" barf("MUT_ARR_PTRS_CLEAN object entered!") never returns; }
+{ foreign "C" barf("MUT_ARR_PTRS_CLEAN object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_MUT_ARR_PTRS_DIRTY, 0, 0, MUT_ARR_PTRS_DIRTY, "MUT_ARR_PTRS_DIRTY", "MUT_ARR_PTRS_DIRTY")
-{ foreign "C" barf("MUT_ARR_PTRS_DIRTY object entered!") never returns; }
+{ foreign "C" barf("MUT_ARR_PTRS_DIRTY object (%p) entered!", R1) never returns; }
-INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN, 0, 0, MUT_ARR_PTRS_FROZEN, "MUT_ARR_PTRS_FROZEN", "MUT_ARR_PTRS_FROZEN")
-{ foreign "C" barf("MUT_ARR_PTRS_FROZEN object entered!") never returns; }
+INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN_CLEAN, 0, 0, MUT_ARR_PTRS_FROZEN_CLEAN, "MUT_ARR_PTRS_FROZEN_CLEAN", "MUT_ARR_PTRS_FROZEN_CLEAN")
+{ foreign "C" barf("MUT_ARR_PTRS_FROZEN_CLEAN object (%p) entered!", R1) never returns; }
-INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN0, 0, 0, MUT_ARR_PTRS_FROZEN0, "MUT_ARR_PTRS_FROZEN0", "MUT_ARR_PTRS_FROZEN0")
-{ foreign "C" barf("MUT_ARR_PTRS_FROZEN0 object entered!") never returns; }
+INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN_DIRTY, 0, 0, MUT_ARR_PTRS_FROZEN_DIRTY, "MUT_ARR_PTRS_FROZEN_DIRTY", "MUT_ARR_PTRS_FROZEN_DIRTY")
+{ foreign "C" barf("MUT_ARR_PTRS_FROZEN_DIRTY object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_SMALL_MUT_ARR_PTRS_CLEAN, 0, 0, SMALL_MUT_ARR_PTRS_CLEAN, "SMALL_MUT_ARR_PTRS_CLEAN", "SMALL_MUT_ARR_PTRS_CLEAN")
-{ foreign "C" barf("SMALL_MUT_ARR_PTRS_CLEAN object entered!") never returns; }
+{ foreign "C" barf("SMALL_MUT_ARR_PTRS_CLEAN object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_SMALL_MUT_ARR_PTRS_DIRTY, 0, 0, SMALL_MUT_ARR_PTRS_DIRTY, "SMALL_MUT_ARR_PTRS_DIRTY", "SMALL_MUT_ARR_PTRS_DIRTY")
-{ foreign "C" barf("SMALL_MUT_ARR_PTRS_DIRTY object entered!") never returns; }
+{ foreign "C" barf("SMALL_MUT_ARR_PTRS_DIRTY object (%p) entered!", R1) never returns; }
-INFO_TABLE(stg_SMALL_MUT_ARR_PTRS_FROZEN, 0, 0, SMALL_MUT_ARR_PTRS_FROZEN, "SMALL_MUT_ARR_PTRS_FROZEN", "SMALL_MUT_ARR_PTRS_FROZEN")
-{ foreign "C" barf("SMALL_MUT_ARR_PTRS_FROZEN object entered!") never returns; }
+INFO_TABLE(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN, 0, 0, SMALL_MUT_ARR_PTRS_FROZEN_CLEAN, "SMALL_MUT_ARR_PTRS_FROZEN_CLEAN", "SMALL_MUT_ARR_PTRS_FROZEN_CLEAN")
+{ foreign "C" barf("SMALL_MUT_ARR_PTRS_FROZEN_CLEAN object (%p) entered!", R1) never returns; }
-INFO_TABLE(stg_SMALL_MUT_ARR_PTRS_FROZEN0, 0, 0, SMALL_MUT_ARR_PTRS_FROZEN0, "SMALL_MUT_ARR_PTRS_FROZEN0", "SMALL_MUT_ARR_PTRS_FROZEN0")
-{ foreign "C" barf("SMALL_MUT_ARR_PTRS_FROZEN0 object entered!") never returns; }
+INFO_TABLE(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY, 0, 0, SMALL_MUT_ARR_PTRS_FROZEN_DIRTY, "SMALL_MUT_ARR_PTRS_FROZEN_DIRTY", "SMALL_MUT_ARR_PTRS_FROZEN_DIRTY")
+{ foreign "C" barf("SMALL_MUT_ARR_PTRS_FROZEN_DIRTY object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
Mutable Variables
------------------------------------------------------------------------- */
INFO_TABLE(stg_MUT_VAR_CLEAN, 1, 0, MUT_VAR_CLEAN, "MUT_VAR_CLEAN", "MUT_VAR_CLEAN")
-{ foreign "C" barf("MUT_VAR_CLEAN object entered!") never returns; }
+{ foreign "C" barf("MUT_VAR_CLEAN object (%p) entered!", R1) never returns; }
INFO_TABLE(stg_MUT_VAR_DIRTY, 1, 0, MUT_VAR_DIRTY, "MUT_VAR_DIRTY", "MUT_VAR_DIRTY")
-{ foreign "C" barf("MUT_VAR_DIRTY object entered!") never returns; }
+{ foreign "C" barf("MUT_VAR_DIRTY object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
Dummy return closure
@@ -626,7 +680,7 @@ CLOSURE(stg_dummy_ret_closure,stg_dummy_ret);
------------------------------------------------------------------------- */
INFO_TABLE_CONSTR(stg_MVAR_TSO_QUEUE,2,0,0,PRIM,"MVAR_TSO_QUEUE","MVAR_TSO_QUEUE")
-{ foreign "C" barf("MVAR_TSO_QUEUE object entered!") never returns; }
+{ foreign "C" barf("MVAR_TSO_QUEUE object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
COMPACT_NFDATA (a blob of data in NF with no outgoing pointers)
@@ -639,11 +693,11 @@ INFO_TABLE_CONSTR(stg_MVAR_TSO_QUEUE,2,0,0,PRIM,"MVAR_TSO_QUEUE","MVAR_TSO_QUEUE
INFO_TABLE( stg_COMPACT_NFDATA_CLEAN, 0, 5, COMPACT_NFDATA, "COMPACT_NFDATA", "COMPACT_NFDATA")
()
-{ foreign "C" barf("COMPACT_NFDATA_CLEAN object entered!") never returns; }
+{ foreign "C" barf("COMPACT_NFDATA_CLEAN object (%p) entered!", R1) never returns; }
INFO_TABLE( stg_COMPACT_NFDATA_DIRTY, 0, 5, COMPACT_NFDATA, "COMPACT_NFDATA", "COMPACT_NFDATA")
()
-{ foreign "C" barf("COMPACT_NFDATA_DIRTY object entered!") never returns; }
+{ foreign "C" barf("COMPACT_NFDATA_DIRTY object (%p) entered!", R1) never returns; }
/* ----------------------------------------------------------------------------
CHARLIKE and INTLIKE closures.
diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm
index 0cd18628e2..571e0637fc 100644
--- a/rts/StgStartup.cmm
+++ b/rts/StgStartup.cmm
@@ -62,24 +62,12 @@ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME,
be an info table on top of the stack).
*/
- /*
- Here we setup the stack unwinding annotation necessary to allow
- debuggers to find their way back to the C stack.
-
- This is a bit fiddly as we assume the layout of the stack prepared
- for us by StgRun. Note that in most cases StgRun is written in assembler
- and therefore has no associated unwind information. For this reason we
- need to identify the platform stack pointer and return address values for
- the StgRun's caller.
- */
+ // See Note [Unwinding foreign exports on x86-64].
#if defined(x86_64_HOST_ARCH)
- // offset of 8 in MachSp value due to return address
- unwind MachSp = MachSp + RESERVED_C_STACK_BYTES + STG_RUN_STACK_FRAME_SIZE + 8,
- UnwindReturnReg = W_[MachSp + RESERVED_C_STACK_BYTES + STG_RUN_STACK_FRAME_SIZE];
+ unwind UnwindReturnReg = STG_RUN_JMP;
#else
// FIXME: Fill in for other platforms
- unwind MachSp = return,
- UnwindReturnReg = return;
+ unwind UnwindReturnReg = return;
#endif
Sp = Sp + SIZEOF_StgStopFrame - WDS(2);
@@ -181,3 +169,9 @@ INFO_TABLE_RET(stg_forceIO, RET_SMALL, P_ info_ptr)
{
ENTER(ret);
}
+
+/* Called when compiled with -falignment-sanitisation on alignment failure */
+stg_badAlignment_entry
+{
+ foreign "C" barf();
+}
diff --git a/rts/Task.c b/rts/Task.c
index 4376148c7a..ac86311844 100644
--- a/rts/Task.c
+++ b/rts/Task.c
@@ -19,6 +19,8 @@
#include "Hash.h"
#include "Trace.h"
+#include <string.h>
+
#if HAVE_SIGNAL_H
#include <signal.h>
#endif
@@ -197,6 +199,7 @@ freeTask (Task *task)
stgFree(task);
}
+/* Must take all_tasks_mutex */
static Task*
newTask (bool worker)
{
@@ -413,7 +416,7 @@ workerTaskStop (Task *task)
#if defined(THREADED_RTS)
-static void OSThreadProcAttr
+static void* OSThreadProcAttr
workerStart(Task *task)
{
Capability *cap;
@@ -439,8 +442,11 @@ workerStart(Task *task)
traceTaskCreate(task, cap);
scheduleWorker(cap,task);
+
+ return NULL;
}
+/* N.B. must take all_tasks_mutex */
void
startWorkerTask (Capability *cap)
{
@@ -468,7 +474,26 @@ startWorkerTask (Capability *cap)
ASSERT_LOCK_HELD(&cap->lock);
cap->running_task = task;
- r = createOSThread(&tid, "ghc_worker", (OSThreadProc*)workerStart, task);
+ // Set the name of the worker thread to the original process name followed by
+ // ":w", but only if we're on Linux where the program_invocation_short_name
+ // global is available.
+#if defined(linux_HOST_OS)
+ size_t procname_len = strlen(program_invocation_short_name);
+ char worker_name[16];
+ // The kernel only allocates 16 bytes for thread names, so we truncate if the
+ // original name is too long. Process names go in another table that has more
+ // capacity.
+ if (procname_len >= 13) {
+ strncpy(worker_name, program_invocation_short_name, 13);
+ strcpy(worker_name + 13, ":w");
+ } else {
+ strcpy(worker_name, program_invocation_short_name);
+ strcpy(worker_name + procname_len, ":w");
+ }
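+    // For example, a binary named "my-long-program-name" gets worker threads
+    // named "my-long-progr:w" (13 chars + ":w" + NUL = 16 bytes).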
+#else
+ char * worker_name = "ghc_worker";
+#endif
+ r = createOSThread(&tid, worker_name, (OSThreadProc*)workerStart, task);
if (r != 0) {
sysErrorBelch("failed to create OS thread");
stg_exit(EXIT_FAILURE);
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index d01be291d3..a916891aa8 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -149,7 +149,7 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
// | ********* |
// -| ********* |
//
- // 'sp' points the the current top-of-stack
+    // 'sp' points to the current top-of-stack
// 'gap' points to the stack_gap structure inside the gap
// ***** indicates real stack data
// ..... indicates gap
@@ -306,13 +306,6 @@ threadPaused(Capability *cap, StgTSO *tso)
continue;
}
- // We should never have made it here in the event of blackholes that
- // we already own; they should have been marked when we blackholed
- // them and consequently we should have stopped our stack walk
- // above.
- ASSERT(!((bh_info == &stg_BLACKHOLE_info)
- && (((StgInd*)bh)->indirectee == (StgClosure*)tso)));
-
// zero out the slop so that the sanity checker can tell
// where the next closure is.
OVERWRITING_CLOSURE(bh);
@@ -329,6 +322,10 @@ threadPaused(Capability *cap, StgTSO *tso)
if (cur_bh_info != bh_info) {
bh_info = cur_bh_info;
+#if defined(PROF_SPIN)
+ ++whitehole_threadPaused_spin;
+#endif
+ busy_wait_nop();
goto retry;
}
#endif
diff --git a/rts/ThreadPaused.h b/rts/ThreadPaused.h
index 4d762f9aed..ee25189c20 100644
--- a/rts/ThreadPaused.h
+++ b/rts/ThreadPaused.h
@@ -8,4 +8,12 @@
#pragma once
+#include "BeginPrivate.h"
+
RTS_PRIVATE void threadPaused ( Capability *cap, StgTSO * );
+
+#include "EndPrivate.h"
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_threadPaused_spin;
+#endif
diff --git a/rts/Threads.c b/rts/Threads.c
index c87551180b..977635322d 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso)
}
/* ---------------------------------------------------------------------------
- * Getting & setting the thread allocation limit
+ * Enabling and disabling the thread allocation limit
* ------------------------------------------------------------------------ */
-HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
-{
- // NB. doesn't take into account allocation in the current nursery
- // block, so it might be off by up to 4k.
- return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
-}
-
-void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
-{
- ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
-}
void rts_enableThreadAllocationLimit(StgPtr tso)
{
@@ -308,8 +297,11 @@ tryWakeupThread (Capability *cap, StgTSO *tso)
goto unblock;
}
- case BlockedOnBlackHole:
case BlockedOnSTM:
+ tso->block_info.closure = &stg_STM_AWOKEN_closure;
+ goto unblock;
+
+ case BlockedOnBlackHole:
case ThreadMigrating:
goto unblock;
@@ -379,12 +371,6 @@ wakeBlockingQueue(Capability *cap, StgBlockingQueue *bq)
// overwrite the BQ with an indirection so it will be
// collected at the next GC.
-#if defined(DEBUG) && !defined(THREADED_RTS)
- // XXX FILL_SLOP, but not if THREADED_RTS because in that case
- // another thread might be looking at this BLOCKING_QUEUE and
- // checking the owner field at the same time.
- bq->bh = 0; bq->queue = 0; bq->owner = 0;
-#endif
OVERWRITE_INFO(bq, &stg_IND_info);
}
@@ -446,7 +432,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
return;
}
- v = ((StgInd*)thunk)->indirectee;
+ v = UNTAG_CLOSURE(((StgInd*)thunk)->indirectee);
updateWithIndirection(cap, thunk, val);
@@ -640,8 +626,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
// if including this frame would exceed the size of the
// new stack (taking into account the underflow frame),
// then stop at the previous frame.
- if (sp + size > old_stack->stack + (new_stack->stack_size -
- sizeofW(StgUnderflowFrame))) {
+ if (sp + size > old_stack->sp + (new_stack->stack_size -
+ sizeofW(StgUnderflowFrame))) {
break;
}
sp += size;
@@ -808,7 +794,7 @@ loop:
tryWakeupThread(cap, tso);
- // If it was an readMVar, then we can still do work,
+ // If it was a readMVar, then we can still do work,
// so loop back. (XXX: This could take a while)
if (why_blocked == BlockedOnMVarRead) {
q = ((StgMVarTSOQueue*)q)->link;
@@ -876,7 +862,7 @@ printThreadBlockage(StgTSO *tso)
debugBelch("is blocked on an STM operation");
break;
default:
- barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)",
+ barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%p)",
tso->why_blocked, tso->id, tso);
}
}
diff --git a/rts/TopHandler.c b/rts/TopHandler.c
index 8e868e6e92..c0ac936b85 100644
--- a/rts/TopHandler.c
+++ b/rts/TopHandler.c
@@ -1,5 +1,5 @@
#include "Rts.h"
-#include "Stable.h"
+#include "StablePtr.h"
#include "TopHandler.h"
#if defined(THREADED_RTS)
diff --git a/rts/TopHandler.h b/rts/TopHandler.h
index 1146eea71c..d724354d9a 100644
--- a/rts/TopHandler.h
+++ b/rts/TopHandler.h
@@ -13,7 +13,6 @@
#include <rts/Types.h>
#include <rts/storage/Closures.h>
#include <stg/Types.h>
-#include <rts/Stable.h>
// Initialize the top handler subsystem
void initTopHandler(void);
diff --git a/rts/Trace.c b/rts/Trace.c
index 71403f8a57..5b485c4d0f 100644
--- a/rts/Trace.c
+++ b/rts/Trace.c
@@ -130,6 +130,13 @@ void resetTracing (void)
}
}
+void flushTrace (void)
+{
+ if (eventlog_enabled) {
+ flushEventLog();
+ }
+}
+
void tracingAddCapapilities (uint32_t from, uint32_t to)
{
if (eventlog_enabled) {
@@ -739,6 +746,17 @@ void traceUserMsg(Capability *cap, char *msg)
dtraceUserMsg(cap->no, msg);
}
+void traceUserBinaryMsg(Capability *cap, uint8_t *msg, size_t size)
+{
+ /* Note: normally we don't check the TRACE_* flags here as they're checked
+       by the wrappers in Trace.h. But traceUserBinaryMsg is special since it
+       has no wrapper (it's called from cmm code), so we check TRACE_user here.
+ */
+ if (eventlog_enabled && TRACE_user) {
+ postUserBinaryEvent(cap, EVENT_USER_BINARY_MSG, msg, size);
+ }
+}
+
void traceUserMarker(Capability *cap, char *markername)
{
/* Note: traceUserMarker is special since it has no wrapper (it's called
diff --git a/rts/Trace.h b/rts/Trace.h
index a72248ab30..74b960ce31 100644
--- a/rts/Trace.h
+++ b/rts/Trace.h
@@ -206,6 +206,11 @@ void traceUserMsg(Capability *cap, char *msg);
void traceUserMarker(Capability *cap, char *msg);
/*
+ * A binary message or event emitted by the program
+ */
+void traceUserBinaryMsg(Capability *cap, uint8_t *msg, size_t size);
+
+/*
* An event to record a Haskell thread's label/name
* Used by GHC.Conc.labelThread
*/
@@ -295,6 +300,8 @@ void traceHeapProfSampleCostCentre(StgWord8 profile_id,
CostCentreStack *stack, StgWord residency);
#endif /* PROFILING */
+void flushTrace(void);
+
#else /* !TRACING */
#define traceSchedEvent(cap, tag, tso, other) /* nothing */
@@ -331,6 +338,8 @@ void traceHeapProfSampleCostCentre(StgWord8 profile_id,
#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
+#define flushTrace() /* nothing */
+
#endif /* TRACING */
// If DTRACE is enabled, but neither DEBUG nor TRACING, we need a C land
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index b3b6b20ef3..9d00fb8efb 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -54,7 +54,7 @@ INFO_TABLE_RET ( stg_marked_upd_frame, UPDATE_FRAME,
// we know the closure is a BLACKHOLE
v = StgInd_indirectee(updatee);
- if (GETTAG(v) != 0) {
+ if (GETTAG(v) != 0) (likely: False) {
// updated by someone else: discard our value and use the
// other one to increase sharing, but check the blocking
// queues to see if any threads were waiting on this BLACKHOLE.
@@ -63,7 +63,7 @@ INFO_TABLE_RET ( stg_marked_upd_frame, UPDATE_FRAME,
}
// common case: it is still our BLACKHOLE
- if (v == CurrentTSO) {
+ if (v == CurrentTSO) (likely: True) {
updateWithIndirection(updatee, ret, return (ret));
}
diff --git a/rts/Weak.c b/rts/Weak.c
index f3e91fb31b..a322d822af 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -14,8 +14,15 @@
#include "Weak.h"
#include "Schedule.h"
#include "Prelude.h"
+#include "ThreadLabels.h"
#include "Trace.h"
+// List of dead weak pointers collected by the last GC
+static StgWeak *finalizer_list = NULL;
+
+// Length of the above list.
+static uint32_t n_finalizers = 0;
+
void
runCFinalizers(StgCFinalizerList *list)
{
@@ -83,15 +90,16 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
StgMutArrPtrs *arr;
StgWord size;
uint32_t n, i;
- Task *task;
- task = myTask();
- if (task != NULL) {
- task->running_finalizers = true;
- }
+ ASSERT(n_finalizers == 0);
+
+ finalizer_list = list;
- // count number of finalizers, and kill all the weak pointers first...
+ // Traverse the list and
+ // * count the number of Haskell finalizers
+ // * overwrite all the weak pointers with DEAD_WEAK
n = 0;
+ i = 0;
for (w = list; w; w = w->link) {
// Better not be a DEAD_WEAK at this stage; the garbage
// collector removes DEAD_WEAKs from the weak pointer list.
@@ -101,7 +109,8 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
n++;
}
- runCFinalizers((StgCFinalizerList *)w->cfinalizers);
+ // Remember the length of the list, for runSomeFinalizers() below
+ i++;
#if defined(PROFILING)
// A weak pointer is inherently used, so we do not need to call
@@ -112,14 +121,16 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
// no need to fill the slop, either. See stg_DEAD_WEAK_info
// in StgMiscClosures.cmm.
#endif
+
+ // We must overwrite the header with DEAD_WEAK, so that if
+ // there's a later call to finalizeWeak# on this weak pointer,
+ // we don't run the finalizer again.
SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);
}
- if (task != NULL) {
- task->running_finalizers = false;
- }
+ n_finalizers = i;
- // No finalizers to run?
+ // No Haskell finalizers to run?
if (n == 0) return;
debugTrace(DEBUG_weak, "weak: batching %d finalizers", n);
@@ -127,7 +138,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
size = n + mutArrPtrsCardTableSize(n);
arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
- SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_info, CCS_SYSTEM);
+ SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
arr->ptrs = n;
arr->size = size;
@@ -151,5 +162,94 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
rts_mkInt(cap,n)),
(StgClosure *)arr)
);
+
scheduleThread(cap,t);
+ labelThread(cap, t, "weak finalizer thread");
+}
+
+/* -----------------------------------------------------------------------------
+ Incrementally running C finalizers
+
+ The GC detects all the dead finalizers, but we don't want to run
+ them during the GC because that increases the time that the runtime
+ is paused.
+
+ What options are there?
+
+ 1. Parallelise running the C finalizers across the GC threads
+ - doesn't solve the pause problem, just reduces it (maybe by a lot)
+
+ 2. Make a Haskell thread to run the C finalizers, like we do for
+ Haskell finalizers.
+ + scheduling is handled for us
+ - no guarantee that we'll process finalizers in a timely manner
+
+ 3. Run finalizers when any capability is idle.
+ + reduces pause to 0
+ - requires scheduler modifications
+ - if the runtime is busy, finalizers wait until the next GC
+   4. Like (3), but also run finalizers incrementally between GCs.
+ 4. like (3), but also run finalizers incrementally between GCs.
+ - reduces the delay to run finalizers compared with (3)
+
+ For now we do (3). It would be easy to do (4) later by adding a
+ call to doIdleGCWork() in the scheduler loop, but I haven't found
+ that necessary so far.
+
+ -------------------------------------------------------------------------- */
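+
+// A sketch of the kind of idle-capability loop that (3) enables. This is
+// illustrative only; the real hook lives in the scheduler's idle path:
+//
+//     while (runSomeFinalizers(false)) {    // false => one chunk at a time
+//         if (!emptyRunQueue(cap)) break;   // real work arrived; stop early
+//     }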
+
+// Run this many finalizers before returning from
+// runSomeFinalizers(). This is so that we only tie up the capability
+// for a short time, and respond quickly if new work becomes
+// available.
+static const int32_t finalizer_chunk = 100;
+
+// non-zero if a thread is already in runSomeFinalizers(). This
+// protects the globals finalizer_list and n_finalizers.
+static volatile StgWord finalizer_lock = 0;
+
+//
+// Run some C finalizers. Returns true if there's more work to do.
+//
+bool runSomeFinalizers(bool all)
+{
+ if (n_finalizers == 0)
+ return false;
+
+ if (cas(&finalizer_lock, 0, 1) != 0) {
+ // another capability is doing the work, it's safe to say
+ // there's nothing to do, because the thread already in
+ // runSomeFinalizers() will call in again.
+ return false;
+ }
+
+ debugTrace(DEBUG_sched, "running C finalizers, %d remaining", n_finalizers);
+
+ Task *task = myTask();
+ if (task != NULL) {
+ task->running_finalizers = true;
+ }
+
+ StgWeak *w = finalizer_list;
+ int32_t count = 0;
+ while (w != NULL) {
+ runCFinalizers((StgCFinalizerList *)w->cfinalizers);
+ w = w->link;
+ ++count;
+ if (!all && count >= finalizer_chunk) break;
+ }
+
+ finalizer_list = w;
+ n_finalizers -= count;
+
+ if (task != NULL) {
+ task->running_finalizers = false;
+ }
+
+ debugTrace(DEBUG_sched, "ran %d C finalizers", count);
+
+ write_barrier();
+ finalizer_lock = 0;
+
+ return n_finalizers != 0;
}
diff --git a/rts/Weak.h b/rts/Weak.h
index ab335424db..fb67981497 100644
--- a/rts/Weak.h
+++ b/rts/Weak.h
@@ -19,5 +19,6 @@ void runCFinalizers(StgCFinalizerList *list);
void runAllCFinalizers(StgWeak *w);
void scheduleFinalizers(Capability *cap, StgWeak *w);
void markWeakList(void);
+bool runSomeFinalizers(bool all);
#include "EndPrivate.h"
diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c
index ed297b879e..ff79425a4c 100644
--- a/rts/eventlog/EventLog.c
+++ b/rts/eventlog/EventLog.c
@@ -105,6 +105,7 @@ char *EventDesc[] = {
[EVENT_HEAP_PROF_SAMPLE_BEGIN] = "Start of heap profile sample",
[EVENT_HEAP_PROF_SAMPLE_STRING] = "Heap profile string sample",
[EVENT_HEAP_PROF_SAMPLE_COST_CENTRE] = "Heap profile cost-centre sample",
+ [EVENT_USER_BINARY_MSG] = "User binary message"
};
// Event type.
@@ -264,38 +265,9 @@ flushEventLog(void)
}
}
-void
-initEventLogging(const EventLogWriter *ev_writer)
+static void
+postHeaderEvents(void)
{
- uint32_t n_caps;
-
- event_log_writer = ev_writer;
- initEventLogWriter();
-
- if (sizeof(EventDesc) / sizeof(char*) != NUM_GHC_EVENT_TAGS) {
- barf("EventDesc array has the wrong number of elements");
- }
-
- /*
- * Allocate buffer(s) to store events.
- * Create buffer large enough for the header begin marker, all event
- * types, and header end marker to prevent checking if buffer has room
- * for each of these steps, and remove the need to flush the buffer to
- * disk during initialization.
- *
- * Use a single buffer to store the header with event types, then flush
- * the buffer so all buffers are empty for writing events.
- */
-#if defined(THREADED_RTS)
- // XXX n_capabilities hasn't been initialized yet
- n_caps = RtsFlags.ParFlags.nCapabilities;
-#else
- n_caps = 1;
-#endif
- moreCapEventBufs(0, n_caps);
-
- initEventsBuf(&eventBuf, EVENT_LOG_SIZE, (EventCapNo)(-1));
-
// Write in buffer: the header begin marker.
postInt32(&eventBuf, EVENT_HEADER_BEGIN);
@@ -466,6 +438,10 @@ initEventLogging(const EventLogWriter *ev_writer)
eventTypes[t].size = EVENT_SIZE_DYNAMIC;
break;
+ case EVENT_USER_BINARY_MSG:
+ eventTypes[t].size = EVENT_SIZE_DYNAMIC;
+ break;
+
default:
continue; /* ignore deprecated events */
}
@@ -482,6 +458,44 @@ initEventLogging(const EventLogWriter *ev_writer)
// Prepare event buffer for events (data).
postInt32(&eventBuf, EVENT_DATA_BEGIN);
+}
+
+void
+initEventLogging(const EventLogWriter *ev_writer)
+{
+ uint32_t n_caps;
+
+ event_log_writer = ev_writer;
+ initEventLogWriter();
+
+ if (sizeof(EventDesc) / sizeof(char*) != NUM_GHC_EVENT_TAGS) {
+ barf("EventDesc array has the wrong number of elements");
+ }
+
+ /*
+ * Allocate buffer(s) to store events.
+ * Create buffer large enough for the header begin marker, all event
+ * types, and header end marker to prevent checking if buffer has room
+ * for each of these steps, and remove the need to flush the buffer to
+ * disk during initialization.
+ *
+ * Use a single buffer to store the header with event types, then flush
+ * the buffer so all buffers are empty for writing events.
+ */
+#if defined(THREADED_RTS)
+ // XXX n_capabilities hasn't been initialized yet
+ n_caps = RtsFlags.ParFlags.nCapabilities;
+#else
+ n_caps = 1;
+#endif
+ moreCapEventBufs(0, n_caps);
+
+ initEventsBuf(&eventBuf, EVENT_LOG_SIZE, (EventCapNo)(-1));
+#if defined(THREADED_RTS)
+ initMutex(&eventBufMutex);
+#endif
+
+ postHeaderEvents();
// Flush capEventBuf with header.
/*
@@ -493,10 +507,6 @@ initEventLogging(const EventLogWriter *ev_writer)
for (uint32_t c = 0; c < n_caps; ++c) {
postBlockMarker(&capEventBuf[c]);
}
-
-#if defined(THREADED_RTS)
- initMutex(&eventBufMutex);
-#endif
}
void
@@ -745,6 +755,10 @@ void postCapsetStrEvent (EventTypeNum tag,
{
int strsize = strlen(msg);
int size = strsize + sizeof(EventCapsetID);
+ if (size > EVENT_PAYLOAD_SIZE_MAX) {
+ errorBelch("Event size exceeds EVENT_PAYLOAD_SIZE_MAX, bail out");
+ return;
+ }
ACQUIRE_LOCK(&eventBufMutex);
@@ -752,7 +766,7 @@ void postCapsetStrEvent (EventTypeNum tag,
printAndClearEventBuf(&eventBuf);
if (!hasRoomForVariableEvent(&eventBuf, size)){
- // Event size exceeds buffer size, bail out:
+ errorBelch("Event size exceeds buffer size, bail out");
RELEASE_LOCK(&eventBufMutex);
return;
}
@@ -785,7 +799,7 @@ void postCapsetVecEvent (EventTypeNum tag,
printAndClearEventBuf(&eventBuf);
if(!hasRoomForVariableEvent(&eventBuf, size)){
- // Event size exceeds buffer size, bail out:
+ errorBelch("Event size exceeds buffer size, bail out");
RELEASE_LOCK(&eventBufMutex);
return;
}
@@ -1024,14 +1038,43 @@ void postCapMsg(Capability *cap, char *msg, va_list ap)
void postUserEvent(Capability *cap, EventTypeNum type, char *msg)
{
- const int size = strlen(msg);
+ const size_t size = strlen(msg);
+ if (size > EVENT_PAYLOAD_SIZE_MAX) {
+ errorBelch("Event size exceeds EVENT_PAYLOAD_SIZE_MAX, bail out");
+ return;
+ }
+
EventsBuf *eb = &capEventBuf[cap->no];
+ if (!hasRoomForVariableEvent(eb, size)){
+ printAndClearEventBuf(eb);
+
+ if (!hasRoomForVariableEvent(eb, size)){
+ errorBelch("Event size exceeds buffer size, bail out");
+ return;
+ }
+ }
+
+ postEventHeader(eb, type);
+ postPayloadSize(eb, size);
+ postBuf(eb, (StgWord8*) msg, size);
+}
+
+void postUserBinaryEvent(Capability *cap,
+ EventTypeNum type,
+ uint8_t *msg,
+ size_t size)
+{
+ if (size > EVENT_PAYLOAD_SIZE_MAX) {
+ errorBelch("Event size exceeds EVENT_PAYLOAD_SIZE_MAX, bail out");
+ return;
+ }
+ EventsBuf *eb = &capEventBuf[cap->no];
if (!hasRoomForVariableEvent(eb, size)){
printAndClearEventBuf(eb);
if (!hasRoomForVariableEvent(eb, size)){
- // Event size exceeds buffer size, bail out:
+ errorBelch("Event size exceeds buffer size, bail out");
return;
}
}
@@ -1047,13 +1090,17 @@ void postThreadLabel(Capability *cap,
{
const int strsize = strlen(label);
const int size = strsize + sizeof(EventThreadID);
- EventsBuf *eb = &capEventBuf[cap->no];
+ if (size > EVENT_PAYLOAD_SIZE_MAX) {
+ errorBelch("Event size exceeds EVENT_PAYLOAD_SIZE_MAX, bail out");
+ return;
+ }
+ EventsBuf *eb = &capEventBuf[cap->no];
if (!hasRoomForVariableEvent(eb, size)){
printAndClearEventBuf(eb);
if (!hasRoomForVariableEvent(eb, size)){
- // Event size exceeds buffer size, bail out:
+ errorBelch("Event size exceeds buffer size, bail out");
return;
}
}
@@ -1094,15 +1141,6 @@ void postBlockMarker (EventsBuf *eb)
postCapNo(eb, eb->capno);
}
-typedef enum {
- HEAP_PROF_BREAKDOWN_COST_CENTRE = 0x1,
- HEAP_PROF_BREAKDOWN_MODULE,
- HEAP_PROF_BREAKDOWN_CLOSURE_DESCR,
- HEAP_PROF_BREAKDOWN_TYPE_DESCR,
- HEAP_PROF_BREAKDOWN_RETAINER,
- HEAP_PROF_BREAKDOWN_BIOGRAPHY,
-} HeapProfBreakdown;
-
static HeapProfBreakdown getHeapProfBreakdown(void)
{
switch (RtsFlags.ProfFlags.doHeapProfile) {
@@ -1118,6 +1156,8 @@ static HeapProfBreakdown getHeapProfBreakdown(void)
return HEAP_PROF_BREAKDOWN_RETAINER;
case HEAP_BY_LDV:
return HEAP_PROF_BREAKDOWN_BIOGRAPHY;
+ case HEAP_BY_CLOSURE_TYPE:
+ return HEAP_PROF_BREAKDOWN_CLOSURE_TYPE;
default:
barf("getHeapProfBreakdown: unknown heap profiling mode");
}
diff --git a/rts/eventlog/EventLog.h b/rts/eventlog/EventLog.h
index eae11ede45..1fb7c4a071 100644
--- a/rts/eventlog/EventLog.h
+++ b/rts/eventlog/EventLog.h
@@ -47,6 +47,9 @@ void postMsg(char *msg, va_list ap);
void postUserEvent(Capability *cap, EventTypeNum type, char *msg);
+void postUserBinaryEvent(Capability *cap, EventTypeNum type,
+ uint8_t *msg, size_t size);
+
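+/* A sketch of the intended call pattern (`cap' and `payload' here are
+   hypothetical names):
+
+     postUserBinaryEvent(cap, EVENT_USER_BINARY_MSG, payload, payload_len);
+
+   The payload is opaque to the eventlog and must not exceed
+   EVENT_PAYLOAD_SIZE_MAX. */
+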
void postCapMsg(Capability *cap, char *msg, va_list ap);
/*
diff --git a/rts/eventlog/EventLogWriter.c b/rts/eventlog/EventLogWriter.c
index d8e5a44192..e6f560fc24 100644
--- a/rts/eventlog/EventLogWriter.c
+++ b/rts/eventlog/EventLogWriter.c
@@ -14,6 +14,7 @@
#include <string.h>
#include <stdio.h>
+#include <fs_rts.h>
#if defined(HAVE_SYS_TYPES_H)
#include <sys/types.h>
#endif
@@ -71,7 +72,7 @@ initEventLogFileWriter(void)
stgFree(prog);
/* Open event log file for writing. */
- if ((event_log_file = fopen(event_log_filename, "wb")) == NULL) {
+ if ((event_log_file = __rts_fopen(event_log_filename, "wb")) == NULL) {
sysErrorBelch(
"initEventLogFileWriter: can't open %s", event_log_filename);
stg_exit(EXIT_FAILURE);
diff --git a/rts/fs_rts.h b/rts/fs_rts.h
new file mode 100644
index 0000000000..12c27ccc8c
--- /dev/null
+++ b/rts/fs_rts.h
@@ -0,0 +1,15 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) Tamar Christina 2018
+ *
+ * Hack to get around whole-archive linking issues on Linux. The FS utilities
+ * need to be in a different namespace to allow the linking.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#undef FS_NAMESPACE
+#define FS_NAMESPACE rts
+
+#include "fs.h"
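+
+/* With FS_NAMESPACE set to rts, fs.h is presumed to emit its utilities under
+   rts-prefixed names, e.g. __rts_fopen as used in EventLogWriter.c, keeping
+   them distinct from the copies of the utilities linked into libraries. */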
diff --git a/rts/ghc.mk b/rts/ghc.mk
index 990f4db4e2..ff3f18f30c 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -46,6 +46,7 @@ ALL_DIRS += posix
endif
rts_C_SRCS := $(wildcard rts/*.c $(foreach dir,$(ALL_DIRS),rts/$(dir)/*.c))
+rts_C_HOOK_SRCS := $(wildcard rts/hooks/*.c)
rts_CMM_SRCS := $(wildcard rts/*.cmm)
# Don't compile .S files when bootstrapping a new arch
@@ -134,6 +135,13 @@ endif
endif
endif
+
+ifeq "$(USE_DTRACE)" "YES"
+ifneq "$(findstring $(TargetOS_CPP), linux solaris2 freebsd)" ""
+NEED_DTRACE_PROBES_OBJ = YES
+endif
+endif
+
#-----------------------------------------------------------------------------
# Building one way
define build-rts-way # args: $1 = way
@@ -146,7 +154,10 @@ rts_dist_$1_CC_OPTS := $$(GhcRtsCcOpts)
# The per-way CC_OPTS
ifneq "$$(findstring debug, $1)" ""
rts_dist_$1_HC_OPTS += -O0
-rts_dist_$1_CC_OPTS += -fno-omit-frame-pointer -g -O0
+rts_dist_$1_CC_OPTS += -fno-omit-frame-pointer -O0 -g3
+# -fkeep-inline-functions is useful to ensure that inline functions can be
+# called from within GDB, but it is not supported by clang
+#rts_dist_$1_CC_OPTS += -fkeep-inline-functions
endif
ifneq "$$(findstring dyn, $1)" ""
@@ -164,20 +175,21 @@ $(call cmm-suffix-rules,rts,dist,$1)
rts_$1_LIB_FILE = libHSrts$$($1_libsuf)
rts_$1_LIB = rts/dist/build/$$(rts_$1_LIB_FILE)
-rts_$1_C_OBJS = $$(patsubst rts/%.c,rts/dist/build/%.$$($1_osuf),$$(rts_C_SRCS)) $$(patsubst %.c,%.$$($1_osuf),$$(rts_$1_EXTRA_C_SRCS))
-rts_$1_S_OBJS = $$(patsubst rts/%.S,rts/dist/build/%.$$($1_osuf),$$(rts_S_SRCS))
-rts_$1_CMM_OBJS = $$(patsubst rts/%.cmm,rts/dist/build/%.$$($1_osuf),$$(rts_CMM_SRCS)) $$(patsubst %.cmm,%.$$($1_osuf),$$(rts_AUTO_APPLY_CMM))
+rts_$1_C_OBJS = $$(patsubst rts/%.c,rts/dist/build/%.$$($1_osuf),$$(rts_C_SRCS)) $$(patsubst %.c,%.$$($1_osuf),$$(rts_$1_EXTRA_C_SRCS))
+rts_$1_C_HOOK_OBJS = $$(patsubst rts/hooks/%.c,rts/dist/build/hooks/%.$$($1_osuf),$$(rts_C_HOOK_SRCS))
+rts_$1_S_OBJS = $$(patsubst rts/%.S,rts/dist/build/%.$$($1_osuf),$$(rts_S_SRCS))
+rts_$1_CMM_OBJS = $$(patsubst rts/%.cmm,rts/dist/build/%.$$($1_osuf),$$(rts_CMM_SRCS)) $$(patsubst %.cmm,%.$$($1_osuf),$$(rts_AUTO_APPLY_CMM))
rts_$1_OBJS = $$(rts_$1_C_OBJS) $$(rts_$1_S_OBJS) $$(rts_$1_CMM_OBJS)
ifeq "$(USE_DTRACE)" "YES"
-ifeq "$(TargetOS_CPP)" "solaris2"
+ifeq "$(NEED_DTRACE_PROBES_OBJ)" "YES"
# On Darwin we don't need to generate binary containing probes defined
# in DTrace script, but DTrace on Solaris expects generation of binary
# from the DTrace probes definitions
rts_$1_DTRACE_OBJS = rts/dist/build/RtsProbes.$$($1_osuf)
-rts/dist/build/RtsProbes.$$($1_osuf) : $$(rts_$1_OBJS)
+$$(rts_$1_DTRACE_OBJS) : $$(rts_$1_OBJS)
$(DTRACE) -G -C $$(addprefix -I,$$(GHC_INCLUDE_DIRS)) -DDTRACE -s rts/RtsProbes.d -o \
$$@ $$(rts_$1_OBJS)
endif
@@ -203,11 +215,25 @@ ifneq "$$(findstring dyn, $1)" ""
ifeq "$$(TargetOS_CPP)" "mingw32"
$$(rts_$1_LIB) : $$(rts_$1_OBJS) $$(ALL_RTS_DEF_LIBS) rts/dist/libs.depend rts/dist/build/$$(LIBFFI_DLL)
"$$(RM)" $$(RM_OPTS) $$@
- "$$(rts_dist_HC)" -this-unit-id rts -shared -dynamic -dynload deploy \
- -no-auto-link-packages -Lrts/dist/build -l$$(LIBFFI_NAME) \
- `cat rts/dist/libs.depend` $$(rts_$1_OBJS) $$(ALL_RTS_DEF_LIBS) \
- $$(rts_dist_$1_GHC_LD_OPTS) \
- -o $$@
+	# Call out to the gen-dll shell script to decide how to build the DLL;
+	# it makes a shared library for the RTS. Its arguments are:
+ # $$1 = dir
+ # $$2 = distdir
+ # $$3 = way
+ # $$4 = extra flags
+ # $$5 = extra libraries to link
+ # $$6 = object files to link
+ # $$7 = output filename
+ # $$8 = link command
+ # $$9 = create delay load import lib
+ # $$10 = SxS Name
+ # $$11 = SxS Version
+ $$(gen-dll_INPLACE) link "rts/dist/build" "rts/dist/build" "" "" "$$(ALL_RTS_DEF_LIBS)" "$$(rts_$1_OBJS)" "$$@" "$$(rts_dist_HC) -this-unit-id rts -no-hs-main -shared -dynamic -dynload deploy \
+ -no-auto-link-packages -Lrts/dist/build -l$$(LIBFFI_NAME) \
+ `cat rts/dist/libs.depend | tr '\n' ' '` \
+ $$(rts_dist_$1_GHC_LD_OPTS)" "NO" \
+ "$(rts_INSTALL_INFO)-$(subst dyn,,$(subst _dyn,,$(subst v,,$1)))" "$(ProjectVersion)"
+
else
ifneq "$$(UseSystemLibFFI)" "YES"
LIBFFI_LIBS = -Lrts/dist/build -l$$(LIBFFI_NAME)
@@ -230,9 +256,50 @@ $$(rts_$1_LIB) : $$(rts_$1_OBJS) $$(rts_$1_DTRACE_OBJS) rts/dist/libs.depend $$(
$$(rts_$1_DTRACE_OBJS) -o $$@
endif
else
-$$(rts_$1_LIB) : $$(rts_$1_OBJS) $$(rts_$1_DTRACE_OBJS)
+
+ifeq "$(USE_DTRACE)" "YES"
+ifeq "$(NEED_DTRACE_PROBES_OBJ)" "YES"
+# A list of objects that do not get included in the RTS object that is created
+# during the linking step. To prevent future linking errors, especially when
+# using the compiler as a bootstrap compiler, we need to exclude the hook
+# objects from being re-linked into the single LINKED_OBJS object file. If the
+# hooks were linked into the RTS object, the result would be duplicate
+# symbols, causing the linker to fail (e.g. `StackOverflowHook` in RTS.o and
+# hschooks.o). The excluded objects are not relinked into the RTS object but
+# are included separately, to prevent linker errors.
+# (see issue #15040)
+rts_$1_EXCLUDED_OBJS = $$(rts_$1_C_HOOK_OBJS)
+# The RTS object that gets generated to package up all of the runtime system
+# with the dtrace probe code.
+rts_$1_LINKED_OBJS = rts/dist/build/RTS.$$($1_osuf)
+
+$$(rts_$1_LINKED_OBJS) : $$(rts_$1_OBJS) $$(rts_$1_DTRACE_OBJS) $$(rts_$1_C_HOOK_OBJS)
"$$(RM)" $$(RM_OPTS) $$@
- echo $$(rts_$1_OBJS) $$(rts_$1_DTRACE_OBJS) | "$$(XARGS)" $$(XARGS_OPTS) "$$(AR_STAGE1)" \
+
+ # When linking an archive the linker will only include the object files that
+ # are actually needed during linking. It therefore does not include the dtrace
+ # specific code for initializing the probes. By creating a single object that
+ # also includes the probe object code we force the linker to include the
+ # probes when linking the static runtime.
+ #
+ # The reason why we are re-linking all the objects into a single object file
+ # is stated in this thread:
+ # https://thr3ads.net/dtrace-discuss/2005/08/384778-Problem-with-probes-defined-in-static-libraries
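+	#
+	# As a concrete sketch (object suffixes depend on the way being built):
+	#   ld -r -o rts/dist/build/RTS.o RtsProbes.o <all rts objects except hooks>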
+ $(LD) -r -o $$(rts_$1_LINKED_OBJS) $$(rts_$1_DTRACE_OBJS) $$(filter-out $$(rts_$1_EXCLUDED_OBJS), $$(rts_$1_OBJS))
+else
+rts_$1_EXCLUDED_OBJS =
+rts_$1_LINKED_OBJS = $$(rts_$1_OBJS)
+endif
+else
+rts_$1_EXCLUDED_OBJS =
+rts_$1_LINKED_OBJS = $$(rts_$1_OBJS)
+endif
+
+
+$$(rts_$1_LIB) : $$(rts_$1_LINKED_OBJS)
+ "$$(RM)" $$(RM_OPTS) $$@
+
+ echo $$(rts_$1_LINKED_OBJS) $$(rts_$1_EXCLUDED_OBJS) | "$$(XARGS)" $$(XARGS_OPTS) "$$(AR_STAGE1)" \
$$(AR_OPTS_STAGE1) $$(EXTRA_AR_ARGS_STAGE1) $$@
ifneq "$$(UseSystemLibFFI)" "YES"
@@ -257,16 +324,11 @@ $(eval $(call distdir-opts,rts,dist,1))
# We like plenty of warnings.
WARNING_OPTS += -Wall
-ifeq "$(GccLT34)" "YES"
-WARNING_OPTS += -W
-else
WARNING_OPTS += -Wextra
-endif
WARNING_OPTS += -Wstrict-prototypes
WARNING_OPTS += -Wmissing-prototypes
WARNING_OPTS += -Wmissing-declarations
WARNING_OPTS += -Winline
-WARNING_OPTS += -Waggregate-return
WARNING_OPTS += -Wpointer-arith
WARNING_OPTS += -Wmissing-noreturn
WARNING_OPTS += -Wnested-externs
@@ -292,7 +354,7 @@ endif
STANDARD_OPTS += $(addprefix -I,$(GHC_INCLUDE_DIRS)) -Irts -Irts/dist/build
# COMPILING_RTS is only used when building Win32 DLL support.
-STANDARD_OPTS += -DCOMPILING_RTS
+STANDARD_OPTS += -DCOMPILING_RTS -DFS_NAMESPACE=rts
# HC_OPTS is included in both .c and .cmm compilations, whereas CC_OPTS is
# only included in .c compilations. HC_OPTS included the WAY_* opts, which
@@ -357,6 +419,8 @@ rts/RtsUtils_CC_OPTS += -DTargetVendor=\"$(TargetVendor_CPP)\"
#
rts/RtsUtils_CC_OPTS += -DGhcUnregisterised=\"$(GhcUnregisterised)\"
rts/RtsUtils_CC_OPTS += -DGhcEnableTablesNextToCode=\"$(GhcEnableTablesNextToCode)\"
+#
+rts/xxhash_CC_OPTS += -O3 -ffast-math -ftree-vectorize
# Compile various performance-critical pieces *without* -fPIC -dynamic
# even when building a shared library. If we don't do this, then the
@@ -417,15 +481,22 @@ endif
endif
# add CFLAGS for libffi
-# ffi.h triggers prototype warnings, so disable them here:
ifeq "$(UseSystemLibFFI)" "YES"
LIBFFI_CFLAGS = $(addprefix -I,$(FFIIncludeDir))
else
LIBFFI_CFLAGS =
endif
+# ffi.h triggers prototype warnings, so disable them here:
rts/Interpreter_CC_OPTS += -Wno-strict-prototypes $(LIBFFI_CFLAGS)
rts/Adjustor_CC_OPTS += -Wno-strict-prototypes $(LIBFFI_CFLAGS)
rts/sm/Storage_CC_OPTS += -Wno-strict-prototypes $(LIBFFI_CFLAGS)
+# ffi.h triggers undefined macro warnings on PowerPC, disable those:
+# this matches substrings of powerpc64le, including "powerpc" and "powerpc64"
+ifneq "$(findstring $(TargetArch_CPP), powerpc64le)" ""
+rts/Interpreter_CC_OPTS += -Wno-undef
+rts/Adjustor_CC_OPTS += -Wno-undef
+rts/sm/Storage_CC_OPTS += -Wno-undef
+endif
# inlining warnings happen in Compact
rts/sm/Compact_CC_OPTS += -Wno-inline
@@ -434,7 +505,6 @@ rts/sm/Compact_CC_OPTS += -Wno-inline
rts/StgCRun_CC_OPTS += -w
rts/RetainerProfile_CC_OPTS += -w
-rts/RetainerSet_CC_OPTS += -Wno-format
# On Windows:
rts/win32/ConsoleHandler_CC_OPTS += -w
rts/win32/ThrIOManager_CC_OPTS += -w
diff --git a/rts/hooks/Hooks.h b/rts/hooks/Hooks.h
index 760e1daefc..24500f19e7 100644
--- a/rts/hooks/Hooks.h
+++ b/rts/hooks/Hooks.h
@@ -22,5 +22,7 @@ extern void StackOverflowHook (W_ stack_size);
extern void OutOfHeapHook (W_ request_size, W_ heap_size);
extern void MallocFailHook (W_ request_size /* in bytes */, const char *msg);
extern void FlagDefaultsHook (void);
+extern void LongGCSync (uint32_t capno, Time t);
+extern void LongGCSyncEnd (Time t);
#include "EndPrivate.h"
diff --git a/rts/hooks/LongGCSync.c b/rts/hooks/LongGCSync.c
new file mode 100644
index 0000000000..351406df98
--- /dev/null
+++ b/rts/hooks/LongGCSync.c
@@ -0,0 +1,41 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "sm/GC.h"
+#include "sm/GCThread.h"
+#include "Hooks.h"
+
+/*
+ * Called when --long-gc-sync=<time> has expired during a GC sync. The idea is
+ * that you can set a breakpoint on this function in gdb and try to determine
+ * which thread was holding up the GC sync.
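+ *
+ * For example: run the program with `+RTS --long-gc-sync=<time>' under gdb,
+ * set `break LongGCSync', and inspect the other capabilities when it fires.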
+ */
+void LongGCSync (uint32_t me USED_IF_THREADS, Time t STG_UNUSED)
+{
+#if defined(THREADED_RTS)
+ {
+ uint32_t i;
+ for (i=0; i < n_capabilities; i++) {
+ if (i != me && gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ debugBelch("Warning: slow GC sync: still waiting for cap %d\n",
+ i);
+ }
+ }
+ }
+#endif
+}
+
+/*
+ * Called at the end of a GC sync which was longer than --long-gc-sync=<time>.
+ * The idea is that you can use this function to log stats about the length of
+ * GC syncs.
+ */
+void LongGCSyncEnd (Time t)
+{
+ debugBelch("Warning: waited %" FMT_Word64 "us for GC sync\n", TimeToUS(t));
+}
diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c
index e81b97acfa..f2fd88f750 100644
--- a/rts/linker/Elf.c
+++ b/rts/linker/Elf.c
@@ -439,7 +439,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
if (!SECTION_INDEX_VALID(shdr[i].sh_link)) {
if (shdr[i].sh_link == SHN_UNDEF)
errorBelch("\n%s: relocation section #%d has no symbol table\n"
- "This object file has probably been fully striped. "
+ "This object file has probably been fully stripped. "
"Such files cannot be linked.\n",
oc->archiveMemberName ? oc->archiveMemberName : oc->fileName, i);
else
@@ -1097,6 +1097,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
switch (reloc_type) {
# ifdef i386_HOST_ARCH
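+        /* R_386_NONE is an explicit no-op relocation; accept and skip it. */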
+ case COMPAT_R_386_NONE: break;
case COMPAT_R_386_32: *pP = value; break;
case COMPAT_R_386_PC32: *pP = value - P; break;
# endif
@@ -1571,6 +1572,9 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
# endif
#if defined(x86_64_HOST_ARCH)
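+        /* Likewise, R_X86_64_NONE is a no-op relocation; accept and skip it. */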
+ case COMPAT_R_X86_64_NONE:
+ break;
+
case COMPAT_R_X86_64_64:
*(Elf64_Xword *)P = value;
break;
@@ -1708,15 +1712,13 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
int
ocResolve_ELF ( ObjectCode* oc )
{
- int ok;
- Elf_Word i;
char* ehdrC = (char*)(oc->image);
Elf_Ehdr* ehdr = (Elf_Ehdr*) ehdrC;
Elf_Shdr* shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
const Elf_Word shnum = elf_shnum(ehdr);
#if defined(SHN_XINDEX)
- Elf_Word* shndxTable = get_shndx_table(ehdr);
+ Elf_Word* shndxTable = get_shndx_table(ehdr);
#endif
/* resolve section symbols
@@ -1749,9 +1751,9 @@ ocResolve_ELF ( ObjectCode* oc )
Elf_Word secno = symbol->elf_sym->st_shndx;
#if defined(SHN_XINDEX)
if (secno == SHN_XINDEX) {
- ASSERT(shndxTable);
- secno = shndxTable[i];
- }
+ ASSERT(shndxTable);
+ secno = shndxTable[i];
+ }
#endif
ASSERT(symbol->elf_sym->st_name == 0);
ASSERT(symbol->elf_sym->st_value == 0);
@@ -1763,6 +1765,9 @@ ocResolve_ELF ( ObjectCode* oc )
#if defined(NEED_GOT)
if(fillGot( oc ))
return 0;
+ /* silence warnings */
+ (void) shnum;
+ (void) shdr;
#endif /* NEED_GOT */
#if defined(aarch64_HOST_ARCH)
@@ -1770,27 +1775,27 @@ ocResolve_ELF ( ObjectCode* oc )
if(relocateObjectCode( oc ))
return 0;
#else
- /* Process the relocation sections. */
- for (i = 0; i < shnum; i++) {
- if (shdr[i].sh_type == SHT_REL) {
- ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, i );
- if (!ok)
- return ok;
- }
- else
- if (shdr[i].sh_type == SHT_RELA) {
- ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, i );
- if (!ok)
- return ok;
- }
- }
+ /* Process the relocation sections. */
+ for (Elf_Word i = 0; i < shnum; i++) {
+ if (shdr[i].sh_type == SHT_REL) {
+ bool ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, i );
+ if (!ok)
+ return ok;
+ }
+ else
+ if (shdr[i].sh_type == SHT_RELA) {
+ bool ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, i );
+ if (!ok)
+ return ok;
+ }
+ }
#endif
#if defined(powerpc_HOST_ARCH)
- ocFlushInstructionCache( oc );
+ ocFlushInstructionCache( oc );
#endif
- return 1;
+ return 1;
}
int ocRunInit_ELF( ObjectCode *oc )
diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c
index 3c4bd44a28..8c32585681 100644
--- a/rts/linker/LoadArchive.c
+++ b/rts/linker/LoadArchive.c
@@ -22,6 +22,7 @@
#include <string.h>
#include <stddef.h>
#include <ctype.h>
+#include <fs_rts.h>
#define FAIL(...) do {\
errorBelch("loadArchive: "__VA_ARGS__); \
@@ -260,7 +261,7 @@ static HsInt loadArchive_ (pathchar *path)
int misalignment = 0;
DEBUG_LOG("start\n");
- DEBUG_LOG("Loading archive `%" PATH_FMT" '\n", path);
+ DEBUG_LOG("Loading archive `%" PATH_FMT "'\n", path);
/* Check that we haven't already loaded this archive.
Ignore requests to load multiple times */
@@ -489,12 +490,7 @@ static HsInt loadArchive_ (pathchar *path)
DEBUG_LOG("Member is an object file...loading...\n");
-#if defined(mingw32_HOST_OS)
- // TODO: We would like to use allocateExec here, but allocateExec
- // cannot currently allocate blocks large enough.
- image = allocateImageAndTrampolines(path, fileName, f, memberSize,
- isThin);
-#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
+#if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
if (RTS_LINKER_USE_MMAP)
image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
else {
@@ -505,7 +501,7 @@ static HsInt loadArchive_ (pathchar *path)
image += misalignment;
}
-#else // not windows or darwin
+#else // not darwin
image = stgMallocBytes(memberSize, "loadArchive(image)");
#endif
if (isThin) {
diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c
index 22258fe970..4dbb6291f9 100644
--- a/rts/linker/PEi386.c
+++ b/rts/linker/PEi386.c
@@ -58,6 +58,7 @@
tools. See note below.
Note [BFD import library]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
On Windows, compilers don't link directly to dynamic libraries.
The reason for this is that the exports are not always by symbol, the
@@ -124,6 +125,51 @@
dlltool, but it can be combined using ar. This is an important feature
required for dynamic linking support for GHC. So the runtime linker now
supports this too.
+
+ Note [Memory allocation]
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+   Previously on Windows we would use VirtualAlloc to allocate enough space
+   to load the entire object file into memory, and we kept it there until the
+   object file was unloaded.
+
+   This has a couple of problems. First, VirtualAlloc and the other Virtual
+   functions interact directly with the memory manager, and requesting memory
+   from VirtualAlloc will always return whole pages (32k), aligned on a 4k
+   boundary.
+
+   This means that for an object file of size N kbytes, we're always wasting
+   32-N kbytes of memory. Nothing else can access this memory.
+
+   Because of this we now use HeapCreate, HeapAlloc and the other heap
+   functions on a private heap. Another solution would have been to write our
+   own memory manager to keep track of where we have free memory, but the
+   private heap solution is simpler.
+
+   The private heap is created with the same full rights as the pages we used
+   to get from VirtualAlloc (i.e. READ/WRITE/EXECUTE), and in the end we use
+   memory much more efficiently than before. The downside is that heap memory
+   is always allocated AND committed, so when the heap grows the new size is
+   committed too, and it becomes harder to see how much memory we're actually
+   using. For small programs this makes it seem as if we're using more memory
+   than before; certainly a clean GHCi startup will have a slightly higher
+   commit count.
+
+   The second major change in how we allocate memory is that we no longer
+   keep the entire object file around. We now read the object file into
+   memory allocated with plain malloc and pick the bits we need out of it.
+   All tables are stored in the object file info table and are discarded as
+   soon as they are no longer needed, e.g. after relocation is finished. Only
+   section data is kept around, and that data is copied into the private
+   heap.
+
+   The major knock-on effect of this is that we have more memory to use in
+   the sub-2GB range, which means that Template Haskell should fail a lot
+   less, as we violate the small memory model much less than before.
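+
+   As a minimal sketch (names approximate; the real calls live in
+   initLinker_PEi386, ocGetNames_PEi386, freePreloadObjectFile_PEi386 and
+   exitLinker_PEi386), the allocation pattern described above is:
+
+     code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE, initial_size, 0);
+     oc->info->image = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, secBytesTotal);
+     ...
+     HeapFree (code_heap, 0, oc->info->image);   (on object unload)
+     HeapDestroy (code_heap);                    (at linker shutdown)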
+
+ Note [Section alignment]
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+   On Windows the runtime linker honours each section's alignment requirement
+   by aligning the section while copying it into the private heap. We also
+   ensure that the trampoline "region" we reserve is 8-byte aligned.
*/
#include "Rts.h"
@@ -140,6 +186,7 @@
#include "RtsSymbolInfo.h"
#include "GetEnv.h"
#include "linker/PEi386.h"
+#include "linker/PEi386Types.h"
#include "LinkerInternals.h"
#include <windows.h>
@@ -149,9 +196,10 @@
#include <stdbool.h>
#include <stdint.h>
-static uint8_t* cstring_from_COFF_symbol_name(
- uint8_t* name,
- uint8_t* strtab);
+#include <inttypes.h>
+#include <dbghelp.h>
+#include <stdlib.h>
+#include <psapi.h>
#if defined(x86_64_HOST_ARCH)
static size_t makeSymbolExtra_PEi386(
@@ -173,6 +221,28 @@ static bool verifyCOFFHeader(
static bool checkIfDllLoaded(
HINSTANCE instance);
+static uint32_t getSectionAlignment(
+ Section section);
+
+static uint8_t* getAlignedMemory(
+ uint8_t* value,
+ Section section);
+
+static size_t getAlignedValue(
+ size_t value,
+ Section section);
+
+static void addCopySection(
+ ObjectCode *oc,
+ Section *s,
+ SectionKind kind,
+ SectionAlloc alloc,
+ void* start,
+ StgWord size);
+
+static void releaseOcInfo(
+ ObjectCode* oc);
+
/* Add ld symbol for PE image base. */
#if defined(__GNUC__)
#define __ImageBase __MINGW_LSYMBOL(_image_base__)
@@ -183,6 +253,34 @@ static bool checkIfDllLoaded(
extern IMAGE_DOS_HEADER __ImageBase;
#define __image_base (void*)((HINSTANCE)&__ImageBase)
+const Alignments pe_alignments[] = {
+ { IMAGE_SCN_ALIGN_1BYTES , 1 },
+ { IMAGE_SCN_ALIGN_2BYTES , 2 },
+ { IMAGE_SCN_ALIGN_4BYTES , 4 },
+ { IMAGE_SCN_ALIGN_8BYTES , 8 },
+ { IMAGE_SCN_ALIGN_16BYTES , 16 },
+ { IMAGE_SCN_ALIGN_32BYTES , 32 },
+ { IMAGE_SCN_ALIGN_64BYTES , 64 },
+ { IMAGE_SCN_ALIGN_128BYTES , 128 },
+ { IMAGE_SCN_ALIGN_256BYTES , 256 },
+ { IMAGE_SCN_ALIGN_512BYTES , 512 },
+ { IMAGE_SCN_ALIGN_1024BYTES, 1024},
+ { IMAGE_SCN_ALIGN_2048BYTES, 2048},
+ { IMAGE_SCN_ALIGN_4096BYTES, 4096},
+ { IMAGE_SCN_ALIGN_8192BYTES, 8192},
+ };
+
+const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments);
+const int default_alignment = 8;
+const int initHeapSizeMB = 15;
+static HANDLE code_heap = NULL;
+
+/* Low Fragmentation Heap: try to prevent the heap from growing in size when
+   space can simply be reclaimed. These are enum values missing from
+   mingw-w64's headers. */
+#define HEAP_LFH 2
+#define HeapOptimizeResources 3
+
void initLinker_PEi386()
{
if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
@@ -205,6 +303,31 @@ void initLinker_PEi386()
addDLL(WSTR("shell32"));
addDLL(WSTR("user32"));
#endif
+
+ /* See Note [Memory allocation]. */
+ /* Create a private heap which we will use to store all code and data. */
+ SYSTEM_INFO sSysInfo;
+ GetSystemInfo(&sSysInfo);
+ code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE,
+ initHeapSizeMB * sSysInfo.dwPageSize , 0);
+ if (!code_heap)
+ barf ("Could not create private heap during initialization. Aborting.");
+
+ /* Set some flags for the new code heap. */
+ HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0);
+ unsigned long HeapInformation = HEAP_LFH;
+    /* Enable the LFH: the flag must be passed via the
+       HeapCompatibilityInformation class, since
+       HeapEnableTerminationOnCorruption takes no argument. */
+    HeapSetInformation(code_heap, HeapCompatibilityInformation,
+                       &HeapInformation, sizeof(HeapInformation));
+ HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0);
+}
+
+void exitLinker_PEi386()
+{
+ /* See Note [Memory allocation]. */
+ if (code_heap) {
+ HeapDestroy (code_heap);
+ code_heap = NULL;
+ }
}
/* A list thereof. */
@@ -240,6 +363,13 @@ static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
(PIMAGE_IMPORT_DESCRIPTOR)((BYTE *)instance + header->
OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
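+  /* Some DLLs (e.g. resource-only ones) carry no import table at all; in
+     that case the import directory has size 0 and the descriptor pointer
+     computed above is meaningless, so bail out before dereferencing it. */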
+ bool importTableMissing =
+ header->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size == 0;
+
+ if (importTableMissing) {
+ return;
+ }
+
/* Ignore these compatibility shims. */
const pathchar* ms_dll = WSTR("api-ms-win-");
const int len = wcslen(ms_dll);
@@ -279,7 +409,22 @@ static bool checkIfDllLoaded(HINSTANCE instance)
void freePreloadObjectFile_PEi386(ObjectCode *oc)
{
- VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
+ if (oc->image) {
+ stgFree (oc->image);
+ oc->image = NULL;
+ }
+
+    /* oc->info may already have been released (see releaseOcInfo), so guard
+       it before dereferencing. */
+    if (oc->info) {
+        if (oc->info->image) {
+            HeapFree(code_heap, 0, oc->info->image);
+            oc->info->image = NULL;
+        }
+        if (oc->info->ch_info)
+            stgFree (oc->info->ch_info);
+        stgFree (oc->info);
+        oc->info = NULL;
+    }
IndirectAddr *ia, *ia_next;
ia = indirects;
@@ -291,6 +436,30 @@ void freePreloadObjectFile_PEi386(ObjectCode *oc)
indirects = NULL;
}
+static void releaseOcInfo(ObjectCode* oc) {
+ if (!oc) return;
+
+ if (oc->info) {
+ stgFree (oc->info->ch_info);
+ stgFree (oc->info->str_tab);
+ stgFree (oc->info->symbols);
+ stgFree (oc->info);
+ oc->info = NULL;
+ }
+ for (int i = 0; i < oc->n_sections; i++){
+ Section section = oc->sections[i];
+ if (section.info) {
+ stgFree (section.info->name);
+ if (section.info->relocs) {
+ stgFree (section.info->relocs);
+ section.info->relocs = NULL;
+ }
+ stgFree (section.info);
+ section.info = NULL;
+ }
+ }
+}
+
/*************
* This function determines what kind of COFF image we are dealing with.
* This is needed in order to correctly load and verify objects and their
@@ -395,7 +564,7 @@ COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc )
__attribute__ ((always_inline)) inline
size_t getSymbolSize ( COFF_HEADER_INFO *info )
{
- ASSERT (info);
+ ASSERT(info);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -408,8 +577,8 @@ size_t getSymbolSize ( COFF_HEADER_INFO *info )
__attribute__ ((always_inline)) inline
int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
{
- ASSERT (info);
- ASSERT (sym);
+ ASSERT(info);
+ ASSERT(sym);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -422,8 +591,8 @@ int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
__attribute__ ((always_inline)) inline
uint32_t getSymValue ( COFF_HEADER_INFO *info, COFF_symbol* sym )
{
- ASSERT (info);
- ASSERT (sym);
+ ASSERT(info);
+ ASSERT(sym);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -436,8 +605,8 @@ uint32_t getSymValue ( COFF_HEADER_INFO *info, COFF_symbol* sym )
__attribute__ ((always_inline)) inline
uint8_t getSymStorageClass ( COFF_HEADER_INFO *info, COFF_symbol* sym )
{
- ASSERT (info);
- ASSERT (sym);
+ ASSERT(info);
+ ASSERT(sym);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -450,8 +619,8 @@ uint8_t getSymStorageClass ( COFF_HEADER_INFO *info, COFF_symbol* sym )
__attribute__ ((always_inline)) inline
uint8_t getSymNumberOfAuxSymbols ( COFF_HEADER_INFO *info, COFF_symbol* sym )
{
- ASSERT (info);
- ASSERT (sym);
+ ASSERT(info);
+ ASSERT(sym);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -464,8 +633,8 @@ uint8_t getSymNumberOfAuxSymbols ( COFF_HEADER_INFO *info, COFF_symbol* sym )
__attribute__ ((always_inline)) inline
uint16_t getSymType ( COFF_HEADER_INFO *info, COFF_symbol* sym )
{
- ASSERT (info);
- ASSERT (sym);
+ ASSERT(info);
+ ASSERT(sym);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -478,8 +647,8 @@ uint16_t getSymType ( COFF_HEADER_INFO *info, COFF_symbol* sym )
__attribute__ ((always_inline)) inline
uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym )
{
- ASSERT (info);
- ASSERT (sym);
+ ASSERT(info);
+ ASSERT(sym);
switch (info->type)
{
case COFF_ANON_BIG_OBJ:
@@ -686,115 +855,37 @@ bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index)
/* We assume file pointer is right at the
beginning of COFF object.
*/
-char *
-allocateImageAndTrampolines (
- pathchar* arch_name, char* member_name,
- FILE* f USED_IF_x86_64_HOST_ARCH,
- int size,
- int isThin USED_IF_x86_64_HOST_ARCH)
-{
- char* image;
-#if defined(x86_64_HOST_ARCH)
- if (!isThin)
+static uint32_t getSectionAlignment(
+ Section section) {
+ uint32_t c = section.info->props;
+ for(int i = 0; i < pe_alignments_cnt; i++)
{
- /* PeCoff contains number of symbols right in it's header, so
- we can reserve the room for symbolExtras right here. */
- size_t n;
- /* Minimum header size to read. */
- const size_t MIN_HEADER_SIZE = sizeof(ANON_OBJECT_HEADER);
- char* tmp = stgMallocBytes (MIN_HEADER_SIZE, "allocateImageAndTrampolines");
- n = fread (tmp, 1, MIN_HEADER_SIZE, f);
- if (n != MIN_HEADER_SIZE) {
- stgFree (tmp);
- errorBelch ("getNumberOfSymbols: error whilst reading `%s' header "
- "in `%" PATH_FMT "'",
- member_name, arch_name);
- return NULL;
- }
- fseek(f, -(long int)MIN_HEADER_SIZE, SEEK_CUR);
-
- COFF_OBJ_TYPE objType = getObjectType (tmp, arch_name);
- stgFree (tmp);
- uint32_t numberOfSymbols = 0;
- switch (objType)
- {
- case COFF_IMAGE:
- {
- IMAGE_FILE_HEADER hdr;
- n = fread (&hdr, 1, sizeof(IMAGE_FILE_HEADER), f);
- if (n != sizeof(IMAGE_FILE_HEADER))
- {
- errorBelch ("getNumberOfSymbols: error whilst reading `%s' "
- "image header in `%" PATH_FMT "'",
- member_name, arch_name);
- return NULL;
- }
- fseek (f, -(long int)sizeof(IMAGE_FILE_HEADER), SEEK_CUR);
- if (!verifyCOFFHeader (hdr.Machine, &hdr, arch_name)) {
- return NULL;
- }
- numberOfSymbols = hdr.NumberOfSymbols;
- }
- break;
- case COFF_ANON_BIG_OBJ:
- {
- ANON_OBJECT_HEADER_BIGOBJ hdr;
- n = fread (&hdr, 1, sizeof(ANON_OBJECT_HEADER_BIGOBJ), f);
- if (n != sizeof(ANON_OBJECT_HEADER_BIGOBJ))
- {
- errorBelch ("getNumberOfSymbols: error whilst reading `%s' "
- "big obj header in `%" PATH_FMT "'",
- member_name, arch_name);
- return NULL;
- }
- fseek (f, -(long int)sizeof(ANON_OBJECT_HEADER_BIGOBJ), SEEK_CUR);
- if (!verifyCOFFHeader (hdr.Machine, NULL, arch_name)) {
- return NULL;
- }
- numberOfSymbols = hdr.NumberOfSymbols;
- }
- break;
- case COFF_ANON_OBJ:
- barf ("COFF_ANON_OBJ should not be allocated with "
- "allocateImageAndTrampolines. It is not specific enough.\n");
- case COFF_IMPORT_LIB:
- barf ("COFF_IMPORT_LIB should not be allocated with "
- "allocateImageAndTrampolines. It is read-only.\n");
- case COFF_UNKNOWN:
- default:
- {
- errorBelch (
- "getNumberOfSymbols: error whilst reading `%s' header "
- "in `%" PATH_FMT "': Unknown COFF_OBJ_TYPE.",
- member_name, arch_name);
- return NULL;
- }
- }
-
- /* We get back 8-byte aligned memory (is that guaranteed?), but
- the offsets to the sections within the file are all 4 mod 8
- (is that guaranteed?). We therefore need to offset the image
- by 4, so that all the pointers are 8-byte aligned, so that
- pointer tagging works. */
- /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
- which equals to 4 for 64-bit case and 0 for 32-bit case. */
- /* We allocate trampolines area for all symbols right behind
- image data, aligned on 8. */
- size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
- + numberOfSymbols * sizeof(SymbolExtra);
- }
-#endif
- image = VirtualAlloc(NULL, size,
- MEM_RESERVE | MEM_COMMIT,
- PAGE_EXECUTE_READWRITE);
-
- if (image == NULL) {
- errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
- arch_name, member_name);
- return NULL;
+ if ((c & 0xF00000) == pe_alignments[i].mask)
+ return pe_alignments[i].value;
}
- return image + PEi386_IMAGE_OFFSET;
+ /* No alignment flag found, assume 8-byte aligned. */
+ return default_alignment;
+}
+
+/* ----------------------
+ * Return a memory location aligned to the section's alignment requirement.
+ */
+static uint8_t* getAlignedMemory(
+ uint8_t* value, Section section) {
+ uint32_t alignment = getSectionAlignment(section);
+ uintptr_t mask = (uintptr_t)alignment - 1;
+ return (uint8_t*)(((uintptr_t)value + mask) & ~mask);
+}
+
+/* ----------------------
+ * Return a value aligned to the section's alignment requirement.
+ */
+static size_t getAlignedValue(
+ size_t value, Section section) {
+ uint32_t alignment = getSectionAlignment(section);
+ uint32_t mask = (uint32_t)alignment - 1;
+ return (size_t)((value + mask) & ~mask);
}
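+
+/* For example, for a section flagged IMAGE_SCN_ALIGN_16BYTES,
+   getAlignedValue (0x1003, section) == 0x1010: values are rounded up to
+   the next multiple of the (power-of-two) section alignment. */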
/* -----------------------
@@ -836,7 +927,7 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
IF_DEBUG(linker, debugBelch("loadArchive: reading %lu bytes at %ld\n", hdr.SizeOfData, ftell(f)));
- image = malloc(hdr.SizeOfData);
+ image = stgMallocBytes(hdr.SizeOfData, "checkAndLoadImportLibrary(image)");
n = fread(image, 1, hdr.SizeOfData, f);
if (n != hdr.SizeOfData) {
errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
@@ -848,36 +939,39 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
char* symbol = strtok(image, "\0");
int symLen = strlen(symbol) + 1;
int nameLen = n - symLen;
- char* dllName = malloc(sizeof(char) * nameLen);
+ char* dllName = stgMallocBytes(sizeof(char) * nameLen,
+ "checkAndLoadImportLibrary(dllname)");
dllName = strncpy(dllName, image + symLen, nameLen);
- pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
+ pathchar* dll = stgMallocBytes(sizeof(wchar_t) * nameLen,
+ "checkAndLoadImportLibrary(dll)");
mbstowcs(dll, dllName, nameLen);
- free(dllName);
+ stgFree(dllName);
IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%" PATH_FMT "'\n", symbol, dll));
const char* result = addDLL(dll);
- free(image);
+ stgFree(image);
if (result != NULL) {
errorBelch("Could not load `%" PATH_FMT "'. Reason: %s\n", dll, result);
load_dll_warn = true;
- free(dll);
+ stgFree(dll);
fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
return false;
}
- free(dll);
+ stgFree(dll);
return true;
}
static void
-printName ( uint8_t* name, uint8_t* strtab )
+printName ( uint8_t* name, ObjectCode* oc )
{
if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
- uint32_t strtab_offset = * (uint32_t*)(name+4);
- debugBelch("%s", strtab + strtab_offset );
+ uint32_t strtab_offset = * (uint32_t*)(name + 4);
+ debugBelch("%s",
+ oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET);
} else {
int i;
for (i = 0; i < 8; i++) {
@@ -889,11 +983,13 @@ printName ( uint8_t* name, uint8_t* strtab )
static void
-copyName ( uint8_t* name, uint8_t* strtab, uint8_t* dst, int dstSize )
+copyName ( uint8_t* name, ObjectCode* oc, uint8_t* dst, int dstSize )
{
if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
- uint32_t strtab_offset = * (uint32_t*)(name+4);
- strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
+ uint32_t strtab_offset = * (uint32_t*)(name + 4);
+ strncpy ((char*)dst,
+ oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET,
+ dstSize);
dst[dstSize-1] = 0;
} else {
int i = 0;
@@ -908,27 +1004,27 @@ copyName ( uint8_t* name, uint8_t* strtab, uint8_t* dst, int dstSize )
}
-static uint8_t *
-cstring_from_COFF_symbol_name ( uint8_t* name, uint8_t* strtab )
+char*
+get_sym_name ( uint8_t* name, ObjectCode* oc )
{
- uint8_t* newstr;
+ char* newstr;
/* If the string is longer than 8 bytes, look in the
string table for it -- this will be correctly zero terminated.
*/
if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
- uint32_t strtab_offset = * (uint32_t*)(name+4);
- return strtab + strtab_offset;
+ uint32_t strtab_offset = * (uint32_t*)(name + 4);
+ return oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET;
}
/* Otherwise, if shorter than 8 bytes, return the original,
which by defn is correctly terminated.
*/
- if (name[7]==0) return name;
+ if (name[7]==0) return (char*)name;
/* The annoying case: 8 bytes. Copy into a temporary
(XXX which is never freed ...)
*/
- newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
+ newstr = stgMallocBytes(9, "get_sym_name");
ASSERT(newstr);
- strncpy((char*)newstr,(char*)name,8);
+ strncpy (newstr, (char*)name,8);
newstr[8] = 0;
return newstr;
}
@@ -939,16 +1035,17 @@ cstring_from_COFF_symbol_name ( uint8_t* name, uint8_t* strtab )
consistency we *always* copy the string; the caller must free it
*/
char *
-cstring_from_section_name (uint8_t* name, uint8_t* strtab)
+get_name_string (uint8_t* name, ObjectCode* oc)
{
char *newstr;
if (name[0]=='/') {
- int strtab_offset = strtol((char*)name+1,NULL,10);
- int len = strlen(((char*)strtab) + strtab_offset);
+ int strtab_offset = strtol((char*)name+1,NULL,10)-PEi386_STRTAB_OFFSET;
+ char* str = oc->info->str_tab + strtab_offset;
+ int len = strlen(str);
- newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
- strcpy(newstr, (char*)strtab + strtab_offset);
+        newstr = stgMallocBytes(len + 1, "get_name_string");
+ strncpy(newstr, str, len + 1);
return newstr;
}
else
@@ -964,23 +1061,15 @@ cstring_from_section_name (uint8_t* name, uint8_t* strtab)
/* See Note [mingw-w64 name decoration scheme] */
#if !defined(x86_64_HOST_ARCH)
static void
-zapTrailingAtSign ( uint8_t* sym )
+zapTrailingAtSign ( SymbolName* sym )
{
-# define my_isdigit(c) ((c) >= '0' && (c) <= '9')
- int i, j;
- if (sym[0] == 0) return;
- i = 0;
- while (sym[i] != 0) i++;
- i--;
- j = i;
- while (j > 0 && my_isdigit(sym[j])) j--;
- if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
-# undef my_isdigit
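+  /* Strip the stdcall suffix, e.g. "foo@8" becomes "foo". Note that unlike
+     the old code this cuts at the last `@' even if no digits follow. */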
+ char* lst = strrchr (sym, '@');
+ if (lst) lst[0]='\0';
}
#endif
SymbolAddr*
-lookupSymbolInDLLs ( uint8_t *lbl )
+lookupSymbolInDLLs ( const SymbolName* lbl )
{
OpenedDLL* o_dll;
SymbolAddr* sym;
@@ -988,7 +1077,7 @@ lookupSymbolInDLLs ( uint8_t *lbl )
for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
/* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
- sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
+ sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE);
if (sym != NULL) {
/*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
return sym;
@@ -1001,8 +1090,9 @@ lookupSymbolInDLLs ( uint8_t *lbl )
it generates call *__imp_foo, and __imp_foo here has exactly
the same semantics as in __imp_foo = GetProcAddress(..., "foo")
*/
- if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
- sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
+ if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
+ sym = GetProcAddress(o_dll->instance,
+ lbl + 6 + STRIP_LEADING_UNDERSCORE);
if (sym != NULL) {
IndirectAddr* ret;
ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
@@ -1011,12 +1101,12 @@ lookupSymbolInDLLs ( uint8_t *lbl )
indirects = ret;
IF_DEBUG(linker,
debugBelch("warning: %s from %S is linked instead of %s\n",
- (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
+ lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl));
return (void*) & ret->addr;
}
}
- sym = GetProcAddress(o_dll->instance, (char*)lbl);
+ sym = GetProcAddress(o_dll->instance, lbl);
if (sym != NULL) {
/*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
return sym;
@@ -1058,7 +1148,7 @@ verifyCOFFHeader ( uint16_t machine, IMAGE_FILE_HEADER *hdr,
return false;
}
if ( (hdr->Characteristics & IMAGE_FILE_BYTES_REVERSED_HI)) {
- errorBelch("%" PATH_FMT ": Invalid PE/PE+ word size or endiannness: %d",
+ errorBelch("%" PATH_FMT ": Invalid PE/PE+ word size or endianness: %d",
fileName,
(int)(hdr->Characteristics));
return false;
@@ -1069,14 +1159,13 @@ verifyCOFFHeader ( uint16_t machine, IMAGE_FILE_HEADER *hdr,
bool
ocVerifyImage_PEi386 ( ObjectCode* oc )
{
- unsigned int i;
- uint32_t j, noRelocs;
+ COFF_HEADER_INFO *info = getHeaderInfo (oc);
+
+ uint32_t i, noRelocs;
COFF_section* sectab;
COFF_symbol* symtab;
uint8_t* strtab;
- COFF_HEADER_INFO *info = getHeaderInfo (oc);
-
sectab = (COFF_section*) (
((uint8_t*)(oc->image))
+ info->sizeOfHeader + info->sizeOfOptionalHeader
@@ -1088,20 +1177,6 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
strtab = ((uint8_t*)symtab)
+ info->numberOfSymbols * getSymbolSize (info);
- /* If the string table size is way crazy, this might indicate that
- there are more than 64k relocations, despite claims to the
- contrary. Hence this test. */
- /* debugBelch("strtab size %d\n", * (uint32_t*)strtab); */
-#if 0
- if ( (*(uint32_t*)strtab) > 600000 ) {
- /* Note that 600k has no special significance other than being
- big enough to handle the almost-2MB-sized lumps that
- constitute HSwin32*.o. */
- debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
- return false;
- }
-#endif
-
/* .BSS Section is initialized in ocGetNames_PEi386
but we need the Sections array initialized here already. */
Section *sections;
@@ -1111,25 +1186,109 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
"ocVerifyImage_PEi386(sections)");
oc->sections = sections;
oc->n_sections = info->numberOfSections + 1;
+ oc->info = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1,
+ "ocVerifyImage_PEi386(info)");
+ oc->info->secBytesTotal = 0;
+ oc->info->secBytesUsed = 0;
+ oc->info->init = NULL;
+ oc->info->finit = NULL;
+ oc->info->ch_info = info;
+
+    /* Copy the tables over from the object file. Copying them allows us to
+       simplify the indexing and to release the object file immediately after
+       this step, as all the information we need will then be available.
+       After loading we can also release everything in the info structure,
+       as it won't be needed again, freeing up more memory.
+ COFF_symbol is a union type, so we have to "adjust" the array to be able
+ to access it using normal subscript notation. This eliminates the complex
+ indexing later on. */
+ uint32_t s_symbols = info->numberOfSymbols * sizeof(COFF_symbol);
+ uint32_t sym_size = getSymbolSize (info);
+ oc->info->symbols
+ = stgMallocBytes (s_symbols, "ocVerifyImage_PEi386(oc->info->symbols)");
+ for (i = 0; i < info->numberOfSymbols; i++)
+ memcpy (oc->info->symbols+i, (char*)symtab + sym_size * i, sym_size);
+
+ uint32_t n_strtab = (*(uint32_t*)strtab) - PEi386_STRTAB_OFFSET;
+ oc->info->str_tab
+ = stgMallocBytes (n_strtab, "ocVerifyImage_PEi386(oc->info->str_tab)");
+ memcpy (oc->info->str_tab, strtab + PEi386_STRTAB_OFFSET, n_strtab);
/* Initialize the Sections */
for (i = 0; i < info->numberOfSections; i++) {
+ uint32_t relocs_offset;
COFF_section* sectab_i
= (COFF_section*)
myindex(sizeof_COFF_section, sectab, i);
- /* Calculate the start of the data section */
- sections[i].start = oc->image + sectab_i->PointerToRawData;
+ Section *section = &sections[i];
+ /* Calculate the start of the section data. */
+ section->start = oc->image + sectab_i->PointerToRawData;
+ section->size = sectab_i->SizeOfRawData;
+ section->info = stgCallocBytes (sizeof(struct SectionFormatInfo), 1,
+ "ocVerifyImage_PEi386(section.info)");
+ section->info->name = get_name_string (sectab_i->Name, oc);
+ section->info->alignment = getSectionAlignment (*section);
+ section->info->props = sectab_i->Characteristics;
+ section->info->virtualSize = sectab_i->Misc.VirtualSize;
+ section->info->virtualAddr = sectab_i->VirtualAddress;
+
+ COFF_reloc* reltab
+ = (COFF_reloc*) (oc->image + sectab_i->PointerToRelocations);
+
+ if (section->info->props & IMAGE_SCN_LNK_NRELOC_OVFL ) {
+ /* If the relocation field (a short) has overflowed, the
+ * real count can be found in the first reloc entry.
+ *
+ * See Section 4.1 (last para) of the PE spec (rev6.0).
+ */
+ COFF_reloc* rel = (COFF_reloc*)
+ myindex ( sizeof_COFF_reloc, reltab, 0 );
+ noRelocs = rel->VirtualAddress;
+ relocs_offset = 1;
+ } else {
+ noRelocs = sectab_i->NumberOfRelocations;
+ relocs_offset = 0;
+ }
+
+ section->info->noRelocs = noRelocs;
+ section->info->relocs = NULL;
+ if (noRelocs > 0) {
+ section->info->relocs
+ = stgMallocBytes (noRelocs * sizeof (COFF_reloc),
+ "ocVerifyImage_PEi386(section->info->relocs)");
+ memcpy (section->info->relocs, reltab + relocs_offset,
+ noRelocs * sizeof (COFF_reloc));
+ }
+
+ oc->info->secBytesTotal += getAlignedValue (section->size, *section);
}
- /* No further verification after this point; only debug printing. */
+    /* The last entry in the Sections array is the global .bss section
+       (initialized in ocGetNames_PEi386); it doesn't need an info, so set
+       it to NULL. */
+ sections[info->numberOfSections].info = NULL;
+
+ /* Calculate space for trampolines nearby.
+ We get back 8-byte aligned memory (is that guaranteed?), but
+ the offsets to the sections within the file are all 4 mod 8
+ (is that guaranteed?). We therefore need to offset the image
+ by 4, so that all the pointers are 8-byte aligned, so that
+ pointer tagging works. */
+    /* In the 32-bit case we don't need this, hence we use the macro
+       PEi386_IMAGE_OFFSET, which equals 4 in the 64-bit case and 0 in the
+       32-bit case. */
+ /* We allocate trampolines area for all symbols right behind
+ image data, aligned on 8. */
+ oc->info->trampoline
+ = (PEi386_IMAGE_OFFSET + 2 * default_alignment
+ + oc->info->secBytesTotal) & ~0x7;
+ oc->info->secBytesTotal
+ = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra);
+
+ /* No further verification after this point; only debug printing. */
i = 0;
IF_DEBUG(linker, i=1);
- if (i == 0)
- {
- stgFree (info);
- return true;
- }
+ if (i == 0) return true;
debugBelch("sectab offset = %" FMT_SizeT "\n",
((uint8_t*)sectab) - ((uint8_t*)oc->image) );
@@ -1176,14 +1335,12 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
else
{
debugBelch( "COFF Type: UNKNOWN\n");
- stgFree (info);
return false;
}
/* Print the section table. */
debugBelch("\n" );
for (i = 0; i < info->numberOfSections; i++) {
- COFF_reloc* reltab;
COFF_section* sectab_i
= (COFF_section*)
myindex ( sizeof_COFF_section, sectab, i );
@@ -1194,7 +1351,7 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
" name `",
i
);
- printName ( sectab_i->Name, strtab );
+ printName (sectab_i->Name, oc);
debugBelch(
"'\n"
" vsize %lu\n"
@@ -1203,54 +1360,38 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
" data off 0x%p\n"
" num rel %hu\n"
" off rel %lu\n"
- " ptr raw 0x%lx\n",
+ " ptr raw 0x%lx\n"
+ " align %u\n"
+ " data adj %zu\n",
sectab_i->Misc.VirtualSize,
sectab_i->VirtualAddress,
sectab_i->SizeOfRawData,
section.start,
sectab_i->NumberOfRelocations,
sectab_i->PointerToRelocations,
- sectab_i->PointerToRawData
+ sectab_i->PointerToRawData,
+ getSectionAlignment (section),
+ getAlignedValue (section.size, section)
);
- reltab = (COFF_reloc*) (
- ((uint8_t*)(oc->image)) + sectab_i->PointerToRelocations
- );
-
- if ( sectab_i->Characteristics & IMAGE_SCN_LNK_NRELOC_OVFL ) {
- /* If the relocation field (a short) has overflowed, the
- * real count can be found in the first reloc entry.
- *
- * See Section 4.1 (last para) of the PE spec (rev6.0).
- */
- COFF_reloc* rel = (COFF_reloc*)
- myindex ( sizeof_COFF_reloc, reltab, 0 );
- noRelocs = rel->VirtualAddress;
- j = 1;
- } else {
- noRelocs = sectab_i->NumberOfRelocations;
- j = 0;
- }
- for (; j < noRelocs; j++) {
- COFF_symbol* sym;
- COFF_reloc* rel = (COFF_reloc*)
- myindex ( sizeof_COFF_reloc, reltab, j );
+ noRelocs = section.info->noRelocs;
+ for (uint32_t j = 0; j < noRelocs; j++) {
+ COFF_reloc rel = section.info->relocs[j];
debugBelch(
" type 0x%-4x vaddr 0x%-8lx name `",
- (uint32_t)rel->Type,
- rel->VirtualAddress );
- sym = (COFF_symbol*)
- myindex ( getSymbolSize (info), symtab, rel->SymbolTableIndex );
- printName ( getSymShortName (info, sym), strtab );
+ rel.Type,
+ rel.VirtualAddress );
+ COFF_symbol sym = oc->info->symbols[rel.SymbolTableIndex];
+ printName (getSymShortName (info, &sym), oc);
debugBelch("'\n" );
}
debugBelch("\n" );
}
debugBelch("\n" );
- debugBelch("string table has size 0x%x\n", * (uint32_t*)strtab );
+ debugBelch("string table has size 0x%x\n", n_strtab + PEi386_STRTAB_OFFSET);
debugBelch("---START of string table---\n");
- for (i = 4; i < *(uint32_t*)strtab; i++) {
+ for (i = 4; i < n_strtab; i++) {
if (strtab[i] == 0)
debugBelch("\n"); else
debugBelch("%c", strtab[i] );
@@ -1258,18 +1399,15 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
debugBelch("--- END of string table---\n");
debugBelch("\n" );
- i = 0;
- while (1) {
- COFF_symbol* symtab_i;
- if (i >= info->numberOfSymbols) break;
- symtab_i = (COFF_symbol*)
- myindex ( getSymbolSize (info), symtab, i );
+
+ for (i = 0; i < info->numberOfSymbols; i++) {
+ COFF_symbol* symtab_i = &oc->info->symbols[i];
debugBelch(
"symbol %d\n"
" name `",
i
);
- printName ( getSymShortName (info, symtab_i), strtab );
+ printName (getSymShortName (info, symtab_i), oc);
debugBelch(
"'\n"
" value 0x%x\n"
@@ -1284,84 +1422,22 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
getSymNumberOfAuxSymbols (info, symtab_i)
);
i += getSymNumberOfAuxSymbols (info, symtab_i);
- i++;
}
debugBelch("\n" );
- stgFree (info);
return true;
}
bool
ocGetNames_PEi386 ( ObjectCode* oc )
{
- COFF_section* sectab;
- COFF_symbol* symtab;
- uint8_t* strtab;
- bool has_code_section = false;
+ bool has_code_section = false;
- uint8_t* sname;
+ SymbolName* sname;
SymbolAddr* addr;
unsigned int i;
- COFF_HEADER_INFO *info = getHeaderInfo (oc);
-
- sectab = (COFF_section*) (
- ((uint8_t*)(oc->image))
- + info->sizeOfHeader + info->sizeOfOptionalHeader
- );
- symtab = (COFF_symbol*) (
- ((uint8_t*)(oc->image))
- + info->pointerToSymbolTable
- );
- strtab = ((uint8_t*)(oc->image))
- + info->pointerToSymbolTable
- + info->numberOfSymbols * getSymbolSize (info);
-
- /* Allocate space for any (local, anonymous) .bss sections. */
-
- for (i = 0; i < info->numberOfSections; i++) {
- uint32_t bss_sz;
- uint8_t* zspace;
- COFF_section* sectab_i
- = (COFF_section*)
- myindex ( sizeof_COFF_section, sectab, i );
-
- char *secname = cstring_from_section_name(sectab_i->Name, strtab);
-
- if (0 != strcmp(secname, ".bss")) {
- stgFree(secname);
- continue;
- }
-
- stgFree(secname);
-
- /* sof 10/05: the PE spec text isn't too clear regarding what
- * the SizeOfRawData field is supposed to hold for object
- * file sections containing just uninitialized data -- for executables,
- * it is supposed to be zero; unclear what it's supposed to be
- * for object files. However, VirtualSize is guaranteed to be
- * zero for object files, which definitely suggests that SizeOfRawData
- * will be non-zero (where else would the size of this .bss section be
- * stored?) Looking at the COFF_section info for incoming object files,
- * this certainly appears to be the case.
- *
- * => I suspect we've been incorrectly handling .bss sections in (relocatable)
- * object files up until now. This turned out to bite us with ghc-6.4.1's use
- * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
- * variable decls into the .bss section. (The specific function in Q which
- * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
- */
- if (sectab_i->Misc.VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
- /* This is a non-empty .bss section.
- Allocate zeroed space for it */
- bss_sz = sectab_i->Misc.VirtualSize;
- if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
- zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
- oc->sections[i].start = zspace;
- addProddableBlock(oc, zspace, bss_sz);
- /* debugBelch("BSS anon section at 0x%x\n", zspace); */
- }
+ COFF_HEADER_INFO *info = oc->info->ch_info;
/* Copy section information into the ObjectCode. */
@@ -1370,85 +1446,154 @@ ocGetNames_PEi386 ( ObjectCode* oc )
uint8_t* end;
uint32_t sz;
- /* By default consider all section as CODE or DATA, which means we want to load them. */
- SectionKind kind
- = SECTIONKIND_CODE_OR_RODATA;
- COFF_section* sectab_i
- = (COFF_section*)
- myindex ( sizeof_COFF_section, sectab, i );
- Section section = oc->sections[i];
+      /* By default consider all sections as CODE or DATA,
+         which means we want to load them. */
+ SectionKind kind = SECTIONKIND_CODE_OR_RODATA;
+ Section section = oc->sections[i];
- char *secname = cstring_from_section_name(sectab_i->Name, strtab);
+ IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name ));
- IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
-
- /* The PE file section flag indicates whether the section contains code or data. */
- if (sectab_i->Characteristics & IMAGE_SCN_CNT_CODE) {
- has_code_section = has_code_section || sectab_i->SizeOfRawData > 0;
+ /* The PE file section flag indicates whether the section
+ contains code or data. */
+ if (section.info->props & IMAGE_SCN_CNT_CODE) {
+ has_code_section = has_code_section || section.size > 0;
kind = SECTIONKIND_CODE_OR_RODATA;
}
- if (sectab_i->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
+ if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA)
kind = SECTIONKIND_CODE_OR_RODATA;
/* Check next if it contains any uninitialized data */
- if (sectab_i->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
kind = SECTIONKIND_RWDATA;
- /* Finally check if it can be discarded. This will also ignore .debug sections */
- if (sectab_i->Characteristics & IMAGE_SCN_MEM_DISCARDABLE ||
- sectab_i->Characteristics & IMAGE_SCN_LNK_REMOVE)
+ /* Finally check if it can be discarded.
+ This will also ignore .debug sections */
+ if ( section.info->props & IMAGE_SCN_MEM_DISCARDABLE
+ || section.info->props & IMAGE_SCN_LNK_REMOVE)
kind = SECTIONKIND_OTHER;
- if (0==strncmp(".ctors", (char*)secname, 6))
+ if (0==strncmp(".ctors", section.info->name, 6)) {
kind = SECTIONKIND_INIT_ARRAY;
+ oc->info->init = &oc->sections[i];
+ }
+
+ if (0==strncmp(".dtors", section.info->name, 6)) {
+ kind = SECTIONKIND_FINIT_ARRAY;
+ oc->info->finit = &oc->sections[i];
+ }
- if (0==strncmp(".idata", (char*)secname, 6))
+ if ( 0 == strncmp(".stab" , section.info->name, 5 )
+ || 0 == strncmp(".stabstr" , section.info->name, 8 )
+ || 0 == strncmp(".pdata" , section.info->name, 6 )
+ || 0 == strncmp(".xdata" , section.info->name, 6 )
+ || 0 == strncmp(".debug" , section.info->name, 6 )
+ || 0 == strncmp(".rdata$zzz", section.info->name, 10))
+ kind = SECTIONKIND_DEBUG;
+
+ if (0==strncmp(".idata", section.info->name, 6))
kind = SECTIONKIND_IMPORT;
/* See Note [BFD import library]. */
- if (0==strncmp(".idata$7", (char*)secname, 8))
+ if (0==strncmp(".idata$7", section.info->name, 8))
kind = SECTIONKIND_IMPORT_LIBRARY;
- if (0==strncmp(".idata$6", (char*)secname, 8)) {
+ if (0==strncmp(".idata$6", section.info->name, 8)) {
/* The first two bytes contain the ordinal of the function
in the format of lowpart highpart. The two bytes combined
for the total range of 16 bits which is the function export limit
of DLLs. */
- sname = ((uint8_t*)section.start)+2;
- COFF_symbol* symtab_i = (COFF_symbol*)
- myindex ( getSymbolSize(info), symtab, info->numberOfSymbols-1 );
- addr = (char*)cstring_from_COFF_symbol_name(
- getSymShortName (info, symtab_i),
- strtab);
+ sname = (SymbolName*)section.start+2;
+ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+ addr = get_sym_name( getSymShortName (info, sym), oc);
IF_DEBUG(linker,
debugBelch("addImportSymbol `%s' => `%s'\n",
sname, (char*)addr));
- if (!ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname,
- addr, false, oc))
+            /* We're going to free any data associated with the import
+               library without copying the sections, so we have to duplicate
+               the symbol name and value before the pointers become invalid. */
+ sname = strdup (sname);
+ addr = strdup (addr);
+ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+ addr, false, oc)) {
+ releaseOcInfo (oc);
+ stgFree (oc->image);
+ oc->image = NULL;
return false;
+ }
setImportSymbol (oc, sname);
            /* Don't process this oc any further. Just exit. */
oc->n_symbols = 0;
oc->symbols = NULL;
+ stgFree (oc->image);
+ oc->image = NULL;
+ releaseOcInfo (oc);
return true;
}
- ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->Misc.VirtualSize == 0);
- sz = sectab_i->SizeOfRawData;
- if (sz < sectab_i->Misc.VirtualSize) sz = sectab_i->Misc.VirtualSize;
+ /* Allocate space for any (local, anonymous) .bss sections. */
+ if (0==strncmp(".bss", section.info->name, 4)) {
+ uint32_t bss_sz;
+ uint8_t* zspace;
+
+ /* sof 10/05: the PE spec text isn't too clear regarding what
+ * the SizeOfRawData field is supposed to hold for object
+ * file sections containing just uninitialized data -- for executables,
+ * it is supposed to be zero; unclear what it's supposed to be
+ * for object files. However, VirtualSize is guaranteed to be
+ * zero for object files, which definitely suggests that SizeOfRawData
+ * will be non-zero (where else would the size of this .bss section be
+ * stored?) Looking at the COFF_section info for incoming object files,
+ * this certainly appears to be the case.
+ *
+ * => I suspect we've been incorrectly handling .bss sections in
+ * (relocatable) object files up until now. This turned out to bite us
+ * with ghc-6.4.1's use of gcc-3.4.x, which has started to emit
+ * initially-zeroed-out local 'static' variable decls into the .bss
+             * section. (The specific function in question which triggered this is
+ * libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
+ *
+ * TODO: check if this comment is still relevant.
+ */
+ if (section.info->virtualSize == 0 && section.size == 0) continue;
+ /* This is a non-empty .bss section.
+ Allocate zeroed space for it */
+ bss_sz = section.info->virtualSize;
+ if (bss_sz < section.size) { bss_sz = section.size; }
+          bss_sz += section.info->alignment; /* slack so the aligned start still fits */
+ zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
+ oc->sections[i].start = getAlignedMemory(zspace, section);
+ oc->sections[i].size = bss_sz;
+ addProddableBlock(oc, zspace, bss_sz);
+ /* debugBelch("BSS anon section at 0x%x\n", zspace); */
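+          /* A worked example, assuming 8-byte section alignment: a .bss
+             section with virtualSize 10 allocates 10+8 = 18 zeroed bytes,
+             and getAlignedMemory then bumps the start pointer to the next
+             8-byte boundary, leaving at least 10 usable bytes. */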
+ }
+
+      /* Allocate space for the sections since we have a real oc.
+         We initially mark the region as non-accessible, but will adjust
+         the protection as we go along. */
+ if (!oc->info->image) {
+ /* See Note [Memory allocation]. */
+ ASSERT(code_heap);
+ oc->info->image
+ = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal);
+ if (!oc->info->image)
+ barf ("Could not allocate any heap memory from private heap.");
+ }
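+
+      /* HEAP_ZERO_MEMORY zero-initialises the freshly allocated region, so
+         alignment gaps left between copied sections read as zeroes rather
+         than stale heap contents. */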
+
+ ASSERT(section.size == 0 || section.info->virtualSize == 0);
+ sz = section.size;
+ if (sz < section.info->virtualSize) sz = section.info->virtualSize;
start = section.start;
end = start + sz - 1;
if (kind != SECTIONKIND_OTHER && end >= start) {
- addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
- addProddableBlock(oc, start, sz);
+ /* See Note [Section alignment]. */
+ addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz);
+ addProddableBlock(oc, oc->sections[i].start, sz);
}
-
- stgFree(secname);
}
/* Copy exported symbols into the ObjectCode. */
@@ -1460,19 +1605,15 @@ ocGetNames_PEi386 ( ObjectCode* oc )
/* Work out the size of the global BSS section */
StgWord globalBssSize = 0;
for (i=0; i < info->numberOfSymbols; i++) {
- COFF_symbol* symtab_i;
- symtab_i = (COFF_symbol*)
- myindex ( getSymbolSize (info), symtab, i );
- if (getSymSectionNumber (info, symtab_i) == IMAGE_SYM_UNDEFINED
- && getSymValue (info, symtab_i) > 0
- && getSymStorageClass (info, symtab_i) != IMAGE_SYM_CLASS_SECTION) {
- globalBssSize += getSymValue (info, symtab_i);
- }
- i += getSymNumberOfAuxSymbols (info, symtab_i);
+ COFF_symbol* sym = &oc->info->symbols[i];
+ if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED
+ && getSymValue (info, sym) > 0
+ && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) {
+ globalBssSize += getSymValue (info, sym);
+ }
+ i += getSymNumberOfAuxSymbols (info, sym);
}
- stgFree (info);
-
/* Allocate BSS space */
SymbolAddr* bss = NULL;
if (globalBssSize > 0) {
@@ -1488,21 +1629,22 @@ ocGetNames_PEi386 ( ObjectCode* oc )
SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
}
+  /* At this point we're done with oc->image and all relevant memory has
+     been copied. Release it to free up the memory. */
+ stgFree (oc->image);
+ oc->image = NULL;
+
for (i = 0; i < (uint32_t)oc->n_symbols; i++) {
- COFF_symbol* symtab_i;
- symtab_i = (COFF_symbol*)
- myindex ( getSymbolSize (info), symtab, i );
+ COFF_symbol* sym = &oc->info->symbols[i];
- int32_t secNumber = getSymSectionNumber (info, symtab_i);
- uint32_t symValue = getSymValue (info, symtab_i);
- uint8_t symStorageClass = getSymStorageClass (info, symtab_i);
+ int32_t secNumber = getSymSectionNumber (info, sym);
+ uint32_t symValue = getSymValue (info, sym);
+ uint8_t symStorageClass = getSymStorageClass (info, sym);
addr = NULL;
bool isWeak = false;
- Section *section = secNumber > 0
- ? &oc->sections[secNumber-1]
- : NULL;
- sname = cstring_from_COFF_symbol_name(getSymShortName (info, symtab_i), strtab);
+ sname = get_sym_name (getSymShortName (info, sym), oc);
+ Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL;
if ( secNumber != IMAGE_SYM_UNDEFINED
&& secNumber > 0
@@ -1514,19 +1656,12 @@ ocGetNames_PEi386 ( ObjectCode* oc )
the address of the symbol is:
address of relevant section + offset in section
*/
- COFF_section* sectabent
- = (COFF_section*) myindex ( sizeof_COFF_section,
- sectab, secNumber-1 );
if (symStorageClass == IMAGE_SYM_CLASS_EXTERNAL
|| ( symStorageClass == IMAGE_SYM_CLASS_STATIC
- && sectabent->Characteristics & IMAGE_SCN_LNK_COMDAT
- && section)
+ && section->info->props & IMAGE_SCN_LNK_COMDAT)
) {
- addr = (void*)((size_t)section->start
- + symValue);
- if (sectabent->Characteristics & IMAGE_SCN_LNK_COMDAT) {
- isWeak = true;
- }
+ addr = (SymbolAddr*)((size_t)section->start + symValue);
+ isWeak = section->info->props & IMAGE_SCN_LNK_COMDAT;
}
}
else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
@@ -1545,7 +1680,7 @@ ocGetNames_PEi386 ( ObjectCode* oc )
/* This is an import section. We should load the dll and lookup
the symbols.
See Note [BFD import library]. */
- char* dllName = (char*)section->start;
+ char* dllName = section->start;
if (strlen(dllName) == 0 || dllName[0] == 0 || has_code_section)
continue;
@@ -1553,9 +1688,8 @@ ocGetNames_PEi386 ( ObjectCode* oc )
HsPtr token = addLibrarySearchPath(dirName);
stgFree(dirName);
- symtab_i = (COFF_symbol*)
- myindex ( getSymbolSize (info), symtab, oc->n_symbols-1 );
- sname = cstring_from_COFF_symbol_name(getSymShortName (info, symtab_i), strtab);
+ sym = &oc->info->symbols[oc->n_symbols-1];
+ sname = get_sym_name (getSymShortName (info, sym), oc);
IF_DEBUG(linker,
debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n",
@@ -1579,10 +1713,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
         /* the symbols are named <name>_iname when defined, but are named
            _head_<name> when looked up. (Ugh. Thanks, GCC.) So correct it when
            stored so we don't have to correct it each time it's retrieved. */
- int size = strlen((char*)sname)+1;
- char *tmp = stgMallocBytes(size*sizeof(char),
+ int size = strlen(sname)+1;
+ char *tmp = stgMallocBytes(size * sizeof(char),
"ocGetNames_PEi386");
- strncpy(tmp, (char*)sname, size);
+ strncpy (tmp, sname, size);
char *pos = strstr(tmp, "_iname");
/* drop anything after the name. There are some inconsistencies with
whitespaces trailing the name. */
@@ -1590,64 +1724,43 @@ ocGetNames_PEi386 ( ObjectCode* oc )
int start = 0;
/* msys2 project's import lib builder has some inconsistent name
- manglings. Their names start with _ or __ yet they drop this when
+ mangling. Their names start with _ or __ yet they drop this when
making the _head_ symbol. So do the same. */
while (tmp[start]=='_')
start++;
- snprintf((char*)sname, size, "_head_%s", tmp+start);
+ snprintf (sname, size, "_head_%s", tmp+start);
sname[size-start]='\0';
stgFree(tmp);
- if (!ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname,
+ sname = strdup (sname);
+ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
addr, false, oc))
return false;
+
break;
}
if ((addr != NULL || isWeak)
&& (!section || (section && section->kind != SECTIONKIND_IMPORT))) {
/* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
- IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname));
+ sname = strdup (sname);
+ IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr, sname));
ASSERT(i < (uint32_t)oc->n_symbols);
- /* cstring_from_COFF_symbol_name always succeeds. */
- oc->symbols[i] = (SymbolName*)sname;
+ oc->symbols[i] = sname;
if (isWeak) {
setWeakSymbol(oc, sname);
}
- if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
- isWeak, oc)) {
+ if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr,
+ isWeak, oc))
return false;
- }
} else {
/* We're skipping the symbol, but if we ever load this
object file we'll want to skip it then too. */
oc->symbols[i] = NULL;
-
-# if 0
- debugBelch(
- "IGNORING symbol %d\n"
- " name `",
- i
- );
- printName ( getSymShortName (info, symtab_i), strtab );
- debugBelch(
- "'\n"
- " value 0x%x\n"
- " 1+sec# %d\n"
- " type 0x%x\n"
- " sclass 0x%x\n"
- " nAux %d\n",
- symValue,
- getSymSectionNumber (info, symtab_i),
- getSymType (info, symtab_i),
- getSymStorageClass (info, symtab_i),
- getSymNumberOfAuxSymbols (info, symtab_i)
- );
-# endif
}
- i += getSymNumberOfAuxSymbols (info, symtab_i);
+ i += getSymNumberOfAuxSymbols (info, sym);
}
return true;
@@ -1661,12 +1774,18 @@ ocGetNames_PEi386 ( ObjectCode* oc )
bool
ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc )
{
- oc->symbol_extras = (SymbolExtra*)(oc->image - PEi386_IMAGE_OFFSET
- + ((PEi386_IMAGE_OFFSET + oc->fileSize + 0x7) & ~0x7));
+  /* If the ObjectCode's info has already been released we don't need a
+     trampoline; it was most likely an import library whose data we
+     discarded earlier. */
+ if (!oc->info)
+ return false;
+
+ const int mask = default_alignment - 1;
+ size_t origin = oc->info->trampoline;
+ oc->symbol_extras
+ = (SymbolExtra*)((uintptr_t)(oc->info->image + origin + mask) & ~mask);
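+  /* Align-up idiom: adding (alignment - 1) and masking off the low bits
+     rounds the address up to the next multiple of default_alignment. E.g.
+     with default_alignment == 8, 0x1003 becomes (0x1003 + 7) & ~7 == 0x1008. */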
oc->first_symbol_extra = 0;
- COFF_HEADER_INFO *info = getHeaderInfo (oc);
- oc->n_symbol_extras = info->numberOfSymbols;
- stgFree (info);
+ COFF_HEADER_INFO *info = oc->info->ch_info;
+ oc->n_symbol_extras = info->numberOfSymbols;
return true;
}
@@ -1700,11 +1819,7 @@ makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, char* symbol )
bool
ocResolve_PEi386 ( ObjectCode* oc )
{
- COFF_section* sectab;
- COFF_symbol* symtab;
- uint8_t* strtab;
-
- uint32_t A;
+ uint64_t A;
size_t S;
SymbolAddr* pP;
@@ -1716,107 +1831,41 @@ ocResolve_PEi386 ( ObjectCode* oc )
uint8_t symbol[1000];
/* debugBelch("resolving for %s\n", oc->fileName); */
- COFF_HEADER_INFO *info = getHeaderInfo (oc);
-
- sectab = (COFF_section*) (
- ((uint8_t*)(oc->image))
- + info->sizeOfHeader + info->sizeOfOptionalHeader
- );
- symtab = (COFF_symbol*) (
- ((uint8_t*)(oc->image))
- + info->pointerToSymbolTable
- );
- strtab = ((uint8_t*)(oc->image))
- + info->pointerToSymbolTable
- + info->numberOfSymbols * getSymbolSize (info);
-
+ COFF_HEADER_INFO *info = oc->info->ch_info;
uint32_t numberOfSections = info->numberOfSections;
for (i = 0; i < numberOfSections; i++) {
- COFF_section* sectab_i
- = (COFF_section*)
- myindex ( sizeof_COFF_section, sectab, i );
- COFF_reloc* reltab
- = (COFF_reloc*) (
- ((uint8_t*)(oc->image)) + sectab_i->PointerToRelocations
- );
Section section = oc->sections[i];
- char *secname = cstring_from_section_name(sectab_i->Name, strtab);
-
       /* Ignore sections which contain stabs debugging information. */
- if ( 0 == strcmp(".stab", (char*)secname)
- || 0 == strcmp(".stabstr", (char*)secname)
- || 0 == strncmp(".pdata", (char*)secname, 6)
- || 0 == strncmp(".xdata", (char*)secname, 6)
- || 0 == strncmp(".debug", (char*)secname, 6)
- || 0 == strcmp(".rdata$zzz", (char*)secname)) {
- stgFree(secname);
+ if (section.kind == SECTIONKIND_DEBUG)
continue;
- }
-
- stgFree(secname);
-
- if ( sectab_i->Characteristics & IMAGE_SCN_LNK_NRELOC_OVFL ) {
- /* If the relocation field (a short) has overflowed, the
- * real count can be found in the first reloc entry.
- *
- * See Section 4.1 (last para) of the PE spec (rev6.0).
- *
- * Nov2003 update: the GNU linker still doesn't correctly
- * handle the generation of relocatable object files with
- * overflown relocations. Hence the output to warn of potential
- * troubles.
- */
- COFF_reloc* rel = (COFF_reloc*)
- myindex ( sizeof_COFF_reloc, reltab, 0 );
- noRelocs = rel->VirtualAddress;
-
- /* 10/05: we now assume (and check for) a GNU ld that is capable
- * of handling object files with (>2^16) of relocs.
- */
-#if 0
- debugBelch("WARNING: Overflown relocation field (# relocs found: %u)\n",
- noRelocs);
-#endif
- j = 1;
- } else {
- noRelocs = sectab_i->NumberOfRelocations;
- j = 0;
- }
- for (; j < noRelocs; j++) {
+ noRelocs = section.info->noRelocs;
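+      /* Relocation-count overflow (IMAGE_SCN_LNK_NRELOC_OVFL, where the
+         real count lives in the first reloc entry) is presumably resolved
+         when section.info->noRelocs is populated, so no special case is
+         needed here. */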
+ for (j = 0; j < noRelocs; j++) {
COFF_symbol* sym;
- COFF_reloc* reltab_j
- = (COFF_reloc*)
- myindex ( sizeof_COFF_reloc, reltab, j );
+ COFF_reloc* reloc = &section.info->relocs[j];
/* the location to patch */
- pP = (void*)(
- (size_t)section.start
- + reltab_j->VirtualAddress
- - sectab_i->VirtualAddress
+ pP = (SymbolAddr*)(
+ (uintptr_t)section.start
+ + (uintptr_t)reloc->VirtualAddress
+ - (uintptr_t)section.info->virtualAddr
);
/* the existing contents of pP */
A = *(uint32_t*)pP;
/* the symbol to connect to */
- sym = (COFF_symbol*)
- myindex ( getSymbolSize (info),
- symtab, reltab_j->SymbolTableIndex );
-#if defined(x86_64_HOST_ARCH)
- uint64_t symIndex = ((uint64_t)myindex(getSymbolSize (info), symtab,
- reltab_j->SymbolTableIndex)
- - (uint64_t)symtab) / getSymbolSize (info);
-#endif
+ uint64_t symIndex = reloc->SymbolTableIndex;
+ sym = &oc->info->symbols[symIndex];
IF_DEBUG(linker,
debugBelch(
"reloc sec %2d num %3d: type 0x%-4x "
"vaddr 0x%-8lx name `",
i, j,
- (uint32_t)reltab_j->Type,
- reltab_j->VirtualAddress );
- printName ( getSymShortName (info, sym), strtab );
+ reloc->Type,
+ reloc->VirtualAddress );
+ printName (getSymShortName (info, sym), oc);
debugBelch("'\n" ));
if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) {
@@ -1824,21 +1873,22 @@ ocResolve_PEi386 ( ObjectCode* oc )
S = ((size_t)(section.start))
+ ((size_t)(getSymValue (info, sym)));
} else {
- copyName ( getSymShortName (info, sym), strtab, symbol, 1000-1 );
+ copyName ( getSymShortName (info, sym), oc, symbol,
+ sizeof(symbol)-1 );
S = (size_t) lookupSymbol_( (char*)symbol );
if ((void*)S == NULL) {
errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol);
- stgFree (info);
+ releaseOcInfo (oc);
return false;
}
}
/* All supported relocations write at least 4 bytes */
checkProddableBlock(oc, pP, 4);
- switch (reltab_j->Type) {
+ switch (reloc->Type) {
#if defined(i386_HOST_ARCH)
case IMAGE_REL_I386_DIR32:
case IMAGE_REL_I386_DIR32NB:
- *(uint32_t *)pP = ((uint32_t)S) + A;
+ *(uint32_t *)pP = S + A;
break;
case IMAGE_REL_I386_REL32:
/* Tricky. We have to insert a displacement at
@@ -1874,20 +1924,21 @@ ocResolve_PEi386 ( ObjectCode* oc )
uint64_t A;
checkProddableBlock(oc, pP, 8);
A = *(uint64_t*)pP;
- *(uint64_t *)pP = ((uint64_t)S) + ((uint64_t)A);
+ *(uint64_t *)pP = S + A;
break;
}
case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
case 3: /* R_X86_64_32S (ELF constant 11) - IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
{
- size_t v;
- v = S + ((size_t)A);
+ uint64_t v;
+ v = S + A;
if (v >> 32) {
- copyName ( getSymShortName (info, sym), strtab, symbol, 1000-1 );
+ copyName (getSymShortName (info, sym), oc,
+ symbol, sizeof(symbol)-1);
S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
/* And retry */
- v = S + ((size_t)A);
+ v = S + A;
if (v >> 32) {
barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s",
v, (char *)symbol);
@@ -1899,13 +1950,14 @@ ocResolve_PEi386 ( ObjectCode* oc )
case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */
{
intptr_t v;
- v = ((intptr_t)S) + ((intptr_t)(int32_t)A) - ((intptr_t)pP) - 4;
+ v = S + (int32_t)A - ((intptr_t)pP) - 4;
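+            /* The displacement is relative to the end of the 4-byte field
+               being patched, hence the extra -4: e.g. with addend A == 0, a
+               target S == pP + 4 encodes a displacement of 0. */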
if ((v >> 32) && ((-v) >> 32)) {
/* Make the trampoline then */
- copyName ( getSymShortName (info, sym), strtab, symbol, 1000-1 );
+ copyName (getSymShortName (info, sym),
+ oc, symbol, sizeof(symbol)-1);
S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
/* And retry */
- v = ((intptr_t)S) + ((intptr_t)(int32_t)A) - ((intptr_t)pP) - 4;
+ v = S + (int32_t)A - ((intptr_t)pP) - 4;
if ((v >> 32) && ((-v) >> 32)) {
barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s",
v, (char *)symbol);
@@ -1917,15 +1969,14 @@ ocResolve_PEi386 ( ObjectCode* oc )
#endif
default:
debugBelch("%" PATH_FMT ": unhandled PEi386 relocation type %d\n",
- oc->fileName, reltab_j->Type);
- stgFree (info);
+ oc->fileName, reloc->Type);
+ releaseOcInfo (oc);
return false;
}
}
}
- stgFree (info);
IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName));
return true;
}
@@ -1948,51 +1999,30 @@ ocResolve_PEi386 ( ObjectCode* oc )
bool
ocRunInit_PEi386 ( ObjectCode *oc )
{
- COFF_section* sectab;
- uint8_t* strtab;
- unsigned int i;
-
- COFF_HEADER_INFO *info = getHeaderInfo (oc);
- sectab = (COFF_section*) (
- ((uint8_t*)(oc->image))
- + info->sizeOfHeader + info->sizeOfOptionalHeader
- );
- strtab = ((uint8_t*)(oc->image))
- + info->pointerToSymbolTable
- + info->numberOfSymbols * getSymbolSize (info);
-
- int argc, envc;
- char **argv, **envv;
-
- getProgArgv(&argc, &argv);
- getProgEnvv(&envc, &envv);
-
- /* TODO: This part is just looking for .ctors section. This can be optimized
- and should for objects compiled with function sections as these produce a
- large amount of sections.
-
- This can be done by saving the index of the .ctor section in the ObjectCode
- from ocGetNames. Then this loop isn't needed. */
- for (i = 0; i < info->numberOfSections; i++) {
- COFF_section* sectab_i
- = (COFF_section*)
- myindex ( sizeof_COFF_section, sectab, i );
- Section section = oc->sections[i];
- char *secname = cstring_from_section_name(sectab_i->Name, strtab);
- if (0 == strcmp(".ctors", (char*)secname)) {
- uint8_t *init_startC = section.start;
- init_t *init_start, *init_end, *init;
- init_start = (init_t*)init_startC;
- init_end = (init_t*)(init_startC + sectab_i->SizeOfRawData);
- // ctors are run *backwards*!
- for (init = init_end - 1; init >= init_start; init--) {
- (*init)(argc, argv, envv);
- }
- }
- }
- stgFree (info);
- freeProgEnvv(envc, envv);
+ if (!oc || !oc->info || !oc->info->init) {
return true;
+ }
+
+ int argc, envc;
+ char **argv, **envv;
+
+ getProgArgv(&argc, &argv);
+ getProgEnvv(&envc, &envv);
+
+ Section section = *oc->info->init;
+ ASSERT(SECTIONKIND_INIT_ARRAY == section.kind);
+
+ uint8_t *init_startC = section.start;
+ init_t *init_start = (init_t*)init_startC;
+ init_t *init_end = (init_t*)(init_startC + section.size);
+
+ // ctors are run *backwards*!
+ for (init_t *init = init_end - 1; init >= init_start; init--)
+ (*init)(argc, argv, envv);
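+
+  // e.g. with a .ctors array laid out as [f, g, h], h runs first and f
+  // last; iterating backwards preserves the ordering contract that GCC's
+  // constructor machinery expects.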
+
+ freeProgEnvv(envc, envv);
+ releaseOcInfo (oc);
+ return true;
}
SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
@@ -2006,9 +2036,9 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
/* See Note [mingw-w64 name decoration scheme] */
#if !defined(x86_64_HOST_ARCH)
- zapTrailingAtSign ( (unsigned char*)lbl );
+ zapTrailingAtSign ( lbl );
#endif
- sym = lookupSymbolInDLLs((unsigned char*)lbl);
+ sym = lookupSymbolInDLLs(lbl);
return sym; // might be NULL if not found
} else {
#if defined(mingw32_HOST_OS)
@@ -2020,7 +2050,9 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
{
char symBuffer[50];
sprintf(symBuffer, "_%s", lbl);
- pinfo->value = GetProcAddress(GetModuleHandle("msvcrt"), symBuffer);
+ static HMODULE msvcrt = NULL;
+ if (!msvcrt) msvcrt = GetModuleHandle("msvcrt");
+ pinfo->value = GetProcAddress(msvcrt, symBuffer);
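+                /* Caching the handle saves a GetModuleHandle call per
+                   lookup; GetModuleHandle does not add a reference, and
+                   msvcrt stays loaded for the life of the process, so the
+                   cached handle remains valid. */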
}
else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
{
@@ -2047,4 +2079,223 @@ SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
}
}
+/* -----------------------------------------------------------------------------
+ * Section management.
+ */
+
+ /* See Note [Section alignment]. */
+static void
+addCopySection (ObjectCode *oc, Section *s, SectionKind kind,
+ SectionAlloc alloc, void* start, StgWord size) {
+ char* pos = oc->info->image + oc->info->secBytesUsed;
+ char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s);
+ memcpy (newStart, start, size);
+ uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image;
+ oc->info->secBytesUsed = (size_t)offset + size;
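+    /* This makes addCopySection a simple bump allocator over the pre-sized
+       oc->info->image buffer: each call copies its section just past the
+       previous one, with only alignment padding in between. */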
+ start = newStart;
+
+  /* Initially I wanted to apply the right memory protection to each region
+     and leave the gaps in between the regions as inaccessible memory, to
+     prevent exploits.
+     The problem is that protection is always at page granularity, so we can
+     either use less memory and be insecure, or use more memory and be
+     secure. For now I've chosen lower memory usage over security as a first
+     pass; this doesn't regress security over the current implementation.
+     A follow-up patch will switch to a different implementation that fixes
+     the memory protection while keeping the memory footprint small. */
+ addSection (s, kind, alloc, start, size, 0, 0, 0);
+}
+
+/* -----------------------------------------------------------------------------
+ * Debugging operations.
+ */
+
+pathchar*
+resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
+ SymbolAddr* symbol, uintptr_t* top ){
+ SYMBOL_INFO sym;
+ ZeroMemory (&sym, sizeof(SYMBOL_INFO));
+ sym.MaxNameLen = sizeof(char) * 1024;
+
+ DWORD64 uDisplacement = 0;
+ HANDLE hProcess = GetCurrentProcess();
+ ObjectCode* obj = NULL;
+ uintptr_t start, end;
+ *top = 0;
+
+ pathprintf (buffer, size, WSTR("0x%" PRIxPTR), symbol);
+
+ if (SymFromAddr (hProcess, (uintptr_t)symbol, &uDisplacement, &sym))
+ {
+ /* Try using Windows symbols. */
+ wcscat (buffer, WSTR(" "));
+ pathchar* name = mkPath (sym.Name);
+ wcscat (buffer, name);
+ stgFree (name);
+ if (uDisplacement != 0)
+ {
+ int64_t displacement = (int64_t)uDisplacement;
+ pathchar s_disp[50];
+ if (displacement < 0)
+ pathprintf ((pathchar*)s_disp, 50, WSTR("-%ld"), -displacement);
+ else
+ pathprintf ((pathchar*)s_disp, 50, WSTR("+%ld"), displacement);
+
+ wcscat (buffer, s_disp);
+ }
+ }
+ else
+ {
+ /* Try to calculate from information inside the rts. */
+ uintptr_t loc = (uintptr_t)symbol;
+ for (ObjectCode* oc = objects; oc; oc = oc->next) {
+ for (int i = 0; i < oc->n_sections; i++) {
+ Section section = oc->sections[i];
+ start = (uintptr_t)section.start;
+ end = start + section.size;
+ if (loc > start && loc <= end)
+ {
+ wcscat (buffer, WSTR(" "));
+ if (oc->archiveMemberName)
+ {
+ pathchar* name = mkPath (oc->archiveMemberName);
+ wcscat (buffer, name);
+ stgFree (name);
+ }
+ else
+ {
+ wcscat (buffer, oc->fileName);
+ }
+ pathchar s_disp[50];
+ pathprintf (s_disp, 50, WSTR("+0x%" PRIxPTR), loc - start);
+ wcscat (buffer, s_disp);
+ obj = oc;
+ goto exit_loop;
+ }
+ }
+ }
+
+    /* If we made it this far, the address is neither a known symbol nor
+       inside code we've linked. The only possibilities left are a segfault
+       internal to the RTS or one in a dynamic library, so enumerate the
+       module address space. */
+ HMODULE *hMods = NULL;
+ DWORD cbNeeded;
+ EnumProcessModules (hProcess, hMods, 0, &cbNeeded);
+ hMods = stgMallocBytes (cbNeeded, "resolveSymbolAddr_PEi386");
+ if (EnumProcessModules (hProcess, hMods, cbNeeded, &cbNeeded))
+ {
+ uintptr_t loc = (uintptr_t)symbol;
+ MODULEINFO info;
+ for (uint32_t i = 0; i < cbNeeded / sizeof(HMODULE); i++) {
+ ZeroMemory (&info, sizeof (MODULEINFO));
+ if (GetModuleInformation (hProcess, hMods[i], &info,
+ sizeof(MODULEINFO)))
+ {
+ uintptr_t start = (uintptr_t)info.lpBaseOfDll;
+ uintptr_t end = start + info.SizeOfImage;
+ if (loc >= start && loc < end)
+ {
+                  /* Hurrah, finally found some information. */
+ pathchar tmp[MAX_PATH];
+ if (GetModuleFileNameExW (hProcess, hMods[i], tmp, MAX_PATH))
+ {
+ wcscat (buffer, WSTR(" "));
+ wcscat (buffer, tmp);
+ pathprintf (tmp, MAX_PATH, WSTR("+0x%" PRIxPTR), loc - start);
+ wcscat (buffer, tmp);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ stgFree(hMods);
+ }
+
+ /* Finally any file/line number. */
+ IMAGEHLP_LINE64 lineInfo = {0};
+ DWORD dwDisplacement = 0;
+ exit_loop:
+ if (SymGetLineFromAddr64(hProcess, (uintptr_t)symbol, &dwDisplacement,
+ &lineInfo))
+ {
+ /* Try using Windows symbols. */
+ pathchar s_line[512];
+ pathprintf ((pathchar*) s_line, 512, WSTR(" %ls (%lu)"),
+ lineInfo.FileName, lineInfo.LineNumber);
+ wcscat (buffer, s_line);
+      if (dwDisplacement != 0)
+      {
+         pathprintf ((pathchar*) s_line, 512, WSTR(" +%lu byte%s"),
+                     dwDisplacement,
+                     (dwDisplacement == 1 ? WSTR("") : WSTR("s")));
+         /* Only append the displacement when there is one; otherwise the
+            file/line text already in s_line would be appended twice. */
+         wcscat (buffer, s_line);
+      }
+ }
+ else if (obj)
+ {
+ /* Try to calculate from information inside the rts. */
+ typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX;
+ SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols,
+ "resolveSymbolAddr");
+ int blanks = 0;
+ for (int i = 0; i < obj->n_symbols; i++) {
+ SymbolName* sym = obj->symbols[i];
+ if (sym == NULL)
+ {
+ blanks++;
+ continue;
+ }
+ RtsSymbolInfo* a = NULL;
+ ghciLookupSymbolInfo(symhash, sym, &a);
+ if (a) {
+ SymX sx = {0};
+ sx.name = sym;
+ sx.loc = (uintptr_t)a->value;
+ locs[i] = sx;
+ }
+ }
+ int comp (const void * elem1, const void * elem2)
+ {
+ SymX f = *((SymX*)elem1);
+ SymX s = *((SymX*)elem2);
+ if (f.loc > s.loc) return 1;
+ if (f.loc < s.loc) return -1;
+ return 0;
+ }
+ qsort (locs, obj->n_symbols, sizeof (SymX), comp);
+ uintptr_t key = (uintptr_t)symbol;
+ SymX* res = NULL;
+
+ for (int x = blanks; x < obj->n_symbols; x++) {
+ if (x < (obj->n_symbols -1)) {
+        if (locs[x].loc <= key && key < locs[x+1].loc) {
+ res = &locs[x];
+ break;
+ }
+ }
+ else
+ {
+           if (locs[x].loc <= key) {
+ res = &locs[x];
+ break;
+ }
+ }
+ }
+
+ if (res) {
+ pathchar s_disp[512];
+ *top = (uintptr_t)res->loc;
+ pathprintf ((pathchar*)s_disp, 512,
+ WSTR("\n\t\t (%s+0x%" PRIxPTR ")"),
+                  res->name, key - res->loc);
+ wcscat (buffer, s_disp);
+ }
+ stgFree (locs);
+ }
+
+ return buffer;
+}
#endif /* mingw32_HOST_OS */
diff --git a/rts/linker/PEi386.h b/rts/linker/PEi386.h
index e84e05232a..eb5bec8b78 100644
--- a/rts/linker/PEi386.h
+++ b/rts/linker/PEi386.h
@@ -14,6 +14,8 @@
#define PEi386_IMAGE_OFFSET 0
#endif
+#define PEi386_STRTAB_OFFSET 4
+
/********************************************
* COFF/PE types
********************************************/
@@ -40,6 +42,7 @@ typedef struct _COFF_HEADER_INFO {
********************************************/
void initLinker_PEi386( void );
+void exitLinker_PEi386( void );
const char * addDLL_PEi386( pathchar *dll_name, HINSTANCE *instance );
void freePreloadObjectFile_PEi386( ObjectCode *oc );
@@ -55,15 +58,32 @@ bool ocGetNames_PEi386 ( ObjectCode* oc );
bool ocVerifyImage_PEi386 ( ObjectCode* oc );
SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl);
bool ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
-SymbolAddr *lookupSymbolInDLLs ( unsigned char *lbl );
+SymbolAddr *lookupSymbolInDLLs ( const SymbolName* lbl );
/* See Note [mingw-w64 name decoration scheme] */
-
-char *
-allocateImageAndTrampolines (
- pathchar* arch_name, char* member_name,
- FILE* f,
- int size,
- int isThin);
+/* We use myindex to calculate array addresses, rather than
+ simply doing the normal subscript thing. That's because
+ some of the above structs have sizes which are not
+ a whole number of words. GCC rounds their sizes up to a
+ whole number of words, which means that the address calcs
+ arising from using normal C indexing or pointer arithmetic
+ are just plain wrong. Sigh.
+*/
+INLINE_HEADER unsigned char *
+myindex ( int scale, void* base, int index )
+{
+ return
+ ((unsigned char*)base) + scale * index;
+}
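+
+/* A sketch of typical use, assuming the standard 18-byte on-disk COFF
+   symbol entry size returned by getSymbolSize:
+
+       COFF_symbol* sym_i
+           = (COFF_symbol*)myindex (getSymbolSize (info), symtab, i);
+
+   Using the scaled byte offset sidesteps sizeof(COFF_symbol) padding. */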
+pathchar* resolveSymbolAddr_PEi386 ( pathchar* buffer, int size,
+ SymbolAddr* symbol, uintptr_t* top );
+
+char *get_name_string(
+ unsigned char* name,
+ ObjectCode* oc);
+
+char* get_sym_name(
+ uint8_t* name,
+ ObjectCode* oc);
/********************************************
* COFF/PE headers
@@ -111,6 +131,13 @@ struct _IndirectAddr {
struct _IndirectAddr* next;
} IndirectAddr;
+/* Some alignment information. */
+typedef
+struct _Alignments {
+ uint32_t mask;
+ uint32_t value;
+} Alignments;
+
/* Util symbol handling functions. */
COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName );
COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc );
diff --git a/rts/linker/PEi386Types.h b/rts/linker/PEi386Types.h
new file mode 100644
index 0000000000..67ea34345f
--- /dev/null
+++ b/rts/linker/PEi386Types.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#if defined(OBJFORMAT_PEi386)
+
+#include "ghcplatform.h"
+#include "PEi386.h"
+#include <stdint.h>
+#include <stdio.h>
+
+/* Some forward declares. */
+struct Section;
+
+
+struct SectionFormatInfo {
+ char* name;
+ size_t alignment;
+ COFF_reloc* relocs;
+ uint32_t noRelocs;
+ uint32_t props;
+ uint64_t virtualSize;
+ uint64_t virtualAddr;
+ };
+struct ObjectCodeFormatInfo {
+ size_t secBytesTotal;
+ size_t secBytesUsed;
+ char* image;
+ size_t trampoline;
+ Section* init;
+ Section* finit;
+ COFF_HEADER_INFO* ch_info;
+ char* str_tab;
+ COFF_symbol* symbols;
+ };
+
+#endif /* OBJFORMAT_PEi386. */
diff --git a/rts/linker/SymbolExtras.c b/rts/linker/SymbolExtras.c
index 486fa4a572..88541f44d0 100644
--- a/rts/linker/SymbolExtras.c
+++ b/rts/linker/SymbolExtras.c
@@ -51,8 +51,9 @@ int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
n = roundUpToPage(oc->fileSize);
/* Keep image and symbol_extras contiguous */
- void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
- MAP_ANONYMOUS, -1, 0);
+
+ size_t allocated_size = n + (sizeof(SymbolExtra) * count);
+ void *new = mmapForLinker(allocated_size, MAP_ANONYMOUS, -1, 0);
if (new) {
memcpy(new, oc->image, oc->fileSize);
if (oc->imageMapped) {
@@ -62,6 +63,9 @@ int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
oc->imageMapped = true;
oc->fileSize = n + (sizeof(SymbolExtra) * count);
oc->symbol_extras = (SymbolExtra *) (oc->image + n);
+ if(mprotect(new, allocated_size, PROT_READ | PROT_EXEC) != 0) {
+ sysErrorBelch("unable to protect memory");
+ }
}
else {
oc->symbol_extras = NULL;
diff --git a/rts/linker/elf_got.c b/rts/linker/elf_got.c
index 0395d169df..10ea25b98b 100644
--- a/rts/linker/elf_got.c
+++ b/rts/linker/elf_got.c
@@ -62,6 +62,9 @@ makeGot(ObjectCode * oc) {
symTab->symbols[i].got_addr
= (uint8_t *)oc->info->got_start
+ (slot++ * sizeof(void*));
+ if(mprotect(mem, oc->info->got_size, PROT_READ) != 0) {
+ sysErrorBelch("unable to protect memory");
+ }
}
return EXIT_SUCCESS;
}
diff --git a/rts/linker/elf_reloc_aarch64.c b/rts/linker/elf_reloc_aarch64.c
index 1d8f9e8ffe..c50ef04080 100644
--- a/rts/linker/elf_reloc_aarch64.c
+++ b/rts/linker/elf_reloc_aarch64.c
@@ -20,7 +20,7 @@ bool isAdrp(addr_t p);
bool isLoadStore(addr_t p);
bool isAddSub(addr_t p);
bool isVectorOp(addr_t p);
-int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel);
+int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel) GNU_ATTRIBUTE(__noreturn__);
bool encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend);
bool isBranch(addr_t p) {
diff --git a/rts/linker/elf_util.c b/rts/linker/elf_util.c
index 9ff9d6271d..052b2d9cae 100644
--- a/rts/linker/elf_util.c
+++ b/rts/linker/elf_util.c
@@ -3,9 +3,9 @@
#if defined(OBJFORMAT_ELF)
ElfSymbolTable *
-findSymbolTable(ObjectCode * oc, unsigned symolTableIndex) {
+findSymbolTable(ObjectCode * oc, unsigned symbolTableIndex) {
for(ElfSymbolTable * t=oc->info->symbolTables; t != NULL; t = t->next)
- if(t->index == symolTableIndex)
+ if(t->index == symbolTableIndex)
return t;
return NULL;
}
diff --git a/rts/package.conf.in b/rts/package.conf.in
index 2f722f10e7..b6dac767fb 100644
--- a/rts/package.conf.in
+++ b/rts/package.conf.in
@@ -8,7 +8,7 @@ name: rts
version: 1.0
id: rts
key: rts
-license: BSD3
+license: BSD-3-Clause
maintainer: glasgow-haskell-users@haskell.org
exposed: True
@@ -45,6 +45,8 @@ extra-libraries:
,"wsock32" /* for the linker */
,"gdi32" /* for the linker */
,"winmm" /* for the linker */
+ ,"dbghelp" /* for crash dump */
+ ,"psapi" /* for process information. */
#endif
#if NEED_PTHREAD_LIB
, "pthread" /* for pthread_getthreadid_np, pthread_create, etc. */
@@ -91,6 +93,7 @@ ld-options:
, "-Wl,-u,_base_GHCziIOziException_cannotCompactFunction_closure"
, "-Wl,-u,_base_GHCziIOziException_cannotCompactPinned_closure"
, "-Wl,-u,_base_GHCziIOziException_cannotCompactMutable_closure"
+ , "-Wl,-u,_base_ControlziExceptionziBase_absentSumFieldError_closure"
, "-Wl,-u,_base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,_base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,_base_GHCziEventziThread_blockedOnBadFD_closure"
@@ -171,6 +174,9 @@ ld-options:
#if WORD_SIZE_IN_BITS == 64
, "-Wl,-u,_hs_atomicwrite64"
#endif
+ /* This symbol is useful in gdb, but not referred to anywhere,
+ * so we need to force it to be included in the binary. */
+ , "-Wl,-u,_findPtr"
#else
"-Wl,-u,base_GHCziTopHandler_runIO_closure"
, "-Wl,-u,base_GHCziTopHandler_runNonIO_closure"
@@ -188,6 +194,7 @@ ld-options:
, "-Wl,-u,base_GHCziIOziException_cannotCompactFunction_closure"
, "-Wl,-u,base_GHCziIOziException_cannotCompactPinned_closure"
, "-Wl,-u,base_GHCziIOziException_cannotCompactMutable_closure"
+ , "-Wl,-u,base_ControlziExceptionziBase_absentSumFieldError_closure"
, "-Wl,-u,base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,base_GHCziEventziThread_blockedOnBadFD_closure"
@@ -268,6 +275,9 @@ ld-options:
#if WORD_SIZE_IN_BITS == 64
, "-Wl,-u,hs_atomicwrite64"
#endif
+ /* This symbol is useful in gdb, but not referred to anywhere,
+ * so we need to force it to be included in the binary. */
+ , "-Wl,-u,findPtr"
#endif
/* Pick up static libraries in preference over dynamic if in earlier search
diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c
index 6ccd65ab16..347c7c1a5c 100644
--- a/rts/posix/OSMem.c
+++ b/rts/posix/OSMem.c
@@ -36,6 +36,10 @@
#if defined(HAVE_NUMAIF_H)
#include <numaif.h>
#endif
+#if defined(HAVE_SYS_RESOURCE_H) && defined(HAVE_SYS_TIME_H)
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif
#include <errno.h>
@@ -45,6 +49,29 @@
#include <sys/sysctl.h>
#endif
+#ifndef MAP_FAILED
+# define MAP_FAILED ((void *)-1)
+#endif
+
+#if defined(hpux_HOST_OS)
+# ifndef MAP_ANON
+# define MAP_ANON MAP_ANONYMOUS
+# endif
+#endif
+
+#ifndef darwin_HOST_OS
+# undef RESERVE_FLAGS
+# if defined(MAP_GUARD)
+# define RESERVE_FLAGS MAP_GUARD /* FreeBSD */
+# elif defined(MAP_NORESERVE)
+#  define RESERVE_FLAGS (MAP_NORESERVE | MAP_ANON | MAP_PRIVATE)
+# else
+# if defined(USE_LARGE_ADDRESS_SPACE)
+# error USE_LARGE_ADDRESS_SPACE needs MAP_NORESERVE or MAP_GUARD
+# endif
+# endif
+#endif
+
static void *next_request = 0;
void osMemInit(void)
@@ -98,8 +125,10 @@ void osMemInit(void)
The naming is chosen from the Win32 API (VirtualAlloc) which does the
same thing and has done so forever, while support for this in Unix systems
has only been added recently and is hidden in the posix portability mess.
- It is confusing because to get the reserve behavior we need MAP_NORESERVE
- (which tells the kernel not to allocate backing space), but heh...
+  On Linux, mmap must be passed MAP_NORESERVE to get reservation-only
+  behavior. The naming is confusing: to get the *reserve* behavior we need
+  MAP_NORESERVE, which tells the kernel not to allocate backing space.
+  But heh...
*/
enum
{
@@ -108,6 +137,44 @@ enum
MEM_RESERVE_AND_COMMIT = MEM_RESERVE | MEM_COMMIT
};
+#if defined(linux_HOST_OS)
+static void *
+linux_retry_mmap(int operation, W_ size, void *ret, void *addr, int prot, int flags)
+{
+ if (addr != 0 && (operation & MEM_RESERVE)) {
+ // Try again with no hint address.
+ // It's not clear that this can ever actually help,
+ // but since our alternative is to abort, we may as well try.
+ ret = mmap(0, size, prot, flags, -1, 0);
+ }
+ if (ret == MAP_FAILED && errno == EPERM) {
+ // Linux is not willing to give us any mapping,
+ // so treat this as an out-of-memory condition
+ // (really out of virtual address space).
+ errno = ENOMEM;
+ }
+ return ret;
+}
+#endif /* defined(linux_HOST_OS) */
+
+static void
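+/* For committed pages, MADV_WILLNEED asks the kernel to fault them in
+   eagerly and MADV_DODUMP includes them in core dumps; for reserve-only
+   pages the DONTNEED/DONTDUMP counterparts do the reverse. */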
+post_mmap_madvise(int operation, W_ size, void *ret)
+{
+#if defined(MADV_WILLNEED)
+ if (operation & MEM_COMMIT) {
+ madvise(ret, size, MADV_WILLNEED);
+# if defined(MADV_DODUMP)
+ madvise(ret, size, MADV_DODUMP);
+# endif
+ } else {
+ madvise(ret, size, MADV_DONTNEED);
+# if defined(MADV_DONTDUMP)
+ madvise(ret, size, MADV_DONTDUMP);
+# endif
+ }
+#endif
+}
+
/* Returns NULL on failure; errno set */
static void *
my_mmap (void *addr, W_ size, int operation)
@@ -149,56 +216,44 @@ my_mmap (void *addr, W_ size, int operation)
VM_PROT_READ|VM_PROT_WRITE);
}
-#else
+#else /* defined(darwin_HOST_OS) */
int prot, flags;
- if (operation & MEM_COMMIT)
+ if (operation & MEM_COMMIT) {
prot = PROT_READ | PROT_WRITE;
- else
+ } else {
prot = PROT_NONE;
- if (operation == MEM_RESERVE)
-# if defined(MAP_NORESERVE)
- flags = MAP_NORESERVE;
+ }
+
+ if (operation == MEM_RESERVE) {
+# if defined(RESERVE_FLAGS)
+ flags = RESERVE_FLAGS;
# else
-# if defined(USE_LARGE_ADDRESS_SPACE)
-# error USE_LARGE_ADDRESS_SPACE needs MAP_NORESERVE
-# endif
errorBelch("my_mmap(,,MEM_RESERVE) not supported on this platform");
# endif
- else if (operation == MEM_COMMIT)
- flags = MAP_FIXED;
- else
- flags = 0;
+ } else if (operation == MEM_COMMIT) {
+ flags = MAP_FIXED | MAP_ANON | MAP_PRIVATE;
+ } else {
+ flags = MAP_ANON | MAP_PRIVATE;
+ }
-#if defined(hpux_HOST_OS)
- ret = mmap(addr, size, prot, flags | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-#elif defined(linux_HOST_OS)
- ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
- if (ret == (void *)-1 && errno == EPERM) {
+ ret = mmap(addr, size, prot, flags, -1, 0);
+# if defined(linux_HOST_OS)
+ if (ret == MAP_FAILED && errno == EPERM) {
// Linux may return EPERM if it tried to give us
// a chunk of address space below mmap_min_addr,
// See Trac #7500.
- if (addr != 0 && (operation & MEM_RESERVE)) {
- // Try again with no hint address.
- // It's not clear that this can ever actually help,
- // but since our alternative is to abort, we may as well try.
- ret = mmap(0, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
- }
- if (ret == (void *)-1 && errno == EPERM) {
- // Linux is not willing to give us any mapping,
- // so treat this as an out-of-memory condition
- // (really out of virtual address space).
- errno = ENOMEM;
- }
+ ret = linux_retry_mmap(operation, size, ret, addr, prot, flags);
}
-#else
- ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
-#endif
-#endif
-
- if (ret == (void *)-1) {
+# endif
+ if (ret == MAP_FAILED) {
return NULL;
}
+#endif /* defined(darwin_HOST_OS) */
+
+ // Map in committed pages rather than take a fault for each chunk.
+ // Also arrange to include them in core-dump files.
+ post_mmap_madvise(operation, size, ret);
return ret;
}
@@ -422,6 +477,8 @@ osTryReserveHeapMemory (W_ len, void *hint)
void *base, *top;
void *start, *end;
+ ASSERT((len & ~MBLOCK_MASK) == len);
+
/* We try to allocate len + MBLOCK_SIZE,
because we need memory which is MBLOCK_SIZE aligned,
and then we discard what we don't need */
@@ -487,8 +544,19 @@ void *osReserveHeapMemory(void *startAddressPtr, W_ *len)
(void*)startAddress, (void*)minimumAddress);
}
+#if defined(HAVE_SYS_RESOURCE_H) && defined(HAVE_SYS_TIME_H)
+ struct rlimit limit;
+ if (!getrlimit(RLIMIT_AS, &limit)
+ && limit.rlim_cur > 0
+ && *len > limit.rlim_cur) {
+ *len = limit.rlim_cur;
+ }
+#endif
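+
+   /* e.g. with an address-space rlimit of 8 GiB, an initial 1 TiB
+      reservation request is clamped to 8 GiB before the retry loop below
+      ever runs, avoiding a long series of doomed mmap attempts. */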
+
attempt = 0;
while (1) {
+ *len &= ~MBLOCK_MASK;
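+        /* Round the request down to a whole number of megablocks so each
+           attempt asks for an MBLOCK_SIZE-aligned amount; e.g. with 1 MiB
+           megablocks, 0x1ffff000 bytes becomes 0x1ff00000. */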
+
if (*len < MBLOCK_SIZE) {
// Give up if the system won't even give us 16 blocks worth of heap
barf("osReserveHeapMemory: Failed to allocate heap storage");
@@ -499,9 +567,14 @@ void *osReserveHeapMemory(void *startAddressPtr, W_ *len)
if (at == NULL) {
// This means that mmap failed which we take to mean that we asked
// for too much memory. This can happen due to POSIX resource
- // limits. In this case we reduce our allocation request by a factor
- // of two and try again.
- *len /= 2;
+ // limits. In this case we reduce our allocation request by a
+ // fraction of the current size and try again.
+ //
+            // Note that we previously would instead halve the request size;
+            // however, this meant that significant amounts of memory could
+            // be wasted (e.g. imagine a machine with 512GB of physical
+            // memory but a 511GB ulimit). See #14492.
+ *len -= *len / 8;
} else if ((W_)at >= minimumAddress) {
// Success! We were given a block of memory starting above the 8 GB
// mark, which is what we were looking for.
@@ -521,7 +594,10 @@ void *osReserveHeapMemory(void *startAddressPtr, W_ *len)
void osCommitMemory(void *at, W_ size)
{
- my_mmap(at, size, MEM_COMMIT);
+ void *r = my_mmap(at, size, MEM_COMMIT);
+ if (r == NULL) {
+ barf("Unable to commit %" FMT_Word " bytes of memory", size);
+ }
}
void osDecommitMemory(void *at, W_ size)
@@ -575,6 +651,15 @@ void osReleaseHeapMemory(void)
#endif
+bool osBuiltWithNumaSupport(void)
+{
+#if HAVE_LIBNUMA
+ return true;
+#else
+ return false;
+#endif
+}
+
bool osNumaAvailable(void)
{
#if HAVE_LIBNUMA
diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c
index e2471a223c..7dcf0eed15 100644
--- a/rts/posix/OSThreads.c
+++ b/rts/posix/OSThreads.c
@@ -134,7 +134,7 @@ int
createOSThread (OSThreadId* pId, char *name STG_UNUSED,
OSThreadProc *startProc, void *param)
{
- int result = pthread_create(pId, NULL, (void *(*)(void *))startProc, param);
+ int result = pthread_create(pId, NULL, startProc, param);
if (!result) {
pthread_detach(*pId);
#if defined(HAVE_PTHREAD_SETNAME_NP)
@@ -223,6 +223,7 @@ forkOS_createThreadWrapper ( void * entry )
cap = rts_lock();
rts_evalStableIO(&cap, (HsStablePtr) entry, NULL);
rts_unlock(cap);
+ rts_done();
return NULL;
}
diff --git a/rts/posix/Select.c b/rts/posix/Select.c
index 3d3b70b565..270e6ff45c 100644
--- a/rts/posix/Select.c
+++ b/rts/posix/Select.c
@@ -256,9 +256,14 @@ awaitEvent(bool wait)
for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
next = tso->_link;
- /* On FreeBSD FD_SETSIZE is unsigned. Cast it to signed int
+ /* On older FreeBSDs, FD_SETSIZE is unsigned. Cast it to signed int
      * in order to switch off the 'comparison between signed and
      * unsigned' error message.
+ * Newer versions of FreeBSD have switched to unsigned int:
+ * https://github.com/freebsd/freebsd/commit/12ae7f74a071f0439763986026525094a7032dfd
+ * http://fa.freebsd.cvs-all.narkive.com/bCWNHbaC/svn-commit-r265051-head-sys-sys
+ * So the (int) cast should be removed across the code base once
+ * GHC requires a version of FreeBSD that has that change in it.
*/
switch (tso->why_blocked) {
case BlockedOnRead:
diff --git a/rts/posix/Signals.c b/rts/posix/Signals.c
index 7471948cc0..f033870d16 100644
--- a/rts/posix/Signals.c
+++ b/rts/posix/Signals.c
@@ -15,7 +15,7 @@
#include "RtsUtils.h"
#include "Prelude.h"
#include "Ticker.h"
-#include "Stable.h"
+#include "ThreadLabels.h"
#include "Libdw.h"
#if defined(alpha_HOST_ARCH)
@@ -471,30 +471,22 @@ startSignalHandlers(Capability *cap)
// freed by runHandler
memcpy(info, next_pending_handler, sizeof(siginfo_t));
- scheduleThread(cap,
+ StgTSO *t =
createIOThread(cap,
- RtsFlags.GcFlags.initialStkSize,
- rts_apply(cap,
- rts_apply(cap,
- &base_GHCziConcziSignal_runHandlersPtr_closure,
- rts_mkPtr(cap, info)),
- rts_mkInt(cap, info->si_signo))));
+ RtsFlags.GcFlags.initialStkSize,
+ rts_apply(cap,
+ rts_apply(cap,
+ &base_GHCziConcziSignal_runHandlersPtr_closure,
+ rts_mkPtr(cap, info)),
+ rts_mkInt(cap, info->si_signo)));
+ scheduleThread(cap, t);
+ labelThread(cap, t, "signal handler thread");
}
unblockUserSignals();
}
#endif
-/* ----------------------------------------------------------------------------
- * Mark signal handlers during GC.
- * -------------------------------------------------------------------------- */
-
-void
-markSignalHandlers (evac_fn evac STG_UNUSED, void *user STG_UNUSED)
-{
- // nothing to do
-}
-
#else /* !RTS_USER_SIGNALS */
StgInt
stg_sig_install(StgInt sig STG_UNUSED,
@@ -528,10 +520,10 @@ shutdown_handler(int sig STG_UNUSED)
}
/* -----------------------------------------------------------------------------
- * SIGUSR2 handler.
+ * SIGQUIT handler.
*
* We try to give the user an indication of what we are currently doing
- * in response to SIGUSR2.
+ * in response to SIGQUIT.
* -------------------------------------------------------------------------- */
static void
backtrace_handler(int sig STG_UNUSED)
@@ -539,6 +531,7 @@ backtrace_handler(int sig STG_UNUSED)
#if USE_LIBDW
LibdwSession *session = libdwInit();
Backtrace *bt = libdwGetBacktrace(session);
+ fprintf(stderr, "\nCaught SIGQUIT; Backtrace:\n");
libdwPrintBacktrace(session, stderr, bt);
backtraceFree(bt);
libdwFree(session);
@@ -720,12 +713,12 @@ initDefaultHandlers(void)
sysErrorBelch("warning: failed to install SIGPIPE handler");
}
- // Print a backtrace on SIGUSR2
+ // Print a backtrace on SIGQUIT
action.sa_handler = backtrace_handler;
sigemptyset(&action.sa_mask);
action.sa_flags = 0;
- if (sigaction(SIGUSR2, &action, &oact) != 0) {
- sysErrorBelch("warning: failed to install SIGUSR2 handler");
+ if (sigaction(SIGQUIT, &action, &oact) != 0) {
+ sysErrorBelch("warning: failed to install SIGQUIT handler");
}
set_sigtstp_action(true);
diff --git a/rts/posix/itimer/Pthread.c b/rts/posix/itimer/Pthread.c
index e15ac2521e..d8f2497e3f 100644
--- a/rts/posix/itimer/Pthread.c
+++ b/rts/posix/itimer/Pthread.c
@@ -10,7 +10,7 @@
* We use a realtime timer by default. I found this much more
* reliable than a CPU timer:
*
- * Experiments with different frequences: using
+ * Experiments with different frequencies: using
* CLOCK_REALTIME/CLOCK_MONOTONIC on Linux 2.6.32,
* 1000us has <1% impact on runtime
* 100us has ~2% impact on runtime
@@ -84,11 +84,11 @@ static Time itimer_interval = DEFAULT_TICK_INTERVAL;
// Should we be firing ticks?
// Writers to this must hold the mutex below.
-static volatile HsBool stopped = 0;
+static volatile bool stopped = false;
// should the ticker thread exit?
// This can be set without holding the mutex.
-static volatile HsBool exited = 1;
+static volatile bool exited = true;
// Signaled when we want to (re)start the timer
static Condition start_cond;
@@ -109,15 +109,13 @@ static void *itimer_thread_func(void *_handle_tick)
timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
if (timerfd == -1) {
- sysErrorBelch("timerfd_create");
- stg_exit(EXIT_FAILURE);
+ barf("timerfd_create");
}
if (!TFD_CLOEXEC) {
- fcntl(timerfd, F_SETFD, FD_CLOEXEC);
+ fcntl(timerfd, F_SETFD, FD_CLOEXEC);
}
if (timerfd_settime(timerfd, 0, &it, NULL)) {
- sysErrorBelch("timerfd_settime");
- stg_exit(EXIT_FAILURE);
+ barf("timerfd_settime");
}
#endif
@@ -125,7 +123,7 @@ static void *itimer_thread_func(void *_handle_tick)
if (USE_TIMERFD_FOR_ITIMER) {
if (read(timerfd, &nticks, sizeof(nticks)) != sizeof(nticks)) {
if (errno != EINTR) {
- sysErrorBelch("Itimer: read(timerfd) failed");
+ barf("Itimer: read(timerfd) failed");
}
}
} else {
@@ -158,8 +156,8 @@ void
initTicker (Time interval, TickProc handle_tick)
{
itimer_interval = interval;
- stopped = 0;
- exited = 0;
+ stopped = false;
+ exited = false;
initCondition(&start_cond);
initMutex(&mutex);
@@ -173,8 +171,7 @@ initTicker (Time interval, TickProc handle_tick)
pthread_setname_np(thread, "ghc_ticker");
#endif
} else {
- sysErrorBelch("Itimer: Failed to spawn thread");
- stg_exit(EXIT_FAILURE);
+ barf("Itimer: Failed to spawn thread");
}
}
@@ -201,7 +198,7 @@ void
exitTicker (bool wait)
{
ASSERT(!exited);
- exited = 1;
+ exited = true;
// ensure that ticker wakes up if stopped
startTicker();
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
new file mode 100644
index 0000000000..e09c0548f7
--- /dev/null
+++ b/rts/rts.cabal.in
@@ -0,0 +1,477 @@
+cabal-version: 2.1
+name: rts
+version: 1.0
+license: BSD-3-Clause
+maintainer: glasgow-haskell-users@haskell.org
+build-type: Simple
+flag libm
+ default: @CabalHaveLibm@
+flag librt
+ default: @CabalHaveLibrt@
+flag libdl
+ default: @CabalHaveLibdl@
+flag ffi
+ default: @CabalHaveLibffi@
+flag need-pthread
+ default: @CabalNeedLibpthread@
+flag libbfd
+ default: @CabalHaveLibbfd@
+flag mingwex
+ default: @CabalMingwex@
+flag libdw
+ default: @CabalHaveLibdw@
+flag libnuma
+ default: @CabalHaveLibNuma@
+flag 64bit
+ default: @Cabal64bit@
+flag leading-underscore
+ default: @CabalLeadingUnderscore@
+flag smp
+ default: True
+flag profiling
+ default: False
+
+library
+ -- rts is a wired in package and
+ -- expects the unit-id to be
+ -- set without version
+ ghc-options: -this-unit-id rts
+ if os(windows)
+ extra-bundled-libraries: Cffi-6
+ else
+ extra-bundled-libraries: Cffi
+    -- the rts comes in a variety of flavours that are built outside
+    -- of cabal. The combination of extra-bundled-libraries and
+    -- extra-library-flavours results in the following libraries being
+    -- copied:
+ -- libHSrts-1.0_debug libHSrts-1.0_l libHSrts-1.0_p
+ -- libHSrts-1.0_thr libHSrts-1.0_thr_debug libHSrts-1.0_thr_l
+ -- libHSrts-1.0_thr_p
+ -- libCffi_debug libCffi_ libCffi_l libCffi_p
+ -- libCffi_thr libCffi_thr_debug libCffi_thr_l libCffi_thr_p
+ extra-library-flavours: _debug _l _thr _thr_debug _thr_l
+
+ -- The make build system does something special in config.mk.in
+ -- for generating profiled builds of those libraries, but we need to
+ -- be transparent for hadrian which gets information about the rts
+ -- "package" through Cabal and this cabal file. We therefore declare
+ -- several profiling-enabled flavours to be available when passing the
+ -- 'profiling' flag when configuring the RTS from hadrian, using Cabal.
+ if flag(profiling)
+ extra-library-flavours: _p _thr_p _debug_p _thr_debug_p
+
+ exposed: True
+ exposed-modules:
+ if flag(libm)
+ -- for ldexp()
+ extra-libraries: m
+ if flag(librt)
+ extra-libraries: rt
+ if flag(libdl)
+ extra-libraries: dl
+ if flag(ffi)
+ extra-libraries: ffi
+ if os(windows)
+ extra-libraries:
+ -- for the linker
+ wsock32 gdi32 winmm
+ -- for crash dump
+ dbghelp
+ -- for process information
+ psapi
+ if flag(need-pthread)
+ -- for pthread_getthreadid_np, pthread_create, ...
+ extra-libraries: pthread
+ if flag(libbfd)
+ -- for debugging
+ extra-libraries: bfd iberty
+ if flag(mingwex)
+ extra-libraries: mingwex
+ if flag(libdw)
+ -- for backtraces
+ extra-libraries: elf dw
+ if flag(libnuma)
+ extra-libraries: numa
+ if !flag(smp)
+ cpp-options: -DNOSMP
+
+ include-dirs: build ../includes includes
+ includes/dist-derivedconstants/header @FFIIncludeDir@
+ includes: Stg.h
+ install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h Stg.h
+ ghcautoconf.h ghcconfig.h ghcplatform.h ghcversion.h
+ -- ^ from ../includes
+ DerivedConstants.h ffi.h ffitarget.h
+ -- ^ generated
+ rts/Adjustor.h
+ rts/BlockSignals.h
+ rts/Bytecodes.h
+ rts/Config.h
+ rts/Constants.h
+ rts/EventLogFormat.h
+ rts/EventLogWriter.h
+ rts/FileLock.h
+ rts/Flags.h
+ rts/GetTime.h
+ rts/Globals.h
+ rts/Hpc.h
+ rts/IOManager.h
+ rts/Libdw.h
+ rts/LibdwPool.h
+ rts/Linker.h
+ rts/Main.h
+ rts/Messages.h
+ rts/OSThreads.h
+ rts/Parallel.h
+ rts/PrimFloat.h
+ rts/Profiling.h
+ rts/Signals.h
+ rts/SpinLock.h
+ rts/StableName.h
+ rts/StablePtr.h
+ rts/StaticPtrTable.h
+ rts/TTY.h
+ rts/Threads.h
+ rts/Ticky.h
+ rts/Time.h
+ rts/Timer.h
+ rts/Types.h
+ rts/Utils.h
+ rts/prof/CCS.h
+ rts/prof/LDV.h
+ rts/storage/Block.h
+ rts/storage/ClosureMacros.h
+ rts/storage/ClosureTypes.h
+ rts/storage/Closures.h
+ rts/storage/FunTypes.h
+ rts/storage/Heap.h
+ rts/storage/GC.h
+ rts/storage/InfoTables.h
+ rts/storage/MBlock.h
+ rts/storage/TSO.h
+ stg/DLL.h
+ stg/HaskellMachRegs.h
+ stg/MachRegs.h
+ stg/MiscClosures.h
+ stg/Prim.h
+ stg/Regs.h
+ stg/RtsMachRegs.h
+ stg/SMP.h
+ stg/Ticky.h
+ stg/Types.h
+ if flag(64bit)
+ if flag(leading-underscore)
+ ld-options:
+ "-Wl,-u,_hs_atomic_add64"
+ "-Wl,-u,_hs_atomic_sub64"
+ "-Wl,-u,_hs_atomic_and64"
+ "-Wl,-u,_hs_atomic_nand64"
+ "-Wl,-u,_hs_atomic_or64"
+ "-Wl,-u,_hs_atomic_xor64"
+ "-Wl,-u,_hs_cmpxchg64"
+ "-Wl,-u,_hs_atomicread64"
+ "-Wl,-u,_hs_atomicwrite64"
+ else
+ ld-options:
+ "-Wl,-u,hs_atomic_add64"
+ "-Wl,-u,hs_atomic_sub64"
+ "-Wl,-u,hs_atomic_and64"
+ "-Wl,-u,hs_atomic_nand64"
+ "-Wl,-u,hs_atomic_or64"
+ "-Wl,-u,hs_atomic_xor64"
+ "-Wl,-u,hs_cmpxchg64"
+ "-Wl,-u,hs_atomicread64"
+ "-Wl,-u,hs_atomicwrite64"
+ if flag(leading-underscore)
+ ld-options:
+ "-Wl,-u,_base_GHCziTopHandler_runIO_closure"
+ "-Wl,-u,_base_GHCziTopHandler_runNonIO_closure"
+ "-Wl,-u,_ghczmprim_GHCziTuple_Z0T_closure"
+ "-Wl,-u,_ghczmprim_GHCziTypes_True_closure"
+ "-Wl,-u,_ghczmprim_GHCziTypes_False_closure"
+ "-Wl,-u,_base_GHCziPack_unpackCString_closure"
+ "-Wl,-u,_base_GHCziWeak_runFinalizzerBatch_closure"
+ "-Wl,-u,_base_GHCziIOziException_stackOverflow_closure"
+ "-Wl,-u,_base_GHCziIOziException_heapOverflow_closure"
+ "-Wl,-u,_base_GHCziIOziException_allocationLimitExceeded_closure"
+ "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
+ "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ "-Wl,-u,_base_GHCziIOziException_cannotCompactFunction_closure"
+ "-Wl,-u,_base_GHCziIOziException_cannotCompactPinned_closure"
+ "-Wl,-u,_base_GHCziIOziException_cannotCompactMutable_closure"
+ "-Wl,-u,_base_ControlziExceptionziBase_absentSumFieldError_closure"
+ "-Wl,-u,_base_ControlziExceptionziBase_nonTermination_closure"
+ "-Wl,-u,_base_ControlziExceptionziBase_nestedAtomically_closure"
+ "-Wl,-u,_base_GHCziEventziThread_blockedOnBadFD_closure"
+ "-Wl,-u,_base_GHCziConcziSync_runSparks_closure"
+ "-Wl,-u,_base_GHCziConcziIO_ensureIOManagerIsRunning_closure"
+ "-Wl,-u,_base_GHCziConcziIO_ioManagerCapabilitiesChanged_closure"
+ "-Wl,-u,_base_GHCziConcziSignal_runHandlersPtr_closure"
+ "-Wl,-u,_base_GHCziTopHandler_flushStdHandles_closure"
+ "-Wl,-u,_base_GHCziTopHandler_runMainIO_closure"
+ "-Wl,-u,_ghczmprim_GHCziTypes_Czh_con_info"
+ "-Wl,-u,_ghczmprim_GHCziTypes_Izh_con_info"
+ "-Wl,-u,_ghczmprim_GHCziTypes_Fzh_con_info"
+ "-Wl,-u,_ghczmprim_GHCziTypes_Dzh_con_info"
+ "-Wl,-u,_ghczmprim_GHCziTypes_Wzh_con_info"
+ "-Wl,-u,_base_GHCziPtr_Ptr_con_info"
+ "-Wl,-u,_base_GHCziPtr_FunPtr_con_info"
+ "-Wl,-u,_base_GHCziInt_I8zh_con_info"
+ "-Wl,-u,_base_GHCziInt_I16zh_con_info"
+ "-Wl,-u,_base_GHCziInt_I32zh_con_info"
+ "-Wl,-u,_base_GHCziInt_I64zh_con_info"
+ "-Wl,-u,_base_GHCziWord_W8zh_con_info"
+ "-Wl,-u,_base_GHCziWord_W16zh_con_info"
+ "-Wl,-u,_base_GHCziWord_W32zh_con_info"
+ "-Wl,-u,_base_GHCziWord_W64zh_con_info"
+ "-Wl,-u,_base_GHCziStable_StablePtr_con_info"
+ "-Wl,-u,_hs_atomic_add8"
+ "-Wl,-u,_hs_atomic_add16"
+ "-Wl,-u,_hs_atomic_add32"
+ "-Wl,-u,_hs_atomic_sub8"
+ "-Wl,-u,_hs_atomic_sub16"
+ "-Wl,-u,_hs_atomic_sub32"
+ "-Wl,-u,_hs_atomic_and8"
+ "-Wl,-u,_hs_atomic_and16"
+ "-Wl,-u,_hs_atomic_and32"
+ "-Wl,-u,_hs_atomic_nand8"
+ "-Wl,-u,_hs_atomic_nand16"
+ "-Wl,-u,_hs_atomic_nand32"
+ "-Wl,-u,_hs_atomic_or8"
+ "-Wl,-u,_hs_atomic_or16"
+ "-Wl,-u,_hs_atomic_or32"
+ "-Wl,-u,_hs_atomic_xor8"
+ "-Wl,-u,_hs_atomic_xor16"
+ "-Wl,-u,_hs_atomic_xor32"
+ "-Wl,-u,_hs_cmpxchg8"
+ "-Wl,-u,_hs_cmpxchg16"
+ "-Wl,-u,_hs_cmpxchg32"
+ "-Wl,-u,_hs_atomicread8"
+ "-Wl,-u,_hs_atomicread16"
+ "-Wl,-u,_hs_atomicread32"
+ "-Wl,-u,_hs_atomicwrite8"
+ "-Wl,-u,_hs_atomicwrite16"
+ "-Wl,-u,_hs_atomicwrite32"
+ -- This symbol is useful in gdb, but not referred to anywhere,
+ -- so we need to force it to be included in the binary.
+ "-Wl,-u,_findPtr"
+ else
+ ld-options:
+ "-Wl,-u,base_GHCziTopHandler_runIO_closure"
+ "-Wl,-u,base_GHCziTopHandler_runNonIO_closure"
+ "-Wl,-u,ghczmprim_GHCziTuple_Z0T_closure"
+ "-Wl,-u,ghczmprim_GHCziTypes_True_closure"
+ "-Wl,-u,ghczmprim_GHCziTypes_False_closure"
+ "-Wl,-u,base_GHCziPack_unpackCString_closure"
+ "-Wl,-u,base_GHCziWeak_runFinalizzerBatch_closure"
+ "-Wl,-u,base_GHCziIOziException_stackOverflow_closure"
+ "-Wl,-u,base_GHCziIOziException_heapOverflow_closure"
+ "-Wl,-u,base_GHCziIOziException_allocationLimitExceeded_closure"
+ "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
+ "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ "-Wl,-u,base_GHCziIOziException_cannotCompactFunction_closure"
+ "-Wl,-u,base_GHCziIOziException_cannotCompactPinned_closure"
+ "-Wl,-u,base_GHCziIOziException_cannotCompactMutable_closure"
+ "-Wl,-u,base_ControlziExceptionziBase_absentSumFieldError_closure"
+ "-Wl,-u,base_ControlziExceptionziBase_nonTermination_closure"
+ "-Wl,-u,base_ControlziExceptionziBase_nestedAtomically_closure"
+ "-Wl,-u,base_GHCziEventziThread_blockedOnBadFD_closure"
+ "-Wl,-u,base_GHCziConcziSync_runSparks_closure"
+ "-Wl,-u,base_GHCziConcziIO_ensureIOManagerIsRunning_closure"
+ "-Wl,-u,base_GHCziConcziIO_ioManagerCapabilitiesChanged_closure"
+ "-Wl,-u,base_GHCziConcziSignal_runHandlersPtr_closure"
+ "-Wl,-u,base_GHCziTopHandler_flushStdHandles_closure"
+ "-Wl,-u,base_GHCziTopHandler_runMainIO_closure"
+ "-Wl,-u,ghczmprim_GHCziTypes_Czh_con_info"
+ "-Wl,-u,ghczmprim_GHCziTypes_Izh_con_info"
+ "-Wl,-u,ghczmprim_GHCziTypes_Fzh_con_info"
+ "-Wl,-u,ghczmprim_GHCziTypes_Dzh_con_info"
+ "-Wl,-u,ghczmprim_GHCziTypes_Wzh_con_info"
+ "-Wl,-u,base_GHCziPtr_Ptr_con_info"
+ "-Wl,-u,base_GHCziPtr_FunPtr_con_info"
+ "-Wl,-u,base_GHCziInt_I8zh_con_info"
+ "-Wl,-u,base_GHCziInt_I16zh_con_info"
+ "-Wl,-u,base_GHCziInt_I32zh_con_info"
+ "-Wl,-u,base_GHCziInt_I64zh_con_info"
+ "-Wl,-u,base_GHCziWord_W8zh_con_info"
+ "-Wl,-u,base_GHCziWord_W16zh_con_info"
+ "-Wl,-u,base_GHCziWord_W32zh_con_info"
+ "-Wl,-u,base_GHCziWord_W64zh_con_info"
+ "-Wl,-u,base_GHCziStable_StablePtr_con_info"
+ "-Wl,-u,hs_atomic_add8"
+ "-Wl,-u,hs_atomic_add16"
+ "-Wl,-u,hs_atomic_add32"
+ "-Wl,-u,hs_atomic_sub8"
+ "-Wl,-u,hs_atomic_sub16"
+ "-Wl,-u,hs_atomic_sub32"
+ "-Wl,-u,hs_atomic_and8"
+ "-Wl,-u,hs_atomic_and16"
+ "-Wl,-u,hs_atomic_and32"
+ "-Wl,-u,hs_atomic_nand8"
+ "-Wl,-u,hs_atomic_nand16"
+ "-Wl,-u,hs_atomic_nand32"
+ "-Wl,-u,hs_atomic_or8"
+ "-Wl,-u,hs_atomic_or16"
+ "-Wl,-u,hs_atomic_or32"
+ "-Wl,-u,hs_atomic_xor8"
+ "-Wl,-u,hs_atomic_xor16"
+ "-Wl,-u,hs_atomic_xor32"
+ "-Wl,-u,hs_cmpxchg8"
+ "-Wl,-u,hs_cmpxchg16"
+ "-Wl,-u,hs_cmpxchg32"
+ "-Wl,-u,hs_atomicread8"
+ "-Wl,-u,hs_atomicread16"
+ "-Wl,-u,hs_atomicread32"
+ "-Wl,-u,hs_atomicwrite8"
+ "-Wl,-u,hs_atomicwrite16"
+ "-Wl,-u,hs_atomicwrite32"
+ -- This symbol is useful in gdb, but not referred to anywhere,
+ -- so we need to force it to be included in the binary.
+ "-Wl,-u,findPtr"
+
+ if os(osx)
+ ld-options: "-Wl,-search_paths_first"
+ if !arch(x86_64)
+ ld-options: -read_only_relocs warning
+
+ cmm-sources: Apply.cmm
+ Compact.cmm
+ Exception.cmm
+ HeapStackCheck.cmm
+ PrimOps.cmm
+ StgMiscClosures.cmm
+ StgStartup.cmm
+ StgStdThunks.cmm
+ Updates.cmm
+ -- AutoApply is generated
+ AutoApply.cmm
+
+ if arch(i386) || arch(powerpc) || arch(powerpc64)
+ asm-sources: AdjustorAsm.S
+ if arch(powerpc) || arch(powerpc64) || arch(powerpc64le)
+ asm-sources: StgCRunAsm.S
+
+ c-sources: Adjustor.c
+ Arena.c
+ Capability.c
+ CheckUnload.c
+ ClosureFlags.c
+ Disassembler.c
+ FileLock.c
+ Globals.c
+ Hash.c
+ Heap.c
+ Hpc.c
+ HsFFI.c
+ Inlines.c
+ Interpreter.c
+ LdvProfile.c
+ Libdw.c
+ LibdwPool.c
+ Linker.c
+ Messages.c
+ OldARMAtomic.c
+ PathUtils.c
+ Pool.c
+ Printer.c
+ ProfHeap.c
+ ProfilerReport.c
+ ProfilerReportJson.c
+ Profiling.c
+ Proftimer.c
+ RaiseAsync.c
+ RetainerProfile.c
+ RetainerSet.c
+ RtsAPI.c
+ RtsDllMain.c
+ RtsFlags.c
+ RtsMain.c
+ RtsMessages.c
+ RtsStartup.c
+ RtsSymbolInfo.c
+ RtsSymbols.c
+ RtsUtils.c
+ STM.c
+ Schedule.c
+ Sparks.c
+ StableName.c
+ StablePtr.c
+ StaticPtrTable.c
+ Stats.c
+ StgCRun.c
+ StgPrimFloat.c
+ Task.c
+ ThreadLabels.c
+ ThreadPaused.c
+ Threads.c
+ Ticky.c
+ Timer.c
+ TopHandler.c
+ Trace.c
+ WSDeque.c
+ Weak.c
+ eventlog/EventLog.c
+ eventlog/EventLogWriter.c
+ hooks/FlagDefaults.c
+ hooks/LongGCSync.c
+ hooks/MallocFail.c
+ hooks/OnExit.c
+ hooks/OutOfHeap.c
+ hooks/StackOverflow.c
+ linker/CacheFlush.c
+ linker/Elf.c
+ linker/LoadArchive.c
+ linker/M32Alloc.c
+ linker/MachO.c
+ linker/PEi386.c
+ linker/SymbolExtras.c
+ linker/elf_got.c
+ linker/elf_plt.c
+ linker/elf_plt_aarch64.c
+ linker/elf_plt_arm.c
+ linker/elf_reloc.c
+ linker/elf_reloc_aarch64.c
+ linker/elf_util.c
+ sm/BlockAlloc.c
+ sm/CNF.c
+ sm/Compact.c
+ sm/Evac.c
+ sm/Evac_thr.c
+ sm/GC.c
+ sm/GCAux.c
+ sm/GCUtils.c
+ sm/MBlock.c
+ sm/MarkWeak.c
+ sm/Sanity.c
+ sm/Scav.c
+ sm/Scav_thr.c
+ sm/Storage.c
+ sm/Sweep.c
+ xxhash.c
+ fs.c
+ -- I wish we had wildcards..., this would be:
+ -- *.c hooks/**/*.c sm/**/*.c eventlog/**/*.c linker/**/*.c
+ if os(windows)
+ c-sources: win32/AsyncIO.c
+ win32/AwaitEvent.c
+ win32/ConsoleHandler.c
+ win32/GetEnv.c
+ win32/GetTime.c
+ win32/IOManager.c
+ win32/OSMem.c
+ win32/OSThreads.c
+ win32/ThrIOManager.c
+ win32/Ticker.c
+ win32/WorkQueue.c
+ win32/veh_excn.c
+ -- win32/**/*.c
+ else
+ c-sources: posix/GetEnv.c
+ posix/GetTime.c
+ posix/Itimer.c
+ posix/OSMem.c
+ posix/OSThreads.c
+ posix/Select.c
+ posix/Signals.c
+ posix/TTY.c
+ -- posix/*.c -- we do not want itimer
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 2a02ecc9c5..bbb4f8a6c1 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -210,6 +210,12 @@ void recordFreedBlocks(uint32_t node, uint32_t n)
Allocation
-------------------------------------------------------------------------- */
+STATIC_INLINE bdescr *
+tail_of (bdescr *bd)
+{
+ return bd + bd->blocks - 1;
+}
+
STATIC_INLINE void
initGroup(bdescr *head)
{
@@ -223,7 +229,7 @@ initGroup(bdescr *head)
// mblocks don't have bdescrs; freeing these is handled in a
// different way by free_mblock_group().
if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
- bdescr *last = head + head->blocks-1;
+ bdescr *last = tail_of(head);
last->blocks = 0;
last->link = head;
}
@@ -285,13 +291,6 @@ free_list_insert (uint32_t node, bdescr *bd)
dbl_link_onto(bd, &free_list[node][ln]);
}
-
-STATIC_INLINE bdescr *
-tail_of (bdescr *bd)
-{
- return bd + bd->blocks - 1;
-}
-
// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// coalescing.
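
The invariant mentioned above is easiest to see in miniature. A minimal
sketch, assuming only the bdescr fields visible in this hunk (blocks, link);
illustrative, not part of the patch:

    /* Re-establish the tail invariant for a freshly split group,
     * mirroring initGroup above: the tail's blocks field is zeroed to
     * mark it as a tail, and its link points back at the group head. */
    static void fixup_tail (bdescr *head)
    {
        if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
            bdescr *last = tail_of(head);
            last->blocks = 0;
            last->link   = head;
        }
    }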
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index c12f53a120..6bc58cde75 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -722,14 +722,14 @@ verify_consistency_block (StgCompactNFData *str, StgCompactNFDataBlock *block)
p += arr_words_sizeW((StgArrBytes*)p);
break;
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
verify_mut_arr_ptrs(str, (StgMutArrPtrs*)p);
p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
break;
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
{
uint32_t i;
StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
@@ -969,14 +969,14 @@ fixup_block(StgCompactNFDataBlock *block, StgWord *fixup_table, uint32_t count)
p += arr_words_sizeW((StgArrBytes*)p);
break;
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
fixup_mut_arr_ptrs(fixup_table, count, (StgMutArrPtrs*)p);
p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
break;
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
{
uint32_t i;
StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 0e2fea8990..004e042069 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -25,7 +25,8 @@
#include "Trace.h"
#include "Weak.h"
#include "MarkWeak.h"
-#include "Stable.h"
+#include "StablePtr.h"
+#include "StableName.h"
// Turn off inlining when debugging - it obfuscates things
#if defined(DEBUG)
@@ -212,7 +213,7 @@ thread_static( StgClosure* p )
p = *THUNK_STATIC_LINK(p);
continue;
case FUN_STATIC:
- p = *FUN_STATIC_LINK(p);
+ p = *STATIC_LINK(info,p);
continue;
case CONSTR:
case CONSTR_NOCAF:
@@ -482,8 +483,8 @@ update_fwd_large( bdescr *bd )
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgMutArrPtrs *a;
@@ -497,8 +498,8 @@ update_fwd_large( bdescr *bd )
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgSmallMutArrPtrs *a;
@@ -682,8 +683,8 @@ thread_obj (const StgInfoTable *info, StgPtr p)
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgMutArrPtrs *a;
@@ -698,8 +699,8 @@ thread_obj (const StgInfoTable *info, StgPtr p)
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgSmallMutArrPtrs *a;
@@ -1000,7 +1001,10 @@ compact(StgClosure *static_objects)
thread_static(static_objects /* ToDo: ok? */);
// the stable pointer table
- threadStableTables((evac_fn)thread_root, NULL);
+ threadStablePtrTable((evac_fn)thread_root, NULL);
+
+ // the stable name table
+ threadStableNameTable((evac_fn)thread_root, NULL);
// the CAF list (used by GHCi)
markCAFs((evac_fn)thread_root, NULL);
diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h
index 6dcb50b1aa..63abfc7180 100644
--- a/rts/sm/Compact.h
+++ b/rts/sm/Compact.h
@@ -20,8 +20,8 @@ mark(StgPtr p, bdescr *bd)
{
uint32_t offset_within_block = p - bd->start; // in words
StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+ (offset_within_block / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
*bitmap_word |= bit_mask;
}
@@ -30,8 +30,8 @@ unmark(StgPtr p, bdescr *bd)
{
uint32_t offset_within_block = p - bd->start; // in words
StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+ (offset_within_block / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
*bitmap_word &= ~bit_mask;
}
@@ -40,8 +40,8 @@ is_marked(StgPtr p, bdescr *bd)
{
uint32_t offset_within_block = p - bd->start; // in words
StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+ (offset_within_block / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_)- 1));
return (*bitmap_word & bit_mask);
}
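
The index arithmetic above is dense, so a worked example may help. Assuming
BITS_IN(W_) expands to sizeof(W_) * BITS_PER_BYTE, i.e. 64 on a 64-bit
platform:

    /* Illustrative only: one mark bit per heap word.  Say p sits 200
     * words into its block:
     *   bitmap word index: 200 / 64 = 3   (the fourth bitmap word)
     *   bit within word:   200 & 63 = 8   (so bit_mask = 1 << 8)
     * mark() sets that single bit, unmark() clears it, and is_marked()
     * tests it. */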
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index fb1af0f692..289031945d 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -28,10 +28,6 @@
#include "CNF.h"
#include "Scav.h"
-#if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
-StgWord64 whitehole_spin = 0;
-#endif
-
#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p)
#define evacuate_BLACKHOLE(p) evacuate_BLACKHOLE1(p)
@@ -47,7 +43,7 @@ StgWord64 whitehole_spin = 0;
*/
#define MAX_THUNK_SELECTOR_DEPTH 16
-static void eval_thunk_selector (StgClosure **q, StgSelector * p, bool);
+static void eval_thunk_selector (StgClosure **q, StgSelector *p, bool);
STATIC_INLINE void evacuate_large(StgPtr p);
/* -----------------------------------------------------------------------------
@@ -197,8 +193,9 @@ spin:
info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
if (info == (W_)&stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
- whitehole_spin++;
+ whitehole_gc_spin++;
#endif
+ busy_wait_nop();
goto spin;
}
if (IS_FORWARDING_PTR(info)) {
@@ -281,14 +278,7 @@ evacuate_large(StgPtr p)
}
// remove from large_object list
- if (bd->u.back) {
- bd->u.back->link = bd->link;
- } else { // first object in the list
- gen->large_objects = bd->link;
- }
- if (bd->link) {
- bd->link->u.back = bd->u.back;
- }
+ dbl_link_remove(bd, &gen->large_objects);
/* link it on to the evacuated large object list of the destination gen
*/
@@ -417,14 +407,7 @@ evacuate_compact (StgPtr p)
}
// remove from compact_objects list
- if (bd->u.back) {
- bd->u.back->link = bd->link;
- } else { // first object in the list
- gen->compact_objects = bd->link;
- }
- if (bd->link) {
- bd->link->u.back = bd->u.back;
- }
+ dbl_link_remove(bd, &gen->compact_objects);
/* link it on to the evacuated compact object list of the destination gen
*/
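
dbl_link_remove itself is not defined in this patch; judging from the two
open-coded unlink sequences it replaces, its shape is plausibly the
following (a sketch, not the authoritative definition):

    STATIC_INLINE void
    dbl_link_remove (bdescr *bd, bdescr **list)
    {
        if (bd->u.back) {
            bd->u.back->link = bd->link;   /* splice out of the middle */
        } else {
            *list = bd->link;              /* bd was the head of *list */
        }
        if (bd->link) {
            bd->link->u.back = bd->u.back;
        }
    }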
@@ -539,14 +522,14 @@ loop:
switch (info->type) {
case THUNK_STATIC:
- if (info->srt_bitmap != 0) {
+ if (info->srt != 0) {
evacuate_static_object(THUNK_STATIC_LINK((StgClosure *)q), q);
}
return;
case FUN_STATIC:
- if (info->srt_bitmap != 0) {
- evacuate_static_object(FUN_STATIC_LINK((StgClosure *)q), q);
+ if (info->srt != 0 || info->layout.payload.ptrs != 0) {
+ evacuate_static_object(STATIC_LINK(info,(StgClosure *)q), q);
}
return;
@@ -707,9 +690,6 @@ loop:
case THUNK_1_1:
case THUNK_2_0:
case THUNK_0_2:
-#if defined(NO_PROMOTE_THUNKS)
-#error bitrotted
-#endif
copy(p,info,q,sizeofW(StgThunk)+2,gen_no);
return;
@@ -753,6 +733,19 @@ loop:
copy(p,info,q,sizeofW(StgInd),gen_no);
return;
}
+ // Note [BLACKHOLE pointing to IND]
+ //
+ // BLOCKING_QUEUE can be overwritten by IND (see
+ // wakeBlockingQueue()). However, when this happens we must
+ // be updating the BLACKHOLE, so the BLACKHOLE's indirectee
+ // should now point to the value.
+ //
+ // The mutator might observe an inconsistent state, because
+ // the writes are happening in another thread, so it's
+ // possible for the mutator to follow an indirectee and find
+ // an IND. But this should never happen in the GC, because
+ // the mutators are all stopped and the writes have
+ // completed.
ASSERT(i != &stg_IND_info);
}
q = r;
@@ -818,16 +811,16 @@ loop:
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// just copy the block
copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen_no);
return;
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// just copy the block
copy(p,info,q,small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)q),gen_no);
return;
@@ -898,9 +891,16 @@ evacuate_BLACKHOLE(StgClosure **p)
bd = Bdescr((P_)q);
- // blackholes can't be in a compact, or large
- ASSERT((bd->flags & (BF_COMPACT | BF_LARGE)) == 0);
+ // blackholes can't be in a compact
+ ASSERT((bd->flags & BF_COMPACT) == 0);
+ // blackholes *can* be in a large object: when raiseAsync() creates an
+ // AP_STACK the payload might be large enough to create a large object.
+ // See #14497.
+ if (bd->flags & BF_LARGE) {
+ evacuate_large((P_)q);
+ return;
+ }
if (bd->flags & BF_EVACUATED) {
if (bd->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = true;
@@ -934,23 +934,34 @@ evacuate_BLACKHOLE(StgClosure **p)
copy(p,info,q,sizeofW(StgInd),gen_no);
}
-/* -----------------------------------------------------------------------------
- Evaluate a THUNK_SELECTOR if possible.
+/* ----------------------------------------------------------------------------
+ Update a chain of thunk selectors with the given value. All selectors in the
+ chain become IND pointing to the value, except when there is a loop (i.e.
+ the value of a THUNK_SELECTOR is the THUNK_SELECTOR itself), in which
+ case we leave the selector as-is.
+
+ p is the current selector to update. In eval_thunk_selector we make a list
+ from selectors using ((StgThunk*)p)->payload[0] for the link field and use
+ that field to traverse the chain here.
+
+ val is the final value of the selector chain.
+
+ A chain is formed when we've got something like:
- p points to a THUNK_SELECTOR that we want to evaluate. The
- result of "evaluating" it will be evacuated and a pointer to the
- to-space closure will be returned.
+ let x = C1 { f1 = e1 }
+ y = C2 { f2 = f1 x }
+ z = f2 y
- If the THUNK_SELECTOR could not be evaluated (its selectee is still
- a THUNK, for example), then the THUNK_SELECTOR itself will be
- evacuated.
+ Here the chain (p) we get when evacuating z is:
+
+ [ f2 y, f1 x ]
+
+ and val is e1.
-------------------------------------------------------------------------- */
+
static void
unchain_thunk_selectors(StgSelector *p, StgClosure *val)
{
- StgSelector *prev;
-
- prev = NULL;
while (p)
{
ASSERT(p->header.info == &stg_WHITEHOLE_info);
@@ -960,7 +971,7 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
// not evacuate it), so in this case val is in from-space.
// ASSERT(!HEAP_ALLOCED_GC(val) || Bdescr((P_)val)->gen_no > N || (Bdescr((P_)val)->flags & BF_EVACUATED));
- prev = (StgSelector*)((StgClosure *)p)->payload[0];
+ StgSelector *prev = (StgSelector*)((StgClosure *)p)->payload[0];
// Update the THUNK_SELECTOR with an indirection to the
// value. The value is still in from-space at this stage.
@@ -997,8 +1008,18 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
}
}
+/* -----------------------------------------------------------------------------
+ Evaluate a THUNK_SELECTOR if possible.
+
+ p points to a THUNK_SELECTOR that we want to evaluate.
+
+ If the THUNK_SELECTOR could not be evaluated (its selectee is still a THUNK,
+ for example), then the THUNK_SELECTOR itself will be evacuated depending on
+ the evac parameter.
+ -------------------------------------------------------------------------- */
+
static void
-eval_thunk_selector (StgClosure **q, StgSelector * p, bool evac)
+eval_thunk_selector (StgClosure **q, StgSelector *p, bool evac)
// NB. for legacy reasons, p & q are swapped around :(
{
uint32_t field;
@@ -1007,7 +1028,6 @@ eval_thunk_selector (StgClosure **q, StgSelector * p, bool evac)
StgClosure *selectee;
StgSelector *prev_thunk_selector;
bdescr *bd;
- StgClosure *val;
prev_thunk_selector = NULL;
// this is a chain of THUNK_SELECTORs that we are going to update
@@ -1057,9 +1077,14 @@ selector_chain:
// In threaded mode, we'll use WHITEHOLE to lock the selector
// thunk while we evaluate it.
{
- do {
+ while(true) {
info_ptr = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info);
- } while (info_ptr == (W_)&stg_WHITEHOLE_info);
+ if (info_ptr != (W_)&stg_WHITEHOLE_info) { break; }
+#if defined(PROF_SPIN)
+ ++whitehole_gc_spin;
+#endif
+ busy_wait_nop();
+ }
// make sure someone else didn't get here first...
if (IS_FORWARDING_PTR(info_ptr) ||
@@ -1127,7 +1152,7 @@ selector_loop:
info->layout.payload.nptrs));
// Select the right field from the constructor
- val = selectee->payload[field];
+ StgClosure *val = selectee->payload[field];
#if defined(PROFILING)
// For the purposes of LDV profiling, we have destroyed
@@ -1159,6 +1184,8 @@ selector_loop:
val = ((StgInd *)val)->indirectee;
goto val_loop;
case THUNK_SELECTOR:
+ // Use payload to make a list of thunk selectors, to be
+ // used in unchain_thunk_selectors
((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector;
prev_thunk_selector = p;
p = (StgSelector*)val;
@@ -1273,5 +1300,4 @@ bale_out:
copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_no);
}
unchain_thunk_selectors(prev_thunk_selector, *q);
- return;
}
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index aa804a8b76..70d6d8efe5 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -28,6 +28,7 @@
#include "Sparks.h"
#include "Sweep.h"
+#include "Arena.h"
#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
@@ -45,9 +46,15 @@
#include "RetainerProfile.h"
#include "LdvProfile.h"
#include "RaiseAsync.h"
-#include "Stable.h"
+#include "StableName.h"
+#include "StablePtr.h"
#include "CheckUnload.h"
#include "CNF.h"
+#include "RtsFlags.h"
+
+#if defined(PROFILING)
+#include "RetainerProfile.h"
+#endif
#include <string.h> // for memset()
#include <unistd.h>
@@ -89,6 +96,8 @@
*
* We build up a static object list while collecting generations 0..N,
* which is then appended to the static object list of generation N+1.
+ *
+ * See also: Note [STATIC_LINK fields] in Storage.h.
*/
/* N is the oldest generation being collected, where the generations
@@ -112,8 +121,6 @@ uint32_t mutlist_MUTVARS,
mutlist_TVAR_WATCH_QUEUE,
mutlist_TREC_CHUNK,
mutlist_TREC_HEADER,
- mutlist_ATOMIC_INVARIANT,
- mutlist_INVARIANT_CHECK_QUEUE,
mutlist_OTHERS;
#endif
@@ -122,7 +129,9 @@ uint32_t mutlist_MUTVARS,
gc_thread **gc_threads = NULL;
#if !defined(THREADED_RTS)
-StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
+// Must be 64-byte aligned to match the stated 64-byte alignment of gen_workspace
+StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
+ ATTRIBUTE_ALIGNED(64);
#endif
// Number of threads running in *this* GC. Affects how many
@@ -132,6 +141,13 @@ uint32_t n_gc_threads;
// For stats:
static long copied; // *words* copied & scavenged during this GC
+#if defined(PROF_SPIN) && defined(THREADED_RTS)
+// spin and yield counts for the quasi-SpinLock in waitForGcThreads
+volatile StgWord64 waitForGcThreads_spin = 0;
+volatile StgWord64 waitForGcThreads_yield = 0;
+volatile StgWord64 whitehole_gc_spin = 0;
+#endif
+
bool work_stealing;
uint32_t static_flag = STATIC_FLAG_B;
@@ -188,7 +204,9 @@ GarbageCollect (uint32_t collect_gen,
{
bdescr *bd;
generation *gen;
- StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
+ StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
@@ -221,8 +239,9 @@ GarbageCollect (uint32_t collect_gen,
// tell the stats department that we've started a GC
stat_startGC(cap, gct);
- // lock the StablePtr table
- stableLock();
+ // Lock the StablePtr table. This prevents FFI calls manipulating
+ // the table from occurring during GC.
+ stablePtrLock();
#if defined(DEBUG)
mutlist_MUTVARS = 0;
@@ -232,8 +251,6 @@ GarbageCollect (uint32_t collect_gen,
mutlist_TVAR_WATCH_QUEUE = 0;
mutlist_TREC_CHUNK = 0;
mutlist_TREC_HEADER = 0;
- mutlist_ATOMIC_INVARIANT = 0;
- mutlist_INVARIANT_CHECK_QUEUE = 0;
mutlist_OTHERS = 0;
#endif
@@ -385,17 +402,15 @@ GarbageCollect (uint32_t collect_gen,
markScheduler(mark_root, gct);
-#if defined(RTS_USER_SIGNALS)
- // mark the signal handlers (signals should be already blocked)
- markSignalHandlers(mark_root, gct);
-#endif
-
// Mark the weak pointer list, and prepare to detect dead weak pointers.
markWeakPtrList();
initWeakForGC();
// Mark the stable pointer table.
- markStableTables(mark_root, gct);
+ markStablePtrTable(mark_root, gct);
+
+ // Remember old stable name addresses.
+ rememberOldStableNameAddresses ();
/* -------------------------------------------------------------------------
* Repeatedly scavenge all the areas we know about until there's no
@@ -421,7 +436,7 @@ GarbageCollect (uint32_t collect_gen,
shutdown_gc_threads(gct->thread_index, idle_cap);
// Now see which stable names are still alive.
- gcStableTables();
+ gcStableNameTable();
#if defined(THREADED_RTS)
if (n_gc_threads == 1) {
@@ -461,32 +476,53 @@ GarbageCollect (uint32_t collect_gen,
copied = 0;
par_max_copied = 0;
par_balanced_copied = 0;
+ gc_spin_spin = 0;
+ gc_spin_yield = 0;
+ mut_spin_spin = 0;
+ mut_spin_yield = 0;
+ any_work = 0;
+ no_work = 0;
+ scav_find_work = 0;
{
uint32_t i;
uint64_t par_balanced_copied_acc = 0;
+ const gc_thread* thread;
for (i=0; i < n_gc_threads; i++) {
copied += gc_threads[i]->copied;
}
for (i=0; i < n_gc_threads; i++) {
+ thread = gc_threads[i];
if (n_gc_threads > 1) {
debugTrace(DEBUG_gc,"thread %d:", i);
- debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
- debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
- debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
- debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
- debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+ debugTrace(DEBUG_gc," copied %ld",
+ thread->copied * sizeof(W_));
+ debugTrace(DEBUG_gc," scanned %ld",
+ thread->scanned * sizeof(W_));
+ debugTrace(DEBUG_gc," any_work %ld",
+ thread->any_work);
+ debugTrace(DEBUG_gc," no_work %ld",
+ thread->no_work);
+ debugTrace(DEBUG_gc," scav_find_work %ld",
+ thread->scav_find_work);
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ gc_spin_spin += thread->gc_spin.spin;
+ gc_spin_yield += thread->gc_spin.yield;
+ mut_spin_spin += thread->mut_spin.spin;
+ mut_spin_yield += thread->mut_spin.yield;
+#endif
+
+ any_work += thread->any_work;
+ no_work += thread->no_work;
+ scav_find_work += thread->scav_find_work;
+
+ par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+ par_balanced_copied_acc +=
+ stg_min(n_gc_threads * gc_threads[i]->copied, copied);
}
- par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
- par_balanced_copied_acc +=
- stg_min(n_gc_threads * gc_threads[i]->copied, copied);
}
- if (n_gc_threads == 1) {
- par_max_copied = 0;
- par_balanced_copied = 0;
- }
- else
- {
+ if (n_gc_threads > 1) {
// See Note [Work Balance] for an explanation of this computation
par_balanced_copied =
(par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
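
The divisor is cut off by the hunk boundary; presumably (per Note [Work
Balance]) it is n_gc_threads - 1, which makes the metric range from 0
(fully serial) up to copied (perfectly balanced). A worked example under
that assumption:

    /* n_gc_threads = 2, per-thread copied = {60, 40}  =>  copied = 100
     *   acc = min(2*60,100) + min(2*40,100) = 100 + 80 = 180
     *   par_balanced_copied = (180 - 100 + 0) / 1 = 80
     * Balanced work {50,50} scores (200-100)/1 = 100 = copied, and
     * serial work {100,0} scores (100-100)/1 = 0. */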
@@ -521,13 +557,11 @@ GarbageCollect (uint32_t collect_gen,
copied += mut_list_size;
debugTrace(DEBUG_gc,
- "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
+ "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",
(unsigned long)(mut_list_size * sizeof(W_)),
mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
- mutlist_ATOMIC_INVARIANT,
- mutlist_INVARIANT_CHECK_QUEUE,
mutlist_OTHERS);
}
@@ -701,15 +735,15 @@ GarbageCollect (uint32_t collect_gen,
if (major_gc) { gcCAFs(); }
#endif
- // Update the stable pointer hash table.
- updateStableTables(major_gc);
+ // Update the stable name hash table
+ updateStableNameTable(major_gc);
// unlock the StablePtr table. Must be before scheduleFinalizers(),
// because a finalizer may call hs_free_fun_ptr() or
// hs_free_stable_ptr(), both of which access the StablePtr table.
- stableUnlock();
+ stablePtrUnlock();
- // Must be after stableUnlock(), because it might free stable ptrs.
+ // Must be after stablePtrUnlock(), because it might free stable ptrs.
if (major_gc) {
checkUnload (gct->scavenged_static_objects);
}
@@ -751,24 +785,51 @@ GarbageCollect (uint32_t collect_gen,
ACQUIRE_SM_LOCK;
if (major_gc) {
- W_ need, got;
- need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
- got = mblocks_allocated;
+ W_ need_prealloc, need_live, need, got;
+ uint32_t i;
+
+ need_live = 0;
+ for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
+ need_live += genLiveBlocks(&generations[i]);
+ }
+ need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
+
+ need_prealloc = 0;
+ for (i = 0; i < n_nurseries; i++) {
+ need_prealloc += nurseries[i].n_blocks;
+ }
+ need_prealloc += RtsFlags.GcFlags.largeAllocLim;
+ need_prealloc += countAllocdBlocks(exec_block);
+ need_prealloc += arenaBlocks();
+#if defined(PROFILING)
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ need_prealloc = retainerStackBlocks();
+ }
+#endif
+
/* If the amount of data remains constant, next major GC we'll
- require (F+1)*need. We leave (F+2)*need in order to reduce
- repeated deallocation and reallocation. */
- need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+ * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
+ * in order to reduce repeated deallocation and reallocation. #14702
+ */
+ need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
+
+ /* Also, if user set heap size, do not drop below it.
+ */
+ need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
+
/* But with a large nursery, the above estimate might exceed
* maxHeapSize. A large resident set size might make the OS
* kill this process, or swap unnecessarily. Therefore we
* ensure that our estimate does not exceed maxHeapSize.
*/
if (RtsFlags.GcFlags.maxHeapSize != 0) {
- W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
- if (need > max) {
- need = max;
- }
+ need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
}
+
+ need = BLOCKS_TO_MBLOCKS(need);
+
+ got = mblocks_allocated;
+
if (got > need) {
returnMemoryToOS(got - need);
}
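
A worked example of the revised retention policy (invented numbers):

    /* need_live     = 1000 blocks of live data (>= minOldGenSize)
     * need_prealloc =  300 blocks (nurseries + largeAllocLim + exec
     *                  blocks + arenas)
     * oldGenFactor  = 2 (the -F default)
     *
     * need = 300 + (2+2)*1000 = 4300 blocks, then raised to any -H
     * heapSizeSuggestion, capped at -M maxHeapSize, converted to
     * megablocks, and the surplus over mblocks_allocated is returned
     * to the OS. */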
@@ -797,7 +858,9 @@ GarbageCollect (uint32_t collect_gen,
// ok, GC over: tell the stats department what happened.
stat_endGC(cap, gct, live_words, copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */,
- N, n_gc_threads, par_max_copied, par_balanced_copied);
+ N, n_gc_threads, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -825,11 +888,6 @@ static void heapOverflow(void)
Initialise the gc_thread structures.
-------------------------------------------------------------------------- */
-#define GC_THREAD_INACTIVE 0
-#define GC_THREAD_STANDING_BY 1
-#define GC_THREAD_RUNNING 2
-#define GC_THREAD_WAITING_TO_CONTINUE 3
-
static void
new_gc_thread (uint32_t n, gc_thread *t)
{
@@ -1132,6 +1190,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
const uint32_t me = cap->no;
uint32_t i, j;
bool retry = true;
+ Time t0, t1, t2;
+
+ t0 = t1 = t2 = getProcessElapsedTime();
while(retry) {
for (i=0; i < n_threads; i++) {
@@ -1151,8 +1212,32 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
}
}
if (!retry) break;
+#if defined(PROF_SPIN)
+ waitForGcThreads_yield++;
+#endif
yieldThread();
}
+
+ t2 = getProcessElapsedTime();
+ if (RtsFlags.GcFlags.longGCSync != 0 &&
+ t2 - t1 > RtsFlags.GcFlags.longGCSync) {
+ /* call the hook again after each further longGCSync of delay */
+ rtsConfig.longGCSync(cap->no, t2 - t0);
+ t1 = t2;
+ }
+ if (retry) {
+#if defined(PROF_SPIN)
+ // This is a bit strange, we'll get more yields than spins.
+ // I guess that means it's not a spin-lock at all, but these
+ // numbers are still useful (I think).
+ waitForGcThreads_spin++;
+#endif
+ }
+ }
+
+ if (RtsFlags.GcFlags.longGCSync != 0 &&
+ t2 - t0 > RtsFlags.GcFlags.longGCSync) {
+ rtsConfig.longGCSyncEnd(t2 - t0);
}
}
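
rtsConfig.longGCSync and longGCSyncEnd are hook points (see
hooks/LongGCSync.c in the diffstat). A hypothetical sketch of a program
embedding the RTS and overriding the first hook; the signature is inferred
from the call sites above, so treat the details as assumptions:

    #include <stdio.h>
    #include "Rts.h"

    /* Signature inferred from rtsConfig.longGCSync(cap->no, t2 - t0). */
    static void myLongGCSync (uint32_t cap_no, Time t)
    {
        fprintf(stderr, "cap %u: GC sync slow, %llu ns so far\n",
                cap_no, (unsigned long long)TimeToNS(t));
    }

    int main (int argc, char *argv[])
    {
        RtsConfig conf = defaultRtsConfig;
        conf.longGCSync = myLongGCSync;   /* fires once per interval */
        hs_init_ghc(&argc, &argv, conf);
        /* ... run Haskell code over the FFI ... */
        hs_exit();
        return 0;
    }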
@@ -1324,7 +1409,7 @@ prepare_collected_gen (generation *gen)
bdescr *bitmap_bdescr;
StgWord *bitmap;
- bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
+ bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
if (bitmap_size > 0) {
bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
@@ -1342,7 +1427,7 @@ prepare_collected_gen (generation *gen)
// block descriptor.
for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
bd->u.bitmap = bitmap;
- bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
+ bitmap += BLOCK_SIZE_W / BITS_IN(W_);
// Also at this point we set the BF_MARKED flag
// for this block. The invariant is that
@@ -1446,9 +1531,6 @@ collect_gct_blocks (void)
take a global lock. Here we collect those blocks from the
cap->pinned_object_blocks lists and put them on the
main g0->large_object list.
-
- Returns: the number of words allocated this way, for stats
- purposes.
-------------------------------------------------------------------------- */
static void
@@ -1744,8 +1826,8 @@ resize_nursery (void)
Sanity code for CAF garbage collection.
With DEBUG turned on, we manage a CAF list in addition to the SRT
- mechanism. After GC, we run down the CAF list and blackhole any
- CAFs which have been garbage collected. This means we get an error
+ mechanism. After GC, we run down the CAF list and turn any
+ CAFs which have been garbage collected into GCD_CAFs. This means we get an error
whenever the program tries to enter a garbage collected CAF.
Any garbage collected CAFs are taken off the CAF list at the same
@@ -1771,7 +1853,10 @@ static void gcCAFs(void)
info = get_itbl((StgClosure*)p);
ASSERT(info->type == IND_STATIC);
- if (p->static_link == NULL) {
+ // See Note [STATIC_LINK fields] in Storage.h
+ // This condition identifies CAFs that have just been GC'd and
+ // don't have static_link==3, which means they should be ignored.
+ if ((((StgWord)(p->static_link)&STATIC_BITS) | prev_static_flag) != 3) {
debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
if (prev == NULL) {
@@ -1788,3 +1873,28 @@ static void gcCAFs(void)
debugTrace(DEBUG_gccafs, "%d CAFs live", i);
}
#endif
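
The new liveness test packs a lot into one expression. A hedged unpacking,
assuming STATIC_BITS == 3 and the flag scheme of Note [STATIC_LINK fields]
in Storage.h:

    /* bits = (StgWord)(p->static_link) & STATIC_BITS;
     * if ((bits | prev_static_flag) != 3)  =>  CAF was not visited.
     * A CAF the collector reached carries the current flag in the low
     * bits of static_link, so OR-ing with prev_static_flag yields 3;
     * an unreached CAF keeps stale bits, fails the test, and gets
     * stubbed with stg_GCD_CAF_info. */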
+
+
+/* -----------------------------------------------------------------------------
+ The GC can leave some work for the mutator to do before the next
+ GC, provided the work can be safely overlapped with mutation. This
+ can help reduce the GC pause time.
+
+ The mutator can call doIdleGCWork() any time it likes, but
+ preferably when it is idle. It's safe for multiple capabilities to
+ call doIdleGCWork().
+
+ When 'all' is
+ * false: doIdleGCWork() should only take a short, bounded, amount
+ of time.
+ * true: doIdleGCWork() will complete all the outstanding GC work.
+
+ The return value is
+ * true if there's more to do (only if 'all' is false).
+ * false otherwise.
+ -------------------------------------------------------------------------- */
+
+bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
+{
+ return runSomeFinalizers(all);
+}
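
A hypothetical caller honouring the contract above (the real call sites
live in the scheduler, which is not part of this hunk):

    /* Sketch: drain outstanding idle GC work in short, bounded slices. */
    static void drainIdleGCWork (Capability *cap)
    {
        while (doIdleGCWork(cap, false)) {
            /* a real caller would bail out here if mutator work arrived;
             * passing all=true instead finishes everything in one call */
        }
    }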
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index c6b0c13a46..437a25f8d9 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -26,6 +26,8 @@ typedef void (*evac_fn)(void *user, StgClosure **root);
StgClosure * isAlive ( StgClosure *p );
void markCAFs ( evac_fn evac, void *user );
+bool doIdleGCWork(Capability *cap, bool all);
+
extern uint32_t N;
extern bool major_gc;
@@ -40,13 +42,13 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS,
mutlist_TVAR,
mutlist_TVAR_WATCH_QUEUE,
mutlist_TREC_CHUNK,
- mutlist_TREC_HEADER,
- mutlist_ATOMIC_INVARIANT,
- mutlist_INVARIANT_CHECK_QUEUE;
+ mutlist_TREC_HEADER;
#endif
#if defined(PROF_SPIN) && defined(THREADED_RTS)
-extern StgWord64 whitehole_spin;
+extern volatile StgWord64 whitehole_gc_spin;
+extern volatile StgWord64 waitForGcThreads_spin;
+extern volatile StgWord64 waitForGcThreads_yield;
#endif
void gcWorkerThread (Capability *cap);
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index bb206db64c..e865dabe5d 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -116,6 +116,12 @@ typedef struct gen_workspace_ {
of the GC threads
------------------------------------------------------------------------- */
+/* values for the wakeup field */
+#define GC_THREAD_INACTIVE 0
+#define GC_THREAD_STANDING_BY 1
+#define GC_THREAD_RUNNING 2
+#define GC_THREAD_WAITING_TO_CONTINUE 3
+
typedef struct gc_thread_ {
Capability *cap;
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 0373c2b925..31b2913a37 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -81,6 +81,14 @@ freeChain_sync(bdescr *bd)
RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
}
+void
+freeGroup_sync(bdescr *bd)
+{
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ freeGroup(bd);
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+}
+
/* -----------------------------------------------------------------------------
Workspace utilities
-------------------------------------------------------------------------- */
@@ -261,7 +269,7 @@ todo_block_full (uint32_t size, gen_workspace *ws)
// object. However, if the object we're copying is
// larger than a block, then we might have an empty
// block here.
- freeGroup(bd);
+ freeGroup_sync(bd);
} else {
push_scanned_block(bd, ws);
}
@@ -341,24 +349,3 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
return ws->todo_free;
}
-
-/* -----------------------------------------------------------------------------
- * Debugging
- * -------------------------------------------------------------------------- */
-
-#if defined(DEBUG)
-void
-printMutableList(bdescr *bd)
-{
- StgPtr p;
-
- debugBelch("mutable list %p: ", bd);
-
- for (; bd != NULL; bd = bd->link) {
- for (p = bd->start; p < bd->free; p++) {
- debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p));
- }
- }
- debugBelch("\n");
-}
-#endif /* DEBUG */
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index 2e2d4b199d..8b6040769e 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -31,6 +31,7 @@ INLINE_HEADER bdescr *allocBlockOnNode_sync(uint32_t node)
}
void freeChain_sync(bdescr *bd);
+void freeGroup_sync(bdescr *bd);
void push_scanned_block (bdescr *bd, gen_workspace *ws);
StgPtr todo_block_full (uint32_t size, gen_workspace *ws);
@@ -50,11 +51,6 @@ isPartiallyFull(bdescr *bd)
return (bd->free + WORK_UNIT_WORDS < bd->start + BLOCK_SIZE_W);
}
-
-#if defined(DEBUG)
-void printMutableList (bdescr *bd);
-#endif
-
// Version of recordMutableGen for use during GC. This uses the
// mutable lists attached to the current gc_thread structure, which
// are the same as the mutable lists on the Capability.
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index 9a077b3d14..88037f6a34 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -32,11 +32,11 @@
/* -----------------------------------------------------------------------------
Weak Pointers
- traverse_weak_ptr_list is called possibly many times during garbage
+ traverseWeakPtrList is called possibly many times during garbage
collection. It returns a flag indicating whether it did any work
(i.e. called evacuate on any live pointers).
- Invariant: traverse_weak_ptr_list is called when the heap is in an
+ Invariant: traverseWeakPtrList is called when the heap is in an
idempotent state. That means that there are no pending
evacuate/scavenge operations. This invariant helps the weak
pointer code decide which weak pointers are dead - if there are no
@@ -60,7 +60,7 @@
Now, we discover which *threads* are still alive. Pointers to
threads from the all_threads and main thread lists are the
- weakest of all: a pointers from the finalizer of a dead weak
+ weakest of all: a pointer from the finalizer of a dead weak
pointer can keep a thread alive. Any threads found to be unreachable
are evacuated and placed on the resurrected_threads list so we
can send them a signal later.
@@ -72,7 +72,7 @@
-------------------------------------------------------------------------- */
/* Which stage of processing various kinds of weak pointer are we at?
- * (see traverse_weak_ptr_list() below for discussion).
+ * (see traverseWeakPtrList() below for discussion).
*/
typedef enum { WeakPtrs, WeakThreads, WeakDone } WeakStage;
static WeakStage weak_stage;
@@ -185,7 +185,7 @@ traverseWeakPtrList(void)
}
default:
- barf("traverse_weak_ptr_list");
+ barf("traverseWeakPtrList");
return true;
}
}
@@ -344,7 +344,7 @@ static void tidyThreadList (generation *gen)
if (tmp == NULL) {
// not alive (yet): leave this thread on the
- // old_all_threads list.
+ // old_threads list.
prev = &(t->global_link);
}
else {
@@ -378,14 +378,13 @@ static void checkWeakPtrSanity(StgWeak *hd, StgWeak *tl)
void collectFreshWeakPtrs()
{
uint32_t i;
- generation *gen = &generations[0];
// move recently allocated weak_ptr_list to the old list as well
for (i = 0; i < n_capabilities; i++) {
Capability *cap = capabilities[i];
if (cap->weak_ptr_list_tl != NULL) {
IF_DEBUG(sanity, checkWeakPtrSanity(cap->weak_ptr_list_hd, cap->weak_ptr_list_tl));
- cap->weak_ptr_list_tl->link = gen->weak_ptr_list;
- gen->weak_ptr_list = cap->weak_ptr_list_hd;
+ cap->weak_ptr_list_tl->link = g0->weak_ptr_list;
+ g0->weak_ptr_list = cap->weak_ptr_list_hd;
cap->weak_ptr_list_tl = NULL;
cap->weak_ptr_list_hd = NULL;
} else {
diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h
index 3b0cee9630..7dd0efdc23 100644
--- a/rts/sm/OSMem.h
+++ b/rts/sm/OSMem.h
@@ -18,6 +18,7 @@ void osFreeAllMBlocks(void);
size_t getPageSize (void);
StgWord64 getPhysicalMemorySize (void);
void setExecutable (void *p, W_ len, bool exec);
+bool osBuiltWithNumaSupport(void); // See #14956
bool osNumaAvailable(void);
uint32_t osNumaNodes(void);
uint64_t osNumaMask(void);
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 53b101024a..8d4171b1cd 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -380,8 +380,8 @@ checkClosure( const StgClosure* p )
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
{
StgMutArrPtrs* a = (StgMutArrPtrs *)p;
uint32_t i;
@@ -391,6 +391,18 @@ checkClosure( const StgClosure* p )
return mut_arr_ptrs_sizeW(a);
}
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ {
+ StgSmallMutArrPtrs *a = (StgSmallMutArrPtrs *)p;
+ for (uint32_t i = 0; i < a->ptrs; i++) {
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(a->payload[i]));
+ }
+ return small_mut_arr_ptrs_sizeW(a);
+ }
+
case TSO:
checkTSO((StgTSO *)p);
return sizeofW(StgTSO);
@@ -535,7 +547,8 @@ checkTSO(StgTSO *tso)
ASSERT(next == END_TSO_QUEUE ||
info == &stg_MVAR_TSO_QUEUE_info ||
info == &stg_TSO_info ||
- info == &stg_WHITEHOLE_info); // happens due to STM doing lockTSO()
+ info == &stg_WHITEHOLE_info); // used to happen due to STM doing
+ // lockTSO(), might not happen now
if ( tso->why_blocked == BlockedOnMVar
|| tso->why_blocked == BlockedOnMVarRead
@@ -677,7 +690,7 @@ checkStaticObjects ( StgClosure* static_objects )
break;
case FUN_STATIC:
- p = *FUN_STATIC_LINK((StgClosure *)p);
+ p = *STATIC_LINK(info,(StgClosure *)p);
break;
case CONSTR:
@@ -859,7 +872,7 @@ void findSlop(bdescr *bd)
slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
if (slop > (1024/sizeof(W_))) {
debugBelch("block at %p (bdescr %p) has %" FMT_Word "KB slop\n",
- bd->start, bd, slop / (1024/sizeof(W_)));
+ bd->start, bd, slop / (1024/(W_)sizeof(W_)));
}
}
}
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 1ae8a4c19b..2f61914e55 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -11,6 +11,37 @@
*
* ---------------------------------------------------------------------------*/
+/* ----------------------------------------------------------------------------
+ We have two main scavenge functions:
+
+ - scavenge_block(bdescr *bd)
+ - scavenge_one(StgPtr p)
+
+ As the names and parameters suggest, the first one scavenges a whole block
+ while the second one only scavenges one object. This, however, is not the
+ only difference: scavenge_block scavenges all SRTs while scavenge_one only
+ scavenges SRTs of stacks. The reason is that scavenge_one is called in two
+ cases:
+
+ - When scavenging a mut_list
+ - When scavenging a large object
+
+ We don't have to scavenge SRTs when scavenging a mut_list, because we only
+ scavenge mut_lists in minor GCs, and static objects are only collected in
+ major GCs.
+
+ However, because scavenge_one is also used to scavenge large objects (which
+ are scavenged even in major GCs), we need to deal with SRTs of large
+ objects. We never allocate large FUNs and THUNKs, but we allocate large
+ STACKs (e.g. in threadStackOverflow), and stack frames can have SRTs. So
+ scavenge_one skips FUN and THUNK SRTs but scavenges stack frame SRTs.
+
+ In summary, in a major GC:
+
+ - scavenge_block() scavenges all SRTs
+ - scavenge_one() scavenges only stack frame SRTs
+ ------------------------------------------------------------------------- */
+
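
The policy above condenses to a small decision table (illustrative only,
not RTS code):

    /* Who scavenges which SRTs during a major GC:
     *   scavenge_block(bd):  FUN/THUNK SRTs yes, stack-frame SRTs yes
     *   scavenge_one(p):     FUN/THUNK SRTs no,  stack-frame SRTs yes
     * mut_lists only matter in minor GCs, where static objects are not
     * collected, but large STACKs do occur (e.g. threadStackOverflow)
     * and their frames can carry SRTs. */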
#include "PosixSource.h"
#include "Rts.h"
@@ -329,105 +360,17 @@ scavenge_AP (StgAP *ap)
Scavenge SRTs
-------------------------------------------------------------------------- */
-/* Similar to scavenge_large_bitmap(), but we don't write back the
- * pointers we get back from evacuate().
- */
-static void
-scavenge_large_srt_bitmap( StgLargeSRT *large_srt )
-{
- uint32_t i, j, size;
- StgWord bitmap;
- StgClosure **p;
-
- size = (uint32_t)large_srt->l.size;
- p = (StgClosure **)large_srt->srt;
-
- for (i = 0; i < size / BITS_IN(W_); i++) {
- bitmap = large_srt->l.bitmap[i];
- // skip zero words: bitmaps can be very sparse, and this helps
- // performance a lot in some cases.
- if (bitmap != 0) {
- for (j = 0; j < BITS_IN(W_); j++) {
- if ((bitmap & 1) != 0) {
- evacuate(p);
- }
- p++;
- bitmap = bitmap >> 1;
- }
- } else {
- p += BITS_IN(W_);
- }
- }
- if (size % BITS_IN(W_) != 0) {
- bitmap = large_srt->l.bitmap[i];
- for (j = 0; j < size % BITS_IN(W_); j++) {
- if ((bitmap & 1) != 0) {
- evacuate(p);
- }
- p++;
- bitmap = bitmap >> 1;
- }
- }
-}
-
-/* evacuate the SRT. If srt_bitmap is zero, then there isn't an
- * srt field in the info table. That's ok, because we'll
- * never dereference it.
- */
-STATIC_INLINE GNUC_ATTR_HOT void
-scavenge_srt (StgClosure **srt, uint32_t srt_bitmap)
-{
- uint32_t bitmap;
- StgClosure **p;
-
- bitmap = srt_bitmap;
- p = srt;
-
- if (bitmap == (StgHalfWord)(-1)) {
- scavenge_large_srt_bitmap( (StgLargeSRT *)srt );
- return;
- }
-
- while (bitmap != 0) {
- if ((bitmap & 1) != 0) {
-#if defined(COMPILING_WINDOWS_DLL)
- // Special-case to handle references to closures hiding out in DLLs, since
- // double indirections required to get at those. The code generator knows
- // which is which when generating the SRT, so it stores the (indirect)
- // reference to the DLL closure in the table by first adding one to it.
- // We check for this here, and undo the addition before evacuating it.
- //
- // If the SRT entry hasn't got bit 0 set, the SRT entry points to a
- // closure that's fixed at link-time, and no extra magic is required.
- if ( (W_)(*srt) & 0x1 ) {
- evacuate( (StgClosure**) ((W_) (*srt) & ~0x1));
- } else {
- evacuate(p);
- }
-#else
- evacuate(p);
-#endif
- }
- p++;
- bitmap = bitmap >> 1;
- }
-}
-
-
STATIC_INLINE GNUC_ATTR_HOT void
scavenge_thunk_srt(const StgInfoTable *info)
{
StgThunkInfoTable *thunk_info;
- uint32_t bitmap;
if (!major_gc) return;
thunk_info = itbl_to_thunk_itbl(info);
- bitmap = thunk_info->i.srt_bitmap;
- if (bitmap) {
- // don't read srt_offset if bitmap==0, because it doesn't exist
- // and so the memory might not be readable.
- scavenge_srt((StgClosure **)GET_SRT(thunk_info), bitmap);
+ if (thunk_info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_SRT(thunk_info);
+ evacuate(&srt);
}
}
@@ -435,16 +378,13 @@ STATIC_INLINE GNUC_ATTR_HOT void
scavenge_fun_srt(const StgInfoTable *info)
{
StgFunInfoTable *fun_info;
- uint32_t bitmap;
if (!major_gc) return;
fun_info = itbl_to_fun_itbl(info);
- bitmap = fun_info->i.srt_bitmap;
- if (bitmap) {
- // don't read srt_offset if bitmap==0, because it doesn't exist
- // and so the memory might not be readable.
- scavenge_srt((StgClosure **)GET_FUN_SRT(fun_info), bitmap);
+ if (fun_info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_FUN_SRT(fun_info);
+ evacuate(&srt);
}
}
@@ -737,18 +677,16 @@ scavenge_block (bdescr *bd)
break;
}
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
- // If we're going to put this object on the mutable list, then
- // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -780,8 +718,8 @@ scavenge_block (bdescr *bd)
break;
}
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgPtr next;
@@ -791,12 +729,10 @@ scavenge_block (bdescr *bd)
evacuate((StgClosure **)p);
}
- // If we're going to put this object on the mutable list, then
- // set its info ptr to SMALL_MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1133,20 +1069,18 @@ scavenge_mark_stack(void)
break;
}
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgPtr q = p;
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
- // If we're going to put this object on the mutable list, then
- // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1180,8 +1114,8 @@ scavenge_mark_stack(void)
break;
}
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgPtr next, q = p;
@@ -1191,12 +1125,10 @@ scavenge_mark_stack(void)
evacuate((StgClosure **)p);
}
- // If we're going to put this object on the mutable list, then
- // set its info ptr to SMALL_MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1365,7 +1297,7 @@ scavenge_one(StgPtr p)
case WEAK:
// This WEAK object will not be considered by tidyWeakList during this
- // collection because it is in a generation >= N, but it is on the
+ // collection because it is in a generation > N, but it is on the
// mutable list so we must evacuate all of its pointers because some
// of them may point into a younger generation.
scavengeLiveWeak((StgWeak *)p);
@@ -1457,18 +1389,16 @@ scavenge_one(StgPtr p)
break;
}
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
{
// follow everything
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
- // If we're going to put this object on the mutable list, then
- // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1502,8 +1432,8 @@ scavenge_one(StgPtr p)
break;
}
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
{
// follow everything
StgPtr next, q=p;
@@ -1513,12 +1443,10 @@ scavenge_one(StgPtr p)
evacuate((StgClosure **)p);
}
- // If we're going to put this object on the mutable list, then
- // set its info ptr to SMALL_MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1653,8 +1581,8 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
mutlist_MUTVARS++; break;
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
mutlist_MUTARRS++; break;
case MVAR_CLEAN:
barf("MVAR_CLEAN on mutable list");
@@ -1669,10 +1597,6 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
mutlist_TVAR_WATCH_QUEUE++;
else if (((StgClosure*)p)->header.info == &stg_TREC_HEADER_info)
mutlist_TREC_HEADER++;
- else if (((StgClosure*)p)->header.info == &stg_ATOMIC_INVARIANT_info)
- mutlist_ATOMIC_INVARIANT++;
- else if (((StgClosure*)p)->header.info == &stg_INVARIANT_CHECK_QUEUE_info)
- mutlist_INVARIANT_CHECK_QUEUE++;
else
mutlist_OTHERS++;
break;
@@ -1690,6 +1614,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
//
switch (get_itbl((StgClosure *)p)->type) {
case MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_CLEAN:
recordMutableGen_GC((StgClosure *)p,gen_no);
continue;
case MUT_ARR_PTRS_DIRTY:
@@ -1813,7 +1738,11 @@ scavenge_static(void)
case FUN_STATIC:
scavenge_fun_srt(info);
- break;
+ /* fallthrough */
+
+ // a FUN_STATIC can also be an SRT, so it may have pointer
+ // fields. See Note [SRTs] in CmmBuildInfoTables, specifically
+ // the [FUN] optimisation.
case CONSTR:
case CONSTR_NOCAF:
@@ -1979,8 +1908,10 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
p = scavenge_small_bitmap(p, size, bitmap);
follow_srt:
- if (major_gc)
- scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap);
+ if (major_gc && info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_SRT(info);
+ evacuate(&srt);
+ }
continue;
case RET_BCO: {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ffaed5f17c..dcc5b3a3c7 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -197,11 +197,7 @@ initStorage (void)
#if defined(THREADED_RTS)
initSpinLock(&gc_alloc_block_sync);
-#if defined(PROF_SPIN)
- whitehole_spin = 0;
#endif
-#endif
-
N = 0;
for (n = 0; n < n_numa_nodes; n++) {
@@ -224,6 +220,7 @@ initStorage (void)
void storageAddCapabilities (uint32_t from, uint32_t to)
{
uint32_t n, g, i, new_n_nurseries;
+ nursery *old_nurseries;
if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
new_n_nurseries = to;
@@ -233,6 +230,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
}
+ old_nurseries = nurseries;
if (from > 0) {
nurseries = stgReallocBytes(nurseries,
new_n_nurseries * sizeof(struct nursery_),
@@ -244,8 +242,9 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
// we've moved the nurseries, so we have to update the rNursery
// pointers from the Capabilities.
- for (i = 0; i < to; i++) {
- capabilities[i]->r.rNursery = &nurseries[i];
+ for (i = 0; i < from; i++) {
+ uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
+ capabilities[i]->r.rNursery = &nurseries[index];
}
/* The allocation area. Policy: keep the allocation area
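
Why the loop needed rewriting, in miniature: stgReallocBytes may move the
whole array, leaving every saved rNursery pointer dangling, and (presumably
because chunked nurseries decouple the two counts) capability i does not
necessarily use nurseries[i]:

    /* Pointer arithmetic against the *old* base recovers the logical
     * index before rebasing onto the reallocated array:
     *   index = cap->r.rNursery - old_nurseries;
     *   cap->r.rNursery = &nurseries[index];
     * Capabilities in [from, to) are set up later, so only the first
     * 'from' capabilities are rebased here. */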
@@ -307,21 +306,21 @@ freeStorage (bool free_heap)
The entry code for every CAF does the following:
- - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
+ - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
- - if newCaf returns zero, it re-enters the CAF (see Note [atomic
+ - if newCAF returns zero, it re-enters the CAF (see Note [atomic
CAF entry])
- pushes an update frame pointing to the CAF_BLACKHOLE
- Why do we build an BLACKHOLE in the heap rather than just updating
+ Why do we build a BLACKHOLE in the heap rather than just updating
the thunk directly? It's so that we only need one kind of update
frame - otherwise we'd need a static version of the update frame
too, and various other parts of the RTS that deal with update
frames would also need special cases for static update frames.
- newCaf() does the following:
+ newCAF() does the following:
- atomically locks the CAF (see [atomic CAF entry])
@@ -339,7 +338,7 @@ freeStorage (bool free_heap)
------------------
Note [atomic CAF entry]
- With THREADED_RTS, newCaf() is required to be atomic (see
+ With THREADED_RTS, newCAF() is required to be atomic (see
#5558). This is because if two threads happened to enter the same
CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
and so the normal threadPaused() machinery for detecting duplicate
@@ -359,7 +358,7 @@ freeStorage (bool free_heap)
- we must be able to *revert* CAFs that have been evaluated, to
their pre-evaluated form.
- To do this, we use an additional CAF list. When newCaf() is
+ To do this, we use an additional CAF list. When newCAF() is
called on a dynamically-loaded CAF, we add it to the CAF list
instead of the old-generation mutable list, and save away its
old info pointer (in caf->saved_info) for later reversion.
@@ -796,6 +795,20 @@ move_STACK (StgStack *src, StgStack *dest)
dest->sp = (StgPtr)dest->sp + diff;
}
+STATIC_INLINE void
+accountAllocation(Capability *cap, W_ n)
+{
+ TICK_ALLOC_HEAP_NOCTR(WDS(n));
+ CCS_ALLOC(cap->r.rCCCS,n);
+ if (cap->r.rCurrentTSO != NULL) {
+ // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
+ ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
+ (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
+ - n*sizeof(W_)));
+ }
+
+}
+
/* -----------------------------------------------------------------------------
StgPtr allocate (Capability *cap, W_ n)
@@ -812,21 +825,37 @@ move_STACK (StgStack *src, StgStack *dest)
that operation fails, then the whole process will be killed.
-------------------------------------------------------------------------- */
+/*
+ * Allocate n words of heap memory; terminates the
+ * process on heap overflow
+ */
StgPtr
allocate (Capability *cap, W_ n)
{
+ StgPtr p = allocateMightFail(cap, n);
+ if (p == NULL) {
+ reportHeapOverflow();
+ // heapOverflow() doesn't exit (see #2592), but we aren't
+ // in a position to do a clean shutdown here: we
+ // either have to allocate the memory or exit now.
+ // Allocating the memory would be bad, because the user
+ // has requested that we not exceed maxHeapSize, so we
+ // just exit.
+ stg_exit(EXIT_HEAPOVERFLOW);
+ }
+ return p;
+}
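[With this split, code that can recover from heap exhaustion calls allocateMightFail and checks for NULL, while existing callers keep the terminating behaviour. A minimal sketch of the two conventions — illustrative only:]

    StgPtr p = allocate(cap, n);          /* never NULL; the process exits
                                             with EXIT_HEAPOVERFLOW       */
    StgPtr q = allocateMightFail(cap, n); /* NULL on heap overflow        */
    if (q == NULL) {
        /* e.g. raise a Haskell HeapOverflow exception instead of dying */
    }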
+
+/*
+ * Allocate n words of heap memory, returning NULL
+ * on heap overflow.
+ */
+StgPtr
+allocateMightFail (Capability *cap, W_ n)
+{
bdescr *bd;
StgPtr p;
- TICK_ALLOC_HEAP_NOCTR(WDS(n));
- CCS_ALLOC(cap->r.rCCCS,n);
- if (cap->r.rCurrentTSO != NULL) {
- // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
- ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
- (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
- - n*sizeof(W_)));
- }
-
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
// The largest number of words such that
// the computation of req_blocks will not overflow.
@@ -845,16 +874,12 @@ allocate (Capability *cap, W_ n)
req_blocks >= HS_INT32_MAX) // avoid overflow when
// calling allocGroup() below
{
- reportHeapOverflow();
- // heapOverflow() doesn't exit (see #2592), but we aren't
- // in a position to do a clean shutdown here: we
- // either have to allocate the memory or exit now.
- // Allocating the memory would be bad, because the user
- // has requested that we not exceed maxHeapSize, so we
- // just exit.
- stg_exit(EXIT_HEAPOVERFLOW);
+ return NULL;
}
+ // Only credit allocation after we've passed the size check above
+ accountAllocation(cap, n);
+
ACQUIRE_SM_LOCK
bd = allocGroupOnNode(cap->node,req_blocks);
dbl_link_onto(bd, &g0->large_objects);
@@ -870,6 +895,7 @@ allocate (Capability *cap, W_ n)
/* small allocation (<LARGE_OBJECT_THRESHOLD) */
+ accountAllocation(cap, n);
bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
@@ -955,7 +981,8 @@ allocate (Capability *cap, W_ n)
to pinned ByteArrays, not scavenging is ok.
This function is called by newPinnedByteArray# which immediately
- fills the allocated memory with a MutableByteArray#.
+ fills the allocated memory with a MutableByteArray#. Note that
+ this returns NULL on heap overflow.
------------------------------------------------------------------------- */
StgPtr
@@ -967,20 +994,16 @@ allocatePinned (Capability *cap, W_ n)
// If the request is for a large object, then allocate()
// will give us a pinned object anyway.
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
- p = allocate(cap, n);
- Bdescr(p)->flags |= BF_PINNED;
- return p;
- }
-
- TICK_ALLOC_HEAP_NOCTR(WDS(n));
- CCS_ALLOC(cap->r.rCCCS,n);
- if (cap->r.rCurrentTSO != NULL) {
- // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
- ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
- (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
- - n*sizeof(W_)));
+ p = allocateMightFail(cap, n);
+ if (p == NULL) {
+ return NULL;
+ } else {
+ Bdescr(p)->flags |= BF_PINNED;
+ return p;
+ }
}
+ accountAllocation(cap, n);
bd = cap->pinned_object_block;
// If we don't have a block of pinned objects yet, or the current
@@ -1135,7 +1158,7 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
* -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
- * [Note allocation accounting]
+ * Note [allocation accounting]
*
* - When cap->r.rCurrentNursery moves to a new block in the nursery,
* we add the size of the used portion of the previous block to
@@ -1241,16 +1264,15 @@ W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
* to store bitmaps and the mark stack. Note: blocks_needed does not
* include the blocks in the nursery.
*
- * Assume: all data currently live will remain live. Generationss
+ * Assume: all data currently live will remain live. Generations
* that will be collected next time will therefore need twice as many
* blocks since all the data will be copied.
*/
extern W_
calcNeeded (bool force_major, memcount *blocks_needed)
{
- W_ needed = 0, blocks;
- uint32_t g, N;
- generation *gen;
+ W_ needed = 0;
+ uint32_t N;
if (force_major) {
N = RtsFlags.GcFlags.generations - 1;
@@ -1258,12 +1280,12 @@ calcNeeded (bool force_major, memcount *blocks_needed)
N = 0;
}
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- gen = &generations[g];
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ generation *gen = &generations[g];
- blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
- + gen->n_large_blocks
- + gen->n_compact_blocks;
+ W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
+ + gen->n_large_blocks
+ + gen->n_compact_blocks;
// we need at least this much space
needed += blocks;
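[As a concrete reading of the doubling rule above — the figures are invented:]

    /* gen 0: 100 blocks, g <= N (collected) -> 100 * 2 = 200 blocks */
    /* gen 1: 400 blocks, g >  N (untouched) ->           400 blocks */
    /* needed = 600 blocks, excluding the nursery and the extra      */
    /* bitmap/mark-stack blocks mentioned above.                     */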
diff --git a/rts/win32/ConsoleHandler.c b/rts/win32/ConsoleHandler.c
index 3d283b0162..545a76a004 100644
--- a/rts/win32/ConsoleHandler.c
+++ b/rts/win32/ConsoleHandler.c
@@ -183,13 +183,15 @@ void startSignalHandlers(Capability *cap)
handler = deRefStablePtr((StgStablePtr)console_handler);
while (stg_pending_events > 0) {
stg_pending_events--;
- scheduleThread(cap,
+ StgTSO *t =
createIOThread(cap,
- RtsFlags.GcFlags.initialStkSize,
- rts_apply(cap,
- (StgClosure *)handler,
- rts_mkInt(cap,
- stg_pending_buf[stg_pending_events]))));
+ RtsFlags.GcFlags.initialStkSize,
+ rts_apply(cap,
+ (StgClosure *)handler,
+ rts_mkInt(cap,
+ stg_pending_buf[stg_pending_events])));
+ scheduleThread(cap, t);
+ labelThread(cap, t, "signal handler thread");
}
RELEASE_LOCK(&sched_mutex);
@@ -197,18 +199,6 @@ void startSignalHandlers(Capability *cap)
}
#endif /* !THREADED_RTS */
-/*
- * Function: markSignalHandlers()
- *
- * Evacuate the handler stack. _Assumes_ that console event delivery
- * has already been blocked.
- */
-void markSignalHandlers (evac_fn evac STG_UNUSED, void *user STG_UNUSED)
-{
- // nothing to mark; the console handler is a StablePtr which is
- // already treated as a root by the GC.
-}
-
/*
* Function: generic_handler()
diff --git a/rts/win32/GetTime.c b/rts/win32/GetTime.c
index 90c0d0d244..014a676e99 100644
--- a/rts/win32/GetTime.c
+++ b/rts/win32/GetTime.c
@@ -84,6 +84,11 @@ getMonotonicNSec()
}
else // fallback to GetTickCount
{
+ // TODO: Remove this code path, it cannot be taken because
+ // `QueryPerformanceFrequency` cannot fail on Windows >= XP
+ // and GHC no longer supports Windows <= XP.
+ // See https://ghc.haskell.org/trac/ghc/ticket/14233
+
// NOTE: GetTickCount is a 32-bit millisecond value, so it wraps around
// every 49 days.
DWORD count = GetTickCount();
diff --git a/rts/win32/IOManager.c b/rts/win32/IOManager.c
index c5cae75366..f155180ef3 100644
--- a/rts/win32/IOManager.c
+++ b/rts/win32/IOManager.c
@@ -435,10 +435,12 @@ AddIORequest ( int fd,
char* buffer,
CompletionProc onCompletion)
{
+ ASSERT(ioMan);
+
WorkItem* wItem = (WorkItem*)malloc(sizeof(WorkItem));
- unsigned int reqID;
- if (!ioMan || !wItem) return 0;
- reqID = ioMan->requestID++;
+ if (!wItem) return 0;
+
+ unsigned int reqID = ioMan->requestID++;
/* Fill in the blanks */
wItem->workKind = ( isSocket ? WORKER_FOR_SOCKET : 0 ) |
@@ -464,10 +466,12 @@ BOOL
AddDelayRequest ( HsInt usecs,
CompletionProc onCompletion)
{
+ ASSERT(ioMan);
+
WorkItem* wItem = (WorkItem*)malloc(sizeof(WorkItem));
- unsigned int reqID;
- if (!ioMan || !wItem) return false;
- reqID = ioMan->requestID++;
+ if (!wItem) return false;
+
+ unsigned int reqID = ioMan->requestID++;
/* Fill in the blanks */
wItem->workKind = WORKER_DELAY;
@@ -489,10 +493,12 @@ AddProcRequest ( void* proc,
void* param,
CompletionProc onCompletion)
{
+ ASSERT(ioMan);
+
WorkItem* wItem = (WorkItem*)malloc(sizeof(WorkItem));
- unsigned int reqID;
- if (!ioMan || !wItem) return false;
- reqID = ioMan->requestID++;
+ if (!wItem) return false;
+
+ unsigned int reqID = ioMan->requestID++;
/* Fill in the blanks */
wItem->workKind = WORKER_DO_PROC;
diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c
index c67b95be82..c62ee3b7a4 100644
--- a/rts/win32/OSMem.c
+++ b/rts/win32/OSMem.c
@@ -458,7 +458,7 @@ void *osReserveHeapMemory (void *startAddress, W_ *len)
sysErrorBelch(
"osReserveHeapMemory: VirtualAlloc MEM_RESERVE %llu bytes \
at address %p failed",
- len + MBLOCK_SIZE, startAddress);
+ *len + MBLOCK_SIZE, startAddress);
}
stg_exit(EXIT_FAILURE);
}
@@ -499,6 +499,11 @@ void osReleaseHeapMemory (void)
#endif
+bool osBuiltWithNumaSupport(void)
+{
+ return true;
+}
+
bool osNumaAvailable(void)
{
return osNumaNodes() > 1;
@@ -510,9 +515,18 @@ uint32_t osNumaNodes(void)
static ULONG numNumaNodes = 0;
/* Cache the amount of NUMA nodes. */
- if (!numNumaNodes && !GetNumaHighestNodeNumber(&numNumaNodes))
+ if (!numNumaNodes)
{
- numNumaNodes = 1;
+ if (GetNumaHighestNodeNumber(&numNumaNodes))
+ {
+ // GetNumaHighestNodeNumber returns the highest node number
+ // i.e. 0 for a non-NUMA system and >0 for a NUMA system, so add 1.
+ numNumaNodes += 1;
+ }
+ else
+ {
+ numNumaNodes = 1;
+ }
}
return numNumaNodes;
@@ -520,12 +534,12 @@ uint32_t osNumaNodes(void)
uint64_t osNumaMask(void)
{
- uint64_t numaMask;
- if (!GetNumaNodeProcessorMask(0, &numaMask))
- {
- return 1;
+ // The concept of a NUMA node mask (cf. numa_get_mems_allowed on POSIX)
+ // doesn't exist on Windows; thus, all nodes are allowed.
+ if (osNumaNodes() > sizeof(StgWord)*8) {
+ barf("osNumaMask: too many NUMA nodes (%d)", osNumaNodes());
}
- return numaMask;
+ // Shift a wide 1 so the mask is well-defined for more than 31 nodes.
+ return ((StgWord)1 << osNumaNodes()) - 1;
}
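[For instance, on a machine where GetNumaHighestNodeNumber reports node 1 (two nodes), the computation works out as follows — illustrative arithmetic:]

    /* osNumaNodes() == 2  =>  mask = (1 << 2) - 1 = 0x3,           */
    /* i.e. nodes 0 and 1 are both allowed, reflecting that Windows */
    /* has no per-process NUMA node mask to consult.                */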
void osBindMBlocksToNode(
diff --git a/rts/win32/OSThreads.c b/rts/win32/OSThreads.c
index ad4234066b..c67d621bc2 100644
--- a/rts/win32/OSThreads.c
+++ b/rts/win32/OSThreads.c
@@ -236,6 +236,7 @@ forkOS_createThreadWrapper ( void * entry )
cap = rts_lock();
rts_evalStableIO(&cap, (HsStablePtr) entry, NULL);
rts_unlock(cap);
+ rts_done();
return 0;
}
@@ -577,9 +578,8 @@ void setThreadNode (uint32_t node)
{
if (osNumaAvailable())
{
- StgWord mask = 0;
- mask |= 1 << node;
- if (!SetThreadAffinityMask(GetCurrentThread(), mask))
+ uint64_t mask = 0;
+ if (!GetNumaNodeProcessorMask(node, &mask) && !SetThreadAffinityMask(GetCurrentThread(), mask))
{
sysErrorBelch(
"setThreadNode: Error setting affinity of thread to NUMA node `%u': %lu.",
diff --git a/rts/win32/libHSbase.def b/rts/win32/libHSbase.def
index 496893a722..d4ec1fab0a 100644
--- a/rts/win32/libHSbase.def
+++ b/rts/win32/libHSbase.def
@@ -42,6 +42,7 @@ EXPORTS
base_GHCziIOziException_cannotCompactPinned_closure
base_GHCziIOziException_cannotCompactMutable_closure
+ base_ControlziExceptionziBase_absentSumFieldError_closure
base_ControlziExceptionziBase_nonTermination_closure
base_ControlziExceptionziBase_nestedAtomically_closure
base_GHCziEventziThread_blockedOnBadFD_closure
diff --git a/rts/win32/veh_excn.c b/rts/win32/veh_excn.c
index d925ad8919..2d9de52199 100644
--- a/rts/win32/veh_excn.c
+++ b/rts/win32/veh_excn.c
@@ -5,16 +5,79 @@
* Error Handling implementations for windows
*
* ---------------------------------------------------------------------------*/
-
+#define UNICODE 1
#include "Rts.h"
#include "ghcconfig.h"
#include "veh_excn.h"
+#include "LinkerInternals.h"
#include <assert.h>
+#include <stdbool.h>
+#include <dbghelp.h>
+#include <shellapi.h>
+#include <shlobj.h>
+#include <wchar.h>
+#include <windows.h>
+#include <stdio.h>
+#include <excpt.h>
+#include <inttypes.h>
+#include <dbghelp.h>
+#include <signal.h>
/////////////////////////////////
// Exception / signal handlers.
/////////////////////////////////
+/*
+ SEH (Structured Exception Handling) on Windows is quite tricky. On x86,
+ SEH is stack based: handlers live in FS[0] of each thread, which means
+ every time we spawn an OS thread we'd have to set up the error handling.
+ On x64, however, it's table and memory-region based: you register a
+ handler for a particular memory range. This means we'd have to register
+ handlers for each block of code we load externally or generate
+ internally ourselves.
+
+ In Windows XP, VEH (Vectored Exception Handler) and VCH (Vectored Continue
+ Handler) were added. Both of these are global, process-wide handlers; the
+ former handles all exceptions, while the latter handles only exceptions
+ we're trying to recover from, i.e. where a handler returned
+ EXCEPTION_CONTINUE_EXECUTION.
+
+ Lastly there are top-level exception filters, which are also process
+ global, but the problem here is that you can only have one, and setting
+ a new one removes the previous one. The chain of exception handling
+ looks like
+
+ [ Vectored Exception Handler ]
+ |
+ [ Structured Exception Handler ]
+ |
+ [ Exception Filters ]
+ |
+ [ Vectored Continue Handler ]
+
+ To make things more tricky, the exception handlers handle both hardware
+ and software exceptions. This means that previously, when we registered
+ VEH handlers, we would also trap software exceptions: when Haskell code
+ was loaded in a C++ or C# context we would swallow exceptions and
+ terminate in contexts where the host runtime should have been able to
+ continue, e.g. a segfault or a division by zero that your C++ code was
+ prepared to handle.
+
+ We could simply not handle these exceptions, but then GHCi would die a
+ horrible death whenever such an exception occurs in plain Haskell-only
+ code.
+
+ So instead we move to a Vectored Continue Handler, which runs as late as
+ possible, and we also register an exception filter that calls any
+ existing filter and then runs the continue handlers. We register
+ ourselves as the last continue handler so we don't supersede any other
+ VCH handlers.
+
+ Lastly, we also provide a way for users to disable this exception
+ handling entirely, so that even if the new approach doesn't solve their
+ issue they can work around it. After all, we don't expect any
+ interpreted code if you are running a Haskell DLL.
+
+ For a detailed analysis see
+ https://reverseengineering.stackexchange.com/questions/14992/what-are-the-vectored-continue-handlers
+ and https://www.gamekiller.net/threads/vectored-exception-handler.3237343/
+ */
+
// Define some values for the ordering of VEH Handlers:
// - CALL_FIRST means call this exception handler first
// - CALL_LAST means call this exception handler last
@@ -28,11 +91,19 @@
// Registered exception handler
PVOID __hs_handle = NULL;
+LPTOP_LEVEL_EXCEPTION_FILTER oldTopFilter = NULL;
+bool crash_dump = false;
+bool filter_called = false;
long WINAPI __hs_exception_handler(struct _EXCEPTION_POINTERS *exception_data)
{
- long action = EXCEPTION_CONTINUE_SEARCH;
+ if (!crash_dump && filter_called)
+ return EXCEPTION_CONTINUE_EXECUTION;
+
+ long action = EXCEPTION_CONTINUE_SEARCH;
+ int exit_code = EXIT_FAILURE;
ULONG_PTR what;
+ fprintf (stderr, "\n");
// When the system unwinds the VEH stack after having handled an excn,
// return immediately.
@@ -42,21 +113,28 @@ long WINAPI __hs_exception_handler(struct _EXCEPTION_POINTERS *exception_data)
switch (exception_data->ExceptionRecord->ExceptionCode) {
case EXCEPTION_FLT_DIVIDE_BY_ZERO:
case EXCEPTION_INT_DIVIDE_BY_ZERO:
- fprintf(stdout, "divide by zero\n");
+ fprintf(stderr, "divide by zero\n");
action = EXCEPTION_CONTINUE_EXECUTION;
+ exit_code = SIGFPE;
break;
case EXCEPTION_STACK_OVERFLOW:
- fprintf(stdout, "C stack overflow in generated code\n");
+ fprintf(stderr, "C stack overflow in generated code\n");
action = EXCEPTION_CONTINUE_EXECUTION;
break;
case EXCEPTION_ACCESS_VIOLATION:
what = exception_data->ExceptionRecord->ExceptionInformation[0];
- fprintf(stdout, "Access violation in generated code"
- " when %s %p\n"
- , what == 0 ? "reading" : what == 1 ? "writing" : what == 8 ? "executing data at" : "?"
- , (void*) exception_data->ExceptionRecord->ExceptionInformation[1]
+ fprintf(stderr, "Access violation in generated code"
+ " when %s 0x%" PRIxPTR "\n"
+ , what == 0 ? "reading"
+ : what == 1 ? "writing"
+ : what == 8 ? "executing data at"
+ : "?"
+ , (uintptr_t) exception_data
+ ->ExceptionRecord
+ ->ExceptionInformation[1]
);
action = EXCEPTION_CONTINUE_EXECUTION;
+ exit_code = SIGSEGV;
break;
default:;
}
@@ -66,40 +144,191 @@ long WINAPI __hs_exception_handler(struct _EXCEPTION_POINTERS *exception_data)
// But the correct action is still to exit as fast as possible.
if (EXCEPTION_CONTINUE_EXECUTION == action)
{
- fflush(stdout);
- stg_exit(EXIT_FAILURE);
+ fflush(stderr);
+ generateStack (exception_data);
+ generateDump (exception_data);
+ stg_exit(exit_code);
}
}
return action;
}
+long WINAPI __hs_exception_filter(struct _EXCEPTION_POINTERS *exception_data)
+{
+ filter_called = true;
+ long result = EXCEPTION_CONTINUE_EXECUTION;
+ if (oldTopFilter)
+ {
+ result = (*oldTopFilter)(exception_data);
+ if (EXCEPTION_CONTINUE_SEARCH == result)
+ result = EXCEPTION_CONTINUE_EXECUTION;
+ return result;
+ }
+
+ crash_dump = true;
+ return result;
+}
+
void __register_hs_exception_handler( void )
{
- // Allow the VEH handler to be registered only once.
+ if (!RtsFlags.MiscFlags.install_seh_handlers)
+ return;
+
+ // Allow the VCH handler to be registered only once.
if (NULL == __hs_handle)
{
- __hs_handle = AddVectoredExceptionHandler(CALL_FIRST, __hs_exception_handler);
+ // Be the last one to run, We can then be sure we didn't interfere with
+ // anything else.
+ __hs_handle = AddVectoredContinueHandler(CALL_LAST,
+ __hs_exception_handler);
// should the handler not be registered this will return a null.
assert(__hs_handle);
+
+ // Register for an exception filter to ensure the continue handler gets
+ // hit if no one handled the exception.
+ oldTopFilter = SetUnhandledExceptionFilter (__hs_exception_filter);
}
else
{
- errorBelch("There is no need to call __register_hs_exception_handler() twice, VEH handlers are global per process.");
+ errorBelch("There is no need to call __register_hs_exception_handler()"
+ " twice, VEH handlers are global per process.");
}
}
void __unregister_hs_exception_handler( void )
{
+ if (!RtsFlags.MiscFlags.install_seh_handlers)
+ return;
+
if (__hs_handle != NULL)
{
// Should the return value be checked? we're terminating anyway.
- RemoveVectoredExceptionHandler(__hs_handle);
+ RemoveVectoredContinueHandler(__hs_handle);
__hs_handle = NULL;
}
else
{
- errorBelch("__unregister_hs_exception_handler() called without having called __register_hs_exception_handler() first.");
+ errorBelch("__unregister_hs_exception_handler() called without having"
+ "called __register_hs_exception_handler() first.");
}
}
+// Generate a crash dump. Note that for these dumps to contain undecorated
+// names we would really need to be able to generate PDB files as well.
+void generateDump (EXCEPTION_POINTERS* pExceptionPointers)
+{
+ if (!RtsFlags.MiscFlags.generate_dump_file)
+ return;
+
+ WCHAR szPath[MAX_PATH];
+ WCHAR szFileName[MAX_PATH];
+ WCHAR const *const szAppName = L"ghc";
+ WCHAR const *const szVersion = L"";
+ DWORD dwBufferSize = MAX_PATH;
+ HANDLE hDumpFile;
+ SYSTEMTIME stLocalTime;
+ MINIDUMP_EXCEPTION_INFORMATION ExpParam;
+
+ GetLocalTime (&stLocalTime);
+ GetTempPathW (dwBufferSize, szPath);
+
+ swprintf (szFileName, MAX_PATH,
+ L"%ls%ls%ls-%04d%02d%02d-%02d%02d%02d-%ld-%ld.dmp",
+ szPath, szAppName, szVersion,
+ stLocalTime.wYear, stLocalTime.wMonth, stLocalTime.wDay,
+ stLocalTime.wHour, stLocalTime.wMinute, stLocalTime.wSecond,
+ GetCurrentProcessId(), GetCurrentThreadId());
+ hDumpFile = CreateFileW (szFileName, GENERIC_READ|GENERIC_WRITE,
+ FILE_SHARE_WRITE|FILE_SHARE_READ, 0, CREATE_ALWAYS, 0, 0);
+
+ ExpParam.ThreadId = GetCurrentThreadId();
+ ExpParam.ExceptionPointers = pExceptionPointers;
+ ExpParam.ClientPointers = TRUE;
+
+ MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(),
+ hDumpFile, MiniDumpNormal | MiniDumpWithDataSegs |
+ MiniDumpWithThreadInfo | MiniDumpWithCodeSegs,
+ &ExpParam, NULL, NULL);
+
+ fprintf (stderr, "Crash dump created. Dump written to:\n\t%ls", szFileName);
+}
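[As written, generateDump neither checks the CreateFileW result nor closes the handle. A hardened variant would look roughly like this sketch — not part of the patch:]

    HANDLE h = CreateFileW (szFileName, GENERIC_READ|GENERIC_WRITE,
                            FILE_SHARE_WRITE|FILE_SHARE_READ, 0,
                            CREATE_ALWAYS, 0, 0);
    if (h == INVALID_HANDLE_VALUE) {
        fprintf (stderr, "Could not create dump file.\n");
        return;
    }
    BOOL ok = MiniDumpWriteDump (GetCurrentProcess(), GetCurrentProcessId(),
                                 h, MiniDumpNormal, &ExpParam, NULL, NULL);
    CloseHandle (h);
    if (!ok)
        fprintf (stderr, "MiniDumpWriteDump failed: %lu\n", GetLastError());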
+
+// Generate stack trace information, we can piggy back on information we know
+// about in the runtime linker to resolve symbols. So this is a good opportunity
+// to make the output more useful.
+void generateStack (EXCEPTION_POINTERS* pExceptionPointers)
+{
+ if (!RtsFlags.MiscFlags.generate_stack_trace)
+ return;
+
+ PCONTEXT context = pExceptionPointers->ContextRecord;
+ STACKFRAME64 stackFrame = {0};
+ DWORD machineType;
+
+#if defined(x86_64_HOST_ARCH)
+ machineType = IMAGE_FILE_MACHINE_AMD64;
+ stackFrame.AddrPC.Offset = context->Rip;
+ stackFrame.AddrPC.Mode = AddrModeFlat;
+
+ stackFrame.AddrFrame.Offset = context->Rbp;
+ stackFrame.AddrFrame.Mode = AddrModeFlat;
+
+ stackFrame.AddrStack.Offset = context->Rsp;
+ stackFrame.AddrStack.Mode = AddrModeFlat;
+#else
+ machineType = IMAGE_FILE_MACHINE_I386;
+ stackFrame.AddrPC.Offset = context->Eip;
+ stackFrame.AddrPC.Mode = AddrModeFlat;
+
+ stackFrame.AddrFrame.Offset = context->Ebp;
+ stackFrame.AddrFrame.Mode = AddrModeFlat;
+
+ stackFrame.AddrStack.Offset = context->Esp;
+ stackFrame.AddrStack.Mode = AddrModeFlat;
+#endif
+ fprintf (stderr, "\n Attempting to reconstruct a stack trace...\n\n");
+ if (!SymInitialize (GetCurrentProcess (), NULL, true))
+ fprintf (stderr, " \nNOTE: Symbols could not be loaded. Addresses may"
+ " be unresolved.\n\n");
+
+ /* Maximum amount of stack frames to show. */
+ /* Phyx: I'm not sure if I should make this configurable or not. Would a
+ longer stack really be more useful? usually you only care about the top
+ few. */
+ int max_frames = 35;
+
+ fprintf (stderr, " Frame\tCode address\n");
+ DWORD64 lastBp = 0; /* Prevent loops with optimized stackframes. */
+ while (StackWalk64 (machineType, GetCurrentProcess(), GetCurrentThread(),
+ &stackFrame, context, NULL, SymFunctionTableAccess64,
+ SymGetModuleBase64, NULL) && max_frames > 0)
+ {
+ if (stackFrame.AddrPC.Offset == 0)
+ {
+ fprintf (stderr, "Null address\n");
+ break;
+ }
+ wchar_t buffer[1024];
+ uintptr_t topSp = 0;
+ fprintf (stderr, " * 0x%" PRIxPTR "\t%ls\n",
+ (uintptr_t)stackFrame.AddrFrame.Offset,
+ resolveSymbolAddr ((wchar_t*)&buffer, 1024,
+ (SymbolAddr*)(intptr_t)stackFrame.AddrPC.Offset,
+ &topSp));
+ if (lastBp >= stackFrame.AddrFrame.Offset)
+ {
+ fprintf (stderr, "Stack frame out of sequence...\n");
+ break;
+ }
+ lastBp = stackFrame.AddrFrame.Offset;
+
+ max_frames--;
+ if (max_frames == 0)
+ {
+     fprintf (stderr, "\n ... (maximum number of frames reached.)\n");
+ }
+ }
+ fprintf (stderr, "\n");
+ fflush(stderr);
+}
diff --git a/rts/win32/veh_excn.h b/rts/win32/veh_excn.h
index fda837f1f1..e0a11ade58 100644
--- a/rts/win32/veh_excn.h
+++ b/rts/win32/veh_excn.h
@@ -63,7 +63,12 @@
// See https://msdn.microsoft.com/en-us/library/windows/desktop/ms681419(v=vs.85).aspx
//
long WINAPI __hs_exception_handler( struct _EXCEPTION_POINTERS *exception_data );
+long WINAPI __hs_exception_filter(struct _EXCEPTION_POINTERS *exception_data);
// prototypes to the functions doing the registration and unregistration of the VEH handlers
void __register_hs_exception_handler( void );
void __unregister_hs_exception_handler( void );
+
+// prototypes for dump methods.
+void generateDump(EXCEPTION_POINTERS* pExceptionPointers);
+void generateStack (EXCEPTION_POINTERS* pExceptionPointers);
diff --git a/rts/xxhash.c b/rts/xxhash.c
new file mode 100644
index 0000000000..833b99f3b2
--- /dev/null
+++ b/rts/xxhash.c
@@ -0,0 +1,888 @@
+/*
+* xxHash - Fast Hash algorithm
+* Copyright (C) 2012-2016, Yann Collet
+*
+* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following disclaimer
+* in the documentation and/or other materials provided with the
+* distribution.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* You can contact the author at :
+* - xxHash homepage: http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
+ * It can generate buggy code on targets which do not support unaligned memory accesses.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define XXH_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
+# define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void XXH_free (void* p) { free(p); }
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# define FORCE_INLINE static __forceinline
+#else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+# else
+# define FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+# endif
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr)
+{
+ U32 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static U32 XXH_swap32 (U32 x)
+{
+ return ((x << 24) & 0xff000000 ) |
+ ((x << 8) & 0x00ff0000 ) |
+ ((x >> 8) & 0x0000ff00 ) |
+ ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* *************************************
+* Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+ static const int g_one = 1;
+# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+
+/* *************************************
+* Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* *******************************************************************
+* 32-bits hash functions
+*********************************************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 = 668265263U;
+static const U32 PRIME32_5 = 374761393U;
+
+static U32 XXH32_round(U32 seed, U32 input)
+{
+ seed += input * PRIME32_2;
+ seed = XXH_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)16;
+ }
+#endif
+
+ if (len>=16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = seed + PRIME32_1 + PRIME32_2;
+ U32 v2 = seed + PRIME32_2;
+ U32 v3 = seed + 0;
+ U32 v4 = seed - PRIME32_1;
+
+ do {
+ v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+ v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+ v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+ v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+ } while (p<=limit);
+
+ h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (U32) len;
+
+ while (p+4<=bEnd) {
+ h32 += XXH_get32bits(p) * PRIME32_3;
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h32 += (*p) * PRIME32_5;
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH32_state_t state;
+ XXH32_reset(&state, seed);
+ XXH32_update(&state, input, len);
+ return XXH32_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
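[For reference, the one-shot entry point added here is used like this — a minimal sketch; the buffer contents are illustrative:]

    #include <stdio.h>
    #include <string.h>
    #include "xxhash.h"

    int main (void)
    {
        const char *data = "hello world";
        /* One-shot 32-bit hash of the buffer, seed 0. */
        unsigned int h = XXH32(data, strlen(data), 0);
        printf("XXH32 = %08x\n", h);
        return 0;
    }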
+
+
+
+/*====== Hash streaming ======*/
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+ return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{
+ XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ memcpy(statePtr, &state, sizeof(state));
+ return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (input==NULL) return XXH_ERROR;
+#endif
+
+ state->total_len_32 += (unsigned)len;
+ state->large_len |= (len>=16) | (state->total_len_32>=16);
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+ state->memsize += (unsigned)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+ { const U32* p32 = state->mem32;
+ state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+ state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+ state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+ state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+ }
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p <= bEnd-16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = state->v1;
+ U32 v2 = state->v2;
+ U32 v3 = state->v3;
+ U32 v4 = state->v4;
+
+ do {
+ v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+ v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+ v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+ v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+ const BYTE * p = (const BYTE*)state->mem32;
+ const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
+ U32 h32;
+
+ if (state->large_len) {
+ h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+ } else {
+ h32 = state->v3 /* == seed */ + PRIME32_5;
+ }
+
+ h32 += state->total_len_32;
+
+ while (p+4<=bEnd) {
+ h32 += XXH_readLE32(p, endian) * PRIME32_3;
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h32 += (*p) * PRIME32_5;
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+* These functions allow transformation of hash result into and from its canonical format.
+* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+ return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bits hash functions
+*********************************************************************/
+
+/*====== Memory access ======*/
+
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t U64;
+# else
+ typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
+static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U64 XXH_read64(const void* memPtr)
+{
+ U64 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap64 __builtin_bswap64
+#else
+static U64 XXH_swap64 (U64 x)
+{
+ return ((x << 56) & 0xff00000000000000ULL) |
+ ((x << 40) & 0x00ff000000000000ULL) |
+ ((x << 24) & 0x0000ff0000000000ULL) |
+ ((x << 8) & 0x000000ff00000000ULL) |
+ ((x >> 8) & 0x00000000ff000000ULL) |
+ ((x >> 24) & 0x0000000000ff0000ULL) |
+ ((x >> 40) & 0x000000000000ff00ULL) |
+ ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/*====== xxh64 ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 = 1609587929392839161ULL;
+static const U64 PRIME64_4 = 9650029242287828579ULL;
+static const U64 PRIME64_5 = 2870177450012600261ULL;
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+ acc += input * PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+ val = XXH64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)32;
+ }
+#endif
+
+ if (len>=32) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = seed + PRIME64_1 + PRIME64_2;
+ U64 v2 = seed + PRIME64_2;
+ U64 v3 = seed + 0;
+ U64 v4 = seed - PRIME64_1;
+
+ do {
+ v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+ } while (p<=limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (U64) len;
+
+ while (p+8<=bEnd) {
+ U64 const k1 = XXH64_round(0, XXH_get64bits(p));
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+ p+=8;
+ }
+
+ if (p+4<=bEnd) {
+ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH64_state_t state;
+ XXH64_reset(&state, seed);
+ XXH64_update(&state, input, len);
+ return XXH64_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/*====== Hash Streaming ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+ XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ memcpy(statePtr, &state, sizeof(state));
+ return XXH_OK;
+}
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (input==NULL) return XXH_ERROR;
+#endif
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+ p += 32-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p+32 <= bEnd) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
+
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+ const BYTE * p = (const BYTE*)state->mem64;
+ const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
+ U64 h64;
+
+ if (state->total_len >= 32) {
+ U64 const v1 = state->v1;
+ U64 const v2 = state->v2;
+ U64 const v3 = state->v3;
+ U64 const v4 = state->v4;
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+ } else {
+ h64 = state->v3 + PRIME64_5;
+ }
+
+ h64 += (U64) state->total_len;
+
+ while (p+8<=bEnd) {
+ U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+ p+=8;
+ }
+
+ if (p+4<=bEnd) {
+ h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+ return XXH_readBE64(src);
+}
+
+#endif /* XXH_NO_LONG_LONG */
diff --git a/rts/xxhash.h b/rts/xxhash.h
new file mode 100644
index 0000000000..9d831e03b3
--- /dev/null
+++ b/rts/xxhash.h
@@ -0,0 +1,293 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name Speed Q.Score Author
+xxHash 5.4 GB/s 10
+CrapWow 3.2 GB/s 2 Andrew
+MumurHash 3a 2.7 GB/s 10 Austin Appleby
+SpookyHash 2.0 GB/s 10 Bob Jenkins
+SBox 1.4 GB/s 9 Bret Mulvey
+Lookup3 1.2 GB/s 9 Bob Jenkins
+SuperFastHash 1.2 GB/s 1 Paul Hsieh
+CityHash64 1.05 GB/s 10 Pike & Alakuijala
+FNV 0.55 GB/s 5 Fowler, Noll, Vo
+CRC32 0.43 GB/s 9
+MD5-32 0.33 GB/s 10 Ronald L. Rivest
+SHA1-32 0.28 GB/s 10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+* API modifier
+******************************/
+/** XXH_PRIVATE_API
+* This is useful to include xxhash functions in `static` mode
+* in order to inline them, and remove their symbol from the public list.
+* Methodology :
+* #define XXH_PRIVATE_API
+* #include "xxhash.h"
+* `xxhash.c` is automatically included.
+* It's not useful to compile and link it as a separate module.
+*/
+#ifdef XXH_PRIVATE_API
+# ifndef XXH_STATIC_LINKING_ONLY
+# define XXH_STATIC_LINKING_ONLY
+# endif
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */
+# endif
+#else
+# define XXH_PUBLIC_API /* do nothing */
+#endif /* XXH_PRIVATE_API */
+
+/*!XXH_NAMESPACE, aka Namespace Emulation :
+
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with other libraries which may also include xxHash,
+
+you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+
+Note that no change is required within the calling program as long as it includes `xxhash.h` :
+regular symbol name will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
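+/* Illustrative sketch (not part of the upstream header): compiling the
+   library with
+       cc -c -DXXH_NAMESPACE=mylib_ xxhash.c
+   makes the object file export mylib_XXH32, mylib_XXH64, and so on, while
+   callers that include this header keep writing plain XXH32()/XXH64(). */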
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 6
+#define XXH_VERSION_RELEASE 2
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
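+/* Worked example: for this header (0.6.2), XXH_VERSION_NUMBER evaluates to
+   0*100*100 + 6*100 + 2 = 602, and XXH_versionNumber() returns the same value
+   at run time, so a program can check that the linked library matches the
+   header it was compiled against. */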
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+typedef unsigned int XXH32_hash_t;
+
+/*! XXH32() :
+    Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
+    The memory between input and input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on a Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
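+/* Illustrative sketch (not part of the upstream header): one-shot hashing of
+   a short string, with seed 0:
+
+       const char msg[] = "hello";
+       XXH32_hash_t h = XXH32(msg, sizeof(msg) - 1, 0);
+*/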
+
+/*====== Streaming ======*/
+typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+/*
+These functions generate the xxHash of an input provided in multiple segments.
+Note that, for small inputs, they are slower than the single-call functions, due to state management;
+for small inputs, prefer `XXH32()` and `XXH64()`.
+
+The XXH state must first be allocated, using XXH*_createState().
+
+Start a new hash by initializing the state with a seed, using XXH*_reset().
+
+Then, feed the hash state by calling XXH*_update() as many times as necessary.
+Obviously, the input must be allocated and read-accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, a hash value can be produced at any time, using XXH*_digest().
+This function returns the nn-bit hash as an int or long long.
+
+It's still possible to continue inserting input into the hash state after a digest,
+and to generate new hashes later on by calling XXH*_digest() again.
+
+When done, free the XXH state if it was allocated dynamically.
+A complete walk-through is sketched below.
+*/
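+/* Illustrative sketch (not part of the upstream header; error handling
+   elided; buf1/len1 and buf2/len2 are assumed readable buffers):
+
+       XXH32_state_t* st = XXH32_createState();
+       XXH32_reset(st, 0);                        // seed = 0
+       XXH32_update(st, buf1, len1);              // feed segments in order
+       XXH32_update(st, buf2, len2);
+       XXH32_hash_t h = XXH32_digest(st);         // hash of buf1 ++ buf2
+       XXH32_freeState(st);
+*/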
+
+/*====== Canonical representation ======*/
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+/* The default result type for the XXH functions is a primitive unsigned 32- or 64-bit integer.
+* The canonical representation uses the human-readable write convention, aka big-endian (most significant byte first).
+* These functions allow transformation of a hash result into and from its canonical format.
+* This way, hash values can be written into a file or into memory, and remain comparable across different systems and programs.
+*/
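+/* Illustrative sketch (not part of the upstream header), assuming a writable
+   FILE* f and a hash value h obtained from XXH32() or XXH32_digest():
+
+       XXH32_canonical_t c;
+       XXH32_canonicalFromHash(&c, h);
+       fwrite(c.digest, 1, sizeof(c.digest), f);   // big-endian on disk
+
+   Reading the 4 bytes back and calling XXH32_hashFromCanonical() recovers h
+   on any platform, regardless of its native endianness. */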
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
+
+/*! XXH64() :
+    Calculate the 64-bit hash of the sequence of "length" bytes stored at memory address "input".
+    "seed" can be used to alter the result predictably.
+    This function runs faster on 64-bit systems, but slower on 32-bit systems (see the benchmark above).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
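+/* Illustrative sketch (not part of the upstream header; buf/len assumed to
+   describe a readable buffer):
+
+       XXH64_hash_t h64 = XXH64(buf, len, 0ULL);  // note the 64-bit seed
+*/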
+
+/*====== Streaming ======*/
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
+
+/*====== Canonical representation ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+#endif /* XXH_NO_LONG_LONG */
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+ This section contains definitions which are not guaranteed to remain stable.
+ They may change in future versions, becoming incompatible with a different version of the library.
+ They shall only be used with static linking.
+ Never use these definitions in association with dynamic linking!
+=================================================================================================== */
+
+/* These definitions are only meant to make static allocation of XXH state
+   possible, e.g. on the stack or inside a struct.
+   Never access the members directly; see the sketch after the struct
+   definitions below. */
+
+struct XXH32_state_s {
+ unsigned total_len_32;
+ unsigned large_len;
+ unsigned v1;
+ unsigned v2;
+ unsigned v3;
+ unsigned v4;
+ unsigned mem32[4]; /* buffer defined as U32 for alignment */
+ unsigned memsize;
+ unsigned reserved; /* never read nor written; will be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+#ifndef XXH_NO_LONG_LONG /* define XXH_NO_LONG_LONG to remove 64-bit support */
+struct XXH64_state_s {
+ unsigned long long total_len;
+ unsigned long long v1;
+ unsigned long long v2;
+ unsigned long long v3;
+ unsigned long long v4;
+ unsigned long long mem64[4]; /* buffer defined as U64 for alignment */
+ unsigned memsize;
+ unsigned reserved[2]; /* never read nor written; will be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+#endif
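+/* Illustrative sketch (not part of the upstream header): with static linking,
+   the state can live on the stack, avoiding createState()/freeState()
+   entirely (data/len assumed to describe a readable buffer):
+
+       XXH32_state_t st;                   // statically allocated state
+       XXH32_reset(&st, 0);                // always reset before first use
+       XXH32_update(&st, data, len);
+       XXH32_hash_t h = XXH32_digest(&st);
+*/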
+
+#ifdef XXH_PRIVATE_API
+# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */