path: root/rts/parallel
author     Simon Marlow <simonmar@microsoft.com>   2006-04-07 02:05:11 +0000
committer  Simon Marlow <simonmar@microsoft.com>   2006-04-07 02:05:11 +0000
commit     0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree       8e2afe0ab48ee33cf95009809d67c9649573ef92 /rts/parallel
parent     28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
download   haskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes.

The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project: it is just the GHC build system.

No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'rts/parallel')
-rw-r--r--  rts/parallel/0Hash.c           320
-rw-r--r--  rts/parallel/0Parallel.h       414
-rw-r--r--  rts/parallel/0Unpack.c         440
-rw-r--r--  rts/parallel/Dist.c            117
-rw-r--r--  rts/parallel/Dist.h             20
-rw-r--r--  rts/parallel/FetchMe.h          24
-rw-r--r--  rts/parallel/FetchMe.hc        180
-rw-r--r--  rts/parallel/Global.c         1090
-rw-r--r--  rts/parallel/GranSim.c        3015
-rw-r--r--  rts/parallel/GranSimRts.h      268
-rw-r--r--  rts/parallel/HLC.h              63
-rw-r--r--  rts/parallel/HLComms.c        1810
-rw-r--r--  rts/parallel/LLC.h             130
-rw-r--r--  rts/parallel/LLComms.c         489
-rw-r--r--  rts/parallel/PEOpCodes.h        58
-rw-r--r--  rts/parallel/Pack.c           4293
-rw-r--r--  rts/parallel/ParInit.c         322
-rw-r--r--  rts/parallel/ParInit.h          19
-rw-r--r--  rts/parallel/ParTicky.c        450
-rw-r--r--  rts/parallel/ParTicky.h         60
-rw-r--r--  rts/parallel/ParTypes.h         38
-rw-r--r--  rts/parallel/Parallel.c       1140
-rw-r--r--  rts/parallel/ParallelDebug.c  1955
-rw-r--r--  rts/parallel/ParallelDebug.h    79
-rw-r--r--  rts/parallel/ParallelRts.h     253
-rw-r--r--  rts/parallel/RBH.c             337
-rw-r--r--  rts/parallel/SysMan.c          650
27 files changed, 18034 insertions, 0 deletions
diff --git a/rts/parallel/0Hash.c b/rts/parallel/0Hash.c
new file mode 100644
index 0000000000..a471e30a66
--- /dev/null
+++ b/rts/parallel/0Hash.c
@@ -0,0 +1,320 @@
+/*-----------------------------------------------------------------------------
+ *
+ * (c) The AQUA Project, Glasgow University, 1995-1998
+ * (c) The GHC Team, 1999
+ *
+ * Dynamically expanding linear hash tables, as described in
+ * Per-Åke Larson, "Dynamic Hash Tables," CACM 31(4), April 1988,
+ * pp. 446 -- 457.
+ * -------------------------------------------------------------------------- */
+
+/*
+ Replaced with ghc/rts/Hash.c in the new RTS
+*/
+
+#if 0
+
+#include "Rts.h"
+#include "Hash.h"
+#include "RtsUtils.h"
+
+#define HSEGSIZE 1024 /* Size of a single hash table segment */
+ /* Also the minimum size of a hash table */
+#define HDIRSIZE 1024 /* Size of the segment directory */
+ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
+#define HLOAD 5 /* Maximum average load of a single hash bucket */
+
+#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
+ /* Number of HashList cells to allocate in one go */
+
+
+/* Linked list of (key, data) pairs for separate chaining */
+struct hashlist {
+ StgWord key;
+ void *data;
+ struct hashlist *next; /* Next cell in bucket chain (same hash value) */
+};
+
+typedef struct hashlist HashList;
+
+struct hashtable {
+ int split; /* Next bucket to split when expanding */
+ int max; /* Max bucket of smaller table */
+ int mask1; /* Mask for doing the mod of h_1 (smaller table) */
+ int mask2; /* Mask for doing the mod of h_2 (larger table) */
+ int kcount; /* Number of keys */
+ int bcount; /* Number of buckets */
+ HashList **dir[HDIRSIZE]; /* Directory of segments */
+};
+
+/* -----------------------------------------------------------------------------
+ * Hash first using the smaller table. If the bucket is less than the
+ * next bucket to be split, re-hash using the larger table.
+ * -------------------------------------------------------------------------- */
+
+static int
+hash(HashTable *table, W_ key)
+{
+ int bucket;
+
+ /* Strip the boring zero bits */
+ key /= sizeof(StgWord);
+
+ /* Mod the size of the hash table (a power of 2) */
+ bucket = key & table->mask1;
+
+ if (bucket < table->split) {
+ /* Mod the size of the expanded hash table (also a power of 2) */
+ bucket = key & table->mask2;
+ }
+ return bucket;
+}
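+
+/* A standalone sketch (illustration only, not part of the original
+   code) of the bucket selection above, using the mask values as
+   initialised in allocHashTable below:
+
+     int example_bucket(W_ key, int split) {
+       int b = (key / sizeof(StgWord)) & (HSEGSIZE - 1);     /* mask1 */
+       if (b < split)
+         b = (key / sizeof(StgWord)) & (2 * HSEGSIZE - 1);   /* mask2 */
+       return b;
+     }
+
+   E.g. with split = 2, a key landing in bucket 1 under mask1 is
+   re-hashed with mask2 and ends up in bucket 1 or bucket 1025.
+*/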
+
+/* -----------------------------------------------------------------------------
+ * Allocate a new segment of the dynamically growing hash table.
+ * -------------------------------------------------------------------------- */
+
+static void
+allocSegment(HashTable *table, int segment)
+{
+ table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
+ "allocSegment");
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Expand the larger hash table by one bucket, and split one bucket
+ * from the smaller table into two parts. Only the bucket referenced
+ * by @table->split@ is affected by the expansion.
+ * -------------------------------------------------------------------------- */
+
+static void
+expand(HashTable *table)
+{
+ int oldsegment;
+ int oldindex;
+ int newbucket;
+ int newsegment;
+ int newindex;
+ HashList *hl;
+ HashList *next;
+ HashList *old, *new;
+
+ if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
+ /* Wow! That's big. Too big, so don't expand. */
+ return;
+
+ /* Calculate indices of bucket to split */
+ oldsegment = table->split / HSEGSIZE;
+ oldindex = table->split % HSEGSIZE;
+
+ newbucket = table->max + table->split;
+
+ /* And the indices of the new bucket */
+ newsegment = newbucket / HSEGSIZE;
+ newindex = newbucket % HSEGSIZE;
+
+ if (newindex == 0)
+ allocSegment(table, newsegment);
+
+ if (++table->split == table->max) {
+ table->split = 0;
+ table->max *= 2;
+ table->mask1 = table->mask2;
+ table->mask2 = table->mask2 << 1 | 1;
+ }
+ table->bcount++;
+
+ /* Split the bucket, paying no attention to the original order */
+
+ old = new = NULL;
+ for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
+ next = hl->next;
+ if (hash(table, hl->key) == newbucket) {
+ hl->next = new;
+ new = hl;
+ } else {
+ hl->next = old;
+ old = hl;
+ }
+ }
+ table->dir[oldsegment][oldindex] = old;
+ table->dir[newsegment][newindex] = new;
+
+ return;
+}
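+
+/* Worked trace (illustration only): starting from the state set up in
+   allocHashTable (split=0, max=1024, mask1=0x3ff, mask2=0x7ff), each
+   call to expand() splits one bucket and advances split; after 1024
+   expansions split wraps to 0 and the table has doubled:
+   max=2048, mask1=0x7ff, mask2=0xfff.
+*/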
+
+void *
+lookupHashTable(HashTable *table, StgWord key)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
+ if (hl->key == key)
+ return hl->data;
+
+ /* It's not there */
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * We allocate the hashlist cells in large chunks to cut down on malloc
+ * overhead. Although we keep a free list of hashlist cells, we make
+ * no effort to actually return the space to the malloc arena.
+ * -------------------------------------------------------------------------- */
+
+static HashList *freeList = NULL;
+
+static HashList *
+allocHashList(void)
+{
+ HashList *hl, *p;
+
+ if ((hl = freeList) != NULL) {
+ freeList = hl->next;
+ } else {
+ hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
+
+ freeList = hl + 1;
+ for (p = freeList; p < hl + HCHUNK - 1; p++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ return hl;
+}
+
+static void
+freeHashList(HashList *hl)
+{
+ hl->next = freeList;
+ freeList = hl;
+}
+
+void
+insertHashTable(HashTable *table, StgWord key, void *data)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+
+ /* We want no duplicates */
+ ASSERT(lookupHashTable(table, key) == NULL);
+
+ /* When the average load gets too high, we expand the table */
+ if (++table->kcount >= HLOAD * table->bcount)
+ expand(table);
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ hl = allocHashList();
+
+ hl->key = key;
+ hl->data = data;
+ hl->next = table->dir[segment][index];
+ table->dir[segment][index] = hl;
+
+}
+
+void *
+removeHashTable(HashTable *table, StgWord key, void *data)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+ HashList *prev = NULL;
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
+ if (hl->key == key && (data == NULL || hl->data == data)) {
+ if (prev == NULL)
+ table->dir[segment][index] = hl->next;
+ else
+ prev->next = hl->next;
+ table->kcount--;
+ return hl->data;
+ }
+ prev = hl;
+ }
+
+ /* It's not there */
+ ASSERT(data == NULL);
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * When we free a hash table, we are also good enough to free the
+ * data part of each (key, data) pair, as long as our caller can tell
+ * us how to do it.
+ * -------------------------------------------------------------------------- */
+
+void
+freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
+{
+ long segment;
+ long index;
+ HashList *hl;
+ HashList *next;
+
+ /* The last bucket with something in it is table->max + table->split - 1 */
+ segment = (table->max + table->split - 1) / HSEGSIZE;
+ index = (table->max + table->split - 1) % HSEGSIZE;
+
+ while (segment >= 0) {
+ while (index >= 0) {
+ for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
+ next = hl->next;
+ if (freeDataFun != NULL)
+ (*freeDataFun)(hl->data);
+ freeHashList(hl);
+ }
+ index--;
+ }
+ free(table->dir[segment]);
+ segment--;
+ index = HSEGSIZE - 1;
+ }
+ free(table);
+}
+
+/* -----------------------------------------------------------------------------
+ * When we initialize a hash table, we set up the first segment as well,
+ * initializing all of the first segment's hash buckets to NULL.
+ * -------------------------------------------------------------------------- */
+
+HashTable *
+allocHashTable(void)
+{
+ HashTable *table;
+ HashList **hb;
+
+ table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
+
+ allocSegment(table, 0);
+
+ for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
+ *hb = NULL;
+
+ table->split = 0;
+ table->max = HSEGSIZE;
+ table->mask1 = HSEGSIZE - 1;
+ table->mask2 = 2 * HSEGSIZE - 1;
+ table->kcount = 0;
+ table->bcount = HSEGSIZE;
+
+ return table;
+}
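+
+/* Usage sketch (illustration only) of the API defined above:
+
+     HashTable *t = allocHashTable();
+     insertHashTable(t, (StgWord)key, data);
+     void *d = lookupHashTable(t, (StgWord)key);    // NULL if absent
+     (void) removeHashTable(t, (StgWord)key, NULL);
+     freeHashTable(t, NULL);    // NULL: leave the data parts alone
+*/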
+#endif
diff --git a/rts/parallel/0Parallel.h b/rts/parallel/0Parallel.h
new file mode 100644
index 0000000000..d52bf00fc2
--- /dev/null
+++ b/rts/parallel/0Parallel.h
@@ -0,0 +1,414 @@
+/*
+ Time-stamp: <Mon Oct 04 1999 14:50:28 Stardate: [-30]3692.88 hwloidl>
+
+ Definitions for parallel machines.
+
+This section contains definitions applicable only to programs compiled
+to run on a parallel machine, i.e. on GUM. Some of these definitions
+are also used when simulating parallel execution, i.e. on GranSim.
+ */
+
+/*
+ ToDo: Check the PAR specific part of this file
+ Move stuff into Closures.h and ClosureMacros.h
+ Clean-up GRAN specific code
+ -- HWL
+ */
+
+#ifndef PARALLEL_H
+#define PARALLEL_H
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#include "Rts.h"
+#include "GranSim.h"
+//#include "ClosureTypes.h"
+
+//@menu
+//* Basic definitions::
+//* Externs and types::
+//* Dummy defs::
+//* Par specific fixed headers::
+//* Parallel only heap objects::
+//* Packing definitions::
+//* End of File::
+//@end menu
+//*/
+
+//@node Basic definitions, Externs and types
+//@section Basic definitions
+
+/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */
+
+/* Needed for dumping routines */
+#if defined(PAR)
+# define TIME ullong
+# define CURRENT_TIME msTime()
+# define TIME_ON_PROC(p) msTime()
+# define CURRENT_PROC thisPE
+# define BINARY_STATS RtsFlags.ParFlags.granSimStats_Binary
+#elif defined(GRAN)
+# define TIME rtsTime
+# define CURRENT_TIME CurrentTime[CurrentProc]
+# define TIME_ON_PROC(p) CurrentTime[p]
+# define CURRENT_PROC CurrentProc
+# define BINARY_STATS RtsFlags.GranFlags.granSimStats_Binary
+#endif
+
+#if defined(PAR)
+# define MAX_PES 256 /* Maximum number of processors */
+ /* MAX_PES is enforced by SysMan, which does not
+ allow more than this many "processors".
+ This is important because PackGA [GlobAddr.lc]
+ **assumes** that a PE# can fit in 8+ bits.
+ */
+#endif
+
+//@node Externs and types, Dummy defs, Basic definitions
+//@section Externs and types
+
+#if defined(PAR)
+/* GUM: one spark queue on each PE, and each PE sees only its own spark queue */
+extern rtsSparkQ pending_sparks_hd;
+extern rtsSparkQ pending_sparks_tl;
+#elif defined(GRAN)
+/* GranSim: a globally visible array of spark queues */
+extern rtsSparkQ pending_sparks_hds[];
+extern rtsSparkQ pending_sparks_tls[];
+#endif
+extern unsigned int /* nat */ spark_queue_len(PEs proc);
+
+extern StgInt SparksAvail; /* How many sparks are available */
+
+/* prototypes of spark routines */
+/* ToDo: check whether all have to be visible -- HWL */
+#if defined(GRAN)
+rtsSpark *newSpark(StgClosure *node, StgInt name, StgInt gran_info, StgInt size_info, StgInt par_info, StgInt local);
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void add_to_spark_queue(rtsSpark *spark);
+void delete_from_spark_queue (rtsSpark *spark);
+#endif
+
+#define STATS_FILENAME_MAXLEN 128
+
+/* Where to write the log file */
+//extern FILE *gr_file;
+extern char gr_filename[STATS_FILENAME_MAXLEN];
+
+#if defined(GRAN)
+int init_gr_simulation(char *rts_argv[], int rts_argc, char *prog_argv[], int prog_argc);
+void end_gr_simulation(void);
+#endif
+
+#if defined(PAR)
+extern I_ do_sp_profile;
+
+extern P_ PendingFetches;
+extern GLOBAL_TASK_ID *PEs;
+
+extern rtsBool IAmMainThread, GlobalStopPending;
+extern rtsBool fishing;
+extern GLOBAL_TASK_ID SysManTask;
+extern int seed; /*pseudo-random-number generator seed:*/
+ /*Initialised in ParInit*/
+extern I_ threadId; /*Number of Threads that have existed on a PE*/
+extern GLOBAL_TASK_ID mytid;
+
+extern int nPEs;
+
+extern rtsBool InGlobalGC; /* Are we in the midst of performing global GC */
+
+extern HashTable *pGAtoGALAtable;
+extern HashTable *LAtoGALAtable;
+extern GALA *freeIndirections;
+extern GALA *liveIndirections;
+extern GALA *freeGALAList;
+extern GALA *liveRemoteGAs;
+extern int thisPE;
+
+void RunParallelSystem (StgPtr program_closure);
+void initParallelSystem();
+void SynchroniseSystem();
+
+void registerTask (GLOBAL_TASK_ID gtid);
+globalAddr *LAGAlookup (P_ addr);
+P_ GALAlookup (globalAddr *ga);
+globalAddr *MakeGlobal (P_ addr, rtsBool preferred);
+globalAddr *setRemoteGA (P_ addr, globalAddr *ga, rtsBool preferred);
+void splitWeight (globalAddr *to, globalAddr *from);
+globalAddr *addWeight (globalAddr *ga);
+void initGAtables();
+W_ taskIDtoPE (GLOBAL_TASK_ID gtid);
+void RebuildLAGAtable();
+
+void *lookupHashTable (HashTable *table, StgWord key);
+void insertHashTable (HashTable *table, StgWord key, void *data);
+void freeHashTable (HashTable *table, void (*freeDataFun)(void *data));
+HashTable *allocHashTable();
+void *removeHashTable (HashTable *table, StgWord key, void *data);
+#endif /* PAR */
+
+/* Interface for dumping routines (i.e. writing to log file) */
+void DumpGranEvent(GranEventType name, StgTSO *tso);
+void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len);
+//void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread);
+
+//@node Dummy defs, Par specific fixed headers, Externs and types
+//@section Dummy defs
+
+/*
+Get this out of the way. These are all null definitions.
+*/
+
+
+//# define GA_HDR_SIZE 0
+//# define GA(closure) /*nothing */
+
+//# define SET_GA(closure,ga) /* nothing */
+//# define SET_STATIC_GA(closure) /* nothing */
+//# define SET_GRAN_HDR(closure,pe) /* nothing */
+//# define SET_STATIC_PROCS(closure) /* nothing */
+
+//# define SET_TASK_ACTIVITY(act) /* nothing */
+
+#if defined(GRAN)
+
+# define GA_HDR_SIZE 1
+
+# define PROCS_HDR_POSN PAR_HDR_POSN
+# define PROCS_HDR_SIZE 1
+
+/* Accessing components of the field */
+# define PROCS(closure) ((closure)->header.gran.procs)
+/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */
+#endif
+
+
+//@node Par specific fixed headers, Parallel only heap objects, Dummy defs
+//@section Par specific fixed headers
+
+/*
+Definitions relating to the entire parallel-only fixed-header field.
+
+On GUM, the global addresses for each local closure are stored in a separate
+hash table, rather then with the closure in the heap. We call @getGA@ to
+look up the global address associated with a local closure (0 is returned
+for local closures that have no global address), and @setGA@ to store a new
+global address for a local closure which did not previously have one.
+*/
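+
+/* Usage sketch (illustration only; ga_from_packet is a hypothetical
+   name) of the getGA/setGA protocol described above:
+
+     globalAddr *ga = GA(closure);      // getGA: 0 if closure has no GA
+     if (ga == 0)
+       SET_GA(closure, ga_from_packet); // setGA: record its new GA
+*/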
+
+#if defined(PAR)
+
+# define GA_HDR_SIZE 0
+
+# define GA(closure) getGA(closure)
+
+# define SET_GA(closure, ga) setGA(closure,ga)
+# define SET_STATIC_GA(closure)
+# define SET_GRAN_HDR(closure,pe)
+# define SET_STATIC_PROCS(closure)
+
+# define MAX_GA_WEIGHT 0 /* Treat as 2^n */
+
+W_ PackGA (W_, int);
+ /* There was a PACK_GA macro here; but we turned it into the PackGA
+ routine [GlobAddr.lc] (because it needs to do quite a bit of
+ paranoia checking). Phil & Will (95/08)
+ */
+
+/* At the moment, there is no activity profiling for GUM. This may change. */
+# define SET_TASK_ACTIVITY(act) /* nothing */
+#endif
+
+//@node Parallel only heap objects, Packing definitions, Par specific fixed headers
+//@section Parallel only heap objects
+
+// NB: The following definitions are BOTH for GUM and GrAnSim -- HWL
+
+/* All in Closures.h and ClosureMacros.h */
+
+//@node Packing definitions, End of File, Parallel only heap objects
+//@section Packing definitions
+
+//@menu
+//* GUM::
+//* GranSim::
+//@end menu
+//*/
+
+//@node GUM, GranSim, Packing definitions, Packing definitions
+//@subsection GUM
+
+#if defined(PAR)
+/*
+Symbolic constants for the packing code.
+
+This constant defines how many words of data we can pack into a single
+packet in the parallel (GUM) system.
+*/
+
+//@menu
+//* Externs::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Externs, Prototypes, GUM, GUM
+//@subsubsection Externs
+
+extern W_ *PackBuffer; /* size: can be set via option */
+extern long *buffer; /* HWL_ */
+extern W_ *freeBuffer; /* HWL_ */
+extern W_ *packBuffer; /* HWL_ */
+
+extern void InitPackBuffer(STG_NO_ARGS);
+extern void InitMoreBuffers(STG_NO_ARGS);
+extern void InitPendingGABuffer(W_ size);
+extern void AllocClosureQueue(W_ size);
+
+//@node Prototypes, Macros, Externs, GUM
+//@subsubsection Prototypes
+
+void InitPackBuffer();
+P_ PackTSO (P_ tso, W_ *size);
+P_ PackStkO (P_ stko, W_ *size);
+P_ AllocateHeap (W_ size); /* Doesn't belong */
+
+void InitClosureQueue ();
+P_ DeQueueClosure();
+void QueueClosure (P_ closure);
+rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+
+P_ get_closure_info (P_ closure, W_ *size, W_ *ptrs, W_ *nonptrs, W_ *vhs, char *type);
+
+rtsBool isOffset (globalAddr *ga),
+ isFixed (globalAddr *ga);
+
+void doGlobalGC();
+
+P_ PackNearbyGraph (P_ closure,W_ *size);
+P_ UnpackGraph (W_ *buffer, globalAddr **gamap, W_ *nGAs);
+
+
+//@node Macros, , Prototypes, GUM
+//@subsubsection Macros
+
+# define PACK_HEAP_REQUIRED \
+ ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (SPEC_HS + 2))
+
+# define MAX_GAS (RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE)
+
+
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+
+# define PACK_HDR_SIZE 1 /* Words of header in a packet */
+
+# define PACK_PLC_SIZE 2 /* Size of a packed PLC in words */
+
+#endif /* PAR */
+
+//@node GranSim, , GUM, Packing definitions
+//@subsection GranSim
+
+#if defined(GRAN)
+/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */
+
+//@menu
+//* Types::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Types, Prototypes, GranSim, GranSim
+//@subsubsection Types
+
+typedef struct rtsPackBuffer_ {
+ StgInt /* nat */ size;
+ StgInt /* nat */ unpacked_size;
+ StgTSO *tso;
+ StgClosure **buffer;
+} rtsPackBuffer;
+
+//@node Prototypes, Macros, Types, GranSim
+//@subsubsection Prototypes
+
+
+/* main packing functions */
+/*
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+void PrintPacket(rtsPackBuffer *buffer);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+*/
+/* important auxiliary functions */
+
+//StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty);
+int IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node);
+int IS_THUNK(StgClosure* closure);
+char *display_info_type(StgClosure* closure, char *str);
+
+/*
+OLD CODE -- HWL
+void InitPackBuffer(void);
+P_ AllocateHeap (W_ size);
+P_ PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize);
+P_ PackOneNode (P_ closure, P_ tso, W_ *packbuffersize);
+P_ UnpackGraph (P_ buffer);
+
+void InitClosureQueue (void);
+P_ DeQueueClosure(void);
+void QueueClosure (P_ closure);
+// rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+*/
+
+// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty);
+// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node) ;
+
+//@node Macros, , Prototypes, GranSim
+//@subsubsection Macros
+
+/* These are needed in the packing code to get the size of the packet
+ right. The closures themselves are never built in GrAnSim. */
+# define FETCHME_VHS IND_VHS
+# define FETCHME_HS IND_HS
+
+# define FETCHME_GA_LOCN FETCHME_HS
+
+# define FETCHME_CLOSURE_SIZE(closure) IND_CLOSURE_SIZE(closure)
+# define FETCHME_CLOSURE_NoPTRS(closure) 0L
+# define FETCHME_CLOSURE_NoNONPTRS(closure) (IND_CLOSURE_SIZE(closure)-IND_VHS)
+
+# define MAX_GAS (RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE)
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+# define PACK_HDR_SIZE 4 /* Words of header in a packet */
+
+# define PACK_HEAP_REQUIRED \
+ (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \
+ 2 * sizeofW(StgInt) + sizeofW(StgTSO*))
+
+# define PACK_FLAG_LOCN 0
+# define PACK_TSO_LOCN 1
+# define PACK_UNPACKED_SIZE_LOCN 2
+# define PACK_SIZE_LOCN 3
+# define MAGIC_PACK_FLAG 0xfabc
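+
+/* Packet header layout implied by the *_LOCN constants above
+   (illustration only):
+     word 0 (PACK_FLAG_LOCN):          MAGIC_PACK_FLAG
+     word 1 (PACK_TSO_LOCN):           the demanding TSO
+     word 2 (PACK_UNPACKED_SIZE_LOCN): unpacked size of the graph
+     word 3 (PACK_SIZE_LOCN):          packed size
+   followed by the packed graph itself (hence PACK_HDR_SIZE = 4).
+*/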
+
+#endif /* GRAN */
+
+//@node End of File, , Packing definitions
+//@section End of File
+
+#endif /* defined(PAR) || defined(GRAN) whole file */
+#endif /* PARALLEL_H */
+
+
diff --git a/rts/parallel/0Unpack.c b/rts/parallel/0Unpack.c
new file mode 100644
index 0000000000..fc4a8e50c3
--- /dev/null
+++ b/rts/parallel/0Unpack.c
@@ -0,0 +1,440 @@
+/*
+ Time-stamp: <Wed Jan 12 2000 13:29:08 Stardate: [-30]4193.85 hwloidl>
+
+ Unpacking closures which have been exported to remote processors
+
+ This module defines routines for unpacking closures in the parallel
+ runtime system (GUM).
+
+ In the case of GrAnSim, this module defines routines for *simulating* the
+ unpacking of closures as it is done in the parallel runtime system.
+*/
+
+/*
+ Code in this file has been merged with Pack.c
+*/
+
+#if 0
+
+//@node Unpacking closures, , ,
+//@section Unpacking closures
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* GUM code::
+//* GranSim Code::
+//* Index::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Unpacking closures, Unpacking closures
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "ParallelDebug.h"
+#include "FetchMe.h"
+#include "Storage.h"
+
+//@node Prototypes, GUM code, Includes, Unpacking closures
+//@subsection Prototypes
+
+void InitPacking(void);
+# if defined(PAR)
+void InitPackBuffer(void);
+# endif
+/* Interface for ADT of closure queues */
+void AllocClosureQueue(nat size);
+void InitClosureQueue(void);
+rtsBool QueueEmpty(void);
+void QueueClosure(StgClosure *closure);
+StgClosure *DeQueueClosure(void);
+
+StgPtr AllocateHeap(nat size);
+
+//@node GUM code, GranSim Code, Prototypes, Unpacking closures
+//@subsection GUM code
+
+#if defined(PAR)
+
+//@node Local Definitions, , GUM code, GUM code
+//@subsubsection Local Definitions
+
+//@cindex PendingGABuffer
+static globalAddr *PendingGABuffer;
+/* is initialised in main; */
+
+//@cindex InitPendingGABuffer
+void
+InitPendingGABuffer(size)
+nat size;
+{
+ PendingGABuffer = (globalAddr *)
+ stgMallocBytes((size-PACK_HDR_SIZE)*2*sizeof(globalAddr),
+ "InitPendingGABuffer");
+}
+
+/*
+ @CommonUp@ commons up two closures which we have discovered to be
+ variants of the same object. One is made an indirection to the other. */
+
+//@cindex CommonUp
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ StgBlockingQueueElement *bqe;
+
+ ASSERT(src != dst);
+ switch (get_itbl(src)->type) {
+ case BLACKHOLE_BQ:
+ bqe = ((StgBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case FETCH_ME_BQ:
+ bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case RBH:
+ bqe = ((StgRBH *)src)->blocking_queue;
+ break;
+
+ case BLACKHOLE:
+ case FETCH_ME:
+ bqe = END_BQ_QUEUE;
+ break;
+
+ default:
+ /* Don't common up anything else */
+ return;
+ }
+ /* We do not use UPD_IND because that would awaken the bq, too */
+ // UPD_IND(src, dst);
+ updateWithIndirection(get_itbl(src), src, dst);
+ //ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst)));
+ if (bqe != END_BQ_QUEUE)
+ awaken_blocked_queue(bqe, src);
+}
+
+/*
+ @UnpackGraph@ unpacks the graph contained in a message buffer. It
+ returns a pointer to the new graph. The @gamap@ parameter is set to
+ point to an array of (oldGA,newGA) pairs which were created as a result
+ of unpacking the buffer; @nGAs@ is set to the number of GA pairs which
+ were created.
+
+ The format of graph in the pack buffer is as defined in @Pack.lc@. */
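+
+/* Caller-side sketch (illustration only; processGAPair is hypothetical)
+   showing how the out-parameters are consumed -- gamap points at nGAs
+   consecutive (oldGA,newGA) pairs:
+
+     globalAddr *gamap; nat nGAs, i;
+     StgClosure *graph = UnpackGraph(packBuffer, &gamap, &nGAs);
+     for (i = 0; i < nGAs; i++, gamap += 2)
+       processGAPair(&gamap[0], &gamap[1]);
+*/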
+
+//@cindex UnpackGraph
+StgClosure *
+UnpackGraph(packBuffer, gamap, nGAs)
+rtsPackBuffer *packBuffer;
+globalAddr **gamap;
+nat *nGAs;
+{
+ nat size, ptrs, nonptrs, vhs;
+ StgWord **buffer, **bufptr, **slotptr;
+ globalAddr ga, *gaga;
+ StgClosure *closure, *existing,
+ *graphroot, *graph, *parent;
+ StgInfoTable *ip, *oldip;
+ nat bufsize, i,
+ pptr = 0, pptrs = 0, pvhs;
+ char str[80];
+
+ InitPackBuffer(); /* in case it isn't already init'd */
+ graphroot = (StgClosure *)NULL;
+
+ gaga = PendingGABuffer;
+
+ InitClosureQueue();
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ buffer = packBuffer->buffer;
+ bufptr = buffer;
+
+ /* allocate heap */
+ if (bufsize > 0) {
+ graph = allocate(bufsize);
+ ASSERT(graph != NULL);
+ }
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight > 0) {
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ /* No more to unpack; just set closure to local address */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacked PLC at %x", ga.payload.plc));
+ closure = ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ /* No more to unpack; just set closure to cached address */
+ ASSERT(parent != (StgClosure *)NULL);
+ closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Now we have to build something. */
+
+ ASSERT(bufsize > 0);
+
+ /*
+ * Close your eyes. You don't want to see where we're looking. You
+ * can't get closure info until you've unpacked the variable header,
+ * but you don't know how big it is until you've got closure info.
+ * So...we trust that the closure in the buffer is organized the
+ * same way as they will be in the heap...at least up through the
+ * end of the variable header.
+ */
+ ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+ /*
+ Remember, the generic closure layout is as follows:
+      +--------------------------------------------------+
+      | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PTRS |
+      +--------------------------------------------------+
+ */
+ /* Fill in the fixed header */
+ for (i = 0; i < FIXED_HS; i++)
+ ((StgPtr)graph)[i] = *bufptr++;
+
+ if (ip->type == FETCH_ME)
+ size = ptrs = nonptrs = vhs = 0;
+
+ /* Fill in the packed variable header */
+ for (i = 0; i < vhs; i++)
+ ((StgPtr)graph)[FIXED_HS + i] = *bufptr++;
+
+ /* Pointers will be filled in later */
+
+ /* Fill in the packed non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ ((StgPtr)graph)[FIXED_HS + i + vhs + ptrs] = *bufptr++;
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing */
+ QueueClosure(graph);
+
+ /*
+ * Common up the new closure with any existing closure having the same
+ * GA
+ */
+
+ if ((existing = GALAlookup(&ga)) == NULL) {
+ globalAddr *newGA;
+ /* Just keep the new object */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacking new (%x, %d, %x)\n",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight));
+
+ closure = graph;
+ newGA = setRemoteGA(graph, &ga, rtsTrue);
+ if (ip->type == FETCH_ME)
+ // FETCHME_GA(closure) = newGA;
+ ((StgFetchMe *)closure)->ga = newGA;
+ } else {
+ /* Two closures, one global name. Someone loses */
+ oldip = get_itbl(existing);
+
+ if ((oldip->type == FETCH_ME || IS_BLACK_HOLE(existing)) &&
+ ip->type != FETCH_ME) {
+
+ /* What we had wasn't worth keeping */
+ closure = graph;
+ CommonUp(existing, graph);
+ } else {
+
+ /*
+ * Either we already had something worthwhile by this name or
+ * the new thing is just another FetchMe. However, the thing we
+ * just unpacked has to be left as-is, or the child unpacking
+ * code will fail. Remember that the way pointer words are
+ * filled in depends on the info pointers of the parents being
+ * the same as when they were packed.
+ */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacking old (%x, %d, %x), keeping %#lx",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight,
+ existing));
+
+ closure = existing;
+ }
+ /* Pool the total weight in the stored ga */
+ (void) addWeight(&ga);
+ }
+
+ /* Sort out the global address mapping */
+ if ((ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||
+ (ip_MUTABLE(ip) && ip->type != FETCH_ME)) {
+ /* Make up new GAs for single-copy closures */
+ globalAddr *newGA = makeGlobal(closure, rtsTrue);
+
+ ASSERT(closure == graph);
+
+ /* Create an old GA to new GA mapping */
+ *gaga++ = ga;
+ splitWeight(gaga, newGA);
+ ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1));
+ gaga++;
+ }
+ graph += FIXED_HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size);
+ }
+
+ /*
+ * Set parent pointer to point to chosen closure. If we're at the top of
+ * the graph (our parent is NULL), then we want to arrange to return the
+ * chosen closure to our caller (possibly in place of the allocated graph
+ * root.)
+ */
+ if (parent == NULL)
+ graphroot = closure;
+ else
+ ((StgPtr)parent)[FIXED_HS + pvhs + pptr] = (StgWord) closure;
+
+ /* Save closure pointer for resolving offsets */
+ *slotptr = (StgWord) closure;
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+
+ ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp);
+
+ *gamap = PendingGABuffer;
+ *nGAs = (gaga - PendingGABuffer) / 2;
+
+ /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+ ASSERT(graphroot!=NULL);
+ return (graphroot);
+}
+#endif /* PAR */
+
+//@node GranSim Code, Index, GUM code, Unpacking closures
+//@subsection GranSim Code
+
+/*
+ For GrAnSim: In general no actual unpacking should be necessary. We just
+ have to walk over the graph and set the bitmasks appropriately. -- HWL */
+
+//@node Unpacking, , GranSim Code, GranSim Code
+//@subsubsection Unpacking
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+/* This code fakes the unpacking of a somewhat virtual buffer */
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+ nat size, ptrs, nonptrs, vhs,
+ bufptr = 0;
+ StgClosure *closure, *graphroot, *graph;
+ StgInfoTable *ip;
+ StgWord bufsize, unpackedsize,
+ pptr = 0, pptrs = 0, pvhs;
+ StgTSO* tso;
+ char str[240], str1[80];
+ int i;
+
+ bufptr = 0;
+ graphroot = buffer->buffer[0];
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpackedsize = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ IF_GRAN_DEBUG(pack,
+ belch("<<< Unpacking <<%d>> (buffer @ %p):\n (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+ buffer->id, buffer, graphroot, where_is(graphroot),
+ bufsize, tso->id, tso,
+ where_is((StgClosure *)tso)));
+
+ do {
+ closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+ /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+ ip = get_closure_info(closure,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ",
+ closure, (closure_HNF(closure) ? "NF" : "__"),
+ PROCS(closure)));
+
+ if (ip->type == RBH) {
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+
+ IF_GRAN_DEBUG(pack,
+ strcat(str, " (converting RBH) "));
+
+ convertFromRBH(closure); /* In GUM that's done by convertToFetchMe */
+ } else if (IS_BLACK_HOLE(closure)) {
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+ if (closure_HNF(closure))
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ else
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+ }
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+ IF_GRAN_DEBUG(pack, belch(str));
+
+ } while (bufptr<buffer->size) ; /* (parent != NULL); */
+
+ /* In GrAnSim we allocate pack buffers dynamically! -- HWL */
+ free(buffer->buffer);
+ free(buffer);
+
+ IF_GRAN_DEBUG(pack,
+ belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0));
+
+ return (graphroot);
+}
+#endif /* GRAN */
+#endif
+
+//@node Index, , GranSim Code, Unpacking closures
+//@subsection Index
+
+//@index
+//* CommonUp:: @cindex\s-+CommonUp
+//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer
+//* PendingGABuffer:: @cindex\s-+PendingGABuffer
+//* UnpackGraph:: @cindex\s-+UnpackGraph
+//@end index
diff --git a/rts/parallel/Dist.c b/rts/parallel/Dist.c
new file mode 100644
index 0000000000..eeec780716
--- /dev/null
+++ b/rts/parallel/Dist.c
@@ -0,0 +1,117 @@
+#include "Dist.h"
+
+#ifdef DIST /* whole file */
+
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParallelRts.h"
+#include "Parallel.h" // nPEs,allPEs,mytid
+#include "HLC.h" //for sendReval
+#include "LLC.h" //for pvm stuff
+#include "FetchMe.h" // for BLOCKED_FETCH_info
+#include "Storage.h" // for recordMutable
+
+/* hopefully the result>0 */
+StgWord32 cGetPECount(void)
+{ return nPEs;
+}
+
+/* return taskID, n is 1..count, n=1 is always the mainPE */
+StgPEId cGetPEId(StgWord32 n)
+{ return allPEs[n-1];
+}
+
+/* return the taskID */
+StgPEId cGetMyPEId(void)
+{ return mytid;
+}
+
+/* return the taskID of the owning PE of an MVar/TSO:
+- MVAR/TSOs get converted to REMOTE_REFs when shipped, and
+ there is no mechanism for using these REMOTE_REFs
+ apart from this code.
+*/
+
+StgPEId cGetCertainOwner(StgClosure *mv)
+{ globalAddr *ga;
+ switch(get_itbl(mv)->type)
+ { case TSO:
+ case MVAR:
+ return mytid; // must be local
+ case REMOTE_REF:
+ ga = LAGAlookup(mv);
+ ASSERT(ga);
+ return ga->payload.gc.gtid; // I know its global address
+ }
+ barf("Dist.c:cGetCertainOwner() wrong closure type %s",info_type(mv));
+}
+
+/* for some additional fun, let's look up a certain host... */
+StgPEId cGetHostOwner(StgByteArray h) //okay h is a C string
+{ int nArch,nHost,nTask,i;
+ StgPEId dtid;
+ struct pvmhostinfo *host;
+ struct pvmtaskinfo *task;
+
+ dtid=0;
+ pvm_config(&nHost,&nArch,&host);
+ for(i=0;i<nHost;i++)
+ if(strcmp(host[i].hi_name,h)==0)
+ { dtid=host[i].hi_tid;
+ break;
+ }
+ if(dtid==0) return 0; // no host of that name
+
+ for(i=0;i<nPEs;i++)
+ { pvm_tasks(allPEs[i],&nTask,&task);
+ ASSERT(nTask==1); //cause we lookup a single task
+ if(task[0].ti_host==dtid)
+ return allPEs[i];
+ }
+ return 0; // known host, but no PE on it
+}
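+
+/* Usage sketch (illustration only; the host name is hypothetical):
+   find the PE running on a given host and ship a job to it:
+
+     StgPEId p = cGetHostOwner("some.host.example");
+     if (p != 0 && p != cGetMyPEId())
+       cRevalIO(mvar_with_job, p);   // an MVAR holding the thunk to ship
+*/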
+
+void cRevalIO(StgClosure *job,StgPEId p)
+{ nat size;
+ rtsPackBuffer *buffer=NULL;
+
+ ASSERT(get_itbl(job)->type==MVAR);
+ job=((StgMVar*)job)->value; // extract the job from the MVar
+
+ ASSERT(closure_THUNK(job)); // must be a closure!!!!!
+ ASSERT(p!=mytid);
+
+ buffer = PackNearbyGraph(job, END_TSO_QUEUE, &size,p);
+ ASSERT(buffer != (rtsPackBuffer *)NULL);
+ ASSERT(get_itbl(job)->type==RBH);
+
+ IF_PAR_DEBUG(verbose,
+ belch("@;~) %x doing revalIO to %x\n",
+ mytid,p));
+
+ sendReval(p,size,buffer);
+
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_reval_mess++;
+ }
+
+ /*
+ We turn job into a FETCH_ME_BQ so that the thread will block
+ when it enters it.
+
+ Note: it will not receive an ACK, thus no GA.
+ */
+
+ ASSERT(get_itbl(job)->type==RBH);
+
+ /* put closure on mutables list, while it is still a RBH */
+ recordMutable((StgMutClosure *)job);
+
+ /* actually turn it into a FETCH_ME_BQ */
+ SET_INFO(job, &FETCH_ME_BQ_info);
+ ((StgFetchMe *)job)->ga = 0; //hope this won't make anyone barf!!!
+ ((StgBlockingQueue*)job)->blocking_queue=END_BQ_QUEUE;
+}
+
+#endif
diff --git a/rts/parallel/Dist.h b/rts/parallel/Dist.h
new file mode 100644
index 0000000000..c67cce2748
--- /dev/null
+++ b/rts/parallel/Dist.h
@@ -0,0 +1,20 @@
+#ifndef __DIST_H
+#define __DIST_H
+
+#ifdef DIST
+
+#include "Rts.h"
+
+typedef StgWord32 StgPEId;
+
+// interface functions for Haskell Language calls
+StgWord32 cGetPECount(void);
+StgPEId cGetPEId(StgWord32 n);
+StgPEId cGetMyPEId(void);
+StgPEId cGetCertainOwner(StgClosure *mv);
+void cRevalIO(StgClosure *job,StgPEId p);
+StgPEId cGetHostOwner(StgByteArray h);
+
+#endif /* DIST */
+
+#endif /* __DIST_H */
diff --git a/rts/parallel/FetchMe.h b/rts/parallel/FetchMe.h
new file mode 100644
index 0000000000..be5cbf6b54
--- /dev/null
+++ b/rts/parallel/FetchMe.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Closure types for the parallel system.
+ *
+ * ---------------------------------------------------------------------------*/
+
+EI_(stg_FETCH_ME_info);
+EF_(stg_FETCH_ME_entry);
+
+EI_(stg_FETCH_ME_BQ_info);
+EF_(stg_FETCH_ME_BQ_entry);
+
+EI_(stg_BLOCKED_FETCH_info);
+EF_(stg_BLOCKED_FETCH_entry);
+
+EI_(stg_REMOTE_REF_info);
+EF_(stg_REMOTE_REF_entry);
+
+EI_(stg_RBH_Save_0_info);
+EF_(stg_RBH_Save_0_entry);
+EI_(stg_RBH_Save_1_info);
+EF_(stg_RBH_Save_1_entry);
+EI_(stg_RBH_Save_2_info);
+EF_(stg_RBH_Save_2_entry);
diff --git a/rts/parallel/FetchMe.hc b/rts/parallel/FetchMe.hc
new file mode 100644
index 0000000000..f142e9e514
--- /dev/null
+++ b/rts/parallel/FetchMe.hc
@@ -0,0 +1,180 @@
+/* ----------------------------------------------------------------------------
+ Time-stamp: <Tue Mar 06 2001 17:01:46 Stardate: [-30]6288.54 hwloidl>
+
+ Entry code for a FETCH_ME closure
+
+ This module defines routines for handling remote pointers (@FetchMe@s)
+ in GUM. It is threaded (@.hc@) because @FetchMe_entry@ will be
+ called during evaluation.
+
+ * --------------------------------------------------------------------------*/
+
+#ifdef PAR /* all of it */
+
+//@menu
+//* Includes::
+//* Info tables::
+//* Index::
+//@end menu
+
+//@node Includes, Info tables
+//@subsection Includes
+
+#include "Stg.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "GranSim.h"
+#include "GranSimRts.h"
+#include "Parallel.h"
+#include "ParallelRts.h"
+#include "FetchMe.h"
+#include "HLC.h"
+#include "StgRun.h" /* for StgReturn and register saving */
+
+/* --------------------------------------------------------------------------
+ FETCH_ME closures.
+
+ A FETCH_ME closure represents data that currently resides on
+ another PE. We issue a fetch message, and wait for the data to be
+ retrieved.
+
+ A word on the ptr/nonptr fields in the macros: they are unused at the
+ moment; all closures defined here have constant size (i.e. no payload
+ that varies from closure to closure). Therefore, all routines that
+ need to know the size of these closures have to do a sizeofW(StgFetchMe)
+ etc. to get the closure size. See get_closure_info(), evacuate() and
+ checkClosure() (using the same fcts for determining the size of the
+ closures would be a good idea; at least it would be a nice step towards
+ making this code bug free).
+ ------------------------------------------------------------------------ */
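+
+/* For example (illustration only), size queries for these closures are
+   spelled out at the call sites along the lines of:
+
+     size = sizeofW(StgFetchMe);                 // a FETCH_ME
+     size = sizeofW(StgFetchMeBlockingQueue);    // a FETCH_ME_BQ
+*/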
+
+//@node Info tables, Index, Includes
+//@subsection Info tables
+
+//@cindex FETCH_ME_info
+INFO_TABLE(stg_FETCH_ME_info, stg_FETCH_ME_entry, 0,2, FETCH_ME,, EF_,"FETCH_ME","FETCH_ME");
+//@cindex FETCH_ME_entry
+STGFUN(stg_FETCH_ME_entry)
+{
+ FB_
+ TICK_ENT_BH();
+
+ ASSERT(((StgFetchMe *)R1.p)->ga->payload.gc.gtid != mytid);
+
+ /* Turn the FETCH_ME into a FETCH_ME_BQ, and place the current thread
+ * on the blocking queue.
+ */
+ // ((StgFetchMeBlockingQueue *)R1.cl)->header.info = &FETCH_ME_BQ_info; // does the same as SET_INFO
+ SET_INFO((StgClosure *)R1.cl, &stg_FETCH_ME_BQ_info);
+
+ /* Remember GA as a global var (used in blockThread); NB: not thread safe! */
+ ASSERT(theGlobalFromGA.payload.gc.gtid == (GlobalTaskId)0);
+ theGlobalFromGA = *((StgFetchMe *)R1.p)->ga;
+
+ /* Put ourselves on the blocking queue for this black hole */
+ ASSERT(looks_like_ga(((StgFetchMe *)R1.p)->ga));
+ CurrentTSO->link = END_BQ_QUEUE;
+ ((StgFetchMeBlockingQueue *)R1.cl)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+
+ /* jot down why and on what closure we are blocked */
+ CurrentTSO->why_blocked = BlockedOnGA;
+ CurrentTSO->block_info.closure = R1.cl;
+ /* closure is mutable since something has just been added to its BQ */
+ //recordMutable((StgMutClosure *)R1.cl);
+
+ /* sendFetch etc is now done in blockThread, which is called from the
+ scheduler -- HWL */
+
+ BLOCK_NP(1);
+ FE_
+}
+
+/* ---------------------------------------------------------------------------
+ FETCH_ME_BQ
+
+ On the first entry of a FETCH_ME closure, we turn the closure into
+ a FETCH_ME_BQ, which behaves just like a BLACKHOLE_BQ. Any thread
+ entering the FETCH_ME_BQ will be placed in the blocking queue.
+ When the data arrives from the remote PE, all waiting threads are
+ woken up and the FETCH_ME_BQ is overwritten with the fetched data.
+
+ FETCH_ME_BQ_entry is almost identical to BLACKHOLE_BQ_entry -- HWL
+ ------------------------------------------------------------------------ */
+
+INFO_TABLE(stg_FETCH_ME_BQ_info, stg_FETCH_ME_BQ_entry,0,2,FETCH_ME_BQ,,EF_,"FETCH_ME_BQ","FETCH_ME_BQ");
+//@cindex FETCH_ME_BQ_info
+STGFUN(stg_FETCH_ME_BQ_entry)
+{
+ FB_
+ TICK_ENT_BH();
+
+ /* Put ourselves on the blocking queue for this node */
+ CurrentTSO->link = (StgTSO*)((StgBlockingQueue *)R1.p)->blocking_queue;
+ ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+
+ /* jot down why and on what closure we are blocked */
+ CurrentTSO->why_blocked = BlockedOnGA_NoSend;
+ CurrentTSO->block_info.closure = R1.cl;
+
+ /* stg_gen_block is too heavyweight, use a specialised one */
+ BLOCK_NP(1);
+ FE_
+}
+
+/* ---------------------------------------------------------------------------
+ BLOCKED_FETCH_BQ
+
+ A BLOCKED_FETCH closure only ever exists in the blocking queue of a
+ globally visible closure i.e. one with a GA. A BLOCKED_FETCH closure
+ indicates that a TSO on another PE is waiting for the result of this
+ computation. Thus, when updating the closure, the result has to be sent
+ to that PE. The relevant routines handling that are awakenBlockedQueue
+ and blockFetch (for putting BLOCKED_FETCH closure into a BQ).
+ ------------------------------------------------------------------------ */
+
+//@cindex BLOCKED_FETCH_info
+INFO_TABLE(stg_BLOCKED_FETCH_info, stg_BLOCKED_FETCH_entry,0,2,BLOCKED_FETCH,,EF_,"BLOCKED_FETCH","BLOCKED_FETCH");
+//@cindex BLOCKED_FETCH_entry
+STGFUN(stg_BLOCKED_FETCH_entry)
+{
+ FB_
+ /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */
+ STGCALL2(fprintf,stderr,"BLOCKED_FETCH object entered!\n");
+ STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE);
+ FE_
+}
+
+
+/* ---------------------------------------------------------------------------
+ REMOTE_REF
+
+ A REMOTE_REF closure is generated whenever we wish to refer to a sticky
+ object on another PE.
+ ------------------------------------------------------------------------ */
+
+//@cindex REMOTE_REF_info
+INFO_TABLE(stg_REMOTE_REF_info, stg_REMOTE_REF_entry,0,2,REMOTE_REF,,EF_,"REMOTE_REF","REMOTE_REF");
+//@cindex REMOTE_REF_entry
+STGFUN(stg_REMOTE_REF_entry)
+{
+ FB_
+ /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */
+ STGCALL2(fprintf,stderr,"REMOTE REF object entered!\n");
+ STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE);
+ FE_
+}
+
+#endif /* PAR */
+
+//@node Index, , Info tables
+//@subsection Index
+
+//@index
+//* BLOCKED_FETCH_entry:: @cindex\s-+BLOCKED_FETCH_entry
+//* BLOCKED_FETCH_info:: @cindex\s-+BLOCKED_FETCH_info
+//* FETCH_ME_BQ_info:: @cindex\s-+FETCH_ME_BQ_info
+//* FETCH_ME_entry:: @cindex\s-+FETCH_ME_entry
+//* FETCH_ME_info:: @cindex\s-+FETCH_ME_info
+//@end index
diff --git a/rts/parallel/Global.c b/rts/parallel/Global.c
new file mode 100644
index 0000000000..b2541357e1
--- /dev/null
+++ b/rts/parallel/Global.c
@@ -0,0 +1,1090 @@
+/* ---------------------------------------------------------------------------
+ Time-stamp: <Wed Mar 21 2001 16:32:23 Stardate: [-30]6363.44 hwloidl>
+
+ (c) The AQUA/Parade Projects, Glasgow University, 1995
+ The GdH/APART 624 Projects, Heriot-Watt University, Edinburgh, 1999
+
+ Global Address Manipulation.
+
+ The GALA and LAGA tables for mapping global addresses to local addresses
+ (i.e. heap pointers) are defined here. We use the generic hash tables
+ defined in Hash.c.
+ ------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@menu
+//* Includes::
+//* Global tables and lists::
+//* Fcts on GALA tables::
+//* Interface to taskId-PE table::
+//* Interface to LAGA table::
+//* Interface to GALA table::
+//* GC functions for GALA tables::
+//* Index::
+//@end menu
+//*/
+
+//@node Includes, Global tables and lists, Global Address Manipulation, Global Address Manipulation
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "Hash.h"
+#include "HLC.h"
+#include "ParallelRts.h"
+#if defined(DEBUG)
+# include "Sanity.h"
+#include "ParallelDebug.h"
+#endif
+#if defined(DIST)
+# include "Dist.h"
+#endif
+
+/*
+ @globalAddr@ structures are allocated in chunks to reduce malloc overhead.
+*/
+
+//@node Global tables and lists, Fcts on GALA tables, Includes, Global Address Manipulation
+//@subsection Global tables and lists
+
+//@cindex thisPE
+nat thisPE;
+
+//@menu
+//* Free lists::
+//* Hash tables::
+//@end menu
+
+//@node Free lists, Hash tables, Global tables and lists, Global tables and lists
+//@subsubsection Free lists
+
+/* Free list of GALA entries */
+GALA *freeGALAList = NULL;
+
+/* Number of globalAddr cells to allocate in one go */
+#define GCHUNK (1024 * sizeof(StgWord) / sizeof(GALA))
+
+/* Free list of indirections */
+
+//@cindex nextIndirection
+static StgInt nextIndirection = 0;
+//@cindex freeIndirections
+GALA *freeIndirections = NULL;
+
+/* The list of live indirections has to be marked for GC (see makeGlobal) */
+//@cindex liveIndirections
+GALA *liveIndirections = NULL;
+
+/* The list of remote indirections has to be marked for GC (see setRemoteGA) */
+//@cindex liveRemoteGAs
+GALA *liveRemoteGAs = NULL;
+
+//@node Hash tables, , Free lists, Global tables and lists
+//@subsubsection Hash tables
+
+/* Mapping global task ids to PEs */
+//@cindex taskIDtoPEtable
+HashTable *taskIDtoPEtable = NULL;
+
+static int nextPE = 0;
+
+/* LAGA table: StgClosure* -> globalAddr*
+ (Remember: globalAddr = (GlobalTaskId, Slot, Weight))
+ Mapping local to global addresses (see interface below)
+*/
+
+//@cindex LAtoGALAtable
+HashTable *LAtoGALAtable = NULL;
+
+/* GALA table: globalAddr* -> StgClosure*
+ (Remember: globalAddr = (GlobalTaskId, Slot, Weight))
+ Mapping global to local addresses (see interface below)
+*/
+
+//@cindex pGAtoGALAtable
+HashTable *pGAtoGALAtable = NULL;
+
+//@node Fcts on GALA tables, Interface to taskId-PE table, Global tables and lists, Global Address Manipulation
+//@subsection Fcts on GALA tables
+
+//@cindex allocGALA
+static GALA *
+allocGALA(void)
+{
+ GALA *gl, *p;
+
+ if ((gl = freeGALAList) != NULL) {
+ IF_DEBUG(sanity,
+ ASSERT(gl->ga.weight==0xdead0add);
+ ASSERT(gl->la==(StgPtr)0xdead00aa));
+ freeGALAList = gl->next;
+ } else {
+ gl = (GALA *) stgMallocBytes(GCHUNK * sizeof(GALA), "allocGALA");
+
+ freeGALAList = gl + 1;
+ for (p = freeGALAList; p < gl + GCHUNK - 1; p++) {
+ p->next = p + 1;
+ IF_DEBUG(sanity,
+ p->ga.weight=0xdead0add;
+ p->la=(StgPtr)0xdead00aa);
+ }
+ /* last elem in the new block has NULL pointer in link field */
+ p->next = NULL;
+ IF_DEBUG(sanity,
+ p->ga.weight=0xdead0add;
+ p->la=(StgPtr)0xdead00aa);
+ }
+ IF_DEBUG(sanity,
+ gl->ga.weight=0xdead0add;
+ gl->la=(StgPtr)0xdead00aa);
+ return gl;
+}
+
+//@node Interface to taskId-PE table, Interface to LAGA table, Fcts on GALA tables, Global Address Manipulation
+//@subsection Interface to taskId-PE table
+
+/*
+ We don't really like GLOBAL_TASK_ID, so we keep a table of TASK_ID to
+ PE mappings. The idea is that a PE identifier will fit in 16 bits, whereas
+ a TASK_ID may not.
+*/
+
+//@cindex taskIDtoPE
+PEs
+taskIDtoPE(GlobalTaskId gtid)
+{
+ return ((PEs) lookupHashTable(taskIDtoPEtable, gtid));
+}
+
+//@cindex registerTask
+void
+registerTask(GlobalTaskId gtid) {
+ nextPE++; //start counting from 1
+ if (gtid == mytid)
+ thisPE = nextPE;
+
+ insertHashTable(taskIDtoPEtable, gtid, (void *) (StgWord) nextPE);
+}
+
+//@node Interface to LAGA table, Interface to GALA table, Interface to taskId-PE table, Global Address Manipulation
+//@subsection Interface to LAGA table
+
+/*
+ The local address to global address mapping returns a globalAddr structure
+ (pe task id, slot, weight) for any closure in the local heap which has a
+ global identity. Such closures may be copies of normal form objects with
+ a remote `master' location, @FetchMe@ nodes referencing remote objects, or
+ globally visible objects in the local heap (for which we are the master).
+*/
+
+//@cindex LAGAlookup
+globalAddr *
+LAGAlookup(addr)
+StgClosure *addr;
+{
+ GALA *gala;
+
+ /* We never look for GA's on indirections. -- unknown hacker
+ Well, in fact at the moment we do in the new RTS. -- HWL
+ ToDo: unwind INDs when entering them into the hash table
+
+ ASSERT(IS_INDIRECTION(addr) == NULL);
+ */
+ if ((gala = lookupHashTable(LAtoGALAtable, (StgWord) addr)) == NULL)
+ return NULL;
+ else
+ return &(gala->ga);
+}
+
+//@node Interface to GALA table, GC functions for GALA tables, Interface to LAGA table, Global Address Manipulation
+//@subsection Interface to GALA table
+
+/*
+ We also manage a mapping of global addresses to local addresses, so that
+ we can ``common up'' multiple references to the same object as they arrive
+ in data packets from remote PEs.
+
+ The global address to local address mapping is actually managed via a
+ ``packed global address'' to GALA hash table. The packed global
+ address takes the interesting part of the @globalAddr@ structure
+ (i.e. the pe and slot fields) and packs them into a single word
+ suitable for hashing.
+*/
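+
+/* A plausible packing (sketch only -- the real PackGA does extra
+   paranoia checking and relies on a PE# fitting in a few bits):
+
+     pga = ((StgWord)pe << 16) | (slot & 0xffff);   // one hashable word
+*/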
+
+//@cindex GALAlookup
+StgClosure *
+GALAlookup(ga)
+globalAddr *ga;
+{
+ StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+ GALA *gala;
+
+ if ((gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga)) == NULL)
+ return NULL;
+ else {
+ /*
+ * Bypass any indirections when returning a local closure to
+ * the caller. Note that we do not short-circuit the entry in
+ * the GALA tables right now, because we would have to do a
+ * hash table delete and insert in the LAtoGALAtable to keep
+ * that table up-to-date for preferred GALA pairs. That's
+ * probably a bit expensive.
+ */
+ return UNWIND_IND((StgClosure *)(gala->la));
+ }
+}
+
+/* ga becomes non-preferred (e.g. due to CommonUp) */
+void
+GALAdeprecate(ga)
+globalAddr *ga;
+{
+ StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+ GALA *gala;
+
+ gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga);
+ ASSERT(gala!=NULL);
+ ASSERT(gala->preferred==rtsTrue);
+ gala->preferred = rtsFalse;
+}
+
+/*
+ External references to our globally-visible closures are managed through an
+ indirection table. The idea is that the closure may move about as the result
+ of local garbage collections, but its global identity is determined by its
+ slot in the indirection table, which never changes.
+
+ The indirection table is maintained implicitly as part of the global
+ address to local address table. We need only keep track of the
+ highest numbered indirection index allocated so far, along with a free
+ list of lower numbered indices no longer in use.
+*/
+
+/*
+ Allocate an indirection slot for the closure currently at address @addr@.
+*/
+
+//@cindex allocIndirection
+static GALA *
+allocIndirection(StgClosure *closure)
+{
+ GALA *gala;
+
+ if ((gala = freeIndirections) != NULL) {
+ IF_DEBUG(sanity,
+ ASSERT(gala->ga.weight==0xdead0add);
+ ASSERT(gala->la==(StgPtr)0xdead00aa));
+ freeIndirections = gala->next;
+ } else {
+ gala = allocGALA();
+ IF_DEBUG(sanity,
+ ASSERT(gala->ga.weight==0xdead0add);
+ ASSERT(gala->la==(StgPtr)0xdead00aa));
+ gala->ga.payload.gc.gtid = mytid;
+ gala->ga.payload.gc.slot = nextIndirection++;
+ IF_DEBUG(sanity,
+ if (nextIndirection>=MAX_SLOTS)
+ barf("Cannot handle more than %d slots for GA in a sanity-checking setup (this is no error)"));
+ }
+ gala->ga.weight = MAX_GA_WEIGHT;
+ gala->la = (StgPtr)closure;
+ IF_DEBUG(sanity,
+ gala->next=(struct gala *)0xcccccccc);
+ return gala;
+}
+
+/*
+ This is only used for sanity checking (see LOOKS_LIKE_SLOT)
+*/
+StgInt
+highest_slot (void) { return nextIndirection; }
+
+/*
+ Make a local closure globally visible.
+
+ Called from: GlobaliseAndPackGA
+ Args:
+ closure ... closure to be made visible
+ preferred ... should the new GA become the preferred one (normally true)
+
+ Allocate a GALA structure and add it to the (logical) Indirections table,
+ by inserting it into the LAtoGALAtable hash table and putting it onto the
+ liveIndirections list (only if it is preferred).
+
+ We have to allocate an indirection slot for it, and update both the local
+ address to global address and global address to local address maps.
+*/
+
+//@cindex makeGlobal
+globalAddr *
+makeGlobal(closure, preferred)
+StgClosure *closure;
+rtsBool preferred;
+{
+ /* check whether we already have a GA for this local closure */
+ GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) closure);
+ /* create an entry in the LAGA table */
+ GALA *newGALA = allocIndirection(closure);
+ StgWord pga = PackGA(thisPE, newGALA->ga.payload.gc.slot);
+
+ IF_DEBUG(sanity,
+ ASSERT(newGALA->next==(struct gala *)0xcccccccc););
+ // ASSERT(HEAP_ALLOCED(closure)); // check that closure might point into the heap; might be static, though
+ ASSERT(GALAlookup(&(newGALA->ga)) == NULL);
+
+ /* global statistics gathering */
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.local_alloc_GA++;
+ }
+
+ newGALA->la = (StgPtr)closure;
+ newGALA->preferred = preferred;
+
+ if (preferred) {
+ /* The new GA is now the preferred GA for the LA */
+ if (oldGALA != NULL) {
+ oldGALA->preferred = rtsFalse;
+ (void) removeHashTable(LAtoGALAtable, (StgWord) closure, (void *) oldGALA);
+ }
+ insertHashTable(LAtoGALAtable, (StgWord) closure, (void *) newGALA);
+ }
+
+ ASSERT(!isOnLiveIndTable(&(newGALA->ga)));
+ /* put the new GALA entry on the list of live indirections */
+ newGALA->next = liveIndirections;
+ liveIndirections = newGALA;
+
+ insertHashTable(pGAtoGALAtable, pga, (void *) newGALA);
+
+ return &(newGALA->ga);
+}
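+
+/*
+  Sketch of a typical call site (hypothetical fragment; the real caller is
+  GlobaliseAndPackGA in Pack.c): before shipping a local closure we make it
+  globally visible and pack the resulting GA as its global name.
+*/
+#if 0
+  globalAddr *ga = makeGlobal(closure, rtsTrue); /* becomes the preferred GA */
+  ASSERT(ga->payload.gc.gtid == mytid);          /* it names a local closure */
+  /* ... marshal *ga into the outgoing packet ... */
+#endif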
+
+/*
+ Assign an existing remote global address to an existing closure.
+
+ Called from: Unpack in Pack.c
+ Args:
+ local_closure ... a closure that has just been unpacked
+ remote_ga ... the GA that came with it, ie. the name under which the
+ closure is known while being transferred
+     preferred ... should the new GA become the preferred one (normally true)
+
+ Allocate a GALA structure and add it to the (logical) RemoteGA table,
+ by inserting it into the LAtoGALAtable hash table and putting it onto the
+ liveRemoteGAs list (only if it is preferred).
+
+ We do not retain the @globalAddr@ structure that's passed in as an argument,
+ so it can be a static in the calling routine.
+*/
+
+//@cindex setRemoteGA
+globalAddr *
+setRemoteGA(local_closure, remote_ga, preferred)
+StgClosure *local_closure;
+globalAddr *remote_ga;
+rtsBool preferred;
+{
+  /* old entry, i.e. the one with the GA generated when sending off the closure */
+  GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) local_closure);
+  /* alloc a new entry and fill it with the contents of the newly arrived GA */
+ GALA *newGALA = allocGALA();
+ StgWord pga = PackGA(taskIDtoPE(remote_ga->payload.gc.gtid),
+ remote_ga->payload.gc.slot);
+
+ ASSERT(remote_ga->payload.gc.gtid != mytid);
+ ASSERT(remote_ga->weight > 0);
+ ASSERT(GALAlookup(remote_ga) == NULL);
+
+ newGALA->ga = *remote_ga;
+ newGALA->la = (StgPtr)local_closure;
+ newGALA->preferred = preferred;
+
+ if (preferred) {
+ /* The new GA is now the preferred GA for the LA */
+ if (oldGALA != NULL) {
+ oldGALA->preferred = rtsFalse;
+ (void) removeHashTable(LAtoGALAtable, (StgWord) local_closure, (void *) oldGALA);
+ }
+ insertHashTable(LAtoGALAtable, (StgWord) local_closure, (void *) newGALA);
+ }
+
+ ASSERT(!isOnRemoteGATable(&(newGALA->ga)));
+ /* add new entry to the (logical) RemoteGA table */
+ newGALA->next = liveRemoteGAs;
+ liveRemoteGAs = newGALA;
+
+ insertHashTable(pGAtoGALAtable, pga, (void *) newGALA);
+
+ /*
+ The weight carried by the incoming closure is transferred to the newGALA
+    entry (via the structure assignment above). Therefore, we have to give back
+ the weight to the GA on the other processor, because that indirection is
+ no longer needed.
+ */
+ remote_ga->weight = 0;
+ return &(newGALA->ga);
+}
+
+/*
+ Give me a bit of weight to give away on a new reference to a particular
+ global address. If we run down to nothing, we have to assign a new GA.
+*/
+
+//@cindex splitWeight
+#if 0
+void
+splitWeight(to, from)
+globalAddr *to, *from;
+{
+ /* Make sure we have enough weight to split */
+ if (from->weight!=MAX_GA_WEIGHT && from->weight<=3) // fixed by UK in Eden implementation
+ from = makeGlobal(GALAlookup(from), rtsTrue);
+
+ to->payload = from->payload;
+
+ if (from->weight == MAX_GA_WEIGHT)
+ to->weight = 1L << (BITS_IN(unsigned) - 1);
+ else
+ to->weight = from->weight / 2;
+
+ from->weight -= to->weight;
+}
+#else
+void
+splitWeight(to, from)
+globalAddr *to, *from;
+{
+ /* Make sure we have enough weight to split */
+ /* Splitting at 2 needed, as weight 1 is not legal in packets (UK+KH) */
+
+ if (from->weight / 2 <= 2) /* old: weight== 1 (UK) */
+ from = makeGlobal(GALAlookup(from), rtsTrue);
+
+ to->payload = from->payload;
+
+ if (from->weight <= 1) /* old == 0 (UK) */
+ to->weight = 1L << (BITS_IN(unsigned) - 1);
+ else
+ to->weight = from->weight / 2;
+
+ from->weight -= to->weight;
+}
+#endif
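+
+/*
+  Sketch of how splitWeight is used when shipping a reference (hypothetical
+  fragment; the variable names are made up): half of the weight travels in
+  the packet, half stays behind in the local GALA entry.
+*/
+#if 0
+  globalAddr packet_ga;                  /* the GA that travels in the message */
+  splitWeight(&packet_ga, &(gala->ga));  /* may first allocate a fresh GA */
+  ASSERT(packet_ga.weight > 1);          /* weight 1 is not legal in packets */
+  /* ... marshal packet_ga into the buffer ... */
+#endif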
+/*
+ Here, I am returning a bit of weight that a remote PE no longer needs.
+*/
+
+//@cindex addWeight
+globalAddr *
+addWeight(ga)
+globalAddr *ga;
+{
+ StgWord pga;
+ GALA *gala;
+
+ ASSERT(LOOKS_LIKE_GA(ga));
+
+ pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+ gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga);
+
+ IF_PAR_DEBUG(weight,
+ fprintf(stderr, "@* Adding weight %x to ", ga->weight);
+ printGA(&(gala->ga));
+ fputc('\n', stderr));
+
+ gala->ga.weight += ga->weight;
+ ga->weight = 0;
+
+ return &(gala->ga);
+}
+
+/*
+ Initialize all of the global address structures: the task ID to PE id
+ map, the local address to global address map, the global address to
+ local address map, and the indirection table.
+*/
+
+//@cindex initGAtables
+void
+initGAtables(void)
+{
+ taskIDtoPEtable = allocHashTable();
+ LAtoGALAtable = allocHashTable();
+ pGAtoGALAtable = allocHashTable();
+}
+
+//@cindex PackGA
+StgWord
+PackGA (pe, slot)
+StgWord pe;
+int slot;
+{
+ int pe_shift = (BITS_IN(StgWord)*3)/4;
+ int pe_bits = BITS_IN(StgWord) - pe_shift;
+
+ if ( pe_bits < 8 || slot >= (1L << pe_shift) ) { /* big trouble */
+ fflush(stdout);
+ fprintf(stderr, "PackGA: slot# too big (%d) or not enough pe_bits (%d)\n",
+ slot,pe_bits);
+ stg_exit(EXIT_FAILURE);
+ }
+
+ return((((StgWord)(pe)) << pe_shift) | ((StgWord)(slot)));
+
+ /* the idea is to use 3/4 of the bits (e.g., 24) for indirection-
+ table "slot", and 1/4 for the pe# (e.g., 8).
+
+ We check for too many bits in "slot", and double-check (at
+ compile-time?) that we have enough bits for "pe". We *don't*
+ check for too many bits in "pe", because SysMan enforces a
+ MAX_PEs limit at the very very beginning.
+
+ Phil & Will 95/08
+ */
+}
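+
+/*
+  For illustration, the inverse of PackGA under the same bit layout (no
+  such helper exists in this file; it is a sketch only):
+*/
+#if 0
+static void
+unpackGA(StgWord pga, StgWord *pe, int *slot)
+{
+  int pe_shift = (BITS_IN(StgWord)*3)/4;        /* same split as PackGA */
+
+  *pe   = pga >> pe_shift;                      /* top 1/4: PE number */
+  *slot = (int)(pga & ((1L << pe_shift) - 1));  /* bottom 3/4: slot number */
+}
+#endif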
+
+//@node GC functions for GALA tables, Debugging routines, Interface to GALA table, Global Address Manipulation
+//@subsection GC functions for GALA tables
+
+/*
+ When we do a copying collection, we want to evacuate all of the local
+ entries in the GALA table for which there are outstanding remote
+  pointers (i.e. for which the weight is not MAX_GA_WEIGHT).
+ This routine has to be run BEFORE doing the GC proper (it's a
+ ``mark roots'' thing).
+*/
+//@cindex markLocalGAs
+void
+markLocalGAs(rtsBool full)
+{
+ GALA *gala, *next, *prev = NULL;
+ StgPtr old_la, new_la;
+ nat n=0, m=0; // debugging only
+ double start_time_GA; // stats only
+
+ IF_PAR_DEBUG(tables,
+ belch("@@%%%% markLocalGAs (full=%d): Marking LIVE INDIRECTIONS in GALA table starting with GALA at %p\n",
+ full, liveIndirections);
+ printLAGAtable());
+
+ PAR_TICKY_MARK_LOCAL_GAS_START();
+
+ for (gala = liveIndirections, m=0; gala != NULL; gala = next, m++) {
+ IF_PAR_DEBUG(tables,
+ fputs("@@ ",stderr);
+ printGA(&(gala->ga));
+ fprintf(stderr, ";@ %d: LA: %p (%s) ",
+ m, (void*)gala->la, info_type((StgClosure*)gala->la)));
+ next = gala->next;
+ old_la = gala->la;
+ ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */
+ if (gala->ga.weight != MAX_GA_WEIGHT) {
+ /* Remote references exist, so we must evacuate the local closure */
+ if (get_itbl((StgClosure *)old_la)->type == EVACUATED) {
+ /* somebody else already evacuated this closure */
+ new_la = (StgPtr)((StgEvacuated *)old_la)->evacuee;
+ IF_PAR_DEBUG(tables,
+ belch(" already evacuated to %p", new_la));
+ } else {
+#if 1
+ /* unwind any indirections we find */
+ StgClosure *foo = UNWIND_IND((StgClosure *)old_la) ; // debugging only
+ //ASSERT(HEAP_ALLOCED(foo));
+ n++;
+
+ new_la = (StgPtr) MarkRoot(foo);
+ IF_PAR_DEBUG(tables,
+ belch(" evacuated %p to %p", foo, new_la));
+ /* ToDo: is this the right assertion to check that new_la is in to-space?
+ ASSERT(!HEAP_ALLOCED(new_la) || Bdescr(new_la)->evacuated);
+ */
+#else
+ new_la = MarkRoot(old_la); // or just evacuate(old_ga)
+ IF_PAR_DEBUG(tables,
+ belch(" evacuated %p to %p", old_la, new_la));
+#endif
+ }
+
+ gala->la = new_la;
+ /* remove old LA and replace with new LA */
+ if (/* !full && */ gala->preferred && new_la != old_la) {
+ GALA *q;
+ ASSERT(lookupHashTable(LAtoGALAtable, (StgWord)old_la));
+ (void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala);
+ if ((q = lookupHashTable(LAtoGALAtable, (StgWord) new_la))!=NULL) {
+ if (q->preferred && gala->preferred) {
+ q->preferred = rtsFalse;
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+ new_la, info_type((StgClosure*)new_la));
+ printGA(&(q->ga));
+ fputc('\n', stderr));
+ }
+ } else {
+ insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala);
+ }
+ IF_PAR_DEBUG(tables,
+ belch("__## Hash table update (%p --> %p): ",
+ old_la, new_la));
+ }
+
+ gala->next = prev;
+ prev = gala;
+ } else if(LOOKS_LIKE_STATIC_CLOSURE(gala->la)) {
+ /* to handle the CAFs, is this all?*/
+ MarkRoot(gala->la);
+ IF_PAR_DEBUG(tables,
+ belch(" processed static closure"));
+ n++;
+ gala->next = prev;
+ prev = gala;
+ } else {
+ /* Since we have all of the weight, this GA is no longer needed */
+ StgWord pga = PackGA(thisPE, gala->ga.payload.gc.slot);
+
+ IF_PAR_DEBUG(free,
+ belch("@@!! Freeing slot %d",
+ gala->ga.payload.gc.slot));
+ /* put gala on free indirections list */
+ gala->next = freeIndirections;
+ freeIndirections = gala;
+ (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala);
+ if (/* !full && */ gala->preferred)
+ (void) removeHashTable(LAtoGALAtable, (W_) gala->la, (void *) gala);
+
+ IF_DEBUG(sanity,
+ gala->ga.weight = 0xdead0add;
+ gala->la = (StgPtr) 0xdead00aa);
+ }
+ } /* for gala ... */
+ liveIndirections = prev; /* list has been reversed during the marking */
+
+
+ PAR_TICKY_MARK_LOCAL_GAS_END(n);
+
+ IF_PAR_DEBUG(tables,
+ belch("@@%%%% markLocalGAs: %d of %d GALAs marked on PE %x",
+ n, m, mytid));
+}
+
+/*
+ Traverse the GALA table: for every live remote GA check whether it has been
+ touched during GC; if not it is not needed locally and we can free the
+ closure (i.e. let go of its heap space and send a free message to the
+ PE holding its GA).
+ This routine has to be run AFTER doing the GC proper.
+*/
+void
+rebuildGAtables(rtsBool full)
+{
+ GALA *gala, *next, *prev;
+ StgClosure *closure;
+ nat n = 0, size_GA = 0; // stats only (no. of GAs, and their heap size in bytes)
+
+ IF_PAR_DEBUG(tables,
+ belch("@@%%%% rebuildGAtables (full=%d): rebuilding LIVE REMOTE GAs in GALA table starting with GALA at %p\n",
+ full, liveRemoteGAs));
+
+ PAR_TICKY_REBUILD_GA_TABLES_START();
+
+ prepareFreeMsgBuffers();
+
+ for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) {
+ IF_PAR_DEBUG(tables,
+ printGA(&(gala->ga)));
+ next = gala->next;
+ ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */
+
+ closure = (StgClosure *) (gala->la);
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, " %p (%s) ",
+ (StgClosure *)closure, info_type(closure)));
+
+ if (/* !full && */ gala->preferred)
+ (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+
+ /* Follow indirection chains to the end, just in case */
+ // should conform with unwinding in markLocalGAs
+ closure = UNWIND_IND(closure);
+
+ /*
+ If closure has been evacuated it is live; otherwise it's dead and we
+ can nuke the GA attached to it in the LAGA table.
+ This approach also drops global aliases for PLCs.
+ */
+
+ //ASSERT(!HEAP_ALLOCED(closure) || !(Bdescr((StgPtr)closure)->evacuated));
+ if (get_itbl(closure)->type == EVACUATED) {
+ closure = ((StgEvacuated *)closure)->evacuee;
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, " EVAC %p (%s)\n",
+ closure, info_type(closure)));
+ } else {
+ /* closure is not alive any more, thus remove GA and send free msg */
+ int pe = taskIDtoPE(gala->ga.payload.gc.gtid);
+ StgWord pga = PackGA(pe, gala->ga.payload.gc.slot);
+
+ /* check that the block containing this closure is not in to-space */
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, " !EVAC %p (%s); sending free to PE %d\n",
+ closure, info_type(closure), pe));
+
+ (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala);
+      freeRemoteGA(pe-1, &(gala->ga)); // -1 because PE ids start at 1, not 0
+ gala->next = freeGALAList;
+ freeGALAList = gala;
+ IF_DEBUG(sanity,
+ gala->ga.weight = 0xdead0add;
+ gala->la = (StgPtr)0xdead00aa);
+ continue;
+ }
+ gala->la = (StgPtr)closure;
+ if (/* !full && */ gala->preferred) {
+ GALA *q;
+ if ((q = lookupHashTable(LAtoGALAtable, (StgWord) gala->la))!=NULL) {
+ if (q->preferred && gala->preferred) {
+ q->preferred = rtsFalse;
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+ gala->la, info_type((StgClosure*)gala->la));
+ printGA(&(q->ga));
+ fputc('\n', stderr));
+ }
+ } else {
+ insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+ }
+ }
+ gala->next = prev;
+ prev = gala;
+ /* Global statistics: count GAs and total size
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i;
+ char str[80];
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ size_GA += size ;
+ n++; // stats: count number of GAs we add to the new table
+ }
+ */
+ }
+ liveRemoteGAs = prev; /* list is reversed during marking */
+
+ /* If we have any remaining FREE messages to send off, do so now */
+ sendFreeMessages();
+
+ PAR_TICKY_CNT_FREE_GA();
+
+ IF_DEBUG(sanity,
+ checkFreeGALAList();
+ checkFreeIndirectionsList());
+
+ rebuildLAGAtable();
+
+#if defined(PAR_TICKY)
+ getLAGAtableSize(&n, &size_GA); // determine no of GAs and global heap
+ PAR_TICKY_REBUILD_GA_TABLES_END(n, size_GA); // record these values
+#endif
+
+ IF_PAR_DEBUG(tables,
+ belch("@#%%%% rebuildGAtables: After ReBuilding GALA table starting with GALA at %p",
+ liveRemoteGAs);
+ printLAGAtable());
+}
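+
+/*
+  How the two GC hooks fit together (a sketch of the calling sequence in
+  the GC driver, not code from this file; the exact placement inside the
+  collector is an assumption):
+*/
+#if 0
+  markLocalGAs(full);     /* BEFORE the GC proper: evacuate all local
+                             closures that still have remote references */
+  /* ... the GC proper: evacuate/scavenge ... */
+  rebuildGAtables(full);  /* AFTER the GC proper: drop dead remote GAs,
+                             send FREE messages, rebuild the LAGA table */
+#endif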
+
+/*
+ Rebuild the LA->GA table, assuming that the addresses in the GALAs are
+ correct.
+ A word on the lookupHashTable check in both loops:
+ After GC we may end up with 2 preferred GAs for the same LA! For example,
+ if we received a closure whose GA already exists on this PE we CommonUp
+ both closures, making one an indirection to the other. Before GC everything
+ is fine: one preferred GA refers to the IND, the other preferred GA refers
+ to the closure it points to. After GC, however, we have short cutted the
+ IND and suddenly we have 2 preferred GAs for the same closure. We detect
+ this case in the loop below and deprecate one GA, so that we always just
+ have one preferred GA per LA.
+*/
+
+//@cindex rebuildLAGAtable
+void
+rebuildLAGAtable(void)
+{
+ GALA *gala;
+ nat n=0, m=0; // debugging
+
+ /* The old LA->GA table is worthless */
+ freeHashTable(LAtoGALAtable, NULL);
+ LAtoGALAtable = allocHashTable();
+
+ IF_PAR_DEBUG(tables,
+ belch("@@%%%% rebuildLAGAtable: new LAGA table at %p",
+ LAtoGALAtable));
+
+ for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+ n++;
+ if (gala->preferred) {
+ GALA *q;
+ if ((q = lookupHashTable(LAtoGALAtable, (StgWord) gala->la))!=NULL) {
+ if (q->preferred && gala->preferred) {
+ /* this deprecates q (see also GALAdeprecate) */
+ q->preferred = rtsFalse;
+ (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *)q);
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+ gala->la, info_type((StgClosure*)gala->la));
+ printGA(&(q->ga));
+ fputc('\n', stderr));
+ }
+ }
+ insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+ }
+ }
+
+ for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+ m++;
+ if (gala->preferred) {
+ GALA *q;
+ if ((q = lookupHashTable(LAtoGALAtable, (StgWord) gala->la))!=NULL) {
+ if (q->preferred && gala->preferred) {
+ /* this deprecates q (see also GALAdeprecate) */
+ q->preferred = rtsFalse;
+ (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *)q);
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+ (StgClosure*)gala->la, info_type((StgClosure*)gala->la));
+ printGA(&(q->ga));
+ fputc('\n', stderr));
+ }
+ }
+ insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+ }
+ }
+
+ IF_PAR_DEBUG(tables,
+ belch("@@%%%% rebuildLAGAtable: inserted %d entries from liveIndirections and %d entries from liveRemoteGAs",
+ n,m));
+}
+
+/*
+ Determine the size of the LAGA and GALA tables.
+ Has to be done after rebuilding the tables.
+ Only used for global statistics gathering.
+*/
+
+//@cindex getLAGAtableSize
+void
+getLAGAtableSize(nat *nP, nat *sizeP)
+{
+ GALA *gala;
+ // nat n=0, tot_size=0;
+ StgClosure *closure;
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i;
+ char str[80];
+  /* In order to avoid counting closures twice we maintain a hash table
+ of all closures seen so far.
+ ToDo: collect this data while rebuilding the GALA table and make use
+ of the existing hash tables;
+ */
+ HashTable *closureTable; // hash table for closures encountered already
+
+ closureTable = allocHashTable();
+
+ (*nP) = (*sizeP) = 0;
+ for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+ closure = (StgClosure*) gala->la;
+ if (lookupHashTable(closureTable, (StgWord)closure)==NULL) { // not seen yet
+ insertHashTable(closureTable, (StgWord)closure, (void *)1);
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ (*sizeP) += size ; // stats: measure total heap size of global closures
+ (*nP)++; // stats: count number of GAs
+ }
+ }
+
+ for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+ closure = (StgClosure*) gala->la;
+ if (lookupHashTable(closureTable, (StgWord)closure)==NULL) { // not seen yet
+ insertHashTable(closureTable, (StgWord)closure, (void *)1);
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ (*sizeP) += size ; // stats: measure total heap size of global closures
+ (*nP)++; // stats: count number of GAs
+ }
+ }
+
+ freeHashTable(closureTable, NULL);
+}
+
+//@node Debugging routines, Index, GC functions for GALA tables, Global Address Manipulation
+//@subsection Debugging routines
+
+//@cindex printGA
+void
+printGA (globalAddr *ga)
+{
+ fprintf(stderr, "((%x, %d, %x))",
+ ga->payload.gc.gtid,
+ ga->payload.gc.slot,
+ ga->weight);
+}
+
+//@cindex printGALA
+void
+printGALA (GALA *gala)
+{
+ printGA(&(gala->ga));
+ fprintf(stderr, " -> %p (%s)",
+ (StgClosure*)gala->la, info_type((StgClosure*)gala->la));
+ fprintf(stderr, " %s",
+ (gala->preferred) ? "PREF" : "____");
+}
+
+/*
+ Printing the LA->GA table.
+*/
+
+//@cindex printLiveIndTable
+void
+printLiveIndTable(void)
+{
+ GALA *gala, *q;
+ nat n=0; // debugging
+
+ belch("@@%%%%:: logical LiveIndTable (%p) (liveIndirections=%p):",
+ LAtoGALAtable, liveIndirections);
+
+ for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+ n++;
+ printGALA(gala);
+ /* check whether this gala->la is hashed into the LAGA table */
+ q = lookupHashTable(LAtoGALAtable, (StgWord)(gala->la));
+ fprintf(stderr, "\t%s\n", (q==NULL) ? "...." : (q==gala) ? "====" : "####");
+ //ASSERT(lookupHashTable(LAtoGALAtable, (StgWord)(gala->la)));
+ }
+ belch("@@%%%%:: %d live indirections",
+ n);
+}
+
+void
+printRemoteGATable(void)
+{
+ GALA *gala, *q;
+ nat m=0; // debugging
+
+ belch("@@%%%%:: logical RemoteGATable (%p) (liveRemoteGAs=%p):",
+ LAtoGALAtable, liveRemoteGAs);
+
+ for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+ m++;
+ printGALA(gala);
+ /* check whether this gala->la is hashed into the LAGA table */
+ q = lookupHashTable(LAtoGALAtable, (StgWord)(gala->la));
+ fprintf(stderr, "\t%s\n", (q==NULL) ? "...." : (q==gala) ? "====" : "####");
+ // ASSERT(lookupHashTable(LAtoGALAtable, (StgWord)(gala->la)));
+ }
+ belch("@@%%%%:: %d remote GAs",
+ m);
+}
+
+//@cindex printLAGAtable
+void
+printLAGAtable(void)
+{
+ belch("@@%%: LAGAtable (%p) with liveIndirections=%p, liveRemoteGAs=%p:",
+ LAtoGALAtable, liveIndirections, liveRemoteGAs);
+
+ printLiveIndTable();
+ printRemoteGATable();
+}
+
+/*
+ Check whether a GA is already in a list.
+*/
+rtsBool
+isOnLiveIndTable(globalAddr *ga)
+{
+ GALA *gala;
+
+ for (gala = liveIndirections; gala != NULL; gala = gala->next)
+ if (gala->ga.weight==ga->weight &&
+ gala->ga.payload.gc.slot==ga->payload.gc.slot &&
+ gala->ga.payload.gc.gtid==ga->payload.gc.gtid)
+ return rtsTrue;
+
+ return rtsFalse;
+}
+
+rtsBool
+isOnRemoteGATable(globalAddr *ga)
+{
+ GALA *gala;
+
+ for (gala = liveRemoteGAs; gala != NULL; gala = gala->next)
+ if (gala->ga.weight==ga->weight &&
+ gala->ga.payload.gc.slot==ga->payload.gc.slot &&
+ gala->ga.payload.gc.gtid==ga->payload.gc.gtid)
+ return rtsTrue;
+
+ return rtsFalse;
+}
+
+/*
+ Sanity check for free lists.
+*/
+void
+checkFreeGALAList(void) {
+ GALA *gl;
+
+ for (gl=freeGALAList; gl != NULL; gl=gl->next) {
+ ASSERT(gl->ga.weight==0xdead0add);
+ ASSERT(gl->la==(StgPtr)0xdead00aa);
+ }
+}
+
+void
+checkFreeIndirectionsList(void) {
+ GALA *gl;
+
+ for (gl=freeIndirections; gl != NULL; gl=gl->next) {
+ ASSERT(gl->ga.weight==0xdead0add);
+ ASSERT(gl->la==(StgPtr)0xdead00aa);
+ }
+}
+#endif /* PAR -- whole file */
+
+//@node Index, , Debugging routines, Global Address Manipulation
+//@subsection Index
+
+//@index
+//* DebugPrintLAGAtable:: @cindex\s-+DebugPrintLAGAtable
+//* GALAlookup:: @cindex\s-+GALAlookup
+//* LAGAlookup:: @cindex\s-+LAGAlookup
+//* LAtoGALAtable:: @cindex\s-+LAtoGALAtable
+//* PackGA:: @cindex\s-+PackGA
+//* addWeight:: @cindex\s-+addWeight
+//* allocGALA:: @cindex\s-+allocGALA
+//* allocIndirection:: @cindex\s-+allocIndirection
+//* freeIndirections:: @cindex\s-+freeIndirections
+//* initGAtables:: @cindex\s-+initGAtables
+//* liveIndirections:: @cindex\s-+liveIndirections
+//* liveRemoteGAs:: @cindex\s-+liveRemoteGAs
+//* makeGlobal:: @cindex\s-+makeGlobal
+//* markLocalGAs:: @cindex\s-+markLocalGAs
+//* nextIndirection:: @cindex\s-+nextIndirection
+//* pGAtoGALAtable:: @cindex\s-+pGAtoGALAtable
+//* printGA:: @cindex\s-+printGA
+//* printGALA:: @cindex\s-+printGALA
+//* rebuildLAGAtable:: @cindex\s-+rebuildLAGAtable
+//* registerTask:: @cindex\s-+registerTask
+//* setRemoteGA:: @cindex\s-+setRemoteGA
+//* splitWeight:: @cindex\s-+splitWeight
+//* taskIDtoPE:: @cindex\s-+taskIDtoPE
+//* taskIDtoPEtable:: @cindex\s-+taskIDtoPEtable
+//* thisPE:: @cindex\s-+thisPE
+//@end index
diff --git a/rts/parallel/GranSim.c b/rts/parallel/GranSim.c
new file mode 100644
index 0000000000..b1cc0962be
--- /dev/null
+++ b/rts/parallel/GranSim.c
@@ -0,0 +1,3015 @@
+/*
+ Time-stamp: <Tue Mar 06 2001 00:17:42 Stardate: [-30]6285.06 hwloidl>
+
+  Variables and functions specific to GranSim, the parallelism simulator
+  for GPH.
+*/
+
+//@node GranSim specific code, , ,
+//@section GranSim specific code
+
+/*
+ Macros for dealing with the new and improved GA field for simulating
+  parallel execution. Based on the @CONCURRENT@ package. The GA field now
+  contains a mask in which the n-th bit is set if the data can be found on
+  the n-th processor. In case of multiple copies, several bits are
+ set. The total number of processors is bounded by @MAX_PROC@, which
+ should be <= the length of a word in bits. -- HWL
+*/
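+
+/*
+  To make the mask representation concrete, the operations involved have
+  roughly the following flavour (illustration only; the real macros, e.g.
+  PROCS and IS_LOCAL_TO, are defined elsewhere and may differ in detail):
+*/
+#if 0
+#define PE_BIT(proc)                 (1UL << (proc))
+#define EXAMPLE_IS_LOCAL_TO(ga,proc) (((ga) & PE_BIT(proc)) != 0)
+#define EXAMPLE_ADD_COPY(ga,proc)    ((ga) | PE_BIT(proc))
+#endif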
+
+//@menu
+//* Includes::
+//* Prototypes and externs::
+//* Constants and Variables::
+//* Initialisation::
+//* Global Address Operations::
+//* Global Event Queue::
+//* Spark queue functions::
+//* Scheduling functions::
+//* Thread Queue routines::
+//* GranSim functions::
+//* GranSimLight routines::
+//* Code for Fetching Nodes::
+//* Idle PEs::
+//* Routines directly called from Haskell world::
+//* Emiting profiling info for GrAnSim::
+//* Dumping routines::
+//* Index::
+//@end menu
+
+//@node Includes, Prototypes and externs, GranSim specific code, GranSim specific code
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "StgMiscClosures.h"
+#include "StgTypes.h"
+#include "Schedule.h"
+#include "SchedAPI.h" // for pushClosure
+#include "GranSimRts.h"
+#include "GranSim.h"
+#include "ParallelRts.h"
+#include "ParallelDebug.h"
+#include "Sparks.h"
+#include "Storage.h" // for recordMutable
+
+
+//@node Prototypes and externs, Constants and Variables, Includes, GranSim specific code
+//@subsection Prototypes and externs
+
+#if defined(GRAN)
+
+/* Prototypes */
+static inline PEs ga_to_proc(StgWord);
+static inline rtsBool any_idle(void);
+static inline nat idlers(void);
+ PEs where_is(StgClosure *node);
+
+static rtsBool stealSomething(PEs proc, rtsBool steal_spark, rtsBool steal_thread);
+static rtsBool stealSpark(PEs proc);
+static rtsBool stealThread(PEs proc);
+static rtsBool stealSparkMagic(PEs proc);
+static rtsBool stealThreadMagic(PEs proc);
+/* subsumed by stealSomething
+static void stealThread(PEs proc);
+static void stealSpark(PEs proc);
+*/
+static rtsTime sparkStealTime(void);
+static nat natRandom(nat from, nat to);
+static PEs findRandomPE(PEs proc);
+static void sortPEsByTime (PEs proc, PEs *pes_by_time,
+ nat *firstp, nat *np);
+
+void GetRoots(void);
+
+#endif /* GRAN */
+
+//@node Constants and Variables, Initialisation, Prototypes and externs, GranSim specific code
+//@subsection Constants and Variables
+
+#if defined(GRAN) || defined(PAR)
+/* See GranSim.h for the definition of the enum gran_event_types */
+char *gran_event_names[] = {
+ "START", "START(Q)",
+ "STEALING", "STOLEN", "STOLEN(Q)",
+ "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
+ "SCHEDULE", "DESCHEDULE",
+ "END",
+ "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
+ "ALLOC",
+ "TERMINATE",
+ "SYSTEM_START", "SYSTEM_END", /* only for debugging */
+ "??"
+};
+#endif
+
+#if defined(GRAN) /* whole file */
+char *proc_status_names[] = {
+ "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
+ "UnknownProcStatus"
+};
+
+/* For internal use (event statistics) only */
+char *event_names[] =
+ { "ContinueThread", "StartThread", "ResumeThread",
+ "MoveSpark", "MoveThread", "FindWork",
+ "FetchNode", "FetchReply",
+ "GlobalBlock", "UnblockThread"
+ };
+
+//@cindex CurrentProc
+PEs CurrentProc = 0;
+
+/*
+ ToDo: Create a structure for the processor status and put all the
+ arrays below into it.
+ -- HWL */
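+
+/*
+  A sketch of the per-PE record that the ToDo above asks for (hypothetical;
+  no such type exists in the RTS):
+*/
+#if 0
+typedef struct PEState_ {
+  rtsTime       clock;               /* would replace CurrentTime[proc] */
+  nat           outstandingFetches;  /* would replace OutstandingFetches[proc] */
+  nat           outstandingFishes;   /* would replace OutstandingFishes[proc] */
+  rtsProcStatus status;              /* would replace procStatus[proc] */
+} PEState;
+#endif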
+
+//@cindex CurrentTime
+/* One clock for each PE */
+rtsTime CurrentTime[MAX_PROC];
+
+/* Useful to restrict communication; cf fishing model in GUM */
+nat OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
+
+/* Status of each PE (new since but independent of GranSim Light) */
+rtsProcStatus procStatus[MAX_PROC];
+
+# if defined(GRAN) && defined(GRAN_CHECK)
+/* To check if the RTS ever tries to run a thread that should be blocked
+ because of fetching remote data */
+StgTSO *BlockedOnFetch[MAX_PROC];
+# define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
+# endif
+
+nat SparksAvail = 0; /* How many sparks are available */
+nat SurplusThreads = 0; /* How many excess threads are there */
+
+/* Do we need to reschedule following a fetch? */
+rtsBool NeedToReSchedule = rtsFalse, IgnoreEvents = rtsFalse, IgnoreYields = rtsFalse;
+rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; /* checked from the threaded world! */
+
+//@cindex spark queue
+/* GranSim: a globally visible array of spark queues */
+rtsSparkQ pending_sparks_hds[MAX_PROC];
+rtsSparkQ pending_sparks_tls[MAX_PROC];
+
+nat sparksIgnored = 0, sparksCreated = 0;
+
+GlobalGranStats globalGranStats;
+
+nat gran_arith_cost, gran_branch_cost, gran_load_cost,
+ gran_store_cost, gran_float_cost;
+
+/*
+Old comment from 0.29. ToDo: Check and update -- HWL
+
+The following variables control the behaviour of GrAnSim. In general, there
+is one RTS option for enabling each of these features. In getting the
+desired setup of GranSim the following questions have to be answered:
+\begin{itemize}
+\item {\em Which scheduling algorithm} to use (@RtsFlags.GranFlags.DoFairSchedule@)?
+ Currently only unfair scheduling is supported.
+\item What to do when remote data is fetched (@RtsFlags.GranFlags.DoAsyncFetch@)?
+ Either block and wait for the
+ data or reschedule and do some other work.
+ Thus, if this variable is true, asynchronous communication is
+ modelled. Block on fetch mainly makes sense for incremental fetching.
+
+ There is also a simplified fetch variant available
+ (@RtsFlags.GranFlags.SimplifiedFetch@). This variant does not use events to model
+ communication. It is faster but the results will be less accurate.
+\item How aggressive to be in getting work after a reschedule on fetch
+ (@RtsFlags.GranFlags.FetchStrategy@)?
+ This is determined by the so-called {\em fetching
+ strategy\/}. Currently, there are four possibilities:
+ \begin{enumerate}
+ \item Only run a runnable thread.
+ \item Turn a spark into a thread, if necessary.
+ \item Steal a remote spark, if necessary.
+ \item Steal a runnable thread from another processor, if necessary.
+  \end{enumerate}
+ The variable @RtsFlags.GranFlags.FetchStrategy@ determines how far to go in this list
+ when rescheduling on a fetch.
+\item Should sparks or threads be stolen first when looking for work
+ (@RtsFlags.GranFlags.DoStealThreadsFirst@)?
+ The default is to steal sparks first (much cheaper).
+\item Should the RTS use a lazy thread creation scheme
+ (@RtsFlags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
+ turned into threads when work is needed. Also note, that sparks
+ can be discarded by the RTS (this is done in the case of an overflow
+ of the spark pool). Setting @RtsFlags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
+ the creation of threads at the next possibility (i.e.\ when new work
+ is demanded the next time).
+\item Should data be fetched closure-by-closure or in packets
+ (@RtsFlags.GranFlags.DoBulkFetching@)? The default strategy is a GRIP-like incremental
+ (i.e.\ closure-by-closure) strategy. This makes sense in a
+ low-latency setting but is bad in a high-latency system. Setting
+ @RtsFlags.GranFlags.DoBulkFetching@ to @True@ enables bulk (packet) fetching. Other
+ parameters determine the size of the packets (@pack_buffer_size@) and the number of
+ thunks that should be put into one packet (@RtsFlags.GranFlags.ThunksToPack@).
+\item If there is no other possibility to find work, should runnable threads
+ be moved to an idle processor (@RtsFlags.GranFlags.DoThreadMigration@)? In any case, the
+ RTS tried to get sparks (either local or remote ones) first. Thread
+ migration is very expensive, since a whole TSO has to be transferred
+ and probably data locality becomes worse in the process. Note, that
+ the closure, which will be evaluated next by that TSO is not
+ transferred together with the TSO (that might block another thread).
+\item Should the RTS distinguish between sparks created by local nodes and
+ stolen sparks (@RtsFlags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
+ data locality by preferring sparks of local nodes (it is more likely
+ that the data for those sparks is already on the local processor).
+ However, such a distinction also imposes an overhead on the spark
+ queue management, and typically a large number of sparks are
+ generated during execution. By default this variable is set to @False@.
+\item Should the RTS use granularity control mechanisms? The idea of a
+ granularity control mechanism is to make use of granularity
+ information provided via annotation of the @par@ construct in order
+ to prefer bigger threads when either turning a spark into a thread or
+ when choosing the next thread to schedule. Currently, three such
+ mechanisms are implemented:
+ \begin{itemize}
+ \item Cut-off: The granularity information is interpreted as a
+ priority. If a threshold priority is given to the RTS, then
+ only those sparks with a higher priority than the threshold
+ are actually created. Other sparks are immediately discarded.
+ This is similar to a usual cut-off mechanism often used in
+ parallel programs, where parallelism is only created if the
+	  input data is large enough. With this option, the choice is
+ hidden in the RTS and only the threshold value has to be
+ provided as a parameter to the runtime system.
+ \item Priority Sparking: This mechanism keeps priorities for sparks
+ and chooses the spark with the highest priority when turning
+ a spark into a thread. After that the priority information is
+ discarded. The overhead of this mechanism comes from
+ maintaining a sorted spark queue.
+ \item Priority Scheduling: This mechanism keeps the granularity
+	  information for threads, too. Thus, on each reschedule the
+ largest thread is chosen. This mechanism has a higher
+ overhead, as the thread queue is sorted, too.
+ \end{itemize}
+\end{itemize}
+*/
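+
+/*
+  The fetching strategy described above amounts to an escalation ladder
+  roughly like the sketch below (illustrative pseudo-code only; the real
+  logic is spread over the scheduler and the steal* functions in this
+  file):
+*/
+#if 0
+  /* after rescheduling on a fetch, on processor proc: */
+  if (run_queue_hds[proc] != END_TSO_QUEUE)
+    ; /* 1: run a runnable thread */
+  else if (RtsFlags.GranFlags.FetchStrategy >= 2 && spark_queue_len(proc) > 0)
+    ; /* 2: turn a local spark into a thread */
+  else if (RtsFlags.GranFlags.FetchStrategy >= 3 && stealSpark(proc))
+    ; /* 3: steal a remote spark */
+  else if (RtsFlags.GranFlags.FetchStrategy >= 4 && stealThread(proc))
+    ; /* 4: steal a runnable thread from another PE */
+  else
+    procStatus[proc] = Idle; /* give up for now */
+#endif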
+
+//@node Initialisation, Global Address Operations, Constants and Variables, GranSim specific code
+//@subsection Initialisation
+
+void
+init_gr_stats (void) {
+ memset(&globalGranStats, '\0', sizeof(GlobalGranStats));
+#if 0
+ /* event stats */
+ globalGranStats.noOfEvents = 0;
+ for (i=0; i<MAX_EVENT; i++) globalGranStats.event_counts[i]=0;
+
+ /* communication stats */
+ globalGranStats.fetch_misses = 0;
+ globalGranStats.tot_low_pri_sparks = 0;
+
+ /* obscure stats */
+ globalGranStats.rs_sp_count = 0;
+ globalGranStats.rs_t_count = 0;
+ globalGranStats.ntimes_total = 0,
+ globalGranStats.fl_total = 0;
+ globalGranStats.no_of_steals = 0;
+
+ /* spark queue stats */
+ globalGranStats.tot_sq_len = 0,
+ globalGranStats.tot_sq_probes = 0;
+ globalGranStats.tot_sparks = 0;
+ globalGranStats.withered_sparks = 0;
+ globalGranStats.tot_add_threads = 0;
+ globalGranStats.tot_tq_len = 0;
+ globalGranStats.non_end_add_threads = 0;
+
+ /* thread stats */
+ globalGranStats.tot_threads_created = 0;
+ for (i=0; i<MAX_PROC; i++) globalGranStats.threads_created_on_PE[i]=0;
+#endif /* 0 */
+}
+
+//@node Global Address Operations, Global Event Queue, Initialisation, GranSim specific code
+//@subsection Global Address Operations
+/*
+ ----------------------------------------------------------------------
+ Global Address Operations
+
+ These functions perform operations on the global-address (ga) part of a
+ closure. The ga is the only new field (1 word) in a closure introduced by
+ GrAnSim. It serves as a bitmask, indicating on which processor the
+ closure is residing. Since threads are described by Thread State Object
+ (TSO), which is nothing but another kind of closure, this scheme allows
+ gives placement information about threads.
+
+ A ga is just a bitmask, so the operations on them are mainly bitmask
+ manipulating functions. Note, that there are important macros like PROCS,
+ IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
+
+ NOTE: In GrAnSim-light we don't maintain placement information. This
+  allows us to simulate an arbitrary number of processors. The price we
+  have to pay is that communication cannot be costed properly. In short,
+ GrAnSim-light is meant to reveal the maximal parallelism in a program.
+ From an implementation point of view the important thing is: {\em
+ GrAnSim-light does not maintain global-addresses}. */
+
+/* ga_to_proc returns the first processor marked in the bitmask ga.
+ Normally only one bit in ga should be set. But for PLCs all bits
+ are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
+
+//@cindex ga_to_proc
+
+static inline PEs
+ga_to_proc(StgWord ga)
+{
+ PEs i;
+ for (i = 0; i < RtsFlags.GranFlags.proc && !IS_LOCAL_TO(ga, i); i++);
+ ASSERT(i<RtsFlags.GranFlags.proc);
+ return (i);
+}
+
+/* NB: This takes a *node* rather than just a ga as input */
+//@cindex where_is
+PEs
+where_is(StgClosure *node)
+{ return (ga_to_proc(PROCS(node))); }
+
+// debugging only
+//@cindex is_unique
+rtsBool
+is_unique(StgClosure *node)
+{
+ PEs i;
+ rtsBool unique = rtsFalse;
+
+ for (i = 0; i < RtsFlags.GranFlags.proc ; i++)
+ if (IS_LOCAL_TO(PROCS(node), i))
+ if (unique) // exactly 1 instance found so far
+ return rtsFalse; // found a 2nd instance => not unique
+ else
+ unique = rtsTrue; // found 1st instance
+ ASSERT(unique); // otherwise returned from within loop
+ return (unique);
+}
+
+//@cindex any_idle
+static inline rtsBool
+any_idle(void) { /* any (map (\ i -> procStatus[i] == Idle)) [0,..,MAX_PROC] */
+ PEs i;
+ rtsBool any_idle;
+ for(i=0, any_idle=rtsFalse;
+ !any_idle && i<RtsFlags.GranFlags.proc;
+ any_idle = any_idle || procStatus[i] == Idle, i++)
+    {} ;
+  return any_idle;
+}
+
+//@cindex idlers
+static inline nat
+idlers(void) { /* number of idle PEs */
+ PEs i, j;
+ for(i=0, j=0;
+ i<RtsFlags.GranFlags.proc;
+ j += (procStatus[i] == Idle) ? 1 : 0, i++)
+ {} ;
+ return j;
+}
+
+//@node Global Event Queue, Spark queue functions, Global Address Operations, GranSim specific code
+//@subsection Global Event Queue
+/*
+The following routines implement an ADT of an event-queue (FIFO).
+ToDo: Put that in its own file(?)
+*/
+
+/* Pointer to the global event queue; events are currently malloc'ed */
+rtsEventQ EventHd = NULL;
+
+//@cindex get_next_event
+rtsEvent *
+get_next_event(void)
+{
+ static rtsEventQ entry = NULL;
+
+ if (EventHd == NULL) {
+ barf("No next event. This may be caused by a circular data dependency in the program.");
+ }
+
+ if (entry != NULL)
+ free((char *)entry);
+
+ if (RtsFlags.GranFlags.GranSimStats.Global) { /* count events */
+ globalGranStats.noOfEvents++;
+ globalGranStats.event_counts[EventHd->evttype]++;
+ }
+
+ entry = EventHd;
+
+ IF_GRAN_DEBUG(event_trace,
+ print_event(entry));
+
+ EventHd = EventHd->next;
+ return(entry);
+}
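+
+/*
+  Sketch of the consumer of this queue (the real dispatch loop lives in
+  the scheduler, Schedule.c; this fragment is illustrative only):
+*/
+#if 0
+  rtsEvent *event = get_next_event();  /* also frees the previous event */
+  CurrentProc = event->proc;
+  switch (event->evttype) {
+    case FetchNode:     do_the_fetchnode(event);   break;
+    case GlobalBlock:   do_the_globalblock(event); break;
+    case UnblockThread: do_the_unblock(event);     break;
+    /* ... one handler per rtsEventType ... */
+    default: barf("unknown event type");
+  }
+#endif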
+
+/* When getting the time of the next event we ignore CONTINUETHREAD events:
+ we don't want to be interrupted before the end of the current time slice
+ unless there is something important to handle.
+*/
+//@cindex get_time_of_next_event
+rtsTime
+get_time_of_next_event(void)
+{
+ rtsEventQ event = EventHd;
+
+ while (event != NULL && event->evttype==ContinueThread) {
+ event = event->next;
+ }
+ if(event == NULL)
+ return ((rtsTime) 0);
+ else
+ return (event->time);
+}
+
+/* ToDo: replace malloc/free with a free list */
+//@cindex insert_event
+void
+insert_event(newentry)
+rtsEvent *newentry;
+{
+ rtsEventType evttype = newentry->evttype;
+ rtsEvent *event, **prev;
+
+ /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
+
+ /* Search the queue and insert at the right point:
+ FINDWORK before everything, CONTINUETHREAD after everything.
+
+ This ensures that we find any available work after all threads have
+ executed the current cycle. This level of detail would normally be
+ irrelevant, but matters for ridiculously low latencies...
+ */
+
+ /* Changed the ordering: Now FINDWORK comes after everything but
+ CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
+ FINDWORK. This is important when a GranSimSparkAt happens and
+ DoAlwaysCreateThreads is turned on. Also important if a GC occurs
+ when trying to build a new thread (see much_spark) -- HWL 02/96 */
+
+ if(EventHd == NULL)
+ EventHd = newentry;
+ else {
+ for (event = EventHd, prev=(rtsEvent**)&EventHd;
+ event != NULL;
+ prev = (rtsEvent**)&(event->next), event = event->next) {
+ switch (evttype) {
+ case FindWork: if ( event->time < newentry->time ||
+ ( (event->time == newentry->time) &&
+ (event->evttype != ContinueThread) ) )
+ continue;
+ else
+ break;
+ case ContinueThread: if ( event->time <= newentry->time )
+ continue;
+ else
+ break;
+ default: if ( event->time < newentry->time ||
+ ((event->time == newentry->time) &&
+ (event->evttype == newentry->evttype)) )
+ continue;
+ else
+ break;
+ }
+ /* Insert newentry here (i.e. before event) */
+ *prev = newentry;
+ newentry->next = event;
+ break;
+ }
+ if (event == NULL)
+ *prev = newentry;
+ }
+}
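+
+/*
+  Worked example of the ordering rules above: if three events with the
+  same timestamp are inserted in the order ContinueThread, FindWork,
+  MoveSpark, the queue ends up as
+
+      MoveSpark --> FindWork --> ContinueThread
+
+  i.e. at equal times ordinary events come first, then FindWork, and
+  ContinueThread always last.
+*/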
+
+//@cindex new_event
+void
+new_event(proc,creator,time,evttype,tso,node,spark)
+PEs proc, creator;
+rtsTime time;
+rtsEventType evttype;
+StgTSO *tso;
+StgClosure *node;
+rtsSpark *spark;
+{
+ rtsEvent *newentry = (rtsEvent *) stgMallocBytes(sizeof(rtsEvent), "new_event");
+
+ newentry->proc = proc;
+ newentry->creator = creator;
+ newentry->time = time;
+ newentry->evttype = evttype;
+ newentry->tso = tso;
+ newentry->node = node;
+ newentry->spark = spark;
+ newentry->gc_info = 0;
+ newentry->next = NULL;
+
+ insert_event(newentry);
+
+ IF_DEBUG(gran,
+ fprintf(stderr, "GRAN: new_event: \n");
+ print_event(newentry));
+}
+
+//@cindex prepend_event
+void
+prepend_event(event) /* put event at beginning of EventQueue */
+rtsEvent *event;
+{ /* only used for GC! */
+ event->next = EventHd;
+ EventHd = event;
+}
+
+//@cindex grab_event
+rtsEventQ
+grab_event(void) /* undo prepend_event i.e. get the event */
+{ /* at the head of EventQ but don't free anything */
+ rtsEventQ event = EventHd;
+
+ if (EventHd == NULL) {
+ barf("No next event (in grab_event). This may be caused by a circular data dependency in the program.");
+ }
+
+ EventHd = EventHd->next;
+ return (event);
+}
+
+//@cindex traverse_eventq_for_gc
+void
+traverse_eventq_for_gc(void)
+{
+ rtsEventQ event = EventHd;
+ StgWord bufsize;
+ StgClosure *closurep;
+ StgTSO *tsop;
+ StgPtr buffer, bufptr;
+ PEs proc, creator;
+
+ /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
+ orig closure (root of packed graph). This means that a graph, which is
+ between processors at the time of GC is fetched again at the time when
+ it would have arrived, had there been no GC. Slightly inaccurate but
+ safe for GC.
+     This is only needed for GUM style fetching. -- HWL */
+ if (!RtsFlags.GranFlags.DoBulkFetching)
+ return;
+
+ for(event = EventHd; event!=NULL; event=event->next) {
+ if (event->evttype==FetchReply) {
+ buffer = stgCast(StgPtr,event->node);
+ ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */
+ bufsize = buffer[PACK_SIZE_LOCN];
+ closurep = stgCast(StgClosure*,buffer[PACK_HDR_SIZE]);
+ tsop = stgCast(StgTSO*,buffer[PACK_TSO_LOCN]);
+ proc = event->proc;
+ creator = event->creator; /* similar to unpacking */
+ for (bufptr=buffer+PACK_HDR_SIZE;
+ bufptr<(buffer+bufsize);
+ bufptr++) {
+ // if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
+ // (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
+ if ( GET_INFO(stgCast(StgClosure*,bufptr)) ) {
+ convertFromRBH(stgCast(StgClosure *,bufptr));
+ }
+ }
+ free(buffer);
+ event->evttype = FetchNode;
+ event->proc = creator;
+ event->creator = proc;
+ event->node = closurep;
+ event->tso = tsop;
+ event->gc_info = 0;
+ }
+ }
+}
+
+void
+markEventQueue(void)
+{
+ StgClosure *MarkRoot(StgClosure *root); // prototype
+
+ rtsEventQ event = EventHd;
+ nat len;
+
+ /* iterate over eventq and register relevant fields in event as roots */
+ for(event = EventHd, len = 0; event!=NULL; event=event->next, len++) {
+ switch (event->evttype) {
+ case ContinueThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ break;
+ case StartThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case ResumeThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case MoveSpark:
+ event->spark->node = (StgClosure *)MarkRoot((StgClosure *)event->spark->node);
+ break;
+ case MoveThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ break;
+ case FindWork:
+ break;
+ case FetchNode:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case FetchReply:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ if (RtsFlags.GranFlags.DoBulkFetching)
+	  // ToDo: traverse_eventq_for_gc if GUM-Fetching!!! HWL
+	  belch("ghuH: packets in BulkFetching not marked as roots; may be fatal");
+ else
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case GlobalBlock:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case UnblockThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ default:
+ barf("markEventQueue: trying to mark unknown event @ %p", event);
+ }}
+ IF_DEBUG(gc,
+ belch("GC: markEventQueue: %d events in queue", len));
+}
+
+/*
+ Prune all ContinueThread events related to tso or node in the eventq.
+ Currently used if a thread leaves STG land with ThreadBlocked status,
+ i.e. it blocked on a closure and has been put on its blocking queue. It
+  will be reawakened via a call to awakenBlockedQueue. Until then no
+  event affecting this tso should appear in the eventq. A bit of a hack,
+ because ideally we shouldn't generate such spurious ContinueThread events
+ in the first place.
+*/
+//@cindex prune_eventq
+void
+prune_eventq(tso, node)
+StgTSO *tso;
+StgClosure *node;
+{ rtsEventQ prev = (rtsEventQ)NULL, event = EventHd;
+
+ /* node unused for now */
+ ASSERT(node==NULL);
+ /* tso must be valid, then */
+ ASSERT(tso!=END_TSO_QUEUE);
+ while (event != NULL) {
+ if (event->evttype==ContinueThread &&
+ (event->tso==tso)) {
+ IF_GRAN_DEBUG(event_trace, // ToDo: use another debug flag
+ belch("prune_eventq: pruning ContinueThread event for TSO %d (%p) on PE %d @ %lx (%p)",
+ event->tso->id, event->tso, event->proc, event->time, event));
+ if (prev==(rtsEventQ)NULL) { // beginning of eventq
+ EventHd = event->next;
+ free(event);
+ event = EventHd;
+ } else {
+ prev->next = event->next;
+ free(event);
+ event = prev->next;
+ }
+ } else { // no pruning necessary; go to next event
+ prev = event;
+ event = event->next;
+ }
+ }
+}
+
+//@cindex print_event
+void
+print_event(event)
+rtsEvent *event;
+{
+ char str_tso[16], str_node[16];
+ StgThreadID tso_id;
+
+  /* check for NULL before dereferencing event below */
+  if (event==NULL) {
+    fprintf(stderr,"Evt: NIL\n");
+    return;
+  }
+  if (event->tso==END_TSO_QUEUE) {
+ strcpy(str_tso, "______");
+ tso_id = 0;
+ } else {
+ sprintf(str_tso, "%p", event->tso);
+ tso_id = (event->tso==NULL) ? 0 : event->tso->id;
+ }
+ if (event->node==(StgClosure*)NULL) {
+ strcpy(str_node, "______");
+ } else {
+ sprintf(str_node, "%p", event->node);
+ }
+ // HWL: shouldn't be necessary; ToDo: nuke
+ //str_tso[6]='\0';
+ //str_node[6]='\0';
+
+  fprintf(stderr, "Evt: %s (%u), PE %u [%u], Time %lu, TSO %d (%s), Node %s\n",
+ event_names[event->evttype], event->evttype,
+ event->proc, event->creator, event->time,
+ tso_id, str_tso, str_node
+ /*, event->spark, event->next */ );
+
+}
+
+//@cindex print_eventq
+void
+print_eventq(hd)
+rtsEvent *hd;
+{
+ rtsEvent *x;
+
+ fprintf(stderr,"Event Queue with root at %p:\n", hd);
+ for (x=hd; x!=NULL; x=x->next) {
+ print_event(x);
+ }
+}
+
+/*
+ Spark queue functions are now all in Sparks.c!!
+*/
+//@node Scheduling functions, Thread Queue routines, Spark queue functions, GranSim specific code
+//@subsection Scheduling functions
+
+/*
+ These functions are variants of thread initialisation and therefore
+ related to initThread and friends in Schedule.c. However, they are
+ specific to a GranSim setup in storing more info in the TSO's statistics
+ buffer and sorting the thread queues etc.
+*/
+
+/*
+ A large portion of startThread deals with maintaining a sorted thread
+ queue, which is needed for the Priority Sparking option. Without that
+ complication the code boils down to FIFO handling.
+*/
+//@cindex insertThread
+void
+insertThread(tso, proc)
+StgTSO* tso;
+PEs proc;
+{
+ StgTSO *prev = NULL, *next = NULL;
+ nat count = 0;
+ rtsBool found = rtsFalse;
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(!is_on_queue(tso,proc));
+ /* Idle proc: put the thread on the run queue
+ same for pri spark and basic version */
+ if (run_queue_hds[proc] == END_TSO_QUEUE)
+ {
+ /* too strong!
+ ASSERT((CurrentProc==MainProc &&
+ CurrentTime[MainProc]==0 &&
+ procStatus[MainProc]==Idle) ||
+ procStatus[proc]==Starting);
+ */
+ run_queue_hds[proc] = run_queue_tls[proc] = tso;
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime;
+
+ /* new_event of ContinueThread has been moved to do_the_startthread */
+
+ /* too strong!
+ ASSERT(procStatus[proc]==Idle ||
+ procStatus[proc]==Fishing ||
+ procStatus[proc]==Starting);
+ procStatus[proc] = Busy;
+ */
+ return;
+ }
+
+ if (RtsFlags.GranFlags.Light)
+ GranSimLight_insertThread(tso, proc);
+
+ /* Only for Pri Scheduling: find place where to insert tso into queue */
+ if (RtsFlags.GranFlags.DoPriorityScheduling && tso->gran.pri!=0)
+    /* adapted from add_to_spark_queue; skip the first element */
+ for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count=0;
+ (next != END_TSO_QUEUE) &&
+ !(found = tso->gran.pri >= next->gran.pri);
+ prev = next, next = next->link, count++)
+ {
+ ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+ (prev==(StgTSO*)NULL || prev->link==next));
+ }
+
+ ASSERT(!found || next != END_TSO_QUEUE);
+ ASSERT(procStatus[proc]!=Idle);
+
+ if (found) {
+ /* found can only be rtsTrue if pri scheduling enabled */
+ ASSERT(RtsFlags.GranFlags.DoPriorityScheduling);
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.non_end_add_threads++;
+ /* Add tso to ThreadQueue between prev and next */
+ tso->link = next;
+ if ( next == (StgTSO*)END_TSO_QUEUE ) {
+ run_queue_tl = tso;
+ } else {
+ /* no back link for TSO chain */
+ }
+
+ if ( prev == (StgTSO*)END_TSO_QUEUE ) {
+ /* Never add TSO as first elem of thread queue; the first */
+ /* element should be the one that is currently running -- HWL */
+ IF_DEBUG(gran,
+ belch("GRAN: Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %p (PRI=%d) as first elem of threadQ (%p) on proc %u (@ %u)\n",
+ tso, tso->gran.pri, run_queue_hd, proc,
+ CurrentTime[proc]));
+ } else {
+ prev->link = tso;
+ }
+ } else { /* !found */ /* or not pri sparking! */
+ /* Add TSO to the end of the thread queue on that processor */
+ run_queue_tls[proc]->link = tso;
+ run_queue_tls[proc] = tso;
+ }
+ ASSERT(RtsFlags.GranFlags.DoPriorityScheduling || count==0);
+ CurrentTime[proc] += count * RtsFlags.GranFlags.Costs.pri_sched_overhead +
+ RtsFlags.GranFlags.Costs.threadqueuetime;
+
+ /* ToDo: check if this is still needed -- HWL
+ if (RtsFlags.GranFlags.DoThreadMigration)
+ ++SurplusThreads;
+
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ !(( event_type == GR_START || event_type == GR_STARTQ) &&
+ RtsFlags.GranFlags.labelling) )
+ DumpRawGranEvent(proc, creator, event_type+1, tso, node,
+ tso->gran.sparkname, spark_queue_len(proc));
+ */
+
+# if defined(GRAN_CHECK)
+ /* Check if thread queue is sorted. Only for testing, really! HWL */
+ if ( RtsFlags.GranFlags.DoPriorityScheduling &&
+ (RtsFlags.GranFlags.Debug.sortedQ) ) {
+ rtsBool sorted = rtsTrue;
+ StgTSO *prev, *next;
+
+ if (run_queue_hds[proc]==END_TSO_QUEUE ||
+ run_queue_hds[proc]->link==END_TSO_QUEUE) {
+ /* just 1 elem => ok */
+ } else {
+      /* ignore the first element! */
+ for (prev = run_queue_hds[proc]->link, next = prev->link;
+ (next != END_TSO_QUEUE) ;
+ prev = next, next = prev->link) {
+ ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+ (prev==(StgTSO*)NULL || prev->link==next));
+ sorted = sorted &&
+ (prev->gran.pri >= next->gran.pri);
+ }
+ }
+ if (!sorted) {
+ fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
+ CurrentProc);
+ G_THREADQ(run_queue_hd,0x1);
+ }
+ }
+# endif
+}
+
+/*
+  GranSimLight_insertThread, which is only used for GranSim Light, is similar
+  to insertThread in that it adds a TSO to a thread queue. However, it assumes
+ that the thread queue is sorted by local clocks and it inserts the TSO at
+ the right place in the queue. Don't create any event, just insert.
+*/
+//@cindex GranSimLight_insertThread
+rtsBool
+GranSimLight_insertThread(tso, proc)
+StgTSO* tso;
+PEs proc;
+{
+ StgTSO *prev, *next;
+ nat count = 0;
+ rtsBool found = rtsFalse;
+
+ ASSERT(RtsFlags.GranFlags.Light);
+
+ /* In GrAnSim-Light we always have an idle `virtual' proc.
+ The semantics of the one-and-only thread queue is different here:
+ all threads in the queue are running (each on its own virtual processor);
+ the queue is only needed internally in the simulator to interleave the
+ reductions of the different processors.
+ The one-and-only thread queue is sorted by the local clocks of the TSOs.
+ */
+ ASSERT(run_queue_hds[proc] != END_TSO_QUEUE);
+ ASSERT(tso->link == END_TSO_QUEUE);
+
+ /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ (RtsFlags.GranFlags.Debug.checkLight) &&
+ (run_queue_hd->link == END_TSO_QUEUE)) {
+ DumpRawGranEvent(proc, proc, GR_DESCHEDULE,
+ run_queue_hds[proc], (StgClosure*)NULL,
+		     tso->gran.sparkname, spark_queue_len(proc)); // ToDo: check spark_queue_len
+ // resched = rtsTrue;
+ }
+
+ /* this routine should only be used in a GrAnSim Light setup */
+ /* && CurrentProc must be 0 in GrAnSim Light setup */
+ ASSERT(RtsFlags.GranFlags.Light && CurrentProc==0);
+
+ /* Idle proc; same for pri spark and basic version */
+ if (run_queue_hd==END_TSO_QUEUE)
+ {
+ run_queue_hd = run_queue_tl = tso;
+ /* MAKE_BUSY(CurrentProc); */
+ return rtsTrue;
+ }
+
+ for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count = 0;
+ (next != END_TSO_QUEUE) &&
+ !(found = (tso->gran.clock < next->gran.clock));
+ prev = next, next = next->link, count++)
+ {
+ ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+ (prev==(StgTSO*)NULL || prev->link==next));
+ }
+
+  /* found is rtsTrue if we found the insertion point by comparing local clocks */
+ if (found) {
+ /* Add tso to ThreadQueue between prev and next */
+ tso->link = next;
+ if ( next == END_TSO_QUEUE ) {
+ run_queue_tls[proc] = tso;
+ } else {
+ /* no back link for TSO chain */
+ }
+
+ if ( prev == END_TSO_QUEUE ) {
+ run_queue_hds[proc] = tso;
+ } else {
+ prev->link = tso;
+ }
+ } else { /* !found */ /* or not pri sparking! */
+ /* Add TSO to the end of the thread queue on that processor */
+ run_queue_tls[proc]->link = tso;
+ run_queue_tls[proc] = tso;
+ }
+
+ if ( prev == END_TSO_QUEUE ) { /* new head of queue */
+ new_event(proc, proc, CurrentTime[proc],
+ ContinueThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+ }
+ /*
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ !(( event_type == GR_START || event_type == GR_STARTQ) &&
+ RtsFlags.GranFlags.labelling) )
+ DumpRawGranEvent(proc, creator, gr_evttype, tso, node,
+ tso->gran.sparkname, spark_queue_len(proc));
+ */
+ return rtsTrue;
+}
+
+/*
+ endThread is responsible for general clean-up after the thread tso has
+ finished. This includes emitting statistics into the profile etc.
+*/
+void
+endThread(StgTSO *tso, PEs proc)
+{
+ ASSERT(procStatus[proc]==Busy); // coming straight out of STG land
+ ASSERT(tso->what_next==ThreadComplete);
+ // ToDo: prune ContinueThreads for this TSO from event queue
+ DumpEndEvent(proc, tso, rtsFalse /* not mandatory */);
+
+ /* if this was the last thread on this PE then make it Idle */
+ if (run_queue_hds[proc]==END_TSO_QUEUE) {
+    procStatus[proc] = Idle;
+ }
+}
+
+//@node Thread Queue routines, GranSim functions, Scheduling functions, GranSim specific code
+//@subsection Thread Queue routines
+
+/*
+ Check whether given tso resides on the run queue of the current processor.
+ Only used for debugging.
+*/
+
+//@cindex is_on_queue
+rtsBool
+is_on_queue (StgTSO *tso, PEs proc)
+{
+ StgTSO *t;
+ rtsBool found;
+
+ for (t=run_queue_hds[proc], found=rtsFalse;
+ t!=END_TSO_QUEUE && !(found = t==tso);
+ t=t->link)
+ /* nothing */ ;
+
+ return found;
+}
+
+/* This routine is only used for keeping statistics of thread queue
+   lengths to evaluate the impact of priority scheduling. -- HWL
+   (adapted from spark_queue_len)
+*/
+//@cindex thread_queue_len
+nat
+thread_queue_len(PEs proc)
+{
+ StgTSO *prev, *next;
+ nat len;
+
+ for (len = 0, prev = END_TSO_QUEUE, next = run_queue_hds[proc];
+ next != END_TSO_QUEUE;
+ len++, prev = next, next = prev->link)
+ {}
+
+ return (len);
+}
+
+//@node GranSim functions, GranSimLight routines, Thread Queue routines, GranSim specific code
+//@subsection GranSim functions
+
+/* ----------------------------------------------------------------- */
+/* The main event handling functions; called from Schedule.c (schedule) */
+/* ----------------------------------------------------------------- */
+
+//@cindex do_the_globalblock
+
+void
+do_the_globalblock(rtsEvent* event)
+{
+ PEs proc = event->proc; /* proc that requested node */
+ StgTSO *tso = event->tso; /* tso that requested node */
+ StgClosure *node = event->node; /* requested, remote node */
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the GlobalBlock\n"));
+ /* There should be no GLOBALBLOCKs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* GlobalBlock events only valid with GUM fetching */
+ ASSERT(RtsFlags.GranFlags.DoBulkFetching);
+
+ IF_GRAN_DEBUG(bq, // globalBlock,
+ if (IS_LOCAL_TO(PROCS(node),proc)) {
+ belch("## Qagh: GlobalBlock: Blocking TSO %d (%p) on LOCAL node %p (PE %d).\n",
+ tso->id, tso, node, proc);
+ });
+
+ /* CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.munpacktime; */
+ if ( blockFetch(tso,proc,node) != 0 )
+ return; /* node has become local by now */
+
+#if 0
+ ToDo: check whether anything has to be done at all after blockFetch -- HWL
+
+ if (!RtsFlags.GranFlags.DoAsyncFetch) { /* head of queue is next thread */
+ StgTSO* tso = run_queue_hds[proc]; /* awaken next thread */
+ if (tso != (StgTSO*)NULL) {
+ new_event(proc, proc, CurrentTime[proc],
+ ContinueThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime;
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, CurrentProc, GR_SCHEDULE, tso,
+ (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc)); // ToDo: check sparkname and spar_queue_len
+ procStatus[proc] = Busy; /* might have been fetching */
+ } else {
+ procStatus[proc] = Idle; /* no work on proc now */
+ }
+ } else { /* RtsFlags.GranFlags.DoAsyncFetch i.e. block-on-fetch */
+ /* other thread is already running */
+ /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
+ new_event(proc,proc,CurrentTime[proc],
+ CONTINUETHREAD,EVENT_TSO(event),
+ (RtsFlags.GranFlags.DoBulkFetching ? closure :
+ EVENT_NODE(event)),NULL);
+ */
+ }
+#endif
+}
+
+//@cindex do_the_unblock
+
+void
+do_the_unblock(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc that requested node */
+      creator = event->creator; /* proc that holds the requested node */
+ StgTSO* tso = event->tso; /* tso that requested node */
+ StgClosure* node = event->node; /* requested, remote node */
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the UnBlock\n"));
+ /* There should be no UNBLOCKs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* UnblockThread means either FetchReply has arrived or
+ a blocking queue has been awakened;
+ ToDo: check with assertions
+ ASSERT(procStatus[proc]==Fetching || IS_BLACK_HOLE(event->node));
+ */
+ if (!RtsFlags.GranFlags.DoAsyncFetch) { /* block-on-fetch */
+ /* We count block-on-fetch as normal block time */
+ tso->gran.blocktime += CurrentTime[proc] - tso->gran.blockedat;
+ /* Dumping now done when processing the event
+ No costs for contextswitch or thread queueing in this case
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, CurrentProc, GR_RESUME, tso,
+ (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc));
+ */
+ /* Maybe do this in FetchReply already
+ if (procStatus[proc]==Fetching)
+ procStatus[proc] = Busy;
+ */
+ /*
+ new_event(proc, proc, CurrentTime[proc],
+ ContinueThread,
+ tso, node, (rtsSpark*)NULL);
+ */
+ } else {
+ /* Asynchr comm causes additional costs here: */
+ /* Bring the TSO from the blocked queue into the threadq */
+ }
+ /* In all cases, the UnblockThread causes a ResumeThread to be scheduled */
+ new_event(proc, proc,
+ CurrentTime[proc]+RtsFlags.GranFlags.Costs.threadqueuetime,
+ ResumeThread,
+ tso, node, (rtsSpark*)NULL);
+}
+
+//@cindex do_the_fetchnode
+
+void
+do_the_fetchnode(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc that holds the requested node */
+ creator = event->creator; /* proc that requested node */
+ StgTSO* tso = event->tso;
+ StgClosure* node = event->node; /* requested, remote node */
+ rtsFetchReturnCode rc;
+
+ ASSERT(CurrentProc==proc);
+ /* There should be no FETCHNODEs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchNode\n"));
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(creator, proc, GR_FETCH, tso, node, (StgInt)0, 0);
+
+ do {
+ rc = handleFetchRequest(node, proc, creator, tso);
+ if (rc == OutOfHeap) { /* trigger GC */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ if (RtsFlags.GcFlags.giveStats)
+ fprintf(RtsFlags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %p, tso %p (%d))\n",
+ node, tso, tso->id);
+# endif
+ barf("//// do_the_fetchnode: out of heap after handleFetchRequest; ToDo: call GarbageCollect()");
+ prepend_event(event);
+ GarbageCollect(GetRoots, rtsFalse);
+ // HWL: ToDo: check whether a ContinueThread has to be issued
+ // HWL old: ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
+# if 0 && defined(GRAN_CHECK) && defined(GRAN)
+ if (RtsFlags.GcFlags.giveStats) {
+ fprintf(RtsFlags.GcFlags.statsFile,"***** SAVE_Hp=%p, SAVE_HpLim=%p, PACK_HEAP_REQUIRED=%d\n",
+ Hp, HpLim, 0) ; // PACK_HEAP_REQUIRED); ???
+ fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
+ globalGranStats.tot_packets, globalGranStats.tot_packet_size);
+ }
+# endif
+ event = grab_event();
+ // Hp -= PACK_HEAP_REQUIRED; // ???
+
+ /* GC knows that events are special and follows the pointer i.e. */
+ /* events are valid even if they moved. An EXIT is triggered */
+ /* if there is not enough heap after GC. */
+ }
+ } while (rc == OutOfHeap);
+}
+
+//@cindex do_the_fetchreply
+void
+do_the_fetchreply(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc that requested node */
+ creator = event->creator; /* proc that holds the requested node */
+ StgTSO* tso = event->tso;
+ StgClosure* node = event->node; /* requested, remote node */
+ StgClosure* closure=(StgClosure*)NULL;
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(RtsFlags.GranFlags.DoAsyncFetch || procStatus[proc]==Fetching);
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchReply\n"));
+ /* There should be no FETCHREPLYs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ /* assign message unpack costs *before* dumping the event */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, creator, GR_REPLY, tso, node,
+ tso->gran.sparkname, spark_queue_len(proc));
+
+ /* THIS SHOULD NEVER HAPPEN
+ If tso is in the BQ of node this means that it actually entered the
+ remote closure, due to a missing GranSimFetch at the beginning of the
+ entry code; therefore, this is actually a faked fetch, triggered from
+ within GranSimBlock;
+ since tso is both in the EVQ and the BQ for node, we have to take it out
+ of the BQ first before we can handle the FetchReply;
+ ToDo: special cases in awakenBlockedQueue, since the BQ magically moved.
+ */
+ if (tso->block_info.closure!=(StgClosure*)NULL) {
+ IF_GRAN_DEBUG(bq,
+ belch("## ghuH: TSO %d (%p) in FetchReply is blocked on node %p (shouldn't happen AFAIK)",
+ tso->id, tso, node));
+ // unlink_from_bq(tso, node);
+ }
+
+ if (RtsFlags.GranFlags.DoBulkFetching) { /* bulk (packet) fetching */
+ rtsPackBuffer *buffer = (rtsPackBuffer*)node;
+ nat size = buffer->size;
+
+ /* NB: Fetch misses can't occur with GUM fetching, as */
+    /* updatable closures are turned into RBHs and therefore locked    */
+ /* for other processors that try to grab them. */
+
+ closure = UnpackGraph(buffer);
+ CurrentTime[proc] += size * RtsFlags.GranFlags.Costs.munpacktime;
+ } else // incremental fetching
+ /* Copy or move node to CurrentProc */
+ if (fetchNode(node, creator, proc)) {
+ /* Fetch has failed i.e. node has been grabbed by another PE */
+ PEs p = where_is(node);
+ rtsTime fetchtime;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.fetch_misses++;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== Qu'vatlh! fetch miss @ %u: node %p is at proc %u (rather than proc %u)\n",
+ CurrentTime[proc],node,p,creator));
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ /* Count fetch again !? */
+ ++(tso->gran.fetchcount);
+ tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime;
+
+ fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ /* Chase the grabbed node */
+ new_event(p, proc, fetchtime,
+ FetchNode,
+ tso, node, (rtsSpark*)NULL);
+
+# if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ IF_GRAN_DEBUG(blockOnFetch,
+ BlockedOnFetch[CurrentProc] = tso;) /*-rtsTrue;-*/
+
+ IF_GRAN_DEBUG(blockOnFetch_sanity,
+ tso->type |= FETCH_MASK_TSO;)
+# endif
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ return; /* NB: no REPLy has been processed; tso still sleeping */
+ }
+
+ /* -- Qapla'! Fetch has been successful; node is here, now */
+ ++(event->tso->gran.fetchcount);
+ event->tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime;
+
+ /* this is now done at the beginning of this routine
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc,event->creator, GR_REPLY, event->tso,
+ (RtsFlags.GranFlags.DoBulkFetching ?
+ closure :
+ event->node),
+ tso->gran.sparkname, spark_queue_len(proc));
+ */
+
+ ASSERT(OutstandingFetches[proc] > 0);
+ --OutstandingFetches[proc];
+ new_event(proc, proc, CurrentTime[proc],
+ ResumeThread,
+ event->tso, (RtsFlags.GranFlags.DoBulkFetching ?
+ closure :
+ event->node),
+ (rtsSpark*)NULL);
+}
+
+//@cindex do_the_movethread
+
+void
+do_the_movethread(rtsEvent* event) {
+ PEs proc = event->proc, /* proc that requested node */
+ creator = event->creator; /* proc that holds the requested node */
+ StgTSO* tso = event->tso;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveThread\n"));
+
+ ASSERT(CurrentProc==proc);
+ /* There should be no MOVETHREADs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* MOVETHREAD events should never occur without -bM */
+ ASSERT(RtsFlags.GranFlags.DoThreadMigration);
+ /* Bitmask of moved thread should be 0 */
+ ASSERT(PROCS(tso)==0);
+ ASSERT(procStatus[proc] == Fishing ||
+ RtsFlags.GranFlags.DoAsyncFetch);
+ ASSERT(OutstandingFishes[proc]>0);
+
+ /* ToDo: exact costs for unpacking the whole TSO */
+ CurrentTime[proc] += 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, creator,
+ GR_STOLEN, tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+ // ToDo: check cost functions
+ --OutstandingFishes[proc];
+ SET_GRAN_HDR(tso, ThisPE); // adjust the bitmask for the TSO
+ insertThread(tso, proc);
+
+ if (procStatus[proc]==Fishing)
+ procStatus[proc] = Idle;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.tot_TSOs_migrated++;
+}
+
+//@cindex do_the_movespark
+
+void
+do_the_movespark(rtsEvent* event) {
+ PEs proc = event->proc, /* proc that requested spark */
+ creator = event->creator; /* proc that holds the requested spark */
+ StgTSO* tso = event->tso;
+ rtsSparkQ spark = event->spark;
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveSpark\n"));
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(spark!=NULL);
+ ASSERT(procStatus[proc] == Fishing ||
+ RtsFlags.GranFlags.DoAsyncFetch);
+ ASSERT(OutstandingFishes[proc]>0);
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* record movement of spark only if spark profiling is turned on */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc, creator,
+ SP_ACQUIRED,
+ tso, spark->node, spark->name, spark_queue_len(proc));
+
+ /* global statistics */
+ if ( RtsFlags.GranFlags.GranSimStats.Global &&
+ !closure_SHOULD_SPARK(spark->node))
+ globalGranStats.withered_sparks++;
+ /* Not adding the spark to the spark queue would be the right */
+ /* thing here, but it also would be cheating, as this info can't be */
+ /* available in a real system. -- HWL */
+
+ --OutstandingFishes[proc];
+
+ add_to_spark_queue(spark);
+
+ IF_GRAN_DEBUG(randomSteal, // ToDo: spark-distribution flag
+ print_sparkq_stats());
+
+ /* Should we treat stolen sparks specially? Currently, we don't. */
+
+ if (procStatus[proc]==Fishing)
+ procStatus[proc] = Idle;
+
+ /* add_to_spark_queue will increase the time of the current proc. */
+ /*
+ If proc was fishing, it is Idle now with the new spark in its spark
+ pool. This means that the next time handleIdlePEs is called, a local
+ FindWork will be created on this PE to turn the spark into a thread. Of
+ course another PE might steal the spark in the meantime (that's why we
+ are using events rather than inlining all the operations in the first
+ place). */
+}
+
+/*
+  In the Constellation class version of GranSim the semantics of StartThread
+  events have changed. Now, StartThread has to perform 3 basic operations:
+ - create a new thread (previously this was done in ActivateSpark);
+ - insert the thread into the run queue of the current processor
+ - generate a new event for actually running the new thread
+  Note that insertThread is called via createThread.
+*/
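+
+/* Sketch of the resulting event sequence (following the code below): a
+   StartThread event leads to createThread (with an implicit insertThread)
+   and then to a ContinueThread event on the same PE, which actually runs
+   the new thread. */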
+
+//@cindex do_the_startthread
+
+void
+do_the_startthread(rtsEvent *event)
+{
+ PEs proc = event->proc; /* proc that requested node */
+ StgTSO *tso = event->tso; /* tso that requested node */
+ StgClosure *node = event->node; /* requested, remote node */
+ rtsSpark *spark = event->spark;
+ GranEventType gr_evttype;
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
+ ASSERT(event->evttype == ResumeThread || event->evttype == StartThread);
+ /* if this was called via StartThread: */
+ ASSERT(event->evttype!=StartThread || tso == END_TSO_QUEUE); // not yet created
+ // ToDo: check: ASSERT(event->evttype!=StartThread || procStatus[proc]==Starting);
+ /* if this was called via ResumeThread: */
+ ASSERT(event->evttype!=ResumeThread ||
+ RtsFlags.GranFlags.DoAsyncFetch ||!is_on_queue(tso,proc));
+
+ /* startThread may have been called from the main event handler upon
+ finding either a ResumeThread or a StartThread event; set the
+ gr_evttype (needed for writing to .gr file) accordingly */
+ // gr_evttype = (event->evttype == ResumeThread) ? GR_RESUME : GR_START;
+
+ if ( event->evttype == StartThread ) {
+    gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+                   GR_START : GR_STARTQ;
+
+ tso = createThread(BLOCK_SIZE_W, spark->gran_info);// implicit insertThread!
+ pushClosure(tso, node);
+
+ // ToDo: fwd info on local/global spark to thread -- HWL
+ // tso->gran.exported = spark->exported;
+ // tso->gran.locked = !spark->global;
+ tso->gran.sparkname = spark->name;
+
+ ASSERT(CurrentProc==proc);
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpGranEvent(gr_evttype,tso);
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcreatetime;
+ } else { // event->evttype == ResumeThread
+    gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+                   GR_RESUME : GR_RESUMEQ;
+
+ insertThread(tso, proc);
+
+ ASSERT(CurrentProc==proc);
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpGranEvent(gr_evttype,tso);
+ }
+
+ ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); // non-empty run queue
+ procStatus[proc] = Busy;
+ /* make sure that this thread is actually run */
+ new_event(proc, proc,
+ CurrentTime[proc],
+ ContinueThread,
+ tso, node, (rtsSpark*)NULL);
+
+ /* A wee bit of statistics gathering */
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_add_threads++;
+ globalGranStats.tot_tq_len += thread_queue_len(CurrentProc);
+ }
+
+}
+
+//@cindex do_the_findwork
+void
+do_the_findwork(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc to search for work */
+ creator = event->creator; /* proc that requested work */
+ rtsSparkQ spark = event->spark;
+ /* ToDo: check that this size is safe -- HWL */
+#if 0
+ ToDo: check available heap
+
+ nat req_heap = sizeofW(StgTSO) + MIN_STACK_WORDS;
+ // add this? -- HWL:RtsFlags.ConcFlags.stkChunkSize;
+#endif
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the Findwork\n"));
+
+ /* If GUM style fishing is enabled, the contents of the spark field says
+ what to steal (spark(1) or thread(2)); */
+ ASSERT(!(RtsFlags.GranFlags.Fishing && event->spark==(rtsSpark*)0));
+
+ /* Make sure that we have enough heap for creating a new
+ thread. This is a conservative estimate of the required heap.
+ This eliminates special checks for GC around NewThread within
+ ActivateSpark. */
+
+#if 0
+ ToDo: check available heap
+
+ if (Hp + req_heap > HpLim ) {
+ IF_DEBUG(gc,
+ belch("GC: Doing GC from within Findwork handling (that's bloody dangerous if you ask me)");)
+ GarbageCollect(GetRoots);
+ // ReallyPerformThreadGC(req_heap, rtsFalse); old -- HWL
+ Hp -= req_heap;
+ if (procStatus[CurrentProc]==Sparking)
+ procStatus[CurrentProc]=Idle;
+ return;
+ }
+#endif
+
+ if ( RtsFlags.GranFlags.DoAlwaysCreateThreads ||
+ RtsFlags.GranFlags.Fishing ||
+ ((procStatus[proc]==Idle || procStatus[proc]==Sparking) &&
+ (RtsFlags.GranFlags.FetchStrategy >= 2 ||
+ OutstandingFetches[proc] == 0)) )
+ {
+ rtsBool found;
+ rtsSparkQ prev, spark;
+
+ /* ToDo: check */
+ ASSERT(procStatus[proc]==Sparking ||
+ RtsFlags.GranFlags.DoAlwaysCreateThreads ||
+ RtsFlags.GranFlags.Fishing);
+
+ /* SImmoHwI' yInej! Search spark queue! */
+ /* gimme_spark (event, &found, &spark); */
+ findLocalSpark(event, &found, &spark);
+
+ if (!found) { /* pagh vumwI' */
+ /*
+ If no spark has been found this can mean 2 things:
+ 1/ The FindWork was a fish (i.e. a message sent by another PE) and
+ the spark pool of the receiver is empty
+ --> the fish has to be forwarded to another PE
+	 2/ The FindWork was local to this PE (i.e. no communication; in this
+	    case creator==proc) and the spark pool of the PE is not empty but
+	    contains only sparks of closures that should not be sparked
+ (note: if the spark pool were empty, handleIdlePEs wouldn't have
+ generated a FindWork in the first place)
+ --> the PE has to be made idle to trigger stealing sparks the next
+ time handleIdlePEs is performed
+ */
+
+ ASSERT(pending_sparks_hds[proc]==(rtsSpark*)NULL);
+ if (creator==proc) {
+ /* local FindWork */
+ if (procStatus[proc]==Busy) {
+ belch("ghuH: PE %d in Busy state while processing local FindWork (spark pool is empty!) @ %lx",
+ proc, CurrentTime[proc]);
+ procStatus[proc] = Idle;
+ }
+ } else {
+ /* global FindWork i.e. a Fish */
+ ASSERT(RtsFlags.GranFlags.Fishing);
+ /* actually this generates another request from the originating PE */
+ ASSERT(OutstandingFishes[creator]>0);
+ OutstandingFishes[creator]--;
+ /* ToDo: assign costs for sending fish to proc not to creator */
+ stealSpark(creator); /* might steal from same PE; ToDo: fix */
+ ASSERT(RtsFlags.GranFlags.maxFishes!=1 || procStatus[creator] == Fishing);
+ /* any assertions on state of proc possible here? */
+ }
+ } else {
+ /* DaH chu' Qu' yIchen! Now create new work! */
+ IF_GRAN_DEBUG(findWork,
+ belch("+- munching spark %p; creating thread for node %p",
+ spark, spark->node));
+ activateSpark (event, spark);
+ ASSERT(spark != (rtsSpark*)NULL);
+ spark = delete_from_sparkq (spark, proc, rtsTrue);
+ }
+
+ IF_GRAN_DEBUG(findWork,
+ belch("+- Contents of spark queues at the end of FindWork @ %lx",
+ CurrentTime[proc]);
+ print_sparkq_stats());
+
+ /* ToDo: check ; not valid if GC occurs in ActivateSpark */
+ ASSERT(!found ||
+ /* forward fish or */
+ (proc!=creator ||
+ /* local spark or */
+ (proc==creator && procStatus[proc]==Starting)) ||
+ //(!found && procStatus[proc]==Idle) ||
+ RtsFlags.GranFlags.DoAlwaysCreateThreads);
+ } else {
+ IF_GRAN_DEBUG(findWork,
+ belch("+- RTS refuses to findWork on PE %d @ %lx",
+ proc, CurrentTime[proc]);
+ belch(" procStatus[%d]=%s, fetch strategy=%d, outstanding fetches[%d]=%d",
+ proc, proc_status_names[procStatus[proc]],
+ RtsFlags.GranFlags.FetchStrategy,
+ proc, OutstandingFetches[proc]));
+ }
+}
+
+//@node GranSimLight routines, Code for Fetching Nodes, GranSim functions, GranSim specific code
+//@subsection GranSimLight routines
+
+/*
+  This code is called from the central scheduler after having grabbed a
+ new event and is only needed for GranSim-Light. It mainly adjusts the
+ ActiveTSO so that all costs that have to be assigned from within the
+ scheduler are assigned to the right TSO. The choice of ActiveTSO depends
+ on the type of event that has been found.
+*/
+
+void
+GranSimLight_enter_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+ StgTSO *ActiveTSO = *ActiveTSOp;
+
+ ASSERT (RtsFlags.GranFlags.Light);
+
+ /* Restore local clock of the virtual processor attached to CurrentTSO.
+ All costs will be associated to the `virt. proc' on which the tso
+ is living. */
+ if (ActiveTSO != NULL) { /* already in system area */
+ ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+ if (RtsFlags.GranFlags.DoFairSchedule)
+ {
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight)
+ DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+ }
+ }
+ switch (event->evttype)
+ {
+ case ContinueThread:
+ case FindWork: /* inaccurate this way */
+ ActiveTSO = run_queue_hd;
+ break;
+ case ResumeThread:
+ case StartThread:
+ case MoveSpark: /* has tso of virt proc in tso field of event */
+ ActiveTSO = event->tso;
+ break;
+ default: barf("Illegal event type %s (%d) in GrAnSim Light setup\n",
+ event_names[event->evttype],event->evttype);
+ }
+ CurrentTime[CurrentProc] = ActiveTSO->gran.clock;
+ if (RtsFlags.GranFlags.DoFairSchedule) {
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight)
+ DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
+  }
+  *ActiveTSOp = ActiveTSO;  /* hand the chosen TSO back to the caller */
+}
+
+void
+GranSimLight_leave_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+ StgTSO *ActiveTSO = *ActiveTSOp;
+
+ ASSERT(RtsFlags.GranFlags.Light);
+
+ /* Save time of `virt. proc' which was active since last getevent and
+ restore time of `virt. proc' where CurrentTSO is living on. */
+ if(RtsFlags.GranFlags.DoFairSchedule) {
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight) // ToDo: clean up flags
+ DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+ }
+ ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+ ActiveTSO = (StgTSO*)NULL;
+ CurrentTime[CurrentProc] = CurrentTSO->gran.clock;
+ if (RtsFlags.GranFlags.DoFairSchedule /* && resched */ ) {
+ // resched = rtsFalse;
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight)
+ DumpGranEvent(GR_SCHEDULE,run_queue_hd);
+  }
+  *ActiveTSOp = ActiveTSO;  /* i.e. (StgTSO*)NULL; hand back to the caller */
+ /*
+ if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
+ (TimeOfNextEvent == 0 ||
+ TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
+ new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
+ CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL);
+ TimeOfNextEvent = get_time_of_next_event();
+ }
+ */
+}
+
+//@node Code for Fetching Nodes, Idle PEs, GranSimLight routines, GranSim specific code
+//@subsection Code for Fetching Nodes
+
+/*
+ The following GrAnSim routines simulate the fetching of nodes from a
+ remote processor. We use a 1 word bitmask to indicate on which processor
+ a node is lying. Thus, moving or copying a node from one processor to
+ another just requires an appropriate change in this bitmask (using
+ @SET_GA@). Additionally, the clocks have to be updated.
+
+ A special case arises when the node that is needed by processor A has
+ been moved from a processor B to a processor C between sending out a
+ @FETCH@ (from A) and its arrival at B. In that case the @FETCH@ has to
+ be forwarded to C. This is simulated by issuing another FetchNode event
+ on processor C with A as creator.
+*/
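+
+/* For illustration (a sketch of the bitmask semantics; cf. fetchNode below,
+   for a destination PE p):
+     node->header.gran.procs |= PE_NUMBER(p);    copy: keep old locations
+     node->header.gran.procs  = PE_NUMBER(p);    move: drop old locations
+*/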
+
+/* ngoqvam che' {GrAnSim}! */
+
+/* Fetch node "node" to processor "p" */
+
+//@cindex fetchNode
+
+rtsFetchReturnCode
+fetchNode(node,from,to)
+StgClosure* node;
+PEs from, to;
+{
+ /* In case of RtsFlags.GranFlags.DoBulkFetching this fct should never be
+ entered! Instead, UnpackGraph is used in ReSchedule */
+ StgClosure* closure;
+
+ ASSERT(to==CurrentProc);
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* fetchNode should never be entered with DoBulkFetching */
+ ASSERT(!RtsFlags.GranFlags.DoBulkFetching);
+
+ /* Now fetch the node */
+ if (!IS_LOCAL_TO(PROCS(node),from) &&
+ !IS_LOCAL_TO(PROCS(node),to) )
+ return NodeHasMoved;
+
+ if (closure_HNF(node)) /* node already in head normal form? */
+ node->header.gran.procs |= PE_NUMBER(to); /* Copy node */
+ else
+ node->header.gran.procs = PE_NUMBER(to); /* Move node */
+
+ return Ok;
+}
+
+/*
+ Process a fetch request.
+
+ Cost of sending a packet of size n = C + P*n
+ where C = packet construction constant,
+ P = cost of packing one word into a packet
+ [Should also account for multiple packets].
+*/
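+
+/* Worked example with hypothetical figures: for C = 400 cycles and
+   P = 2 cycles/word, a packet of n = 50 words costs C + P*n =
+   400 + 2*50 = 500 cycles. In handleFetchRequest below the P*n part
+   shows up as size * Costs.mpacktime; C is not modelled separately but
+   is arguably subsumed in the fixed Costs.latency/mtidytime overheads. */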
+
+//@cindex handleFetchRequest
+
+rtsFetchReturnCode
+handleFetchRequest(node,to,from,tso)
+StgClosure* node; // the node which is requested
+PEs to, from; // fetch request: from -> to
+StgTSO* tso; // the tso which needs the node
+{
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* ToDo: check assertion */
+ ASSERT(OutstandingFetches[from]>0);
+
+ /* probably wrong place; */
+ ASSERT(CurrentProc==to);
+
+ if (IS_LOCAL_TO(PROCS(node), from)) /* Somebody else moved node already => */
+ { /* start tso */
+ IF_GRAN_DEBUG(thunkStealing,
+ fprintf(stderr,"ghuH: handleFetchRequest entered with local node %p (%s) (PE %d)\n",
+ node, info_type(node), from));
+
+ if (RtsFlags.GranFlags.DoBulkFetching) {
+ nat size;
+ rtsPackBuffer *graph;
+
+ /* Create a 1-node-buffer and schedule a FETCHREPLY now */
+ graph = PackOneNode(node, tso, &size);
+ new_event(from, to, CurrentTime[to],
+ FetchReply,
+ tso, (StgClosure *)graph, (rtsSpark*)NULL);
+ } else {
+ new_event(from, to, CurrentTime[to],
+ FetchReply,
+ tso, node, (rtsSpark*)NULL);
+ }
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== majQa'! closure %p is local on PE %d already (this is a good thing)", node, from));
+ return (NodeIsLocal);
+ }
+ else if (IS_LOCAL_TO(PROCS(node), to) ) /* Is node still here? */
+ {
+ if (RtsFlags.GranFlags.DoBulkFetching) { /* {GUM}vo' ngoqvam vInIHta' */
+ nat size; /* (code from GUM) */
+ StgClosure* graph;
+
+ if (IS_BLACK_HOLE(node)) { /* block on BH or RBH */
+ new_event(from, to, CurrentTime[to],
+ GlobalBlock,
+ tso, node, (rtsSpark*)NULL);
+ /* Note: blockFetch is done when handling GLOBALBLOCK event;
+ make sure the TSO stays out of the run queue */
+ /* When this thread is reawoken it does the usual: it tries to
+ enter the updated node and issues a fetch if it's remote.
+ It has forgotten that it has sent a fetch already (i.e. a
+ FETCHNODE is swallowed by a BH, leaving the thread in a BQ) */
+ --OutstandingFetches[from];
+
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== majQa'! closure %p on PE %d is a BH (demander=PE %d); faking a FMBQ",
+ node, to, from));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_FMBQs++;
+ }
+ return (NodeIsBH);
+ }
+
+ /* The tso requesting the node is blocked and cannot be on a run queue */
+ ASSERT(!is_on_queue(tso, from));
+
+ // ToDo: check whether graph is ever used as an rtsPackBuffer!!
+ if ((graph = (StgClosure *)PackNearbyGraph(node, tso, &size, 0)) == NULL)
+ return (OutOfHeap); /* out of heap */
+
+ /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+ /* Send a reply to the originator */
+ /* ToDo: Replace that by software costs for doing graph packing! */
+ CurrentTime[to] += size * RtsFlags.GranFlags.Costs.mpacktime;
+
+ new_event(from, to,
+ CurrentTime[to]+RtsFlags.GranFlags.Costs.latency,
+ FetchReply,
+ tso, (StgClosure *)graph, (rtsSpark*)NULL);
+
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+ return (Ok);
+ } else { /* incremental (single closure) fetching */
+ /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+ /* Send a reply to the originator */
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ new_event(from, to,
+ CurrentTime[to]+RtsFlags.GranFlags.Costs.latency,
+ FetchReply,
+ tso, node, (rtsSpark*)NULL);
+
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+ return (Ok);
+ }
+ }
+ else /* Qu'vatlh! node has been grabbed by another proc => forward */
+ {
+ PEs node_loc = where_is(node);
+ rtsTime fetchtime;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== Qu'vatlh! node %p has been grabbed by PE %d from PE %d (demander=%d) @ %d\n",
+ node,node_loc,to,from,CurrentTime[to]));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.fetch_misses++;
+ }
+
+ /* Prepare FORWARD message to proc p_new */
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ fetchtime = stg_max(CurrentTime[to], CurrentTime[node_loc]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ new_event(node_loc, from, fetchtime,
+ FetchNode,
+ tso, node, (rtsSpark*)NULL);
+
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ return (NodeHasMoved);
+ }
+}
+
+/*
+  blockFetch blocks a TSO (a BlockedFetch node in GUM) on some kind of black hole.
+
+ Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
+
+ {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
+ create @FMBQ@'s (FetchMe blocking queues) to cope with global
+  blocking. Instead, non-local TSOs are put into the BQ in the same way as
+ local TSOs. However, we have to check if a TSO is local or global in
+ order to account for the latencies involved and for keeping track of the
+ number of fetches that are really going on.
+*/
+
+//@cindex blockFetch
+
+rtsFetchReturnCode
+blockFetch(tso, proc, bh)
+StgTSO* tso; /* TSO which gets blocked */
+PEs proc; /* PE where that tso was running */
+StgClosure* bh; /* closure to block on (BH, RBH, BQ) */
+{
+ StgInfoTable *info;
+
+ IF_GRAN_DEBUG(bq,
+ fprintf(stderr,"## blockFetch: blocking TSO %p (%d)[PE %d] on node %p (%s) [PE %d]. No graph is packed!\n",
+ tso, tso->id, proc, bh, info_type(bh), where_is(bh)));
+
+ if (!IS_BLACK_HOLE(bh)) { /* catches BHs and RBHs */
+ IF_GRAN_DEBUG(bq,
+ fprintf(stderr,"## blockFetch: node %p (%s) is not a BH => awakening TSO %p (%d) [PE %u]\n",
+ bh, info_type(bh), tso, tso->id, proc));
+
+ /* No BH anymore => immediately unblock tso */
+ new_event(proc, proc, CurrentTime[proc],
+ UnblockThread,
+ tso, bh, (rtsSpark*)NULL);
+
+ /* Is this always a REPLY to a FETCH in the profile ? */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, proc, GR_REPLY, tso, bh, (StgInt)0, 0);
+ return (NodeIsNoBH);
+ }
+
+ /* DaH {BQ}Daq Qu' Suq 'e' wISov!
+ Now we know that we have to put the tso into the BQ.
+ 2 cases: If block-on-fetch, tso is at head of threadq =>
+ => take it out of threadq and into BQ
+     If reschedule-on-fetch, tso is only pointed to by the event
+ => just put it into BQ
+
+ ngoq ngo'!!
+ if (!RtsFlags.GranFlags.DoAsyncFetch) {
+ GranSimBlock(tso, proc, bh);
+ } else {
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, where_is(bh), GR_BLOCK, tso, bh, (StgInt)0, 0);
+ ++(tso->gran.blockcount);
+ tso->gran.blockedat = CurrentTime[proc];
+ }
+ */
+
+ /* after scheduling the GlobalBlock event the TSO is not put into the
+ run queue again; it is only pointed to via the event we are
+ processing now; in GranSim 4.xx there is no difference between
+ synchr and asynchr comm here */
+ ASSERT(!is_on_queue(tso, proc));
+ ASSERT(tso->link == END_TSO_QUEUE);
+
+ GranSimBlock(tso, proc, bh); /* GranSim statistics gathering */
+
+ /* Now, put tso into BQ (similar to blocking entry codes) */
+ info = get_itbl(bh);
+ switch (info -> type) {
+ case RBH:
+ case BLACKHOLE:
+ case CAF_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here
+ case SE_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here
+ case SE_CAF_BLACKHOLE:// ToDo: check whether this is a possibly ITBL here
+ /* basically an inlined version of BLACKHOLE_entry -- HWL */
+ /* Change the BLACKHOLE into a BLACKHOLE_BQ */
+ ((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info;
+ /* Put ourselves on the blocking queue for this black hole */
+ // tso->link=END_TSO_QUEUE; not necessary; see assertion above
+ ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)tso;
+ tso->block_info.closure = bh;
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case BLACKHOLE_BQ:
+ /* basically an inlined version of BLACKHOLE_BQ_entry -- HWL */
+ tso->link = (StgTSO *) (((StgBlockingQueue*)bh)->blocking_queue);
+ ((StgBlockingQueue*)bh)->blocking_queue = (StgBlockingQueueElement *)tso;
+ recordMutable((StgMutClosure *)bh);
+
+# if 0 && defined(GC_MUT_REQUIRED)
+ ToDo: check whether recordMutable is necessary -- HWL
+ /*
+ * If we modify a black hole in the old generation, we have to make
+ * sure it goes on the mutables list
+ */
+
+ if (bh <= StorageMgrInfo.OldLim) {
+ MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
+ StorageMgrInfo.OldMutables = bh;
+ } else
+ MUT_LINK(bh) = MUT_NOT_LINKED;
+# endif
+ break;
+
+ case FETCH_ME_BQ:
+ barf("Qagh: FMBQ closure (%p) found in GrAnSim (TSO=%p (%d))\n",
+ bh, tso, tso->id);
+
+ default:
+ {
+ G_PRINT_NODE(bh);
+ barf("Qagh: thought %p was a black hole (IP %p (%s))",
+ bh, info, info_type(bh));
+ }
+ }
+ return (Ok);
+}
+
+
+//@node Idle PEs, Routines directly called from Haskell world, Code for Fetching Nodes, GranSim specific code
+//@subsection Idle PEs
+
+/*
+ Export work to idle PEs. This function is called from @ReSchedule@
+ before dispatching on the current event. @HandleIdlePEs@ iterates over
+ all PEs, trying to get work for idle PEs. Note, that this is a
+ simplification compared to GUM's fishing model. We try to compensate for
+ that by making the cost for stealing work dependent on the number of
+ idle processors and thereby on the probability with which a randomly
+ sent fish would find work.
+*/
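+
+/* In outline (cf. the code below): for every Idle PE, first try its own
+   spark pool by scheduling a local FindWork; otherwise, within the
+   maxFishes limit, optionally steal a thread first (DoStealThreadsFirst),
+   then try to steal a spark, then a thread. */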
+
+//@cindex handleIdlePEs
+
+void
+handleIdlePEs(void)
+{
+ PEs p;
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: handling Idle PEs\n"));
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ /* Could check whether there are idle PEs if it's a cheap check */
+ for (p = 0; p < RtsFlags.GranFlags.proc; p++)
+ if (procStatus[p]==Idle) /* && IS_SPARKING(p) && IS_STARTING(p) */
+ /* First look for local work i.e. examine local spark pool! */
+ if (pending_sparks_hds[p]!=(rtsSpark *)NULL) {
+ new_event(p, p, CurrentTime[p],
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ procStatus[p] = Sparking;
+ } else if ((RtsFlags.GranFlags.maxFishes==0 ||
+ OutstandingFishes[p]<RtsFlags.GranFlags.maxFishes) ) {
+
+ /* If no local work then try to get remote work!
+ Qu' Hopbe' pagh tu'lu'pu'chugh Qu' Hop yISuq ! */
+ if (RtsFlags.GranFlags.DoStealThreadsFirst &&
+ (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0))
+ {
+ if (SurplusThreads > 0l) /* Steal a thread */
+ stealThread(p);
+
+ if (procStatus[p]!=Idle)
+ break;
+ }
+
+ if (SparksAvail > 0 &&
+ (RtsFlags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[p] == 0)) /* Steal a spark */
+ stealSpark(p);
+
+ if (SurplusThreads > 0 &&
+ (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0)) /* Steal a thread */
+ stealThread(p);
+ }
+}
+
+/*
+ Steal a spark and schedule moving it to proc. We want to look at PEs in
+ clock order -- most retarded first. Currently sparks are only stolen
+  from the @ADVISORY_POOL@, never from the @REQUIRED_POOL@. Eventually,
+ this should be changed to first steal from the former then from the
+ latter.
+
+ We model a sort of fishing mechanism by counting the number of sparks
+ and threads we are currently stealing. */
+
+/*
+  Return a random nat value in the interval [from, to)
+*/
+static nat
+natRandom(from, to)
+nat from, to;
+{
+ nat r, d;
+
+ ASSERT(from<=to);
+ d = to - from;
+ /* random returns a value in [0, RAND_MAX] */
+ r = (nat) ((float)from + ((float)random()*(float)d)/(float)RAND_MAX);
+ r = (r==to) ? from : r;
+ ASSERT(from<=r && (r<to || from==to));
+ return r;
+}
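+
+/* Usage sketch: natRandom(0, RtsFlags.GranFlags.proc) picks a PE number
+   uniformly from 0 .. proc-1; cf. findRandomPE below. */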
+
+/*
+ Find any PE other than proc. Used for GUM style fishing only.
+*/
+static PEs
+findRandomPE (proc)
+PEs proc;
+{
+ nat p;
+
+ ASSERT(RtsFlags.GranFlags.Fishing);
+ if (RtsFlags.GranFlags.RandomSteal) {
+ p = natRandom(0,RtsFlags.GranFlags.proc); /* full range of PEs */
+ } else {
+ p = 0;
+ }
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ RANDOM_STEAL (fishing): stealing from PE %d (current proc is %d)",
+ p, proc));
+
+ return (PEs)p;
+}
+
+/*
+ Magic code for stealing sparks/threads makes use of global knowledge on
+ spark queues.
+*/
+static void
+sortPEsByTime (proc, pes_by_time, firstp, np)
+PEs proc;
+PEs *pes_by_time;
+nat *firstp, *np;
+{
+ PEs p, temp, n, i, j;
+ nat first, upb, r=0, q=0;
+
+ ASSERT(!RtsFlags.GranFlags.Fishing);
+
+#if 0
+ upb = RtsFlags.GranFlags.proc; /* full range of PEs */
+
+ if (RtsFlags.GranFlags.RandomSteal) {
+ r = natRandom(0,RtsFlags.GranFlags.proc); /* full range of PEs */
+ } else {
+ r = 0;
+ }
+#endif
+
+ /* pes_by_time shall contain processors from which we may steal sparks */
+ for(n=0, p=0; p < RtsFlags.GranFlags.proc; ++p)
+ if ((proc != p) && // not the current proc
+ (pending_sparks_hds[p] != (rtsSpark *)NULL) && // non-empty spark pool
+ (CurrentTime[p] <= CurrentTime[CurrentProc]))
+ pes_by_time[n++] = p;
+
+ /* sort pes_by_time */
+ for(i=0; i < n; ++i)
+ for(j=i+1; j < n; ++j)
+ if (CurrentTime[pes_by_time[i]] > CurrentTime[pes_by_time[j]]) {
+        temp = pes_by_time[i];
+ pes_by_time[i] = pes_by_time[j];
+ pes_by_time[j] = temp;
+ }
+
+ /* Choose random processor to steal spark from; first look at processors */
+ /* that are earlier than the current one (i.e. proc) */
+ for(first=0;
+ (first < n) && (CurrentTime[pes_by_time[first]] <= CurrentTime[proc]);
+ ++first)
+ /* nothing */ ;
+
+ /* if the assertion below is true we can get rid of first */
+ /* ASSERT(first==n); */
+ /* ToDo: check if first is really needed; find cleaner solution */
+
+ *firstp = first;
+ *np = n;
+}
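+
+/* Worked example (hypothetical clocks): with 4 PEs, proc==CurrentProc==3,
+   CurrentTime == {10,5,7,12} and non-empty spark pools on all PEs,
+   pes_by_time becomes {1,2,0} (ascending local clock) with n==3 and
+   first==3, i.e. all three candidates are no later than proc. */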
+
+/*
+ Steal a spark (piece of work) from any processor and bring it to proc.
+*/
+//@cindex stealSpark
+static rtsBool
+stealSpark(PEs proc) { return stealSomething(proc, rtsTrue, rtsFalse); }
+
+/*
+ Steal a thread from any processor and bring it to proc i.e. thread migration
+*/
+//@cindex stealThread
+static rtsBool
+stealThread(PEs proc) { return stealSomething(proc, rtsFalse, rtsTrue); }
+
+/*
+ Steal a spark or a thread and schedule moving it to proc.
+*/
+//@cindex stealSomething
+static rtsBool
+stealSomething(proc, steal_spark, steal_thread)
+PEs proc; // PE that needs work (stealer)
+rtsBool steal_spark, steal_thread; // should a spark and/or thread be stolen
+{
+ PEs p;
+ rtsTime fish_arrival_time;
+ rtsSpark *spark, *prev, *next;
+ rtsBool stolen = rtsFalse;
+
+ ASSERT(steal_spark || steal_thread);
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ ASSERT(!steal_thread || RtsFlags.GranFlags.DoThreadMigration);
+
+ if (!RtsFlags.GranFlags.Fishing) {
+ // ToDo: check if stealing threads is prefered over stealing sparks
+ if (steal_spark) {
+ if (stealSparkMagic(proc))
+ return rtsTrue;
+ else // no spark found
+ if (steal_thread)
+ return stealThreadMagic(proc);
+	else // thread stealing not wanted
+ return rtsFalse;
+ } else { // ASSERT(steal_thread);
+ return stealThreadMagic(proc);
+ }
+ barf("stealSomething: never reached");
+ }
+
+ /* The rest of this function does GUM style fishing */
+
+ p = findRandomPE(proc); /* find a random PE other than proc */
+
+ /* Message packing costs for sending a Fish; qeq jabbI'ID */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ /* use another GranEvent for requesting a thread? */
+ if (steal_spark && RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(p, proc, SP_REQUESTED,
+ (StgTSO*)NULL, (StgClosure *)NULL, (StgInt)0, 0);
+
+ /* time of the fish arrival on the remote PE */
+ fish_arrival_time = CurrentTime[proc] + RtsFlags.GranFlags.Costs.latency;
+
+ /* Phps use an own Fish event for that? */
+ /* The contents of the spark component is a HACK:
+ 1 means give me a spark;
+ 2 means give me a thread
+ 0 means give me nothing (this should never happen)
+ */
+ new_event(p, proc, fish_arrival_time,
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL,
+ (steal_spark ? (rtsSpark*)1 : steal_thread ? (rtsSpark*)2 : (rtsSpark*)0));
+
+ ++OutstandingFishes[proc];
+ /* only with Async fetching? */
+ if (procStatus[proc]==Idle)
+ procStatus[proc]=Fishing;
+
+ /* time needed to clean up buffers etc after sending a message */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+  /* With GUM style fishing, stealing always succeeds because it only
+     consists of sending out a fish; of course, the fish may return
+     empty-handed! */
+ return rtsTrue;
+}
+
+/*
+ This version of stealing a spark makes use of the global info on all
+ spark pools etc which is not available in a real parallel system.
+ This could be extended to test e.g. the impact of perfect load information.
+*/
+//@cindex stealSparkMagic
+static rtsBool
+stealSparkMagic(proc)
+PEs proc;
+{
+ PEs p=0, i=0, j=0, n=0, first, upb;
+ rtsSpark *spark=NULL, *next;
+ PEs pes_by_time[MAX_PROC];
+ rtsBool stolen = rtsFalse;
+ rtsTime stealtime;
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ sortPEsByTime(proc, pes_by_time, &first, &n);
+
+ while (!stolen && n>0) {
+ upb = (first==0) ? n : first;
+ i = natRandom(0,upb); /* choose a random eligible PE */
+ p = pes_by_time[i];
+
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealSparkMagic (random_steal, not fishing): stealing spark from PE %d (current proc is %d)",
+ p, proc));
+
+ ASSERT(pending_sparks_hds[p]!=(rtsSpark *)NULL); /* non-empty spark pool */
+
+ /* Now go through rtsSparkQ and steal the first eligible spark */
+
+ spark = pending_sparks_hds[p];
+ while (!stolen && spark != (rtsSpark*)NULL)
+ {
+ /* NB: no prev pointer is needed here because all sparks that are not
+ chosen are pruned
+ */
+ if ((procStatus[p]==Idle || procStatus[p]==Sparking || procStatus[p] == Fishing) &&
+ spark->next==(rtsSpark*)NULL)
+ {
+ /* Be social! Don't steal the only spark of an idle processor
+ not {spark} neH yInIH !! */
+ break; /* next PE */
+ }
+ else if (closure_SHOULD_SPARK(spark->node))
+ {
+ /* Don't Steal local sparks;
+ ToDo: optionally prefer local over global sparks
+ if (!spark->global) {
+ prev=spark;
+ continue; next spark
+ }
+ */
+ /* found a spark! */
+
+ /* Prepare message for sending spark */
+ CurrentTime[p] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(p, (PEs)0, SP_EXPORTED,
+ (StgTSO*)NULL, spark->node,
+ spark->name, spark_queue_len(p));
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+ CurrentTime[p] :
+ CurrentTime[proc])
+ + sparkStealTime();
+
+ new_event(proc, p /* CurrentProc */, stealtime,
+ MoveSpark,
+ (StgTSO*)NULL, spark->node, spark);
+
+ stolen = rtsTrue;
+ ++OutstandingFishes[proc]; /* no. of sparks currently on the fly */
+ if (procStatus[proc]==Idle)
+ procStatus[proc] = Fishing;
+ ++(spark->global); /* record that this is a global spark */
+ ASSERT(SparksAvail>0);
+ --SparksAvail; /* on-the-fly sparks are not available */
+ next = delete_from_sparkq(spark, p, rtsFalse); // don't dispose!
+ CurrentTime[p] += RtsFlags.GranFlags.Costs.mtidytime;
+ }
+ else /* !(closure_SHOULD_SPARK(SPARK_NODE(spark))) */
+ {
+ IF_GRAN_DEBUG(checkSparkQ,
+ belch("^^ pruning spark %p (node %p) in stealSparkMagic",
+ spark, spark->node));
+
+ /* if the spark points to a node that should not be sparked,
+ prune the spark queue at this point */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(p, (PEs)0, SP_PRUNED,
+ (StgTSO*)NULL, spark->node,
+ spark->name, spark_queue_len(p));
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.pruned_sparks++;
+
+ ASSERT(SparksAvail>0);
+ --SparksAvail;
+ spark = delete_from_sparkq(spark, p, rtsTrue);
+ }
+ /* unlink spark (may have been freed!) from sparkq;
+ if (prev == NULL) // spark was head of spark queue
+ pending_sparks_hds[p] = spark->next;
+ else
+ prev->next = spark->next;
+ if (spark->next == NULL)
+ pending_sparks_tls[p] = prev;
+ else
+ next->prev = prev;
+ */
+ } /* while ... iterating over sparkq */
+
+ /* ToDo: assert that PE p still has work left after stealing the spark */
+
+ if (!stolen && (n>0)) { /* nothing stealable from proc p :( */
+ ASSERT(pes_by_time[i]==p);
+
+ /* remove p from the list (at pos i) */
+ for (j=i; j+1<n; j++)
+ pes_by_time[j] = pes_by_time[j+1];
+ n--;
+
+ /* update index to first proc which is later (or equal) than proc */
+ for ( ;
+ (first>0) &&
+ (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+ first--)
+ /* nothing */ ;
+ }
+ } /* while ... iterating over PEs in pes_by_time */
+
+ IF_GRAN_DEBUG(randomSteal,
+ if (stolen)
+ belch("^^ stealSparkMagic: spark %p (node=%p) stolen by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+ spark, spark->node, proc, p,
+ SparksAvail, idlers());
+ else
+		  belch("^^ stealSparkMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)",
+			proc, SparksAvail, idlers()));
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ stolen && (i!=0)) { /* only for statistics */
+ globalGranStats.rs_sp_count++;
+ globalGranStats.ntimes_total += n;
+ globalGranStats.fl_total += first;
+ globalGranStats.no_of_steals++;
+ }
+
+ return stolen;
+}
+
+/*
+ The old stealThread code, which makes use of global info and does not
+ send out fishes.
+ NB: most of this is the same as in stealSparkMagic;
+ only the pieces specific to processing thread queues are different;
+ long live polymorphism!
+*/
+
+//@cindex stealThreadMagic
+static rtsBool
+stealThreadMagic(proc)
+PEs proc;
+{
+ PEs p=0, i=0, j=0, n=0, first, upb;
+ StgTSO *tso=END_TSO_QUEUE;
+ PEs pes_by_time[MAX_PROC];
+ rtsBool stolen = rtsFalse;
+ rtsTime stealtime;
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ sortPEsByTime(proc, pes_by_time, &first, &n);
+
+ while (!stolen && n>0) {
+ upb = (first==0) ? n : first;
+ i = natRandom(0,upb); /* choose a random eligible PE */
+ p = pes_by_time[i];
+
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealThreadMagic (random_steal, not fishing): stealing thread from PE %d (current proc is %d)",
+ p, proc));
+
+ /* Steal the first exportable thread in the runnable queue but
+ never steal the first in the queue for social reasons;
+ not Qu' wa'DIch yInIH !!
+ */
+ /* Would be better to search through queue and have options which of
+ the threads to pick when stealing */
+ if (run_queue_hds[p] == END_TSO_QUEUE) {
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealThreadMagic: No thread to steal from PE %d (stealer=PE %d)",
+ p, proc));
+ } else {
+ tso = run_queue_hds[p]->link; /* tso is *2nd* thread in thread queue */
+ /* Found one */
+ stolen = rtsTrue;
+
+ /* update links in queue */
+ run_queue_hds[p]->link = tso->link;
+ if (run_queue_tls[p] == tso)
+ run_queue_tls[p] = run_queue_hds[p];
+
+ /* ToDo: Turn magic constants into params */
+
+ CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mpacktime;
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+ CurrentTime[p] :
+ CurrentTime[proc])
+ + sparkStealTime()
+ + 4l * RtsFlags.GranFlags.Costs.additional_latency
+ + 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
+ SET_GRAN_HDR(tso,Nowhere /* PE_NUMBER(proc) */);
+
+ /* Move from one queue to another */
+ new_event(proc, p, stealtime,
+ MoveThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+
+ /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
+ ++OutstandingFishes[proc];
+      if (procStatus[proc]==Idle)
+ procStatus[proc] = Fishing;
+ --SurplusThreads;
+
+ if(RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(p, proc,
+ GR_STEALING,
+ tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+ /* costs for tidying up buffer after having sent it */
+ CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mtidytime;
+ }
+
+ /* ToDo: assert that PE p still has work left after stealing the spark */
+
+ if (!stolen && (n>0)) { /* nothing stealable from proc p :( */
+ ASSERT(pes_by_time[i]==p);
+
+ /* remove p from the list (at pos i) */
+ for (j=i; j+1<n; j++)
+ pes_by_time[j] = pes_by_time[j+1];
+ n--;
+
+ /* update index to first proc which is later (or equal) than proc */
+ for ( ;
+ (first>0) &&
+ (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+ first--)
+ /* nothing */ ;
+ }
+ } /* while ... iterating over PEs in pes_by_time */
+
+ IF_GRAN_DEBUG(randomSteal,
+ if (stolen)
+ belch("^^ stealThreadMagic: stolen TSO %d (%p) by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+ tso->id, tso, proc, p,
+ SparksAvail, idlers());
+ else
+ belch("stealThreadMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)",
+ proc, SparksAvail, idlers()));
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ stolen && (i!=0)) { /* only for statistics */
+    /* ToDo: more statistics on avg thread queue length etc */
+ globalGranStats.rs_t_count++;
+ globalGranStats.no_of_migrates++;
+ }
+
+ return stolen;
+}
+
+//@cindex sparkStealTime
+static rtsTime
+sparkStealTime(void)
+{
+ double fishdelay, sparkdelay, latencydelay;
+ fishdelay = (double)RtsFlags.GranFlags.proc/2;
+ sparkdelay = fishdelay -
+ ((fishdelay-1.0)/(double)(RtsFlags.GranFlags.proc-1))*((double)idlers());
+ latencydelay = sparkdelay*((double)RtsFlags.GranFlags.Costs.latency);
+
+ return((rtsTime)latencydelay);
+}
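+
+/* Worked example (hypothetical figures): with 16 PEs, 5 idlers and a
+   latency of 1000, fishdelay = 8, sparkdelay = 8 - (7/15)*5 ~= 5.67,
+   so sparkStealTime() ~= 5667; the delay shrinks as more PEs are idle. */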
+
+//@node Routines directly called from Haskell world, Emiting profiling info for GrAnSim, Idle PEs, GranSim specific code
+//@subsection Routines directly called from Haskell world
+/*
+The @GranSim...@ routines in here are directly called via macros from the
+threaded world.
+
+First some auxiliary routines.
+*/
+
+/* Take the current thread off the thread queue and thereby activate the
+ next thread. It's assumed that the next ReSchedule after this uses
+ NEW_THREAD as param.
+ This fct is called from GranSimBlock and GranSimFetch
+*/
+
+//@cindex ActivateNextThread
+
+void
+ActivateNextThread (proc)
+PEs proc;
+{
+ StgTSO *t;
+ /*
+ This routine is entered either via GranSimFetch or via GranSimBlock.
+ It has to prepare the CurrentTSO for being blocked and update the
+ run queue and other statistics on PE proc. The actual enqueuing to the
+ blocking queue (if coming from GranSimBlock) is done in the entry code
+ of the BLACKHOLE and BLACKHOLE_BQ closures (see StgMiscClosures.hc).
+ */
+ /* ToDo: add assertions here!! */
+ //ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE);
+
+ // Only necessary if the running thread is at front of the queue
+ // run_queue_hds[proc] = run_queue_hds[proc]->link;
+ ASSERT(CurrentProc==proc);
+ ASSERT(!is_on_queue(CurrentTSO,proc));
+ if (run_queue_hds[proc]==END_TSO_QUEUE) {
+ /* NB: this routine is only entered with asynchr comm (see assertion) */
+ procStatus[proc] = Idle;
+ } else {
+ /* ToDo: check cost assignment */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime;
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ (!RtsFlags.GranFlags.Light || RtsFlags.GranFlags.Debug.checkLight))
+ /* right flag !?? ^^^ */
+ DumpRawGranEvent(proc, 0, GR_SCHEDULE, run_queue_hds[proc],
+ (StgClosure*)NULL, (StgInt)0, 0);
+ }
+}
+
+/*
+ The following GranSim fcts are stg-called from the threaded world.
+*/
+
+/* Called from HP_CHK and friends (see StgMacros.h) */
+//@cindex GranSimAllocate
+void
+GranSimAllocate(n)
+StgInt n;
+{
+ CurrentTSO->gran.allocs += n;
+ ++(CurrentTSO->gran.basicblocks);
+
+ if (RtsFlags.GranFlags.GranSimStats.Heap) {
+ DumpRawGranEvent(CurrentProc, 0, GR_ALLOC, CurrentTSO,
+ (StgClosure*)NULL, (StgInt)0, n);
+ }
+
+ CurrentTSO->gran.exectime += RtsFlags.GranFlags.Costs.heapalloc_cost;
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.heapalloc_cost;
+}
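+
+/* Hypothetical call-site sketch (the real macros live in StgMacros.h):
+   after a heap check for n words the compiled code does something like
+     GranSimAllocate(n);
+   and, if the heap check fails and has to be redone, compensates via
+     GranSimUnallocate(n);   (see below)
+*/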
+
+/*
+ Subtract the values added above, if a heap check fails and
+ so has to be redone.
+*/
+//@cindex GranSimUnallocate
+void
+GranSimUnallocate(n)
+StgInt n;
+{
+ CurrentTSO->gran.allocs -= n;
+ --(CurrentTSO->gran.basicblocks);
+
+ CurrentTSO->gran.exectime -= RtsFlags.GranFlags.Costs.heapalloc_cost;
+ CurrentTime[CurrentProc] -= RtsFlags.GranFlags.Costs.heapalloc_cost;
+}
+
+/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
+//@cindex GranSimExec
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+StgWord ariths,branches,loads,stores,floats;
+{
+ StgWord cost = RtsFlags.GranFlags.Costs.arith_cost*ariths +
+ RtsFlags.GranFlags.Costs.branch_cost*branches +
+ RtsFlags.GranFlags.Costs.load_cost * loads +
+ RtsFlags.GranFlags.Costs.store_cost*stores +
+ RtsFlags.GranFlags.Costs.float_cost*floats;
+
+ CurrentTSO->gran.exectime += cost;
+ CurrentTime[CurrentProc] += cost;
+}
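+
+/* Worked example (hypothetical cost parameters): with arith_cost=1,
+   branch_cost=2, load_cost=4, store_cost=4 and float_cost=3, a basic
+   block of 10 ariths, 2 branches, 3 loads, 1 store and no floats is
+   charged 10*1 + 2*2 + 3*4 + 1*4 = 30 cycles of virtual time. */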
+
+/*
+ Fetch the node if it isn't local
+ -- result indicates whether fetch has been done.
+
+ This is GRIP-style single item fetching.
+*/
+
+//@cindex GranSimFetch
+StgInt
+GranSimFetch(node /* , liveness_mask */ )
+StgClosure *node;
+/* StgInt liveness_mask; */
+{
+ /* reset the return value (to be checked within STG land) */
+ NeedToReSchedule = rtsFalse;
+
+ if (RtsFlags.GranFlags.Light) {
+ /* Always reschedule in GrAnSim-Light to prevent one TSO from
+ running off too far
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ ContinueThread,CurrentTSO,node,NULL);
+ */
+ return(0);
+ }
+
+ /* Faking an RBH closure:
+ If the bitmask of the closure is 0 then this node is a fake RBH;
+ */
+ if (node->header.gran.procs == Nowhere) {
+ IF_GRAN_DEBUG(bq,
+ belch("## Found fake RBH (node %p); delaying TSO %d (%p)",
+ node, CurrentTSO->id, CurrentTSO));
+
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc]+10000,
+ ContinueThread, CurrentTSO, node, (rtsSpark*)NULL);
+
+ /* Rescheduling (GranSim internal) is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+
+ /* Note: once a node has been fetched, this test will be passed */
+ if (!IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ PEs p = where_is(node);
+ rtsTime fetchtime;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ if (p==CurrentProc)
+		  belch("GranSimFetch: Trying to fetch from own processor %u\n", p););
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+ /* NB: Fetch is counted on arrival (FetchReply) */
+
+ fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ new_event(p, CurrentProc, fetchtime,
+ FetchNode, CurrentTSO, node, (rtsSpark*)NULL);
+
+ if (fetchtime<TimeOfNextEvent)
+ TimeOfNextEvent = fetchtime;
+
+ /* About to block */
+ CurrentTSO->gran.blockedat = CurrentTime[CurrentProc];
+
+ ++OutstandingFetches[CurrentProc];
+
+ if (RtsFlags.GranFlags.DoAsyncFetch)
+ /* if asynchr comm is turned on, activate the next thread in the q */
+ ActivateNextThread(CurrentProc);
+ else
+ procStatus[CurrentProc] = Fetching;
+
+#if 0
+ /* ToDo: nuke the entire if (anything special for fair schedule?) */
+ if (RtsFlags.GranFlags.DoAsyncFetch)
+ {
+ /* Remove CurrentTSO from the queue -- assumes head of queue == CurrentTSO */
+ if(!RtsFlags.GranFlags.DoFairSchedule)
+ {
+ /* now done in do_the_fetchnode
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(CurrentProc, p, GR_FETCH, CurrentTSO,
+ node, (StgInt)0, 0);
+ */
+ ActivateNextThread(CurrentProc);
+
+# if 0 && defined(GRAN_CHECK)
+ if (RtsFlags.GranFlags.Debug.blockOnFetch_sanity) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FetchNode: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ stg_exit(EXIT_FAILURE);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+ }
+# endif
+ CurrentTSO->link = END_TSO_QUEUE;
+ /* CurrentTSO = END_TSO_QUEUE; */
+
+ /* CurrentTSO is pointed to by the FetchNode event; it is
+ on no run queue any more */
+ } else { /* fair scheduling currently not supported -- HWL */
+ barf("Asynchr communication is not yet compatible with fair scheduling\n");
+ }
+ } else { /* !RtsFlags.GranFlags.DoAsyncFetch */
+ procStatus[CurrentProc] = Fetching; // ToDo: BlockedOnFetch;
+ /* now done in do_the_fetchnode
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(CurrentProc, p,
+ GR_FETCH, CurrentTSO, node, (StgInt)0, 0);
+ */
+ IF_GRAN_DEBUG(blockOnFetch,
+ BlockedOnFetch[CurrentProc] = CurrentTSO;); /*- rtsTrue; -*/
+ }
+#endif /* 0 */
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ /* Rescheduling (GranSim internal) is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+ return(0);
+}
+
+//@cindex GranSimSpark
+void
+GranSimSpark(local,node)
+StgInt local;
+StgClosure *node;
+{
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(CurrentProc, (PEs)0, SP_SPARK,
+ END_TSO_QUEUE, node, (StgInt)0, spark_queue_len(CurrentProc)-1);
+
+ /* Force the PE to take notice of the spark */
+ if(RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FindWork,
+ END_TSO_QUEUE, (StgClosure*)NULL, (rtsSpark*)NULL);
+ if (CurrentTime[CurrentProc]<TimeOfNextEvent)
+ TimeOfNextEvent = CurrentTime[CurrentProc];
+ }
+
+ if(local)
+ ++CurrentTSO->gran.localsparks;
+ else
+ ++CurrentTSO->gran.globalsparks;
+}
+
+//@cindex GranSimSparkAt
+void
+GranSimSparkAt(spark,where,identifier)
+rtsSpark *spark;
+StgClosure *where; /* This should be a node; alternatively could be a GA */
+StgInt identifier;
+{
+ PEs p = where_is(where);
+ GranSimSparkAtAbs(spark,p,identifier);
+}
+
+//@cindex GranSimSparkAtAbs
+void
+GranSimSparkAtAbs(spark,proc,identifier)
+rtsSpark *spark;
+PEs proc;
+StgInt identifier;
+{
+ rtsTime exporttime;
+
+ if (spark == (rtsSpark *)NULL) /* Note: Granularity control might have */
+ return; /* turned a spark into a NULL. */
+
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if(RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc,0,SP_SPARKAT,
+ END_TSO_QUEUE, spark->node, (StgInt)0, spark_queue_len(proc));
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+ exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
+ CurrentTime[proc]: CurrentTime[CurrentProc])
+ + RtsFlags.GranFlags.Costs.latency;
+ } else {
+ exporttime = CurrentTime[CurrentProc];
+ }
+
+ if ( RtsFlags.GranFlags.Light )
+ /* Need CurrentTSO in event field to associate costs with creating
+ spark even in a GrAnSim Light setup */
+ new_event(proc, CurrentProc, exporttime,
+ MoveSpark,
+ CurrentTSO, spark->node, spark);
+ else
+ new_event(proc, CurrentProc, exporttime,
+ MoveSpark, (StgTSO*)NULL, spark->node, spark);
+ /* Bit of a hack to treat placed sparks the same as stolen sparks */
+ ++OutstandingFishes[proc];
+
+ /* Force the PE to take notice of the spark (FINDWORK is put after a
+ MoveSpark into the sparkq!) */
+ if (RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,exporttime+1,
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ }
+
+ if (exporttime<TimeOfNextEvent)
+ TimeOfNextEvent = exporttime;
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+ ++CurrentTSO->gran.globalsparks;
+ } else {
+ ++CurrentTSO->gran.localsparks;
+ }
+}
+
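+/* A minimal sketch (not part of the build) of placing a spark on the PE
+   that currently owns its node: GranSimSparkAt resolves the PE via
+   where_is; the identifier argument is only used for logging. */
+#if 0
+static void sparkWhereNodeLives(rtsSpark *spark) /* hypothetical helper */
+{
+  GranSimSparkAt(spark, spark->node, (StgInt)0); /* schedules a MoveSpark */
+}
+#endif
+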
+/*
+ This function handles local and global blocking. It's called either
+ from threaded code (RBH_entry, BH_entry etc) or from blockFetch when
+ trying to fetch a BH or RBH
+*/
+
+//@cindex GranSimBlock
+void
+GranSimBlock(tso, proc, node)
+StgTSO *tso;
+PEs proc;
+StgClosure *node;
+{
+ PEs node_proc = where_is(node),
+ tso_proc = where_is((StgClosure *)tso);
+
+ ASSERT(tso_proc==CurrentProc);
+ // ASSERT(node_proc==CurrentProc);
+ IF_GRAN_DEBUG(bq,
+ if (node_proc!=CurrentProc)
+ belch("## ghuH: TSO %d (%lx) [PE %d] blocks on non-local node %p [PE %d] (no simulation of FETCHMEs)",
+ tso->id, tso, tso_proc, node, node_proc));
+ ASSERT(tso->link==END_TSO_QUEUE);
+ ASSERT(!is_on_queue(tso,proc)); // tso must not be on run queue already!
+ //ASSERT(tso==run_queue_hds[proc]);
+
+ IF_DEBUG(gran,
+ belch("GRAN: TSO %d (%p) [PE %d] blocks on closure %p @ %lx",
+ tso->id, tso, proc, node, CurrentTime[proc]));
+
+
+ /* THIS SHOULD NEVER HAPPEN!
+ If tso tries to block on a remote node (i.e. node_proc!=CurrentProc)
+ we have missed a GranSimFetch before entering this closure;
+ we hack around it for now, faking a FetchNode;
+ because GranSimBlock is entered via a BLACKHOLE(_BQ) closure,
+ tso will be blocked on this closure until the FetchReply occurs.
+
+ ngoq Dogh!
+
+ if (node_proc!=CurrentProc) {
+ StgInt ret;
+ ret = GranSimFetch(node);
+ IF_GRAN_DEBUG(bq,
+ if (ret)
+ belch(".. GranSimBlock: faking a FetchNode of node %p from %d to %d",
+ node, node_proc, CurrentProc););
+ return;
+ }
+ */
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,(StgInt)0,0);
+
+ ++(tso->gran.blockcount);
+ /* Distinction between local and global block is made in blockFetch */
+ tso->gran.blockedat = CurrentTime[proc];
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime;
+ ActivateNextThread(proc);
+ /* tso->link = END_TSO_QUEUE; not really necessary; only for testing */
+}
+
+#endif /* GRAN */
+
+//@node Index, , Dumping routines, GranSim specific code
+//@subsection Index
+
+//@index
+//* ActivateNextThread:: @cindex\s-+ActivateNextThread
+//* CurrentProc:: @cindex\s-+CurrentProc
+//* CurrentTime:: @cindex\s-+CurrentTime
+//* GranSimAllocate:: @cindex\s-+GranSimAllocate
+//* GranSimBlock:: @cindex\s-+GranSimBlock
+//* GranSimExec:: @cindex\s-+GranSimExec
+//* GranSimFetch:: @cindex\s-+GranSimFetch
+//* GranSimLight_insertThread:: @cindex\s-+GranSimLight_insertThread
+//* GranSimSpark:: @cindex\s-+GranSimSpark
+//* GranSimSparkAt:: @cindex\s-+GranSimSparkAt
+//* GranSimSparkAtAbs:: @cindex\s-+GranSimSparkAtAbs
+//* GranSimUnallocate:: @cindex\s-+GranSimUnallocate
+//* any_idle:: @cindex\s-+any_idle
+//* blockFetch:: @cindex\s-+blockFetch
+//* do_the_fetchnode:: @cindex\s-+do_the_fetchnode
+//* do_the_fetchreply:: @cindex\s-+do_the_fetchreply
+//* do_the_findwork:: @cindex\s-+do_the_findwork
+//* do_the_globalblock:: @cindex\s-+do_the_globalblock
+//* do_the_movespark:: @cindex\s-+do_the_movespark
+//* do_the_movethread:: @cindex\s-+do_the_movethread
+//* do_the_startthread:: @cindex\s-+do_the_startthread
+//* do_the_unblock:: @cindex\s-+do_the_unblock
+//* fetchNode:: @cindex\s-+fetchNode
+//* ga_to_proc:: @cindex\s-+ga_to_proc
+//* get_next_event:: @cindex\s-+get_next_event
+//* get_time_of_next_event:: @cindex\s-+get_time_of_next_event
+//* grab_event:: @cindex\s-+grab_event
+//* handleFetchRequest:: @cindex\s-+handleFetchRequest
+//* handleIdlePEs:: @cindex\s-+handleIdlePEs
+//* idlers:: @cindex\s-+idlers
+//* insertThread:: @cindex\s-+insertThread
+//* insert_event:: @cindex\s-+insert_event
+//* is_on_queue:: @cindex\s-+is_on_queue
+//* is_unique:: @cindex\s-+is_unique
+//* new_event:: @cindex\s-+new_event
+//* prepend_event:: @cindex\s-+prepend_event
+//* print_event:: @cindex\s-+print_event
+//* print_eventq:: @cindex\s-+print_eventq
+//* prune_eventq :: @cindex\s-+prune_eventq
+//* spark queue:: @cindex\s-+spark queue
+//* sparkStealTime:: @cindex\s-+sparkStealTime
+//* stealSomething:: @cindex\s-+stealSomething
+//* stealSpark:: @cindex\s-+stealSpark
+//* stealSparkMagic:: @cindex\s-+stealSparkMagic
+//* stealThread:: @cindex\s-+stealThread
+//* stealThreadMagic:: @cindex\s-+stealThreadMagic
+//* thread_queue_len:: @cindex\s-+thread_queue_len
+//* traverse_eventq_for_gc:: @cindex\s-+traverse_eventq_for_gc
+//* where_is:: @cindex\s-+where_is
+//@end index
diff --git a/rts/parallel/GranSimRts.h b/rts/parallel/GranSimRts.h
new file mode 100644
index 0000000000..fc31a1f0a6
--- /dev/null
+++ b/rts/parallel/GranSimRts.h
@@ -0,0 +1,268 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Tue Mar 06 2001 00:18:30 Stardate: [-30]6285.06 hwloidl>
+
+ Variables and functions specific to GranSim.
+ ----------------------------------------------------------------------- */
+
+#ifndef GRANSIM_RTS_H
+#define GRANSIM_RTS_H
+
+//@node Headers for GranSim objs used only in the RTS internally, , ,
+//@section Headers for GranSim objs used only in the RTS internally
+
+//@menu
+//* Event queue::
+//* Spark handling routines::
+//* Processor related stuff::
+//* Local types::
+//* Statistics gathering::
+//* Prototypes::
+//@end menu
+//*/ fool highlight
+
+//@node Event queue, Spark handling routines, Headers for GranSim objs used only in the RTS internally, Headers for GranSim objs used only in the RTS internally
+//@subsection Event queue
+
+#if defined(GRAN) || defined(PAR)
+/* Granularity event types for output (see DumpGranEvent) */
+typedef enum GranEventType_ {
+ GR_START = 0, GR_STARTQ,
+ GR_STEALING, GR_STOLEN, GR_STOLENQ,
+ GR_FETCH, GR_REPLY, GR_BLOCK, GR_RESUME, GR_RESUMEQ,
+ GR_SCHEDULE, GR_DESCHEDULE,
+ GR_END,
+ SP_SPARK, SP_SPARKAT, SP_USED, SP_PRUNED, SP_EXPORTED, SP_ACQUIRED, SP_REQUESTED,
+ GR_ALLOC,
+ GR_TERMINATE,
+ GR_SYSTEM_START, GR_SYSTEM_END, /* only for debugging */
+ GR_EVENT_MAX
+} GranEventType;
+
+extern char *gran_event_names[];
+#endif
+
+#if defined(GRAN) /* whole file */
+
+/* Event Types (internal use only) */
+typedef enum rtsEventType_ {
+ ContinueThread = 0, /* Continue running the first thread in the queue */
+ StartThread, /* Start a newly created thread */
+ ResumeThread, /* Resume a previously running thread */
+ MoveSpark, /* Move a spark from one PE to another */
+ MoveThread, /* Move a thread from one PE to another */
+ FindWork, /* Search for work */
+ FetchNode, /* Fetch a node */
+ FetchReply, /* Receive a node */
+ GlobalBlock, /* Block a TSO on a remote node */
+ UnblockThread /* Make a TSO runnable */
+} rtsEventType;
+
+/* Number of last event type */
+#define MAX_EVENT 9
+
+typedef struct rtsEvent_ {
+ PEs proc; /* Processor id */
+ PEs creator; /* Processor id of PE that created the event */
+ rtsEventType evttype; /* rtsEvent type */
+ rtsTime time; /* Time at which event happened */
+ StgTSO *tso; /* Associated TSO, if relevant */
+ StgClosure *node; /* Associated node, if relevant */
+ rtsSpark *spark; /* Associated SPARK, if relevant */
+ StgInt gc_info; /* Counter of heap objects to mark (used in GC only)*/
+ struct rtsEvent_ *next;
+ } rtsEvent;
+
+typedef rtsEvent *rtsEventQ;
+
+extern rtsEventQ EventHd;
+
+/* Interface for ADT of Event Queue */
+rtsEvent *get_next_event(void);
+rtsTime get_time_of_next_event(void);
+void insert_event(rtsEvent *newentry);
+void new_event(PEs proc, PEs creator, rtsTime time,
+ rtsEventType evttype, StgTSO *tso,
+ StgClosure *node, rtsSpark *spark);
+void print_event(rtsEvent *event);
+void print_eventq(rtsEvent *hd);
+void prepend_event(rtsEvent *event);
+rtsEventQ grab_event(void);
+void prune_eventq(StgTSO *tso, StgClosure *node);
+
+void traverse_eventq_for_gc(void);
+void markEventQueue(void);
+
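+/* A sketch (not part of the build) of the intended use of this ADT:
+   producers schedule events with new_event, and the simulator drains the
+   queue in time order via get_next_event, dispatching on evttype. */
+#if 0
+static void example_find_work(PEs pe, rtsTime now) /* hypothetical */
+{
+  new_event(pe, pe, now, FindWork,
+            (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+  /* later, in the scheduler: rtsEvent *ev = get_next_event();
+     switch (ev->evttype) { case FindWork: ... } */
+}
+#endif
+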
+//@node Spark handling routines, Processor related stuff, Event queue, Headers for GranSim objs used only in the RTS internally
+//@subsection Spark handling routines
+
+/* These functions are only used in the RTS internally; see GranSim.h for rest */
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void print_spark(rtsSpark *spark);
+void print_sparkq(PEs proc);
+void print_sparkq_stats(void);
+nat spark_queue_len(PEs proc);
+rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too);
+void markSparkQueue(void);
+
+//@node Processor related stuff, Local types, Spark handling routines, Headers for GranSim objs used only in the RTS internally
+//@subsection Processor related stuff
+
+typedef enum rtsProcStatus_ {
+ Idle = 0, /* empty threadq */
+ Sparking, /* non-empty sparkq; FINDWORK has been issued */
+ Starting, /* STARTTHREAD has been issued */
+ Fetching, /* waiting for remote data (only if block-on-fetch) */
+ Fishing, /* waiting for remote spark/thread */
+ Busy /* non-empty threadq, with head of queue active */
+} rtsProcStatus;
+
+/*
+#define IS_IDLE(proc) (procStatus[proc] == Idle)
+#define IS_SPARKING(proc) (procStatus[proc] == Sparking)
+#define IS_STARTING(proc) (procStatus[proc] == Starting)
+#define IS_FETCHING(proc) (procStatus[proc] == Fetching)
+#define IS_FISHING(proc) (procStatus[proc] == Fishing)
+#define IS_BUSY(proc) (procStatus[proc] == Busy)
+#define ANY_IDLE (any_idle())
+#define MAKE_IDLE(proc) procStatus[proc] = Idle
+#define MAKE_SPARKING(proc) procStatus[proc] = Sparking
+#define MAKE_STARTING(proc) procStatus[proc] = Starting
+#define MAKE_FETCHING(proc) procStatus[proc] = Fetching
+#define MAKE_FISHING(proc) procStatus[proc] = Fishing
+#define MAKE_BUSY(proc) procStatus[proc] = Busy
+*/
+
+//@node Local types, Statistics gathering, Processor related stuff, Headers for GranSim objs used only in the RTS internally
+//@subsection Local types
+
+/* Return codes of HandleFetchRequest:
+ 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
+ nearby graph has been scheduled)
+ 1 ... node is already local (fetched by somebody else; no event is
+ scheduled in here)
+ 2 ... fetch request has been forwarded to the PE that now contains the
+ node
+ 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
+ the current TSO is put into the blocking queue of that node
+ 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
+ function to guarantee that the tso and node inputs are valid
+ (they may be moved during GC).
+ Return codes of blockFetch:
+ 0 ... ok; tso is now at beginning of BQ attached to the bh closure
+ 1 ... the bh closure is no longer a BH; tso is immediately unblocked
+*/
+
+typedef enum rtsFetchReturnCode_ {
+ Ok = 0,
+ NodeIsLocal,
+ NodeHasMoved,
+ NodeIsBH,
+ NodeIsNoBH,
+ OutOfHeap
+} rtsFetchReturnCode;
+
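+/* A sketch (not part of the build) of how a caller might dispatch on
+   these codes; only OutOfHeap needs special treatment, since the caller
+   must GC and retry while tso and node may move: */
+#if 0
+switch (handleFetchRequest(node, CurrentProc, p, tso)) {
+  case Ok:           break; /* FETCHREPLY event has been scheduled */
+  case NodeIsLocal:  break; /* fetched by somebody else meanwhile  */
+  case NodeHasMoved: break; /* request forwarded to the new owner  */
+  case NodeIsBH:     break; /* tso queued on the node's BQ         */
+  case OutOfHeap:    /* trigger GC, then retry the request */ break;
+  default:           break; /* NodeIsNoBH is returned by blockFetch only */
+}
+#endif
+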
+//@node Statistics gathering, Prototypes, Local types, Headers for GranSim objs used only in the RTS internally
+//@subsection Statistics gathering
+
+extern unsigned int /* nat */ OutstandingFetches[], OutstandingFishes[];
+extern rtsProcStatus procStatus[];
+extern StgTSO *BlockedOnFetch[];
+
+/* global structure for collecting statistics */
+typedef struct GlobalGranStats_ {
+ /* event stats */
+ nat noOfEvents;
+ nat event_counts[MAX_EVENT];
+
+ /* communication stats */
+ nat fetch_misses;
+ nat tot_fake_fetches; // GranSim internal; faked Fetches are a kludge!!
+ nat tot_low_pri_sparks;
+
+ /* load distribution statistics */
+ nat rs_sp_count, rs_t_count, ntimes_total, fl_total,
+ no_of_steals, no_of_migrates;
+
+ /* spark queue stats */
+ nat tot_sq_len, tot_sq_probes, tot_sparks;
+ nat tot_add_threads, tot_tq_len, non_end_add_threads;
+
+ /* packet statistics */
+ nat tot_packets, tot_packet_size, tot_cuts, tot_thunks;
+
+ /* thread stats */
+ nat tot_threads_created, threads_created_on_PE[MAX_PROC],
+ tot_TSOs_migrated;
+
+ /* spark stats */
+ nat pruned_sparks, withered_sparks;
+ nat tot_sparks_created, sparks_created_on_PE[MAX_PROC];
+
+ /* scheduling stats */
+ nat tot_yields, tot_stackover, tot_heapover;
+
+ /* blocking queue statistics */
+ rtsTime tot_bq_processing_time;
+ nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs;
+} GlobalGranStats;
+
+extern GlobalGranStats globalGranStats;
+
+//@node Prototypes, , Statistics gathering, Headers for GranSim objs used only in the RTS internally
+//@subsection Prototypes
+
+/* Generally useful fcts */
+PEs where_is(StgClosure *node);
+rtsBool is_unique(StgClosure *node);
+
+/* Prototypes of event handling functions; needed in Schedule.c:ReSchedule() */
+void do_the_globalblock (rtsEvent* event);
+void do_the_unblock (rtsEvent* event);
+void do_the_fetchnode (rtsEvent* event);
+void do_the_fetchreply (rtsEvent* event);
+void do_the_movethread (rtsEvent* event);
+void do_the_movespark (rtsEvent* event);
+void do_the_startthread(rtsEvent *event);
+void do_the_findwork(rtsEvent* event);
+void gimme_spark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res);
+rtsBool munch_spark (rtsEvent *event, rtsSparkQ spark);
+
+/* GranSimLight routines */
+void GranSimLight_enter_system(rtsEvent *event, StgTSO **ActiveTSOp);
+void GranSimLight_leave_system(rtsEvent *event, StgTSO **ActiveTSOp);
+
+/* Communication related routines */
+rtsFetchReturnCode fetchNode(StgClosure* node, PEs from, PEs to);
+rtsFetchReturnCode handleFetchRequest(StgClosure* node, PEs curr_proc, PEs p, StgTSO* tso);
+void handleIdlePEs(void);
+
+long int random(void); /* used in stealSpark() and stealThread() in GranSim.c */
+
+/* Scheduling fcts defined in GranSim.c */
+void insertThread(StgTSO *tso, PEs proc);
+void endThread(StgTSO *tso, PEs proc);
+rtsBool GranSimLight_insertThread(StgTSO *tso, PEs proc);
+nat thread_queue_len(PEs proc);
+
+/* For debugging */
+rtsBool is_on_queue (StgTSO *tso, PEs proc);
+#endif
+
+#if defined(GRAN) || defined(PAR)
+/*
+ Interface for dumping routines (i.e. writing to log file).
+ These routines are shared with GUM (and could also be used for SMP).
+*/
+void DumpGranEvent(GranEventType name, StgTSO *tso);
+void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread);
+void DumpTSO(StgTSO *tso);
+void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node,
+ StgInt sparkname, StgInt len);
+void DumpVeryRawGranEvent(rtsTime time, PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node,
+ StgInt sparkname, StgInt len);
+#endif
+
+#endif /* GRANSIM_RTS_H */
diff --git a/rts/parallel/HLC.h b/rts/parallel/HLC.h
new file mode 100644
index 0000000000..793ac840f9
--- /dev/null
+++ b/rts/parallel/HLC.h
@@ -0,0 +1,63 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sun Mar 18 2001 20:16:14 Stardate: [-30]6349.22 hwloidl>
+
+ High Level Communications Header (HLC.h)
+
+ Contains the high-level definitions (i.e. communication
+ subsystem independent) used by GUM
+ Phil Trinder, Glasgow University, 12 December 1994
+ H-W. Loidl, Heriot-Watt, November 1999
+ ----------------------------------------------------------------------- */
+
+#ifndef __HLC_H
+#define __HLC_H
+
+#ifdef PAR
+
+#include "LLC.h"
+
+#define NEW_FISH_AGE 0
+#define NEW_FISH_HISTORY 0
+#define NEW_FISH_HUNGER 0
+#define FISH_LIFE_EXPECTANCY 10
+
+
+//@node GUM Message Sending and Unpacking Functions
+//@subsection GUM Message Sending and Unpacking Functions
+
+rtsBool initMoreBuffers(void);
+
+void sendFetch (globalAddr *ga, globalAddr *bqga, int load);
+void sendResume(globalAddr *rga, int nelem, rtsPackBuffer *packBuffer);
+void sendAck (GlobalTaskId task, int ngas, globalAddr *gagamap);
+void sendFish (GlobalTaskId destPE, GlobalTaskId origPE, int age, int history, int hunger);
+void sendFree (GlobalTaskId destPE, int nelem, P_ data);
+void sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *packBuffer);
+void sendReval(GlobalTaskId origPE, int nelem, rtsPackBuffer *data);
+
+//@node Message-Processing Functions
+//@subsection Message-Processing Functions
+
+rtsBool processMessages(void);
+void processFetches(void);
+void processTheRealFetches(void);
+
+//@node Miscellaneous Functions
+//@subsection Miscellaneous Functions
+
+void prepareFreeMsgBuffers(void);
+void freeRemoteGA (int pe, globalAddr *ga);
+void sendFreeMessages(void);
+
+GlobalTaskId choosePE(void);
+StgClosure *createBlockedFetch (globalAddr ga, globalAddr rga);
+void waitForTermination(void);
+
+/* Message bouncing (startup and shutdown, mainly) */
+void bounceFish(void);
+void bounceReval(void);
+
+void DebugPrintGAGAMap (globalAddr *gagamap, int nGAs);
+
+#endif /* PAR */
+#endif /* __HLC_H */
diff --git a/rts/parallel/HLComms.c b/rts/parallel/HLComms.c
new file mode 100644
index 0000000000..b0982e441c
--- /dev/null
+++ b/rts/parallel/HLComms.c
@@ -0,0 +1,1810 @@
+/* ----------------------------------------------------------------------------
+ * Time-stamp: <Wed Mar 21 2001 16:34:41 Stardate: [-30]6363.45 hwloidl>
+ *
+ * High Level Communications Routines (HLComms.c)
+ *
+ * Contains the high-level routines (i.e. communication
+ * subsystem independent) used by GUM
+ *
+ * GUM 0.2x: Phil Trinder, Glasgow University, 12 December 1994
+ * GUM 3.xx: Phil Trinder, Simon Marlow July 1998
+ * GUM 4.xx: H-W. Loidl, Heriot-Watt University, November 1999 -
+ *
+ * ------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@node High Level Communications Routines, , ,
+//@section High Level Communications Routines
+
+//@menu
+//* Macros etc::
+//* Includes::
+//* GUM Message Sending and Unpacking Functions::
+//* Message-Processing Functions::
+//* GUM Message Processor::
+//* Miscellaneous Functions::
+//* Index::
+//@end menu
+
+//@node Macros etc, Includes, High Level Communications Routines, High Level Communications Routines
+//@subsection Macros etc
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+//@node Includes, GUM Message Sending and Unpacking Functions, Macros etc, High Level Communications Routines
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Storage.h" // for recordMutable
+#include "HLC.h"
+#include "Parallel.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "Sparks.h"
+#include "FetchMe.h" // for BLOCKED_FETCH_info etc
+#if defined(DEBUG)
+# include "ParallelDebug.h"
+#endif
+#include "StgMacros.h" // inlined IS_... fcts
+
+#ifdef DIST
+#include "SchedAPI.h" //for createIOThread
+extern unsigned int context_switch;
+#endif /* DIST */
+
+//@node GUM Message Sending and Unpacking Functions, Message-Processing Functions, Includes, High Level Communications Routines
+//@subsection GUM Message Sending and Unpacking Functions
+
+/*
+ * GUM Message Sending and Unpacking Functions
+ */
+
+/*
+ * Allocate space for message processing
+ */
+
+//@cindex gumPackBuffer
+static rtsPackBuffer *gumPackBuffer;
+
+//@cindex initMoreBuffers
+rtsBool
+initMoreBuffers(void)
+{
+ if ((gumPackBuffer = (rtsPackBuffer *)stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+ "initMoreBuffers")) == NULL)
+ return rtsFalse;
+ return rtsTrue;
+}
+
+/*
+ * SendFetch packs the two global addresses and a load into a message and
+ * sends it.
+
+//@cindex FETCH
+
+ Structure of a FETCH message:
+
+ | GA 1 | GA 2 |
+ +------------------------------------+------+
+ | gtid | slot | weight | gtid | slot | load |
+ +------------------------------------+------+
+ */
+
+//@cindex sendFetch
+void
+sendFetch(globalAddr *rga, globalAddr *lga, int load)
+{
+ ASSERT(rga->weight > 0 && lga->weight > 0);
+ IF_PAR_DEBUG(fetch,
+ belch("~^** Sending Fetch for ((%x, %d, 0)); locally ((%x, %d, %x)), load = %d",
+ rga->payload.gc.gtid, rga->payload.gc.slot,
+ lga->payload.gc.gtid, lga->payload.gc.slot, lga->weight,
+ load));
+
+
+ /* ToDo: Dump event
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga->payload.gc.gtid),
+ GR_FETCH, CurrentTSO, (StgClosure *)(lga->payload.gc.slot),
+ 0, spark_queue_len(ADVISORY_POOL));
+ */
+
+ sendOpV(PP_FETCH, rga->payload.gc.gtid, 6,
+ (StgWord) rga->payload.gc.gtid, (StgWord) rga->payload.gc.slot,
+ (StgWord) lga->weight, (StgWord) lga->payload.gc.gtid,
+ (StgWord) lga->payload.gc.slot, (StgWord) load);
+}
+
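+/* The six words marshalled above line up one-to-one with the diagram in
+   the header comment; a hypothetical struct view of the payload (a sketch
+   only -- the real code marshals plain words via sendOpV/GetArgs): */
+#if 0
+typedef struct {
+  StgWord r_gtid, r_slot;           /* GA 1: remote closure to fetch    */
+  StgWord l_weight, l_gtid, l_slot; /* GA 2: where the RESUME should go */
+  StgWord load;                     /* load figure of the sending PE    */
+} fetchPayload;                     /* name is hypothetical */
+#endif
+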
+/*
+ * unpackFetch unpacks a FETCH message into two Global addresses and a load
+ * figure.
+*/
+
+//@cindex unpackFetch
+static void
+unpackFetch(globalAddr *lga, globalAddr *rga, int *load)
+{
+ long buf[6];
+
+ GetArgs(buf, 6);
+
+ IF_PAR_DEBUG(fetch,
+ belch("~^** Unpacking Fetch for ((%x, %d, 0)) to ((%x, %d, %x)), load = %d",
+ (GlobalTaskId) buf[0], (int) buf[1],
+ (GlobalTaskId) buf[3], (int) buf[4], buf[2], buf[5]));
+
+ lga->weight = 1;
+ lga->payload.gc.gtid = (GlobalTaskId) buf[0];
+ lga->payload.gc.slot = (int) buf[1];
+
+ rga->weight = (unsigned) buf[2];
+ rga->payload.gc.gtid = (GlobalTaskId) buf[3];
+ rga->payload.gc.slot = (int) buf[4];
+
+ *load = (int) buf[5];
+
+ ASSERT(rga->weight > 0);
+}
+
+/*
+ * SendResume packs the remote blocking queue's GA and data into a message
+ * and sends it.
+
+//@cindex RESUME
+
+ Structure of a RESUME message:
+
+ -------------------------------
+ | weight | slot | n | data ...
+ -------------------------------
+
+ data is a packed graph represented as an rtsPackBuffer
+ n is the size of the graph (as returned by PackNearbyGraph) + packet hdr size
+ */
+
+//@cindex sendResume
+void
+sendResume(globalAddr *rga, int nelem, rtsPackBuffer *packBuffer)
+{
+ IF_PAR_DEBUG(fetch,
+ belch("~^[] Sending Resume (packet <<%d>> with %d elems) for ((%x, %d, %x)) to [%x]",
+ packBuffer->id, nelem,
+ rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight,
+ rga->payload.gc.gtid));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ ASSERT(nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+nelem) == END_OF_BUFFER_MARKER));
+
+ sendOpNV(PP_RESUME, rga->payload.gc.gtid,
+ nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM, (StgPtr)packBuffer,
+ 2, (rtsWeight) rga->weight, (StgWord) rga->payload.gc.slot);
+}
+
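+/* On the wire the buffer shipped above is laid out as
+     | pack buffer header | nelem graph words | END_OF_BUFFER_MARKER |
+   i.e. nelem counts only the graph words, while the message length also
+   covers PACK_BUFFER_HDR_SIZE and the DEBUG_HEADROOM word(s) holding the
+   sanity marker. A sketch of the invariant checked on both ends: */
+#if 0
+ASSERT(*(packBuffer->buffer + packBuffer->size) == END_OF_BUFFER_MARKER);
+#endif
+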
+/*
+ * unpackResume unpacks a Resume message into two Global addresses and
+ * a data array.
+ */
+
+//@cindex unpackResume
+static void
+unpackResume(globalAddr *lga, int *nelem, rtsPackBuffer *packBuffer)
+{
+ long buf[3];
+
+ GetArgs(buf, 3);
+
+ /*
+ RESUME event is written in awaken_blocked_queue
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(lga->payload.gc.gtid),
+ GR_RESUME, END_TSO_QUEUE, (StgClosure *)NULL, 0, 0);
+ */
+
+ lga->weight = (unsigned) buf[0];
+ lga->payload.gc.gtid = mytid;
+ lga->payload.gc.slot = (int) buf[1];
+
+ *nelem = (int) buf[2] - PACK_BUFFER_HDR_SIZE - DEBUG_HEADROOM;
+ GetArgs(packBuffer, *nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM);
+
+ IF_PAR_DEBUG(fetch,
+ belch("~^[] Unpacking Resume (packet <<%d>> with %d elems) for ((%x, %d, %x))",
+ packBuffer->id, *nelem, mytid, (int) buf[1], (unsigned) buf[0]));
+
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+*nelem) == END_OF_BUFFER_MARKER));
+}
+
+/*
+ * SendAck packs the global address being acknowledged, together with
+ * an array of global addresses for any closures shipped and sends them.
+
+//@cindex ACK
+
+ Structure of an ACK message:
+
+ | GA 1 | GA 2 |
+ +---------------------------------------------+-------
+ | weight | gtid | slot | weight | gtid | slot | ..... ngas times
+ +---------------------------------------------+-------
+
+ */
+
+//@cindex sendAck
+void
+sendAck(GlobalTaskId task, int ngas, globalAddr *gagamap)
+{
+ static long *buffer;
+ long *p;
+ int i;
+
+ if(ngas==0)
+ return; //don't send unnecessary messages!!
+
+ buffer = (long *) gumPackBuffer;
+
+ for(i = 0, p = buffer; i < ngas; i++, p += 6) {
+ ASSERT(gagamap[1].weight > 0);
+ p[0] = (long) gagamap->weight;
+ p[1] = (long) gagamap->payload.gc.gtid;
+ p[2] = (long) gagamap->payload.gc.slot;
+ gagamap++;
+ p[3] = (long) gagamap->weight;
+ p[4] = (long) gagamap->payload.gc.gtid;
+ p[5] = (long) gagamap->payload.gc.slot;
+ gagamap++;
+ }
+ IF_PAR_DEBUG(schedule,
+ belch("~^,, Sending Ack (%d pairs) to [%x]\n",
+ ngas, task));
+
+ sendOpN(PP_ACK, task, p - buffer, (StgPtr)buffer);
+}
+
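+/* Layout note: the gagamap shipped above is consumed pairwise by
+   processAck (below) as (old GA, new GA) pairs, which is why the second
+   GA of each pair is asserted to carry positive weight. */
+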
+/*
+ * unpackAck unpacks an Acknowledgement message into a Global address,
+ * a count of the number of global addresses following and a map of
+ * Global addresses
+ */
+
+//@cindex unpackAck
+static void
+unpackAck(int *ngas, globalAddr *gagamap)
+{
+ long GAarraysize;
+ long buf[6];
+
+ GetArgs(&GAarraysize, 1);
+
+ *ngas = GAarraysize / 6;
+
+ IF_PAR_DEBUG(schedule,
+ belch("~^,, Unpacking Ack (%d pairs) on [%x]\n",
+ *ngas, mytid));
+
+ while (GAarraysize > 0) {
+ GetArgs(buf, 6);
+ gagamap->weight = (rtsWeight) buf[0];
+ gagamap->payload.gc.gtid = (GlobalTaskId) buf[1];
+ gagamap->payload.gc.slot = (int) buf[2];
+ gagamap++;
+ gagamap->weight = (rtsWeight) buf[3];
+ gagamap->payload.gc.gtid = (GlobalTaskId) buf[4];
+ gagamap->payload.gc.slot = (int) buf[5];
+ ASSERT(gagamap->weight > 0);
+ gagamap++;
+ GAarraysize -= 6;
+ }
+}
+
+/*
+ * SendFish packs the global task id of the originating PE together with
+ * the age, history and hunger of the fish, and sends the resulting FISH
+ * message to destPE.
+
+//@cindex FISH
+
+ Structure of a FISH message:
+
+ +----------------------------------+
+ | orig PE | age | history | hunger |
+ +----------------------------------+
+ */
+
+//@cindex sendFish
+void
+sendFish(GlobalTaskId destPE, GlobalTaskId origPE,
+ int age, int history, int hunger)
+{
+ IF_PAR_DEBUG(fish,
+ belch("~^$$ Sending Fish to [%x] (%d outstanding fishes)",
+ destPE, outstandingFishes));
+
+ sendOpV(PP_FISH, destPE, 4,
+ (StgWord) origPE, (StgWord) age, (StgWord) history, (StgWord) hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsTrue;
+ outstandingFishes++;
+ }
+}
+
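+/* A sketch (not part of the build) of launching a brand-new fish when a
+   PE runs dry, using the NEW_FISH_* constants from HLC.h; choosePE picks
+   the victim PE: */
+#if 0
+sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+#endif
+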
+/*
+ * unpackFish unpacks a FISH message into the global task id of the
+ * originating PE and 3 data fields: the age, history and hunger of the
+ * fish. The history and hunger are not currently used.
+
+ */
+
+//@cindex unpackFish
+static void
+unpackFish(GlobalTaskId *origPE, int *age, int *history, int *hunger)
+{
+ long buf[4];
+
+ GetArgs(buf, 4);
+
+ IF_PAR_DEBUG(fish,
+ belch("~^$$ Unpacking Fish from [%x] (age=%d)",
+ (GlobalTaskId) buf[0], (int) buf[1]));
+
+ *origPE = (GlobalTaskId) buf[0];
+ *age = (int) buf[1];
+ *history = (int) buf[2];
+ *hunger = (int) buf[3];
+}
+
+/*
+ * SendFree sends (weight, slot) pairs for GAs that we no longer need
+ * references to.
+
+//@cindex FREE
+
+ Structure of a FREE message:
+
+ +-----------------------------
+ | n | weight_1 | slot_1 | ...
+ +-----------------------------
+ */
+//@cindex sendFree
+void
+sendFree(GlobalTaskId pe, int nelem, StgPtr data)
+{
+ IF_PAR_DEBUG(free,
+ belch("~^!! Sending Free (%d GAs) to [%x]",
+ nelem/2, pe));
+
+ sendOpN(PP_FREE, pe, nelem, data);
+}
+
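+/* sendFree is normally driven via the helpers declared in HLC.h, which
+   batch (weight, slot) pairs per PE; a sketch of the assumed calling
+   sequence: */
+#if 0
+prepareFreeMsgBuffers();  /* reset the per-PE buffers                     */
+freeRemoteGA(pe, &ga);    /* queue one GA whose weight we hand back       */
+sendFreeMessages();       /* flush non-empty buffers, one sendFree per PE */
+#endif
+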
+/*
+ * unpackFree unpacks a FREE message into the amount of data shipped and
+ * a data block.
+ */
+//@cindex unpackFree
+static void
+unpackFree(int *nelem, StgWord *data)
+{
+ long buf[1];
+
+ GetArgs(buf, 1);
+ *nelem = (int) buf[0];
+
+ IF_PAR_DEBUG(free,
+ belch("~^!! Unpacking Free (%d GAs)",
+ *nelem/2));
+
+ GetArgs(data, *nelem);
+}
+
+/*
+ * SendSchedule sends a closure to be evaluated in response to a Fish
+ * message. The message is directed to the PE that originated the Fish
+ * (origPE), and includes the packed closure (data) along with its size
+ * (nelem).
+
+//@cindex SCHEDULE
+
+ Structure of a SCHEDULE message:
+
+ +------------------------------------
+ | PE | n | pack buffer of a graph ...
+ +------------------------------------
+ */
+//@cindex sendSchedule
+void
+sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *packBuffer)
+{
+ IF_PAR_DEBUG(schedule,
+ belch("~^-- Sending Schedule (packet <<%d>> with %d elems) to [%x]\n",
+ packBuffer->id, nelem, origPE));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ ASSERT(nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+nelem) == END_OF_BUFFER_MARKER));
+
+ sendOpN(PP_SCHEDULE, origPE,
+ nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM, (StgPtr)packBuffer);
+}
+
+/*
+ * unpackSchedule unpacks a SCHEDULE message into the Global address of
+ * the closure shipped, the amount of data shipped (nelem) and the data
+ * block (data).
+ */
+
+//@cindex unpackSchedule
+static void
+unpackSchedule(int *nelem, rtsPackBuffer *packBuffer)
+{
+ long buf[1];
+
+ /* first, just unpack 1 word containing the total size (including header) */
+ GetArgs(buf, 1);
+ /* no. of elems, not counting the header of the pack buffer */
+ *nelem = (int) buf[0] - PACK_BUFFER_HDR_SIZE - DEBUG_HEADROOM;
+
+ /* automatic cast of flat pvm-data to rtsPackBuffer */
+ GetArgs(packBuffer, *nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM);
+
+ IF_PAR_DEBUG(schedule,
+ belch("~^-- Unpacking Schedule (packet <<%d>> with %d elems) on [%x]\n",
+ packBuffer->id, *nelem, mytid));
+
+ ASSERT(*nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+*nelem) == END_OF_BUFFER_MARKER));
+}
+
+#ifdef DIST
+/* sendReval is almost identical to the Schedule version, so we can unpack with unpackSchedule */
+void
+sendReval(GlobalTaskId origPE, int nelem, rtsPackBuffer *packBuffer)
+{
+ IF_PAR_DEBUG(schedule,
+ belch("~^-- Sending Reval (packet <<%d>> with %d elems) to [%x]\n",
+ packBuffer->id, nelem, origPE));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ ASSERT(nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+nelem) == END_OF_BUFFER_MARKER));
+
+ sendOpN(PP_REVAL, origPE,
+ nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM, (StgPtr)packBuffer);
+}
+
+void FinishReval(StgTSO *t)
+{ StgClosure *res;
+ globalAddr ga;
+ nat size;
+ rtsPackBuffer *buffer=NULL;
+
+ ga.payload.gc.slot = t->revalSlot;
+ ga.payload.gc.gtid = t->revalTid;
+ ga.weight = 0;
+
+ //find where the reval result is
+ res = GALAlookup(&ga);
+ ASSERT(res);
+
+ IF_PAR_DEBUG(schedule,
+ printGA(&ga);
+ belch(" needs the result %08x\n",res));
+
+ //send off the result
+ buffer = PackNearbyGraph(res, END_TSO_QUEUE, &size,ga.payload.gc.gtid);
+ ASSERT(buffer != (rtsPackBuffer *)NULL);
+ sendResume(&ga, size, buffer);
+
+ IF_PAR_DEBUG(schedule,
+ belch("@;~) Reval Finished"));
+}
+
+#endif /* DIST */
+
+//@node Message-Processing Functions, GUM Message Processor, GUM Message Sending and Unpacking Functions, High Level Communications Routines
+//@subsection Message-Processing Functions
+
+/*
+ * Message-Processing Functions
+ *
+ * The following routines process incoming GUM messages, often reissuing
+ * messages in response.
+ *
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+
+/*
+ * processFetches constructs and sends resume messages for every
+ * BlockedFetch which is ready to be awakened.
+ * awaken_blocked_queue (in Schedule.c) is responsible for moving
+ * BlockedFetches from a blocking queue to the PendingFetches queue.
+ */
+void GetRoots(void);
+extern StgBlockedFetch *PendingFetches;
+
+nat
+pending_fetches_len(void)
+{
+ StgBlockedFetch *bf;
+ nat n;
+
+ for (n=0, bf=PendingFetches; bf != END_BF_QUEUE; n++, bf = (StgBlockedFetch *)(bf->link)) {
+ ASSERT(get_itbl(bf)->type==BLOCKED_FETCH);
+ }
+ return n;
+}
+
+//@cindex processFetches
+void
+processFetches(void) {
+ StgBlockedFetch *bf, *next;
+ StgClosure *closure;
+ StgInfoTable *ip;
+ globalAddr rga;
+ static rtsPackBuffer *packBuffer;
+
+ IF_PAR_DEBUG(verbose,
+ belch("____ processFetches: %d pending fetches (root @ %p)",
+ pending_fetches_len(), PendingFetches));
+
+ for (bf = PendingFetches;
+ bf != END_BF_QUEUE;
+ bf=next) {
+ /* the PendingFetches list contains only BLOCKED_FETCH closures */
+ ASSERT(get_itbl(bf)->type==BLOCKED_FETCH);
+ /* store link (we might overwrite it via blockFetch later on) */
+ next = (StgBlockedFetch *)(bf->link);
+
+ /*
+ * Find the target at the end of the indirection chain, and
+ * process it in much the same fashion as the original target
+ * of the fetch. Though we hope to find graph here, we could
+ * find a black hole (of any flavor) or even a FetchMe.
+ */
+ closure = bf->node;
+ /*
+ We evacuate BQs and update the node fields where necessary in GC.c
+ So, if we find an EVACUATED closure, something has gone Very Wrong
+ (and therefore we let the RTS crash most ungracefully).
+ */
+ ASSERT(get_itbl(closure)->type != EVACUATED);
+ // closure = ((StgEvacuated *)closure)->evacuee;
+
+ closure = UNWIND_IND(closure);
+ //while ((ind = IS_INDIRECTION(closure)) != NULL) { closure = ind; }
+
+ ip = get_itbl(closure);
+ if (ip->type == FETCH_ME) {
+ /* Forward the Fetch to someone else */
+ rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+ rga.payload.gc.slot = bf->ga.payload.gc.slot;
+ rga.weight = bf->ga.weight;
+
+ sendFetch(((StgFetchMe *)closure)->ga, &rga, 0 /* load */);
+
+ // Global statistics: count no. of fetches
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fetch_mess++;
+ }
+
+ IF_PAR_DEBUG(fetch,
+ belch("__-> processFetches: Forwarding fetch from %lx to %lx",
+ mytid, rga.payload.gc.gtid));
+
+ } else if (IS_BLACK_HOLE(closure)) {
+ IF_PAR_DEBUG(verbose,
+ belch("__++ processFetches: trying to send a BLACK_HOLE => doing a blockFetch on closure %p (%s)",
+ closure, info_type(closure)));
+ bf->node = closure;
+ blockFetch(bf, closure);
+ } else {
+ /* We now have some local graph to send back */
+ nat size;
+
+ packBuffer = gumPackBuffer;
+ IF_PAR_DEBUG(verbose,
+ belch("__*> processFetches: PackNearbyGraph of closure %p (%s)",
+ closure, info_type(closure)));
+
+ if ((packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, bf->ga.payload.gc.gtid)) == NULL) {
+ // Put current BF back on list
+ bf->link = (StgBlockingQueueElement *)PendingFetches;
+ PendingFetches = (StgBlockedFetch *)bf;
+ // ToDo: check that nothing more has to be done to prepare for GC!
+ barf("processFetches: out of heap while packing graph; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ bf = PendingFetches;
+ PendingFetches = (StgBlockedFetch *)(bf->link);
+ closure = bf->node;
+ packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, bf->ga.payload.gc.gtid);
+ ASSERT(packBuffer != (rtsPackBuffer *)NULL);
+ }
+ rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+ rga.payload.gc.slot = bf->ga.payload.gc.slot;
+ rga.weight = bf->ga.weight;
+
+ sendResume(&rga, size, packBuffer);
+
+ // Global statistics: count no. of resume messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_resume_mess++;
+ }
+ }
+ }
+ PendingFetches = END_BF_QUEUE;
+}
+
+#if 0
+/*
+ Alternatively to sending fetch messages directly from the FETCH_ME_entry
+ code we could just store the data about the remote data in a global
+ variable and send the fetch request from the main scheduling loop (similar
+ to processFetches above). This would save an expensive STGCALL in the entry
+ code because we have to go back to the scheduler anyway.
+*/
+//@cindex processTheRealFetches
+void
+processTheRealFetches(void) {
+ StgBlockedFetch *bf;
+ StgClosure *closure, *next;
+
+ IF_PAR_DEBUG(verbose,
+ belch("__ processTheRealFetches: ");
+ printGA(&theGlobalFromGA);
+ printGA(&theGlobalToGA));
+
+ ASSERT(theGlobalFromGA.payload.gc.gtid != 0 &&
+ theGlobalToGA.payload.gc.gtid != 0);
+
+ /* the old version did this in the FETCH_ME entry code */
+ sendFetch(&theGlobalFromGA, &theGlobalToGA, 0/*load*/);
+
+}
+#endif
+
+
+/*
+ Way of dealing with unwanted fish.
+ Used during startup/shutdown, or from unknown PEs
+*/
+void
+bounceFish(void) {
+ GlobalTaskId origPE;
+ int age, history, hunger;
+
+ /* IF_PAR_DEBUG(verbose, */
+ belch(".... [%x] Bouncing unwanted FISH",mytid);
+
+ unpackFish(&origPE, &age, &history, &hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsFalse; // fish has come home
+ outstandingFishes--;
+ last_fish_arrived_at = CURRENT_TIME; // remember time (see schedule fct)
+ return; // that's all
+ }
+
+ /* otherwise, send it home to die */
+ sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+}
+
+/*
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+//@cindex processFish
+static void
+processFish(void)
+{
+ GlobalTaskId origPE;
+ int age, history, hunger;
+ rtsSpark spark;
+ static rtsPackBuffer *packBuffer;
+
+ unpackFish(&origPE, &age, &history, &hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsFalse; // fish has come home
+ outstandingFishes--;
+ last_fish_arrived_at = CURRENT_TIME; // remember time (see schedule fct)
+ return; // that's all
+ }
+
+ ASSERT(origPE != mytid);
+ IF_PAR_DEBUG(fish,
+ belch("$$__ processing fish; %d sparks available",
+ spark_queue_len(&(MainRegTable.rSparks))));
+ while ((spark = findSpark(rtsTrue/*for_export*/)) != NULL) {
+ nat size;
+ // StgClosure *graph;
+
+ packBuffer = gumPackBuffer;
+ ASSERT(closure_SHOULD_SPARK((StgClosure *)spark));
+ if ((packBuffer = PackNearbyGraph(spark, END_TSO_QUEUE, &size,origPE)) == NULL) {
+ IF_PAR_DEBUG(fish,
+ belch("$$ GC while trying to satisfy FISH via PackNearbyGraph of node %p",
+ (StgClosure *)spark));
+ barf("processFish: out of heap while packing graph; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ /* Now go back and try again */
+ } else {
+ IF_PAR_DEBUG(verbose,
+ if (RtsFlags.ParFlags.ParStats.Sparks)
+ belch("==== STEALING spark %x; sending to %x", spark, origPE));
+
+ IF_PAR_DEBUG(fish,
+ belch("$$-- Replying to FISH from %x by sending graph @ %p (%s)",
+ origPE,
+ (StgClosure *)spark, info_type((StgClosure *)spark)));
+ sendSchedule(origPE, size, packBuffer);
+ disposeSpark(spark);
+ // Global statistics: count no. of schedule messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_schedule_mess++;
+ }
+
+ break;
+ }
+ }
+ if (spark == (rtsSpark)NULL) {
+ IF_PAR_DEBUG(fish,
+ belch("$$^^ No sparks available for FISH from %x",
+ origPE));
+ /* We have no sparks to give */
+ if (age < FISH_LIFE_EXPECTANCY) {
+ /* and the fish is still young, send it to another PE to look for work */
+ sendFish(choosePE(), origPE,
+ (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+ } else { /* otherwise, send it home to die */
+ sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+ }
+ }
+} /* processFish */
+
+/*
+ * processFetch either returns the requested data (if available)
+ * or blocks the remote blocking queue on a black hole (if not).
+ */
+
+//@cindex processFetch
+static void
+processFetch(void)
+{
+ globalAddr ga, rga;
+ int load;
+ StgClosure *closure;
+ StgInfoTable *ip;
+
+ unpackFetch(&ga, &rga, &load);
+ IF_PAR_DEBUG(fetch,
+ belch("%%%%__ Rcvd Fetch for ((%x, %d, 0)), Resume ((%x, %d, %x)) (load %d) from %x",
+ ga.payload.gc.gtid, ga.payload.gc.slot,
+ rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight, load,
+ rga.payload.gc.gtid));
+
+ closure = GALAlookup(&ga);
+ ASSERT(closure != (StgClosure *)NULL);
+ ip = get_itbl(closure);
+ if (ip->type == FETCH_ME) {
+ /* Forward the Fetch to someone else */
+ sendFetch(((StgFetchMe *)closure)->ga, &rga, load);
+
+ // Global statistics: count no. of fetches
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fetch_mess++;
+ }
+ } else if (rga.payload.gc.gtid == mytid) {
+ /* Our own FETCH forwarded back around to us */
+ StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)GALAlookup(&rga);
+
+ IF_PAR_DEBUG(fetch,
+ belch("%%%%== Fetch returned to sending PE; closure=%p (%s); receiver=%p (%s)",
+ closure, info_type(closure), fmbq, info_type((StgClosure*)fmbq)));
+ /* We may have already discovered that the fetch target is our own. */
+ if ((StgClosure *)fmbq != closure)
+ CommonUp((StgClosure *)fmbq, closure);
+ (void) addWeight(&rga);
+ } else if (IS_BLACK_HOLE(closure)) {
+ /* This includes RBH's and FMBQ's */
+ StgBlockedFetch *bf;
+
+ /* Can we assert something on the remote GA? */
+ ASSERT(GALAlookup(&rga) == NULL);
+
+ /* If we're hitting a BH or RBH or FMBQ we have to put a BLOCKED_FETCH
+ closure into the BQ in order to denote that when updating this node
+ the result should be sent to the originator of this fetch message. */
+ bf = (StgBlockedFetch *)createBlockedFetch(ga, rga);
+ IF_PAR_DEBUG(fetch,
+ belch("%%++ Blocking Fetch ((%x, %d, %x)) on %p (%s)",
+ rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight,
+ closure, info_type(closure)));
+ blockFetch(bf, closure);
+ } else {
+ /* The target of the FetchMe is some local graph */
+ nat size;
+ // StgClosure *graph;
+ rtsPackBuffer *buffer = (rtsPackBuffer *)NULL;
+
+ if ((buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, rga.payload.gc.gtid)) == NULL) {
+ barf("processFetch: out of heap while packing graph; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ closure = GALAlookup(&ga);
+ buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, rga.payload.gc.gtid);
+ ASSERT(buffer != (rtsPackBuffer *)NULL);
+ }
+ sendResume(&rga, size, buffer);
+
+ // Global statistics: count no. of resume messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_resume_mess++;
+ }
+ }
+}
+
+/*
+ The list of pending fetches must be a root-list for GC.
+ This routine is called from GC.c (same as marking GAs etc).
+*/
+void
+markPendingFetches(rtsBool major_gc) {
+
+ /* No need to traverse the list; this is done via the scavenge code
+ for a BLOCKED_FETCH closure, which evacuates the link field */
+
+ if (PendingFetches != END_BF_QUEUE ) {
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@@@ PendingFetches is root; evaced from %p to",
+ PendingFetches));
+
+ PendingFetches = MarkRoot((StgClosure*)PendingFetches);
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, " %p\n", PendingFetches));
+
+ } else {
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@@@ PendingFetches is empty; no need to mark it\n"));
+ }
+}
+
+/*
+ * processFree unpacks a FREE message and adds the weights to our GAs.
+ */
+//@cindex processFree
+static void
+processFree(void)
+{
+ int nelem;
+ static StgWord *buffer;
+ int i;
+ globalAddr ga;
+
+ buffer = (StgWord *)gumPackBuffer;
+ unpackFree(&nelem, buffer);
+ IF_PAR_DEBUG(free,
+ belch("!!__ Rcvd Free (%d GAs)", nelem / 2));
+
+ ga.payload.gc.gtid = mytid;
+ for (i = 0; i < nelem;) {
+ ga.weight = (rtsWeight) buffer[i++];
+ ga.payload.gc.slot = (int) buffer[i++];
+ IF_PAR_DEBUG(free,
+ fprintf(stderr, "!!-- Processing free ");
+ printGA(&ga);
+ fputc('\n', stderr);
+ );
+ (void) addWeight(&ga);
+ }
+}
+
+/*
+ * processResume unpacks a RESUME message into the graph, filling in
+ * the LA -> GA, and GA -> LA tables. Threads blocked on the original
+ * FetchMe (now a blocking queue) are awakened, and the blocking queue
+ * is converted into an indirection. Finally it sends an ACK in response
+ * which contains any newly allocated GAs.
+ */
+
+//@cindex processResume
+static void
+processResume(GlobalTaskId sender)
+{
+ int nelem;
+ nat nGAs;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph, *old;
+ globalAddr lga;
+ globalAddr *gagamap;
+
+ packBuffer = (rtsPackBuffer *)gumPackBuffer;
+ unpackResume(&lga, &nelem, packBuffer);
+
+ IF_PAR_DEBUG(fetch,
+ fprintf(stderr, "[]__ Rcvd Resume for ");
+ printGA(&lga);
+ fputc('\n', stderr));
+ IF_PAR_DEBUG(packet,
+ PrintPacket((rtsPackBuffer *)packBuffer));
+
+ /*
+ * We always unpack the incoming graph, even if we've received the
+ * requested node in some other data packet (and already awakened
+ * the blocking queue).
+ if (SAVE_Hp + packBuffer[0] >= SAVE_HpLim) {
+ ReallyPerformThreadGC(packBuffer[0], rtsFalse);
+ SAVE_Hp -= packBuffer[0];
+ }
+ */
+
+ // ToDo: Check for GC here !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ /* Do this *after* GC; we don't want to release the object early! */
+
+ if (lga.weight > 0)
+ (void) addWeight(&lga);
+
+ old = GALAlookup(&lga);
+
+ /* ToDo: The closure that requested this graph must be one of these two? */
+ ASSERT(get_itbl(old)->type == FETCH_ME_BQ ||
+ get_itbl(old)->type == RBH);
+
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ StgBlockingQueueElement *bqe, *last_bqe;
+
+ IF_PAR_DEBUG(fetch,
+ belch("[]-- Resume is REPLY to closure %lx", old));
+
+ /* Write REPLY events to the log file, indicating that the remote
+ data has arrived
+ NB: we emit a REPLY only for the *last* elem in the queue; this is
+ the one that triggered the fetch message; all other entries
+ have just added themselves to the queue, waiting for the data
+ they know has been requested (see entry code for FETCH_ME_BQ)
+ */
+ if ((get_itbl(old)->type == FETCH_ME_BQ ||
+ get_itbl(old)->type == RBH)) {
+ for (bqe = ((StgFetchMeBlockingQueue *)old)->blocking_queue,
+ last_bqe = END_BQ_QUEUE;
+ get_itbl(bqe)->type==TSO ||
+ get_itbl(bqe)->type==BLOCKED_FETCH;
+ last_bqe = bqe, bqe = bqe->link) { /* nothing */ }
+
+ ASSERT(last_bqe==END_BQ_QUEUE ||
+ get_itbl((StgClosure *)last_bqe)->type == TSO);
+
+ /* last_bqe now points to the TSO that triggered the FETCH */
+ if (get_itbl((StgClosure *)last_bqe)->type == TSO)
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(sender),
+ GR_REPLY, ((StgTSO *)last_bqe), ((StgTSO *)last_bqe)->block_info.closure,
+ 0, spark_queue_len(&(MainRegTable.rSparks)));
+ }
+ }
+
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+
+ /*
+ * Sometimes, unpacking will common up the resumee with the
+ * incoming graph, but if it hasn't, we'd better do so now.
+ */
+
+ if (get_itbl(old)->type == FETCH_ME_BQ)
+ CommonUp(old, newGraph);
+
+ IF_PAR_DEBUG(fetch,
+ belch("[]-- Ready to resume unpacked graph at %p (%s)",
+ newGraph, info_type(newGraph)));
+
+ IF_PAR_DEBUG(tables,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ sendAck(sender, nGAs, gagamap);
+}
+
+/*
+ * processSchedule unpacks a SCHEDULE message into the graph, filling
+ * in the LA -> GA, and GA -> LA tables. The root of the graph is added to
+ * the local spark queue. Finally it sends an ACK in response
+ * which contains any newly allocated GAs.
+ */
+//@cindex processSchedule
+static void
+processSchedule(GlobalTaskId sender)
+{
+ nat nelem, nGAs;
+ rtsBool success;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph;
+ globalAddr *gagamap;
+
+ packBuffer = gumPackBuffer; /* HWL */
+ unpackSchedule(&nelem, packBuffer);
+
+ IF_PAR_DEBUG(schedule,
+ belch("--__ Rcvd Schedule (%d elems)", nelem));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ /*
+ * For now, the graph is a closure to be sparked as an advisory
+ * spark, but in future it may be a complete spark with
+ * required/advisory status, priority etc.
+ */
+
+ /*
+ space_required = packBuffer[0];
+ if (SAVE_Hp + space_required >= SAVE_HpLim) {
+ ReallyPerformThreadGC(space_required, rtsFalse);
+ SAVE_Hp -= space_required;
+ }
+ */
+ // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+ success = add_to_spark_queue(newGraph, &(MainRegTable.rSparks));
+
+ if (RtsFlags.ParFlags.ParStats.Full &&
+ RtsFlags.ParFlags.ParStats.Sparks &&
+ success)
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_STOLEN, ((StgTSO *)NULL), newGraph,
+ 0, 0 /* spark_queue_len(ADVISORY_POOL) */);
+
+ IF_PAR_DEBUG(schedule,
+ if (success)
+ belch("--^^ added spark to unpacked graph %p (%s); %d sparks available on [%x] (%s)",
+ newGraph, info_type(newGraph), spark_queue_len(&(MainRegTable.rSparks)), mytid);
+ else
+ belch("--^^ received non-sparkable closure %p (%s); nothing added to spark pool; %d sparks available on [%x]",
+ newGraph, info_type(newGraph), spark_queue_len(&(MainRegTable.rSparks)), mytid));
+ IF_PAR_DEBUG(packet,
+ belch("*< Unpacked graph with root at %p (%s):",
+ newGraph, info_type(newGraph));
+ PrintGraph(newGraph, 0));
+
+ IF_PAR_DEBUG(tables,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ sendAck(sender, nGAs, gagamap);
+
+ //fishing = rtsFalse;
+ ASSERT(outstandingFishes>0);
+ outstandingFishes--;
+}
+
+/*
+ * processAck unpacks an ACK, and uses the GAGA map to convert RBH's
+ * (which represent shared thunks that have been shipped) into fetch-mes
+ * to remote GAs.
+ */
+//@cindex processAck
+static void
+processAck(void)
+{
+ nat nGAs;
+ globalAddr *gaga;
+ globalAddr gagamap[256]; // ToDo: elim magic constant!! MAX_GAS * 2];??
+
+ unpackAck(&nGAs, gagamap);
+
+ IF_PAR_DEBUG(tables,
+ belch(",,,, Rcvd Ack (%d pairs)", nGAs);
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ IF_DEBUG(sanity,
+ checkGAGAMap(gagamap, nGAs));
+
+ /*
+ * For each (oldGA, newGA) pair, set the GA of the corresponding
+ * thunk to the newGA, convert the thunk to a FetchMe, and return
+ * the weight from the oldGA.
+ */
+ for (gaga = gagamap; gaga < gagamap + nGAs * 2; gaga += 2) {
+ StgClosure *old_closure = GALAlookup(gaga);
+ StgClosure *new_closure = GALAlookup(gaga + 1);
+
+ ASSERT(old_closure != NULL);
+ if (new_closure == NULL) {
+ /* We don't have this closure, so we make a fetchme for it */
+ globalAddr *ga = setRemoteGA(old_closure, gaga + 1, rtsTrue);
+
+ /* convertToFetchMe should be done unconditionally here.
+ Currently, we assign GAs to CONSTRs, too, (a bit of a hack),
+ so we have to check whether it is an RBH before converting
+
+ ASSERT(get_itbl(old_closure)==RBH);
+ */
+ if (get_itbl(old_closure)->type==RBH)
+ convertToFetchMe((StgRBH *)old_closure, ga);
+ } else {
+ /*
+ * Oops...we've got this one already; update the RBH to
+ * point to the object we already know about, whatever it
+ * happens to be.
+ */
+ CommonUp(old_closure, new_closure);
+
+ /*
+ * Increase the weight of the object by the amount just
+ * received in the second part of the ACK pair.
+ */
+ (void) addWeight(gaga + 1);
+ }
+ (void) addWeight(gaga);
+ }
+
+ /* check the sanity of the LAGA and GALA tables after mincing them */
+ IF_DEBUG(sanity, checkLAGAtable(rtsFalse));
+}
+
+#ifdef DIST
+
+void
+bounceReval(void) {
+ barf("Task %x: TODO: should send NACK in response to REVAL",mytid);
+}
+
+static void
+processReval(GlobalTaskId sender) //similar to schedule...
+{ nat nelem, space_required, nGAs;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph;
+ globalAddr *gagamap;
+ StgTSO* tso;
+ globalAddr *ga;
+
+ packBuffer = gumPackBuffer; /* HWL */
+ unpackSchedule(&nelem, packBuffer); /* okay, since the structure is the same */
+
+ IF_PAR_DEBUG(packet,
+ belch("@;~) [%x] Rcvd Reval (%d elems)", mytid, nelem);
+ PrintPacket(packBuffer));
+
+ /*
+ space_required = packBuffer[0];
+ if (SAVE_Hp + space_required >= SAVE_HpLim) {
+ ReallyPerformThreadGC(space_required, rtsFalse);
+ SAVE_Hp -= space_required;
+ }
+ */
+
+ // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+
+ IF_PAR_DEBUG(packet,
+ belch("@;~) Unpacked graph with root at %p (%s):",
+ newGraph, info_type(newGraph));
+ PrintGraph(newGraph, 0));
+
+ IF_PAR_DEBUG(tables,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ IF_PAR_DEBUG(tables,
+ printLAGAtable();
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ //We don't send an Ack to the head!!!!
+ ASSERT(nGAs>0);
+ sendAck(sender, nGAs-1, gagamap+2);
+
+ IF_PAR_DEBUG(verbose,
+ belch("@;~) About to create Reval thread on behalf of %x",
+ sender));
+
+ tso=createGenThread(RtsFlags.GcFlags.initialStkSize,newGraph);
+ tso->priority=RevalPriority;
+ tso->revalSlot=gagamap->payload.gc.slot;//record who sent the reval
+ tso->revalTid =gagamap->payload.gc.gtid;
+ scheduleThread(tso);
+ context_switch = 1; // switch at the earliest opportunity
+}
+#endif
+
+
+//@node GUM Message Processor, Miscellaneous Functions, Message-Processing Functions, High Level Communications Routines
+//@subsection GUM Message Processor
+
+/*
+ * GUM Message Processor
+
+ * processMessages processes any messages that have arrived, calling
+ * appropriate routines depending on the message tag
+ * (opcode). N.B. Unless profiling, it assumes that there {\em ARE} messages
+ * present and performs a blocking receive! During profiling it
+ * busy-waits in order to record idle time.
+ */
+
+//@cindex processMessages
+rtsBool
+processMessages(void)
+{
+ rtsPacket packet;
+ OpCode opcode;
+ GlobalTaskId task;
+ rtsBool receivedFinish = rtsFalse;
+
+ do {
+ packet = GetPacket(); /* Get next message; block until one available */
+ getOpcodeAndSender(packet, &opcode, &task);
+
+ if (task==SysManTask) {
+ switch (opcode) {
+ case PP_PETIDS:
+ processPEtids();
+ break;
+
+ case PP_FINISH:
+ IF_PAR_DEBUG(verbose,
+ belch("==== received FINISH [%p]", mytid));
+ /* this boolean value is returned and propagated to the main
+ scheduling loop, thus shutting-down this PE */
+ receivedFinish = rtsTrue;
+ break;
+
+ default:
+ barf("Task %x: received unknown opcode %x from SysMan",mytid, opcode);
+ }
+ } else if (taskIDtoPE(task)==0) {
+ /* When a new PE joins, FISH & REVAL messages may potentially
+ reach PEs before they are notified of the new PE's existence. The
+ only solution is to bounce/fail these messages back to the sender.
+ But we will worry about it once we start seeing these race
+ conditions! */
+ switch (opcode) {
+ case PP_FISH:
+ bounceFish();
+ break;
+#ifdef DIST
+ case PP_REVAL:
+ bounceReval();
+ break;
+#endif
+ case PP_PETIDS:
+ belch("Task %x: Ignoring PVM session opened by another SysMan %x",mytid,task);
+ break;
+
+ case PP_FINISH:
+ break;
+
+ default:
+ belch("Task %x: Ignoring opcode %x from unknown PE %x",mytid, opcode, task);
+ }
+ } else
+ switch (opcode) {
+ case PP_FETCH:
+ processFetch();
+ // Global statistics: count no. of fetches
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.rec_fetch_mess++;
+ }
+ break;
+
+ case PP_RESUME:
+ processResume(task);
+ // Global statistics: count no. of resume messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.rec_resume_mess++;
+ }
+ break;
+
+ case PP_ACK:
+ processAck();
+ break;
+
+ case PP_FISH:
+ processFish();
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.rec_fish_mess++;
+ }
+ break;
+
+ case PP_FREE:
+ processFree();
+ break;
+
+ case PP_SCHEDULE:
+ processSchedule(task);
+ // Global statistics: count no. of schedule messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.rec_schedule_mess++;
+ }
+ break;
+
+#ifdef DIST
+ case PP_REVAL:
+ processReval(task);
+ // Global statistics: count no. of reval messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.rec_reval_mess++;
+ }
+ break;
+#endif
+
+ default:
+ /* Anything we're not prepared to deal with. */
+ barf("Task %x: Unexpected opcode %x from %x",
+ mytid, opcode, task);
+ } /* switch */
+
+ } while (PacketsWaiting()); /* While there are messages: process them */
+ return receivedFinish;
+} /* processMessages */
+
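+/* A sketch (not part of the build) of the call site in the scheduler's
+   main loop, as described above; the rtsTrue result propagates the FINISH
+   and shuts this PE down: */
+#if 0
+if (PacketsWaiting()) {                  /* non-blocking probe       */
+  receivedFinish = processMessages();    /* blocking receive inside  */
+}
+#endif
+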
+//@node Miscellaneous Functions, Index, GUM Message Processor, High Level Communications Routines
+//@subsection Miscellaneous Functions
+
+/*
+ * blockFetch blocks a BlockedFetch node on some kind of black hole.
+ */
+//@cindex blockFetch
+void
+blockFetch(StgBlockedFetch *bf, StgClosure *bh) {
+ bf->node = bh;
+ switch (get_itbl(bh)->type) {
+ case BLACKHOLE:
+ bf->link = END_BQ_QUEUE;
+ //((StgBlockingQueue *)bh)->header.info = &stg_BLACKHOLE_BQ_info;
+ SET_INFO(bh, &stg_BLACKHOLE_BQ_info); // turn closure into a blocking queue
+ ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case BLACKHOLE_BQ:
+ /* enqueue bf on blocking queue of closure bh */
+ bf->link = ((StgBlockingQueue *)bh)->blocking_queue;
+ ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list; ToDo: check
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case FETCH_ME_BQ:
+ /* enqueue bf on blocking queue of closure bh */
+ bf->link = ((StgFetchMeBlockingQueue *)bh)->blocking_queue;
+ ((StgFetchMeBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list; ToDo: check
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case RBH:
+ /* enqueue bf on blocking queue of closure bh */
+ bf->link = ((StgRBH *)bh)->blocking_queue;
+ ((StgRBH *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list; ToDo: check
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ default:
+ barf("blockFetch: thought %p was a black hole (IP %#lx, %s)",
+ (StgClosure *)bh, get_itbl((StgClosure *)bh),
+ info_type((StgClosure *)bh));
+ }
+ IF_PAR_DEBUG(bq,
+ belch("##++ blockFetch: after block the BQ of %p (%s) is:",
+ bh, info_type(bh));
+ print_bq(bh));
+}
+
+
+/*
+ @blockThread@ is called from the main scheduler whenever tso returns with
+ a ThreadBlocked return code; tso has already been added to a blocking
+ queue (that's done in the entry code of the closure, because it is a
+ cheap operation we have to do in any case); the main purpose of this
+ routine is to send a Fetch message in case we are blocking on a FETCHME(_BQ)
+ closure, which is indicated by the tso.why_blocked field;
+ we also write an entry into the log file if we are generating one
+
+  Ideally exectime etc should be updated in the entry code already; but we
+  don't have anything like ``system time'' in the log file anyway, so this
+  should even out the inaccuracies.
+*/
+
+//@cindex blockThread
+void
+blockThread(StgTSO *tso)
+{
+ globalAddr *remote_ga=NULL;
+ globalAddr *local_ga;
+ globalAddr fmbq_ga;
+
+ // ASSERT(we are on some blocking queue)
+ ASSERT(tso->block_info.closure != (StgClosure *)NULL);
+
+ /*
+ We have to check why this thread has been blocked.
+ */
+ switch (tso->why_blocked) {
+ case BlockedOnGA:
+ /* the closure must be a FETCH_ME_BQ; tso came in here via
+ FETCH_ME entry code */
+ ASSERT(get_itbl(tso->block_info.closure)->type==FETCH_ME_BQ);
+
+    /* HACK: the link field is used to hold the GA between FETCH_ME_entry
+       and this point; if something (eg. GC) happens in between, the whole
+       thing will blow up.
+       The problem is that the ga field of the FETCH_ME has been overwritten
+       with the head of the blocking queue (which is tso).
+    */
+ ASSERT(looks_like_ga(&theGlobalFromGA));
+ // ASSERT(tso->link!=END_TSO_QUEUE && tso->link!=NULL);
+ remote_ga = &theGlobalFromGA; //tso->link;
+ tso->link = (StgTSO*)END_BQ_QUEUE;
+ /* it was tso which turned node from FETCH_ME into FETCH_ME_BQ =>
+ we have to send a Fetch message here! */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ tso->par.exectime += CURRENT_TIME - tso->par.blockedat;
+ tso->par.fetchcount++;
+ tso->par.blockedat = CURRENT_TIME;
+ /* we are about to send off a FETCH message, so dump a FETCH event */
+ DumpRawGranEvent(CURRENT_PROC,
+ taskIDtoPE(remote_ga->payload.gc.gtid),
+ GR_FETCH, tso, tso->block_info.closure, 0, 0);
+ }
+ /* Phil T. claims that this was a workaround for a hard-to-find
+ * bug, hence I'm leaving it out for now --SDM
+ */
+ /* Assign a brand-new global address to the newly created FMBQ */
+ local_ga = makeGlobal(tso->block_info.closure, rtsFalse);
+ splitWeight(&fmbq_ga, local_ga);
+ ASSERT(fmbq_ga.weight == 1U << (BITS_IN(unsigned) - 1));
+
+ sendFetch(remote_ga, &fmbq_ga, 0/*load*/);
+
+ // Global statistics: count no. of fetches
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fetch_mess++;
+ }
+
+ IF_DEBUG(sanity,
+ theGlobalFromGA.payload.gc.gtid = (GlobalTaskId)0);
+ break;
+
+ case BlockedOnGA_NoSend:
+ /* the closure must be a FETCH_ME_BQ; tso came in here via
+ FETCH_ME_BQ entry code */
+ ASSERT(get_itbl(tso->block_info.closure)->type==FETCH_ME_BQ);
+
+ /* Fetch message has been sent already */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ tso->par.exectime += CURRENT_TIME - tso->par.blockedat;
+ tso->par.blockcount++;
+ tso->par.blockedat = CURRENT_TIME;
+ /* dump a block event, because fetch has been sent already */
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, tso, tso->block_info.closure, 0, 0);
+ }
+ break;
+
+ case BlockedOnMVar:
+ case BlockedOnBlackHole:
+ /* the closure must be a BLACKHOLE_BQ or an RBH; tso came in here via
+ BLACKHOLE(_BQ) or CAF_BLACKHOLE or RBH entry code */
+ ASSERT(get_itbl(tso->block_info.closure)->type==MVAR ||
+ get_itbl(tso->block_info.closure)->type==BLACKHOLE_BQ ||
+ get_itbl(tso->block_info.closure)->type==RBH);
+
+ /* if collecting stats update the execution time etc */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ tso->par.exectime += CURRENT_TIME - tso->par.blockedat;
+ tso->par.blockcount++;
+ tso->par.blockedat = CURRENT_TIME;
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, tso, tso->block_info.closure, 0, 0);
+ }
+ break;
+
+ case BlockedOnDelay:
+    /* What sort of stats should we collect for an explicit threadDelay? */
+ IF_PAR_DEBUG(verbose,
+ belch("##++ blockThread: TSO %d blocked on ThreadDelay",
+ tso->id));
+ break;
+
+    /* Check that the following cases are indeed impossible here:
+       case BlockedOnException:
+       case BlockedOnRead:
+       case BlockedOnWrite:
+    */
+ default:
+ barf("blockThread: impossible why_blocked code %d for TSO %d",
+ tso->why_blocked, tso->id);
+ }
+
+ IF_PAR_DEBUG(verbose,
+ belch("##++ blockThread: TSO %d blocked on closure %p (%s); %s",
+ tso->id, tso->block_info.closure, info_type(tso->block_info.closure),
+ (tso->why_blocked==BlockedOnGA) ? "Sent FETCH for GA" : ""));
+
+ IF_PAR_DEBUG(bq,
+ print_bq(tso->block_info.closure));
+}
+
+/*
+ * choosePE selects a GlobalTaskId from the array of PEs 'at random'.
+ * Important properties:
+ * - it varies during execution, even if the PE is idle
+ * - it's different for each PE
+ * - we never send a fish to ourselves
+ */
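+/* For instance (illustrative values only): with nPEs == 4 and
+   mytid == allPEs[2], a draw of 2 from lrand48() % nPEs is bumped to
+   index 3, so the FISH goes to allPEs[3] instead of back to ourselves. */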
+extern long lrand48 (void);
+
+//@cindex choosePE
+GlobalTaskId
+choosePE(void)
+{
+ long temp;
+
+ temp = lrand48() % nPEs;
+ if (allPEs[temp] == mytid) { /* Never send a FISH to yourself */
+ temp = (temp + 1) % nPEs;
+ }
+ return allPEs[temp];
+}
+
+/*
+ * allocate a BLOCKED_FETCH closure and fill it with the relevant fields
+ * of the ga argument; called from processFetch when the local closure is
+ * under evaluation
+ */
+//@cindex createBlockedFetch
+StgClosure *
+createBlockedFetch (globalAddr ga, globalAddr rga)
+{
+ StgBlockedFetch *bf;
+ StgClosure *closure;
+
+ closure = GALAlookup(&ga);
+ if ((bf = (StgBlockedFetch *)allocate(_HS + sizeofW(StgBlockedFetch))) == NULL) {
+ barf("createBlockedFetch: out of heap while allocating heap for a BlocekdFetch; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ closure = GALAlookup(&ga);
+ bf = (StgBlockedFetch *)allocate(_HS + sizeofW(StgBlockedFetch));
+ // ToDo: check whether really guaranteed to succeed 2nd time around
+ }
+
+ ASSERT(bf != (StgBlockedFetch *)NULL);
+ SET_INFO((StgClosure *)bf, &stg_BLOCKED_FETCH_info);
+ // ToDo: check whether other header info is needed
+ bf->node = closure;
+ bf->ga.payload.gc.gtid = rga.payload.gc.gtid;
+ bf->ga.payload.gc.slot = rga.payload.gc.slot;
+ bf->ga.weight = rga.weight;
+ // bf->link = NULL; debugging
+
+  IF_PAR_DEBUG(schedule,
+	     fprintf(stderr, "%%%%// created BF: bf=%p (%s) of closure, GA: ",
+ bf, info_type((StgClosure*)bf));
+ printGA(&(bf->ga));
+ fputc('\n',stderr));
+ return (StgClosure *)bf;
+}
+
+/*
+ * waitForTermination enters a loop ignoring spurious messages while
+ * waiting for the termination sequence to be completed.
+ */
+//@cindex waitForTermination
+void
+waitForTermination(void)
+{
+ do {
+ rtsPacket p = GetPacket();
+ processUnexpectedMessage(p);
+ } while (rtsTrue);
+}
+
+#ifdef DEBUG
+//@cindex DebugPrintGAGAMap
+void
+DebugPrintGAGAMap(globalAddr *gagamap, int nGAs)
+{
+ nat i;
+
+  for (i = 0; i < (nat)nGAs; ++i, gagamap += 2)
+ fprintf(stderr, "__ gagamap[%d] = ((%x, %d, %x)) -> ((%x, %d, %x))\n", i,
+ gagamap[0].payload.gc.gtid, gagamap[0].payload.gc.slot, gagamap[0].weight,
+ gagamap[1].payload.gc.gtid, gagamap[1].payload.gc.slot, gagamap[1].weight);
+}
+
+//@cindex checkGAGAMap
+void
+checkGAGAMap(globalAddr *gagamap, int nGAs)
+{
+ nat i;
+
+ for (i = 0; i < (nat)nGAs; ++i, gagamap += 2) {
+ ASSERT(looks_like_ga(gagamap));
+ ASSERT(looks_like_ga(gagamap+1));
+ }
+}
+#endif
+
+//@cindex freeMsgBuffer
+static StgWord **freeMsgBuffer = NULL;
+//@cindex freeMsgIndex
+static nat *freeMsgIndex = NULL;
+
+//@cindex prepareFreeMsgBuffers
+void
+prepareFreeMsgBuffers(void)
+{
+ nat i;
+
+ /* Allocate the freeMsg buffers just once and then hang onto them. */
+ if (freeMsgIndex == NULL) {
+ freeMsgIndex = (nat *) stgMallocBytes(nPEs * sizeof(nat),
+ "prepareFreeMsgBuffers (Index)");
+ freeMsgBuffer = (StgWord **) stgMallocBytes(nPEs * sizeof(long *),
+ "prepareFreeMsgBuffers (Buffer)");
+
+ for(i = 0; i < nPEs; i++)
+ if (i != (thisPE-1))
+ freeMsgBuffer[i] = (StgPtr) stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+ "prepareFreeMsgBuffers (Buffer #i)");
+ else
+ freeMsgBuffer[i] = 0;
+ }
+
+ /* Initialize the freeMsg buffer pointers to point to the start of their
+ buffers */
+ for (i = 0; i < nPEs; i++)
+ freeMsgIndex[i] = 0;
+}
+
+//@cindex freeRemoteGA
+void
+freeRemoteGA(int pe, globalAddr *ga)
+{
+ nat i;
+
+ ASSERT(GALAlookup(ga) == NULL);
+
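+  /* Each deferred FREE entry occupies two words, (weight, slot); the
+     destination gtid is implicit in the per-PE buffer, hence the "+ 2"
+     overflow check below. */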
+ if ((i = freeMsgIndex[pe]) + 2 >= RtsFlags.ParFlags.packBufferSize) {
+    IF_PAR_DEBUG(free,
+		 belch("!! Filled a free message buffer (sending remaining messages individually)"));
+
+ sendFree(ga->payload.gc.gtid, i, freeMsgBuffer[pe]);
+ i = 0;
+ }
+ freeMsgBuffer[pe][i++] = (StgWord) ga->weight;
+ freeMsgBuffer[pe][i++] = (StgWord) ga->payload.gc.slot;
+ freeMsgIndex[pe] = i;
+
+ IF_DEBUG(sanity,
+ ga->weight = 0xdead0add;
+ ga->payload.gc.gtid = 0xbbbbbbbb;
+ ga->payload.gc.slot = 0xbbbbbbbb;);
+}
+
+//@cindex sendFreeMessages
+void
+sendFreeMessages(void)
+{
+ nat i;
+
+ for (i = 0; i < nPEs; i++)
+ if (freeMsgIndex[i] > 0)
+ sendFree(allPEs[i], freeMsgIndex[i], freeMsgBuffer[i]);
+}
+
+/* synchronises with the other PEs. Receives and records in a global
+ * variable the task-id of SysMan. If this is the main thread (discovered
+ * in main.lc), identifies itself to SysMan. Finally it receives
+ * from SysMan an array of the Global Task Ids of each PE, which is
+ * returned as the value of the function.
+ */
+
+#if defined(PAR_TICKY)
+/* Has to see freeMsgIndex, so must be defined here not in ParTicky.c */
+//@cindex stats_CntFreeGA
+void
+stats_CntFreeGA (void) { // stats only
+
+  // Global statistics: residency of deferred FREE messages (pending GAs)
+  if (RtsFlags.ParFlags.ParStats.Global &&
+      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+    nat i, s;
+
+    globalParStats.cnt_free_GA++;
+    for (i = 0, s = 0; i < nPEs; i++) {
+      globalParStats.tot_free_GA += freeMsgIndex[i]/2;
+      s += freeMsgIndex[i]/2;   // s counts only the GAs pending right now
+    }
+
+ if ( s > globalParStats.res_free_GA )
+ globalParStats.res_free_GA = s;
+ }
+}
+#endif /* PAR_TICKY */
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Miscellaneous Functions, High Level Communications Routines
+//@subsection Index
+
+//@index
+//* ACK:: @cindex\s-+ACK
+//* DebugPrintGAGAMap:: @cindex\s-+DebugPrintGAGAMap
+//* FETCH:: @cindex\s-+FETCH
+//* FISH:: @cindex\s-+FISH
+//* FREE:: @cindex\s-+FREE
+//* RESUME:: @cindex\s-+RESUME
+//* SCHEDULE:: @cindex\s-+SCHEDULE
+//* blockFetch:: @cindex\s-+blockFetch
+//* choosePE:: @cindex\s-+choosePE
+//* freeMsgBuffer:: @cindex\s-+freeMsgBuffer
+//* freeMsgIndex:: @cindex\s-+freeMsgIndex
+//* freeRemoteGA:: @cindex\s-+freeRemoteGA
+//* gumPackBuffer:: @cindex\s-+gumPackBuffer
+//* initMoreBuffers:: @cindex\s-+initMoreBuffers
+//* prepareFreeMsgBuffers:: @cindex\s-+prepareFreeMsgBuffers
+//* processAck:: @cindex\s-+processAck
+//* processFetch:: @cindex\s-+processFetch
+//* processFetches:: @cindex\s-+processFetches
+//* processFish:: @cindex\s-+processFish
+//* processFree:: @cindex\s-+processFree
+//* processMessages:: @cindex\s-+processMessages
+//* processResume:: @cindex\s-+processResume
+//* processSchedule:: @cindex\s-+processSchedule
+//* sendAck:: @cindex\s-+sendAck
+//* sendFetch:: @cindex\s-+sendFetch
+//* sendFish:: @cindex\s-+sendFish
+//* sendFree:: @cindex\s-+sendFree
+//* sendFreeMessages:: @cindex\s-+sendFreeMessages
+//* sendResume:: @cindex\s-+sendResume
+//* sendSchedule:: @cindex\s-+sendSchedule
+//* unpackAck:: @cindex\s-+unpackAck
+//* unpackFetch:: @cindex\s-+unpackFetch
+//* unpackFish:: @cindex\s-+unpackFish
+//* unpackFree:: @cindex\s-+unpackFree
+//* unpackResume:: @cindex\s-+unpackResume
+//* unpackSchedule:: @cindex\s-+unpackSchedule
+//* waitForTermination:: @cindex\s-+waitForTermination
+//@end index
diff --git a/rts/parallel/LLC.h b/rts/parallel/LLC.h
new file mode 100644
index 0000000000..536e431bef
--- /dev/null
+++ b/rts/parallel/LLC.h
@@ -0,0 +1,130 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sun Mar 18 2001 21:23:50 Stardate: [-30]6349.45 hwloidl>
+
+ Low Level Communications Header (LLC.h)
+
+ Contains the definitions used by the Low-level Communications
+ module of the GUM Haskell runtime environment.
+ Based on the Graph for PVM implementation.
+
+ Phil Trinder, Glasgow University, 13th Dec 1994
+ Adapted for the 4.xx RTS
+ H-W. Loidl, Heriot-Watt, November 1999
+ ----------------------------------------------------------------------- */
+
+#ifndef __LLC_H
+#define __LLC_H
+
+#ifdef PAR
+
+//@node Low Level Communications Header, , ,
+//@section Low Level Communications Header
+
+//@menu
+//* Includes::
+//* Macros and Constants::
+//* PVM macros::
+//* Externs::
+//@end menu
+
+//@node Includes, Macros and Constants, Low Level Communications Header, Low Level Communications Header
+//@subsection Includes
+
+#include "Rts.h"
+#include "Parallel.h"
+
+#include "PEOpCodes.h"
+#include "pvm3.h"
+
+//@node Macros and Constants, PVM macros, Includes, Low Level Communications Header
+//@subsection Macros and Constants
+
+#define ANY_TASK (-1) /* receive messages from any task */
+#define ANY_GLOBAL_TASK ANY_TASK
+#define ANY_OPCODE (-1) /* receive any opcode */
+#define ALL_GROUP (-1) /* wait for barrier from every group member */
+
+#define PEGROUP "PE"
+
+#define MGRGROUP "MGR"
+#define SYSGROUP "SYS"
+
+
+#define PETASK "PE"
+
+//@node PVM macros, Externs, Macros and Constants, Low Level Communications Header
+//@subsection PVM macros
+
+#define sync(gp,op) do { \
+ broadcast(gp,op); \
+ pvm_barrier(gp,ALL_GROUP); \
+ } while(0)
+
+#define broadcast(gp,op) do { \
+ pvm_initsend(PvmDataDefault); \
+ pvm_bcast(gp,op); \
+ } while(0)
+
+#define checkComms(c,s) do { \
+ if ((c)<0) { \
+ pvm_perror(s); \
+ stg_exit(EXIT_FAILURE); \
+ }} while(0)
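+
+/* Typical use of checkComms: wrap a PVM call and abort cleanly on
+   failure, e.g.
+     checkComms(pvm_joingroup(PEGROUP), "PEStartup");
+   as done by startUpPE in LLComms.c. */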
+
+#define _my_gtid pvm_mytid()
+#define GetPacket() pvm_recv(ANY_TASK,ANY_OPCODE)
+#define PacketsWaiting() (pvm_probe(ANY_TASK,ANY_OPCODE) != 0)
+
+#define SPARK_THREAD_DESCRIPTOR 1
+#define GLOBAL_THREAD_DESCRIPTOR 2
+
+#define _extract_jump_field(v) (v)
+
+#define MAX_DATA_WORDS_IN_PACKET 1024
+
+/* basic PVM packing */
+#define PutArg1(a) pvm_pklong((long *)&(a),1,1)
+#define PutArg2(a) pvm_pklong((long *)&(a),1,1)
+#define PutArgN(n,a) pvm_pklong((long *)&(a),1,1)
+#define PutArgs(b,n) pvm_pklong((long *)b,n,1)
+
+#define PutLit(l)	{ int a = l; PutArgN(1,a); } /* NB: PutArgN ignores its index argument */
+
+/* basic PVM unpacking */
+#define GetArg1(a) pvm_upklong((long *)&(a),1,1)
+#define GetArg2(a) pvm_upklong((long *)&(a),1,1)
+#define GetArgN(n,a) pvm_upklong((long *)&(a),1,1)
+#define GetArgs(b,n) pvm_upklong((long *)b,n,1)
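+
+/* The Put/Get macros must pair up across a message: both sides have to
+   agree on the number and order of the long-sized arguments.  A minimal
+   sketch (assuming a send/receive pair already set up with
+   pvm_initsend/pvm_recv):
+
+     sender:    PutArg1(x); PutArg2(y);
+     receiver:  GetArg1(x); GetArg2(y);
+*/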
+
+//@node Externs, , PVM macros, Low Level Communications Header
+//@subsection Externs
+
+/* basic message passing routines */
+extern void sendOp (OpCode,GlobalTaskId),
+ sendOp1 (OpCode,GlobalTaskId,StgWord),
+ sendOp2 (OpCode,GlobalTaskId,StgWord,StgWord),
+ sendOpV (OpCode,GlobalTaskId,int,...),
+ sendOpN (OpCode,GlobalTaskId,int,StgPtr),
+ sendOpNV (OpCode,GlobalTaskId,int,StgPtr,int,...);
+
+extern void broadcastOpN(OpCode op, char *group, int n, StgPtr args);
+
+/* extracting data out of a packet */
+OpCode getOpcode (rtsPacket p);
+void getOpcodeAndSender (rtsPacket p, OpCode *popcode,
+ GlobalTaskId *psender_id);
+GlobalTaskId senderTask (rtsPacket p);
+rtsPacket waitForPEOp(OpCode op, GlobalTaskId who, void(*processUnexpected)(rtsPacket) );
+
+/* Init and shutdown routines */
+void startUpPE (void);
+void shutDownPE(void);
+int getExitCode(int nbytes, GlobalTaskId *sender_idp);
+
+/* aux functions */
+char *getOpName (unsigned op); // returns string of opcode
+void processUnexpectedMessage (rtsPacket);
+//void NullException(void);
+
+#endif /*PAR */
+#endif /*defined __LLC_H */
diff --git a/rts/parallel/LLComms.c b/rts/parallel/LLComms.c
new file mode 100644
index 0000000000..baa6dddf0c
--- /dev/null
+++ b/rts/parallel/LLComms.c
@@ -0,0 +1,489 @@
+/* ----------------------------------------------------------------------------
+ * Time-stamp: <Mon Mar 19 2001 22:10:38 Stardate: [-30]6354.62 hwloidl>
+ *
+ * GUM Low-Level Inter-Task Communication
+ *
+ * This module defines PVM Routines for PE-PE communication.
+ *
+ * P. Trinder, December 5th. 1994.
+ * P. Trinder, July 1998
+ * H-W. Loidl, November 1999 -
+ --------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@node GUM Low-Level Inter-Task Communication, , ,
+//@section GUM Low-Level Inter-Task Communication
+
+/*
+ *This module defines the routines which communicate between PEs. The
+ *code is based on Kevin Hammond's GRIP RTS. (OpCodes.h defines
+ *PEOp1 etc. in terms of sendOp1 etc.).
+ *
+ *Routine & Arguments
+ * &
+ *sendOp & 0 \\
+ *sendOp1 & 1 \\
+ *sendOp2 & 2 \\
+ *sendOpN & vector \\
+ *sendOpV & variable \\
+ *sendOpNV & variable+ vector \\
+ *
+ *First the standard include files.
+ */
+
+//@menu
+//* Macros etc::
+//* Includes::
+//* Auxiliary functions::
+//* Index::
+//@end menu
+
+//@node Macros etc, Includes, GUM Low-Level Inter-Task Communication, GUM Low-Level Inter-Task Communication
+//@subsection Macros etc
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+#define UNUSED /* nothing */
+
+//@node Includes, Auxiliary functions, Macros etc, GUM Low-Level Inter-Task Communication
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Parallel.h"
+#include "ParallelRts.h"
+#if defined(DEBUG)
+# include "ParallelDebug.h"
+#endif
+#include "LLC.h"
+
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+/* Cannot use std macro when compiling for SysMan */
+/* debugging enabled */
+// #define IF_PAR_DEBUG(c,s) { s; }
+/* debugging disabled */
+#define IF_PAR_DEBUG(c,s) /* nothing */
+
+//@node Auxiliary functions, Index, Includes, GUM Low-Level Inter-Task Communication
+//@subsection Auxiliary functions
+
+/*
+ * heapChkCounter tracks the number of heap checks since the last probe.
+ * Not currently used! We check for messages when a thread is rescheduled.
+ */
+int heapChkCounter = 0;
+
+/*
+ * Then some miscellaneous functions.
+ * getOpName returns the character-string name of any OpCode.
+ */
+
+char *UserPEOpNames[] = { PEOP_NAMES };
+
+//@cindex getOpName
+char *
+getOpName(nat op)
+{
+ if (op >= MIN_PEOPS && op <= MAX_PEOPS)
+ return (UserPEOpNames[op - MIN_PEOPS]);
+ else
+ return ("Unknown PE OpCode");
+}
+
+/*
+ * traceSendOp handles the tracing of messages.
+ */
+
+//@cindex traceSendOp
+static void
+traceSendOp(OpCode op, GlobalTaskId dest UNUSED,
+ unsigned int data1 UNUSED, unsigned int data2 UNUSED)
+{
+ char *OpName;
+
+ OpName = getOpName(op);
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr," %s [%x,%x] sent from %x to %x",
+ OpName, data1, data2, mytid, dest));
+}
+
+/*
+ * sendOp sends a 0-argument message with OpCode {\em op} to
+ * the global task {\em task}.
+ */
+
+//@cindex sendOp
+void
+sendOp(OpCode op, GlobalTaskId task)
+{
+ traceSendOp(op, task,0,0);
+
+ pvm_initsend(PvmDataRaw);
+ pvm_send(task, op);
+}
+
+/*
+ * sendOp1 sends a 1-argument message with OpCode {\em op}
+ * to the global task {\em task}.
+ */
+
+//@cindex sendOp1
+void
+sendOp1(OpCode op, GlobalTaskId task, StgWord arg1)
+{
+ traceSendOp(op, task, arg1,0);
+
+ pvm_initsend(PvmDataRaw);
+ PutArg1(arg1);
+ pvm_send(task, op);
+}
+
+
+/*
+ * sendOp2 is used by the FP code only.
+ */
+
+//@cindex sendOp2
+void
+sendOp2(OpCode op, GlobalTaskId task, StgWord arg1, StgWord arg2)
+{
+ traceSendOp(op, task, arg1, arg2);
+
+ pvm_initsend(PvmDataRaw);
+ PutArg1(arg1);
+ PutArg2(arg2);
+ pvm_send(task, op);
+}
+
+/*
+ *
+ * sendOpV takes a variable number of arguments, as specified by {\em n}.
+ * For example,
+ *
+ * sendOpV( PP_STATS, StatsTask, 3, start_time, stop_time, sparkcount);
+ */
+
+//@cindex sendOpV
+void
+sendOpV(OpCode op, GlobalTaskId task, int n, ...)
+{
+ va_list ap;
+ int i;
+ StgWord arg;
+
+ va_start(ap, n);
+
+ traceSendOp(op, task, 0, 0);
+
+ pvm_initsend(PvmDataRaw);
+
+ for (i = 0; i < n; ++i) {
+ arg = va_arg(ap, StgWord);
+ PutArgN(i, arg);
+ }
+ va_end(ap);
+
+ pvm_send(task, op);
+}
+
+/*
+ *
+ * sendOpNV takes a variable-size datablock, as specified by {\em
+ * nelem} and a variable number of arguments, as specified by {\em
+ * narg}. N.B. The datablock and the additional arguments are contiguous
+ * and are copied over together. For example,
+ *
+ * sendOpNV(PP_RESUME, tsoga.pe, 6, nelem, data,
+ * (W_) ga.weight, (W_) ga.loc.gc.gtid, (W_) ga.loc.gc.slot,
+ * (W_) tsoga.weight, (W_) tsoga.loc.gc.gtid, (W_) tsoga.loc.gc.slot);
+ *
+ * Important: The variable arguments must all be StgWords.
+
+ sendOpNV(_, tid, m, n, data, x1, ..., xm):
+
+ | n elems
+ +------------------------------
+ | x1 | ... | xm | n | data ....
+ +------------------------------
+ */
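+
+/* A hypothetical receiver-side sketch for the layout above, shown for
+   two fixed arguments (all the Get macros unpack one long sequentially,
+   so the read order must mirror the packing order):
+
+     StgWord x1, x2, nelem;
+     StgWord data[MAX_DATA_WORDS_IN_PACKET];
+     GetArg1(x1);          // first fixed argument
+     GetArg1(x2);          // second fixed argument
+     GetArg1(nelem);       // element count, packed after the arguments
+     GetArgs(data, nelem); // the datablock itself
+*/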
+
+//@cindex sendOpNV
+void
+sendOpNV(OpCode op, GlobalTaskId task, int nelem,
+ StgWord *datablock, int narg, ...)
+{
+ va_list ap;
+ int i;
+ StgWord arg;
+
+ va_start(ap, narg);
+
+ traceSendOp(op, task, 0, 0);
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr,"~~ sendOpNV: op = %x (%s), task = %x, narg = %d, nelem = %d",
+ op, getOpName(op), task, narg, nelem));
+
+ pvm_initsend(PvmDataRaw);
+
+ for (i = 0; i < narg; ++i) {
+ arg = va_arg(ap, StgWord);
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr,"~~ sendOpNV: arg = %d\n",arg));
+ PutArgN(i, arg);
+ }
+ arg = (StgWord) nelem;
+ PutArgN(narg, arg);
+
+/* for (i=0; i < nelem; ++i) fprintf(stderr, "%d ",datablock[i]); */
+/* fprintf(stderr," in sendOpNV\n");*/
+
+ PutArgs(datablock, nelem);
+ va_end(ap);
+
+ pvm_send(task, op);
+}
+
+/*
+ * sendOpN takes a variable-size array argument, whose size is given by
+ * {\em n}. For example,
+ *
+ * sendOpN( PP_STATS, StatsTask, 3, stats_array);
+ */
+
+//@cindex sendOpN
+void
+sendOpN(OpCode op, GlobalTaskId task, int n, StgPtr args)
+{
+ long arg;
+
+ traceSendOp(op, task, 0, 0);
+
+ pvm_initsend(PvmDataRaw);
+ arg = (long) n;
+ PutArgN(0, arg);
+ PutArgs(args, n);
+ pvm_send(task, op);
+}
+
+/*
+ * broadcastOpN is as sendOpN but broadcasts to all members of a group.
+ */
+
+void
+broadcastOpN(OpCode op, char *group, int n, StgPtr args)
+{
+ long arg;
+
+ //traceSendOp(op, task, 0, 0);
+
+ pvm_initsend(PvmDataRaw);
+ arg = (long) n;
+ PutArgN(0, arg);
+ PutArgs(args, n);
+ pvm_bcast(group, op);
+}
+
+/*
+  waitForPEOp waits for a packet from global task who with the
+  OpCode op. If processUnexpected is NULL all other messages are
+  simply ignored; otherwise they are passed to that handler.
+ */
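+/* For example (a hypothetical use): block until SysMan sends the PE
+   table, handling anything else that arrives in the meantime:
+
+     rtsPacket p = waitForPEOp(PP_PETIDS, ANY_TASK, processUnexpectedMessage);
+*/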
+//@cindex waitForPEOp
+rtsPacket
+waitForPEOp(OpCode op, GlobalTaskId who, void(*processUnexpected)(rtsPacket) )
+{
+ rtsPacket p;
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ rtsBool match;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"~~ waitForPEOp: expecting op = %x (%s), who = [%x]\n",
+ op, getOpName(op), who));
+
+ do {
+ while((p = pvm_recv(ANY_TASK,ANY_OPCODE)) < 0)
+ pvm_perror("waitForPEOp: Waiting for PEOp");
+
+ pvm_bufinfo( p, &nbytes, &opCode, &sender_id );
+ match = (op == ANY_OPCODE || op == opCode) &&
+ (who == ANY_TASK || who == sender_id);
+
+ if (match) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,
+ "~~waitForPEOp: Qapla! received: OpCode = %#x (%s), sender_id = [%x]",
+ opCode, getOpName(opCode), sender_id));
+
+ return(p);
+ }
+
+ /* Handle the unexpected OpCodes */
+ if (processUnexpected!=NULL) {
+ (*processUnexpected)(p);
+ } else {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,
+ "~~ waitForPEOp: ignoring OpCode = %#x (%s), sender_id = [%x]",
+ opCode, getOpName(opCode), sender_id));
+ }
+
+ } while(rtsTrue);
+}
+
+/*
+  processUnexpectedMessage processes unexpected messages. If the message
+  is a FINISH it exits the program and shuts down PVM gracefully.
+ */
+//@cindex processUnexpectedMessage
+void
+processUnexpectedMessage(rtsPacket packet) {
+ OpCode opCode = getOpcode(packet);
+
+ IF_PAR_DEBUG(verbose,
+ GlobalTaskId sender = senderTask(packet);
+ fprintf(stderr,"~~ [%x] processUnexpected: Received %x (%s), sender %x\n",
+ mytid, opCode, getOpName(opCode), sender));
+
+ switch (opCode) {
+ case PP_FINISH:
+ stg_exit(EXIT_SUCCESS);
+ break;
+
+ /* Anything we're not prepared to deal with. Note that ALL OpCodes
+ are discarded during termination -- this helps prevent bizarre
+ race conditions. */
+ default:
+ // if (!GlobalStopPending)
+ {
+ GlobalTaskId errorTask;
+ OpCode opCode;
+
+ getOpcodeAndSender(packet, &opCode, &errorTask);
+ fprintf(stderr,"== Task %x: Unexpected OpCode %x from %x in processUnexpected",
+ mytid, opCode, errorTask );
+
+ stg_exit(EXIT_FAILURE);
+ }
+ }
+}
+
+//@cindex getOpcode
+OpCode
+getOpcode(rtsPacket p)
+{
+ int nbytes;
+  OpCode opCode;
+  GlobalTaskId sender_id;
+  /* read PVM buffer */
+  pvm_bufinfo(p, &nbytes, &opCode, &sender_id);
+  /* return the tag of the buffer as the opcode */
+  return(opCode);
+}
+
+//@cindex getOpcodeAndSender
+void
+getOpcodeAndSender(rtsPacket p, OpCode *opCodep, GlobalTaskId *senderIdp)
+{
+ int nbytes;
+ /* read PVM buffer */
+ pvm_bufinfo(p, &nbytes, opCodep, senderIdp);
+}
+
+//@cindex senderTask
+GlobalTaskId
+senderTask(rtsPacket p)
+{
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ /* read PVM buffer */
+ pvm_bufinfo(p, &nbytes, &opCode, &sender_id);
+ return(sender_id);
+}
+
+/*
+ * startUpPE does the low-level comms specific startup stuff for a
+ * PE. It initialises the comms system, joins the appropriate groups,
+ * and allocates the PE buffer.
+ */
+
+//@cindex startUpPE
+void
+startUpPE(void)
+{
+ mytid = _my_gtid; /* Initialise PVM and get task id into global var.*/
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Task id = [%x], No. PEs = %d \n",
+ mytid, mytid, nPEs));
+ checkComms(pvm_joingroup(PEGROUP), "PEStartup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Joined PEGROUP\n", mytid));
+}
+
+/*
+ * PEShutdown does the low-level comms-specific shutdown stuff for a
+ * single PE. It leaves the groups and then exits from pvm.
+ */
+//@cindex shutDownPE
+void
+shutDownPE(void)
+{
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "== [%x] PEshutdown\n", mytid));
+
+ checkComms(pvm_lvgroup(PEGROUP),"PEShutDown");
+ checkComms(pvm_exit(),"PEShutDown");
+}
+
+/*
+ Extract the exit code out of a PP_FINISH packet (used in SysMan)
+*/
+int
+getExitCode(int nbytes, GlobalTaskId *sender_idp) {
+ int exitCode=0;
+
+ if (nbytes==4) { // Notification from a task doing pvm_exit
+ GetArgs(sender_idp,1); // Presumably this must be MainPE Id
+ exitCode = -1;
+ } else if (nbytes==8) { // Doing a controlled shutdown
+ GetArgs(&exitCode,1); // HACK: controlled shutdown == 2 values
+ GetArgs(&exitCode,1);
+ } else {
+ exitCode = -2; // everything else
+ }
+ return exitCode;
+}
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Auxiliary functions, GUM Low-Level Inter-Task Communication
+//@subsection Index
+
+//@index
+//* getOpName:: @cindex\s-+getOpName
+//* traceSendOp:: @cindex\s-+traceSendOp
+//* sendOp:: @cindex\s-+sendOp
+//* sendOp1:: @cindex\s-+sendOp1
+//* sendOp2:: @cindex\s-+sendOp2
+//* sendOpV:: @cindex\s-+sendOpV
+//* sendOpNV:: @cindex\s-+sendOpNV
+//* sendOpN:: @cindex\s-+sendOpN
+//* waitForPEOp:: @cindex\s-+waitForPEOp
+//* processUnexpectedMessage:: @cindex\s-+processUnexpectedMessage
+//* getOpcode:: @cindex\s-+getOpcode
+//* getOpcodeAndSender:: @cindex\s-+getOpcodeAndSender
+//* senderTask:: @cindex\s-+senderTask
+//* startUpPE:: @cindex\s-+startUpPE
+//* shutDownPE:: @cindex\s-+shutDownPE
+//@end index
diff --git a/rts/parallel/PEOpCodes.h b/rts/parallel/PEOpCodes.h
new file mode 100644
index 0000000000..2d18b439f2
--- /dev/null
+++ b/rts/parallel/PEOpCodes.h
@@ -0,0 +1,58 @@
+#ifndef PEOPCODES_H
+#define PEOPCODES_H
+
+/************************************************************************
+* PEOpCodes.h *
+* *
+* This file contains definitions for all the GUM PE Opcodes *
+* It's based on the GRAPH for PVM version *
+* Phil Trinder, Glasgow University 8th December 1994 *
+* *
+ RFPointon, December 1999
+ - removed PP_SYSMAN_TID, introduced PP_READY
+ - removed PP_MAIN_TASK, introduced PP_NEWPE
+ - added PP_REVAL
+************************************************************************/
+
+#define REPLY_OK 0x00
+
+/*Startup + Shutdown*/
+#define PP_READY 0x50 /* sent PEs -> SysMan */
+#define PP_NEWPE 0x51 /* sent via newHost notify -> SysMan */
+#define PP_FINISH	0x52  /* sent PEs & via taskExit notify -> SysMan */
+#define PP_PETIDS 0x53 /* sent sysman -> PEs */
+
+/* Stats stuff */
+#define PP_STATS 0x54
+#define PP_STATS_ON 0x55
+#define PP_STATS_OFF 0x56
+
+//#define PP_FAIL 0x57
+
+/*Garbage Collection*/
+#define PP_GC_INIT 0x58
+#define PP_FULL_SYSTEM 0x59
+#define PP_GC_POLL 0x5a
+
+/*GUM Messages*/
+#define PP_FETCH 0x5b
+#define PP_RESUME 0x5c
+#define PP_ACK 0x5d
+#define PP_FISH 0x5e
+#define PP_SCHEDULE 0x5f
+#define PP_FREE 0x60
+#define PP_REVAL 0x61
+
+
+#define MIN_PEOPS 0x50
+#define MAX_PEOPS 0x61
+
+#define PEOP_NAMES "Ready", "NewPE", \
+ "Finish", "PETIDS", \
+ "Stats", "Stats_On", "Stats_Off", \
+ "Fail", \
+ "GCInit", "FullSystem", "GCPoll", \
+ "Fetch","Resume","ACK","Fish","Schedule", \
+ "Free","REval"
+
+#endif /* PEOPCODES_H */
diff --git a/rts/parallel/Pack.c b/rts/parallel/Pack.c
new file mode 100644
index 0000000000..e8653f6303
--- /dev/null
+++ b/rts/parallel/Pack.c
@@ -0,0 +1,4293 @@
+/*
+ Time-stamp: <Wed Mar 21 2001 16:32:47 Stardate: [-30]6363.44 hwloidl>
+
+ Graph packing and unpacking code for sending it to another processor
+ and retrieving the original graph structure from the packet.
+ In the old RTS the code was split into Pack.c and Unpack.c (now deceased)
+ Used in GUM and GrAnSim.
+
+ The GrAnSim version of the code defines routines for *simulating* the
+ packing of closures in the same way it is done in the parallel runtime
+ system. Basically GrAnSim only puts the addresses of the closures to be
+ transferred into a buffer. This buffer will then be associated with the
+ event of transferring the graph. When this event is scheduled, the
+ @UnpackGraph@ routine is called and the buffer can be discarded
+ afterwards.
+
+ Note that in GranSim we need many buffers, not just one per PE.
+*/
+
+//@node Graph packing, , ,
+//@section Graph packing
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* Global variables::
+//* ADT of Closure Queues::
+//* Initialisation for packing::
+//* Packing Functions::
+//* Low level packing routines::
+//* Unpacking routines::
+//* Aux fcts for packing::
+//* Printing Packet Contents::
+//* End of file::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Graph packing, Graph packing
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ClosureTypes.h"
+#include "Storage.h"
+#include "Hash.h"
+#include "Parallel.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+# if defined(DEBUG)
+# include "Sanity.h"
+# include "Printer.h"
+# include "ParallelDebug.h"
+# endif
+#include "FetchMe.h"
+
+/* Which RTS flag should be used to get the size of the pack buffer ? */
+# if defined(PAR)
+# define RTS_PACK_BUFFER_SIZE RtsFlags.ParFlags.packBufferSize
+# else /* GRAN */
+# define RTS_PACK_BUFFER_SIZE RtsFlags.GranFlags.packBufferSize
+# endif
+
+//@node Prototypes, Global variables, Includes, Graph packing
+//@subsection Prototypes
+/*
+ Code declarations.
+*/
+
+//@node ADT of closure queues, Init for packing, Prototypes, Prototypes
+//@subsubsection ADT of closure queues
+
+static inline void InitClosureQueue(void);
+static inline rtsBool QueueEmpty(void);
+static inline void QueueClosure(StgClosure *closure);
+static inline StgClosure *DeQueueClosure(void);
+
+//@node Init for packing, Packing routines, ADT of closure queues, Prototypes
+//@subsubsection Init for packing
+
+static void InitPacking(rtsBool unpack);
+# if defined(PAR)
+rtsBool InitPackBuffer(void);
+# elif defined(GRAN)
+rtsPackBuffer *InstantiatePackBuffer (void);
+static void reallocPackBuffer (void);
+# endif
+
+//@node Packing routines, Low level packing fcts, Init for packing, Prototypes
+//@subsubsection Packing routines
+
+static void PackClosure (StgClosure *closure);
+
+//@node Low level packing fcts, Unpacking routines, Packing routines, Prototypes
+//@subsubsection Low level packing fcts
+
+# if defined(GRAN)
+static void Pack (StgClosure *data);
+# else
+static void Pack (StgWord data);
+
+static void PackGeneric(StgClosure *closure);
+static void PackArray(StgClosure *closure);
+static void PackPLC (StgPtr addr);
+static void PackOffset (int offset);
+static void PackPAP(StgPAP *pap);
+static rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize);
+static rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize);
+static void PackFetchMe(StgClosure *closure);
+
+static void GlobaliseAndPackGA (StgClosure *closure);
+# endif
+
+//@node Unpacking routines, Aux fcts for packing, Low level packing fcts, Prototypes
+//@subsubsection Unpacking routines
+
+# if defined(PAR)
+void InitPendingGABuffer(nat size);
+void CommonUp(StgClosure *src, StgClosure *dst);
+static StgClosure *SetGAandCommonUp(globalAddr *gaP, StgClosure *closure,
+ rtsBool hasGA);
+static nat FillInClosure(StgWord ***bufptrP, StgClosure *graph);
+static void LocateNextParent(StgClosure **parentP,
+ nat *pptrP, nat *pptrsP, nat *sizeP);
+StgClosure *UnpackGraph(rtsPackBuffer *packBuffer,
+ globalAddr **gamap,
+ nat *nGAs);
+static StgClosure *UnpackClosure (StgWord ***bufptrP, StgClosure **graphP,
+ globalAddr *ga);
+static StgWord **UnpackGA(StgWord **bufptr, globalAddr *ga);
+static StgClosure *UnpackOffset(globalAddr *ga);
+static StgClosure *UnpackPLC(globalAddr *ga);
+static void UnpackArray(StgWord ***bufptrP, StgClosure *graph);
+static nat UnpackPAP(StgWord ***bufptrP, StgClosure *graph);
+
+# elif defined(GRAN)
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+#endif
+
+//@node Aux fcts for packing, , Unpacking routines, Prototypes
+//@subsubsection Aux fcts for packing
+
+# if defined(PAR)
+static void DonePacking(void);
+static void AmPacking(StgClosure *closure);
+static int OffsetFor(StgClosure *closure);
+static rtsBool NotYetPacking(int offset);
+static inline rtsBool RoomToPack (nat size, nat ptrs);
+static inline rtsBool isOffset(globalAddr *ga);
+static inline rtsBool isFixed(globalAddr *ga);
+static inline rtsBool isConstr(globalAddr *ga);
+static inline rtsBool isUnglobalised(globalAddr *ga);
+# elif defined(GRAN)
+static void DonePacking(void);
+static rtsBool NotYetPacking(StgClosure *closure);
+# endif
+
+//@node Global variables, ADT of Closure Queues, Prototypes, Graph packing
+//@subsection Global variables
+/*
+ Static data declarations
+*/
+
+static nat pack_locn, /* ptr to first free loc in pack buffer */
+ clq_size, clq_pos,
+ buf_id = 1; /* identifier for buffer */
+static nat unpacked_size;
+static rtsBool roomInBuffer;
+#if defined(PAR)
+static GlobalTaskId dest_gtid=0; /* destination for message to send */
+#endif
+
+/*
+ The pack buffer
+ To be pedantic: in GrAnSim we're packing *addresses* of closures,
+ not the closures themselves.
+*/
+static rtsPackBuffer *globalPackBuffer = NULL, /* for packing a graph */
+ *globalUnpackBuffer = NULL; /* for unpacking a graph */
+
+
+/*
+ Bit of a hack for testing if a closure is the root of the graph. This is
+ set in @PackNearbyGraph@ and tested in @PackClosure@.
+*/
+
+static nat packed_thunks = 0;
+static StgClosure *graph_root;
+
+# if defined(PAR)
+/*
+ The offset hash table is used during packing to record the location in
+ the pack buffer of each closure which is packed.
+*/
+//@cindex offsetTable
+static HashTable *offsetTable;
+
+//@cindex PendingGABuffer
+static globalAddr *PendingGABuffer, *gaga;
+
+# endif /* PAR */
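+
+/* A sketch of the packing protocol built on the offset table (see the
+   aux fcts prototyped above; this pattern recurs in PackClosure and
+   PackFetchMe below):
+
+     int offset = OffsetFor(closure);   // previous pack location, if any
+     if (!NotYetPacking(offset)) {
+       PackOffset(offset);              // already packed: share, don't copy
+     } else {
+       AmPacking(closure);              // record pack_locn in offsetTable
+       ... pack the closure's GA, info ptr and payload ...
+     }
+*/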
+
+
+//@node ADT of Closure Queues, Initialisation for packing, Global variables, Graph packing
+//@subsection ADT of Closure Queues
+
+//@menu
+//* Closure Queues::
+//* Init routines::
+//* Basic routines::
+//@end menu
+
+//@node Closure Queues, Init routines, ADT of Closure Queues, ADT of Closure Queues
+//@subsubsection Closure Queues
+/*
+ Closure Queues
+
+ These routines manage the closure queue.
+*/
+
+static nat clq_pos, clq_size;
+
+static StgClosure **ClosureQueue = NULL; /* HWL: init in main */
+
+#if defined(DEBUG)
+static char graphFingerPrint[MAX_FINGER_PRINT_LEN];
+#endif
+
+//@node Init routines, Basic routines, Closure Queues, ADT of Closure Queues
+//@subsubsection Init routines
+
+/* @InitClosureQueue@ allocates and initialises the closure queue. */
+
+//@cindex InitClosureQueue
+static inline void
+InitClosureQueue(void)
+{
+ clq_pos = clq_size = 0;
+
+ if (ClosureQueue==NULL)
+ ClosureQueue = (StgClosure**) stgMallocWords(RTS_PACK_BUFFER_SIZE,
+ "InitClosureQueue");
+}
+
+//@node Basic routines, Types of Global Addresses, Init routines, ADT of Closure Queues
+//@subsubsection Basic routines
+
+/*
+ QueueEmpty returns rtsTrue if the closure queue is empty; rtsFalse otherwise.
+*/
+
+//@cindex QueueEmpty
+static inline rtsBool
+QueueEmpty(void)
+{
+ return(clq_pos >= clq_size);
+}
+
+/* QueueClosure adds its argument to the closure queue. */
+
+//@cindex QueueClosure
+static inline void
+QueueClosure(closure)
+StgClosure *closure;
+{
+ if(clq_size < RTS_PACK_BUFFER_SIZE ) {
+ IF_PAR_DEBUG(paranoia,
+ belch(">__> <<%d>> Q: %p (%s); %d elems in q",
+ globalPackBuffer->id, closure, info_type(closure), clq_size-clq_pos));
+ ClosureQueue[clq_size++] = closure;
+ } else {
+ barf("Closure Queue Overflow (EnQueueing %p (%s))",
+ closure, info_type(closure));
+ }
+}
+
+/* DeQueueClosure returns the head of the closure queue. */
+
+//@cindex DeQueueClosure
+static inline StgClosure *
+DeQueueClosure(void)
+{
+ if(!QueueEmpty()) {
+ IF_PAR_DEBUG(paranoia,
+ belch(">__> <<%d>> DeQ: %p (%s); %d elems in q",
+ globalPackBuffer->id, ClosureQueue[clq_pos], info_type(ClosureQueue[clq_pos]),
+ clq_size-clq_pos));
+ return(ClosureQueue[clq_pos++]);
+ } else {
+ return((StgClosure*)NULL);
+ }
+}
+
+/* PrintQueueClosure prints the contents of the closure queue (debugging). */
+
+#if defined(DEBUG)
+//@cindex PrintQueueClosure
+static void
+PrintQueueClosure(void)
+{
+ nat i;
+
+ fputs("Closure queue:", stderr);
+ for (i=clq_pos; i < clq_size; i++)
+    fprintf(stderr, "%p (%s), ",
+	    (StgClosure *)ClosureQueue[i],
+	    info_type(ClosureQueue[i]));
+ fputc('\n', stderr);
+}
+#endif
+
+//@node Types of Global Addresses, , Basic routines, ADT of Closure Queues
+//@subsubsection Types of Global Addresses
+
+/*
+ Types of Global Addresses
+
+ These routines determine whether a GA is one of a number of special types
+ of GA.
+*/
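+
+/* Summary of the weight encoding tested below:
+     weight == 0              fixed (static) address       (isFixed)
+     weight == 1, gtid == 0   offset into the pack buffer  (isOffset)
+     weight == 2              constructor / unglobalised   (isConstr, isUnglobalised)
+*/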
+
+# if defined(PAR)
+//@cindex isOffset
+static inline rtsBool
+isOffset(globalAddr *ga)
+{
+ return (ga->weight == 1U && ga->payload.gc.gtid == (GlobalTaskId)0);
+}
+
+//@cindex isFixed
+static inline rtsBool
+isFixed(globalAddr *ga)
+{
+ return (ga->weight == 0U);
+}
+
+//@cindex isConstr
+static inline rtsBool
+isConstr(globalAddr *ga)
+{
+ return (ga->weight == 2U);
+}
+
+//@cindex isUnglobalised
+static inline rtsBool
+isUnglobalised(globalAddr *ga)
+{
+ return (ga->weight == 2U);
+}
+# endif
+
+//@node Initialisation for packing, Packing Functions, ADT of Closure Queues, Graph packing
+//@subsection Initialisation for packing
+/*
+ Simple Packing Routines
+
+ About packet sizes in GrAnSim: In GrAnSim we use a malloced block of
+ gransim_pack_buffer_size words to simulate a packet of pack_buffer_size
+ words. In the simulated PackBuffer we only keep the addresses of the
+ closures that would be packed in the parallel system (see Pack). To
+ decide if a packet overflow occurs pack_buffer_size must be compared
+  versus unpacked_size (see RoomToPack). Currently, there is no
+  multi-packet strategy implemented, so in the case of an overflow we just stop
+ adding closures to the closure queue. If an overflow of the simulated
+ packet occurs, we just realloc some more space for it and carry on as
+ usual. -- HWL
+*/
+
+# if defined(GRAN)
+rtsPackBuffer *
+InstantiatePackBuffer (void) {
+ extern rtsPackBuffer *globalPackBuffer;
+
+ globalPackBuffer = (rtsPackBuffer *) stgMallocWords(sizeofW(rtsPackBuffer),
+ "InstantiatePackBuffer: failed to alloc packBuffer");
+ globalPackBuffer->size = RtsFlags.GranFlags.packBufferSize_internal;
+ globalPackBuffer->buffer = (StgWord **) stgMallocWords(RtsFlags.GranFlags.packBufferSize_internal,
+ "InstantiatePackBuffer: failed to alloc GranSim internal packBuffer");
+ /* NB: gransim_pack_buffer_size instead of pack_buffer_size -- HWL */
+ /* stgMallocWords is now simple allocate in Storage.c */
+
+ return (globalPackBuffer);
+}
+
+/*
+ Reallocate the GranSim internal pack buffer to make room for more closure
+ pointers. This is independent of the check for packet overflow as in GUM
+*/
+static void
+reallocPackBuffer (void) {
+
+ ASSERT(pack_locn >= (int)globalPackBuffer->size+sizeofW(rtsPackBuffer));
+
+ IF_GRAN_DEBUG(packBuffer,
+ belch("** Increasing size of PackBuffer %p to %d words (PE %u @ %d)\n",
+ globalPackBuffer, globalPackBuffer->size+REALLOC_SZ,
+ CurrentProc, CurrentTime[CurrentProc]));
+
+ globalPackBuffer = (rtsPackBuffer*)realloc(globalPackBuffer,
+ sizeof(StgClosure*)*(REALLOC_SZ +
+ (int)globalPackBuffer->size +
+ sizeofW(rtsPackBuffer))) ;
+ if (globalPackBuffer==(rtsPackBuffer*)NULL)
+ barf("Failing to realloc %d more words for PackBuffer %p (PE %u @ %d)\n",
+ REALLOC_SZ, globalPackBuffer, CurrentProc, CurrentTime[CurrentProc]);
+
+ globalPackBuffer->size += REALLOC_SZ;
+
+ ASSERT(pack_locn < globalPackBuffer->size+sizeofW(rtsPackBuffer));
+}
+# endif
+
+# if defined(PAR)
+/* @initPacking@ initialises the packing buffer etc. */
+//@cindex InitPackBuffer
+rtsBool
+InitPackBuffer(void)
+{
+ if (globalPackBuffer==(rtsPackBuffer*)NULL) {
+ if ((globalPackBuffer = (rtsPackBuffer *)
+ stgMallocWords(sizeofW(rtsPackBuffer)+RtsFlags.ParFlags.packBufferSize+DEBUG_HEADROOM,
+ "InitPackBuffer")) == NULL)
+ return rtsFalse;
+ }
+ return rtsTrue;
+}
+
+# endif
+//@cindex InitPacking
+static void
+InitPacking(rtsBool unpack)
+{
+# if defined(GRAN)
+ globalPackBuffer = InstantiatePackBuffer(); /* for GrAnSim only -- HWL */
+ /* NB: free in UnpackGraph */
+# elif defined(PAR)
+ if (unpack) {
+ /* allocate a GA-to-GA map (needed for ACK message) */
+ InitPendingGABuffer(RtsFlags.ParFlags.packBufferSize);
+ } else {
+ /* allocate memory to pack the graph into */
+ InitPackBuffer();
+ }
+# endif
+ /* init queue of closures seen during packing */
+ InitClosureQueue();
+
+ if (unpack)
+ return;
+
+ globalPackBuffer->id = buf_id++; /* buffer id are only used for debugging! */
+ pack_locn = 0; /* the index into the actual pack buffer */
+ unpacked_size = 0; /* the size of the whole graph when unpacked */
+ roomInBuffer = rtsTrue;
+ packed_thunks = 0; /* total number of thunks packed so far */
+# if defined(PAR)
+ offsetTable = allocHashTable();
+# endif
+}
+
+//@node Packing Functions, Low level packing routines, Initialisation for packing, Graph packing
+//@subsection Packing Functions
+
+//@menu
+//* Packing Sections of Nearby Graph::
+//* Packing Closures::
+//@end menu
+
+//@node Packing Sections of Nearby Graph, Packing Closures, Packing Functions, Packing Functions
+//@subsubsection Packing Sections of Nearby Graph
+/*
+ Packing Sections of Nearby Graph
+
+ @PackNearbyGraph@ packs a closure and associated graph into a static
+ buffer (@PackBuffer@). It returns the address of this buffer and the
+ size of the data packed into the buffer (in its second parameter,
+  @packBufferSize@).  The associated graph is packed in a breadth-first
+  manner, using an explicit FIFO queue of closures to be packed rather
+  than a recursive, depth-first algorithm.  Once the packet is full,
+ closures (other than primitive arrays) are packed as FetchMes, and their
+ children are not queued for packing. */
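+
+/* A hypothetical GUM-side use (sketch only; dest and size are
+   illustrative names):
+
+     nat size;
+     rtsPackBuffer *buf = PackNearbyGraph(closure, tso, &size, dest);
+     // buf now holds size words of packed graph, ready to be shipped
+     // to dest with one of the sendOp* routines (cf. the PP_RESUME
+     // example in LLComms.c).
+*/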
+
+//@cindex PackNearbyGraph
+
+/* NB: this code is shared between GranSim and GUM;
+ tso only used in GranSim */
+rtsPackBuffer *
+PackNearbyGraph(closure, tso, packBufferSize, dest)
+StgClosure* closure;
+StgTSO* tso;
+nat *packBufferSize;
+GlobalTaskId dest;
+{
+ IF_PAR_DEBUG(resume,
+ graphFingerPrint[0] = '\0');
+
+ ASSERT(RTS_PACK_BUFFER_SIZE > 0);
+ ASSERT(_HS==1); // HWL HACK; compile time constant
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+ PAR_TICKY_PACK_NEARBY_GRAPH_START();
+#endif
+
+ /* ToDo: check that we have enough heap for the packet
+ ngoq ngo'
+ if (Hp + PACK_HEAP_REQUIRED > HpLim)
+ return NULL;
+ */
+ InitPacking(rtsFalse);
+# if defined(PAR)
+ dest_gtid=dest; //-1 to disable
+# elif defined(GRAN)
+ graph_root = closure;
+# endif
+
+ IF_GRAN_DEBUG(pack,
+ belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [PE %d]\n demanded by TSO %d (%p) [PE %u]",
+ globalPackBuffer->id, globalPackBuffer, closure, where_is(closure),
+ tso->id, tso, where_is((StgClosure*)tso)));
+
+ IF_GRAN_DEBUG(pack,
+ belch("** PrintGraph of %p is:", closure);
+ PrintGraph(closure,0));
+
+ IF_PAR_DEBUG(resume,
+ GraphFingerPrint(closure, graphFingerPrint);
+ ASSERT(strlen(graphFingerPrint)<=MAX_FINGER_PRINT_LEN);
+ belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [%x]\n demanded by TSO %d (%p); Finger-print is\n {%s}",
+ globalPackBuffer->id, globalPackBuffer, closure, mytid,
+ tso->id, tso, graphFingerPrint));
+
+ IF_PAR_DEBUG(packet,
+ belch("** PrintGraph of %p is:", closure);
+ belch("** pack_locn=%d", pack_locn);
+ PrintGraph(closure,0));
+
+ QueueClosure(closure);
+ do {
+ PackClosure(DeQueueClosure());
+ } while (!QueueEmpty());
+
+# if defined(PAR)
+
+ /* Record how much space the graph needs in packet and in heap */
+ globalPackBuffer->tso = tso; // currently unused, I think (debugging?)
+ globalPackBuffer->unpacked_size = unpacked_size;
+ globalPackBuffer->size = pack_locn;
+
+ /* Check for buffer overflow (again) */
+ ASSERT(pack_locn <= RtsFlags.ParFlags.packBufferSize+DEBUG_HEADROOM);
+ IF_DEBUG(sanity, // write magic end-of-buffer word
+ globalPackBuffer->buffer[pack_locn] = END_OF_BUFFER_MARKER);
+ *packBufferSize = pack_locn;
+
+# else /* GRAN */
+
+ /* Record how much space is needed to unpack the graph */
+ // PackBuffer[PACK_FLAG_LOCN] = (P_) MAGIC_PACK_FLAG; for testing
+ globalPackBuffer->tso = tso;
+ globalPackBuffer->unpacked_size = unpacked_size;
+
+ // ASSERT(pack_locn <= PackBuffer[PACK_SIZE_LOCN]+PACK_HDR_SIZE);
+ /* ToDo: Print an earlier, more meaningful message */
+ if (pack_locn==0) /* i.e. packet is empty */
+ barf("EMPTY PACKET! Can't transfer closure %p at all!!\n",
+ closure);
+ globalPackBuffer->size = pack_locn;
+ *packBufferSize = pack_locn;
+
+# endif
+
+ DonePacking(); /* {GrAnSim}vaD 'ut'Ha' */
+
+# if defined(GRAN)
+ IF_GRAN_DEBUG(pack ,
+ belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d",
+ globalPackBuffer->id, closure, globalPackBuffer->size, packed_thunks, globalPackBuffer->unpacked_size));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_packets++;
+ globalGranStats.tot_packet_size += pack_locn;
+ }
+
+ IF_GRAN_DEBUG(pack, PrintPacket(globalPackBuffer));
+# elif defined(PAR)
+  IF_PAR_DEBUG(packet,
+	   belch("** Finished <<%d>> packing graph %p (%s); closures packed: %d; thunks packed: %d; size of graph: %d",
+		 globalPackBuffer->id, closure, info_type(closure),
+		 globalPackBuffer->size, packed_thunks,
+		 globalPackBuffer->unpacked_size));
+
+ IF_DEBUG(sanity, // do a sanity check on the packet just constructed
+ checkPacket(globalPackBuffer));
+# endif /* GRAN */
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+ PAR_TICKY_PACK_NEARBY_GRAPH_END(globalPackBuffer->size, packed_thunks);
+#endif
+
+ return (globalPackBuffer);
+}
+
+//@cindex PackOneNode
+
+# if defined(GRAN)
+/* This version is used when the node is already local */
+
+rtsPackBuffer *
+PackOneNode(closure, tso, packBufferSize)
+StgClosure* closure;
+StgTSO* tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *globalPackBuffer;
+ int i, clpack_locn;
+
+ InitPacking(rtsFalse);
+
+ IF_GRAN_DEBUG(pack,
+ belch("** PackOneNode: %p (%s)[PE %d] requested by TSO %d (%p) [PE %d]",
+ closure, info_type(closure),
+ where_is(closure), tso->id, tso, where_is((StgClosure *)tso)));
+
+ Pack(closure);
+
+ /* Record how much space is needed to unpack the graph */
+ globalPackBuffer->tso = tso;
+ globalPackBuffer->unpacked_size = unpacked_size;
+
+ /* Set the size parameter */
+ ASSERT(pack_locn <= RTS_PACK_BUFFER_SIZE);
+ globalPackBuffer->size = pack_locn;
+ *packBufferSize = pack_locn;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_packets++;
+ globalGranStats.tot_packet_size += pack_locn;
+ }
+ IF_GRAN_DEBUG(pack,
+ PrintPacket(globalPackBuffer));
+
+ return (globalPackBuffer);
+}
+# endif /* GRAN */
+
+#if defined(GRAN)
+
+/*
+ PackTSO and PackStkO are entry points for two special kinds of closure
+ which are used in the parallel RTS. Compared with other closures they
+ are rather awkward to pack because they don't follow the normal closure
+ layout (where all pointers occur before all non-pointers). Luckily,
+ they're only needed when migrating threads between processors. */
+
+//@cindex PackTSO
+rtsPackBuffer*
+PackTSO(tso, packBufferSize)
+StgTSO *tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *globalPackBuffer;
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing TSO %d (%p)", tso->id, tso));
+ *packBufferSize = 0;
+ // PackBuffer[0] = PackBuffer[1] = 0; ???
+ return(globalPackBuffer);
+}
+
+//@cindex PackStkO
+static rtsPackBuffer*
+PackStkO(stko, packBufferSize)
+StgPtr stko;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *globalPackBuffer;
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing STKO %p", stko));
+ *packBufferSize = 0;
+ // PackBuffer[0] = PackBuffer[1] = 0;
+ return(globalPackBuffer);
+}
+
+static void
+PackFetchMe(StgClosure *closure)
+{
+ barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!");
+}
+
+#elif defined(PAR)
+
+static rtsPackBuffer*
+PackTSO(tso, packBufferSize)
+StgTSO *tso;
+nat *packBufferSize;
+{
+ barf("{PackTSO}Daq Qagh: trying to pack a TSO %d (%p) of size %d; thread migrations not supported, yet",
+ tso->id, tso, packBufferSize);
+}
+
+rtsPackBuffer*
+PackStkO(stko, packBufferSize)
+StgPtr stko;
+nat *packBufferSize;
+{
+ barf("{PackStkO}Daq Qagh: trying to pack a STKO (%p) of size %d; thread migrations not supported, yet",
+ stko, packBufferSize);
+}
+
+//@cindex PackFetchMe
+static void
+PackFetchMe(StgClosure *closure)
+{
+ StgInfoTable *ip;
+ nat i;
+ int offset;
+#if defined(DEBUG)
+ nat x = pack_locn;
+#endif
+
+#if defined(GRAN)
+ barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!");
+#else
+ offset = OffsetFor(closure);
+ if (!NotYetPacking(offset)) {
+    IF_PAR_DEBUG(pack,
+		 belch("*>.. Packing FETCH_ME for closure %p (%s) as offset to %d",
+		       closure, info_type(closure), offset));
+ PackOffset(offset);
+ // unpacked_size += 0; // unpacked_size unchanged (closure is shared!!)
+ return;
+ }
+
+ /* Need a GA even when packing a constructed FETCH_ME (cruel world!) */
+ AmPacking(closure);
+ /* FMs must be always globalised */
+ GlobaliseAndPackGA(closure);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing FETCH_ME for closure %p (%s) with GA: ((%x, %d, %x))",
+ closure, info_type(closure),
+ globalPackBuffer->buffer[pack_locn-2],
+ globalPackBuffer->buffer[pack_locn-1],
+ globalPackBuffer->buffer[pack_locn-3]));
+
+ /* Pack a FetchMe closure instead of closure */
+ ip = &stg_FETCH_ME_info;
+ /* this assumes that the info ptr is always the first word in a closure*/
+ Pack((StgWord)ip);
+ for (i = 1; i < _HS; ++i) // pack rest of fixed header
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ unpacked_size += sizeofW(StgFetchMe);
+ /* size of FETCHME in packed is the same as that constant */
+ ASSERT(pack_locn-x==PACK_FETCHME_SIZE);
+ /* In the pack buffer the pointer to a GA (in the FetchMe closure)
+ is expanded to the full GA; this is a compile-time const */
+ //ASSERT(PACK_FETCHME_SIZE == sizeofW(StgFetchMe)-1+PACK_GA_SIZE);
+#endif
+}
+
+#endif
+
+#ifdef DIST
+static void
+PackRemoteRef(StgClosure *closure)
+{
+ StgInfoTable *ip;
+ nat i;
+ int offset;
+
+ offset = OffsetFor(closure);
+ if (!NotYetPacking(offset)) {
+ PackOffset(offset);
+ unpacked_size += 2;
+ return;
+ }
+
+ /* Need a GA even when packing a constructed REMOTE_REF (cruel world!) */
+ AmPacking(closure);
+
+ /* basically we just Globalise, but for sticky things we can't have multiple GAs,
+ so we must prevent the GAs being split.
+
+ In returning things to the true sticky owner, this case is already handled, but for
+ anything else we just give up at the moment... This needs to be fixed!
+ */
+ { globalAddr *ga;
+ ga = LAGAlookup(closure); // surely this ga must exist?
+
+ // ***************************************************************************
+ // ***************************************************************************
+ // REMOTE_REF HACK - dual is in SetGAandCommonUp
+ // - prevents the weight from ever reaching zero
+ if(ga != NULL)
+ ga->weight=0x06660666; //anything apart from 0 really...
+ // ***************************************************************************
+ // ***************************************************************************
+
+    if((ga != NULL)&&(ga->weight / 2 <= 2))
+      barf("Can't split the weight any further when packing REMOTE_REF for closure %p (%s) with GA: ((%x, %d, %x))",
+	   closure, info_type(closure),
+	   ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight);
+ }
+ GlobaliseAndPackGA(closure);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing REMOTE_REF for closure %p (%s) with GA: ((%x, %d, %x))",
+ closure, info_type(closure),
+ globalPackBuffer->buffer[pack_locn-2],
+ globalPackBuffer->buffer[pack_locn-1],
+ globalPackBuffer->buffer[pack_locn-3]));
+
+ /* Pack a REMOTE_REF closure instead of closure */
+ ip = &stg_REMOTE_REF_info;
+ /* this assumes that the info ptr is always the first word in a closure*/
+ Pack((StgWord)ip);
+ for (i = 1; i < _HS; ++i) // pack rest of fixed header
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ unpacked_size += PACK_FETCHME_SIZE;
+}
+#endif /* DIST */
+
+//@node Packing Closures, , Packing Sections of Nearby Graph, Packing Functions
+//@subsubsection Packing Closures
+/*
+ Packing Closures
+
+ @PackClosure@ is the heart of the normal packing code. It packs a single
+ closure into the pack buffer, skipping over any indirections and
+ globalising it as necessary, queues any child pointers for further
+ packing, and turns it into a @FetchMe@ or revertible black hole (@RBH@)
+ locally if it was a thunk. Before the actual closure is packed, a
+ suitable global address (GA) is inserted in the pack buffer. There is
+ always room to pack a fetch-me to the closure (guaranteed by the
+ RoomToPack calculation), and this is packed if there is no room for the
+ entire closure.
+
+ Space is allocated for any primitive array children of a closure, and
+  hence a primitive array can always be packed along with its parent
+ closure. */
+
+//@cindex PackClosure
+
+# if defined(PAR)
+
+void
+PackClosure(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info;
+ nat clpack_locn;
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(get_itbl(closure)));
+
+ closure = UNWIND_IND(closure);
+ /* now closure is the thing we want to pack */
+ info = get_itbl(closure);
+
+ clpack_locn = OffsetFor(closure);
+
+ /* If the closure has been packed already, just pack an indirection to it
+ to guarantee that the graph doesn't become a tree when unpacked */
+ if (!NotYetPacking(clpack_locn)) {
+ PackOffset(clpack_locn);
+ return;
+ }
+
+ switch (info->type) {
+
+ case CONSTR_CHARLIKE:
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a charlike closure %d",
+ ((StgIntCharlikeClosure*)closure)->data));
+
+ PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data));
+ // NB: unpacked_size of a PLC is 0
+ return;
+
+ case CONSTR_INTLIKE:
+ {
+ StgInt val = ((StgIntCharlikeClosure*)closure)->data;
+
+ if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a small intlike %d as a PLC",
+ val));
+ PackPLC((StgPtr)INTLIKE_CLOSURE(val));
+ // NB: unpacked_size of a PLC is 0
+ return;
+ } else {
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a big intlike %d as a normal closure",
+ val));
+ PackGeneric(closure);
+ return;
+ }
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ /* it's a constructor (i.e. plain data) */
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a CONSTR %p (%s) using generic packing",
+ closure, info_type(closure)));
+ PackGeneric(closure);
+ return;
+
+ case THUNK_STATIC: // ToDo: check whether that's ok
+ case FUN_STATIC: // ToDo: check whether that's ok
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:// For now we ship indirections to CAFs: They are
+ // evaluated on each PE if needed
+ IF_PAR_DEBUG(pack,
+ belch("*>~~ Packing a %p (%s) as a PLC",
+ closure, info_type(closure)));
+
+ PackPLC((StgPtr)closure);
+ // NB: unpacked_size of a PLC is 0
+ return;
+
+ case THUNK_SELECTOR:
+ {
+ StgClosure *selectee = ((StgSelector *)closure)->selectee;
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** Found THUNK_SELECTOR at %p (%s) pointing to %p (%s); using PackGeneric",
+ closure, info_type(closure),
+ selectee, info_type(selectee)));
+ PackGeneric(closure);
+ /* inlined code; probably could use PackGeneric
+ Pack((StgWord)(*(StgPtr)closure));
+ Pack((StgWord)(selectee));
+ QueueClosure(selectee);
+ unpacked_size += 2;
+ */
+ }
+ return;
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ PackGeneric(closure);
+ return;
+
+ case AP_UPD:
+ case PAP:
+    /* Old alternative (now disabled): rather than packing the PAP we used
+       to pack a FETCH_ME pointing to the closure.  Note that since PAPs
+       contain a chunk of stack as payload, packing PAPs (as done below) is
+       a first step towards thread migration.
+
+    barf("*> Packing of PAP not implemented %p (%s)",
+	 closure, info_type(closure));
+    IF_PAR_DEBUG(pack,
+		 belch("*>.. Packing a PAP closure at %p (%s) as a FETCH_ME",
+		       closure, info_type(closure)));
+    PackFetchMe(closure);
+    */
+ PackPAP((StgPAP *)closure);
+ return;
+
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case RBH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+
+ /* If it's a (revertible) black-hole, pack a FetchMe closure to it */
+ //ASSERT(pack_locn > PACK_HDR_SIZE);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing a BH-like closure at %p (%s) as a FETCH_ME",
+ closure, info_type(closure)));
+ /* NB: in case of a FETCH_ME this might build up a chain of FETCH_MEs;
+     perhaps short-cut the GA here */
+ PackFetchMe(closure);
+ return;
+
+#ifdef DIST
+ case REMOTE_REF:
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing %p (%s) as a REMOTE_REF",
+ closure, info_type(closure)));
+ PackRemoteRef(closure);
+ /* we hopefully don't end up with a chain of REMOTE_REFs!!!!!!!!!! */
+
+ return;
+#endif
+
+ case TSO:
+ case MVAR:
+#ifdef DIST
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing %p (%s) as a RemoteRef",
+ closure, info_type(closure)));
+ PackRemoteRef(closure);
+#else
+ barf("{Pack}Daq Qagh: Only GdH can pack %p (%s)",
+ closure, info_type(closure));
+#endif
+ return;
+
+ case ARR_WORDS:
+ PackArray(closure);
+ return;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_VAR:
+ /*
+      Eventually, this should use the same packing routine as ARR_WORDS
+
+ GlobaliseAndPackGA(closure);
+ PackArray(closure);
+ return;
+ */
+ barf("Qagh{Pack}Doq: packing of mutable closures not yet implemented: %p (%s)",
+ closure, info_type(closure));
+
+# ifdef DEBUG
+ case BCO:
+ barf("{Pack}Daq Qagh: found BCO closure %p (%s); GUM hates interpreted code",
+ closure, info_type(closure));
+ /* never reached */
+
+ // check error cases only in a debugging setup
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ barf("{Pack}Daq Qagh: found return vector %p (%s) when packing (thread migration not implemented)",
+ closure, info_type(closure));
+ /* never reached */
+
+ case UPDATE_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ barf("{Pack}Daq Qagh: found stack frame %p (%s) when packing (thread migration not implemented)",
+ closure, info_type(closure));
+ /* never reached */
+
+ case BLOCKED_FETCH:
+ case EVACUATED:
+ /* something's very wrong */
+ barf("{Pack}Daq Qagh: found %s (%p) when packing",
+ info_type(closure), closure);
+ /* never reached */
+
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ barf("Pack: found IND_... after shorting out indirections %d (%s)",
+ (nat)(info->type), info_type(closure));
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ barf("Pack: found foreign thingy; not yet implemented in %d (%s)",
+ (nat)(info->type), info_type(closure));
+#endif
+
+ default:
+ barf("Pack: strange closure %d", (nat)(info->type));
+ } /* switch */
+}
+
+/*
+ Pack a constructor of unknown size.
+ Similar to PackGeneric but without creating GAs.
+*/
+#if 0
+//@cindex PackConstr
+static void
+PackConstr(StgClosure *closure)
+{
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i;
+ char str[80];
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(closure->header.info));
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(info->type == CONSTR ||
+ info->type == CONSTR_1_0 ||
+ info->type == CONSTR_0_1 ||
+ info->type == CONSTR_2_0 ||
+ info->type == CONSTR_1_1 ||
+ info->type == CONSTR_0_2);
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "*>^^ packing a constructor at %p (%s) (size=%d, ptrs=%d, nonptrs=%d)\n",
+ closure, info_type(closure), size, ptrs, nonptrs));
+
+ /* Primitive arrays have gone; now we have (MUT_)ARR_WORDS etc */
+
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* Record the location of the GA */
+ AmPacking(closure);
+
+ /* Pack Constructor marker */
+ Pack((StgWord)2);
+
+ /* pack fixed and variable header */
+ for (i = 0; i < _HS + vhs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ /* register all ptrs for further packing */
+ for (i = 0; i < ptrs; ++i)
+ QueueClosure(((StgClosure *) *(((StgPtr)closure)+(_HS+vhs)+i)));
+
+ /* pack non-ptrs */
+ for (i = 0; i < nonptrs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+(_HS+vhs)+ptrs+i));
+}
+#endif
+
+/*
+ Generic packing code.
+ This code is performed for `ordinary' closures such as CONSTR, THUNK etc.
+*/
+//@cindex PackGeneric
+static void
+PackGeneric(StgClosure *closure)
+{
+ StgInfoTable *info;
+ StgClosure *rbh;
+ nat size, ptrs, nonptrs, vhs, i, m;
+ char str[80];
+
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(closure));
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(!IS_BLACK_HOLE(closure));
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "*>== %p (%s): generic packing (size=%d, ptrs=%d, nonptrs=%d)\n",
+ closure, info_type(closure), size, ptrs, nonptrs));
+
+ /* packing strategies: how many thunks to add to a packet;
+ default is infinity i.e. RtsFlags.ParFlags.thunksToPack==0 */
+ if (RtsFlags.ParFlags.thunksToPack &&
+ packed_thunks >= RtsFlags.ParFlags.thunksToPack &&
+ closure_THUNK(closure)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& refusing to pack more than %d thunks per packet; packing FETCH_ME for closure %p (%s)",
+ packed_thunks, closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* Primitive arrays have gone; now we have (MUT_)ARR_WORDS etc */
+
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* Record the location of the GA */
+ AmPacking(closure);
+ /* Allocate a GA for this closure and put it into the buffer */
+  /* Check the globalisation scheme: globalising==0 means globalise
+     everything, otherwise globalise (and hence allocate a GA for) only thunks */
+ if ( RtsFlags.ParFlags.globalising == 0 ||
+ (closure_THUNK(closure) && !closure_UNPOINTED(closure)) )
+ GlobaliseAndPackGA(closure);
+ else
+ Pack((StgWord)2); // marker for unglobalised closure
+
+
+ ASSERT(!(info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+ info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR));
+
+ /* At last! A closure we can actually pack! */
+  if (ip_MUTABLE(info) && (info->type != FETCH_ME) && (info->type != REMOTE_REF))
+    barf("*>// %p (%s) PackGeneric: trying to replicate a Mutable closure!",
+	 closure, info_type(closure));
+
+ /*
+ Remember, the generic closure layout is as follows:
+      +--------------------------------------------------+
+      | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PTRS |
+      +--------------------------------------------------+
+ */
+ /* pack fixed and variable header */
+ for (i = 0; i < _HS + vhs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ /* register all ptrs for further packing */
+ for (i = 0; i < ptrs; ++i)
+ QueueClosure(((StgClosure *) *(((StgPtr)closure)+(_HS+vhs)+i)));
+
+ /* pack non-ptrs */
+ for (i = 0; i < nonptrs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+(_HS+vhs)+ptrs+i));
+
+ // ASSERT(_HS+vhs+ptrs+nonptrs==size);
+ if ((m=_HS+vhs+ptrs+nonptrs)<size) {
+ IF_PAR_DEBUG(pack,
+ belch("*>** WARNING: slop in closure %p (%s); filling %d words; SHOULD NEVER HAPPEN",
+ closure, info_type(closure), size-m));
+ for (i=m; i<size; i++)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+ }
+
+ unpacked_size += size;
+ //unpacked_size += (size < MIN_UPD_SIZE) ? MIN_UPD_SIZE : size;
+
+ /*
+   * Record that this is a revertible black hole so that we can fill in
+   * its address from the fetch reply.  Problem: unshared thunks may cause
+   * space leaks this way; their GAs should be deallocated following an
+   * ACK.
+ */
+
+ if (closure_THUNK(closure) && !closure_UNPOINTED(closure)) {
+ rbh = convertToRBH(closure);
+ ASSERT(size>=_HS+MIN_UPD_SIZE); // min size for any updatable closure
+ ASSERT(rbh == closure); // rbh at the same position (minced version)
+ packed_thunks++;
+ } else if ( closure==graph_root ) {
+ packed_thunks++; // root of graph is counted as a thunk
+ }
+}
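+
+/* A worked summary of the per-closure layout PackGeneric emits, under the
+   globalisation choice made above (illustrative; FH = fixed header, VH =
+   variable header):
+
+     globalised:    [ weight | gtid | slot ]  FH  VH  NONPTRS...
+     unglobalised:  [ 2 ]                     FH  VH  NONPTRS...
+
+   Pointer fields are not copied into the buffer; each pointee is queued
+   and later contributes its own entry (or a fetch-me) to the packet. */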
+/*
+ Pack an array of words.
+ ToDo: implement packing of MUT_ARRAYs
+*/
+
+//@cindex PackArray
+static void
+PackArray(StgClosure *closure)
+{
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ nat i, n;
+ char str[80];
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+ info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR);
+
+ n = ((StgArrWords *)closure)->words;
+ // this includes the header!: arr_words_sizeW(stgCast(StgArrWords*,q));
+
+ IF_PAR_DEBUG(pack,
+ belch("*>== %p (%s): packing an array of %d words (size=%d)\n",
+ closure, info_type(closure), n,
+ arr_words_sizeW((StgArrWords *)closure)));
+
+ /* check that we have enough room in the pack buffer */
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* global stats about arrays sent */
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_arrs++;
+ globalParStats.tot_arr_size += ((StgArrWords *)closure)->words;
+ }
+
+ /* record offset of the closure and allocate a GA */
+ AmPacking(closure);
+  /* Check the globalisation scheme: globalising==0 means globalise
+     everything, otherwise globalise (and hence allocate a GA for) only thunks */
+ if ( RtsFlags.ParFlags.globalising == 0 ||
+ (closure_THUNK(closure) && !closure_UNPOINTED(closure)) )
+ GlobaliseAndPackGA(closure);
+ else
+ Pack((StgWord)2); // marker for unglobalised closure
+
+ /* Pack the header (2 words: info ptr and the number of words to follow) */
+ Pack((StgWord)*(StgPtr)closure);
+ Pack(((StgArrWords *)closure)->words);
+
+ /* pack the payload of the closure (all non-ptrs) */
+ for (i=0; i<n; i++)
+ Pack((StgWord)((StgArrWords *)closure)->payload[i]);
+
+ unpacked_size += arr_words_sizeW((StgArrWords *)closure);
+}
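+
+/* A worked size sketch for the array format above (illustrative, assuming
+   _HS == 1 as asserted elsewhere in this file): packing an ARR_WORDS
+   closure with n payload words writes
+
+     1 marker word (or PACK_GA_SIZE words for a full GA)
+       + 1 (info ptr) + 1 (word count) + n (payload)
+
+   words into the buffer, while unpacked_size grows by
+   arr_words_sizeW(closure), i.e. _HS + 1 + n heap words. */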
+
+/*
+ Pack a PAP closure.
+ Note that the representation of a PAP in the buffer is different from
+ its representation in the heap. In particular, pointers to local
+ closures are packed directly as FETCHME closures, using
+  PACK_FETCHME_SIZE words to represent a 1-word pointer in the original graph
+ structure. To account for the difference in size we store the packed
+ size of the closure as part of the PAP's variable header in the buffer.
+*/
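+
+/* A hedged size sketch for the representation change described above
+   (compiled out; the function name and formula are illustrative): each
+   pointer in the PAP's stack chunk costs one tag word plus a
+   PACK_FETCHME_SIZE fetch-me in the buffer, whereas tagged non-pointer
+   blocks are copied verbatim. */
+#if 0
+static nat
+sketch_packed_pap_payload_size(nat copied_words, nat ptr_words)
+{
+  return copied_words                          /* tags + non-ptrs, verbatim */
+       + ptr_words * (1 + PACK_FETCHME_SIZE);  /* tag word + fetch-me each  */
+}
+#endif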
+
+//@cindex PackPAP
+static void
+PackPAP(StgPAP *pap) {
+ nat n, i, j, pack_start;
+ StgPtr p, q;
+ const StgInfoTable* info;
+ StgWord bitmap;
+ /* debugging only */
+ StgPtr end;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+ nat unpacked_size_before_PAP, FMs_in_PAP=0; // debugging only
+
+ /* This is actually a setup invariant; checked here 'cause it affects PAPs*/
+ //ASSERT(PACK_FETCHME_SIZE == sizeofW(StgFetchMe)-1+PACK_GA_SIZE);
+ ASSERT(NotYetPacking(OffsetFor((StgClosure *)pap)));
+ IF_DEBUG(sanity,
+ unpacked_size_before_PAP = unpacked_size);
+
+ n = (nat)(pap->n_args);
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info((StgClosure *)pap, &size, &ptrs, &nonptrs, &vhs, str);
+ ASSERT(ptrs==0 && nonptrs==0 && size==pap_sizeW(pap));
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** %p (%s): PackPAP: packing PAP with %d words (size=%d; ptrs=%d; nonptrs=%d:",
+ (StgClosure *)pap, info_type((StgClosure *)pap),
+ n, size, ptrs, nonptrs);
+ printClosure((StgClosure *)pap));
+
+ /* check that we have enough room in the pack buffer */
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ (StgClosure *)pap, info_type((StgClosure *)pap)));
+ PackFetchMe((StgClosure *)pap);
+ return;
+ }
+
+ /* record offset of the closure and allocate a GA */
+ AmPacking((StgClosure *)pap);
+  /* Check the globalisation scheme: globalising==0 means globalise
+     everything, otherwise globalise (and hence allocate a GA for) only thunks */
+ if ( RtsFlags.ParFlags.globalising == 0 ||
+ (closure_THUNK(pap) && !closure_UNPOINTED(pap)) )
+ GlobaliseAndPackGA((StgClosure *)pap);
+ else
+ Pack((StgWord)2); // marker for unglobalised closure
+
+ /* Pack the PAP header */
+ Pack((StgWord)(pap->header.info));
+ Pack((StgWord)(pap->n_args));
+ Pack((StgWord)(pap->fun));
+ pack_start = pack_locn; // to compute size of PAP in buffer
+ Pack((StgWord)0); // this will be filled in later (size of PAP in buffer)
+
+ /* Pack the payload of a PAP i.e. a stack chunk */
+ /* pointers to start of stack chunk */
+ p = (StgPtr)(pap->payload);
+ end = (StgPtr)((nat)pap+pap_sizeW(pap)*sizeof(StgWord)); // (StgPtr)((nat)pap+sizeof(StgPAP)+sizeof(StgPtr)*n);
+ while (p<end) {
+ /* the loop body has been borrowed from scavenge_stack */
+ q = (StgPtr)*p;
+
+ /* If we've got a tag, pack all words in that block */
+ if (IS_ARG_TAG((W_)q)) { // q stands for the no. of non-ptrs to follow
+ nat m = ARG_TAG((W_)q); // first word after this block
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: packing %d words (tagged), starting @ %p",
+ p, m, p));
+ for (i=0; i<m+1; i++)
+ Pack((StgWord)*(p+i));
+ p += m+1; // m words + the tag
+ continue;
+ }
+
+    /* If q is a pointer to a (heap allocated) closure we pack a FETCH_ME
+ ToDo: provide RTS flag to also pack these closures
+ */
+ if (! LOOKS_LIKE_GHC_INFO(q) ) {
+ /* distinguish static closure (PLC) from other closures (FM) */
+ switch (get_itbl((StgClosure*)q)->type) {
+ case CONSTR_CHARLIKE:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: packing a charlike closure %d",
+ ((StgIntCharlikeClosure*)q)->data));
+
+ PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)q)->data));
+ p++;
+ break;
+
+ case CONSTR_INTLIKE:
+ {
+ StgInt val = ((StgIntCharlikeClosure*)q)->data;
+
+ if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: Packing ptr to a small intlike %d as a PLC", val));
+ PackPLC((StgPtr)INTLIKE_CLOSURE(val));
+ p++;
+ break;
+ } else {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: Packing a ptr to a big intlike %d as a FM",
+ val));
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)q);
+ p++;
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ break;
+ }
+ }
+ case THUNK_STATIC: // ToDo: check whether that's ok
+ case FUN_STATIC: // ToDo: check whether that's ok
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: packing a ptr to a %p (%s) as a PLC",
+ q, info_type((StgClosure *)q)));
+
+ PackPLC((StgPtr)q);
+ p++;
+ break;
+ }
+ default:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: packing FM to %p (%s)",
+ p, q, info_type((StgClosure*)q)));
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)q);
+ p++;
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ break;
+ }
+ continue;
+ }
+
+ /*
+ * Otherwise, q must be the info pointer of an activation
+ * record. All activation records have 'bitmap' style layout
+ * info.
+ */
+ info = get_itbl((StgClosure *)p);
+ switch (info->type) {
+
+ /* Dynamic bitmap: the mask is stored on the stack */
+ case RET_DYN:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: RET_DYN",
+ p));
+
+ /* Pack the header as is */
+ Pack((StgWord)(((StgRetDyn *)p)->info));
+ Pack((StgWord)(((StgRetDyn *)p)->liveness));
+ Pack((StgWord)(((StgRetDyn *)p)->ret_addr));
+
+ bitmap = ((StgRetDyn *)p)->liveness;
+ p = (P_)&((StgRetDyn *)p)->payload[0];
+ goto small_bitmap;
+
+ /* probably a slow-entry point return address: */
+ case FUN:
+ case FUN_STATIC:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: FUN or FUN_STATIC",
+ p));
+
+ Pack((StgWord)(((StgClosure *)p)->header.info));
+ p++;
+
+ goto follow_srt; //??
+ }
+
+ /* Using generic code here; could inline as in scavenge_stack */
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *frame = (StgUpdateFrame *)p;
+ nat type = get_itbl(frame->updatee)->type;
+
+ ASSERT(type==BLACKHOLE || type==CAF_BLACKHOLE || type==BLACKHOLE_BQ);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: UPDATE_FRAME (updatee=%p; link=%p)",
+ p, frame->updatee, frame->link));
+
+ Pack((StgWord)(frame->header.info));
+ Pack((StgWord)(frame->link)); // ToDo: fix intra-stack pointer
+ Pack((StgWord)(frame->updatee)); // ToDo: follow link
+
+	p += 3;
+	continue;	// one stack item per iteration; don't fall through
+      }
+
+ /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */
+ case STOP_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: STOP_FRAME",
+ p));
+ Pack((StgWord)((StgStopFrame *)p)->header.info);
+	p++;
+	continue;	// don't fall through into the CATCH_FRAME case
+      }
+
+ case CATCH_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: CATCH_FRAME (handler=%p)",
+ p, ((StgCatchFrame *)p)->handler));
+
+ Pack((StgWord)((StgCatchFrame *)p)->header.info);
+ Pack((StgWord)((StgCatchFrame *)p)->link); // ToDo: fix intra-stack pointer
+ Pack((StgWord)((StgCatchFrame *)p)->exceptions_blocked);
+ Pack((StgWord)((StgCatchFrame *)p)->handler);
+	p += 4;
+	continue;	// don't fall through into the SEQ_FRAME case
+      }
+
+ case SEQ_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+		 belch("*>** PackPAP @ %p: SEQ_FRAME (link=%p)",
+ p, ((StgSeqFrame *)p)->link));
+
+ Pack((StgWord)((StgSeqFrame *)p)->header.info);
+ Pack((StgWord)((StgSeqFrame *)p)->link); // ToDo: fix intra-stack pointer
+
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+
+ p = (StgPtr)&(((StgClosure *)p)->payload);
+ goto small_bitmap;
+ }
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: RET_{BCO,SMALL,VEC_SMALL} (bitmap=%o)",
+ p, info->layout.bitmap));
+
+
+ Pack((StgWord)((StgClosure *)p)->header.info);
+ p++;
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+ /* this assumes that the payload starts immediately after the info-ptr */
+
+ small_bitmap:
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)*p++); // pack a FetchMe to the closure
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ Pack((StgWord)*p++);
+ }
+ bitmap = bitmap >> 1;
+ }
+
+ follow_srt:
+ IF_PAR_DEBUG(pack,
+ belch("*>-- PackPAP: nothing to do for follow_srt"));
+ continue;
+
+ /* large bitmap (> 32 entries) */
+ case RET_BIG:
+ case RET_VEC_BIG:
+ {
+ StgPtr q;
+ StgLargeBitmap *large_bitmap;
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: RET_{BIG,VEC_BIG} (large_bitmap=%p)",
+ p, info->layout.large_bitmap));
+
+
+ Pack((StgWord)((StgClosure *)p)->header.info);
+ p++;
+
+ large_bitmap = info->layout.large_bitmap;
+
+ for (j=0; j<large_bitmap->size; j++) {
+ bitmap = large_bitmap->bitmap[j];
+ q = p + BITS_IN(W_);
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)*p++); // ToDo: pack pointer(StgClosure *)*p = evacuate((StgClosure *)*p);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ Pack((StgWord)*p++);
+ }
+ bitmap = bitmap >> 1;
+ }
+ if (j+1 < large_bitmap->size) {
+ while (p < q) {
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)*p++); // ToDo: pack pointer (StgClosure *)*p = evacuate((StgClosure *)*p);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ }
+ }
+ }
+
+ /* and don't forget to follow the SRT */
+ goto follow_srt;
+ }
+
+ default:
+ barf("PackPAP: weird activation record found on stack (@ %p): %d",
+ p, (int)(info->type));
+ }
+ }
+ // fill in size of the PAP (only the payload!) in buffer
+ globalPackBuffer->buffer[pack_start] = (StgWord)(pack_locn - pack_start - 1*sizeofW(StgWord));
+ /*
+ We can use the generic pap_sizeW macro to compute the size of the
+ unpacked PAP because whenever we pack a new FETCHME as part of the
+ PAP's payload we also adjust unpacked_size accordingly (smart, aren't we?)
+
+ NB: the current PAP (un-)packing code relies on the fact that
+ the size of the unpacked PAP + size of all unpacked FMs is the same as
+ the size of the packed PAP!!
+ */
+ unpacked_size += pap_sizeW(pap); // sizeofW(pap) + (nat)(globalPackBuffer->buffer[pack_start]);
+ IF_DEBUG(sanity,
+ ASSERT(unpacked_size-unpacked_size_before_PAP==pap_sizeW(pap)+FMs_in_PAP*sizeofW(StgFetchMe)));
+}
+# else /* GRAN */
+
+/* Fake the packing of a closure */
+
+void
+PackClosure(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info, *childInfo;
+ nat size, ptrs, nonptrs, vhs;
+ char info_hdr_ty[80];
+ nat i;
+ StgClosure *indirectee, *rbh;
+ char str[80];
+ rtsBool is_mutable, will_be_rbh, no_more_thunks_please;
+
+ is_mutable = rtsFalse;
+
+ /* In GranSim we don't pack and unpack closures -- we just simulate
+ packing by updating the bitmask. So, the graph structure is unchanged
+ i.e. we don't short out indirections here. -- HWL */
+
+ /* Nothing to do with packing but good place to (sanity) check closure;
+ if the closure is a thunk, it must be unique; otherwise we have copied
+ work at some point before that which violates one of our main global
+ assertions in GranSim/GUM */
+ ASSERT(!closure_THUNK(closure) || is_unique(closure));
+
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing closure %p (%s)",
+ closure, info_type(closure)));
+
+ if (where_is(closure) != where_is(graph_root)) {
+ IF_GRAN_DEBUG(pack,
+ belch("** faking a FETCHME [current PE: %d, closure's PE: %d]",
+ where_is(graph_root), where_is(closure)));
+
+ /* GUM would pack a FETCHME here; simulate that by increasing the */
+ /* unpacked size accordingly but don't pack anything -- HWL */
+ unpacked_size += _HS + 2 ; // sizeofW(StgFetchMe);
+ return;
+ }
+
+ /* If the closure's not already being packed */
+ if (!NotYetPacking(closure))
+ /* Don't have to do anything in GrAnSim if closure is already */
+ /* packed -- HWL */
+ {
+ IF_GRAN_DEBUG(pack,
+		    belch("** Closure %p is already packed; skipping it now!",
+ closure));
+ return;
+ }
+
+ switch (get_itbl(closure)->type) {
+ /* ToDo: check for sticky bit here? */
+ /* BH-like closures which must not be moved to another PE */
+ case CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case SE_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case SE_CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case BLACKHOLE_BQ: /* # of ptrs, nptrs: 1,1 */
+ case RBH: /* # of ptrs, nptrs: 1,1 */
+ /* same for these parallel specific closures */
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ IF_GRAN_DEBUG(pack,
+ belch("** Avoid packing BH-like closures (%p, %s)!",
+ closure, info_type(closure)));
+ /* Just ignore RBHs i.e. they stay where they are */
+ return;
+
+ case THUNK_SELECTOR:
+ {
+ StgClosure *selectee = ((StgSelector *)closure)->selectee;
+
+ IF_GRAN_DEBUG(pack,
+ belch("** Avoid packing THUNK_SELECTOR (%p, %s) but queuing %p (%s)!",
+ closure, info_type(closure), selectee, info_type(selectee)));
+ QueueClosure(selectee);
+ IF_GRAN_DEBUG(pack,
+ belch("** [%p (%s) (Queueing closure) ....]",
+ selectee, info_type(selectee)));
+ }
+ return;
+
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ /* For now we ship indirections to CAFs:
+ * They are evaluated on each PE if needed */
+ IF_GRAN_DEBUG(pack,
+ belch("** Nothing to pack for %p (%s)!",
+ closure, info_type(closure)));
+ // Pack(closure); GUM only
+ return;
+
+ case CONSTR_CHARLIKE:
+ case CONSTR_INTLIKE:
+ IF_GRAN_DEBUG(pack,
+		  belch("** Nothing to pack for %p (%s)!",
+ closure, info_type(closure)));
+ // PackPLC(((StgIntCharlikeClosure *)closure)->data); GUM only
+ return;
+
+ case AP_UPD:
+ case PAP:
+ /* partial applications; special treatment necessary? */
+ break;
+
+ case MVAR:
+ barf("{PackClosure}Daq Qagh: found an MVAR (%p, %s); ToDo: implement proper treatment of MVARs",
+ closure, info_type(closure));
+
+ case ARR_WORDS:
+ case MUT_VAR:
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ /* Mutable objects; require special treatment to ship all data */
+ is_mutable = rtsTrue;
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ /* weak pointers and other FFI objects */
+ barf("{PackClosure}Daq Qagh: found an FFI object (%p, %s); FFI not yet supported by GranSim, sorry",
+ closure, info_type(closure));
+
+ case TSO:
+ /* parallel objects */
+ barf("{PackClosure}Daq Qagh: found a TSO when packing (%p, %s); thread migration not yet implemented, sorry",
+ closure, info_type(closure));
+
+ case BCO:
+ /* Hugs objects (i.e. closures used by the interpreter) */
+ barf("{PackClosure}Daq Qagh: found a Hugs closure when packing (%p, %s); GranSim not yet integrated with Hugs, sorry",
+ closure, info_type(closure));
+
+ case IND: /* # of ptrs, nptrs: 1,0 */
+ case IND_STATIC: /* # of ptrs, nptrs: 1,0 */
+ case IND_PERM: /* # of ptrs, nptrs: 1,1 */
+ case IND_OLDGEN: /* # of ptrs, nptrs: 1,1 */
+ case IND_OLDGEN_PERM: /* # of ptrs, nptrs: 1,1 */
+    /* we shouldn't find indirections here, because we have shorted them
+       out at the beginning of this function already.
+    */
+ break;
+ /* should be:
+ barf("{PackClosure}Daq Qagh: found indirection when packing (%p, %s)",
+ closure, info_type(closure));
+ */
+
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ case STOP_FRAME:
+ /* stack frames; should never be found when packing for now;
+ once we support thread migration these have to be covered properly
+ */
+ barf("{PackClosure}Daq Qagh: found stack frame when packing (%p, %s)",
+ closure, info_type(closure));
+
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ /* vectored returns; should never be found when packing; */
+ barf("{PackClosure}Daq Qagh: found vectored return (%p, %s)",
+ closure, info_type(closure));
+
+ case INVALID_OBJECT:
+ barf("{PackClosure}Daq Qagh: found Invalid object (%p, %s)",
+ closure, info_type(closure));
+
+ default:
+ /*
+ Here we know that the closure is a CONSTR, FUN or THUNK (maybe
+ a specialised version with wired in #ptr/#nptr info; currently
+ we treat these specialised versions like the generic version)
+ */
+    break;
+  } /* switch */
+
+ /* Otherwise it's not Fixed */
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ will_be_rbh = closure_THUNK(closure) && !closure_UNPOINTED(closure);
+
+ IF_GRAN_DEBUG(pack,
+		belch("** Info on closure %p (%s): size=%d; ptrs=%d; %s",
+ closure, info_type(closure),
+ size, ptrs,
+ (will_be_rbh) ? "will become RBH" : "will NOT become RBH"));
+
+ // check whether IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) -- HWL
+ no_more_thunks_please =
+ (RtsFlags.GranFlags.ThunksToPack>0) &&
+ (packed_thunks>=RtsFlags.GranFlags.ThunksToPack);
+
+ /*
+ should be covered by get_closure_info
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ info->type == BLACKHOLE || info->type == RBH )
+ size = ptrs = nonptrs = vhs = 0;
+ */
+ /* Now peek ahead to see whether the closure has any primitive */
+ /* array children */
+ /*
+ ToDo: fix this code
+ for (i = 0; i < ptrs; ++i) {
+ P_ childInfo;
+ W_ childSize, childPtrs, childNonPtrs, childVhs;
+
+ childInfo = get_closure_info(((StgPtrPtr) (closure))[i + _HS + vhs],
+ &childSize, &childPtrs, &childNonPtrs,
+ &childVhs, junk_str);
+ if (IS_BIG_MOTHER(childInfo)) {
+ reservedPAsize += PACK_GA_SIZE + _HS +
+ childVhs + childNonPtrs +
+ childPtrs * PACK_FETCHME_SIZE;
+ PAsize += PACK_GA_SIZE + _HS + childSize;
+ PAptrs += childPtrs;
+ }
+ }
+ */
+ /* Don't pack anything (GrAnSim) if it's a black hole, or the buffer
+ * is full and it isn't a primitive array. N.B. Primitive arrays are
+ * always packed (because their parents index into them directly) */
+
+ if (IS_BLACK_HOLE(closure))
+ /*
+ ToDo: fix this code
+ ||
+ !(RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)
+ || IS_BIG_MOTHER(info)))
+ */
+ return;
+
+ /* At last! A closure we can actually pack! */
+
+ if (closure_MUTABLE(closure)) // not nec. && (info->type != FETCHME))
+ belch("ghuH: Replicated a Mutable closure!");
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ no_more_thunks_please && will_be_rbh) {
+ globalGranStats.tot_cuts++;
+ if ( RtsFlags.GranFlags.Debug.pack )
+ belch("** PackClosure (w/ ThunksToPack=%d): Cutting tree with root at %#x\n",
+ RtsFlags.GranFlags.ThunksToPack, closure);
+ } else if (will_be_rbh || (closure==graph_root) ) {
+ packed_thunks++;
+ globalGranStats.tot_thunks++;
+ }
+
+ if (no_more_thunks_please && will_be_rbh)
+ return; /* don't pack anything */
+
+ /* actual PACKING done here -- HWL */
+ Pack(closure);
+ for (i = 0; i < ptrs; ++i) {
+ /* extract i-th pointer from closure */
+ QueueClosure((StgClosure *)(closure->payload[i]));
+ IF_GRAN_DEBUG(pack,
+ belch("** [%p (%s) (Queueing closure) ....]",
+ closure->payload[i],
+ info_type(*stgCast(StgPtr*,((closure)->payload+(i))))));
+ //^^^^^^^^^^^ payloadPtr(closure,i))));
+ }
+
+ /*
+ for packing words (GUM only) do something like this:
+
+ for (i = 0; i < ptrs; ++i) {
+ Pack(payloadWord(obj,i+j));
+ }
+ */
+ /* Turn thunk into a revertible black hole. */
+ if (will_be_rbh) {
+ rbh = convertToRBH(closure);
+ ASSERT(rbh != NULL);
+ }
+}
+# endif /* PAR */
+
+//@node Low level packing routines, Unpacking routines, Packing Functions, Graph packing
+//@subsection Low level packing routines
+
+/*
+ @Pack@ is the basic packing routine. It just writes a word of data into
+ the pack buffer and increments the pack location. */
+
+//@cindex Pack
+
+# if defined(PAR)
+static void
+Pack(data)
+StgWord data;
+{
+ ASSERT(pack_locn < RtsFlags.ParFlags.packBufferSize);
+ globalPackBuffer->buffer[pack_locn++] = data;
+}
+#endif
+
+#if defined(GRAN)
+static void
+Pack(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* This checks the size of the GrAnSim internal pack buffer. The simulated
+ pack buffer is checked via RoomToPack (as in GUM) */
+ if (pack_locn >= (int)globalPackBuffer->size+sizeofW(rtsPackBuffer))
+ reallocPackBuffer();
+
+ if (closure==(StgClosure*)NULL)
+ belch("Qagh {Pack}Daq: Trying to pack 0");
+ globalPackBuffer->buffer[pack_locn++] = closure;
+ /* ASSERT: Data is a closure in GrAnSim here */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+  // ToDo: is check for MIN_UPD_SIZE really needed?
+ unpacked_size += _HS + (size < MIN_UPD_SIZE ?
+ MIN_UPD_SIZE :
+ size);
+}
+# endif /* GRAN */
+
+/*
+ If a closure is local, make it global. Then, divide its weight for
+ export. The GA is then packed into the pack buffer. */
+
+# if defined(PAR)
+//@cindex GlobaliseAndPackGA
+static void
+GlobaliseAndPackGA(closure)
+StgClosure *closure;
+{
+ globalAddr *ga;
+ globalAddr packGA;
+
+ if ((ga = LAGAlookup(closure)) == NULL) {
+ ga = makeGlobal(closure, rtsTrue);
+
+ // Global statistics: increase amount of global data by closure-size
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i, m; // stats only!!
+ char str[80]; // stats only!!
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ globalParStats.tot_global += size;
+ }
+ }
+ ASSERT(ga->weight==MAX_GA_WEIGHT || ga->weight > 2);
+
+ if(dest_gtid==ga->payload.gc.gtid)
+ { packGA.payload = ga->payload;
+ packGA.weight = 0xFFFFFFFF; // 0,1,2 are used already
+ }
+ else
+ { splitWeight(&packGA, ga);
+ ASSERT(packGA.weight > 0);
+ }
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "*>## %p (%s): Globalising (%s) closure with GA ",
+ closure, info_type(closure),
+ ( (ga->payload.gc.gtid==dest_gtid)?"returning":
+ ( (ga->payload.gc.gtid==mytid)?"creating":"sharing" ) ));
+ printGA(&packGA);
+ fputc('\n', stderr));
+
+
+ Pack((StgWord) packGA.weight);
+ Pack((StgWord) packGA.payload.gc.gtid);
+ Pack((StgWord) packGA.payload.gc.slot);
+}
+
+/*
+ @PackPLC@ makes up a bogus GA for a PLC. Weight 0 implies that a PLC
+ address follows instead of PE, slot. */
+
+//@cindex PackPLC
+
+static void
+PackPLC(addr)
+StgPtr addr;
+{
+ Pack(0L); /* weight */
+ Pack((StgWord) addr); /* address */
+}
+
+/*
+ @PackOffset@ packs a special GA value that will be interpreted as an
+ offset to a closure in the pack buffer. This is used to avoid unfolding
+ the graph structure into a tree. */
+
+static void
+PackOffset(offset)
+int offset;
+{
+ /*
+ IF_PAR_DEBUG(pack,
+ belch("** Packing Offset %d at pack location %u",
+ offset, pack_locn));
+ */
+ Pack(1L); /* weight */
+ Pack(0L); /* pe */
+ Pack(offset); /* slot/offset */
+}
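+
+/* For orientation, a summary of the GA-prefix encodings written by the
+   three routines above (the authoritative reader is UnpackGA below; this
+   table describes, it does not define, the protocol):
+
+     weight == 0                 PLC; one more word, the static address
+     weight == 1, gtid == 0      offset into the pack buffer (slot field)
+     weight == 2                 unglobalised closure; body follows
+     anything else               full (weight, gtid, slot) global address
+*/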
+# endif /* PAR */
+
+//@node Unpacking routines, Aux fcts for packing, Low level packing routines, Graph packing
+//@subsection Unpacking routines
+
+/*
+ This was formerly in the (now deceased) module Unpack.c
+
+ Unpacking closures which have been exported to remote processors
+
+ This module defines routines for unpacking closures in the parallel
+ runtime system (GUM).
+
+ In the case of GrAnSim, this module defines routines for *simulating* the
+ unpacking of closures as it is done in the parallel runtime system.
+*/
+
+//@node GUM code, GranSim Code, Unpacking routines, Unpacking routines
+//@subsubsection GUM code
+
+#if defined(PAR)
+
+//@cindex InitPendingGABuffer
+void
+InitPendingGABuffer(size)
+nat size;
+{
+ if (PendingGABuffer==(globalAddr *)NULL)
+ PendingGABuffer = (globalAddr *)
+ stgMallocBytes(size*2*sizeof(globalAddr),
+ "InitPendingGABuffer");
+
+ /* current location in the buffer */
+ gaga = PendingGABuffer;
+}
+
+/*
+ @CommonUp@ commons up two closures which we have discovered to be
+ variants of the same object. One is made an indirection to the other. */
+
+//@cindex CommonUp
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ StgBlockingQueueElement *bqe;
+#if defined(DEBUG)
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i;
+ char str[80];
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(src, &size, &ptrs, &nonptrs, &vhs, str);
+#endif
+
+ ASSERT(src != (StgClosure *)NULL && dst != (StgClosure *)NULL);
+ ASSERT(src != dst);
+
+ IF_PAR_DEBUG(pack,
+ belch("*___ CommonUp %p (%s) --> %p (%s)",
+ src, info_type(src), dst, info_type(dst)));
+
+ switch (get_itbl(src)->type) {
+ case BLACKHOLE_BQ:
+ bqe = ((StgBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case FETCH_ME_BQ:
+ bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case RBH:
+ bqe = ((StgRBH *)src)->blocking_queue;
+ break;
+
+ case BLACKHOLE:
+ case FETCH_ME:
+ bqe = END_BQ_QUEUE;
+ break;
+
+ /* These closures are too small to be updated with an indirection!!! */
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ ASSERT(size<_HS+MIN_UPD_SIZE); // that's why we have to avoid UPD_IND
+ return;
+
+ /* currently we also common up 2 CONSTRs; this should reduce heap
+ * consumption but also does more work; not sure whether it's worth doing
+ */
+ case CONSTR:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case ARR_WORDS:
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_VAR:
+ break;
+
+ default:
+ /* Don't common up anything else */
+ return;
+ }
+
+ /* closure must be big enough to permit update with ind */
+ ASSERT(size>=_HS+MIN_UPD_SIZE);
+ /* NB: this also awakens the blocking queue for src */
+ UPD_IND(src, dst);
+}
+
+/*
+ * Common up the new closure with any existing closure having the same
+ * GA
+ */
+//@cindex SetGAandCommonUp
+static StgClosure *
+SetGAandCommonUp(globalAddr *ga, StgClosure *closure, rtsBool hasGA)
+{
+ StgClosure *existing;
+ StgInfoTable *ip, *oldip;
+ globalAddr *newGA;
+
+ if (!hasGA)
+ return closure;
+
+ /* should we already have a local copy? */
+ if (ga->weight==0xFFFFFFFF) {
+ ASSERT(ga->payload.gc.gtid==mytid); //sanity
+ ga->weight=0;
+ /* probably should also ASSERT that a commonUp takes place...*/
+ }
+
+ ip = get_itbl(closure);
+ if ((existing = GALAlookup(ga)) == NULL) {
+ /* Just keep the new object */
+ IF_PAR_DEBUG(pack,
+ belch("*<## New local object for GA ((%x, %d, %x)) is %p (%s)",
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight,
+ closure, info_type(closure)));
+
+ // make an entry binding closure to ga in the RemoteGA table
+ newGA = setRemoteGA(closure, ga, rtsTrue);
+ // if local closure is a FETCH_ME etc fill in the global indirection
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF)
+ ((StgFetchMe *)closure)->ga = newGA;
+ } else {
+
+
+#ifdef DIST
+// ***************************************************************************
+// ***************************************************************************
+// REMOTE_REF HACK - dual is in PackRemoteRef
+// - prevents the weight ever being updated
+ if (ip->type == REMOTE_REF)
+ ga->weight=0;
+// ***************************************************************************
+// ***************************************************************************
+#endif /* DIST */
+
+ /* Two closures, one global name. Someone loses */
+ oldip = get_itbl(existing);
+ if ((oldip->type == FETCH_ME ||
+ IS_BLACK_HOLE(existing) ||
+ /* try to share evaluated closures */
+ oldip->type == CONSTR ||
+ oldip->type == CONSTR_1_0 ||
+ oldip->type == CONSTR_0_1 ||
+ oldip->type == CONSTR_2_0 ||
+ oldip->type == CONSTR_1_1 ||
+ oldip->type == CONSTR_0_2
+ ) &&
+ ip->type != FETCH_ME)
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<#- Duplicate local object for GA ((%x, %d, %x)); redirecting %p (%s) -> %p (%s)",
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight,
+ existing, info_type(existing), closure, info_type(closure)));
+
+ /*
+ * What we had wasn't worth keeping, so make the old closure an
+ * indirection to the new closure (copying BQs if necessary) and
+ * make sure that the old entry is not the preferred one for this
+ * closure.
+ */
+ CommonUp(existing, closure);
+ //GALAdeprecate(ga);
+#if defined(DEBUG)
+ {
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i;
+ char str[80];
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(GALAlookup(ga), &size, &ptrs, &nonptrs, &vhs, str);
+
+ /* now ga indirectly refers to the new closure */
+ ASSERT(size<_HS+MIN_UPD_SIZE ||
+ UNWIND_IND(GALAlookup(ga))==closure);
+ }
+#endif
+ } else {
+ /*
+ * Either we already had something worthwhile by this name or
+ * the new thing is just another FetchMe. However, the thing we
+ * just unpacked has to be left as-is, or the child unpacking
+ * code will fail. Remember that the way pointer words are
+ * filled in depends on the info pointers of the parents being
+ * the same as when they were packed.
+ */
+ IF_PAR_DEBUG(pack,
+ belch("*<#@ Duplicate local object for GA ((%x, %d, %x)); keeping %p (%s) nuking unpacked %p (%s)",
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight,
+ existing, info_type(existing), closure, info_type(closure)));
+
+ /* overwrite 2nd word; indicates that the closure is garbage */
+ IF_DEBUG(sanity,
+ ((StgFetchMe*)closure)->ga = (globalAddr*)GARBAGE_MARKER;
+ IF_PAR_DEBUG(pack,
+ belch("++++ unpacked closure %p (%s) is garbage: %p",
+ closure, info_type(closure), *(closure+1))));
+
+ closure = existing;
+#if 0
+ // HACK
+ ty = get_itbl(closure)->type;
+ if (ty == CONSTR ||
+ ty == CONSTR_1_0 ||
+ ty == CONSTR_0_1 ||
+ ty == CONSTR_2_0 ||
+ ty == CONSTR_1_1 ||
+ ty == CONSTR_0_2)
+ CommonUp(closure, graph);
+#endif
+ }
+ /* We don't use this GA after all, so give back the weight */
+ (void) addWeight(ga);
+ }
+
+ /* if we have unpacked a FETCH_ME, we have a GA, too */
+ ASSERT(get_itbl(closure)->type!=FETCH_ME ||
+ looks_like_ga(((StgFetchMe*)closure)->ga));
+
+ /* Sort out the global address mapping */
+ if (ip_THUNK(ip)){
+ // || // (ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||
+ //(ip_MUTABLE(ip) && ip->type != FETCH_ME)) {
+ /* Make up new GAs for single-copy closures */
+ globalAddr *newGA = makeGlobal(closure, rtsTrue);
+
+ // It's a new GA and therefore has the full weight
+ ASSERT(newGA->weight==0);
+
+ /* Create an old GA to new GA mapping */
+ *gaga++ = *ga;
+ splitWeight(gaga, newGA);
+ /* inlined splitWeight; we know that newGALA has full weight
+ newGA->weight = gaga->weight = 1L << (BITS_IN(unsigned) - 1);
+ gaga->payload = newGA->payload;
+ */
+ ASSERT(gaga->weight == 1U << (BITS_IN(unsigned) - 1));
+ gaga++;
+ }
+ return closure;
+}
+
+/*
+ Copies a segment of the buffer, starting at @bufptr@, representing a closure
+ into the heap at @graph@.
+ */
+//@cindex FillInClosure
+static nat
+FillInClosure(StgWord ***bufptrP, StgClosure *graph)
+{
+ StgInfoTable *ip;
+ StgWord **bufptr = *bufptrP;
+ nat ptrs, nonptrs, vhs, i, size;
+ char str[80];
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(((StgClosure*)bufptr)->header.info));
+
+ /*
+ * Close your eyes. You don't want to see where we're looking. You
+ * can't get closure info until you've unpacked the variable header,
+ * but you don't know how big it is until you've got closure info.
+ * So...we trust that the closure in the buffer is organized the
+ * same way as they will be in the heap...at least up through the
+ * end of the variable header.
+ */
+ ip = get_closure_info((StgClosure *)bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+  /* Make sure that nothing but the fixed header is filled in from the
+     buffer; the ga field of a FETCH_ME is filled in in SetGAandCommonUp */
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF) {
+ ASSERT(size>=_HS+MIN_UPD_SIZE); // size of the FM in the heap
+ ptrs = nonptrs = vhs = 0; // i.e. only unpack FH from buffer
+ }
+ /* ToDo: check whether this is really needed */
+ if (ip->type == ARR_WORDS) {
+ UnpackArray(bufptrP, graph);
+ return arr_words_sizeW((StgArrWords *)bufptr);
+ }
+
+ if (ip->type == PAP || ip->type == AP_UPD) {
+    return UnpackPAP(bufptrP, graph); // includes size of unpacked FMs
+ }
+
+ /*
+ Remember, the generic closure layout is as follows:
+      +--------------------------------------------------+
+      | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PTRS |
+      +--------------------------------------------------+
+ */
+ /* Fill in the fixed header */
+ for (i = 0; i < _HS; i++)
+ ((StgPtr)graph)[i] = (StgWord)*bufptr++;
+
+ /* Fill in the packed variable header */
+ for (i = 0; i < vhs; i++)
+ ((StgPtr)graph)[_HS + i] = (StgWord)*bufptr++;
+
+ /* Pointers will be filled in later */
+
+ /* Fill in the packed non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ ((StgPtr)graph)[_HS + i + vhs + ptrs] = (StgWord)*bufptr++;
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+ // return bufptr;
+ *bufptrP = bufptr;
+ ASSERT(((ip->type==FETCH_ME || ip->type==REMOTE_REF)&& sizeofW(StgFetchMe)==size) ||
+ _HS+vhs+ptrs+nonptrs == size);
+ return size;
+}
+
+/*
+ Find the next pointer field in the parent closure.
+ If the current parent has been completely unpacked already, get the
+ next closure from the global closure queue.
+*/
+//@cindex LocateNextParent
+static void
+LocateNextParent(parentP, pptrP, pptrsP, sizeP)
+StgClosure **parentP;
+nat *pptrP, *pptrsP, *sizeP;
+{
+ StgInfoTable *ip; // debugging
+ nat nonptrs, pvhs;
+ char str[80];
+
+ /* pptr as an index into the current parent; find the next pointer field
+ in the parent by increasing pptr; if that takes us off the closure
+ (i.e. *pptr + 1 > *pptrs) grab a new parent from the closure queue
+ */
+ (*pptrP)++;
+ while (*pptrP + 1 > *pptrsP) {
+ /* *parentP has been constructed (all pointer set); so check it now */
+ IF_DEBUG(sanity,
+ if ((*parentP!=(StgClosure*)NULL) && // not root
+ (*((StgPtr)(*parentP)+1)!=GARBAGE_MARKER) && // not commoned up
+ (get_itbl(*parentP)->type != FETCH_ME))
+ checkClosure(*parentP));
+
+ *parentP = DeQueueClosure();
+
+ if (*parentP == NULL)
+ break;
+ else {
+ ip = get_closure_info(*parentP, sizeP, pptrsP, &nonptrs,
+ &pvhs, str);
+ *pptrP = 0;
+ }
+ }
+ /* *parentP points to the new (or old) parent; */
+ /* *pptr, *pptrs and *size have been updated referring to the new parent */
+}
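+
+/* Worked example of the walk above (illustrative): with pptrs == 2 pointer
+   fields in the parent and pptr == 1 (the last pointer), the increment
+   takes pptr to 2, the condition 2+1 > 2 holds, and the next parent is
+   dequeued with pptr reset to 0; had pptr been 0, it would simply advance
+   to the parent's second pointer field. */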
+
+/*
+ UnpackClosure is the heart of the unpacking routine. It is called for
+ every closure found in the packBuffer. Any prefix such as GA, PLC marker
+ etc has been unpacked into the *ga structure.
+ UnpackClosure does the following:
+ - check for the kind of the closure (PLC, Offset, std closure)
+ - copy the contents of the closure from the buffer into the heap
+ - update LAGA tables (in particular if we end up with 2 closures
+ having the same GA, we make one an indirection to the other)
+ - set the GAGA map in order to send back an ACK message
+
+ At the end of this function *graphP has been updated to point to the
+ next free word in the heap for unpacking the rest of the graph and
+ *bufptrP points to the next word in the pack buffer to be unpacked.
+*/
+
+static StgClosure*
+UnpackClosure (StgWord ***bufptrP, StgClosure **graphP, globalAddr *ga) {
+ StgClosure *closure;
+ nat size;
+ rtsBool hasGA = rtsFalse, unglobalised = rtsFalse;
+
+ /* Now unpack the closure body, if there is one; three cases:
+ - PLC: closure is just a pointer to a static closure
+ - Offset: closure has been unpacked already
+ - else: copy data from packet into closure
+ */
+ if (isFixed(ga)) {
+ closure = UnpackPLC(ga);
+ } else if (isOffset(ga)) {
+ closure = UnpackOffset(ga);
+ } else {
+ /* if not PLC or Offset it must be a GA and then the closure */
+ ASSERT(RtsFlags.ParFlags.globalising!=0 || LOOKS_LIKE_GA(ga));
+ /* check whether this is an unglobalised closure */
+ unglobalised = isUnglobalised(ga);
+ /* Now we have to build something. */
+ hasGA = !isConstr(ga);
+ /* the new closure will be built here */
+ closure = *graphP;
+
+ /* fill in the closure from the buffer */
+ size = FillInClosure(/*in/out*/bufptrP, /*in*/closure);
+ /* if it is unglobalised, it may not be a thunk!! */
+ ASSERT(!unglobalised || !closure_THUNK(closure));
+
+ /* Add to queue for processing */
+ QueueClosure(closure);
+
+ /* common up with other graph if necessary */
+ if (!unglobalised)
+ closure = SetGAandCommonUp(ga, closure, hasGA);
+
+ /* if we unpacked a THUNK, check that it is large enough to update */
+ ASSERT(!closure_THUNK(closure) || size>=_HS+MIN_UPD_SIZE);
+ /* graph shall point to next free word in the heap */
+ *graphP += size;
+ //*graphP += (size < _HS+MIN_UPD_SIZE) ? _HS+MIN_UPD_SIZE : size; // see ASSERT
+ }
+ return closure;
+}
+
+/*
+ @UnpackGraph@ unpacks the graph contained in a message buffer. It
+ returns a pointer to the new graph. The @gamap@ parameter is set to
+ point to an array of (oldGA,newGA) pairs which were created as a result
+ of unpacking the buffer; @nGAs@ is set to the number of GA pairs which
+ were created.
+
+ The format of graph in the pack buffer is as defined in @Pack.lc@. */
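+
+/* A hedged usage sketch (not RTS code; sendAck stands for whatever
+   acknowledgement routine the caller uses to return the GA pairs):
+
+     globalAddr *gamap;
+     nat nGAs;
+     StgClosure *root = UnpackGraph(packBuffer, &gamap, &nGAs);
+     // acknowledge the (oldGA,newGA) pairs created during unpacking
+     sendAck(sender, nGAs, gamap);
+*/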
+
+//@cindex UnpackGraph
+StgClosure *
+UnpackGraph(packBuffer, gamap, nGAs)
+rtsPackBuffer *packBuffer;
+globalAddr **gamap;
+nat *nGAs;
+{
+ StgWord **bufptr, **slotptr;
+ globalAddr gaS;
+ StgClosure *closure, *graphroot, *graph, *parent;
+ nat size, heapsize, bufsize,
+ pptr = 0, pptrs = 0, pvhs = 0;
+ nat unpacked_closures = 0, unpacked_thunks = 0; // stats only
+
+ IF_PAR_DEBUG(resume,
+ graphFingerPrint[0] = '\0');
+
+ ASSERT(_HS==1); // HWL HACK; compile time constant
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+ PAR_TICKY_UNPACK_GRAPH_START();
+#endif
+
+ /* Initialisation */
+ InitPacking(rtsTrue); // same as in PackNearbyGraph
+ globalUnpackBuffer = packBuffer;
+
+ IF_DEBUG(sanity, // do a sanity check on the incoming packet
+ checkPacket(packBuffer));
+
+ ASSERT(gaga==PendingGABuffer);
+ graphroot = (StgClosure *)NULL;
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ heapsize = packBuffer->unpacked_size;
+ bufptr = packBuffer->buffer;
+
+ /* allocate heap */
+ if (heapsize > 0) {
+ graph = (StgClosure *)allocate(heapsize);
+ ASSERT(graph != NULL);
+ // parallel global statistics: increase amount of global data
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_global += heapsize;
+ }
+ }
+
+ /* iterate over the buffer contents and unpack all closures */
+ parent = (StgClosure *)NULL;
+ do {
+ /* check that we aren't at the end of the buffer, yet */
+ IF_DEBUG(sanity, ASSERT(*bufptr != END_OF_BUFFER_MARKER));
+
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+
+ /* fill in gaS from buffer; gaS may receive GA, PLC- or offset-marker */
+ bufptr = UnpackGA(/*in*/bufptr, /*out*/&gaS);
+
+ /* this allocates heap space, updates LAGA tables etc */
+ closure = UnpackClosure (/*in/out*/&bufptr, /*in/out*/&graph, /*in*/&gaS);
+ unpacked_closures++; // stats only; doesn't count FMs in PAP!!!
+ unpacked_thunks += (closure_THUNK(closure)) ? 1 : 0; // stats only
+
+ /*
+ * Set parent pointer to point to chosen closure. If we're at the top of
+ * the graph (our parent is NULL), then we want to arrange to return the
+ * chosen closure to our caller (possibly in place of the allocated graph
+ * root.)
+ */
+ if (parent == NULL)
+ graphroot = closure;
+ else
+ ((StgPtr)parent)[_HS + pvhs + pptr] = (StgWord) closure;
+
+ /* Save closure pointer for resolving offsets */
+ *slotptr = (StgWord*) closure;
+
+ /* Locate next parent pointer */
+ LocateNextParent(&parent, &pptr, &pptrs, &size);
+
+ IF_DEBUG(sanity,
+ gaS.weight = 0xdeadffff;
+ gaS.payload.gc.gtid = 0xdead;
+ gaS.payload.gc.slot = 0xdeadbeef;);
+ } while (parent != NULL);
+
+ IF_PAR_DEBUG(resume,
+ GraphFingerPrint(graphroot, graphFingerPrint);
+ ASSERT(strlen(graphFingerPrint)<=MAX_FINGER_PRINT_LEN);
+ belch(">>> Fingerprint of graph rooted at %p (after unpacking <<%d>>:\n {%s}",
+ graphroot, packBuffer->id, graphFingerPrint));
+
+ /* we unpacked exactly as many words as there are in the buffer */
+ ASSERT(bufsize == (nat) (bufptr-(packBuffer->buffer)));
+  /* we filled in no more heap closures than we allocated at the beginning;
+     ideally this should be an ==;
+     NB: the test is only valid if we unpacked anything at all (graphroot
+     might end up being a PLC!), therefore the strange test for HEAP_ALLOCED
+  */
+
+ /*
+ {
+ StgInfoTable *info = get_itbl(graphroot);
+ ASSERT(!HEAP_ALLOCED(graphroot) || heapsize >= (nat) (graph-graphroot) ||
+ // ToDo: check whether CAFs are really a special case here!!
+ info->type==CAF_BLACKHOLE || info->type==FETCH_ME || info->type==FETCH_ME_BQ);
+ }
+ */
+
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*bufptr == END_OF_BUFFER_MARKER));
+
+ *gamap = PendingGABuffer;
+ *nGAs = (gaga - PendingGABuffer) / 2;
+
+ IF_PAR_DEBUG(tables,
+ belch("** LAGA table after unpacking closure %p:",
+ graphroot);
+ printLAGAtable());
+
+ /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+ ASSERT(graphroot!=NULL);
+
+ IF_DEBUG(sanity,
+ {
+ StgPtr p;
+
+ /* check the unpacked graph */
+ //checkHeapChunk(graphroot,graph-sizeof(StgWord));
+
+ // if we do sanity checks, then wipe the pack buffer after unpacking
+ for (p=(StgPtr)packBuffer->buffer; p<(StgPtr)(packBuffer->buffer)+(packBuffer->size); )
+ *p++ = 0xdeadbeef;
+ });
+
+ /* reset the global variable */
+ globalUnpackBuffer = (rtsPackBuffer*)NULL;
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+ PAR_TICKY_UNPACK_GRAPH_END(unpacked_closures, unpacked_thunks);
+#endif
+
+ return (graphroot);
+}
+
+//@cindex UnpackGA
+static StgWord **
+UnpackGA(StgWord **bufptr, globalAddr *ga)
+{
+ /* First, unpack the next GA or PLC */
+ ga->weight = (rtsWeight) *bufptr++;
+
+ if (ga->weight == 2) { // unglobalised closure to follow
+ // nothing to do; closure starts at *bufptr
+ } else if (ga->weight > 0) { // fill in GA
+ ga->payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga->payload.gc.slot = (int) *bufptr++;
+ } else {
+ ga->payload.plc = (StgPtr) *bufptr++;
+ }
+ return bufptr;
+}
+
+//@cindex UnpackPLC
+static StgClosure *
+UnpackPLC(globalAddr *ga)
+{
+ /* No more to unpack; just set closure to local address */
+ IF_PAR_DEBUG(pack,
+ belch("*<^^ Unpacked PLC at %x", ga->payload.plc));
+ return (StgClosure*)ga->payload.plc;
+}
+
+//@cindex UnpackOffset
+static StgClosure *
+UnpackOffset(globalAddr *ga)
+{
+ /* globalUnpackBuffer is a global var init in UnpackGraph */
+ ASSERT(globalUnpackBuffer!=(rtsPackBuffer*)NULL);
+ /* No more to unpack; just set closure to cached address */
+ IF_PAR_DEBUG(pack,
+ belch("*<__ Unpacked indirection to %p (was OFFSET %d)",
+ (StgClosure *)((globalUnpackBuffer->buffer)[ga->payload.gc.slot]),
+ ga->payload.gc.slot));
+ return (StgClosure *)(globalUnpackBuffer->buffer)[ga->payload.gc.slot];
+}
+
+/*
+ Input: *bufptrP, *graphP ... ptrs to the pack buffer and into the heap.
+
+ *bufptrP points to something that should be unpacked as a FETCH_ME:
+ |
+ v
+ +-------------------------------
+ | GA | FH of FM
+ +-------------------------------
+
+ The first 3 words starting at *bufptrP are the GA address; the next
+ word is the generic FM info ptr followed by the remaining FH (if any)
+ The result after unpacking will be a FETCH_ME closure, pointed to by
+ *graphP at the start of the fct;
+ |
+ v
+ +------------------------+
+ | FH of FM | ptr to a GA |
+ +------------------------+
+
+ The ptr field points into the RemoteGA table, which holds the actual GA.
+ *bufptrP has been updated to point to the next word in the buffer.
+ *graphP has been updated to point to the first free word at the end.
+*/
+
+static StgClosure*
+UnpackFetchMe (StgWord ***bufptrP, StgClosure **graphP) {
+ StgClosure *closure, *foo;
+ globalAddr gaS;
+
+  /* This fct relies on size of FM <= size of FM in pack buffer */
+ ASSERT(sizeofW(StgFetchMe)<=PACK_FETCHME_SIZE);
+
+ /* fill in gaS from buffer */
+ *bufptrP = UnpackGA(*bufptrP, &gaS);
+ /* might be an offset to a closure in the pack buffer */
+ if (isOffset(&gaS)) {
+ belch("*< UnpackFetchMe: found OFFSET to %d when unpacking FM at buffer loc %p",
+ gaS.payload.gc.slot, *bufptrP);
+
+ closure = UnpackOffset(&gaS);
+ /* return address of previously unpacked closure; leaves *graphP unchanged */
+ return closure;
+ }
+
+ /* we have a proper GA at hand */
+ ASSERT(LOOKS_LIKE_GA(&gaS));
+
+ IF_DEBUG(sanity,
+ if (isFixed(&gaS))
+ barf("*< UnpackFetchMe: found PLC where FM was expected %p (%s)",
+ *bufptrP, info_type((StgClosure*)*bufptrP)));
+
+ IF_PAR_DEBUG(pack,
+ belch("*<_- Unpacked @ %p a FETCH_ME to GA ",
+ *graphP);
+ printGA(&gaS);
+ fputc('\n', stderr));
+
+ /* the next thing must be the IP to a FETCH_ME closure */
+ ASSERT(get_itbl((StgClosure *)*bufptrP)->type == FETCH_ME);
+
+ closure = *graphP;
+ /* fill in the closure from the buffer */
+ FillInClosure(bufptrP, closure);
+
+ /* the newly built closure is a FETCH_ME */
+ ASSERT(get_itbl(closure)->type == FETCH_ME);
+
+ /* common up with other graph if necessary
+ this also assigns the contents of gaS to the ga field of the FM closure */
+ foo = SetGAandCommonUp(&gaS, closure, rtsTrue);
+
+ ASSERT(foo!=closure || LOOKS_LIKE_GA(((StgFetchMe*)closure)->ga));
+
+ IF_PAR_DEBUG(pack,
+ if (foo==closure) { // only if not commoned up
+ belch("*<_- current FM @ %p next FM @ %p; unpacked FM @ %p is ",
+ *graphP, *graphP+sizeofW(StgFetchMe), closure);
+ printClosure(closure);
+ });
+ *graphP += sizeofW(StgFetchMe);
+ return foo;
+}
+
+/*
+ Unpack an array of words.
+ Could use generic unpack most of the time, but cleaner to separate it.
+ ToDo: implement packing of MUT_ARRAYs
+*/
+
+//@cindex UnpackArray
+static void
+UnpackArray(StgWord ***bufptrP, StgClosure *graph)
+{
+ StgInfoTable *info;
+ StgWord **bufptr=*bufptrP;
+ nat size, ptrs, nonptrs, vhs, i, n;
+ char str[80];
+
+ /* yes, I know I am paranoid; but who's asking !? */
+ IF_DEBUG(sanity,
+ info = get_closure_info((StgClosure*)bufptr,
+ &size, &ptrs, &nonptrs, &vhs, str);
+ ASSERT(info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+ info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR));
+
+ n = ((StgArrWords *)bufptr)->words;
+  // NB: n counts only the payload words; arr_words_sizeW() would also include the header
+
+ IF_PAR_DEBUG(pack,
+ if (n<100)
+ belch("*<== unpacking an array of %d words %p (%s) (size=%d) |%s|\n",
+ n, (StgClosure*)bufptr, info_type((StgClosure*)bufptr),
+ arr_words_sizeW((StgArrWords *)bufptr),
+ /* print array (string?) */
+ ((StgArrWords *)graph)->payload);
+ else
+ belch("*<== unpacking an array of %d words %p (%s) (size=%d)\n",
+ n, (StgClosure*)bufptr, info_type((StgClosure*)bufptr),
+ arr_words_sizeW((StgArrWords *)bufptr)));
+
+ /* Unpack the header (2 words: info ptr and the number of words to follow) */
+ ((StgArrWords *)graph)->header.info = (StgInfoTable*)*bufptr++; // assumes _HS==1; yuck!
+ ((StgArrWords *)graph)->words = (StgWord)*bufptr++;
+
+ /* unpack the payload of the closure (all non-ptrs) */
+ for (i=0; i<n; i++)
+ ((StgArrWords *)graph)->payload[i] = (StgWord)*bufptr++;
+
+ ASSERT(bufptr==*bufptrP+arr_words_sizeW((StgArrWords *)*bufptrP));
+ *bufptrP = bufptr;
+}
+
+/*
+ Unpack a PAP in the buffer into a heap closure.
+ For each FETCHME we find in the packed PAP we have to unpack a separate
+ FETCHME closure and insert a pointer to this closure into the PAP.
+ We unpack all FETCHMEs into an area after the PAP proper (the `FM area').
+ Note that the size of a FETCHME in the buffer is exactly the same as
+ the size of an unpacked FETCHME plus 1 word for the pointer to it.
+ Therefore, we just allocate packed_size words in the heap for the unpacking.
+ After this routine the heap starting from *graph looks like this:
+
+ graph
+ |
+ v PAP closure | FM area |
+ +------------------------------------------------------------+
+ | PAP header | n_args | fun | payload ... | FM_1 | FM_2 .... |
+ +------------------------------------------------------------+
+
+ where payload contains pointers to each of the unpacked FM_1, FM_2 ...
+ The size of the PAP closure plus all FMs is _HS+2+packed_size.
+*/
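+
+/*
+  A worked size example (hypothetical numbers; assumes _HS==1 as above):
+  a PAP with n_args=2 whose packed payload takes packed_size=6 words needs
+  _HS+2+packed_size = 9 words of heap; the PAP proper accounts for
+  pap_sizeW = _HS+2+n_args = 5 of these, leaving 4 words for the FM area.
+*/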
+
+//@cindex UnpackPAP
+static nat
+UnpackPAP(StgWord ***bufptrP, StgClosure *graph)
+{
+ nat n, i, j, packed_size = 0;
+ StgPtr p, q, end, payload_start, p_FMs;
+ const StgInfoTable* info;
+ StgWord bitmap;
+ StgWord **bufptr = *bufptrP;
+#if defined(DEBUG)
+ nat FMs_in_PAP=0;
+ void checkPAPSanity(StgPAP *graph, StgPtr p_FM_begin, StgPtr p_FM_end);
+#endif
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP: unpacking PAP @ %p with %d words to closure %p",
+ *bufptr, *(bufptr+1), graph));
+
+ /* Unpack the PAP header (both fixed and variable) */
+ ((StgPAP *)graph)->header.info = (StgInfoTable*)*bufptr++;
+ n = ((StgPAP *)graph)->n_args = (StgWord)*bufptr++;
+ ((StgPAP *)graph)->fun = (StgClosure*)*bufptr++;
+ packed_size = (nat)*bufptr++;
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP: PAP header is [%p, %d, %p] %d",
+ ((StgPAP *)graph)->header.info,
+ ((StgPAP *)graph)->n_args,
+ ((StgPAP *)graph)->fun,
+ packed_size));
+
+ payload_start = (StgPtr)bufptr;
+ /* p points to the current word in the heap */
+ p = (StgPtr)((StgPAP *)graph)->payload; // payload of PAP will be unpacked here
+ p_FMs = (StgPtr)graph+pap_sizeW((StgPAP*)graph); // FMs will be unpacked here
+ end = (StgPtr) payload_start+packed_size;
+ /*
+ The main loop unpacks the PAP in *bufptr into *p, with *p_FMS as the
+ FM area for unpacking all FETCHMEs encountered during unpacking.
+ */
+ while ((StgPtr)bufptr<end) {
+    /* be sure that we don't write more than we allocated for this closure */
+    ASSERT(p_FMs <= (StgPtr)graph + _HS + 2 + packed_size);
+    /* be sure that the unpacked PAP doesn't run into the FM area */
+    ASSERT(p < (StgPtr)graph + pap_sizeW((StgPAP*)graph));
+ /* the loop body has been borrowed from scavenge_stack */
+ q = *bufptr; // let q be the contents of the current pointer into the buffer
+
+ /* Test whether the next thing is a FETCH_ME.
+ In PAPs FETCH_ME are encoded via a starting marker of ARGTAG_MAX+1
+ */
+ if (q==(StgPtr)(ARGTAG_MAX+1)) {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: unpacking FM; filling in ptr to FM area: %p",
+ p, p_FMs));
+ bufptr++; // skip ARGTAG_MAX+1 marker
+ // Unpack a FM into the FM area after the PAP proper and insert pointer
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ continue;
+ }
+
+ /* Test whether it is a PLC */
+ if (q==(StgPtr)0) { // same as isFixed(q)
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: unpacking PLC to %p",
+ p, *(bufptr+1)));
+ bufptr++; // skip 0 marker
+ *p++ = (StgWord)*bufptr++;
+ continue;
+ }
+
+ /* If we've got a tag, pack all words in that block */
+ if (IS_ARG_TAG((W_)q)) { // q stands for the no. of non-ptrs to follow
+      nat m = ARG_SIZE(q); // number of non-ptr words in this block
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: unpacking %d words (tagged), starting @ %p",
+ p, m, p));
+ for (i=0; i<m+1; i++)
+ *p++ = (StgWord)*bufptr++;
+ continue;
+ }
+
+ /*
+ * Otherwise, q must be the info pointer of an activation
+ * record. All activation records have 'bitmap' style layout
+ * info.
+ */
+ info = get_itbl((StgClosure *)q);
+ switch (info->type) {
+
+ /* Dynamic bitmap: the mask is stored on the stack */
+ case RET_DYN:
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: RET_DYN",
+ p));
+
+      /* Unpack the header as is */
+      ((StgRetDyn *)p)->info     = (StgWord)*bufptr++;
+      ((StgRetDyn *)p)->liveness = (StgWord)*bufptr++;
+      ((StgRetDyn *)p)->ret_addr = (StgWord)*bufptr++;
+      /* grab the liveness mask before advancing p; small_bitmap would
+	 otherwise read an uninitialised bitmap */
+      bitmap = ((StgRetDyn *)p)->liveness;
+      p += 3;
+
+      goto small_bitmap;
+
+ /* probably a slow-entry point return address: */
+ case FUN:
+ case FUN_STATIC:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: FUN or FUN_STATIC",
+ p));
+
+	((StgClosure *)p)->header.info = (StgInfoTable*)*bufptr++; // advance past the info ptr
+	p++;
+
+ goto follow_srt; //??
+ }
+
+ /* Using generic code here; could inline as in scavenge_stack */
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *frame = (StgUpdateFrame *)p;
+ //nat type = get_itbl(frame->updatee)->type;
+
+ //ASSERT(type==BLACKHOLE || type==CAF_BLACKHOLE || type==BLACKHOLE_BQ);
+
+ IF_PAR_DEBUG(pack,
+		   belch("*<** UnpackPAP @ %p: UPDATE_FRAME",
+ p));
+
+ ((StgUpdateFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+ ((StgUpdateFrame *)p)->link = (StgUpdateFrame*)*bufptr++; // ToDo: fix intra-stack pointer
+ ((StgUpdateFrame *)p)->updatee = (StgClosure*)*bufptr++; // ToDo: follow link
+
+	p += 3;
+	continue; // don't fall through into the STOP_FRAME case
+      }
+
+ /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */
+ case STOP_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: STOP_FRAME",
+ p));
+	((StgStopFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+	p++;
+	continue; // don't fall through into the CATCH_FRAME case
+      }
+
+ case CATCH_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: CATCH_FRAME",
+ p));
+
+ ((StgCatchFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+ ((StgCatchFrame *)p)->link = (StgUpdateFrame*)*bufptr++;
+ ((StgCatchFrame *)p)->exceptions_blocked = (StgInt)*bufptr++;
+ ((StgCatchFrame *)p)->handler = (StgClosure*)*bufptr++;
+	p += 4;
+	continue; // don't fall through into the SEQ_FRAME case
+      }
+
+ case SEQ_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+		     belch("*<** UnpackPAP @ %p: SEQ_FRAME",
+ p));
+
+ ((StgSeqFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+ ((StgSeqFrame *)p)->link = (StgUpdateFrame*)*bufptr++;
+
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+
+ p = (StgPtr)&(((StgClosure *)p)->payload);
+ goto small_bitmap;
+ }
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: RET_{BCO,SMALL,VEC_SMALL}",
+ p));
+
+
+ ((StgClosure *)p)->header.info = (StgInfoTable*)*bufptr++;
+ p++;
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+ /* this assumes that the payload starts immediately after the info-ptr */
+
+ small_bitmap:
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ *p++ = (StgWord)*bufptr++;
+ }
+ bitmap = bitmap >> 1;
+ }
+
+ follow_srt:
+ belch("*<-- UnpackPAP: nothing to do for follow_srt");
+ continue;
+
+ /* large bitmap (> 32 entries) */
+ case RET_BIG:
+ case RET_VEC_BIG:
+ {
+ StgPtr q;
+ StgLargeBitmap *large_bitmap;
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: RET_{BIG,VEC_BIG} (large_bitmap=%p)",
+ p, info->layout.large_bitmap));
+
+
+ ((StgClosure *)p)->header.info = (StgInfoTable*)*bufptr++;
+ p++;
+
+ large_bitmap = info->layout.large_bitmap;
+
+ for (j=0; j<large_bitmap->size; j++) {
+ bitmap = large_bitmap->bitmap[j];
+ q = p + BITS_IN(W_);
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+	      *p++ = (StgWord)*bufptr++;
+ }
+ bitmap = bitmap >> 1;
+ }
+ if (j+1 < large_bitmap->size) {
+ while (p < q) {
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ }
+ }
+ }
+
+ /* and don't forget to follow the SRT */
+ goto follow_srt;
+ }
+
+ default:
+ barf("UnpackPAP: weird activation record found on stack: %d",
+ (int)(info->type));
+ }
+ }
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP finished; unpacked closure @ %p is:",
+ (StgClosure *)graph);
+ printClosure((StgClosure *)graph));
+
+ IF_DEBUG(sanity, /* check sanity of unpacked PAP */
+ checkClosure(graph));
+
+ *bufptrP = bufptr;
+ /*
+ Now p points to the first word after the PAP proper and p_FMs points
+ to the next free word in the heap; everything between p and p_FMs are
+ FETCHMEs
+ */
+ IF_DEBUG(sanity,
+	   checkPAPSanity((StgPAP *)graph, p, p_FMs));
+
+ /* we have to return the size of PAP + FMs as size of the unpacked thing */
+  ASSERT((StgPtr)graph + pap_sizeW((StgPAP*)graph) == p);
+  return (nat)(p_FMs - (StgPtr)graph);
+}
+
+#if defined(DEBUG)
+/*
+ Check sanity of a PAP after unpacking the PAP.
+ This means that there is slice of heap after the PAP containing FETCHMEs
+*/
+void
+checkPAPSanity(StgPAP *graph, StgPtr p_FM_begin, StgPtr p_FM_end)
+{
+ StgPtr xx;
+
+ /* check that the main unpacked closure is a PAP */
+  ASSERT(graph->header.info == &stg_PAP_info);
+ checkClosure(graph);
+ /* check that all of the closures in the FM-area are FETCHMEs */
+ for (xx=p_FM_begin; xx<p_FM_end; xx += sizeofW(StgFetchMe)) {
+ /* must be a FETCHME closure */
+ ASSERT(((StgClosure*)xx)->header.info == &stg_FETCH_ME_info);
+ /* it might have been commoned up (=> marked as garbage);
+ otherwise it points to a GA */
+ ASSERT((((StgFetchMe*)xx)->ga)==GARBAGE_MARKER ||
+ LOOKS_LIKE_GA(((StgFetchMe*)xx)->ga));
+ }
+ /* traverse the payload of the PAP */
+  for (xx=(StgPtr)graph->payload; xx-(StgPtr)(graph->payload)<graph->n_args; xx++) {
+ /* if the current elem is a pointer into the FM area, check that
+ the GA field is ok */
+ ASSERT(!(p_FM_begin<(StgPtr)*xx && (StgPtr)*xx<p_FM_end) ||
+ LOOKS_LIKE_GA(((StgFetchMe*)*xx)->ga));
+ }
+}
+#endif /* DEBUG */
+#endif /* PAR */
+
+//@node GranSim Code, , GUM code, Unpacking routines
+//@subsubsection GranSim Code
+
+/*
+ For GrAnSim: No actual unpacking should be necessary. We just
+ have to walk over the graph and set the bitmasks appropriately.
+ Since we use RBHs similarly to GUM but without an ACK message/event
+ we have to revert the RBH from within the UnpackGraph routine (good luck!)
+ -- HWL
+*/
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+ nat size, ptrs, nonptrs, vhs,
+ bufptr = 0;
+ StgClosure *closure, *graphroot, *graph;
+ StgInfoTable *ip;
+ StgWord bufsize, unpackedsize,
+ pptr = 0, pptrs = 0, pvhs;
+ StgTSO* tso;
+ char str[240], str1[80];
+ int i;
+
+ bufptr = 0;
+ graphroot = buffer->buffer[0];
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpackedsize = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ IF_GRAN_DEBUG(pack,
+ belch("<<< Unpacking <<%d>> (buffer @ %p):\n (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+ buffer->id, buffer, graphroot, where_is(graphroot),
+ bufsize, tso->id, tso,
+ where_is((StgClosure *)tso)));
+
+ do {
+ closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+ /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+ ip = get_closure_info(closure,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ",
+ closure, (closure_HNF(closure) ? "NF" : "__"),
+ PROCS(closure)));
+
+ if (get_itbl(closure)->type == RBH) {
+ /* if it's an RBH, we have to revert it into a normal closure, thereby
+ awakening the blocking queue; not that this is code currently not
+       awakening the blocking queue; note that this code is currently not
+       needed in GUM, but it should be added with the new features in
+       GdH (and the implementation of a NACK message)
+ // closure->header.gran.procs = PE_NUMBER(CurrentProc);
+ SET_GRAN_HDR(closure, PE_NUMBER(CurrentProc)); /* Move node */
+
+ IF_GRAN_DEBUG(pack,
+ strcat(str, " (converting RBH) "));
+
+ convertFromRBH(closure); /* In GUM that's done by convertToFetchMe */
+
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a RBH; after reverting: IP=%p",
+ closure, info_type(closure), get_itbl(closure)));
+ } else if (IS_BLACK_HOLE(closure)) {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a BH; copying node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+ if (closure_HNF(closure)) {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a HNF; copying node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is no (R)BH or HNF; moving node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+ }
+ }
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+ IF_GRAN_DEBUG(pack, belch(str));
+
+ } while (bufptr<buffer->size) ; /* (parent != NULL); */
+
+ /* In GrAnSim we allocate pack buffers dynamically! -- HWL */
+ free(buffer->buffer);
+ free(buffer);
+
+ IF_GRAN_DEBUG(pack,
+ belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0));
+
+ return (graphroot);
+}
+#endif /* GRAN */
+
+//@node Aux fcts for packing, Printing Packet Contents, Unpacking routines, Graph packing
+//@subsection Aux fcts for packing
+
+//@menu
+//* Offset table::
+//* Packet size::
+//* Types of Global Addresses::
+//* Closure Info::
+//@end menu
+
+//@node Offset table, Packet size, Aux fcts for packing, Aux fcts for packing
+//@subsubsection Offset table
+
+/*
+ DonePacking is called when we've finished packing. It releases memory
+ etc. */
+
+//@cindex DonePacking
+
+# if defined(PAR)
+
+static void
+DonePacking(void)
+{
+ freeHashTable(offsetTable, NULL);
+ offsetTable = NULL;
+}
+
+/*
+ AmPacking records that the closure is being packed. Note the abuse of
+ the data field in the hash table -- this saves calling @malloc@! */
+
+//@cindex AmPacking
+
+static void
+AmPacking(closure)
+StgClosure *closure;
+{
+ insertHashTable(offsetTable, (StgWord) closure, (void *) (StgWord) pack_locn);
+}
+
+/*
+ OffsetFor returns an offset for a closure which is already being packed. */
+
+//@cindex OffsetFor
+
+static int
+OffsetFor(closure)
+StgClosure *closure;
+{
+ return (int) (StgWord) lookupHashTable(offsetTable, (StgWord) closure);
+}
+
+/*
+ NotYetPacking determines whether the closure's already being packed.
+  Offsets < @PACK_HDR_SIZE@ (e.g. 0) mean no. */
+
+//@cindex NotYetPacking
+
+static rtsBool
+NotYetPacking(offset)
+int offset;
+{
+ return(offset == 0); // ToDo: what if root is found again?? FIX
+}
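+
+/*
+  Taken together (an illustrative trace): for a closure c first met at
+  pack location 17,
+
+    AmPacking(c);               // records (c -> 17) in offsetTable
+    ...
+    OffsetFor(c)                // == 17 on a later encounter
+    NotYetPacking(OffsetFor(c)) // rtsFalse now; rtsTrue for unseen closures,
+                                // whose lookup yields NULL, i.e. offset 0
+*/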
+
+# else /* GRAN */
+
+static void
+DonePacking(void)
+{
+ /* nothing */
+}
+
+/*
+ NotYetPacking searches through the whole pack buffer for closure. */
+
+static rtsBool
+NotYetPacking(closure)
+StgClosure *closure;
+{ nat i;
+ rtsBool found = rtsFalse;
+
+ for (i=0; (i<pack_locn) && !found; i++)
+ found = globalPackBuffer->buffer[i]==closure;
+
+ return (!found);
+}
+# endif
+
+//@node Packet size, Closure Info, Offset table, Aux fcts for packing
+//@subsubsection Packet size
+
+/*
+ The size needed if all currently queued closures are packed as FETCH_ME
+ closures. This represents the headroom we must have when packing the
+ buffer in order to maintain all links in the graphs.
+*/
+// ToDo: check and merge cases
+#if defined(PAR)
+static nat
+QueuedClosuresMinSize (nat ptrs) {
+ return ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE;
+}
+#else /* GRAN */
+static nat
+QueuedClosuresMinSize (nat ptrs) {
+ return ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE;
+}
+#endif
+
+/*
+ RoomToPack determines whether there's room to pack the closure into
+ the pack buffer based on
+
+ o how full the buffer is already,
+ o the closures' size and number of pointers (which must be packed as GAs),
+ o the size and number of pointers held by any primitive arrays that it
+ points to
+
+ It has a *side-effect* (naughty, naughty) in assigning roomInBuffer
+ to rtsFalse.
+*/
+
+//@cindex RoomToPack
+static rtsBool
+RoomToPack(size, ptrs)
+nat size, ptrs;
+{
+# if defined(PAR)
+ if (roomInBuffer &&
+ (pack_locn + // where we are in the buffer right now
+ size + // space needed for the current closure
+ QueuedClosuresMinSize(ptrs) // space for queued closures as FETCH_MEs
+ + 1 // headroom (DEBUGGING only)
+ >=
+ RTS_PACK_BUFFER_SIZE))
+ {
+ roomInBuffer = rtsFalse;
+ }
+# else /* GRAN */
+ if (roomInBuffer &&
+ (unpacked_size +
+ size +
+ QueuedClosuresMinSize(ptrs)
+ >=
+ RTS_PACK_BUFFER_SIZE))
+ {
+ roomInBuffer = rtsFalse;
+ }
+# endif
+ return (roomInBuffer);
+}
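+
+/*
+  A numeric sketch (hypothetical constants): with RTS_PACK_BUFFER_SIZE=1024,
+  pack_locn=1000, a closure of size=10 with ptrs=3, an empty closure queue,
+  and PACK_FETCHME_SIZE=4, the test amounts to
+      1000 + 10 + (0+3)*4 + 1 = 1023 < 1024
+  so the closure still fits; one more pointer would tip it over the limit.
+*/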
+
+//@node Closure Info, , Packet size, Aux fcts for packing
+//@subsubsection Closure Info
+
+/*
+ Closure Info
+
+ @get_closure_info@ determines the size, number of pointers etc. for this
+ type of closure -- see @SMInfoTables.lh@ for the legal info. types etc.
+
+[Can someone please keep this function up to date. I keep needing it
+ (or something similar) for interpretive code, and it keeps
+ bit-rotting. It really belongs somewhere else too. KH @@ 17/2/95]
+
+#if 0
+
+// {Parallel.h}Daq ngoqvam vIroQpu'
+
+# if defined(GRAN) || defined(PAR)
+/* extracting specific info out of closure; currently only used in GRAN -- HWL */
+//@cindex get_closure_info
+StgInfoTable*
+get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty)
+StgClosure* node;
+nat *size, *ptrs, *nonptrs, *vhs;
+char *info_hdr_ty;
+{
+ StgInfoTable *info;
+
+ info = get_itbl(node);
+ /* the switch shouldn't be necessary, really; just use default case */
+ switch (info->type) {
+#if 0
+ case CONSTR_1_0:
+ case THUNK_1_0:
+ case FUN_1_0:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 1; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 0; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_0_1:
+ case THUNK_0_1:
+ case FUN_0_1:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 0; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 1; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_2_0:
+ case THUNK_2_0:
+ case FUN_2_0:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 2; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 0; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_1_1:
+ case THUNK_1_1:
+ case FUN_1_1:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 1; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 1; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_0_2:
+ case THUNK_0_2:
+ case FUN_0_2:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 0; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 2; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+#endif
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to
+ *size = sizeW_fromITBL(rip);
+ *ptrs = (nat) (rip->layout.payload.ptrs);
+ *nonptrs = (nat) (rip->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return rip; // NB: we return the reverted info ptr for a RBH!!!!!!
+ }
+
+ default:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) (info->layout.payload.ptrs);
+ *nonptrs = (nat) (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+ }
+}
+
+//@cindex IS_BLACK_HOLE
+rtsBool
+IS_BLACK_HOLE(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse);
+}
+
+//@cindex IS_INDIRECTION
+StgClosure *
+IS_INDIRECTION(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ switch (info->type) {
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ /* relies on indirectee being at same place for all these closure types */
+ return (((StgInd*)node) -> indirectee);
+ default:
+ return NULL;
+ }
+}
+
+/*
+rtsBool
+IS_THUNK(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ return ((info->type == THUNK ||
+ info->type == THUNK_STATIC ||
+ info->type == THUNK_SELECTOR) ? rtsTrue : rtsFalse);
+}
+*/
+
+# endif /* GRAN */
+#endif /* 0 */
+
+# if 0
+/* ngoq ngo' */
+
+P_
+get_closure_info(closure, size, ptrs, nonptrs, vhs, type)
+P_ closure;
+W_ *size, *ptrs, *nonptrs, *vhs;
+char *type;
+{
+ P_ ip = (P_) INFO_PTR(closure);
+
+ if (closure==NULL) {
+ fprintf(stderr, "Qagh {get_closure_info}Daq: NULL closure\n");
+ *size = *ptrs = *nonptrs = *vhs = 0;
+ strcpy(type,"ERROR in get_closure_info");
+ return;
+ } else if (closure==PrelBase_Z91Z93_closure) {
+ /* fprintf(stderr, "Qagh {get_closure_info}Daq: PrelBase_Z91Z93_closure closure\n"); */
+ *size = *ptrs = *nonptrs = *vhs = 0;
+ strcpy(type,"PrelBase_Z91Z93_closure");
+ return;
+ };
+
+ ip = (P_) INFO_PTR(closure);
+
+ switch (INFO_TYPE(ip)) {
+ case INFO_SPEC_U_TYPE:
+ case INFO_SPEC_S_TYPE:
+ case INFO_SPEC_N_TYPE:
+ *size = SPEC_CLOSURE_SIZE(closure);
+ *ptrs = SPEC_CLOSURE_NoPTRS(closure);
+ *nonptrs = SPEC_CLOSURE_NoNONPTRS(closure);
+ *vhs = 0 /*SPEC_VHS*/;
+ strcpy(type,"SPEC");
+ break;
+
+ case INFO_GEN_U_TYPE:
+ case INFO_GEN_S_TYPE:
+ case INFO_GEN_N_TYPE:
+ *size = GEN_CLOSURE_SIZE(closure);
+ *ptrs = GEN_CLOSURE_NoPTRS(closure);
+ *nonptrs = GEN_CLOSURE_NoNONPTRS(closure);
+ *vhs = GEN_VHS;
+ strcpy(type,"GEN");
+ break;
+
+ case INFO_DYN_TYPE:
+ *size = DYN_CLOSURE_SIZE(closure);
+ *ptrs = DYN_CLOSURE_NoPTRS(closure);
+ *nonptrs = DYN_CLOSURE_NoNONPTRS(closure);
+ *vhs = DYN_VHS;
+ strcpy(type,"DYN");
+ break;
+
+ case INFO_TUPLE_TYPE:
+ *size = TUPLE_CLOSURE_SIZE(closure);
+ *ptrs = TUPLE_CLOSURE_NoPTRS(closure);
+ *nonptrs = TUPLE_CLOSURE_NoNONPTRS(closure);
+ *vhs = TUPLE_VHS;
+ strcpy(type,"TUPLE");
+ break;
+
+ case INFO_DATA_TYPE:
+ *size = DATA_CLOSURE_SIZE(closure);
+ *ptrs = DATA_CLOSURE_NoPTRS(closure);
+ *nonptrs = DATA_CLOSURE_NoNONPTRS(closure);
+ *vhs = DATA_VHS;
+ strcpy(type,"DATA");
+ break;
+
+ case INFO_IMMUTUPLE_TYPE:
+ case INFO_MUTUPLE_TYPE:
+ *size = MUTUPLE_CLOSURE_SIZE(closure);
+ *ptrs = MUTUPLE_CLOSURE_NoPTRS(closure);
+ *nonptrs = MUTUPLE_CLOSURE_NoNONPTRS(closure);
+ *vhs = MUTUPLE_VHS;
+ strcpy(type,"(IM)MUTUPLE");
+ break;
+
+ case INFO_STATIC_TYPE:
+ *size = STATIC_CLOSURE_SIZE(closure);
+ *ptrs = STATIC_CLOSURE_NoPTRS(closure);
+ *nonptrs = STATIC_CLOSURE_NoNONPTRS(closure);
+ *vhs = STATIC_VHS;
+ strcpy(type,"STATIC");
+ break;
+
+ case INFO_CAF_TYPE:
+ case INFO_IND_TYPE:
+ *size = IND_CLOSURE_SIZE(closure);
+ *ptrs = IND_CLOSURE_NoPTRS(closure);
+ *nonptrs = IND_CLOSURE_NoNONPTRS(closure);
+ *vhs = IND_VHS;
+ strcpy(type,"CAF|IND");
+ break;
+
+ case INFO_CONST_TYPE:
+ *size = CONST_CLOSURE_SIZE(closure);
+ *ptrs = CONST_CLOSURE_NoPTRS(closure);
+ *nonptrs = CONST_CLOSURE_NoNONPTRS(closure);
+ *vhs = CONST_VHS;
+ strcpy(type,"CONST");
+ break;
+
+ case INFO_SPEC_RBH_TYPE:
+ *size = SPEC_RBH_CLOSURE_SIZE(closure);
+ *ptrs = SPEC_RBH_CLOSURE_NoPTRS(closure);
+ *nonptrs = SPEC_RBH_CLOSURE_NoNONPTRS(closure);
+ if (*ptrs <= 2) {
+ *nonptrs -= (2 - *ptrs);
+ *ptrs = 1;
+ } else
+ *ptrs -= 1;
+ *vhs = SPEC_RBH_VHS;
+ strcpy(type,"SPEC_RBH");
+ break;
+
+ case INFO_GEN_RBH_TYPE:
+ *size = GEN_RBH_CLOSURE_SIZE(closure);
+ *ptrs = GEN_RBH_CLOSURE_NoPTRS(closure);
+ *nonptrs = GEN_RBH_CLOSURE_NoNONPTRS(closure);
+ if (*ptrs <= 2) {
+ *nonptrs -= (2 - *ptrs);
+ *ptrs = 1;
+ } else
+ *ptrs -= 1;
+ *vhs = GEN_RBH_VHS;
+ strcpy(type,"GEN_RBH");
+ break;
+
+ case INFO_CHARLIKE_TYPE:
+ *size = CHARLIKE_CLOSURE_SIZE(closure);
+ *ptrs = CHARLIKE_CLOSURE_NoPTRS(closure);
+ *nonptrs = CHARLIKE_CLOSURE_NoNONPTRS(closure);
+ *vhs = CHARLIKE_VHS;
+ strcpy(type,"CHARLIKE");
+ break;
+
+ case INFO_INTLIKE_TYPE:
+ *size = INTLIKE_CLOSURE_SIZE(closure);
+ *ptrs = INTLIKE_CLOSURE_NoPTRS(closure);
+ *nonptrs = INTLIKE_CLOSURE_NoNONPTRS(closure);
+ *vhs = INTLIKE_VHS;
+ strcpy(type,"INTLIKE");
+ break;
+
+# if !defined(GRAN)
+ case INFO_FETCHME_TYPE:
+ *size = FETCHME_CLOSURE_SIZE(closure);
+ *ptrs = FETCHME_CLOSURE_NoPTRS(closure);
+ *nonptrs = FETCHME_CLOSURE_NoNONPTRS(closure);
+ *vhs = FETCHME_VHS;
+ strcpy(type,"FETCHME");
+ break;
+
+ case INFO_FMBQ_TYPE:
+ *size = FMBQ_CLOSURE_SIZE(closure);
+ *ptrs = FMBQ_CLOSURE_NoPTRS(closure);
+ *nonptrs = FMBQ_CLOSURE_NoNONPTRS(closure);
+ *vhs = FMBQ_VHS;
+ strcpy(type,"FMBQ");
+ break;
+# endif
+
+ case INFO_BQ_TYPE:
+ *size = BQ_CLOSURE_SIZE(closure);
+ *ptrs = BQ_CLOSURE_NoPTRS(closure);
+ *nonptrs = BQ_CLOSURE_NoNONPTRS(closure);
+ *vhs = BQ_VHS;
+ strcpy(type,"BQ");
+ break;
+
+ case INFO_BH_TYPE:
+ *size = BH_CLOSURE_SIZE(closure);
+ *ptrs = BH_CLOSURE_NoPTRS(closure);
+ *nonptrs = BH_CLOSURE_NoNONPTRS(closure);
+ *vhs = BH_VHS;
+ strcpy(type,"BH");
+ break;
+
+ case INFO_TSO_TYPE:
+ *size = 0; /* TSO_CLOSURE_SIZE(closure); */
+ *ptrs = 0; /* TSO_CLOSURE_NoPTRS(closure); */
+ *nonptrs = 0; /* TSO_CLOSURE_NoNONPTRS(closure); */
+ *vhs = TSO_VHS;
+ strcpy(type,"TSO");
+ break;
+
+ case INFO_STKO_TYPE:
+ *size = 0;
+ *ptrs = 0;
+ *nonptrs = 0;
+ *vhs = STKO_VHS;
+ strcpy(type,"STKO");
+ break;
+
+ default:
+ fprintf(stderr, "get_closure_info: Unexpected closure type (%lu), closure %lx\n",
+ INFO_TYPE(ip), (StgWord) closure);
+ EXIT(EXIT_FAILURE);
+ }
+
+ return ip;
+}
+# endif
+
+# if 0
+// Use allocate in Storage.c instead
+/*
+ @AllocateHeap@ will bump the heap pointer by @size@ words if the space
+ is available, but it will not perform garbage collection.
+ ToDo: check whether we can use an existing STG allocation routine -- HWL
+*/
+
+
+//@cindex AllocateHeap
+StgPtr
+AllocateHeap(size)
+nat size;
+{
+ StgPtr newClosure;
+
+ /* Allocate a new closure */
+ if (Hp + size > HpLim)
+ return NULL;
+
+ newClosure = Hp + 1;
+ Hp += size;
+
+ return newClosure;
+}
+# endif
+
+# if defined(PAR)
+
+//@cindex doGlobalGC
+void
+doGlobalGC(void)
+{
+ fprintf(stderr,"Splat -- we just hit global GC!\n");
+ stg_exit(EXIT_FAILURE);
+ //fishing = rtsFalse;
+ outstandingFishes--;
+}
+
+# endif /* PAR */
+
+//@node Printing Packet Contents, End of file, Aux fcts for packing, Graph packing
+//@subsection Printing Packet Contents
+/*
+ Printing Packet Contents
+ */
+
+#if defined(DEBUG) || defined(GRAN_CHECK)
+
+//@cindex PrintPacket
+
+#if defined(PAR)
+void
+PrintPacket(packBuffer)
+rtsPackBuffer *packBuffer;
+{
+ StgClosure *parent, *graphroot, *closure_start;
+ const StgInfoTable *ip;
+ globalAddr ga;
+ StgWord **bufptr, **slotptr;
+
+ nat bufsize;
+ nat pptr = 0, pptrs = 0, pvhs;
+ nat locn = 0;
+ nat i;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* disable printing if a non-std globalisation scheme is used; ToDo: FIX */
+ if (RtsFlags.ParFlags.globalising != 0)
+ return;
+
+ /* NB: this whole routine is more or less a copy of UnpackGraph with all
+ unpacking components replaced by printing fcts
+ Long live higher-order fcts!
+ */
+ /* Initialisation */
+ //InitPackBuffer(); /* in case it isn't already init'd */
+ InitClosureQueue();
+ // ASSERT(gaga==PendingGABuffer);
+ graphroot = (StgClosure *)NULL;
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ bufptr = packBuffer->buffer;
+
+ fprintf(stderr, "*. Printing <<%d>> (buffer @ %p):\n",
+ packBuffer->id, packBuffer);
+ fprintf(stderr, "*. size: %d; unpacked_size: %d; tso: %p; buffer: %p\n",
+ packBuffer->size, packBuffer->unpacked_size,
+ packBuffer->tso, packBuffer->buffer);
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+ locn = slotptr-(packBuffer->buffer); // index of closure in buffer
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight == 2) { // unglobalised closure to follow
+ // nothing to do; closure starts at *bufptr
+ } else if (ga.weight > 0) { // fill in GA
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ fprintf(stderr, "*. %u: PLC @ %p\n", locn, ga.payload.plc);
+ // closure = ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ fprintf(stderr, "*. %u: OFFSET TO %d\n", locn, ga.payload.gc.slot);
+ // closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Print normal closures */
+
+ ASSERT(bufsize > 0);
+
+ fprintf(stderr, "*. %u: ((%x, %d, %x)) ", locn,
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight);
+
+ closure_start = (StgClosure*)bufptr;
+ ip = get_closure_info((StgClosure *)bufptr,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ /* ToDo: check whether this is really needed */
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF) {
+ size = _HS;
+ ptrs = nonptrs = vhs = 0;
+ }
+ /* ToDo: check whether this is really needed */
+ if (ip->type == ARR_WORDS) {
+ ptrs = vhs = 0;
+ nonptrs = ((StgArrWords *)bufptr)->words;
+ size = arr_words_sizeW((StgArrWords *)bufptr);
+ }
+
+ /* special code for printing a PAP in a buffer */
+ if (ip->type == PAP || ip->type == AP_UPD) {
+ vhs = 3;
+ ptrs = 0;
+ nonptrs = (nat)((StgPAP *)bufptr)->payload[0];
+ size = _HS+vhs+ptrs+nonptrs;
+ }
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* Print fixed header */
+ fprintf(stderr, "FH [");
+ for (i = 0; i < _HS; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF)
+ size = ptrs = nonptrs = vhs = 0;
+
+ // VH is always empty in the new RTS
+ ASSERT(vhs==0 ||
+ ip->type == PAP || ip->type == AP_UPD);
+ /* Print variable header */
+ fprintf(stderr, "] VH [");
+ for (i = 0; i < vhs; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ //fprintf(stderr, "] %d PTRS [", ptrs);
+ /* Pointers will be filled in later */
+
+ fprintf(stderr, " ] (%d, %d) [", ptrs, nonptrs);
+ /* Print non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ fprintf(stderr, "] (%s)\n", str);
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing
+ When just printing the packet we do not have an unpacked closure
+ in hand, so we feed it the packet entry;
+ again, this assumes that at least the fixed header of the closure
+ has the same layout in the packet; also we may not overwrite entries
+ in the packet (done in Unpack), but for printing that's a bad idea
+ anyway */
+ QueueClosure((StgClosure *)closure_start);
+
+ /* No Common up needed for printing */
+
+ /* No Sort out the global address mapping for printing */
+
+ } /* normal closure case */
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+ fprintf(stderr, "*. --- End packet <<%d>> (claimed size=%d; real size=%d)---\n",
+ packBuffer->id, packBuffer->size, size);
+
+}
+
+/*
+ Doing a sanity check on a packet.
+ This does a full iteration over the packet, as in PrintPacket.
+*/
+//@cindex checkPacket
+void
+checkPacket(packBuffer)
+rtsPackBuffer *packBuffer;
+{
+ StgClosure *parent, *graphroot, *closure_start;
+ const StgInfoTable *ip;
+ globalAddr ga;
+ StgWord **bufptr, **slotptr;
+
+ nat bufsize;
+ nat pptr = 0, pptrs = 0, pvhs;
+ nat locn = 0;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* NB: this whole routine is more or less a copy of UnpackGraph with all
+ unpacking components replaced by printing fcts
+ Long live higher-order fcts!
+ */
+ /* Initialisation */
+ //InitPackBuffer(); /* in case it isn't already init'd */
+ InitClosureQueue();
+ // ASSERT(gaga==PendingGABuffer);
+ graphroot = (StgClosure *)NULL;
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ bufptr = packBuffer->buffer;
+ parent = (StgClosure *)NULL;
+ ASSERT(bufsize > 0);
+ do {
+ /* check that we are not at the end of the buffer, yet */
+ IF_DEBUG(sanity, ASSERT(*bufptr != END_OF_BUFFER_MARKER));
+
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+ locn = slotptr-(packBuffer->buffer); // index of closure in buffer
+ ASSERT(locn<=bufsize);
+
+ /* First, check whether we have a GA, a PLC, or an OFFSET at hand */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight == 2) { // unglobalised closure to follow
+ // nothing to do; closure starts at *bufptr
+ } else if (ga.weight > 0) { // fill in GA
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ /* It's a PLC */
+ ASSERT(LOOKS_LIKE_STATIC(ga.payload.plc));
+ } else if (isOffset(&ga)) {
+ ASSERT(ga.payload.gc.slot<=(int)bufsize);
+ } else {
+ /* normal closure */
+      ASSERT(RtsFlags.ParFlags.globalising != 0 || LOOKS_LIKE_GA(&ga));
+
+ closure_start = (StgClosure*)bufptr;
+ ASSERT(LOOKS_LIKE_GHC_INFO((StgPtr)*bufptr));
+ ip = get_closure_info((StgClosure *)bufptr,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ /* ToDo: check whether this is really needed */
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF) {
+ size = _HS;
+ ptrs = nonptrs = vhs = 0;
+ }
+ /* ToDo: check whether this is really needed */
+ if (ip->type == ARR_WORDS) {
+ ptrs = vhs = 0;
+ nonptrs = ((StgArrWords *)bufptr)->words+1; // payload+words
+ size = arr_words_sizeW((StgArrWords *)bufptr);
+ ASSERT(size==_HS+vhs+nonptrs);
+ }
+ /* special code for printing a PAP in a buffer */
+ if (ip->type == PAP || ip->type == AP_UPD) {
+ vhs = 3;
+ ptrs = 0;
+ nonptrs = (nat)((StgPAP *)bufptr)->payload[0];
+ size = _HS+vhs+ptrs+nonptrs;
+ }
+
+ /* no checks on contents of closure (pointers aren't packed anyway) */
+ ASSERT(_HS+vhs+nonptrs>=MIN_NONUPD_SIZE);
+ bufptr += _HS+vhs+nonptrs;
+
+ /* Add to queue for processing */
+ QueueClosure((StgClosure *)closure_start);
+
+ /* No Common up needed for checking */
+
+ /* No Sort out the global address mapping for checking */
+
+ } /* normal closure case */
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ //ASSERT(LOOKS_LIKE_GHC_INFO((StgPtr)*parent));
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+ /* we unpacked exactly as many words as there are in the buffer */
+ ASSERT(packBuffer->size == bufptr-(packBuffer->buffer));
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*bufptr == END_OF_BUFFER_MARKER));
+}
+#else /* GRAN */
+void
+PrintPacket(buffer)
+rtsPackBuffer *buffer;
+{
+ // extern char *info_hdr_type(P_ infoptr); /* defined in Threads.lc */
+ // extern char *display_info_type(P_ infoptr); /* defined in Threads.lc */
+
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ char info_hdr_ty[80];
+ char str1[80], str2[80], junk_str[80];
+
+ /* globalAddr ga; */
+
+ nat bufsize, unpacked_size ;
+ StgClosure *parent;
+ nat pptr = 0, pptrs = 0, pvhs;
+
+ nat unpack_locn = 0;
+ nat gastart = unpack_locn;
+ nat closurestart = unpack_locn;
+
+ StgTSO *tso;
+ StgClosure *closure, *p;
+
+ nat i;
+
+ fprintf(stderr, "*** Printing <<%d>> (buffer @ %p):\n", buffer->id, buffer);
+ fprintf(stderr, " size: %d; unpacked_size: %d; tso: %d (%p); buffer: %p\n",
+	  buffer->size, buffer->unpacked_size, buffer->tso->id, buffer->tso, buffer->buffer);
+ fputs(" contents: ", stderr);
+ for (unpack_locn=0; unpack_locn<buffer->size; unpack_locn++) {
+ closure = buffer->buffer[unpack_locn];
+ fprintf(stderr, ", %p (%s)",
+ closure, info_type(closure));
+ }
+ fputc('\n', stderr);
+
+#if 0
+  /* traverse all elements of the graph; omitted for now, but might be useful */
+ InitClosureQueue();
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpacked_size = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ fprintf(stderr, "Packet %p, size %u (unpacked size is %u); demanded by TSO %d (%p)[PE %d]\n--- Begin ---\n",
+ buffer, bufsize, unpacked_size,
+ tso->id, tso, where_is((StgClosure*)tso));
+
+ do {
+ closurestart = unpack_locn;
+ closure = buffer->buffer[unpack_locn++];
+
+ fprintf(stderr, "[%u]: (%p) ", closurestart, closure);
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str1);
+ strcpy(str2, str1);
+ fprintf(stderr, "(%s|%s) ", str1, str2);
+
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ IS_BLACK_HOLE(closure))
+ size = ptrs = nonptrs = vhs = 0;
+
+ if (closure_THUNK(closure)) {
+ if (closure_UNPOINTED(closure))
+ fputs("UNPOINTED ", stderr);
+ else
+ fputs("POINTED ", stderr);
+ }
+ if (IS_BLACK_HOLE(closure)) {
+ fputs("BLACK HOLE\n", stderr);
+ } else {
+ /* Fixed header */
+ fprintf(stderr, "FH [");
+ for (i = 0, p = (StgClosure*)&(closure->header); i < _HS; i++, p++)
+ fprintf(stderr, " %p", *p);
+
+ /* Variable header
+ if (vhs > 0) {
+ fprintf(stderr, "] VH [%p", closure->payload[_HS]);
+
+ for (i = 1; i < vhs; i++)
+ fprintf(stderr, " %p", closure->payload[_HS+i]);
+ }
+ */
+ fprintf(stderr, "] PTRS %u", ptrs);
+
+ /* Non-pointers */
+ if (nonptrs > 0) {
+ fprintf(stderr, " NPTRS [%p", closure->payload[_HS+vhs]);
+
+ for (i = 1; i < nonptrs; i++)
+ fprintf(stderr, " %p", closure->payload[_HS+vhs+i]);
+
+ putc(']', stderr);
+ }
+ putc('\n', stderr);
+ }
+ } while (unpack_locn<bufsize) ; /* (parent != NULL); */
+
+ fprintf(stderr, "--- End ---\n\n");
+#endif /* 0 */
+}
+#endif /* PAR */
+#endif /* DEBUG || GRAN_CHECK */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node End of file, , Printing Packet Contents, Graph packing
+//@subsection End of file
+
+//@index
+//* AllocateHeap:: @cindex\s-+AllocateHeap
+//* AmPacking:: @cindex\s-+AmPacking
+//* CommonUp:: @cindex\s-+CommonUp
+//* DeQueueClosure:: @cindex\s-+DeQueueClosure
+//* DonePacking:: @cindex\s-+DonePacking
+//* FillInClosure:: @cindex\s-+FillInClosure
+//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE
+//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION
+//* InitClosureQueue:: @cindex\s-+InitClosureQueue
+//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer
+//* LocateNextParent:: @cindex\s-+LocateNextParent
+//* NotYetPacking:: @cindex\s-+NotYetPacking
+//* OffsetFor:: @cindex\s-+OffsetFor
+//* Pack:: @cindex\s-+Pack
+//* PackArray:: @cindex\s-+PackArray
+//* PackClosure:: @cindex\s-+PackClosure
+//* PackFetchMe:: @cindex\s-+PackFetchMe
+//* PackGeneric:: @cindex\s-+PackGeneric
+//* PackNearbyGraph:: @cindex\s-+PackNearbyGraph
+//* PackOneNode:: @cindex\s-+PackOneNode
+//* PackPAP:: @cindex\s-+PackPAP
+//* PackPLC:: @cindex\s-+PackPLC
+//* PackStkO:: @cindex\s-+PackStkO
+//* PackTSO:: @cindex\s-+PackTSO
+//* PendingGABuffer:: @cindex\s-+PendingGABuffer
+//* PrintPacket:: @cindex\s-+PrintPacket
+//* QueueClosure:: @cindex\s-+QueueClosure
+//* QueueEmpty:: @cindex\s-+QueueEmpty
+//* RoomToPack:: @cindex\s-+RoomToPack
+//* SetGAandCommonUp:: @cindex\s-+SetGAandCommonUp
+//* UnpackGA:: @cindex\s-+UnpackGA
+//* UnpackGraph:: @cindex\s-+UnpackGraph
+//* UnpackOffset:: @cindex\s-+UnpackOffset
+//* UnpackPLC:: @cindex\s-+UnpackPLC
+//* doGlobalGC:: @cindex\s-+doGlobalGC
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* InitPackBuffer:: @cindex\s-+initPackBuffer
+//* isFixed:: @cindex\s-+isFixed
+//* isOffset:: @cindex\s-+isOffset
+//* offsetTable:: @cindex\s-+offsetTable
+//@end index
+
diff --git a/rts/parallel/ParInit.c b/rts/parallel/ParInit.c
new file mode 100644
index 0000000000..22c9119c89
--- /dev/null
+++ b/rts/parallel/ParInit.c
@@ -0,0 +1,322 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Wed Mar 21 2001 16:37:16 Stardate: [-30]6363.46 hwloidl>
+
+ Initialising the parallel RTS
+
+ An extension based on Kevin Hammond's GRAPH for PVM version
+ P. Trinder, January 17th 1995.
+ Adapted for the new RTS
+ P. Trinder, July 1997.
+ H-W. Loidl, November 1999.
+
+ ------------------------------------------------------------------------ */
+
+#ifdef PAR /* whole file */
+
+//@menu
+//* Includes::
+//* Global variables::
+//* Initialisation Routines::
+//@end menu
+
+//@node Includes, Global variables
+//@subsection Includes
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+#include <setjmp.h>
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParallelRts.h"
+#include "Sparks.h"
+#include "LLC.h"
+#include "HLC.h"
+
+//@node Global variables, Initialisation Routines, Includes
+//@subsection Global variables
+
+/* Global conditions defined here. */
+
+rtsBool IAmMainThread = rtsFalse; /* Set for the main thread */
+
+/* Task identifiers for various interesting global tasks. */
+
+GlobalTaskId IOTask = 0, /* The IO Task Id */
+ SysManTask = 0, /* The System Manager Task Id */
+ mytid = 0; /* This PE's Task Id */
+
+rtsTime main_start_time; /* When the program started */
+rtsTime main_stop_time; /* When the program finished */
+jmp_buf exit_parallel_system; /* How to abort from the RTS */
+
+
+//rtsBool fishing = rtsFalse; /* We have no fish out in the stream */
+rtsTime last_fish_arrived_at = 0; /* Time of arrival of most recent fish*/
+nat outstandingFishes = 0; /* Number of active fishes */
+
+//@cindex spark queue
+/* GranSim: a globally visible array of spark queues */
+rtsSpark *pending_sparks_hd[SPARK_POOLS], /* ptr to start of a spark pool */
+ *pending_sparks_tl[SPARK_POOLS], /* ptr to end of a spark pool */
+ *pending_sparks_lim[SPARK_POOLS],
+ *pending_sparks_base[SPARK_POOLS];
+
+//@cindex spark_limit
+/* max number of sparks permitted on the PE;
+ see RtsFlags.ParFlags.maxLocalSparks */
+nat spark_limit[SPARK_POOLS];
+
+//@cindex PendingFetches
+/* A list of fetch reply messages not yet processed; this list is filled
+ by awaken_blocked_queue and processed by processFetches */
+StgBlockedFetch *PendingFetches = END_BF_QUEUE;
+
+//@cindex allPEs
+GlobalTaskId *allPEs;
+
+//@cindex nPEs
+nat nPEs = 0;
+
+//@cindex sparksIgnored
+nat sparksIgnored = 0, sparksCreated = 0,
+ threadsIgnored = 0, threadsCreated = 0;
+
+//@cindex advisory_thread_count
+nat advisory_thread_count = 0;
+
+globalAddr theGlobalFromGA;
+
+/* For flag handling see RtsFlags.h */
+
+//@node Prototypes
+//@subsection Prototypes
+
+/* Needed for FISH messages (initialisation of random number generator) */
+void srand48 (long);
+time_t time (time_t *);
+
+//@node Initialisation Routines, , Global variables
+//@subsection Initialisation Routines
+
+/*
+ par_exit defines how to terminate the program. If the exit code is
+ non-zero (i.e. an error has occurred), the PE should not halt until
+ outstanding error messages have been processed. Otherwise, messages
+ might be sent to non-existent Task Ids. The infinite loop will actually
+  terminate, since STG_Exception will call myexit(0) when
+  it receives a PP_FINISH from the system manager task.
+*/
+//@cindex shutdownParallelSystem
+void
+shutdownParallelSystem(StgInt n)
+{
+ /* use the file specified via -S */
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+
+ IF_PAR_DEBUG(verbose,
+ if (n==0)
+ belch("==== entered shutdownParallelSystem ...");
+ else
+ belch("==== entered shutdownParallelSystem (ERROR %d)...", n);
+ );
+
+ stopPEComms(n);
+
+#if 0
+ if (sf!=(FILE*)NULL)
+ fprintf(sf, "PE %x: %u sparks created, %u sparks Ignored, %u threads created, %u threads Ignored",
+ (W_) mytid, sparksCreated, sparksIgnored,
+ threadsCreated, threadsIgnored);
+#endif
+
+ ShutdownEachPEHook();
+}
+
+//@cindex initParallelSystem
+void
+initParallelSystem(void)
+{
+ /* Don't buffer standard channels... */
+ setbuf(stdout,NULL);
+ setbuf(stderr,NULL);
+
+ srand48(time(NULL) * getpid()); /* Initialise Random-number generator seed*/
+ /* used to select target of FISH message*/
+ if (!InitPackBuffer())
+ barf("InitPackBuffer");
+
+ if (!initMoreBuffers())
+ barf("initMoreBuffers");
+
+ if (!initSparkPools())
+ barf("initSparkPools");
+}
+
+/*
+ * SynchroniseSystem synchronises the reduction task with the system
+ * manager, and initialises the Global address tables (LAGA & GALA)
+ */
+
+//@cindex synchroniseSystem
+void
+synchroniseSystem(void)
+{
+ /* Only in debug mode? */
+ fprintf(stderr, "==== Starting parallel execution on %d processors ...\n", nPEs);
+
+ InitEachPEHook(); /* HWL: hook to be execed on each PE */
+
+ /* Initialize global address tables */
+ initGAtables();
+
+ initParallelSystem();
+
+ startPEComms();
+}
+
+/*
+ Do the startup stuff (this is PVM specific!).
+ Determines global vars: mytid, IAmMainThread, SysManTask, nPEs
+ Called at the beginning of RtsStartup.startupHaskell
+*/
+void
+startupParallelSystem(char *argv[]) {
+ mytid = pvm_mytid(); /* Connect to PVM */
+
+ if (*argv[0] == '-') { /* Look to see whether we're the Main Thread */
+ IAmMainThread = rtsTrue;
+ sscanf(argv[0],"-%0X",&SysManTask); /* extract SysMan task ID*/
+ argv++; /* Strip off flag argument */
+ } else {
+ SysManTask = pvm_parent();
+ }
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] %s PE located SysMan at %x\n",
+ mytid, IAmMainThread?"Main":"Remote", SysManTask));
+
+ nPEs = atoi(argv[1]);
+}
+
+/*
+ Exception handler during startup.
+*/
+void *
+processUnexpectedMessageDuringStartup(rtsPacket p) {
+ OpCode opCode;
+ GlobalTaskId sender_id;
+
+ getOpcodeAndSender(p, &opCode, &sender_id);
+
+ switch(opCode) {
+ case PP_FISH:
+ bounceFish();
+ break;
+#if defined(DIST)
+ case PP_REVAL:
+ bounceReval();
+ break;
+#endif
+ case PP_FINISH:
+ stg_exit(EXIT_SUCCESS);
+ break;
+ default:
+ fprintf(stderr,"== Task %x: Unexpected OpCode %x (%s) from %x in startPEComms\n",
+ mytid, opCode, getOpName(opCode), sender_id);
+ }
+}
+
+void
+startPEComms(void){
+
+ startUpPE();
+ allPEs = (GlobalTaskId *) stgMallocBytes(sizeof(GlobalTaskId) * MAX_PES,
+ "(PEs)");
+
+ /* Send our tid and IAmMainThread flag back to SysMan */
+ sendOp1(PP_READY, SysManTask, (StgWord)IAmMainThread);
+ /* Wait until we get the PE-Id table from Sysman */
+ waitForPEOp(PP_PETIDS, SysManTask, processUnexpectedMessageDuringStartup);
+
+ IF_PAR_DEBUG(verbose,
+ belch("==-- startPEComms: methinks we just received a PP_PETIDS message"));
+
+ /* Digest the PE table we received */
+ processPEtids();
+}
+
+void
+processPEtids(void) {
+ long newPE;
+ nat i, sentPEs, currentPEs;
+
+ nPEs=0;
+
+ currentPEs = nPEs;
+
+ IF_PAR_DEBUG(verbose,
+ belch("==-- processPEtids: starting to iterate over a PVM buffer"));
+ /* ToDo: this has to go into LLComms !!! */
+ GetArgs(&sentPEs,1);
+
+ ASSERT(sentPEs > currentPEs);
+ ASSERT(sentPEs < MAX_PES); /* enforced by SysMan too*/
+
+ for (i = 0; i < sentPEs; i++) {
+ GetArgs(&newPE,1);
+ if (i<currentPEs) {
+ ASSERT(newPE == allPEs[i]);
+ } else {
+#if defined(DIST)
+ // breaks with PAR && !DEBUG
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "[%x] registering %d'th %x\n", mytid, i, newPE));
+ if(!looks_like_tid(newPE))
+ barf("unacceptable taskID %x\n",newPE);
+#endif
+ allPEs[i] = newPE;
+ nPEs++;
+ registerTask(newPE);
+ }
+ }
+
+ IF_PAR_DEBUG(verbose,
+ /* debugging */
+ belch("++++ [%x] PE table as I see it:", mytid);
+ for (i = 0; i < sentPEs; i++) {
+ belch("++++ allPEs[%d] = %x", i, allPEs[i]);
+ });
+}
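+
+/*
+  Schematically, the PP_PETIDS payload digested above is (one long per slot):
+
+    [ sentPEs | PE_1 | PE_2 | ... | PE_sentPEs ]
+
+  where the first currentPEs entries must match the PEs we already know about.
+*/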
+
+void
+stopPEComms(StgInt n) {
+ if (n != 0) {
+ /* In case sysman doesn't know about us yet...
+ pvm_initsend(PvmDataDefault);
+ PutArgs(&IAmMainThread,1);
+ pvm_send(SysManTask, PP_READY);
+ */
+ sendOp(PP_READY, SysManTask);
+ }
+
+ sendOp2(PP_FINISH, SysManTask, n, n);
+ waitForPEOp(PP_FINISH, SysManTask, NULL);
+ fflush(gr_file);
+ shutDownPE();
+}
+
+#endif /* PAR -- whole file */
+
+//@index
+//* PendingFetches:: @cindex\s-+PendingFetches
+//* SynchroniseSystem:: @cindex\s-+SynchroniseSystem
+//* allPEs:: @cindex\s-+allPEs
+//* initParallelSystem:: @cindex\s-+initParallelSystem
+//* nPEs:: @cindex\s-+nPEs
+//* par_exit:: @cindex\s-+par_exit
+//* spark queue:: @cindex\s-+spark queue
+//* sparksIgnored:: @cindex\s-+sparksIgnored
+//@end index
+
diff --git a/rts/parallel/ParInit.h b/rts/parallel/ParInit.h
new file mode 100644
index 0000000000..a22a50bae6
--- /dev/null
+++ b/rts/parallel/ParInit.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ * ParInit.h,1
+ *
+ * Phil Trinder
+ * July 1998
+ *
+ * External Parallel Initialisation Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PARINIT_H
+#define PARINIT_H
+
+extern void RunParallelSystem (P_);
+extern void initParallelSystem(void);
+extern void SynchroniseSystem(void);
+extern void par_exit(I_);
+
+#endif /* PARINIT_H */
diff --git a/rts/parallel/ParTicky.c b/rts/parallel/ParTicky.c
new file mode 100644
index 0000000000..347c2b8bca
--- /dev/null
+++ b/rts/parallel/ParTicky.c
@@ -0,0 +1,450 @@
+/* -------------------------------------------------------------------------
+ *
+ * (c) Hans-Wolfgang Loidl, 2000-
+ *
+ * Parallel ticky profiling, monitoring basic RTS operations in GUM.
+ * Similar in structure to TICKY_TICKY profiling, but doesn't need a
+ * separate way of building GHC.
+ *-------------------------------------------------------------------------- */
+
+#if defined(PAR) && defined(PAR_TICKY)
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+//#include "StoragePriv.h"
+//#include "MBlock.h"
+//#include "Schedule.h"
+#include "GC.h"
+#include "Stats.h"
+#include "ParTicky.h" // ToDo: move into Rts.h
+#include "ParallelRts.h"
+
+#if defined(PAR) && defined(HAVE_GETRUSAGE)
+#include <sys/resource.h>
+#endif
+
+/* external data */
+extern double ElapsedTimeStart;
+
+extern ullong GC_tot_alloc;
+extern ullong GC_tot_copied;
+
+extern lnat MaxResidency; /* in words; for stats only */
+extern lnat ResidencySamples; /* for stats only */
+
+/* ngIplu' {Stats.c}vo' */
+#define BIG_STRING_LEN 512
+
+/* ngIplu' {Ticky.c}vo' */
+#define INTAVG(a,b) ((b == 0) ? 0.0 : ((double) (a) / (double) (b)))
+#define PC(a) (100.0 * a)
+
+#define AVG(thing) \
+ StgDouble avg##thing = INTAVG(tot##thing,ctr##thing)
+
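+/* For instance (illustrative), AVG(_gc) expands to
+     StgDouble avg_gc = INTAVG(tot_gc,ctr_gc)
+   i.e. an average of a running total over its event counter, guarded
+   against a zero counter by INTAVG. */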
+
+#if 0
+void
+set_foo_time(double *x) {
+ *x = usertime();
+}
+
+double
+get_foo_time(double x) {
+ fprintf(stderr, "get_foo_time: %7.2f (%7.5f,%7.5f) \n",
+ usertime()-x,usertime(),x);
+ return (usertime()-x);
+}
+#endif
+
+static double start_time_GA = 0.0;
+static double start_mark = 0.0;
+static double start_pack = 0.0;
+static double start_unpack = 0.0;
+
+void
+par_ticky_Par_start (void) {
+# if !defined(HAVE_GETRUSAGE) || irix_HOST_OS || defined(_WIN32)
+ fprintf(stderr, "|| sorry don't have RUSAGE\n");
+ return ;
+# else
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ struct rusage t;
+ double utime, stime;
+
+ if (RtsFlags.GcFlags.giveStats>1 && sf != NULL) {
+ getrusage(RUSAGE_SELF, &t);
+
+ utime = t.ru_utime.tv_sec + 1e-6*t.ru_utime.tv_usec;
+ stime = t.ru_stime.tv_sec + 1e-6*t.ru_stime.tv_usec;
+
+ fprintf(stderr, "|| user time: %5.2f; system time: %5.2f\n",
+ utime, stime);
+ fprintf(stderr, "|| max RSS: %ld; int SM size: %ld; int USM data size: %ld; int USS size: %ld\n",
+ t.ru_maxrss, t.ru_ixrss, t.ru_idrss, t.ru_isrss);
+ }
+#endif
+}
+
+#if 0
+FYI:
+ struct rusage
+ {
+ struct timeval ru_utime; /* user time used */
+ struct timeval ru_stime; /* system time used */
+ long ru_maxrss; /* maximum resident set size */
+ long ru_ixrss; /* integral shared memory size */
+ long ru_idrss; /* integral unshared data size */
+ long ru_isrss; /* integral unshared stack size */
+ long ru_minflt; /* page reclaims */
+ long ru_majflt; /* page faults */
+ long ru_nswap; /* swaps */
+ long ru_inblock; /* block input operations */
+ long ru_oublock; /* block output operations */
+ long ru_msgsnd; /* messages sent */
+ long ru_msgrcv; /* messages received */
+ long ru_nsignals; /* signals received */
+ long ru_nvcsw; /* voluntary context switches */
+ long ru_nivcsw; /* involuntary context switches */
+ };
+#endif
+
+
+void
+par_ticky_rebuildGAtables_start(void) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ //set_foo_time(&start_time_GA);
+ start_time_GA = usertime();
+ }
+}
+
+void
+par_ticky_rebuildGAtables_end(nat n, nat size_GA) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ static double foo = 0.0;
+ foo = usertime() - start_time_GA; // get_foo_time(start_time_GA);
+ globalParStats.cnt_rebuild_GA++;
+ globalParStats.tot_rebuild_GA += n;
+ if ( n > globalParStats.res_rebuild_GA )
+ globalParStats.res_rebuild_GA = n;
+ // fprintf(stderr, "rebuildGAtables: footime=%7.2f (%11.5f, %11.5f)\n",
+ // foo, usertime(), start_time_GA);
+ globalParStats.time_rebuild_GA += foo;
+ globalParStats.tot_size_GA += size_GA;
+ if ( size_GA > globalParStats.res_size_GA )
+ globalParStats.res_size_GA = size_GA;
+ }
+ // fprintf(stderr, ">> n: %d; size: %d;; tot: %d; res: %d\n",
+ // n, size_GA, globalParStats.tot_size_GA, globalParStats.res_size_GA);
+}
+
+void
+par_ticky_markLocalGAs_start(void) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_time_GA = usertime();
+ }
+}
+
+void
+par_ticky_markLocalGAs_end(nat n) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.cnt_mark_GA++;
+ globalParStats.tot_mark_GA += n;
+ if ( n > globalParStats.res_mark_GA )
+ globalParStats.res_mark_GA = n;
+ globalParStats.time_mark_GA += usertime() - start_time_GA;
+ }
+}
+
+void
+par_ticky_markSparkQueue_start(void) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_mark=usertime();
+ }
+}
+
+void
+par_ticky_markSparkQueue_end(nat n) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.time_sparks += usertime() - start_mark;
+
+ globalParStats.tot_sparks_marked += n;
+ if ( n > globalParStats.res_sparks_marked )
+ globalParStats.res_sparks_marked = n;
+ }
+}
+
+void
+par_ticky_PackNearbyGraph_start (void) {
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_pack=usertime();
+ }
+}
+
+void
+par_ticky_PackNearbyGraph_end(nat n, nat thunks) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.time_pack += usertime() - start_pack;
+
+ globalParStats.tot_packets++;
+ globalParStats.tot_packet_size += n;
+ if ( n > globalParStats.res_packet_size )
+ globalParStats.res_packet_size = n;
+ globalParStats.tot_thunks += thunks;
+ if ( thunks > globalParStats.res_thunks )
+ globalParStats.res_thunks = thunks;
+ }
+}
+
+void
+par_ticky_UnpackGraph_start (void) {
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_unpack=usertime();
+ }
+}
+
+void
+par_ticky_UnpackGraph_end(nat n, nat thunks) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.time_unpack += usertime() - start_unpack;
+
+ globalParStats.rec_packets++;
+ globalParStats.rec_packet_size += n;
+ /*
+ if ( n > globalParStats.res_packet_size )
+ globalParStats.res_packet_size = n;
+ */
+ globalParStats.rec_thunks += thunks;
+ /*
+ if ( thunks > globalParStats.res_thunks )
+ globalParStats.res_thunks = thunks;
+ */
+ }
+}
+
+void
+par_ticky_TP (void) {
+ StgSparkPool *pool;
+ nat tp_size, sp_size; // stats only
+
+ // Global stats gathering
+ /* the spark pool for the current PE */
+ pool = &(MainRegTable.rSparks); // generalise to cap = &MainRegTable
+
+ // Global statistics: residency of thread and spark pool
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+    tp_size = run_queue_len() + 1; // add the TSO just popped
+ // No: there may be many blocked threads being awoken at the same time
+ // ASSERT(tp_size <= RtsFlags.ParFlags.maxThreads);
+ globalParStats.tot_tp += tp_size;
+ globalParStats.emp_tp += (tp_size==0) ? 1 : 0;
+ globalParStats.cnt_tp++;
+ if ( tp_size > globalParStats.res_tp)
+ globalParStats.res_tp = tp_size;
+ // fprintf(stderr, "run_queue_len() = %d (max %d)\n", run_queue_len(), globalParStats.res_tp);
+ sp_size = spark_queue_len(pool);
+ //ASSERT(sp_size <= RtsFlags.ParFlags.maxLocalSparks);
+ globalParStats.tot_sp += sp_size;
+ globalParStats.emp_sp += (sp_size==0) ? 1 : 0;
+ globalParStats.cnt_sp++;
+ if ( sp_size > globalParStats.res_sp)
+ globalParStats.res_sp = sp_size;
+ // fprintf(stderr, "spark_queue_len(pool) = %d (max %d)\n", spark_queue_len(pool), globalParStats.res_sp);
+ }
+}
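+
+/* Note: the tot_, cnt_, emp_ and res_ fields accumulated above are only
+   reduced to averages and maxima when globalParStat_exit (below) runs,
+   e.g. average thread pool length = tot_tp / cnt_tp. */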
+
+void
+globalParStat_exit(void)
+{
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ double time, etime;
+
+ /* print only if GC stats is enabled, too; i.e. -sstderr */
+ if (!(RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS))
+ return;
+
+ time = usertime();
+ etime = elapsedtime() - ElapsedTimeStart;
+ // fprintf(stderr, "foo=%7.2f\n", time);
+
+ if (sf != NULL){
+ char temp[BIG_STRING_LEN];
+
+ // GC_tot_alloc += alloc;
+ fprintf(sf,"\n");
+
+ fprintf(sf, "%11d threads created\n",
+ globalParStats.tot_threads_created);
+ /*
+ Would need to add a ++ to the par macro to use this
+
+ fprintf(sf, "%11d sparks created\n",
+ globalParStats.tot_sparks_created);
+ fprintf(sf, "%11d sparks ignored\n",
+ globalParStats.tot_sparks_ignored);
+ */
+ ullong_format_string(globalParStats.res_tp, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s thread pool residency", temp);
+    fprintf(sf, " (avg: %3.2f; empty %d times, %2.2f%% of %d samples)\n",
+ (double)globalParStats.tot_tp/(double)globalParStats.cnt_tp,
+ globalParStats.emp_tp,
+ globalParStats.emp_tp*100.0/(double)globalParStats.cnt_tp,
+ globalParStats.cnt_tp);
+ ullong_format_string(globalParStats.res_sp, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s spark pool residency", temp);
+
+    fprintf(sf, " (avg: %3.2f; empty %d times, %2.2f%% of %d samples)\n",
+ (double)globalParStats.tot_sp/(double)globalParStats.cnt_sp,
+ globalParStats.emp_sp,
+ globalParStats.emp_sp*100.0/(double)globalParStats.cnt_sp,
+ globalParStats.cnt_sp);
+ //ullong_format_string(globalParStats.tot_fishes, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11d messages sent (%d fish, %d fetch, %d resume, %d schedule",
+ globalParStats.tot_fish_mess+globalParStats.tot_fetch_mess+
+ globalParStats.tot_resume_mess+globalParStats.tot_schedule_mess,
+ globalParStats.tot_fish_mess, globalParStats.tot_fetch_mess,
+ globalParStats.tot_resume_mess, globalParStats.tot_schedule_mess);
+#if defined(DIST)
+    fprintf(sf, ", %d revals", globalParStats.tot_reval_mess);
+#endif
+ fprintf(sf,")\n");
+ fprintf(sf, "%11d messages received (%d fish, %d fetch, %d resume, %d schedule",
+ globalParStats.rec_fish_mess+globalParStats.rec_fetch_mess+
+ globalParStats.rec_resume_mess+globalParStats.rec_schedule_mess,
+ globalParStats.rec_fish_mess, globalParStats.rec_fetch_mess,
+ globalParStats.rec_resume_mess, globalParStats.rec_schedule_mess);
+#if defined(DIST)
+    fprintf(sf, ", %d revals", globalParStats.rec_reval_mess);
+#endif
+ fprintf(sf,")\n\n");
+
+ ullong_format_string(globalParStats.tot_size_GA*sizeof(W_), temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes of global heap in total ", temp);
+ fprintf(sf, "(%5.2f%% of total allocated heap)\n",
+	    globalParStats.tot_size_GA*sizeof(W_)*100.0/((double)GC_tot_alloc*sizeof(W_)));
+ ullong_format_string(globalParStats.res_size_GA*sizeof(W_), temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes global heap residency ", temp);
+ fprintf(sf, "(%5.2f%% of max heap residency)\n",
+	    globalParStats.res_size_GA*sizeof(W_)*100.0/((double)MaxResidency*sizeof(W_)));
+
+ //ullong_format_string(globalParStats.res_mark_GA, temp, rtsTrue/*commas*/);
+ //fprintf(sf, "%11s GAs residency in GALA table ", temp);
+ // ullong_format_string(globalParStats.tot_mark_GA, temp, rtsTrue/*commas*/);
+ //fprintf(sf, "(avg %5.2f; %d samples)\n",
+ // (double)globalParStats.tot_mark_GA/(double)globalParStats.cnt_mark_GA,
+ // globalParStats.cnt_mark_GA);
+
+ ullong_format_string(globalParStats.local_alloc_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s GAs locally allocated (calls to makeGlobal)\n", temp);
+
+ ullong_format_string(globalParStats.tot_rebuild_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s live GAs in total (after rebuilding tables)\n", temp);
+ ullong_format_string(globalParStats.res_rebuild_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s GAs residency (after rebuilding tables) ", temp);
+ fprintf(sf, "(avg %5.2f; %d samples)\n",
+ (double)globalParStats.tot_rebuild_GA/(double)globalParStats.cnt_rebuild_GA,
+ globalParStats.cnt_rebuild_GA);
+ ullong_format_string(globalParStats.res_free_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s residency of freeing GAs", temp);
+ fprintf(sf, " (avg %5.2f; %d samples)\n",
+ (double)globalParStats.tot_free_GA/(double)globalParStats.cnt_free_GA,
+ globalParStats.cnt_free_GA);
+
+ fprintf(sf, "%11.2fs spent marking GAs (%7.2f%% of %7.2fs)\n",
+ globalParStats.time_mark_GA,
+ globalParStats.time_mark_GA*100./time, time);
+ fprintf(sf, "%11.2fs spent rebuilding GALA tables (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs)\n",
+ globalParStats.time_rebuild_GA,
+ globalParStats.time_rebuild_GA*100./time, time,
+ globalParStats.time_rebuild_GA*100./etime, etime);
+
+ ullong_format_string(globalParStats.tot_sparks_marked, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s sparks marked\t", temp);
+ ullong_format_string(globalParStats.res_sparks_marked, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%6s spark mark residency\n", temp);
+ fprintf(sf, "%11.2fs spent marking sparks (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n",
+ globalParStats.time_sparks,
+ globalParStats.time_sparks*100./time, time,
+ globalParStats.time_sparks*100./etime, etime);
+
+ fprintf(sf,"\n");
+
+ ullong_format_string(globalParStats.tot_packets, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s packets sent\n", temp);
+ ullong_format_string(globalParStats.tot_packet_size, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes of graph sent in total (max %d; avg %.2f)\n",
+ temp, globalParStats.res_packet_size,
+ (double)globalParStats.tot_packet_size/(double)globalParStats.tot_packets);
+ ullong_format_string(globalParStats.tot_thunks, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s thunks sent in total (max %d; avg %.2f)\n",
+ temp, globalParStats.res_thunks,
+ (double)globalParStats.tot_thunks/(double)globalParStats.tot_packets);
+ fprintf(sf, "%11.2fs spent packing graph structures (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n",
+ globalParStats.time_pack,
+ globalParStats.time_pack*100./time, time,
+ globalParStats.time_pack*100./etime, etime);
+
+ ullong_format_string(globalParStats.rec_packets, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s packets received\n", temp);
+ ullong_format_string(globalParStats.rec_packet_size, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes of graph received in total (max %d; avg %.2f)\n",
+ temp, globalParStats.rec_res_packet_size,
+ (double)globalParStats.rec_packet_size/(double)globalParStats.rec_packets);
+ ullong_format_string(globalParStats.rec_thunks, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s thunks received in total (max %d; avg %.2f)\n",
+ temp, globalParStats.rec_res_thunks,
+ (double)globalParStats.rec_thunks/(double)globalParStats.rec_packets);
+ fprintf(sf, "%11.2fs spent unpacking graph structures (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n",
+ globalParStats.time_unpack,
+ globalParStats.time_unpack*100./time, time,
+ globalParStats.time_unpack*100./etime, etime);
+
+ fprintf(sf,"\n");
+
+ ullong_format_string(globalParStats.tot_arrs, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytearrays sent; ", temp);
+ ullong_format_string(globalParStats.tot_arr_size, temp, rtsTrue/*commas*/);
+ fprintf(sf, " %s bytes in total (avg %.2f)\n",
+ temp,
+ (double)globalParStats.tot_arr_size/(double)globalParStats.tot_arrs);
+
+ fprintf(sf,"\n");
+
+ fprintf(sf, "%11d yields, %d stack overflows, %d heap overflows\n",
+ globalParStats.tot_yields, globalParStats.tot_stackover,
+ globalParStats.tot_heapover);
+
+ fprintf(sf,"\n");
+
+ //fprintf(stderr, "Printing this pathetic statistics took %7.2fs (start @ %7.2f)\n",
+ // usertime()-time, time);
+
+ fflush(sf);
+ // Open filehandle needed by other stats printing fcts
+ // fclose(sf);
+ }
+}
+
+#endif
+
diff --git a/rts/parallel/ParTicky.h b/rts/parallel/ParTicky.h
new file mode 100644
index 0000000000..1d6e7435c9
--- /dev/null
+++ b/rts/parallel/ParTicky.h
@@ -0,0 +1,60 @@
+/* --------------------------------------------------------------------------
+ *
+ * (c) Hans-Wolfgang Loidl, 2000-
+ *
+ * Header for ParTicky.c
+ *
+ * --------------------------------------------------------------------------*/
+
+#if defined(PAR_TICKY)
+
+/* macros */
+#define PAR_TICKY_PAR_START() par_ticky_Par_start ()
+#define PAR_TICKY_PAR_END() globalParStat_exit ()
+#define PAR_TICKY_REBUILD_GA_TABLES_START() par_ticky_rebuildGAtables_start()
+#define PAR_TICKY_REBUILD_GA_TABLES_END(n, size_GA) par_ticky_rebuildGAtables_end(n, size_GA)
+#define PAR_TICKY_MARK_LOCAL_GAS_START() par_ticky_markLocalGAs_start()
+#define PAR_TICKY_MARK_LOCAL_GAS_END(n) par_ticky_markLocalGAs_end(n)
+#define PAR_TICKY_MARK_SPARK_QUEUE_START() par_ticky_markSparkQueue_start()
+#define PAR_TICKY_MARK_SPARK_QUEUE_END(n) par_ticky_markSparkQueue_end(n)
+#define PAR_TICKY_PACK_NEARBY_GRAPH_START() (par_ticky_PackNearbyGraph_start())
+#define PAR_TICKY_PACK_NEARBY_GRAPH_END(n, thunks) par_ticky_PackNearbyGraph_end(n, thunks)
+#define PAR_TICKY_UNPACK_GRAPH_START() par_ticky_UnpackGraph_start()
+#define PAR_TICKY_UNPACK_GRAPH_END(n,thunks) par_ticky_UnpackGraph_end(n,thunks)
+#define PAR_TICKY_TP() par_ticky_TP()
+#define PAR_TICKY_CNT_FREE_GA() stats_CntFreeGA()
+
+/* prototypes */
+extern void par_ticky_Par_start (void) ;
+extern void par_ticky_rebuildGAtables_start(void) ;
+extern void par_ticky_rebuildGAtables_end(nat n, nat size_GA) ;
+extern void par_ticky_markLocalGAs_start(void) ;
+extern void par_ticky_markLocalGAs_end(nat n) ;
+extern void par_ticky_markSparkQueue_start(void) ;
+extern void par_ticky_markSparkQueue_end(nat n) ;
+extern void par_ticky_PackNearbyGraph_start (void) ;
+extern void par_ticky_PackNearbyGraph_end(nat n, nat thunks) ;
+extern void par_ticky_UnpackGraph_start (void) ;
+extern void par_ticky_UnpackGraph_end(nat n, nat thunks) ;
+extern void par_ticky_TP (void) ;
+extern void globalParStat_exit(void);
+
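+/* Usage sketch (hypothetical call site; the packed_size and packed_thunks
+   variables are illustrative): the hooks are used in matched start/end
+   pairs around the instrumented phase, e.g. in the packing code
+
+     PAR_TICKY_PACK_NEARBY_GRAPH_START();
+     buffer = PackNearbyGraph(closure, tso, &packed_size, dest);
+     PAR_TICKY_PACK_NEARBY_GRAPH_END(packed_size, packed_thunks);
+
+   so that a build without PAR_TICKY compiles them away via the empty
+   definitions below. */
+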
+#else
+
+#define PAR_TICKY_PAR_START()
+#define PAR_TICKY_PAR_END()
+#define PAR_TICKY_REBUILD_GA_TABLES_START()
+#define PAR_TICKY_REBUILD_GA_TABLES_END(n, size_GA)
+#define PAR_TICKY_MARK_LOCAL_GAS_START()
+#define PAR_TICKY_MARK_LOCAL_GAS_END(n)
+#define PAR_TICKY_MARK_SPARK_QUEUE_START()
+#define PAR_TICKY_MARK_SPARK_QUEUE_END(n)
+#define PAR_TICKY_PACK_NEARBY_GRAPH_START()
+#define PAR_TICKY_PACK_NEARBY_GRAPH_END(n, thunks)
+#define PAR_TICKY_UNPACK_GRAPH_START()
+#define PAR_TICKY_UNPACK_GRAPH_END(n, thunks)
+#define PAR_TICKY_TP()
+#define PAR_TICKY_CNT_FREE_GA()
+
+#endif
+
diff --git a/rts/parallel/ParTypes.h b/rts/parallel/ParTypes.h
new file mode 100644
index 0000000000..910a6f2d99
--- /dev/null
+++ b/rts/parallel/ParTypes.h
@@ -0,0 +1,38 @@
+/* ---------------------------------------------------------------------------
+ * Time-stamp: <Tue Nov 09 1999 16:31:38 Stardate: [-30]3873.44 hwloidl>
+ *
+ * Runtime system types for GUM
+ *
+ * ------------------------------------------------------------------------- */
+
+#ifndef PARTYPES_H
+#define PARTYPES_H
+
+#ifdef PAR /* all of it */
+
+// now in Parallel.h
+//typedef struct hashtable HashTable;
+//typedef struct hashlist HashList;
+
+/* Global addresses now live in Parallel.h (needed in Closures.h) */
+// gaddr
+
+// now in Parallel.h
+/* (GA, LA) pairs
+typedef struct gala {
+ globalAddr ga;
+ StgPtr la;
+ struct gala *next;
+ rtsBool preferred;
+} rtsGaLa;
+*/
+
+#if defined(GRAN)
+typedef unsigned long TIME;
+typedef unsigned char Proc;
+typedef unsigned char EVTTYPE;
+#endif
+
+#endif /* PAR */
+
+#endif /* ! PARTYPES_H */
diff --git a/rts/parallel/Parallel.c b/rts/parallel/Parallel.c
new file mode 100644
index 0000000000..414b7e4406
--- /dev/null
+++ b/rts/parallel/Parallel.c
@@ -0,0 +1,1140 @@
+/*
+ Time-stamp: <Wed Mar 21 2001 16:42:40 Stardate: [-30]6363.48 hwloidl>
+
+ Basic functions for use in either GranSim or GUM.
+*/
+
+#if defined(GRAN) || defined(PAR) /* whole file */
+
+//@menu
+//* Includes::
+//* Variables and constants::
+//* Writing to the log-file::
+//* Global statistics::
+//* Dumping routines::
+//@end menu
+//*/ fool highlight
+
+//@node Includes, Variables and constants
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+
+//@node Variables and constants, Writing to the log-file, Includes
+//@subsection Variables and constants
+
+/* Where to write the log file */
+FILE *gr_file = NULL;
+char gr_filename[STATS_FILENAME_MAXLEN];
+
+#if defined(PAR)
+/* Global statistics */
+GlobalParStats globalParStats;
+#endif
+
+#if defined(PAR)
+ullong startTime = 0;
+#endif
+
+#if defined(PAR) && !defined(DEBUG)
+// HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCKKKKKKKKKKKK
+// Definitely the wrong place for info_type in !DEBUG (see Printer.c) -- HWL
+
+static char *closure_type_names[] = {
+ "INVALID_OBJECT", /* 0 */
+ "CONSTR", /* 1 */
+ "CONSTR_1_0", /* 2 */
+ "CONSTR_0_1", /* 3 */
+ "CONSTR_2_0", /* 4 */
+ "CONSTR_1_1", /* 5 */
+ "CONSTR_0_2", /* 6 */
+ "CONSTR_INTLIKE", /* 7 */
+ "CONSTR_CHARLIKE", /* 8 */
+ "CONSTR_STATIC", /* 9 */
+ "CONSTR_NOCAF_STATIC", /* 10 */
+ "FUN", /* 11 */
+ "FUN_1_0", /* 12 */
+ "FUN_0_1", /* 13 */
+ "FUN_2_0", /* 14 */
+ "FUN_1_1", /* 15 */
+ "FUN_0_2", /* 16 */
+ "FUN_STATIC", /* 17 */
+ "THUNK", /* 18 */
+ "THUNK_1_0", /* 19 */
+ "THUNK_0_1", /* 20 */
+ "THUNK_2_0", /* 21 */
+ "THUNK_1_1", /* 22 */
+ "THUNK_0_2", /* 23 */
+ "THUNK_STATIC", /* 24 */
+ "THUNK_SELECTOR", /* 25 */
+ "BCO", /* 26 */
+ "AP_UPD", /* 27 */
+ "PAP", /* 28 */
+ "IND", /* 29 */
+ "IND_OLDGEN", /* 30 */
+ "IND_PERM", /* 31 */
+ "IND_OLDGEN_PERM", /* 32 */
+ "IND_STATIC", /* 33 */
+ "CAF_UNENTERED", /* 34 */
+ "CAF_ENTERED", /* 35 */
+ "CAF_BLACKHOLE", /* 36 */
+ "RET_BCO", /* 37 */
+ "RET_SMALL", /* 38 */
+ "RET_VEC_SMALL", /* 39 */
+ "RET_BIG", /* 40 */
+ "RET_VEC_BIG", /* 41 */
+ "RET_DYN", /* 42 */
+ "UPDATE_FRAME", /* 43 */
+ "CATCH_FRAME", /* 44 */
+ "STOP_FRAME", /* 45 */
+ "SEQ_FRAME", /* 46 */
+ "BLACKHOLE", /* 47 */
+ "BLACKHOLE_BQ", /* 48 */
+ "SE_BLACKHOLE", /* 49 */
+ "SE_CAF_BLACKHOLE", /* 50 */
+ "MVAR", /* 51 */
+ "ARR_WORDS", /* 52 */
+ "MUT_ARR_PTRS", /* 53 */
+ "MUT_ARR_PTRS_FROZEN", /* 54 */
+ "MUT_VAR", /* 55 */
+ "WEAK", /* 56 */
+ "FOREIGN", /* 57 */
+ "STABLE_NAME", /* 58 */
+ "TSO", /* 59 */
+ "BLOCKED_FETCH", /* 60 */
+ "FETCH_ME", /* 61 */
+ "FETCH_ME_BQ", /* 62 */
+ "RBH", /* 63 */
+ "EVACUATED", /* 64 */
+ "REMOTE_REF", /* 65 */
+ "N_CLOSURE_TYPES" /* 66 */
+};
+
+char *
+info_type(StgClosure *closure){
+ return closure_type_names[get_itbl(closure)->type];
+}
+
+char *
+info_type_by_ip(StgInfoTable *ip){
+ return closure_type_names[ip->type];
+}
+
+void
+info_hdr_type(StgClosure *closure, char *res){
+ strcpy(res,closure_type_names[get_itbl(closure)->type]);
+}
+#endif
+
+//@node Writing to the log-file, Global statistics, Variables and constants
+//@subsection Writing to the log-file
+/*
+ Writing to the log-file
+
+ These routines dump event-based info to the main log-file.
+ The code for writing log files is shared between GranSim and GUM.
+*/
+
+/*
+ * If you're not using GNUC and you're on a 32-bit machine, you're
+ * probably out of luck here. However, since CONCURRENT currently
+ * requires GNUC, I'm not too worried about it. --JSM
+ */
+
+//@cindex init_gr_simulation
+#if defined(GRAN)
+void
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ nat i;
+ char *extension = RtsFlags.GranFlags.GranSimStats.Binary ? "gb" : "gr";
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ init_gr_stats();
+
+ /* init global constants for costs of basic operations */
+ gran_arith_cost = RtsFlags.GranFlags.Costs.arith_cost;
+ gran_branch_cost = RtsFlags.GranFlags.Costs.branch_cost;
+ gran_load_cost = RtsFlags.GranFlags.Costs.load_cost;
+ gran_store_cost = RtsFlags.GranFlags.Costs.store_cost;
+ gran_float_cost = RtsFlags.GranFlags.Costs.float_cost;
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+
+ if (!RtsFlags.GranFlags.GranSimStats.Full)
+ return;
+
+ sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL) {
+ barf("Can't open granularity simulation report file %s\n",
+ gr_filename);
+ }
+
+ setbuf(gr_file, NULL); /* turn buffering off */
+
+ /* write header with program name, options and setup to gr_file */
+ fputs("Granularity Simulation for ", gr_file);
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+
+ fputs("\nStart time: ", gr_file);
+ fputs(time_str(), gr_file); /* defined in RtsUtils.c */
+ fputc('\n', gr_file);
+
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("General Parameters:\n\n", gr_file);
+
+ if (RtsFlags.GranFlags.Light)
+ fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RtsFlags.GranFlags.DoThreadMigration?"":"Don't ",
+ RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" :
+ "Block on Fetch");
+ else
+ fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RtsFlags.GranFlags.proc,RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RtsFlags.GranFlags.DoThreadMigration?"":"Don't ",
+ RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" :
+ "Block on Fetch");
+
+ if (RtsFlags.GranFlags.DoBulkFetching)
+ if (RtsFlags.GranFlags.ThunksToPack)
+ fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
+ RtsFlags.GranFlags.ThunksToPack,
+ RtsFlags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
+ RtsFlags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
+
+  fprintf(gr_file, "Fetch Strategy (%u): if there are outstanding fetches, %s\n",
+	  RtsFlags.GranFlags.FetchStrategy,
+	  RtsFlags.GranFlags.FetchStrategy==0 ?
+	    "block (block-on-fetch)":
+ RtsFlags.GranFlags.FetchStrategy==1 ?
+ "only run runnable threads":
+ RtsFlags.GranFlags.FetchStrategy==2 ?
+ "create threads only from local sparks":
+ RtsFlags.GranFlags.FetchStrategy==3 ?
+ "create threads from local or global sparks":
+ RtsFlags.GranFlags.FetchStrategy==4 ?
+ "create sparks and steal threads if necessary":
+ "unknown");
+
+ if (RtsFlags.GranFlags.DoPrioritySparking)
+ fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
+
+ if (RtsFlags.GranFlags.DoPriorityScheduling)
+ fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
+
+ fprintf(gr_file, "Thread Creation Time %u, Thread Queue Time %u\n",
+ RtsFlags.GranFlags.Costs.threadcreatetime,
+ RtsFlags.GranFlags.Costs.threadqueuetime);
+ fprintf(gr_file, "Thread DeSchedule Time %u, Thread Schedule Time %u\n",
+ RtsFlags.GranFlags.Costs.threaddescheduletime,
+ RtsFlags.GranFlags.Costs.threadscheduletime);
+ fprintf(gr_file, "Thread Context-Switch Time %u\n",
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Communication Metrics:\n\n", gr_file);
+ fprintf(gr_file,
+ "Latency %u (1st) %u (rest), Fetch %u, Notify %u (Global) %u (Local)\n",
+ RtsFlags.GranFlags.Costs.latency,
+ RtsFlags.GranFlags.Costs.additional_latency,
+ RtsFlags.GranFlags.Costs.fetchtime,
+ RtsFlags.GranFlags.Costs.gunblocktime,
+ RtsFlags.GranFlags.Costs.lunblocktime);
+ fprintf(gr_file,
+ "Message Creation %u (+ %u after send), Message Read %u\n",
+ RtsFlags.GranFlags.Costs.mpacktime,
+ RtsFlags.GranFlags.Costs.mtidytime,
+ RtsFlags.GranFlags.Costs.munpacktime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Instruction Metrics:\n\n", gr_file);
+ fprintf(gr_file, "Arith %u, Branch %u, Load %u, Store %u, Float %u, Alloc %u\n",
+ RtsFlags.GranFlags.Costs.arith_cost,
+ RtsFlags.GranFlags.Costs.branch_cost,
+ RtsFlags.GranFlags.Costs.load_cost,
+ RtsFlags.GranFlags.Costs.store_cost,
+ RtsFlags.GranFlags.Costs.float_cost,
+ RtsFlags.GranFlags.Costs.heapalloc_cost);
+ fputs("\n\n++++++++++++++++++++\n\n", gr_file);
+
+# if 0
+ /* binary log files are currently not supported */
+ if (RtsFlags.GranFlags.GranSimStats.Binary)
+ grputw(sizeof(rtsTime));
+# endif
+
+  return;
+}
+
+#elif defined(PAR)
+
+void init_gr_stats (void);
+
+void
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ nat i;
+  char time_string[TIME_STR_LEN];
+ char *extension = RtsFlags.ParFlags.ParStats.Binary ? "gb" : "gr";
+
+ sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
+
+ if (!RtsFlags.ParFlags.ParStats.Full)
+ return;
+
+ if (RtsFlags.ParFlags.ParStats.Global)
+ init_gr_stats();
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL)
+ barf("Can't open activity report file %s\n", gr_filename);
+
+ setbuf(gr_file, NULL); /* turn buffering off */
+
+ /* write header with program name, options and setup to gr_file */
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+ fputc('\n', gr_file);
+
+ /* record the absolute start time to allow synchronisation of log-files */
+ fputs("Start-Time: ", gr_file);
+ fputs(time_str(), gr_file);
+ fputc('\n', gr_file);
+
+ ASSERT(startTime==0);
+ // startTime = msTime();
+ startTime = CURRENT_TIME;
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+ fprintf(gr_file, "PE %2u [%s]: TIME\n", thisPE, time_string);
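+  /* producing e.g. "PE  1 [1234567]: TIME" (timestamp illustrative) */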
+
+# if 0
+  ngoq Dogh'q' vImuS  /* Klingon, roughly: "I hate silly code" */
+ IF_PAR_DEBUG(verbose,
+ belch("== Start-time: %ld (%s)",
+ startTime, time_string));
+
+ if (startTime > LL(1000000000)) {
+ fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
+ (rtsTime) (startTime / LL(1000000000)),
+ (rtsTime) (startTime % LL(1000000000)));
+ } else {
+ fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
+ }
+ /* binary log files are currently not supported */
+ if (RtsFlags.GranFlags.GranSimStats.Binary)
+ grputw(sizeof(rtsTime));
+# endif
+
+ return;
+}
+
+void
+init_gr_stats (void) {
+ // memset(&globalParStats, '\0', sizeof(GlobalParStats));
+
+ globalParStats.tot_mark_GA = globalParStats.tot_rebuild_GA = globalParStats.tot_free_GA = globalParStats.res_mark_GA = globalParStats.res_rebuild_GA = globalParStats.res_free_GA = globalParStats.tot_size_GA = globalParStats.res_size_GA = globalParStats.tot_global = globalParStats.tot_local = 0;
+  globalParStats.cnt_mark_GA = globalParStats.cnt_rebuild_GA = globalParStats.cnt_free_GA = globalParStats.local_alloc_GA = 0;
+
+ globalParStats.time_mark_GA = 0.0;
+ globalParStats.time_rebuild_GA = 0.0;
+ globalParStats.time_sparks = 0.0;
+  globalParStats.time_pack = 0.0;
+  globalParStats.time_unpack = 0.0;
+
+ globalParStats.res_sp = globalParStats.res_tp = globalParStats.tot_sp = globalParStats.tot_tp = globalParStats.cnt_sp = globalParStats.cnt_tp = globalParStats.emp_sp = globalParStats.emp_tp = 0;
+  globalParStats.tot_packets = globalParStats.tot_packet_size = globalParStats.tot_thunks = globalParStats.res_packet_size = globalParStats.res_thunks = globalParStats.rec_packets = globalParStats.rec_packet_size = globalParStats.rec_thunks = globalParStats.rec_res_packet_size = globalParStats.rec_res_thunks = 0;
+
+ globalParStats.tot_fish_mess = globalParStats.tot_fetch_mess = globalParStats.tot_resume_mess = globalParStats.tot_schedule_mess = 0;
+ globalParStats.rec_fish_mess = globalParStats.rec_resume_mess = globalParStats.rec_schedule_mess = 0;
+ globalParStats.rec_fetch_mess = 0;
+#if defined(DIST)
+ globalParStats.tot_reval_mess = 0;
+ globalParStats.rec_reval_mess = 0;
+#endif
+
+ globalParStats.tot_threads_created = globalParStats.tot_sparks_created = globalParStats.tot_sparks_ignored = globalParStats.tot_sparks_marked = globalParStats.res_sparks_created = globalParStats.res_sparks_ignored = globalParStats.res_sparks_marked = 0;
+ globalParStats.tot_yields = globalParStats.tot_stackover = globalParStats.tot_heapover = 0;
+
+ globalParStats.tot_arrs = globalParStats.tot_arr_size = 0;
+}
+
+#endif /* PAR */
+
+//@cindex end_gr_simulation
+#if defined(GRAN)
+void
+end_gr_simulation(void)
+{
+ char time_string[TIME_STR_LEN];
+
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+
+ /* Print event stats */
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ nat i;
+
+ fprintf(stderr,"Total yields: %d\n",
+ globalGranStats.tot_yields);
+
+ fprintf(stderr,"Total number of threads created: %d ; per PE:\n",
+ globalGranStats.tot_threads_created);
+ for (i=0; i<RtsFlags.GranFlags.proc; i++) {
+ fprintf(stderr," PE %d: %d\t",
+ i, globalGranStats.threads_created_on_PE[i]);
+      if ((i+1) % 4 == 0) fprintf(stderr,"\n");
+ }
+    if (RtsFlags.GranFlags.proc % 4 != 0) fprintf(stderr,"\n");
+ fprintf(stderr,"Total number of threads migrated: %d\n",
+ globalGranStats.tot_TSOs_migrated);
+
+ fprintf(stderr,"Total number of sparks created: %d ; per PE:\n",
+ globalGranStats.tot_sparks_created);
+ for (i=0; i<RtsFlags.GranFlags.proc; i++) {
+ fprintf(stderr," PE %d: %d\t",
+ i, globalGranStats.sparks_created_on_PE[i]);
+      if ((i+1) % 4 == 0) fprintf(stderr,"\n");
+ }
+    if (RtsFlags.GranFlags.proc % 4 != 0) fprintf(stderr,"\n");
+
+ fprintf(stderr,"Event statistics (number of events: %d):\n",
+ globalGranStats.noOfEvents);
+ for (i=0; i<=MAX_EVENT; i++) {
+ fprintf(stderr," %s (%d): \t%d \t%f%%\t%f%%\n",
+ event_names[i],i,globalGranStats.event_counts[i],
+ (float)(100*globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents),
+ (i==ContinueThread ? 0.0 :
+ (float)(100*(globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents-globalGranStats.event_counts[ContinueThread])) ));
+ }
+ fprintf(stderr,"Randomized steals: %ld sparks, %ld threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f)\n\t(Threads: %ld)",
+ globalGranStats.rs_sp_count,
+ globalGranStats.rs_t_count,
+ globalGranStats.no_of_steals,
+ (float)globalGranStats.ntimes_total/(float)stg_max(globalGranStats.no_of_steals,1),
+ (float)globalGranStats.fl_total/(float)stg_max(globalGranStats.no_of_steals,1),
+ globalGranStats.no_of_migrates);
+ fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
+ globalGranStats.tot_sparks, globalGranStats.withered_sparks,
+ ( globalGranStats.tot_sparks == 0 ? 0 :
+ (float)(100*globalGranStats.withered_sparks)/(float)(globalGranStats.tot_sparks)) );
+ /* Print statistics about priority sparking */
+ if (RtsFlags.GranFlags.DoPrioritySparking) {
+ fprintf(stderr,"About Priority Sparking:\n");
+ fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", globalGranStats.tot_sq_probes, (float)globalGranStats.tot_sq_len/(float)globalGranStats.tot_sq_probes);
+ }
+ /* Print statistics about priority sparking */
+ if (RtsFlags.GranFlags.DoPriorityScheduling) {
+ fprintf(stderr,"About Priority Scheduling:\n");
+ fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
+ globalGranStats.tot_add_threads, globalGranStats.non_end_add_threads,
+ (float)globalGranStats.tot_tq_len/(float)globalGranStats.tot_add_threads);
+ }
+ /* Blocking queue statistics */
+ if (1) {
+      fprintf(stderr,"Blocking queue statistics:\n");
+ fprintf(stderr," Total no. of FMBQs generated: %d\n",
+ globalGranStats.tot_FMBQs);
+ fprintf(stderr," Total no. of bqs awakened: %d\n",
+ globalGranStats.tot_awbq);
+ fprintf(stderr," Total length of all bqs: %d\tAvg length of bqs: %.2f\n",
+ globalGranStats.tot_bq_len, (float)globalGranStats.tot_bq_len/(float)globalGranStats.tot_awbq);
+ fprintf(stderr," Percentage of local TSOs in BQs: %.2f\n",
+ (float)globalGranStats.tot_bq_len*100.0/(float)globalGranStats.tot_bq_len);
+ fprintf(stderr," Total time spent processing BQs: %lx\n",
+ globalGranStats.tot_bq_processing_time);
+ }
+
+ /* Fetch misses and thunk stealing */
+ fprintf(stderr,"Number of fetch misses: %d\n",
+ globalGranStats.fetch_misses);
+
+    /* Print packet statistics if GUM-style bulk fetching is turned on */
+ if (RtsFlags.GranFlags.DoBulkFetching) {
+      fprintf(stderr,"Packet statistics:\n");
+ fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", globalGranStats.tot_packets, (float)globalGranStats.tot_packet_size/(float)globalGranStats.tot_packets);
+ fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", globalGranStats.tot_thunks, (float)globalGranStats.tot_thunks/(float)globalGranStats.tot_packets);
+ fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", globalGranStats.tot_cuts, (float)globalGranStats.tot_cuts/(float)globalGranStats.tot_packets);
+ /*
+ if (closure_queue_overflows>0)
+ fprintf(stderr," Number of closure queue overflows: %u\n",
+ closure_queue_overflows);
+ */
+ }
+ } /* RtsFlags.GranFlags.GranSimStats.Global */
+
+# if defined(GRAN_COUNT)
+# error "GRAN_COUNT not supported; should be parallel ticky profiling, really"
+ fprintf(stderr,"Update count statistics:\n");
+ fprintf(stderr," Total number of updates: %u\n",nUPDs);
+ fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
+ nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
+ fprintf(stderr," Number of PAPs: %u\n",nPAPs);
+# endif
+
+ fprintf(stderr, "Simulation finished after @ %s @ cycles. %d sparks created, %d sparks ignored. Check %s for details.\n",
+ time_string, sparksCreated, sparksIgnored, gr_filename);
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ fclose(gr_file);
+}
+
+#elif defined(PAR)
+
+/*
+ Under GUM we print only one line.
+*/
+void
+end_gr_simulation(void)
+{
+ char time_string[TIME_STR_LEN];
+
+ ullong_format_string(CURRENT_TIME-startTime, time_string, rtsFalse/*no commas!*/);
+
+ fprintf(stderr, "Computation finished after @ %s @ ms. %d sparks created, %d sparks ignored. Check %s for details.\n",
+ time_string, sparksCreated, sparksIgnored, gr_filename);
+
+ if (RtsFlags.ParFlags.ParStats.Full)
+ fclose(gr_file);
+}
+#endif /* PAR */
+
+//@node Global statistics, Dumping routines, Writing to the log-file
+//@subsection Global statistics
+/*
+ Called at the end of execution
+*/
+
+//@node Dumping routines, , Global statistics
+//@subsection Dumping routines
+
+//@cindex DumpGranEvent
+void
+DumpGranEvent(name, tso)
+GranEventType name;
+StgTSO *tso;
+{
+ DumpRawGranEvent(CURRENT_PROC, (PEs)0, name, tso, &stg_END_TSO_QUEUE_closure, (StgInt)0, (StgInt)0);
+}
+
+//@cindex DumpRawGranEvent
+void
+DumpRawGranEvent(proc, p, name, tso, node, sparkname, len)
+PEs proc, p; /* proc ... where it happens; p ... where node lives */
+GranEventType name;
+StgTSO *tso;
+StgClosure *node;
+StgInt sparkname, len;
+{
+# if defined(GRAN)
+ DumpVeryRawGranEvent(TIME_ON_PROC(proc),
+ proc, p, name, tso, node, sparkname, len);
+# elif defined(PAR)
+ DumpVeryRawGranEvent(CURRENT_TIME,
+ proc, p, name, tso, node, sparkname, len);
+# endif
+}
+
+//@cindex DumpVeryRawGranEvent
+void
+DumpVeryRawGranEvent(time, proc, p, name, tso, node, sparkname, len)
+rtsTime time;
+PEs proc, p; /* proc ... where it happens; p ... where node lives */
+GranEventType name;
+StgTSO *tso;
+StgClosure *node;
+StgInt sparkname, len;
+{
+  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!
+ StgWord id;
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+# if defined(GRAN)
+ ullong_format_string(time,
+ time_string, rtsFalse/*no commas!*/);
+# elif defined(PAR)
+ ullong_format_string(time,
+ time_string, rtsFalse/*no commas!*/);
+# endif
+ output_file = gr_file;
+
+# if defined(GRAN)
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ ASSERT(output_file!=NULL);
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# elif defined(PAR)
+
+ if (RtsFlags.ParFlags.ParStats.Full)
+ ASSERT(output_file!=NULL);
+
+ if (RtsFlags.ParFlags.ParStats.Suppressed)
+ return;
+
+# endif
+
+ id = tso == NULL ? -1 : tso->id;
+ if (node==stgCast(StgClosure*,&stg_END_TSO_QUEUE_closure))
+ strcpy(node_str,"________"); /* "END_TSO_QUEUE"); */
+ else
+ sprintf(node_str,"0x%-6lx",node);
+
+ if (name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ if (BINARY_STATS)
+ barf("binary log files not yet supported");
+#if 0
+ /* ToDo: fix code for writing binary GrAnSim statistics */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((StgWord)node);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc)); /* this line is bound to */
+ grputw(id); /* do the wrong thing */
+ break;
+ default:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((StgWord)node);
+ }
+#endif
+ else /* !BINARY_STATS */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],
+ id,node_str,sparkname,len);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ case GR_STOLEN:
+ case GR_STOLENQ:
+ case GR_STEALING:
+ fprintf(output_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
+ proc, time_string, gran_event_names[name],
+ id,node_str,p);
+ break;
+ case GR_RESUME:
+ case GR_RESUMEQ:
+ case GR_SCHEDULE:
+ case GR_DESCHEDULE:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx \n",
+ proc,time_string,gran_event_names[name],id);
+ break;
+ case GR_ALLOC:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
+ proc,time_string,gran_event_names[name],id,len);
+ break;
+ default:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],id,node_str,len);
+ }
+}
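+
+/* For reference, a GR_START line produced above looks like (values
+   illustrative, fields tab-separated):
+
+     PE  0 [4711]: START     1  0x3fe0a8  [SN 0]  [sparks 0]
+
+   and a GR_FETCH line:
+
+     PE  1 [4713]: FETCH     1  0x3fe0a8  (from  0)
+*/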
+
+//@cindex DumpEndEvent
+void
+DumpEndEvent(proc, tso, mandatory_thread)
+PEs proc;
+StgTSO *tso;
+rtsBool mandatory_thread;
+{
+  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!
+ char time_string[TIME_STR_LEN];
+# if defined(GRAN)
+ ullong_format_string(TIME_ON_PROC(proc),
+ time_string, rtsFalse/*no commas!*/);
+# elif defined(PAR)
+ ullong_format_string(CURRENT_TIME,
+ time_string, rtsFalse/*no commas!*/);
+# endif
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+#if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+#endif
+
+ if (BINARY_STATS) {
+ barf("binary log files not yet supported");
+#if 0
+ grputw(GR_END);
+ grputw(proc);
+ abort(); /* die please: a single word doesn't represent long long times */
+ grputw(CURRENT_TIME); /* this line is bound to fail */
+ grputw(tso->id);
+#ifdef PAR
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+#else
+ grputw(tso->gran.sparkname);
+ grputw(tso->gran.startedat);
+ grputw(tso->gran.exported);
+ grputw(tso->gran.basicblocks);
+ grputw(tso->gran.allocs);
+ grputw(tso->gran.exectime);
+ grputw(tso->gran.blocktime);
+ grputw(tso->gran.blockcount);
+ grputw(tso->gran.fetchtime);
+ grputw(tso->gran.fetchcount);
+ grputw(tso->gran.localsparks);
+ grputw(tso->gran.globalsparks);
+#endif
+ grputw(mandatory_thread);
+#endif /* 0 */
+ } else {
+
+ /*
+ * NB: DumpGranEvent cannot be used because PE may be wrong
+ * (as well as the extra info)
+ */
+ fprintf(output_file, "PE %2u [%s]: END %lx, SN %u, ST %lu, EXP %s, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u), LS %u, GS %u, MY %s\n"
+ ,proc
+ ,time_string
+ ,tso->id
+#if defined(GRAN)
+ ,tso->gran.sparkname
+ ,tso->gran.startedat
+	     ,((tso->gran.exported) ? "T" : "F")
+ ,tso->gran.basicblocks
+ ,tso->gran.allocs
+ ,tso->gran.exectime
+ ,tso->gran.blocktime
+ ,tso->gran.blockcount
+ ,tso->gran.fetchtime
+ ,tso->gran.fetchcount
+ ,tso->gran.localsparks
+ ,tso->gran.globalsparks
+#elif defined(PAR)
+ ,tso->par.sparkname
+ ,tso->par.startedat
+ ,(tso->par.exported) ? "T" : "F"
+ ,tso->par.basicblocks
+ ,tso->par.allocs
+ ,tso->par.exectime
+ ,tso->par.blocktime
+ ,tso->par.blockcount
+ ,tso->par.fetchtime
+ ,tso->par.fetchcount
+ ,tso->par.localsparks
+ ,tso->par.globalsparks
+#endif
+ ,(mandatory_thread ? "T" : "F")
+ );
+ }
+}
+
+//@cindex DumpTSO
+void
+DumpTSO(tso)
+StgTSO *tso;
+{
+  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+ fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %u, LINK 0x%lx, TYPE %s\n"
+ ,tso
+#if defined(GRAN)
+ ,tso->gran.sparkname
+#elif defined(PAR)
+ ,tso->par.sparkname
+#endif
+ ,tso->id
+ ,tso->link
+ ,/*tso->state==T_MAIN?"MAIN":
+ TSO_TYPE(tso)==T_FAIL?"FAIL":
+ TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
+ TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
+ */
+ "???"
+ );
+
+ fprintf(output_file,"TSO %lx: SN %u, ST %u, GBL %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u) LS %u, GS %u\n"
+ ,tso->id
+#if defined(GRAN)
+ ,tso->gran.sparkname
+ ,tso->gran.startedat
+ ,tso->gran.exported?'T':'F'
+ ,tso->gran.basicblocks
+ ,tso->gran.allocs
+ ,tso->gran.exectime
+ ,tso->gran.blocktime
+ ,tso->gran.blockcount
+ ,tso->gran.fetchtime
+ ,tso->gran.fetchcount
+ ,tso->gran.localsparks
+ ,tso->gran.globalsparks
+#elif defined(PAR)
+ ,tso->par.sparkname
+ ,tso->par.startedat
+ ,tso->par.exported?'T':'F'
+ ,tso->par.basicblocks
+ ,tso->par.allocs
+ ,tso->par.exectime
+ ,tso->par.blocktime
+ ,tso->par.blockcount
+ ,tso->par.fetchtime
+ ,tso->par.fetchcount
+ ,tso->par.localsparks
+ ,tso->par.globalsparks
+#endif
+ );
+}
+
+#if 0
+/*
+ ToDo: fix binary output of log files, and support new log file format.
+*/
+/*
+ Output a terminate event and an 8-byte time.
+*/
+
+//@cindex grterminate
+void
+grterminate(v)
+rtsTime v;
+{
+ if (!BINARY_STATS)
+ barf("grterminate: binary statistics not enabled\n");
+
+# if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ DumpGranEvent(GR_TERMINATE, stgCast(StgTSO*,&stg_END_TSO_QUEUE_closure));
+
+ if (sizeof(rtsTime) == 4) {
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ } else {
+ putc(v >> 56l, gr_file);
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ }
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+}
+
+/*
+ Length-coded output: first 3 bits contain length coding
+
+ 00x 1 byte
+ 01x 2 bytes
+ 10x 4 bytes
+ 110 8 bytes
+ 111 5 or 9 bytes
+*/
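+/*
+   Worked example: v = 0x1234 fits in 14 bits, so grputw emits two bytes,
+   (v >> 8) | 0x40 = 0x52 followed by v & 0xff = 0x34; a reader strips
+   the tag bits in the first byte to recover 0x1234.
+*/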
+
+//@cindex grputw
+void
+grputw(v)
+rtsTime v;
+{
+ if (!BINARY_STATS)
+ barf("grputw: binary statistics not enabled\n");
+
+# if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ if (v <= 0x3fl) { /* length v = 1 byte */
+ fputc(v & 0x3f, gr_file);
+ } else if (v <= 0x3fffl) { /* length v = 2 byte */
+ fputc((v >> 8l) | 0x40l, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
+ fputc((v >> 24l) | 0x80l, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (sizeof(TIME) == 4) {
+ fputc(0x70, gr_file);
+ fputc((v >> 24l) & 0xffl, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else {
+ if (v <= 0x3fffffffffffffl)
+ putc((v >> 56l) | 0x60l, gr_file);
+ else {
+ putc(0x70, gr_file);
+ putc((v >> 56l) & 0xffl, gr_file);
+ }
+
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+ }
+}
+#endif /* 0 */
+
+/*
+ extracting specific info out of a closure; used in packing (GranSim, GUM)
+*/
+//@cindex get_closure_info
+StgInfoTable*
+get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs,
+ nat *vhs, char *info_hdr_ty)
+{
+ StgInfoTable *info;
+
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(node));
+ info = get_itbl(node);
+ /* the switch shouldn't be necessary, really; just use default case */
+ switch (info->type) {
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to
+ *size = sizeW_fromITBL(rip);
+ *ptrs = (nat) (rip->layout.payload.ptrs);
+ *nonptrs = (nat) (rip->layout.payload.nptrs);
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "RBH");
+#endif
+ return rip; // NB: we return the reverted info ptr for a RBH!!!!!!
+ }
+
+#if defined(PAR)
+ /* Closures specific to GUM */
+ case FETCH_ME:
+ *size = sizeofW(StgFetchMe);
+ *ptrs = (nat)0;
+ *nonptrs = (nat)0;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "FETCH_ME");
+#endif
+ return info;
+
+#ifdef DIST
+ case REMOTE_REF: //same as for FETCH_ME...
+ *size = sizeofW(StgFetchMe);
+ *ptrs = (nat)0;
+ *nonptrs = (nat)0;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "REMOTE_REF");
+#endif
+ return info;
+#endif /* DIST */
+
+ case FETCH_ME_BQ:
+ *size = sizeofW(StgFetchMeBlockingQueue);
+ *ptrs = (nat)0;
+ *nonptrs = (nat)0;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "FETCH_ME_BQ");
+#endif
+ return info;
+
+ case BLOCKED_FETCH:
+ *size = sizeofW(StgBlockedFetch);
+ *ptrs = (nat)0;
+ *nonptrs = (nat)0;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "BLOCKED_FETCH");
+#endif
+ return info;
+#endif /* PAR */
+
+ /* these magic constants are outrageous!! why does the ITBL lie about it? */
+ case THUNK_SELECTOR:
+ *size = THUNK_SELECTOR_sizeW();
+ *ptrs = 1;
+ *nonptrs = MIN_UPD_SIZE-*ptrs; // weird
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+ return info;
+
+ case ARR_WORDS:
+ /* ToDo: check whether this can be merged with the default case */
+ *size = arr_words_sizeW((StgArrWords *)node);
+ *ptrs = 0;
+ *nonptrs = ((StgArrWords *)node)->words;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+ return info;
+
+ case PAP:
+ /* ToDo: check whether this can be merged with the default case */
+ *size = pap_sizeW((StgPAP *)node);
+ *ptrs = 0;
+ *nonptrs = 0;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+ return info;
+
+ case AP_UPD:
+ /* ToDo: check whether this can be merged with the default case */
+ *size = AP_sizeW(((StgAP_UPD *)node)->n_args);
+ *ptrs = 0;
+ *nonptrs = 0;
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+ return info;
+
+ default:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) (info->layout.payload.ptrs);
+ *nonptrs = (nat) (info->layout.payload.nptrs);
+ *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "UNKNOWN");
+#endif
+ return info;
+ }
+}
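+
+/* Sketch of a typical consumer (the QueueClosure step is illustrative;
+   see Pack.c for the real traversal):
+
+     nat size, ptrs, nonptrs, vhs, i;
+     char ty[80];
+     StgInfoTable *ip = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, ty);
+     for (i = 0; i < ptrs; i++)
+       QueueClosure(node->payload[i]);  // enqueue children for packing
+*/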
+
+//@cindex IS_BLACK_HOLE
+rtsBool
+IS_BLACK_HOLE(StgClosure* node)
+{
+ // StgInfoTable *info;
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(node));
+ switch (get_itbl(node)->type) {
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case RBH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ return rtsTrue;
+ default:
+ return rtsFalse;
+ }
+//return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse);
+}
+
+//@cindex IS_INDIRECTION
+StgClosure *
+IS_INDIRECTION(StgClosure* node)
+{
+ StgInfoTable *info;
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(node));
+ info = get_itbl(node);
+ switch (info->type) {
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ /* relies on indirectee being at same place for all these closure types */
+ return (((StgInd*)node) -> indirectee);
+#if 0
+ case EVACUATED: // counting as ind to use in GC routines, too
+ // could use the same code as above (evacuee is at same pos as indirectee)
+ return (((StgEvacuated *)node) -> evacuee);
+#endif
+ default:
+ return NULL;
+ }
+}
+
+//@cindex unwindInd
+StgClosure *
+UNWIND_IND (StgClosure *closure)
+{
+ StgClosure *next;
+
+ while ((next = IS_INDIRECTION((StgClosure *)closure)) != NULL)
+ closure = next;
+
+ ASSERT(next==(StgClosure *)NULL);
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(closure));
+ return closure;
+}
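+
+/* Usage sketch: graph-walking code normally shorts out indirections
+   before testing the closure, e.g. (branch body illustrative)
+
+     node = UNWIND_IND(node);
+     if (IS_BLACK_HOLE(node)) {
+       // under evaluation or remote; fetch/block instead of packing
+     }
+*/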
+
+#endif /* GRAN || PAR whole file */
diff --git a/rts/parallel/ParallelDebug.c b/rts/parallel/ParallelDebug.c
new file mode 100644
index 0000000000..b357af6379
--- /dev/null
+++ b/rts/parallel/ParallelDebug.c
@@ -0,0 +1,1955 @@
+/*
+ Time-stamp: <Sun Mar 18 2001 19:32:56 Stardate: [-30]6349.07 hwloidl>
+
+ Various debugging routines for GranSim and GUM
+*/
+
+#if defined(DEBUG) && (defined(GRAN) || defined(PAR)) /* whole file */
+
+//@node Debugging routines for GranSim and GUM, , ,
+//@section Debugging routines for GranSim and GUM
+
+//@menu
+//* Includes::
+//* Constants and Variables::
+//* Closures::
+//* Threads::
+//* Events::
+//* Sparks::
+//* Processors::
+//* Shortcuts::
+//* Printing info type::
+//* Printing Packet Contents::
+//* End of File::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Debugging routines for GranSim and GUM, Debugging routines for GranSim and GUM
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "StgMiscClosures.h"
+#include "Printer.h"
+# if defined(DEBUG)
+# include "Hash.h"
+# include "Storage.h"
+# include "ParallelDebug.h"
+# endif
+
+//@node Prototypes, Constants and Variables, Includes, Debugging routines for GranSim and GUM
+//@subsection Prototypes
+/*
+rtsBool isOffset(globalAddr *ga);
+rtsBool isFixed(globalAddr *ga);
+*/
+//@node Constants and Variables, Closures, Prototypes, Debugging routines for GranSim and GUM
+//@subsection Constants and Variables
+
+static HashTable *tmpClosureTable; // used in GraphFingerPrint and PrintGraph
+
+#if defined(PAR)
+static char finger_print_char[] = {
+ '/', /* INVALID_OBJECT 0 */
+ 'C', /* CONSTR 1 */
+ 'C', /* CONSTR_1_0 2 */
+ 'C', /* CONSTR_0_1 3 */
+ 'C', /* CONSTR_2_0 4 */
+ 'C', /* CONSTR_1_1 5 */
+ 'C', /* CONSTR_0_2 6 */
+ 'I', /* CONSTR_INTLIKE 7 */
+ 'I', /* CONSTR_CHARLIKE 8 */
+ 'S', /* CONSTR_STATIC 9 */
+ 'S', /* CONSTR_NOCAF_STATIC 10 */
+ 'F', /* FUN 11 */
+ 'F', /* FUN_1_0 12 */
+ 'F', /* FUN_0_1 13 */
+ 'F', /* FUN_2_0 14 */
+ 'F', /* FUN_1_1 15 */
+ 'F', /* FUN_0_2 16 */
+ 'S', /* FUN_STATIC 17 */
+ 'T', /* THUNK 18 */
+ 'T', /* THUNK_1_0 19 */
+ 'T', /* THUNK_0_1 20 */
+ 'T', /* THUNK_2_0 21 */
+ 'T', /* THUNK_1_1 22 */
+ 'T', /* THUNK_0_2 23 */
+ 'S', /* THUNK_STATIC 24 */
+ 'E', /* THUNK_SELECTOR 25 */
+ 'b', /* BCO 26 */
+ 'p', /* AP_UPD 27 */
+ 'p', /* PAP 28 */
+ '_', /* IND 29 */
+ '_', /* IND_OLDGEN 30 */
+ '_', /* IND_PERM 31 */
+ '_', /* IND_OLDGEN_PERM 32 */
+ '_', /* IND_STATIC 33 */
+ '?', /* ***unused*** 34 */
+ '?', /* ***unused*** 35 */
+ '^', /* RET_BCO 36 */
+ '^', /* RET_SMALL 37 */
+ '^', /* RET_VEC_SMALL 38 */
+ '^', /* RET_BIG 39 */
+ '^', /* RET_VEC_BIG 40 */
+ '^', /* RET_DYN 41 */
+ '~', /* UPDATE_FRAME 42 */
+ '~', /* CATCH_FRAME 43 */
+ '~', /* STOP_FRAME 44 */
+ '~', /* SEQ_FRAME 45 */
+ 'o', /* CAF_BLACKHOLE 46 */
+ 'o', /* BLACKHOLE 47 */
+ 'o', /* BLACKHOLE_BQ 48 */
+ 'o', /* SE_BLACKHOLE 49 */
+ 'o', /* SE_CAF_BLACKHOLE 50 */
+ 'm', /* MVAR 51 */
+ 'a', /* ARR_WORDS 52 */
+ 'a', /* MUT_ARR_PTRS 53 */
+ 'a', /* MUT_ARR_PTRS_FROZEN 54 */
+ 'q', /* MUT_VAR 55 */
+ 'w', /* WEAK 56 */
+ 'f', /* FOREIGN 57 */
+ 's', /* STABLE_NAME 58 */
+ '@', /* TSO 59 */
+ '#', /* BLOCKED_FETCH 60 */
+ '>', /* FETCH_ME 61 */
+ '>', /* FETCH_ME_BQ 62 */
+ '$', /* RBH 63 */
+ 'v', /* EVACUATED 64 */
+ '>' /* REMOTE_REF 65 */
+  /* ASSERT(there are N_CLOSURE_TYPES (==66) in this array) */
+};
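+
+/* GraphFingerPrint and PrintGraph (below) append one of these characters
+   per closure visited, so a fingerprint is a compact string such as
+   "T_C>" -- a THUNK, an indirection, a CONSTR and a FETCH_ME (string
+   illustrative). */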
+#endif /* PAR */
+
+#if defined(GRAN) && defined(GRAN_CHECK)
+//@node Closures, Threads, Constants and Variables, Debugging routines for GranSim and GUM
+//@subsection Closures
+
+void
+G_PRINT_NODE(node)
+StgClosure* node;
+{
+ StgInfoTable *info_ptr;
+ StgTSO* bqe;
+ nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0;
+ char info_hdr_ty[80], info_ty[80];
+
+ if (node==NULL) {
+ fprintf(stderr,"NULL\n");
+ return;
+ } else if (node==END_TSO_QUEUE) {
+ fprintf(stderr,"END_TSO_QUEUE\n");
+ return;
+ }
+ /* size_and_ptrs(node,&size,&ptrs); */
+ info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_hdr_ty);
+
+ /* vhs = var_hdr_size(node); */
+ display_info_type(info_ptr,info_ty);
+
+ fprintf(stderr,"Node: 0x%lx", node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ if (info_ptr->type==TSO)
+ fprintf(stderr," TSO: 0x%lx (%x) IP: 0x%lx (%s), type %s \n ",
+ (StgTSO*)node, ((StgTSO*)node)->id, info_ptr, info_hdr_ty, info_ty);
+ else
+ fprintf(stderr," IP: 0x%lx (%s), type %s \n VHS: %d, size: %ld, ptrs:%ld, nonptrs: %ld\n ",
+ info_ptr,info_hdr_ty,info_ty,vhs,size,ptrs,nonptrs);
+
+ /* For now, we ignore the variable header */
+
+ fprintf(stderr," Ptrs: ");
+ for(i=0; i < ptrs; ++i)
+ {
+ if ( (i+1) % 6 == 0)
+ fprintf(stderr,"\n ");
+ fprintf(stderr," 0x%lx[P]",node->payload[i]);
+ };
+
+ fprintf(stderr," Data: ");
+ for(i=0; i < nonptrs; ++i)
+ {
+ if( (i+1) % 6 == 0)
+ fprintf(stderr,"\n ");
+ fprintf(stderr," %lu[D]",node->payload[ptrs+i]);
+ }
+ fprintf(stderr, "\n");
+
+
+ switch (info_ptr->type)
+ {
+ case TSO:
+ fprintf(stderr,"\n TSO_LINK: %#lx",
+ ((StgTSO*)node)->link);
+ break;
+
+ case BLACKHOLE:
+ case RBH:
+ bqe = ((StgBlockingQueue*)node)->blocking_queue;
+ fprintf(stderr," BQ of %#lx: ", node);
+ G_PRINT_BQ(bqe);
+ break;
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ printf("Panic: found FETCH_ME or FETCH_ME_BQ Infotable in GrAnSim system.\n");
+ break;
+ default:
+ /* do nothing */
+ }
+}
+
+void
+G_PPN(node) /* Extracted from PrintPacket in Pack.lc */
+StgClosure* node;
+{
+ StgInfoTable *info ;
+ nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0, locn = 0;
+ char info_type[80];
+
+ /* size_and_ptrs(node,&size,&ptrs); */
+ info = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type);
+
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ info->type == BLACKHOLE || info->type == RBH )
+ size = ptrs = nonptrs = vhs = 0;
+
+ if (closure_THUNK(node)) {
+ if (!closure_UNPOINTED(node))
+ fputs("SHARED ", stderr);
+ else
+ fputs("UNSHARED ", stderr);
+ }
+ if (info->type==BLACKHOLE) {
+ fputs("BLACK HOLE\n", stderr);
+ } else {
+ /* Fixed header */
+    fprintf(stderr, "(%s) FH [%#lx", info_type, ((StgWord*)node)[locn++]);
+    for (i = 1; i < _HS; i++)
+      fprintf(stderr, " %#lx", ((StgWord*)node)[locn++]);
+
+ /* Variable header */
+ if (vhs > 0) {
+ fprintf(stderr, "] VH [%#lx", node->payload[0]);
+
+ for (i = 1; i < vhs; i++)
+ fprintf(stderr, " %#lx", node->payload[i]);
+ }
+
+ fprintf(stderr, "] PTRS %u", ptrs);
+
+ /* Non-pointers */
+ if (nonptrs > 0) {
+ fprintf(stderr, " NPTRS [%#lx", node->payload[ptrs]);
+
+ for (i = 1; i < nonptrs; i++)
+ fprintf(stderr, " %#lx", node->payload[ptrs+i]);
+
+ putc(']', stderr);
+ }
+ putc('\n', stderr);
+ }
+
+}
+
+#if 0
+// ToDo: fix this!! -- HWL
+void
+G_INFO_TABLE(node)
+StgClosure *node;
+{
+ StgInfoTable *info_ptr;
+ nat size = 0, ptrs = 0, nonptrs = 0, vhs = 0;
+ char info_type[80], hdr_type[80];
+
+ info_hdr_type(info_ptr, hdr_type);
+
+ // get_itbl(node);
+ info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type);
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ info_type,info_ptr,(W_) ENTRY_CODE(info_ptr),
+ size, ptrs);
+ // INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+
+ if (closure_THUNK(node) && !closure_UNPOINTED(node) ) {
+ fprintf(stderr," RBH InfoPtr: %#lx\n",
+ RBH_INFOPTR(info_ptr));
+ }
+
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr,"Cost Centre (?): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif /* _INFO_COMPACTING */
+}
+#endif /* 0 */
+
+//@cindex G_PRINT_BQ
+void
+G_PRINT_BQ(node)
+StgClosure* node;
+{
+ StgInfoTable *info;
+ StgTSO *tso, *last;
+  char str0[80];
+
+ fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
+ CurrentProc,CurrentTime[CurrentProc]);
+ if ( node == (StgClosure*)NULL ) {
+ fprintf(stderr," NULL.\n");
+ return;
+ }
+ if ( node == END_TSO_QUEUE ) {
+ fprintf(stderr," _|_\n");
+ return;
+ }
+ tso = ((StgBlockingQueue*)node)->blocking_queue;
+  while (tso != END_TSO_QUEUE) {
+    PEs proc;
+
+    /* Find where this queue element lives */
+    proc = where_is((StgClosure*)tso);
+    info = get_itbl((StgClosure*)tso);
+
+    switch (info->type) {
+    case TSO:
+      strcpy(str0,"TSO");
+      break;
+    case BLOCKED_FETCH:
+      strcpy(str0,"BLOCKED_FETCH");
+      break;
+    default:
+      strcpy(str0,"???");
+      break;
+    }
+
+    if(proc == CurrentProc)
+      fprintf(stderr," %#lx (%#lx) L %s,",
+	      tso, tso->link, str0);
+    else
+      fprintf(stderr," %#lx (%#lx) G (PE %d) %s,",
+	      tso, tso->link, proc, str0);
+
+    last = tso;
+    tso = last->link;
+  }
+ if ( tso == END_TSO_QUEUE )
+ fprintf(stderr," _|_\n");
+}
+
+//@node Threads, Events, Closures, Debugging routines for GranSim and GUM
+//@subsection Threads
+
+void
+G_CURR_THREADQ(verbose)
+StgInt verbose;
+{
+ fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
+ G_THREADQ(run_queue_hd, verbose);
+}
+
+void
+G_THREADQ(closure, verbose)
+StgTSO* closure;
+StgInt verbose;
+{
+ StgTSO* x;
+
+ fprintf(stderr,"Thread Queue: ");
+ for (x=closure; x!=END_TSO_QUEUE; x=x->link)
+ if (verbose)
+ G_TSO(x,0);
+ else
+ fprintf(stderr," %#lx",x);
+
+ if (closure==END_TSO_QUEUE)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+G_TSO(closure,verbose)
+StgTSO* closure;
+StgInt verbose;
+{
+
+ if (closure==END_TSO_QUEUE) {
+ fprintf(stderr,"TSO at %#lx is END_TSO_QUEUE!\n");
+ return;
+ }
+
+ if ( verbose & 0x08 ) { /* short info */
+ fprintf(stderr,"[TSO @ %#lx, PE %d]: Id: %#lx, Link: %#lx\n",
+ closure,where_is(closure),
+ closure->id,closure->link);
+ return;
+ }
+
+ fprintf(stderr,"TSO at %#lx has the following contents:\n",
+ closure);
+
+ fprintf(stderr,"> Id: \t%#lx",closure->id);
+ // fprintf(stderr,"\tstate: \t%#lx",closure->state);
+ fprintf(stderr,"\twhat_next: \t%#lx",closure->what_next);
+ fprintf(stderr,"\tlink: \t%#lx\n",closure->link);
+ fprintf(stderr,"\twhy_blocked: \t%d", closure->why_blocked);
+ fprintf(stderr,"\tblock_info: \t%p\n", closure->block_info);
+ // fprintf(stderr,"\tType: \t%s\n",type_name[TSO_TYPE(closure)]);
+ fprintf(stderr,">PRI: \t%#lx", closure->gran.pri);
+ fprintf(stderr,"\tMAGIC: \t%#lx %s\n", closure->gran.magic,
+ (closure->gran.magic==TSO_MAGIC ? "it IS a TSO" : "THIS IS NO TSO!!"));
+ if ( verbose & 0x04 ) {
+ fprintf(stderr, "Stack: stack @ %#lx (stack_size: %u; max_stack_size: %u)\n",
+ closure->stack, closure->stack_size, closure->max_stack_size);
+ fprintf(stderr, " sp: %#lx, su: %#lx, splim: %#lx\n",
+ closure->sp, closure->su, closure->splim);
+ }
+ // fprintf(stderr,"\n");
+ if (verbose & 0x01) {
+ // fprintf(stderr,"} LOCKED: \t%#lx",closure->locked);
+ fprintf(stderr,"} SPARKNAME: \t%#lx\n", closure->gran.sparkname);
+ fprintf(stderr,"} STARTEDAT: \t%#lx", closure->gran.startedat);
+ fprintf(stderr,"\tEXPORTED: \t%#lx\n", closure->gran.exported);
+ fprintf(stderr,"} BASICBLOCKS: \t%#lx", closure->gran.basicblocks);
+ fprintf(stderr,"\tALLOCS: \t%#lx\n", closure->gran.allocs);
+ fprintf(stderr,"} EXECTIME: \t%#lx", closure->gran.exectime);
+ fprintf(stderr,"\tFETCHTIME: \t%#lx\n", closure->gran.fetchtime);
+ fprintf(stderr,"} FETCHCOUNT: \t%#lx", closure->gran.fetchcount);
+ fprintf(stderr,"\tBLOCKTIME: \t%#lx\n", closure->gran.blocktime);
+ fprintf(stderr,"} BLOCKCOUNT: \t%#lx", closure->gran.blockcount);
+ fprintf(stderr,"\tBLOCKEDAT: \t%#lx\n", closure->gran.blockedat);
+ fprintf(stderr,"} GLOBALSPARKS:\t%#lx", closure->gran.globalsparks);
+ fprintf(stderr,"\tLOCALSPARKS:\t%#lx\n", closure->gran.localsparks);
+ }
+ if ( verbose & 0x02 ) {
+ fprintf(stderr,"BQ that starts with this TSO: ");
+ G_PRINT_BQ(closure);
+ }
+}
+
+//@node Events, Sparks, Threads, Debugging routines for GranSim and GUM
+//@subsection Events
+
+void
+G_EVENT(event, verbose)
+rtsEventQ event;
+StgInt verbose;
+{
+ if (verbose) {
+ print_event(event);
+ }else{
+ fprintf(stderr," %#lx",event);
+ }
+}
+
+void
+G_EVENTQ(verbose)
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ rtsEventQ x;
+
+ fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=x->next) {
+ G_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+G_PE_EQ(pe,verbose)
+PEs pe;
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ rtsEventQ x;
+
+ fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=x->next) {
+ if (x->proc==pe)
+ G_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+//@node Sparks, Processors, Events, Debugging routines for GranSim and GUM
+//@subsection Sparks
+
+void
+G_SPARK(spark, verbose)
+rtsSparkQ spark;
+StgInt verbose;
+{
+ if (spark==(rtsSpark*)NULL) {
+ belch("G_SPARK: NULL spark; aborting");
+ return;
+ }
+ if (verbose)
+ print_spark(spark);
+ else
+ fprintf(stderr," %#lx",spark);
+}
+
+void
+G_SPARKQ(spark,verbose)
+rtsSparkQ spark;
+StgInt verbose;
+{
+ rtsSparkQ x;
+
+ if (spark==(rtsSpark*)NULL) {
+ belch("G_SPARKQ: NULL spark; aborting");
+ return;
+ }
+
+ fprintf(stderr,"RtsSparkQ (hd @%#lx):\n",spark);
+ for (x=spark; x!=NULL; x=x->next) {
+ G_SPARK(x,verbose);
+ }
+ if (spark==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+G_CURR_SPARKQ(verbose)
+StgInt verbose;
+{
+ G_SPARKQ(pending_sparks_hd,verbose);
+}
+
+//@node Processors, Shortcuts, Sparks, Debugging routines for GranSim and GUM
+//@subsection Processors
+
+void
+G_PROC(proc,verbose)
+StgInt proc;
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ extern char *proc_status_names[];
+
+ fprintf(stderr,"Status of proc %d at time %d (%#lx): %s (%s)\n",
+ proc,CurrentTime[proc],CurrentTime[proc],
+ (CurrentProc==proc)?"ACTIVE":"INACTIVE",
+ proc_status_names[procStatus[proc]]);
+ G_THREADQ(run_queue_hds[proc],verbose & 0x2);
+ if ( (CurrentProc==proc) )
+ G_TSO(CurrentTSO,1);
+
+ if (EventHd!=NULL)
+ fprintf(stderr,"Next event (%s) is on proc %d\n",
+ event_names[EventHd->evttype],EventHd->proc);
+
+ if (verbose & 0x1) {
+ fprintf(stderr,"\nREQUIRED sparks: ");
+ G_SPARKQ(pending_sparks_hds[proc],1);
+ fprintf(stderr,"\nADVISORY_sparks: ");
+ G_SPARKQ(pending_sparks_hds[proc],1);
+ }
+}
+
+//@node Shortcuts, Printing info type, Processors, Debugging routines for GranSim and GUM
+//@subsection Shortcuts
+
+/* Debug Processor */
+void
+GP(proc)
+StgInt proc;
+{ G_PROC(proc,1);
+}
+
+/* Debug Current Processor */
+void
+GCP(){ G_PROC(CurrentProc,2); }
+
+/* Debug TSO */
+void
+GT(StgPtr tso){
+ G_TSO(tso,1);
+}
+
+/* Debug CurrentTSO */
+void
+GCT(){
+ fprintf(stderr,"Current Proc: %d\n",CurrentProc);
+ G_TSO(CurrentTSO,1);
+}
+
+/* Shorthand for debugging event queue */
+void
+GEQ() { G_EVENTQ(1); }
+
+/* Shorthand for debugging thread queue of a processor */
+void
+GTQ(PEs p) { G_THREADQ(run_queue_hds[p],1); }
+
+/* Shorthand for debugging thread queue of current processor */
+void
+GCTQ() { G_THREADQ(run_queue_hds[CurrentProc],1); }
+
+/* Shorthand for debugging spark queue of a processor */
+void
+GSQ(PEs p) { G_SPARKQ(pending_sparks_hds[p],1); }
+
+/* Shorthand for debugging spark queue of current processor */
+void
+GCSQ() { G_CURR_SPARKQ(1); }
+
+/* Shorthand for printing a node */
+void
+GN(StgPtr node) { G_PRINT_NODE(node); }
+
+/* Shorthand for printing info table */
+#if 0
+// ToDo: fix -- HWL
+void
+GIT(StgPtr node) { G_INFO_TABLE(node); }
+#endif
+
+void
+printThreadQPtrs(void)
+{
+ PEs p;
+ for (p=0; p<RtsFlags.GranFlags.proc; p++) {
+ fprintf(stderr,", PE %d: (hd=%p,tl=%p)",
+ run_queue_hds[p], run_queue_tls[p]);
+ }
+}
+
+void
+printThreadQ(StgTSO *tso) { G_THREADQ(tso, 0); };
+
+void
+printSparkQ(rtsSpark *spark) { G_SPARKQ(spark, 0); };
+
+void
+printThreadQ_verbose(StgTSO *tso) { G_THREADQ(tso, 1); };
+
+void
+printSparkQ_verbose(rtsSpark *spark) { G_SPARKQ(spark, 1); };
+
+/* Shorthand for some of ADR's debugging functions */
+
+#endif /* GRAN && GRAN_CHECK*/
+
+#if 0
+void
+DEBUG_PRINT_NODE(node)
+StgPtr node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ StgInt size = 0, ptrs = 0, i, vhs = 0;
+ char info_type[80];
+
+ info_hdr_type(info_ptr, info_type);
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ fprintf(stderr,"Node: 0x%lx", (W_) node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(PROFILING)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
+ info_ptr,info_type,size,ptrs);
+
+ /* For now, we ignore the variable header */
+
+ for(i=0; i < size; ++i)
+ {
+ if(i == 0)
+ fprintf(stderr,"Data: ");
+
+ else if(i % 6 == 0)
+ fprintf(stderr,"\n ");
+
+ if(i < ptrs)
+ fprintf(stderr," 0x%lx[P]",*(node+_HS+vhs+i));
+ else
+ fprintf(stderr," %lu[D]",*(node+_HS+vhs+i));
+ }
+ fprintf(stderr, "\n");
+}
+
+
+#define INFO_MASK 0x80000000
+
+void
+DEBUG_TREE(node)
+StgPtr node;
+{
+ W_ size = 0, ptrs = 0, i, vhs = 0;
+
+ /* Don't print cycles */
+ if((INFO_PTR(node) & INFO_MASK) != 0)
+ return;
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ DEBUG_PRINT_NODE(node);
+ fprintf(stderr, "\n");
+
+ /* Mark the node -- may be dangerous */
+ INFO_PTR(node) |= INFO_MASK;
+
+ for(i = 0; i < ptrs; ++i)
+ DEBUG_TREE((StgPtr)node[i+vhs+_HS]);
+
+ /* Unmark the node */
+ INFO_PTR(node) &= ~INFO_MASK;
+}
+
+
+void
+DEBUG_INFO_TABLE(node)
+StgPtr node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ char *iStgPtrtype = info_hdr_type(info_ptr);
+
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ iStgPtrtype,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(PROFILING)
+ fprintf(stderr,"Cost Centre (?): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif /* _INFO_COMPACTING */
+}
+#endif /* 0 */
+
+//@node Printing info type, Printing Packet Contents, Shortcuts, Debugging routines for GranSim and GUM
+//@subsection Printing info type
+
+char *
+display_info_type(closure, str)
+StgClosure *closure;
+char *str;
+{
+ strcpy(str,"");
+ if ( closure_HNF(closure) )
+ strcat(str,"|_HNF ");
+ else if ( closure_BITMAP(closure) )
+ strcat(str,"|_BTM");
+ else if ( !closure_SHOULD_SPARK(closure) )
+ strcat(str,"|_NS");
+ else if ( closure_STATIC(closure) )
+ strcat(str,"|_STA");
+ else if ( closure_THUNK(closure) )
+ strcat(str,"|_THU");
+ else if ( closure_MUTABLE(closure) )
+ strcat(str,"|_MUT");
+ else if ( closure_UNPOINTED(closure) )
+ strcat(str,"|_UPT");
+ else if ( closure_SRT(closure) )
+ strcat(str,"|_SRT");
+
+ return(str);
+}
+
+/*
+ PrintPacket is in Pack.c because it makes use of closure queues
+*/
+
+#if defined(GRAN) || defined(PAR)
+
+/*
+ Print graph rooted at q. The structure of this recursive printing routine
+ should be the same as in the graph traversals when packing a graph in
+ GUM. Thus, it demonstrates the structure of such a generic graph
+ traversal, and in particular, how to extract pointer and non-pointer info
+ from the multitude of different heap objects available.
+
+  This code was stolen from {evacuate}!!
+*/
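+
+/*
+  Usage sketch (an assumption, not part of this patch): from a debugging
+  session one might dump the graph reachable from the current thread with
+
+    PrintGraph((StgClosure *)CurrentTSO, 0);
+
+  Each recursion level indents its output by one additional space.
+*/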
+
+void
+PrintGraph(StgClosure *p, int indent_level)
+{
+ void PrintGraph_(StgClosure *p, int indent_level);
+
+ ASSERT(tmpClosureTable==NULL);
+
+ /* init hash table */
+ tmpClosureTable = allocHashTable();
+
+ /* now do the real work */
+ PrintGraph_(p, indent_level);
+
+ /* nuke hash table */
+ freeHashTable(tmpClosureTable, NULL);
+ tmpClosureTable = NULL;
+}
+
+/*
+  This is the actual worker function.
+ All recursive calls should be made to this function.
+*/
+void
+PrintGraph_(StgClosure *p, int indent_level)
+{
+ StgPtr x, q;
+ rtsBool printed = rtsFalse;
+ nat i, j;
+ const StgInfoTable *info;
+
+ /* check whether we have met this node already to break cycles */
+ if (lookupHashTable(tmpClosureTable, (StgWord)p)) { // ie. already touched
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ fprintf(stderr, "#### cylce to %p", p);
+ return;
+ }
+
+ /* record that we are processing this closure */
+ insertHashTable(tmpClosureTable, (StgWord) p, (void *)rtsTrue/*non-NULL*/);
+
+ q = p; /* save ptr to object */
+
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ ASSERT(p!=(StgClosure*)NULL);
+ ASSERT(LOOKS_LIKE_STATIC(p) ||
+ LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) ||
+ IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)));
+
+ printClosure(p); // prints contents of this one closure
+
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ info = get_itbl((StgClosure *)p);
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ fprintf(stderr, "BCO (%p)\n", p);
+ /*
+ for (i = 0; i < bco->n_ptrs; i++) {
+ // bcoConstCPtr(bco,i) =
+ PrintGraph_(bcoConstCPtr(bco,i), indent_level+1);
+ }
+ */
+ // p += bco_sizeW(bco);
+ break;
+ }
+
+ case MVAR:
+ /* treat MVars specially, because we don't want to PrintGraph the
+ * mut_link field in the middle of the closure.
+ */
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ // evac_gen = 0;
+ fprintf(stderr, "MVAR (%p) with 3 pointers (head, tail, value)\n", p);
+ // (StgClosure *)mvar->head =
+ PrintGraph_((StgClosure *)mvar->head, indent_level+1);
+ // (StgClosure *)mvar->tail =
+ PrintGraph_((StgClosure *)mvar->tail, indent_level+1);
+ //(StgClosure *)mvar->value =
+ PrintGraph_((StgClosure *)mvar->value, indent_level+1);
+ // p += sizeofW(StgMVar);
+ // evac_gen = saved_evac_gen;
+ break;
+ }
+
+ case THUNK_2_0:
+ if (!printed) {
+ fprintf(stderr, "THUNK_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_2_0:
+ if (!printed) {
+ fprintf(stderr, "FUN_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_2_0:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // ((StgClosure *)p)->payload[1] =
+ PrintGraph_(((StgClosure *)p)->payload[1],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_0:
+ // scavenge_srt(info);
+ fprintf(stderr, "THUNK_1_0 (%p) with 1 pointer\n", p);
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+ break;
+
+ case FUN_1_0:
+ if (!printed) {
+ fprintf(stderr, "FUN_1_0 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_1_0:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_1:
+ fprintf(stderr, "THUNK_0_1 (%p) with 0 pointers\n", p);
+ // scavenge_srt(info);
+ // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+ break;
+
+ case FUN_0_1:
+ fprintf(stderr, "FUN_0_1 (%p) with 0 pointers\n", p);
+ //scavenge_srt(info);
+ case CONSTR_0_1:
+ fprintf(stderr, "CONSTR_0_1 (%p) with 0 pointers\n", p);
+ //p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_2:
+ if (!printed) {
+ fprintf(stderr, "THUNK_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_0_2:
+ if (!printed) {
+ fprintf(stderr, "FUN_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_0_2:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_1:
+ if (!printed) {
+ fprintf(stderr, "THUNK_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_1_1:
+ if (!printed) {
+ fprintf(stderr, "FUN_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_1_1:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case FUN:
+ if (!printed) {
+ fprintf(stderr, "FUN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case THUNK:
+ if (!printed) {
+ fprintf(stderr, "THUNK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ /* fall through */
+
+ case CONSTR:
+ if (!printed) {
+ fprintf(stderr, "CONSTR (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* basically same as loop in STABLE_NAME case */
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ PrintGraph_(((StgClosure *)p)->payload[i],
+ indent_level+1);
+ break;
+ /* NOT fall through */
+
+ case WEAK:
+ if (!printed) {
+ fprintf(stderr, "WEAK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case FOREIGN:
+ if (!printed) {
+ fprintf(stderr, "FOREIGN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case STABLE_NAME:
+ {
+ StgPtr end;
+
+ if (!printed) {
+ fprintf(stderr, "STABLE_NAME (%p) with %d pointers (not followed!)\n",
+ p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ // (StgClosure *)*p =
+ //PrintGraph_((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ //fputs("\n", stderr);
+ // p += info->layout.payload.nptrs;
+ break;
+ }
+
+ case IND_PERM:
+ //if (step->gen->no != 0) {
+ // SET_INFO(((StgClosure *)p), &IND_OLDGEN_PERM_info);
+ //}
+ if (!printed) {
+ fprintf(stderr, "IND_PERM (%p) with indirection to\n",
+ p, ((StgIndOldGen *)p)->indirectee);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case IND_OLDGEN_PERM:
+ if (!printed) {
+ fprintf(stderr, "IND_OLDGEN_PERM (%p) with indirection to %p\n",
+ p, ((StgIndOldGen *)p)->indirectee);
+ printed = rtsTrue;
+ }
+ // ((StgIndOldGen *)p)->indirectee =
+ PrintGraph_(((StgIndOldGen *)p)->indirectee,
+ indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordOldToNewPtrs((StgMutClosure *)p);
+ //}
+ // p += sizeofW(StgIndOldGen);
+ break;
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ fprintf(stderr, "MUT_VAR (%p) pointing to %p\n", p, ((StgMutVar *)p)->var);
+ if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) {
+ //evac_gen = 0;
+ PrintGraph_(((StgMutVar *)p)->var, indent_level+1);
+ //evac_gen = saved_evac_gen;
+ }
+ //p += sizeofW(StgMutVar);
+ break;
+
+ case CAF_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case SE_CAF_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "SE_CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case SE_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "SE_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ //p += BLACKHOLE_sizeW();
+ break;
+
+ case BLACKHOLE_BQ:
+ {
+ StgBlockingQueue *bh = (StgBlockingQueue *)p;
+ // (StgClosure *)bh->blocking_queue =
+ fprintf(stderr, "BLACKHOLE_BQ (%p) pointing to %p\n",
+ p, (StgClosure *)bh->blocking_queue);
+ PrintGraph_((StgClosure *)bh->blocking_queue, indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordMutable((StgMutClosure *)bh);
+ //}
+ // p += BLACKHOLE_sizeW();
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ fprintf(stderr, "THUNK_SELECTOR (%p) pointing to %p\n",
+ p, s->selectee);
+ PrintGraph_(s->selectee, indent_level+1);
+ // p += THUNK_SELECTOR_sizeW();
+ break;
+ }
+
+ case IND:
+ fprintf(stderr, "IND (%p) pointing to %p\n", p, ((StgInd*)p)->indirectee);
+ PrintGraph_(((StgInd*)p)->indirectee, indent_level+1);
+ break;
+
+ case IND_OLDGEN:
+ fprintf(stderr, "IND_OLDGEN (%p) pointing to %p\n",
+ p, ((StgIndOldGen*)p)->indirectee);
+ PrintGraph_(((StgIndOldGen*)p)->indirectee, indent_level+1);
+ break;
+
+ case CONSTR_INTLIKE:
+ fprintf(stderr, "CONSTR_INTLIKE (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_CHARLIKE:
+ fprintf(stderr, "CONSTR_CHARLIKE (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_STATIC:
+ fprintf(stderr, "CONSTR_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_NOCAF_STATIC:
+ fprintf(stderr, "CONSTR_NOCAF_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case THUNK_STATIC:
+ fprintf(stderr, "THUNK_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case FUN_STATIC:
+ fprintf(stderr, "FUN_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case IND_STATIC:
+ fprintf(stderr, "IND_STATIC (%p) with 0 pointers\n", p);
+ break;
+
+ case RET_BCO:
+ fprintf(stderr, "RET_BCO (%p) with 0 pointers\n", p);
+ break;
+ case RET_SMALL:
+ fprintf(stderr, "RET_SMALL (%p) with 0 pointers\n", p);
+ break;
+ case RET_VEC_SMALL:
+ fprintf(stderr, "RET_VEC_SMALL (%p) with 0 pointers\n", p);
+ break;
+ case RET_BIG:
+ fprintf(stderr, "RET_BIG (%p) with 0 pointers\n", p);
+ break;
+ case RET_VEC_BIG:
+ fprintf(stderr, "RET_VEC_BIG (%p) with 0 pointers\n", p);
+ break;
+ case RET_DYN:
+ fprintf(stderr, "RET_DYN (%p) with 0 pointers\n", p);
+ break;
+ case UPDATE_FRAME:
+ fprintf(stderr, "UPDATE_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case STOP_FRAME:
+ fprintf(stderr, "STOP_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case CATCH_FRAME:
+ fprintf(stderr, "CATCH_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case SEQ_FRAME:
+ fprintf(stderr, "SEQ_FRAME (%p) with 0 pointers\n", p);
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ fprintf(stderr, "AP_UPD (%p) with 0 pointers\n", p);
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * PrintGraph_ the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+
+ fprintf(stderr, "PAP (%p) pointing to %p\n", p, pap->fun);
+ // pap->fun =
+ //PrintGraph_(pap->fun, indent_level+1);
+ //scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args);
+ //p += pap_sizeW(pap);
+ break;
+ }
+
+ case ARR_WORDS:
+ /* an array of (non-mutable) words */
+ fprintf(stderr, "ARR_WORDS (%p) of %d non-ptrs (maybe a string?)\n",
+ p, ((StgArrWords *)q)->words);
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ StgPtr next;
+
+ fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)\n",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+ // evac_gen = 0; /* repeatedly mutable */
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // (StgClosure *)*p =
+ // PrintGraph_((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ fputs("\n", stderr);
+ //evac_gen = saved_evac_gen;
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ StgPtr start = p, next;
+
+ fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // (StgClosure *)*p =
+ // PrintGraph_((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ fputs("\n", stderr);
+ //if (failed_to_evac) {
+ /* we can do this easier... */
+ // recordMutable((StgMutClosure *)start);
+ // failed_to_evac = rtsFalse;
+ //}
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso;
+
+ tso = (StgTSO *)p;
+ fprintf(stderr, "TSO (%p) with link field %p\n", p, (StgClosure *)tso->link);
+ // evac_gen = 0;
+ /* chase the link field for any TSOs on the same queue */
+ // (StgClosure *)tso->link =
+ PrintGraph_((StgClosure *)tso->link, indent_level+1);
+ //if (tso->blocked_on) {
+ // tso->blocked_on = PrintGraph_(tso->blocked_on);
+ //}
+ /* scavenge this thread's stack */
+ //scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
+ //evac_gen = saved_evac_gen;
+ //p += tso_sizeW(tso);
+ break;
+ }
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p));
+ //if (LOOKS_LIKE_GHC_INFO(rip))
+ // fprintf(stderr, "RBH (%p) with 0 pointers (reverted type=%s)\n",
+ // p, info_type_by_ip(rip));
+ //else
+ fprintf(stderr, "RBH (%p) with 0 pointers (reverted IP=%x)\n",
+ p, rip);
+ }
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ fprintf(stderr, "BLOCKED_FETCH (%p) with 0 pointers (link=%p)\n",
+ p, ((StgBlockedFetch *)p)->link);
+ break;
+ case FETCH_ME:
+ fprintf(stderr, "FETCH_ME (%p) with 0 pointers\n", p);
+ break;
+ case FETCH_ME_BQ:
+ fprintf(stderr, "FETCH_ME_BQ (%p) with 0 pointers (blocking_queue=%p)\n",
+ p, ((StgFetchMeBlockingQueue *)p)->blocking_queue);
+ break;
+#endif
+
+#ifdef DIST
+ case REMOTE_REF:
+ fprintf(stderr, "REMOTE_REF (%p) with 0 pointers\n", p);
+ break;
+#endif
+
+ case EVACUATED:
+ fprintf(stderr, "EVACUATED (%p) with 0 pointers (evacuee=%p)\n",
+ p, ((StgEvacuated *)p)->evacuee);
+ break;
+
+ default:
+ barf("PrintGraph_: unknown closure %d (%s)",
+ info -> type, info_type(info));
+ }
+
+ /* If we didn't manage to promote all the objects pointed to by
+ * the current object, then we have to designate this object as
+ * mutable (because it contains old-to-new generation pointers).
+ */
+ //if (failed_to_evac) {
+ // mkMutCons((StgClosure *)q, &generations[evac_gen]);
+ // failed_to_evac = rtsFalse;
+ //}
+}
+
+# if defined(PAR)
+/*
+ Generate a finger-print for a graph.
+  A finger-print is a string, with each char representing one node;
+  it is generated by a depth-first traversal.
+*/
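+
+/*
+  Usage sketch (an assumption, not part of this patch): the caller must
+  supply an empty buffer of MAX_FINGER_PRINT_LEN characters (enforced by
+  the ASSERTs below), e.g.
+
+    char fp[MAX_FINGER_PRINT_LEN];
+    fp[0] = '\0';
+    GraphFingerPrint(graph_root, fp);
+    fprintf(stderr, "fingerprint of %p: %s\n", graph_root, fp);
+
+  Since the traversal is deterministic, comparing the strings obtained
+  before packing and after unpacking a graph is a cheap sanity check.
+*/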
+
+void
+GraphFingerPrint(StgClosure *p, char *finger_print)
+{
+ void GraphFingerPrint_(StgClosure *p, char *finger_print);
+
+ ASSERT(tmpClosureTable==NULL);
+ ASSERT(strlen(finger_print)==0);
+
+ /* init hash table */
+ tmpClosureTable = allocHashTable();
+
+ /* now do the real work */
+ GraphFingerPrint_(p, finger_print);
+
+ /* nuke hash table */
+ freeHashTable(tmpClosureTable, NULL);
+ tmpClosureTable = NULL;
+}
+
+/*
+  This is the actual worker function.
+ All recursive calls should be made to this function.
+*/
+void
+GraphFingerPrint_(StgClosure *p, char *finger_print)
+{
+ StgPtr x, q;
+ rtsBool printed = rtsFalse;
+ nat i, j, len;
+ const StgInfoTable *info;
+
+ q = p; /* save ptr to object */
+ len = strlen(finger_print);
+ ASSERT(len<=MAX_FINGER_PRINT_LEN);
+ /* at most 7 chars for this node (I think) */
+ if (len+7>=MAX_FINGER_PRINT_LEN)
+ return;
+
+ /* check whether we have met this node already to break cycles */
+ if (lookupHashTable(tmpClosureTable, (StgWord)p)) { // ie. already touched
+ strcat(finger_print, "#");
+ return;
+ }
+
+ /* record that we are processing this closure */
+ insertHashTable(tmpClosureTable, (StgWord) p, (void *)rtsTrue/*non-NULL*/);
+
+ ASSERT(p!=(StgClosure*)NULL);
+ ASSERT(LOOKS_LIKE_STATIC(p) ||
+ LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) ||
+ IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)));
+
+ info = get_itbl((StgClosure *)p);
+ // append char for this node
+ finger_print[len] = finger_print_char[info->type]; finger_print[len+1] = '\0';
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ //%% fprintf(stderr, "BCO (%p) with %d pointers\n", p, bco->n_ptrs);
+ /*
+ for (i = 0; i < bco->n_ptrs; i++) {
+ // bcoConstCPtr(bco,i) =
+ GraphFingerPrint_(bcoConstCPtr(bco,i), finger_print);
+ }
+ */
+ // p += bco_sizeW(bco);
+ break;
+ }
+
+ case MVAR:
+ break;
+
+ case THUNK_2_0:
+ case FUN_2_0:
+ case CONSTR_2_0:
+ // append char for this node
+ strcat(finger_print, "22(");
+ GraphFingerPrint_(((StgClosure *)p)->payload[0], finger_print);
+ GraphFingerPrint_(((StgClosure *)p)->payload[1], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ break;
+
+ case THUNK_1_0:
+ case FUN_1_0:
+ case CONSTR_1_0:
+ // append char for this node
+ strcat(finger_print, "12(");
+ GraphFingerPrint_(((StgClosure *)p)->payload[0], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ break;
+
+ case THUNK_0_1:
+ case FUN_0_1:
+ case CONSTR_0_1:
+ // append char for this node
+ strcat(finger_print, "01");
+ break;
+
+ case THUNK_0_2:
+ case FUN_0_2:
+ case CONSTR_0_2:
+ // append char for this node
+ strcat(finger_print, "02");
+ break;
+
+ case THUNK_1_1:
+ case FUN_1_1:
+ case CONSTR_1_1:
+ // append char for this node
+ strcat(finger_print, "11(");
+ GraphFingerPrint_(((StgClosure *)p)->payload[0], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ break;
+
+ case FUN:
+ case THUNK:
+ case CONSTR:
+ /* basically same as loop in STABLE_NAME case */
+ {
+ char str[6];
+ sprintf(str,"%d?(",info->layout.payload.ptrs);
+ strcat(finger_print,str);
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ GraphFingerPrint_(((StgClosure *)p)->payload[i], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ }
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ {
+ StgPtr end;
+ char str[6];
+ sprintf(str,"%d?", info->layout.payload.ptrs);
+ strcat(finger_print,str);
+
+ //end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ //for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ // GraphFingerPrint_((StgClosure *)*p, finger_print);
+ //}
+ break;
+ }
+
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ GraphFingerPrint_(((StgIndOldGen *)p)->indirectee, finger_print);
+ break;
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) {
+ GraphFingerPrint_(((StgMutVar *)p)->var, finger_print);
+ }
+ break;
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ break;
+
+ case BLACKHOLE_BQ:
+ {
+ StgBlockingQueue *bh = (StgBlockingQueue *)p;
+ // GraphFingerPrint_((StgClosure *)bh->blocking_queue, finger_print);
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ GraphFingerPrint_(s->selectee, finger_print);
+ break;
+ }
+
+ case IND:
+ GraphFingerPrint_(((StgInd*)p)->indirectee, finger_print);
+ break;
+
+ case IND_OLDGEN:
+ GraphFingerPrint_(((StgIndOldGen*)p)->indirectee, finger_print);
+ break;
+
+ case IND_STATIC:
+ GraphFingerPrint_(((StgIndOldGen*)p)->indirectee, finger_print);
+ break;
+
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ break;
+
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ case UPDATE_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * GraphFingerPrint_ the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+ char str[6];
+ sprintf(str,"%d",pap->n_args);
+ strcat(finger_print,str);
+ //GraphFingerPrint_(pap->fun, finger_print); // ??
+ break;
+ }
+
+ case ARR_WORDS:
+ {
+ char str[6];
+ sprintf(str,"%d",((StgArrWords*)p)->words);
+ strcat(finger_print,str);
+ }
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ char str[6];
+ sprintf(str,"%d",((StgMutArrPtrs*)p)->ptrs);
+ strcat(finger_print,str);
+ }
+ {
+ StgPtr next;
+ //next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ //for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // GraphFingerPrint_((StgClosure *)*p, finger_print);
+ //}
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ char str[6];
+ sprintf(str,"%d",((StgMutArrPtrs*)p)->ptrs);
+ strcat(finger_print,str);
+ }
+ {
+ StgPtr start = p, next;
+ //next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ //for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // GraphFingerPrint_((StgClosure *)*p, finger_print);
+ //}
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso = (StgTSO *)p;
+ char str[6];
+ sprintf(str,"%d",tso->id);
+ strcat(finger_print,str);
+ }
+ //GraphFingerPrint_((StgClosure *)tso->link, indent_level+1);
+ break;
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ {
+ // use this
+ // StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p));
+ }
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ break;
+ case FETCH_ME:
+ break;
+ case FETCH_ME_BQ:
+ break;
+#endif
+#ifdef DIST
+ case REMOTE_REF:
+ break;
+#endif
+ case EVACUATED:
+ break;
+
+ default:
+ barf("GraphFingerPrint_: unknown closure %d (%s)",
+ info -> type, info_type(info));
+ }
+
+}
+# endif /* PAR */
+
+/*
+  Do a sanity check on the whole graph, down to a recursion depth of rec_level.
+  Same structure as PrintGraph (above).
+*/
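+
+/*
+  Usage sketch (an assumption): bound the recursion to keep the check
+  cheap, e.g. check everything reachable from p in at most 5 hops:
+
+    checkGraph(p, 5);
+*/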
+void
+checkGraph(StgClosure *p, int rec_level)
+{
+ StgPtr x, q;
+ nat i, j;
+ const StgInfoTable *info;
+
+ if (rec_level==0)
+ return;
+
+ q = p; /* save ptr to object */
+
+ /* First, the obvious generic checks */
+ ASSERT(p!=(StgClosure*)NULL);
+ checkClosure(p); /* see Sanity.c for what's actually checked */
+
+ info = get_itbl((StgClosure *)p);
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ /*
+ for (i = 0; i < bco->n_ptrs; i++) {
+ checkGraph(bcoConstCPtr(bco,i), rec_level-1);
+ }
+ */
+ break;
+ }
+
+ case MVAR:
+ /* treat MVars specially, because we don't want to PrintGraph the
+ * mut_link field in the middle of the closure.
+ */
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ checkGraph((StgClosure *)mvar->head, rec_level-1);
+ checkGraph((StgClosure *)mvar->tail, rec_level-1);
+ checkGraph((StgClosure *)mvar->value, rec_level-1);
+ break;
+ }
+
+ case THUNK_2_0:
+ case FUN_2_0:
+ case CONSTR_2_0:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ checkGraph(((StgClosure *)p)->payload[1], rec_level-1);
+ break;
+
+ case THUNK_1_0:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ break;
+
+ case FUN_1_0:
+ case CONSTR_1_0:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ break;
+
+ case THUNK_0_1:
+ break;
+
+ case FUN_0_1:
+ case CONSTR_0_1:
+ break;
+
+ case THUNK_0_2:
+ case FUN_0_2:
+ case CONSTR_0_2:
+ break;
+
+ case THUNK_1_1:
+ case FUN_1_1:
+ case CONSTR_1_1:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ break;
+
+ case FUN:
+ case THUNK:
+ case CONSTR:
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ checkGraph(((StgClosure *)p)->payload[i], rec_level-1);
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ {
+ StgPtr end;
+
+ end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ checkGraph(*(StgClosure **)p, rec_level-1);
+ }
+ break;
+ }
+
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ checkGraph(((StgIndOldGen *)p)->indirectee, rec_level-1);
+ break;
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) {
+ checkGraph(((StgMutVar *)p)->var, rec_level-1);
+ }
+ break;
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ break;
+
+ case BLACKHOLE_BQ:
+ break;
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ checkGraph(s->selectee, rec_level-1);
+ break;
+ }
+
+ case IND:
+ checkGraph(((StgInd*)p)->indirectee, rec_level-1);
+ break;
+
+ case IND_OLDGEN:
+ checkGraph(((StgIndOldGen*)p)->indirectee, rec_level-1);
+ break;
+
+ case CONSTR_INTLIKE:
+ break;
+ case CONSTR_CHARLIKE:
+ break;
+ case CONSTR_STATIC:
+ break;
+ case CONSTR_NOCAF_STATIC:
+ break;
+ case THUNK_STATIC:
+ break;
+ case FUN_STATIC:
+ break;
+ case IND_STATIC:
+ break;
+
+ case RET_BCO:
+ break;
+ case RET_SMALL:
+ break;
+ case RET_VEC_SMALL:
+ break;
+ case RET_BIG:
+ break;
+ case RET_VEC_BIG:
+ break;
+ case RET_DYN:
+ break;
+ case UPDATE_FRAME:
+ break;
+ case STOP_FRAME:
+ break;
+ case CATCH_FRAME:
+ break;
+ case SEQ_FRAME:
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * checkGraph the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+
+ checkGraph(pap->fun, rec_level-1);
+ break;
+ }
+
+ case ARR_WORDS:
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ StgPtr next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ checkGraph(*(StgClosure **)p, rec_level-1);
+ }
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ StgPtr start = p, next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ checkGraph(*(StgClosure **)p, rec_level-1);
+ }
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso;
+
+ tso = (StgTSO *)p;
+ checkGraph((StgClosure *)tso->link, rec_level-1);
+ break;
+ }
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ break;
+ case FETCH_ME:
+ break;
+ case FETCH_ME_BQ:
+ break;
+#endif
+ case EVACUATED:
+ barf("checkGraph: found EVACUATED closure %p (%s)",
+ p, info_type(p));
+ break;
+
+  default:
+    break;
+  }
+}
+
+#endif /* GRAN || PAR */
+
+#endif /* GRAN || PAR */
+
+//@node End of File, , Printing Packet Contents, Debugging routines for GranSim and GUM
+//@subsection End of File
diff --git a/rts/parallel/ParallelDebug.h b/rts/parallel/ParallelDebug.h
new file mode 100644
index 0000000000..f8aaeb85d4
--- /dev/null
+++ b/rts/parallel/ParallelDebug.h
@@ -0,0 +1,79 @@
+/*
+ Time-stamp: <Tue Mar 06 2001 00:25:14 Stardate: [-30]6285.08 hwloidl>
+
+ Prototypes of all parallel debugging functions.
+*/
+
+#ifndef PARALLEL_DEBUG_H
+#define PARALLEL_DEBUG_H
+
+#if defined(DEBUG) && (defined(GRAN) || defined(PAR))
+/* max length of the string holding a finger-print for a graph */
+#define MAX_FINGER_PRINT_LEN 10000
+// (10*RtsFlags.ParFlags.packBufferSize)
+#endif
+
+#if defined(DEBUG) && defined(GRAN)
+void G_PRINT_NODE(StgClosure* node);
+void G_PPN(StgClosure* node);
+void G_INFO_TABLE(StgClosure* node);
+void G_CURR_THREADQ(StgInt verbose);
+void G_THREADQ(StgTSO* closure, StgInt verbose);
+void G_TSO(StgTSO* closure, StgInt verbose);
+void G_EVENT(rtsEventQ event, StgInt verbose);
+void G_EVENTQ(StgInt verbose);
+void G_PE_EQ(PEs pe, StgInt verbose);
+void G_SPARK(rtsSparkQ spark, StgInt verbose);
+void G_SPARKQ(rtsSparkQ spark, StgInt verbose);
+void G_CURR_SPARKQ(StgInt verbose);
+void G_PROC(StgInt proc, StgInt verbose);
+void GP(StgInt proc);
+void GCP(void);
+void GT(StgPtr tso);
+void GCT(void);
+void GEQ(void);
+void GTQ(PEs p);
+void GCTQ(void);
+void GSQ(PEs p);
+void GCSQ(void);
+void GN(StgPtr node);
+void GIT(StgPtr node);
+#endif
+
+#if defined(GRAN) || defined(PAR)
+
+char *display_info_type(StgClosure *closure, char *str);
+void info_hdr_type(StgClosure *closure, char *res);
+char *info_type(StgClosure *closure);
+char *info_type_by_ip(StgInfoTable *ip);
+
+void PrintPacket(rtsPackBuffer *buffer);
+void PrintGraph(StgClosure *p, int indent_level);
+void GraphFingerPrint(StgClosure *p, char *finger_print);
+void checkGraph(StgClosure *p, int rec_level);
+
+void checkPacket(rtsPackBuffer *packBuffer);
+
+#endif /* GRAN || PAR */
+
+#if defined(PAR)
+
+/* don't want to import Schedule.h and Sanity.h everywhere */
+extern void print_bq (StgClosure *node);
+extern void checkBQ (StgBlockingQueueElement *bqe, StgClosure *closure);
+
+void checkGAGAMap(globalAddr *gagamap, int nGAs);
+extern rtsBool isOnLiveIndTable(globalAddr *ga);
+extern void rebuildGAtables(rtsBool full);
+extern void rebuildLAGAtable(void);
+extern void checkLAGAtable(rtsBool check_closures);
+extern void checkHeapChunk(StgPtr start, StgPtr end);
+extern void printGA (globalAddr *ga);
+extern void printGALA (GALA *gala);
+extern void printLiveIndTable(void);
+extern void printRemoteGATable(void);
+extern void printLAGAtable(void);
+
+#endif
+
+#endif /* PARALLEL_DEBUG_H */
diff --git a/rts/parallel/ParallelRts.h b/rts/parallel/ParallelRts.h
new file mode 100644
index 0000000000..d421296d19
--- /dev/null
+++ b/rts/parallel/ParallelRts.h
@@ -0,0 +1,253 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Tue Mar 06 2001 00:25:50 Stardate: [-30]6285.08 hwloidl>
+
+ Variables and functions specific to the parallel RTS (i.e. GUM or GranSim)
+ ----------------------------------------------------------------------- */
+
+#ifndef PARALLEL_RTS_H
+#define PARALLEL_RTS_H
+
+#include "ParTicky.h"
+
+/* HWL HACK: compile time sanity checks; shouldn't be necessary at all */
+#if defined(PAR) && defined(GRAN)
+# error "Both PAR and GRAN defined"
+#endif
+
+#if defined(DEBUG)
+/* Paranoia debugging: we add an end-of-buffer marker to every pack buffer
+ (only when sanity checking RTS is enabled, of course) */
+#define DEBUG_HEADROOM 1
+#define END_OF_BUFFER_MARKER 0x1111bbbb
+#define GARBAGE_MARKER 0x1111eeee
+#else
+#define DEBUG_HEADROOM 0
+#endif /* DEBUG */
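+
+/*
+  Illustrative sketch (an assumption, not code from this patch): a pack
+  routine reserves DEBUG_HEADROOM extra words and plants the marker just
+  past the live data, so that the receiving side can assert that the
+  buffer was not overrun:
+
+    packBuffer->buffer[size] = END_OF_BUFFER_MARKER;           // sender
+    ASSERT(packBuffer->buffer[size] == END_OF_BUFFER_MARKER);  // receiver
+*/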
+
+#if defined(GRAN) || defined(PAR)
+
+#if defined(GRAN)
+
+/* Statistics info */
+extern nat tot_packets, tot_packet_size, tot_cuts, tot_thunks;
+
+/* Pack.c */
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize, GlobalTaskId dest);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize);
+rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize);
+rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize);
+void PackFetchMe(StgClosure *closure);
+
+/* Unpack.c */
+StgClosure* UnpackGraph(rtsPackBuffer* buffer);
+void InitPendingGABuffer(nat size);
+
+/* RBH.c */
+StgClosure *convertToRBH(StgClosure *closure);
+void convertFromRBH(StgClosure *closure);
+
+/* HLComms.c */
+rtsFetchReturnCode blockFetch(StgTSO* tso, PEs proc, StgClosure* bh);
+void blockThread(StgTSO *tso);
+
+#endif
+#if defined(PAR)
+
+/* Statistics info */
+
+/* global structure for collecting statistics */
+typedef struct GlobalParStats_ {
+ /* GALA and LAGA table info */
+ nat tot_mark_GA, tot_rebuild_GA, tot_free_GA,
+ res_mark_GA, res_rebuild_GA, res_free_GA,
+ cnt_mark_GA, cnt_rebuild_GA, cnt_free_GA,
+ res_size_GA, tot_size_GA, local_alloc_GA, tot_global, tot_local;
+
+ /* time spent managing the GAs */
+ double time_mark_GA, time_rebuild_GA;
+
+ /* spark queue stats */
+ nat res_sp, tot_sp, cnt_sp, emp_sp;
+ // nat tot_sq_len, tot_sq_probes, tot_sparks;
+ /* thread queue stats */
+ nat res_tp, tot_tp, cnt_tp, emp_tp;
+ //nat tot_add_threads, tot_tq_len, non_end_add_threads;
+
+ /* packet statistics */
+ nat tot_packets, tot_packet_size, tot_thunks,
+ res_packet_size, res_thunks,
+ rec_packets, rec_packet_size, rec_thunks,
+ rec_res_packet_size, rec_res_thunks;
+ /* time spent packing stuff */
+ double time_pack, time_unpack;
+
+ /* thread stats */
+ nat tot_threads_created;
+
+ /* spark stats */
+ //nat pruned_sparks, withered_sparks;
+ nat tot_sparks_created, tot_sparks_ignored, tot_sparks_marked,
+ res_sparks_created, res_sparks_ignored, res_sparks_marked; // , sparks_created_on_PE[MAX_PROC];
+ double time_sparks;
+
+ /* scheduling stats */
+ nat tot_yields, tot_stackover, tot_heapover;
+
+ /* message statistics */
+ nat tot_fish_mess, tot_fetch_mess, tot_resume_mess, tot_schedule_mess;
+ nat rec_fish_mess, rec_fetch_mess, rec_resume_mess, rec_schedule_mess;
+#if defined(DIST)
+ nat tot_reval_mess;
+ nat rec_reval_mess;
+#endif
+
+ /* blocking queue statistics
+ rtsTime tot_bq_processing_time;
+ nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs;
+ */
+
+ /* specialised info on arrays (for GPH/Maple mainly) */
+ nat tot_arrs, tot_arr_size;
+} GlobalParStats;
+
+extern GlobalParStats globalParStats;
+
+void globalParStat_exit(void);
+
+/* Pack.c */
+rtsBool InitPackBuffer(void);
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize, GlobalTaskId dest);
+
+/* Unpack.c */
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer *buffer, globalAddr **gamap,
+ nat *nGAs);
+
+/* RBH.c */
+StgClosure *convertToRBH(StgClosure *closure);
+void convertToFetchMe(StgRBH *rbh, globalAddr *ga);
+
+/* HLComms.c */
+void blockFetch(StgBlockedFetch *bf, StgClosure *bh);
+void blockThread(StgTSO *tso);
+
+/* Global.c */
+void GALAdeprecate(globalAddr *ga);
+
+/* HLComms.c */
+nat pending_fetches_len(void);
+
+/* ParInit.c */
+void initParallelSystem(void);
+void shutdownParallelSystem(StgInt n);
+void synchroniseSystem(void);
+void par_exit(I_);
+
+#endif
+
+/* this routine should be moved to a more general module; currently in Pack.c
+StgInfoTable* get_closure_info(StgClosure* node,
+ nat *size, nat *ptrs, nat *nonptrs, nat *vhs,
+ char *info_hdr_ty);
+*/
+void doGlobalGC(void);
+
+//@node GC routines, Debugging routines, Spark handling routines
+//@subsection GC routines
+
+#if defined(PAR)
+/* HLComms.c */
+void freeRemoteGA(int pe, globalAddr *ga);
+void sendFreeMessages(void);
+void markPendingFetches(rtsBool major_gc);
+
+/* Global.c */
+void markLocalGAs(rtsBool full);
+void RebuildGAtables(rtsBool full);
+void RebuildLAGAtable(void);
+#endif
+
+//@node Debugging routines, Generating .gr profiles, GC routines
+//@subsection Debugging routines
+
+#if defined(PAR)
+void printGA (globalAddr *ga);
+void printGALA (GALA *gala);
+void printLAGAtable(void);
+
+rtsBool isOnLiveIndTable(globalAddr *ga);
+rtsBool isOnRemoteGATable(globalAddr *ga);
+void checkFreeGALAList(void);
+void checkFreeIndirectionsList(void);
+#endif
+
+//@node Generating .gr profiles, Index, Debugging routines
+//@subsection Generating .gr profiles
+
+#define STATS_FILENAME_MAXLEN 128
+
+/* Where to write the log file */
+//@cindex gr_file
+//@cindex gr_filename
+extern FILE *gr_file;
+extern char gr_filename[STATS_FILENAME_MAXLEN];
+
+//@cindex init_gr_stats
+//@cindex init_gr_simulation
+//@cindex end_gr_simulation
+void init_gr_stats (void);
+void init_gr_simulation(int rts_argc, char *rts_argv[],
+ int prog_argc, char *prog_argv[]);
+void end_gr_simulation(void);
+
+// TODO: move fcts in here (as static inline)
+StgInfoTable* get_closure_info(StgClosure* node,
+                               nat *size, nat *ptrs, nat *nonptrs, nat *vhs,
+                               char *info_hdr_ty);
+rtsBool IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node);
+StgClosure *UNWIND_IND(StgClosure *closure);
+
+
+#endif /* defined(PAR) || defined(GRAN) */
+
+//@node Common macros, Index, Generating .gr profiles
+//@subsection Common macros
+
+#define LOOKS_LIKE_PTR(r) \
+ (LOOKS_LIKE_STATIC_CLOSURE(r) || \
+ ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1)))
+
+/* see Sanity.c for this kind of test; doing this in these basic fcts
+ is paranoid (nuke it after debugging!)
+*/
+
+/* pathetic version of the check whether p can be a closure */
+#define LOOKS_LIKE_COOL_CLOSURE(p) 1
+
+//LOOKS_LIKE_GHC_INFO(get_itbl(p))
+
+/* A more thorough check would test whether p is a static closure
+   (i.e. in the data segment):
+
+#define LOOKS_LIKE_COOL_CLOSURE(p)  \
+  ((LOOKS_LIKE_STATIC(p)) ? \
+   closure_STATIC(p)  \
+   : !closure_STATIC(p) && LOOKS_LIKE_PTR(p))
+*/
+
+#endif /* PARALLEL_RTS_H */
+
+//@node Index, , Common macros
+//@subsection Index
+
+//@index
+//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE
+//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION
+//* end_gr_simulation:: @cindex\s-+end_gr_simulation
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* gr_file:: @cindex\s-+gr_file
+//* gr_filename:: @cindex\s-+gr_filename
+//* init_gr_simulation:: @cindex\s-+init_gr_simulation
+//* unwindInd:: @cindex\s-+unwindInd
+//@end index
diff --git a/rts/parallel/RBH.c b/rts/parallel/RBH.c
new file mode 100644
index 0000000000..1612209027
--- /dev/null
+++ b/rts/parallel/RBH.c
@@ -0,0 +1,337 @@
+/*
+ Time-stamp: <Tue Mar 13 2001 19:07:13 Stardate: [-30]6323.98 hwloidl>
+
+ Revertible Black Hole Manipulation.
+ Used in GUM and GranSim during the packing of closures. These black holes
+ must be revertible because a GC might occur while the packet is being
+ transmitted. In this case all RBHs have to be reverted.
+ */
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+# if defined(DEBUG)
+# include "ParallelDebug.h"
+# endif
+#include "Storage.h" // for recordMutable
+#include "StgMacros.h" // inlined IS_... fcts
+
+/*
+ Turn a closure into a revertible black hole. After the conversion, the
+ first two words of the closure (after the fixed header, of course) will
+ be a link to the mutables list (if appropriate for the garbage
+ collector), and a pointer to the blocking queue. The blocking queue is
+ terminated by a 2-word SPEC closure which holds the original contents of
+ the first two words of the closure.
+*/
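+
+/*
+  Sketch of the resulting invariant (drawn from the code below): the two
+  overwritten words survive in the RBH_Save closure at the end of the
+  blocking queue, so reverting the RBH only requires copying them back
+  and restoring the original info pointer:
+
+    ((StgRBH *)closure)->blocking_queue = (StgBlockingQueueElement *)rbh_save;
+    ...
+    SET_INFO(closure, REVERT_INFOPTR(get_itbl(closure)));
+*/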
+
+//@menu
+//* Externs and prototypes::
+//* Conversion Functions::
+//* Index::
+//@end menu
+
+//@node Externs and prototypes, Conversion Functions
+//@section Externs and prototypes
+
+EXTFUN(stg_RBH_Save_0_info);
+EXTFUN(stg_RBH_Save_1_info);
+EXTFUN(stg_RBH_Save_2_info);
+
+//@node Conversion Functions, Index, Externs and prototypes
+//@section Conversion Functions
+
+/*
+ A closure is turned into an RBH upon packing it (see PackClosure in Pack.c).
+ This is needed in case we have to do a GC before the packet is turned
+ into a graph on the PE receiving the packet.
+*/
+//@cindex convertToRBH
+StgClosure *
+convertToRBH(closure)
+StgClosure *closure;
+{
+ StgRBHSave *rbh_save;
+ StgInfoTable *info_ptr, *rbh_info_ptr, *old_info;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /*
+ Closure layout before this routine runs amuck:
+ +-------------------
+ | HEADER | DATA ...
+ +-------------------
+ | FIXED_HS |
+ */
+ /*
+ Turn closure into an RBH. This is done by modifying the info_ptr,
+ grabbing the info_ptr of the RBH for this closure out of its
+ ITBL. Additionally, we have to save the words from the closure, which
+ will hold the link to the blocking queue. For this purpose we use the
+ RBH_Save_N closures, with N being the number of pointers for this
+ closure. */
+ IF_GRAN_DEBUG(pack,
+ belch("*>:: %p (%s): Converting closure into an RBH",
+ closure, info_type(closure)));
+ IF_PAR_DEBUG(pack,
+ belch("*>:: %p (%s): Converting closure into an RBH",
+ closure, info_type(closure)));
+
+ ASSERT(closure_THUNK(closure));
+
+ IF_GRAN_DEBUG(pack,
+ old_info = get_itbl(closure));
+
+  /* Allocate a new closure to hold the data ripped out of the original */
+ if ((rbh_save = (StgRBHSave *)allocate(_HS + 2)) == NULL)
+ return NULL; /* have to Garbage Collect; check that in the caller! */
+
+ info_ptr = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ ASSERT(size >= _HS+MIN_UPD_SIZE);
+
+ /* Fill in the RBH_Save closure with the original data from closure */
+ rbh_save->payload[0] = (StgPtr) ((StgRBH *)closure)->blocking_queue;
+ rbh_save->payload[1] = (StgPtr) ((StgRBH *)closure)->mut_link;
+
+ /* Set the info_ptr for the rbh_Save closure according to the number of
+ pointers in the original */
+
+ rbh_info_ptr = (StgInfoTable *) (ptrs == 0 ? &stg_RBH_Save_0_info :
+ ptrs == 1 ? &stg_RBH_Save_1_info :
+ &stg_RBH_Save_2_info);
+ SET_INFO(rbh_save, rbh_info_ptr);
+ /* same bitmask as the original closure */
+ SET_GRAN_HDR(rbh_save, PROCS(closure));
+
+ /* Init the blocking queue of the RBH and have it point to the saved data */
+ ((StgRBH *)closure)->blocking_queue = (StgBlockingQueueElement *)rbh_save;
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(RBH_INFOPTR(get_itbl(closure))));
+ /* Turn the closure into a RBH; a great system, indeed! */
+ SET_INFO(closure, RBH_INFOPTR(get_itbl(closure)));
+
+ /*
+ add closure to the mutable list!
+ do this after having turned the closure into an RBH, because an
+ RBH is mutable but the closure it was before wasn't mutable
+ */
+ recordMutable((StgMutClosure *)closure);
+
+ //IF_GRAN_DEBUG(pack,
+ /* sanity check; make sure that reverting the RBH yields the
+ orig closure, again */
+ //ASSERT(REVERT_INFOPTR(get_itbl(closure))==old_info));
+
+ /*
+ Closure layout after this routine has run amuck:
+ +---------------------
+ | RBH-HEADER | | | ...
+ +--------------|---|--
+ | FIXED_HS | | v
+ | Mutable-list ie another StgMutClosure
+ v
+ +---------
+ | RBH_SAVE with 0-2 words of DATA
+ +---------
+ */
+
+ return closure;
+}
+
+/*
+  An RBH closure is turned into a FETCH_ME when receiving an ACK message
+ indicating that the transferred closure has been unpacked on the other PE
+ (see processAck in HLComms.c). The ACK also contains the new GA of the
+ closure to which the FETCH_ME closure has to point.
+
+ Converting a closure to a FetchMe is trivial, unless the closure has
+ acquired a blocking queue. If that has happened, we first have to awaken
+ the blocking queue. What a nuisance! Fortunately, @AwakenBlockingQueue@
+ should now know what to do.
+
+ A note on GrAnSim: In GrAnSim we don't have FetchMe closures. However,
+ we have to turn a RBH back to its original form when the simulated
+ transfer of the closure has been finished. Therefore we need the
+ @convertFromRBH@ routine below. After converting the RBH back to its
+ original form and awakening all TSOs, the first TSO will reenter the
+ closure which is now local and carry on merrily reducing it (the other
+ TSO will be less merrily blocked on the now local closure; we're costing
+ the difference between local and global blocks in the BQ code). -- HWL
+*/
+
+# if defined(PAR)
+
+EXTFUN(stg_FETCH_ME_info);
+
+//@cindex convertToFetchMe
+void
+convertToFetchMe(rbh, ga)
+StgRBH *rbh;
+globalAddr *ga;
+{
+ // StgInfoTable *ip = get_itbl(rbh);
+ StgBlockingQueueElement *bqe = rbh->blocking_queue;
+
+ ASSERT(get_itbl(rbh)->type==RBH);
+
+ IF_PAR_DEBUG(pack,
+ belch("**:: Converting RBH %p (%s) into a FETCH_ME for GA ((%x, %d, %x))",
+ rbh, info_type(rbh),
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight));
+
+ /* put closure on mutables list, while it is still a RBH */
+ recordMutable((StgMutClosure *)rbh);
+
+ /* actually turn it into a FETCH_ME */
+ SET_INFO((StgClosure *)rbh, &stg_FETCH_ME_info);
+
+ /* set the global pointer in the FETCH_ME closure to the given value */
+ ((StgFetchMe *)rbh)->ga = ga;
+
+ IF_PAR_DEBUG(pack,
+ if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+ belch("**:: Awakening non-empty BQ of RBH closure %p (first TSO is %d (%p)",
+ rbh, ((StgTSO *)bqe)->id, ((StgTSO *)bqe)));
+
+ /* awaken all TSOs and BLOCKED_FETCHES on the blocking queue */
+ if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+ awakenBlockedQueue(bqe, (StgClosure *)rbh);
+}
+# else /* GRAN */
+/* Prototype */
+// void UnlinkFromMUT(StgPtr closure);
+
+/*
+ This routine in fact reverts the RBH into its original form; this code
+ should be of interest for GUM, too, but is not needed in the current version.
+ convertFromRBH is called where GUM uses convertToFetchMe.
+*/
+void
+convertFromRBH(closure)
+StgClosure *closure;
+{
+ StgBlockingQueueElement *bqe = ((StgRBH*)closure)->blocking_queue;
+ char str[NODE_STR_LEN]; // debugging only
+ StgInfoTable *rip = REVERT_INFOPTR(get_itbl(closure)); // debugging only
+
+ IF_GRAN_DEBUG(pack,
+ if (get_itbl(bqe)->type==TSO)
+ sprintf(str, "%d (%p)",
+ ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
+ else
+ strcpy(str, "empty");
+ belch("*<:: Reverting RBH %p (%s) into a ??? closure again; BQ start: %s",
+ closure, info_type(closure), str));
+
+ ASSERT(get_itbl(closure)->type==RBH);
+
+ /* awakenBlockedQueue also restores the RBH_Save closure
+ (have to call it even if there are no TSOs in the queue!) */
+ awakenBlockedQueue(bqe, closure);
+
+ /* Put back old info pointer (grabbed from the RBH's info table).
+ We do that *after* awakening the BQ to be sure node is an RBH when
+ calling awakenBlockedQueue (different in GUM!)
+ */
+ SET_INFO(closure, REVERT_INFOPTR(get_itbl(closure)));
+
+ /* put closure on mutables list */
+ recordMutable((StgMutClosure *)closure);
+
+# if 0 /* rest of this fct */
+  /* old code */
+ /* FETCHME_GA(closure) = ga; */
+ if (IS_MUTABLE(INFO_PTR(bqe))) {
+    PROC old_proc = CurrentProc,       /* NB: For AwakenBlockingQueue, */
+         new_proc = where_is(closure); /* CurrentProc must be where    */
+                                       /* closure lives.               */
+ CurrentProc = new_proc;
+
+# if defined(GRAN_CHECK)
+ if (RTSflags.GranFlags.debug & 0x100)
+ fprintf(stderr,"===== AwBQ of node 0x%lx (%s) [PE %2u]\n",
+ closure, (isSpec ? "SPEC_RBH" : "GEN_RBH"), new_proc);
+# endif
+
+ rbh_save = AwakenBlockingQueue(bqe); /* AwakenBlockingQueue(bqe); */
+ CurrentProc = old_proc;
+ } else {
+ rbh_save = bqe;
+ }
+
+ /* Put data from special RBH save closures back into the closure */
+ if ( rbh_save == NULL ) {
+ fprintf(stderr,"convertFromRBH: No RBH_Save_? closure found at end of BQ!\n");
+ EXIT(EXIT_FAILURE);
+ } else {
+ closure[isSpec ? SPEC_HS : GEN_HS] = rbh_save[SPEC_HS];
+ closure[(isSpec ? SPEC_HS : GEN_HS) + 1] = rbh_save[SPEC_HS + 1];
+ }
+# endif /* 0 */
+
+# if 0 && (defined(GCap) || defined(GCgn))
+ /* ngoq ngo' */
+ /* If we convert from an RBH in the old generation,
+ we have to make sure it goes on the mutables list */
+
+ if(closure <= StorageMgrInfo.OldLim) {
+ if (IS_MUTABLE(INFO_PTR(closure)) && MUT_LINK(closure) == MUT_NOT_LINKED) {
+ MUT_LINK(closure) = (StgWord) StorageMgrInfo.OldMutables;
+ StorageMgrInfo.OldMutables = closure;
+ }
+ }
+# endif /* 0 */
+}
+#endif /* PAR */
+
+/* Remove closure from the mutables list */
+#if 0
+/* ngoq ngo' */
+void
+UnlinkFromMUT(StgPtr closure)
+{
+ StgPtr curr = StorageMgrInfo.OldMutables, prev = NULL;
+
+ while (curr != NULL && curr != closure) {
+ ASSERT(MUT_LINK(curr)!=MUT_NOT_LINKED);
+ prev=curr;
+ curr=MUT_LINK(curr);
+ }
+ if (curr==closure) {
+ if (prev==NULL)
+ StorageMgrInfo.OldMutables = MUT_LINK(curr);
+ else
+ MUT_LINK(prev) = MUT_LINK(curr);
+ MUT_LINK(curr) = MUT_NOT_LINKED;
+ }
+
+# if 0 && (defined(GCap) || defined(GCgn))
+ {
+ closq newclos;
+ extern closq ex_RBH_q;
+
+ newclos = (closq) stgMallocBytes(sizeof(struct clos), "UnlinkFromMUT");
+ CLOS_CLOSURE(newclos) = closure;
+ CLOS_PREV(newclos) = NULL;
+ CLOS_NEXT(newclos) = ex_RBH_q;
+ if (ex_RBH_q!=NULL)
+ CLOS_PREV(ex_RBH_q) = newclos;
+ ex_RBH_q = newclos;
+ }
+# endif
+}
+#endif /* 0 */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node Index, , Conversion Functions
+//@section Index
+
+//@index
+//* convertFromRBH::  @cindex\s-+convertFromRBH
+//* convertToFetchMe::  @cindex\s-+convertToFetchMe
+//* convertToRBH:: @cindex\s-+convertToRBH
+//@end index
diff --git a/rts/parallel/SysMan.c b/rts/parallel/SysMan.c
new file mode 100644
index 0000000000..40bcf6a19e
--- /dev/null
+++ b/rts/parallel/SysMan.c
@@ -0,0 +1,650 @@
+/* ----------------------------------------------------------------------------
+ Time-stamp: <Wed Mar 21 2001 17:16:28 Stardate: [-30]6363.59 hwloidl>
+
+ GUM System Manager Program
+ Handles startup, shutdown and global synchronisation of the parallel system.
+
+ The Parade/AQUA Projects, Glasgow University, 1994-1995.
+ GdH/APART Projects, Heriot-Watt University, Edinburgh, 1997-2000.
+
+ ------------------------------------------------------------------------- */
+
+//@node GUM System Manager Program, , ,
+//@section GUM System Manager Program
+
+//@menu
+//* General docu::
+//* Includes::
+//* Macros etc::
+//* Variables::
+//* Prototypes::
+//* Aux startup and shutdown fcts::
+//* Main fct::
+//* Message handlers::
+//* Auxiliary fcts::
+//* Index::
+//@end menu
+
+//@node General docu, Includes, GUM System Manager Program, GUM System Manager Program
+//@subsection General docu
+
+/*
+The SysMan task currently controls initiation and termination of a
+parallel Haskell program running under GUM. In the future it may
+control global GC synchronisation and statistics gathering. Based on
+K. Hammond's SysMan.lc in Graph for PVM. SysMan is unusual in that it
+is not part of the executable produced by ghc: it is a free-standing
+program that spawns PVM tasks (logical PEs) to evaluate the
+program. After initialisation it runs in parallel with the PE tasks,
+awaiting messages.
+
+OK children, buckle down for some serious weirdness, it works like this ...
+
+o The argument vector (argv) for SysMan has one of the following two shapes:
+
+-------------------------------------------------------------------------------
+| SysMan path | debug flag | pvm-executable path | Num. PEs | Program Args ...|
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------
+| SysMan path | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+The "pvm-executable path" is an absolute path of where PVM stashes the
+code for each PE. The arguments passed on to each PE-executable
+spawned by PVM are:
+
+-------------------------------
+| Num. PEs | Program Args ... |
+-------------------------------
+
+The arguments passed to the Main-thread PE-executable are
+
+-------------------------------------------------------------------
+| main flag | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+o SysMan's algorithm is as follows.
+
+o use PVM to spawn (nPEs-1) PVM tasks
+o fork SysMan to create the main-thread PE. This permits the main-thread to
+ read and write to stdin and stdout.
+o Wait for all the PE-tasks to reply, saying that they are ready and whether
+  they are the main thread or not.
+o Broadcast an array of the PE task-ids out to all of the PE-tasks.
+o Enter a loop awaiting incoming messages, e.g. failure, garbage collection,
+  termination.
+
+The forked Main-thread algorithm, in SysMan, is as follows.
+
+o disconnects from PVM.
+o sets a flag in argv to indicate that it is the main thread.
+o `exec's a copy of the pvm-executable (i.e. the program being run)
+
+
+The pvm-executable run by each PE-task is initialised as follows.
+
+o Registers with PVM, obtaining a task-id.
+o If it is the main thread it gets SysMan's task-id from argv; otherwise it
+  can use pvm_parent.
+o Sends a ready message to SysMan together with a flag indicating whether it
+  is the main thread or not.
+o Receives from SysMan the array of task-ids of the other PEs.
+o If the number of task-ids received was larger than expected, then it must be
+  a task generated after the rest of the program had started, so it sends its
+  own task-id message to all the tasks it was told about.
+o Begins execution.
+
+(A rough sketch of the PE-side startup follows this comment.)
+
+*/
+
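+/* Editor's sketch: the PE-side half of the startup handshake described
+   above lives in the PE executable (cf. ParInit.c), not in SysMan itself.
+   The outline below is illustrative only; sendReadyOp() and
+   receivePETable() are hypothetical placeholders, not real GUM entry
+   points. */
+#if 0 /* illustrative sketch; never compiled */
+static void
+pe_startup(char **argv)
+{
+  GlobalTaskId sysman;
+  long isMain = (argv[0][0] == '-');   /* main flag patched into argv[0] by the forked SysMan */
+
+  mytid = pvm_mytid();                 /* register with PVM, obtaining a task-id */
+  if (isMain)
+    sysman = strtol(argv[0]+1, NULL, 16);  /* SysMan stashed its tid as "-%08X" */
+  else
+    sysman = pvm_parent();             /* non-main PEs were pvm_spawn'ed by SysMan */
+
+  sendReadyOp(sysman, isMain);         /* hypothetical: send PP_READY + main flag */
+  receivePETable(PEbuffer, nPEs);      /* hypothetical: receive PP_PETIDS broadcast */
+}
+#endif
+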
+//@node Includes, Macros etc, General docu, GUM System Manager Program
+//@subsection Includes
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+#include "Rts.h"
+#include "ParTypes.h"
+#include "LLC.h"
+#include "Parallel.h"
+#include "ParallelRts.h" // stats only
+
+//@node Macros etc, Variables, Includes, GUM System Manager Program
+//@subsection Macros etc
+
+/* SysMan is put on top of the GHC routine that does the RtsFlags handling.
+ So, we cannot use the standard macros. For the time being we use a macro
+ that is fixed at compile time.
+*/
+
+#ifdef IF_PAR_DEBUG
+#undef IF_PAR_DEBUG
+#endif
+
+/* debugging enabled */
+//#define IF_PAR_DEBUG(c,s) { s; }
+/* debugging disabled */
+#define IF_PAR_DEBUG(c,s) /* nothing */
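+
+/* Editor's sketch: should the compile-time switch above prove too rigid,
+   the macro could be keyed off an environment variable instead of the
+   (unavailable) RtsFlags machinery. A minimal sketch, assuming a flag
+   test per debug site is acceptable and <stdlib.h> is in scope for
+   getenv; SYSMAN_DEBUG is a hypothetical variable name: */
+#if 0 /* illustrative sketch; never compiled */
+static int parDebug = -1;  /* -1: not yet read; 0: off; 1: on */
+#define IF_PAR_DEBUG(c,s) \
+  do { if (parDebug < 0) parDebug = (getenv("SYSMAN_DEBUG") != NULL); \
+       if (parDebug) { s; } } while (0)
+#endif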
+
+void *stgMallocBytes (int n, char *msg);
+
+//@node Variables, Prototypes, Macros etc, GUM System Manager Program
+//@subsection Variables
+
+/*
+  The following definitions are included so that SysMan can be linked with
+  the Low Level Communications module (LLComms). They are not used in SysMan.
+*/
+GlobalTaskId mytid;
+
+static unsigned PEsArrived = 0;
+static GlobalTaskId gtids[MAX_PES];
+static GlobalTaskId sysman_id, sender_id;
+static unsigned PEsTerminated = 0;
+static rtsBool Finishing = rtsFalse;
+static long PEbuffer[MAX_PES];
+nat nSpawn = 0; // current no. of spawned tasks (see gtids)
+nat nPEs = 0; // number of PEs specified on startup
+nat nextPE;
+/* PVM-ish variables */
+char *petask, *pvmExecutable;
+char **pargv;
+int cc, spawn_flag = PvmTaskDefault;
+
+#if 0 && defined(PAR_TICKY)
+/* ToDo: use allGlobalParStats to collect stats of all PEs */
+GlobalParStats *allGlobalParStats[MAX_PES];
+#endif
+
+//@node Prototypes, Aux startup and shutdown fcts, Variables, GUM System Manager Program
+//@subsection Prototypes
+
+/* prototypes for message handlers called from the main loop of SysMan */
+void newPE(int nbytes, int opcode, int sender_id);
+void readyPE(int nbytes, int opcode, int sender_id);
+void finishPE(int nbytes, int opcode, int sender_id, int exit_code);
+
+//@node Aux startup and shutdown fcts, Main fct, Prototypes, GUM System Manager Program
+//@subsection Aux startup and shutdown fcts
+
+/*
+ Create the PE Tasks. We spawn (nPEs-1) pvm threads: the Main Thread
+ (which starts execution and performs IO) is created by forking SysMan
+*/
+static int
+createPEs(int total_nPEs) {
+ int i, spawn_nPEs, iSpawn = 0, nArch, nHost;
+ struct pvmhostinfo *hostp;
+ int sysman_host;
+
+ spawn_nPEs = total_nPEs-1;
+ if (spawn_nPEs > 0) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] Spawning %d PEs(%s) ...\n",
+ sysman_id, spawn_nPEs, petask);
+ fprintf(stderr, " args: ");
+ for (i = 0; pargv[i]; ++i)
+ fprintf(stderr, "%s, ", pargv[i]);
+ fprintf(stderr, "\n"));
+
+ pvm_config(&nHost,&nArch,&hostp);
+ sysman_host=pvm_tidtohost(sysman_id);
+
+ /* create PEs on the specific machines in the specified order! */
+ for (i=0; (iSpawn<spawn_nPEs) && (i<nHost); i++)
+ if (hostp[i].hi_tid != sysman_host) {
+ checkComms(pvm_spawn(petask, pargv, spawn_flag+PvmTaskHost,
+ hostp[i].hi_name, 1, gtids+iSpawn),
+ "SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] Spawned PE %d onto %s\n",
+ sysman_id, i, hostp[i].hi_name));
+ iSpawn++;
+ }
+
+ /* create additional PEs anywhere you like */
+ if (iSpawn<spawn_nPEs) {
+ checkComms(pvm_spawn(petask, pargv, spawn_flag, "",
+ spawn_nPEs-iSpawn, gtids+iSpawn),
+ "SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Spawned %d additional PEs anywhere\n",
+ sysman_id, spawn_nPEs-iSpawn));
+ }
+ }
+
+#if 0
+ /* old code with random placement of PEs; make that a variant? */
+# error "Broken startup in SysMan"
+ { /* let pvm place the PEs anywhere; not used anymore */
+ checkComms(pvm_spawn(petask, pargv, spawn_flag, "", spawn_nPEs, gtids),"SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Spawned\n", sysman_id));
+
+ }
+#endif
+
+ // iSpawn=spawn_nPEs;
+
+ return iSpawn;
+}
+
+/*
+  Check if this pvm task is in the list of tasks we spawned and are waiting
+  on; if so, remove it.
+*/
+
+static rtsBool
+alreadySpawned (GlobalTaskId g) {
+ unsigned int i;
+
+ for (i=0; i<nSpawn; i++)
+ if (g==gtids[i]) {
+ nSpawn--;
+ gtids[i] = gtids[nSpawn]; //the last takes its place
+ return rtsTrue;
+ }
+ return rtsFalse;
+}
+
+static void
+broadcastFinish(void) {
+ int i,j;
+ int tids[MAX_PES]; /* local buffer of all surviving PEs */
+
+ for (i=0, j=0; i<nPEs; i++)
+ if (PEbuffer[i])
+ tids[j++]=PEbuffer[i]; //extract valid tids
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Broadcasting Finish to %d PEs; initiating shutdown\n",
+ sysman_id, j));
+
+  /* ToDo: move into LLComms.c (see the sketch after this function) */
+ pvm_initsend(PvmDataDefault);
+ pvm_mcast(tids,j,PP_FINISH);
+}
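+
+/* Editor's sketch: re the ToDo above, a minimal LLComms-style wrapper
+   might mirror broadcastOpN (used in broadcastPEtids below). Sketch only;
+   broadcastOp() is not an existing LLComms entry point: */
+#if 0 /* illustrative sketch; never compiled */
+static void
+broadcastOp(int opcode, int *tasks, int ntasks) {
+  pvm_initsend(PvmDataDefault);     /* empty message body */
+  pvm_mcast(tasks, ntasks, opcode); /* multicast the opcode to all given tasks */
+}
+#endif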
+
+static void
+broadcastPEtids (void) {
+ nat i;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan sending PE table to all PEs\n", sysman_id);
+ /* debugging */
+ fprintf(stderr,"++++ [%x] PE table as seen by SysMan:\n", mytid);
+ for (i = 0; i < nPEs; i++) {
+                 fprintf(stderr,"++++ PEbuffer[%d] = %lx\n", i, PEbuffer[i]);
+ }
+ )
+
+ broadcastOpN(PP_PETIDS, PEGROUP, nPEs, &PEbuffer);
+}
+
+//@node Main fct, Message handlers, Aux startup and shutdown fcts, GUM System Manager Program
+//@subsection Main fct
+
+//@cindex main
+int
+main (int argc, char **argv) {
+ int rbufid;
+  int opcode, nbytes;  /* NB: uses the global nSpawn, shared with newPE() */
+ unsigned int i;
+
+ setbuf(stdout, NULL); // disable buffering of stdout
+ setbuf(stderr, NULL); // disable buffering of stderr
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,
+ "==== RFP: GdH enabled SysMan reporting for duty\n"));
+
+ if (argc > 1) {
+ if (*argv[1] == '-') {
+ spawn_flag = PvmTaskDebug;
+ argv[1] = argv[0];
+ argv++; argc--;
+ }
+ sysman_id = pvm_mytid(); /* This must be the first PVM call */
+
+ if (sysman_id<0) {
+ fprintf(stderr, "==== PVM initialisation failure\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ Get the full path and filename of the pvm executable (stashed in some
+ PVM directory), and the number of PEs from the command line.
+ */
+ pvmExecutable = argv[1];
+ nPEs = atoi(argv[2]);
+
+ if (nPEs==0) {
+ /* as usual 0 means infinity: use all PEs specified in PVM config */
+ int nArch, nHost;
+ struct pvmhostinfo *hostp;
+
+ /* get info on PVM config */
+ pvm_config(&nHost,&nArch,&hostp);
+ nPEs=nHost;
+ sprintf(argv[2],"%d",nPEs); /* ToCheck: does this work on all archs */
+ }
+
+ /* get the name of the binary to execute */
+ if ((petask = getenv(PETASK)) == NULL) // PETASK set by driver
+ petask = PETASK;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] nPEs: %d; executable: |%s|\n",
+ sysman_id, nPEs, petask));
+
+ /* Check that we can create the number of PE and IMU tasks requested.
+ ^^^
+ This comment is most entertaining since we haven't been using IMUs
+ for the last 10 years or so -- HWL */
+ if ((nPEs > MAX_PES) || (nPEs<1)) {
+      fprintf(stderr,"==** SysMan: No more than %d PEs allowed (%d requested)\n Reconfigure GUM setting MAX_PES in ghc/includes/Parallel.h to a higher value\n",
+ MAX_PES, nPEs);
+ exit(EXIT_FAILURE);
+ }
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] is SysMan Task\n", sysman_id));
+
+ /* Initialise the PE task arguments from Sysman's arguments */
+ pargv = argv + 2;
+
+ /* Initialise list of all PE identifiers */
+ PEsArrived=0;
+ nextPE=1;
+ for (i=0; i<nPEs; i++)
+ PEbuffer[i]=0;
+
+ /* start up the required number of PEs */
+ nSpawn = createPEs(nPEs);
+
+ /*
+ Create the MainThread PE by forking SysMan. This arcane coding
+ is required to allow MainThread to read stdin and write to stdout.
+ PWT 18/1/96
+ */
+ //nPEs++; /* Record that the number of PEs is increasing */
+ if ((cc = fork())) {
+ checkComms(cc,"SysMan fork"); /* Parent continues as SysMan */
+
+      PEbuffer[0]=0;  /* we accept the first main and assume it's valid. */
+ PEsArrived=1; /* assume you've got main */
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Sysman successfully initialized!\n",
+ sysman_id));
+
+//@cindex message handling loop
+ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+ /* Main message handling loop */
+ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+ /* Process incoming messages */
+ while (1) {
+ if ((rbufid = pvm_recv(ANY_TASK, ANY_OPCODE)) < 0) {
+ pvm_perror("==** Sysman: Receiving Message (pvm_recv)");
+ /* never reached */
+ }
+
+ pvm_bufinfo(rbufid, &nbytes, &opcode, &sender_id);
+
+ /* very low level debugging
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] SysMan: Message received by SysMan: rbufid=%x, nbytes = %d, opcode = %x, sender_id = %x\n",
+ sysman_id, rbufid, nbytes, opcode, sender_id));
+ */
+
+ switch (opcode) {
+
+ case PP_NEWPE: /* a new PE is registering for work */
+ newPE(nbytes, opcode, sender_id);
+ break;
+
+ case PP_READY: /* startup complete; let PEs start working */
+ readyPE(nbytes, opcode, sender_id);
+ break;
+
+
+ case PP_GC_INIT: /* start global GC */
+ /* This Function not yet implemented for GUM */
+ fprintf(stderr,"==** Global GC requested by PE %x. Not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_STATS_ON: /* enable statistics gathering */
+ fprintf(stderr,"==** PP_STATS_ON requested by %x. Not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_STATS_OFF: /* disable statistics gathering */
+ fprintf(stderr,"==** PP_STATS_OFF requested by %x. Not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+      case PP_FINISH:
+        {
+          int exit_code = getExitCode(nbytes, &sender_id);
+          finishPE(nbytes, opcode, sender_id, exit_code);
+          break;
+        }
+
+      default:
+        {
+          /*
+          char *opname = GetOpName(opcode);
+          fprintf(stderr,"Sysman: Unrecognised opcode %s (%x)\n",
+          opname,opcode); */
+          fprintf(stderr,"==** Qagh: Sysman: Unrecognised opcode (%x)\n",
+          opcode);
+        }
+        break;
+      } /* switch */
+    } /* while 1 */
+ /* end of SysMan!! */
+ } else {
+ /* forked main thread begins here */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== Main Thread PE has been forked; doing an execv(%s,...)\n",
+ pvmExecutable));
+ pvmendtask(); // Disconnect from PVM to avoid confusion:
+ // executable reconnects
+
+      // RFP: assumes that length(argv[0])>=9 !!!
+      sprintf(argv[0],"-%08X",sysman_id); /* flag that it's the Main Thread PE and include SysMan's id */
+ execv(pvmExecutable,argv); /* Parent task becomes Main Thread PE */
+ } /* else */
+  } /* argc > 1 */
+
+  return EXIT_FAILURE; /* only reached if no arguments were given */
+} /* main */
+
+//@node Message handlers, Auxiliary fcts, Main fct, GUM System Manager Program
+//@subsection Message handlers
+
+/*
+ Received PP_NEWPE:
+ A new PE has been added to the configuration.
+*/
+void
+newPE(int nbytes, int opcode, int sender_id) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan detected a new host\n",
+ sysman_id));
+
+  /* Determine the new machine... assume it's the last on the config list? */
+ if (nSpawn < MAX_PES) {
+ int nArch,nHost;
+ struct pvmhostinfo *hostp;
+
+    /* get configuration of PVM machine */
+ pvm_config(&nHost,&nArch,&hostp);
+ nHost--;
+ checkComms(pvm_spawn(petask, pargv, spawn_flag+PvmTaskHost,
+ hostp[nHost].hi_name, 1, gtids+nSpawn),
+ "SysMan loop");
+ nSpawn++;
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] Spawned onto %s\n",
+ sysman_id, hostp[nHost].hi_name));
+ }
+}
+
+/*
+ Received PP_READY:
+ Let it be known that PE @sender_id@ participates in the computation.
+*/
+void
+readyPE(int nbytes, int opcode, int sender_id) {
+ int i = 0, flag = 1;
+ long isMain;
+ int nArch, nHost;
+ struct pvmhostinfo *hostp;
+
+ //ASSERT(opcode==PP_READY);
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan received PP_READY message from %x\n",
+ sysman_id, sender_id));
+
+ pvm_config(&nHost,&nArch,&hostp);
+
+ GetArg1(isMain);
+
+ //if ((isMain && (PEbuffer[0]==0)) || alreadySpawned(sender_id)) {
+ if (nPEs >= MAX_PES) {
+ fprintf(stderr,"==== [%x] SysMan doesn't need PE %d (max %d PEs allowed)\n",
+ sysman_id, sender_id, MAX_PES);
+ pvm_kill(sender_id);
+ } else {
+ if (isMain) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan found Main PE %x\n",
+ sysman_id, sender_id));
+ PEbuffer[0]=sender_id;
+ } else {
+ /* search for PE in list of PEs */
+ for(i=1; i<nPEs; i++)
+ if (PEbuffer[i]==sender_id) {
+ flag=0;
+ break;
+ }
+ /* it's a new PE: add it to the list of PEs */
+ if (flag)
+ PEbuffer[nextPE++] = sender_id;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan: found PE %d as [%x] on host %s\n",
+ sysman_id, PEsArrived, sender_id, hostp[PEsArrived].hi_name));
+
+ PEbuffer[PEsArrived++] = sender_id;
+ }
+
+
+ /* enable better handling of unexpected terminations */
+ checkComms( pvm_notify(PvmTaskExit, PP_FINISH, 1, &sender_id),
+ "SysMan loop");
+
+ /* finished registration of all PEs => enable notification */
+ if ((PEsArrived==nPEs) && PEbuffer[0]) {
+ checkComms( pvm_notify(PvmHostAdd, PP_NEWPE, -1, 0),
+ "SysMan startup");
+ IF_PAR_DEBUG(verbose,
+                 fprintf(stderr,"==== [%x] SysMan initialising notification for new hosts\n", sysman_id));
+ }
+
+ /* finished notification => send off the PE ids */
+ if ((PEsArrived>=nPEs) && PEbuffer[0]) {
+ if (PEsArrived>nPEs) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Weird: %d PEs registered, but we only asked for %d\n", sysman_id, PEsArrived, nPEs));
+ // nPEs=PEsArrived;
+ }
+ broadcastPEtids();
+ }
+ }
+}
+
+/*
+ Received PP_FINISH:
+ Shut down the corresponding PE. Check whether it is a regular shutdown
+ or an uncontrolled termination.
+*/
+void
+finishPE(int nbytes, int opcode, int sender_id, int exitCode) {
+ int i;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan received PP_FINISH message from %x (exit code: %d)\n",
+ sysman_id, sender_id, exitCode));
+
+ /* Is it relevant to us? Count the first message */
+ for (i=0; i<nPEs; i++)
+ if (PEbuffer[i] == sender_id) {
+ PEsTerminated++;
+ PEbuffer[i]=0;
+
+ /* handle exit code */
+ if (exitCode<0) { /* a task exit before a controlled finish? */
+ fprintf(stderr,"==== [%x] Termination at %x with exit(%d)\n",
+ sysman_id, sender_id, exitCode);
+ } else if (exitCode>0) { /* an abnormal exit code? */
+ fprintf(stderr,"==== [%x] Uncontrolled termination at %x with exit(%d)\n",
+ sysman_id, sender_id, exitCode);
+ } else if (!Finishing) { /* exitCode==0 which is good news */
+ if (i!=0) { /* someone other than main PE terminated first? */
+ fprintf(stderr,"==== [%x] Unexpected early termination at %x\n",
+ sysman_id, sender_id);
+ } else {
+ /* start shutdown by broadcasting FINISH to other PEs */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Initiating shutdown (requested by [%x] RIP) (exit code: %d)\n", sysman_id, sender_id, exitCode));
+ Finishing = rtsTrue;
+ broadcastFinish();
+ }
+ } else {
+ /* we are in a shutdown already */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Finish from %x during shutdown (%d PEs terminated so far; %d total)\n",
+ sysman_id, sender_id, PEsTerminated, nPEs));
+ }
+
+ if (PEsTerminated >= nPEs) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Global Shutdown, Goodbye!! (SysMan has received FINISHes from all PEs)\n", sysman_id));
+ //broadcastFinish();
+ /* received finish from everybody; now, we can exit, too */
+ exit(EXIT_SUCCESS); /* Qapla'! */
+ }
+ }
+}
+
+//@node Auxiliary fcts, Index, Message handlers, GUM System Manager Program
+//@subsection Auxiliary fcts
+
+/* Needed here because it's used in loads of places like LLComms etc */
+
+//@cindex stg_exit
+
+/*
+ * called from STG-land to exit the program
+ */
+
+void
+stg_exit(I_ n)
+{
+ fprintf(stderr, "==// [%x] %s in SysMan code; sending PP_FINISH to all PEs ...\n",
+ mytid,(n!=0)?"FAILURE":"FINISH");
+ broadcastFinish();
+ //broadcastFinish();
+ pvm_exit();
+ exit(n);
+}
+
+//@node Index, , Auxiliary fcts, GUM System Manager Program
+//@subsection Index
+
+//@index
+//* main:: @cindex\s-+main
+//* message handling loop:: @cindex\s-+message handling loop
+//* stgMallocBytes:: @cindex\s-+stgMallocBytes
+//* stg_exit:: @cindex\s-+stg_exit
+//@end index