summaryrefslogtreecommitdiff
path: root/ghc/runtime/main
diff options
context:
space:
mode:
Diffstat (limited to 'ghc/runtime/main')
-rw-r--r--ghc/runtime/main/GranSim.lc595
-rw-r--r--ghc/runtime/main/Itimer.lc84
-rw-r--r--ghc/runtime/main/RednCounts.lc682
-rw-r--r--ghc/runtime/main/SMRep.lc204
-rw-r--r--ghc/runtime/main/Select.lc123
-rw-r--r--ghc/runtime/main/Signals.lc588
-rw-r--r--ghc/runtime/main/StgOverflow.lc450
-rw-r--r--ghc/runtime/main/StgStartup.lhc662
-rw-r--r--ghc/runtime/main/StgThreads.lhc496
-rw-r--r--ghc/runtime/main/StgTrace.lc74
-rw-r--r--ghc/runtime/main/StgUpdate.lhc730
-rw-r--r--ghc/runtime/main/Threads.lc3749
-rw-r--r--ghc/runtime/main/TopClosure.lc8
-rw-r--r--ghc/runtime/main/TopClosure13.lc8
-rw-r--r--ghc/runtime/main/main.lc1355
15 files changed, 9808 insertions, 0 deletions
diff --git a/ghc/runtime/main/GranSim.lc b/ghc/runtime/main/GranSim.lc
new file mode 100644
index 0000000000..de603ddbc2
--- /dev/null
+++ b/ghc/runtime/main/GranSim.lc
@@ -0,0 +1,595 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1995
+%
+%************************************************************************
+%* *
+\section[GranSim.lc]{Granularity Simulator Routines}
+%* *
+%************************************************************************
+
+Macros for dealing with the new and improved GA field for simulating
+parallel execution. Based on @CONCURRENT@ package. The GA field now
+contains a mask, where the n-th bit stands for the n-th processor,
+where this data can be found. In case of multiple copies, several bits
+are set. The total number of processors is bounded by @MAX_PROC@,
+which should be <= the length of a word in bits. -- HWL
+
+\begin{code}
+#if defined(GRAN) || defined(PAR)
+
+#define NON_POSIX_SOURCE /* gettimeofday */
+
+#include "rtsdefs.h"
+
+
+#ifdef HAVE_GETCLOCK
+
+#ifdef HAVE_SYS_TIMERS_H
+#define POSIX_4D9 1
+#include <sys/timers.h>
+#endif
+
+#else
+#ifdef HAVE_GETTIMEOFDAY
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#else
+
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+
+#endif
+#endif
+
+void grputw PROTO((TIME v));
+
+#if defined(GRAN)
+/* Pointer to the event queue; events are currently malloc'ed */
+static eventq EventHd = NULL;
+
+PROC
+ga_to_proc(W_ ga)
+{
+ PROC i;
+
+ for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++);
+
+ return (i);
+}
+
+/* NB: This takes a *node* rather than just a ga as input */
+PROC
+where_is(P_ node)
+{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
+
+#if 0
+PROC
+no_of_copies(W_ ga) /* DaH lo'lu'Qo'; currently unused */
+{
+ PROC i, n;
+
+ for (i = 0, n = 0; i < MAX_PROC; i++)
+ if (IS_LOCAL_TO(ga, i))
+ n++;;
+
+ return (n);
+}
+#endif
+
+eventq
+getnextevent()
+{
+ static eventq entry = NULL;
+
+ if(EventHd == NULL)
+ {
+ fprintf(stderr,"No next event\n");
+ exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */
+ }
+
+ if(entry != NULL)
+ free((char *)entry);
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x20) { /* count events */
+ noOfEvents++;
+ event_counts[(EVENT_TYPE(EventHd)>=CONTINUETHREAD1) ?
+ CONTINUETHREAD :
+ EVENT_TYPE(EventHd)]++;
+ }
+#endif
+
+ entry = EventHd;
+ EventHd = EVENT_NEXT(EventHd);
+ return(entry);
+}
+
+/* ToDo: replace malloc/free with a free list */
+
+/* NB: newevent unused (WDP 95/07) */
+
+static
+newevent(proc,creator,time,evttype,tso,node,spark)
+ PROC proc, creator;
+ TIME time;
+ EVTTYPE evttype;
+ P_ tso, node;
+ sparkq spark;
+{
+ eventq newentry = (eventq) xmalloc(sizeof(struct event));
+
+ EVENT_PROC(newentry) = proc;
+ EVENT_CREATOR(newentry) = creator;
+ EVENT_TIME(newentry) = time;
+ EVENT_TYPE(newentry) = evttype;
+ EVENT_TSO(newentry) = tso;
+ EVENT_NODE(newentry) = node;
+ EVENT_SPARK(newentry) = spark;
+ EVENT_NEXT(newentry) = NULL;
+
+ insert_event(newentry);
+}
+
+#endif /* GRAN ; HWL */
+\end{code}
+
+%****************************************************************************
+%
+\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
+%
+%****************************************************************************
+
+Event dumping routines.
+
+\begin{code}
+
+FILE *gr_file = NULL;
+
+char *gran_event_names[] = {
+ "START", "START(Q)",
+ "STEALING", "STOLEN", "STOLEN(Q)",
+ "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
+ "SCHEDULE", "DESCHEDULE",
+ "END",
+ "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
+ "TERMINATE",
+ "??"
+};
+
+/*
+ * If you're not using GNUC and you're on a 32-bit machine, you're
+ * probably out of luck here. However, since CONCURRENT currently
+ * requires GNUC, I'm not too worried about it. --JSM
+ */
+
+#if !defined(GRAN)
+
+static ullong startTime = 0;
+
+ullong
+msTime(STG_NO_ARGS)
+{
+# ifdef HAVE_GETCLOCK
+ struct timespec tv;
+
+ if (getclock(TIMEOFDAY, &tv) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ EXIT(EXIT_FAILURE);
+ }
+ return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
+# else
+# ifdef HAVE_GETTIMEOFDAY
+ struct timeval tv;
+
+ if (gettimeofday(&tv, NULL) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ EXIT(EXIT_FAILURE);
+ }
+ return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
+# else
+ time_t t;
+ if ((t = time(NULL)) == (time_t) -1) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ EXIT(EXIT_FAILURE);
+ }
+ return t * LL(1000);
+# endif
+# endif
+}
+
+#endif /* !GRAN */
+
+
+void
+DumpGranEvent(name, tso)
+enum gran_event_types name;
+P_ tso;
+{
+ DumpRawGranEvent(CURRENT_PROC, name, TSO_ID(tso));
+}
+
+void
+DumpSparkGranEvent(name, id)
+enum gran_event_types name;
+W_ id;
+{
+ DumpRawGranEvent(CURRENT_PROC, name, id);
+}
+
+void
+DumpGranEventAndNode(name, tso, node, proc)
+enum gran_event_types name;
+P_ tso, node;
+PROC proc;
+{
+ PROC pe = CURRENT_PROC;
+ W_ id;
+
+ char time_string[500]; /*ToDo: kill magic constant */
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+#ifdef PAR
+ id = tso == NULL ? -1 : TSO_ID(tso);
+#else
+ id = TSO_ID(tso);
+#endif
+ if (name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ if (do_gr_binary) {
+ grputw(name);
+ grputw(pe);
+ abort(); /* die please: a single word doesn't represent long long times */
+ grputw(CURRENT_TIME); /* this line is bound to do the wrong thing */
+ grputw(id);
+ } else
+ fprintf(gr_file, "PE %2u [%s]: %s %lx \t0x%lx\t(from %2u)\n",
+ pe, time_string, gran_event_names[name], id, (W_) node, proc);
+}
+
+void
+DumpRawGranEvent(pe, name, id)
+PROC pe;
+enum gran_event_types name;
+W_ id;
+{
+ char time_string[500]; /* ToDo: kill magic constant */
+
+ if (name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+ if (do_gr_binary) {
+ grputw(name);
+ grputw(pe);
+ abort(); /* die please: a single word doesn't represent long long times */
+ grputw(CURRENT_TIME); /* this line is bound to fail */
+ grputw(id);
+ } else
+ fprintf(gr_file, "PE %2u [%s]: %s %lx\n",
+ pe, time_string, gran_event_names[name], id);
+}
+
+void
+DumpGranInfo(pe, tso, mandatory_thread)
+PROC pe;
+P_ tso;
+rtsBool mandatory_thread;
+{
+ char time_string[500]; /* ToDo: kill magic constant */
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+ if (do_gr_binary) {
+ grputw(GR_END);
+ grputw(pe);
+ abort(); /* die please: a single word doesn't represent long long times */
+ grputw(CURRENT_TIME); /* this line is bound to fail */
+ grputw(TSO_ID(tso));
+#ifdef PAR
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+#else
+ grputw(TSO_SPARKNAME(tso));
+ grputw(TSO_STARTEDAT(tso));
+ grputw(TSO_EXPORTED(tso));
+ grputw(TSO_BASICBLOCKS(tso));
+ grputw(TSO_ALLOCS(tso));
+ grputw(TSO_EXECTIME(tso));
+ grputw(TSO_BLOCKTIME(tso));
+ grputw(TSO_BLOCKCOUNT(tso));
+ grputw(TSO_FETCHTIME(tso));
+ grputw(TSO_FETCHCOUNT(tso));
+ grputw(TSO_LOCALSPARKS(tso));
+ grputw(TSO_GLOBALSPARKS(tso));
+#endif
+ grputw(mandatory_thread);
+ } else {
+
+ /*
+ * NB: DumpGranEvent cannot be used because PE may be wrong (as well as the
+ * extra info)
+ */
+ fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
+ ,pe
+ ,time_string
+ ,TSO_ID(tso)
+ ,TSO_SPARKNAME(tso)
+ ,TSO_STARTEDAT(tso)
+ ,TSO_EXPORTED(tso) ? 'T' : 'F'
+ ,TSO_BASICBLOCKS(tso)
+ ,TSO_ALLOCS(tso)
+ ,TSO_EXECTIME(tso)
+ ,TSO_BLOCKTIME(tso)
+ ,TSO_BLOCKCOUNT(tso)
+ ,TSO_FETCHTIME(tso)
+ ,TSO_FETCHCOUNT(tso)
+ ,TSO_LOCALSPARKS(tso)
+ ,TSO_GLOBALSPARKS(tso)
+ ,mandatory_thread ? 'T' : 'F'
+ );
+ }
+}
+
+/*
+ Output a terminate event and an 8-byte time.
+*/
+
+void
+grterminate(v)
+TIME v;
+{
+ DumpGranEvent(GR_TERMINATE, 0);
+
+ if (sizeof(TIME) == 4) {
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ } else {
+ putc(v >> 56l, gr_file);
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ }
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+}
+
+/*
+ Length-coded output: first 3 bits contain length coding
+
+ 00x 1 byte
+ 01x 2 bytes
+ 10x 4 bytes
+ 110 8 bytes
+ 111 5 or 9 bytes
+*/
+
+void
+grputw(v)
+TIME v;
+{
+ if (v <= 0x3fl) {
+ fputc(v & 0x3f, gr_file);
+ } else if (v <= 0x3fffl) {
+ fputc((v >> 8l) | 0x40l, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (v <= 0x3fffffffl) {
+ fputc((v >> 24l) | 0x80l, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (sizeof(TIME) == 4) {
+ fputc(0x70, gr_file);
+ fputc((v >> 24l) & 0xffl, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else {
+ if (v <= 0x3fffffffffffffl)
+ putc((v >> 56l) | 0x60l, gr_file);
+ else {
+ putc(0x70, gr_file);
+ putc((v >> 56l) & 0xffl, gr_file);
+ }
+
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+ }
+}
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gr-simulation]{Granularity Simulation}
+%
+%****************************************************************************
+
+\begin{code}
+#ifdef GRAN
+char gr_filename[32]; /*ToDo: magic short filename constant????? WDP 95/07 */
+I_ do_gr_sim = 0;
+
+int
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ I_ i;
+
+ if (do_gr_sim) {
+ char *extension = do_gr_binary ? "gb" : "gr";
+
+ sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0], extension);
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL) {
+ fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
+ exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */
+ }
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (DoReScheduleOnFetch)
+ setbuf(gr_file, NULL);
+#endif
+
+ fputs("Granularity Simulation for ", gr_file);
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("General Parameters:\n\n", gr_file);
+
+ fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s ????? %s\n",
+ max_proc, DoFairSchedule ? "Fair" : "Unfair",
+ DoThreadMigration ? "" : "Don't ",
+ DoThreadMigration && DoStealThreadsFirst ? " Before Sparks" : "",
+ DoReScheduleOnFetch ? "" : "Don't ");
+
+ fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
+ SimplifiedFetch ? "Simplified Fetch" : (DoReScheduleOnFetch ? "Reschedule on Fetch" : "Block on Fetch"),
+ DoGUMMFetching ? "Many Closures" : "Exactly One Closure");
+ fprintf(gr_file, "Fetch Strategy(%lu): If outstanding fetches %s\n",
+ FetchStrategy,
+ FetchStrategy == 1 ? "only run runnable threads (don't create new ones" :
+ FetchStrategy == 2 ? "create threads only from local sparks" :
+ FetchStrategy == 3 ? "create threads from local or global sparks" :
+ FetchStrategy == 4 ? "create sparks and steal threads if necessary" :
+ "unknown");
+
+ fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
+ gran_threadcreatetime, gran_threadqueuetime);
+ fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
+ gran_threaddescheduletime, gran_threadscheduletime);
+ fprintf(gr_file, "Thread Context-Switch Time %lu\n",
+ gran_threadcontextswitchtime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Communication Metrics:\n\n", gr_file);
+ fprintf(gr_file,
+ "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
+ gran_latency, gran_additional_latency, gran_fetchtime,
+ gran_gunblocktime, gran_lunblocktime);
+ fprintf(gr_file,
+ "Message Creation %lu (+ %lu after send), Message Read %lu\n",
+ gran_mpacktime, gran_mtidytime, gran_munpacktime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Instruction Metrics:\n\n", gr_file);
+ fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
+ gran_arith_cost, gran_branch_cost,
+ gran_load_cost, gran_store_cost, gran_float_cost, gran_heapalloc_cost);
+ fputs("\n\n++++++++++++++++++++\n\n", gr_file);
+ }
+ if (do_gr_binary)
+ grputw(sizeof(TIME));
+
+ Idlers = max_proc;
+ return (0);
+}
+
+void
+end_gr_simulation(STG_NO_ARGS)
+{
+ if (do_gr_sim) {
+ fprintf(stderr, "The simulation is finished. Look at %s for details.\n",
+ gr_filename);
+ fclose(gr_file);
+ }
+}
+
+#endif /* GRAN */
+
+#ifdef PAR
+char gr_filename[50]; /*ToDo: (small) magic constant alert!!!! WDP 95/07 */
+
+I_ do_gr_profile = 0;
+I_ do_sp_profile = 0;
+I_ do_gr_binary = 0;
+
+void
+init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ int i;
+
+ char *extension = do_gr_binary ? "gb" : "gr";
+
+ sprintf(gr_filename, "%0.28s.%03d.%0.2s", prog_argv[0], thisPE, extension);
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL) {
+ fprintf(stderr, "Can't open activity report file %s\n", gr_filename);
+ EXIT(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+ fputc('\n', gr_file);
+
+ startTime = CURRENT_TIME;
+
+ if (startTime > LL(1000000000)) {
+ /* This shouldn't overflow twice */
+ fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
+ (TIME) (startTime / LL(1000000000)),
+ (TIME) (startTime % LL(1000000000)));
+ } else {
+ fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
+ }
+
+ if (do_gr_binary)
+ grputw(sizeof(TIME));
+}
+#endif /* PAR */
+
+#endif /* GRAN || PAR */
+\end{code}
+
+
diff --git a/ghc/runtime/main/Itimer.lc b/ghc/runtime/main/Itimer.lc
new file mode 100644
index 0000000000..87c146064a
--- /dev/null
+++ b/ghc/runtime/main/Itimer.lc
@@ -0,0 +1,84 @@
+%
+% (c) The AQUA Project, Glasgow University, 1995
+%
+%************************************************************************
+%* *
+\section[Itimer.lc]{Interval Timer}
+%* *
+%************************************************************************
+
+The interval timer is used for profiling and for context switching in the
+threaded build. Though POSIX 1003.4 includes a standard interface for
+such things, no one really seems to be implementing them yet. Even
+Solaris 2.3 only seems to provide support for @CLOCK_REAL@, whereas we're
+keen on getting access to @CLOCK_VIRTUAL@.
+
+Hence, we use the old-fashioned @setitimer@ that just about everyone seems
+to support. So much for standards.
+
+\begin{code}
+
+#if defined(USE_COST_CENTRES) || defined(CONCURRENT)
+
+# include "platform.h"
+
+# define NON_POSIX_SOURCE
+
+# include "rtsdefs.h"
+
+/* As recommended in the autoconf manual */
+# ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+# else
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+# endif
+
+int
+initialize_virtual_timer(ms)
+int ms;
+{
+# ifndef HAVE_SETITIMER
+ fprintf(stderr, "No virtual timer on this system\n");
+ return -1;
+# else
+ struct itimerval it;
+
+ it.it_value.tv_sec = ms / 1000;
+ it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec));
+ it.it_interval = it.it_value;
+ return (setitimer(ITIMER_VIRTUAL, &it, NULL));
+# endif
+}
+
+# if 0
+/* This is a potential POSIX version */
+int
+initialize_virtual_timer(ms)
+int ms;
+{
+ struct sigevent se;
+ struct itimerspec it;
+ timer_t tid;
+
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = SIGVTALRM;
+ se.sigev_value.sival_int = SIGVTALRM;
+ if (timer_create(CLOCK_VIRTUAL, &se, &tid)) {
+ fprintf(stderr, "Can't create virtual timer.\n");
+ EXIT(EXIT_FAILURE);
+ }
+ it.it_value.tv_sec = ms / 1000;
+ it.it_value.tv_nsec = 1000000 * (ms - 1000 * it.it_value.tv_sec);
+ it.it_interval = it.it_value;
+ timer_settime(tid, TIMER_RELTIME, &it, NULL);
+}
+# endif
+
+#endif /* USE_COST_CENTRES || CONCURRENT */
+
+\end{code}
diff --git a/ghc/runtime/main/RednCounts.lc b/ghc/runtime/main/RednCounts.lc
new file mode 100644
index 0000000000..142dc8423c
--- /dev/null
+++ b/ghc/runtime/main/RednCounts.lc
@@ -0,0 +1,682 @@
+%
+% (c) The GRASP Project, Glasgow University, 1992-1993
+%
+%************************************************************************
+%* *
+\section[RednCounts.lc]{Stuff for ``ticky-ticky'' profiling}
+%* *
+%************************************************************************
+
+Goes with \tr{imports/RednCounts.lh}; more documentation there.
+
+%************************************************************************
+%* *
+\subsection[RednCounts-counters]{Declare all the counters}
+%* *
+%************************************************************************
+
+\begin{code}
+#define NULL_REG_MAP /* Not threaded */
+
+#include "../storage/SMinternal.h" /* Bad boy, Will (ToDo) */
+
+#if defined(DO_REDN_COUNTING)
+
+extern FILE *tickyfile;
+
+I_ ALLOC_HEAP_ctr = 0;
+I_ ALLOC_HEAP_tot = 0;
+
+PP_ max_SpA; /* set in re_enterable_part_of_main */
+P_ max_SpB;
+
+/* not used at all
+I_ A_STK_REUSE_ctr = 0;
+I_ B_STK_REUSE_ctr = 0;
+*/
+I_ A_STK_STUB_ctr = 0;
+
+I_ ALLOC_FUN_ctr = 0;
+I_ ALLOC_FUN_adm = 0;
+I_ ALLOC_FUN_gds = 0;
+I_ ALLOC_FUN_slp = 0;
+I_ ALLOC_FUN_hst[5] = {0,0,0,0,0};
+I_ ALLOC_THK_ctr = 0;
+I_ ALLOC_THK_adm = 0;
+I_ ALLOC_THK_gds = 0;
+I_ ALLOC_THK_slp = 0;
+I_ ALLOC_THK_hst[5] = {0,0,0,0,0};
+I_ ALLOC_CON_ctr = 0;
+I_ ALLOC_CON_adm = 0;
+I_ ALLOC_CON_gds = 0;
+I_ ALLOC_CON_slp = 0;
+I_ ALLOC_CON_hst[5] = {0,0,0,0,0};
+I_ ALLOC_TUP_ctr = 0;
+I_ ALLOC_TUP_adm = 0;
+I_ ALLOC_TUP_gds = 0;
+I_ ALLOC_TUP_slp = 0;
+I_ ALLOC_TUP_hst[5] = {0,0,0,0,0};
+I_ ALLOC_BH_ctr = 0;
+I_ ALLOC_BH_adm = 0;
+I_ ALLOC_BH_gds = 0;
+I_ ALLOC_BH_slp = 0;
+I_ ALLOC_BH_hst[5] = {0,0,0,0,0};
+/*
+I_ ALLOC_PAP_ctr = 0;
+I_ ALLOC_PAP_adm = 0;
+I_ ALLOC_PAP_gds = 0;
+I_ ALLOC_PAP_slp = 0;
+I_ ALLOC_PAP_hst[5] = {0,0,0,0,0};
+*/
+I_ ALLOC_PRIM_ctr = 0;
+I_ ALLOC_PRIM_adm = 0;
+I_ ALLOC_PRIM_gds = 0;
+I_ ALLOC_PRIM_slp = 0;
+I_ ALLOC_PRIM_hst[5] = {0,0,0,0,0};
+/*
+I_ ALLOC_UPD_CON_ctr = 0;
+I_ ALLOC_UPD_CON_adm = 0;
+I_ ALLOC_UPD_CON_gds = 0;
+I_ ALLOC_UPD_CON_slp = 0;
+I_ ALLOC_UPD_CON_hst[5] = {0,0,0,0,0};
+*/
+I_ ALLOC_UPD_PAP_ctr = 0;
+I_ ALLOC_UPD_PAP_adm = 0;
+I_ ALLOC_UPD_PAP_gds = 0;
+I_ ALLOC_UPD_PAP_slp = 0;
+I_ ALLOC_UPD_PAP_hst[5] = {0,0,0,0,0};
+
+#ifdef CONCURRENT
+I_ ALLOC_STK_ctr = 0;
+I_ ALLOC_STK_adm = 0;
+I_ ALLOC_STK_gds = 0;
+I_ ALLOC_STK_slp = 0;
+I_ ALLOC_STK_hst[5] = {0,0,0,0,0};
+I_ ALLOC_TSO_ctr = 0;
+I_ ALLOC_TSO_adm = 0;
+I_ ALLOC_TSO_gds = 0;
+I_ ALLOC_TSO_slp = 0;
+I_ ALLOC_TSO_hst[5] = {0,0,0,0,0};
+
+#ifdef PAR
+I_ ALLOC_FMBQ_ctr = 0;
+I_ ALLOC_FMBQ_adm = 0;
+I_ ALLOC_FMBQ_gds = 0;
+I_ ALLOC_FMBQ_slp = 0;
+I_ ALLOC_FMBQ_hst[5] = {0,0,0,0,0};
+I_ ALLOC_FME_ctr = 0;
+I_ ALLOC_FME_adm = 0;
+I_ ALLOC_FME_gds = 0;
+I_ ALLOC_FME_slp = 0;
+I_ ALLOC_FME_hst[5] = {0,0,0,0,0};
+I_ ALLOC_BF_ctr = 0;
+I_ ALLOC_BF_adm = 0;
+I_ ALLOC_BF_gds = 0;
+I_ ALLOC_BF_slp = 0;
+I_ ALLOC_BF_hst[5] = {0,0,0,0,0};
+#endif
+#endif
+
+I_ ENT_VIA_NODE_ctr = 0;
+I_ ENT_CON_ctr = 0;
+I_ ENT_FUN_STD_ctr = 0;
+I_ ENT_FUN_DIRECT_ctr = 0;
+I_ ENT_IND_ctr = 0;
+I_ ENT_PAP_ctr = 0;
+I_ ENT_THK_ctr = 0;
+
+I_ RET_NEW_IN_HEAP_ctr = 0;
+I_ RET_NEW_IN_REGS_ctr = 0;
+I_ RET_OLD_IN_HEAP_ctr = 0;
+I_ RET_OLD_IN_REGS_ctr = 0;
+I_ RET_SEMI_BY_DEFAULT_ctr = 0;
+I_ RET_SEMI_IN_HEAP_ctr = 0;
+I_ RET_SEMI_IN_REGS_ctr = 0;
+I_ VEC_RETURN_ctr = 0;
+
+I_ ReturnInRegsNodeValid = 0; /* i.e., False */
+
+I_ UPDF_OMITTED_ctr = 0;
+I_ UPDF_STD_PUSHED_ctr = 0;
+I_ UPDF_CON_PUSHED_ctr = 0;
+I_ UPDF_HOLE_PUSHED_ctr = 0;
+
+I_ UPDF_RCC_PUSHED_ctr = 0;
+I_ UPDF_RCC_OMITTED_ctr = 0;
+
+I_ UPD_EXISTING_ctr = 0;
+I_ UPD_CON_W_NODE_ctr = 0;
+I_ UPD_CON_IN_PLACE_ctr = 0;
+I_ UPD_CON_IN_NEW_ctr = 0;
+I_ UPD_PAP_IN_PLACE_ctr = 0;
+I_ UPD_PAP_IN_NEW_ctr = 0;
+
+I_ UPD_ENTERED_ctr = 0;
+I_ UPD_ENTERED_AGAIN_ctr = 0;
+
+I_ UPD_NEW_IND_ctr = 0;
+I_ UPD_NEW_IN_PLACE_PTRS_ctr = 0;
+I_ UPD_NEW_IN_PLACE_NOPTRS_ctr = 0;
+I_ UPD_OLD_IND_ctr = 0;
+I_ UPD_OLD_IN_PLACE_PTRS_ctr = 0;
+I_ UPD_OLD_IN_PLACE_NOPTRS_ctr = 0;
+
+I_ UPD_IN_PLACE_COPY_ctr = 0;
+\end{code}
+
+\begin{code}
+#if 0
+/* testing only */
+void
+TICKY_PARANOIA(const char *file, I_ line)
+{
+ I_ tot_adm_wds = /* total number of admin words allocated */
+ ALLOC_FUN_adm + ALLOC_THK_adm + ALLOC_CON_adm + ALLOC_TUP_adm +
+ ALLOC_BH_adm /*+ ALLOC_PAP_adm*/ /*+ ALLOC_UPD_CON_adm*/ + ALLOC_UPD_PAP_adm +
+ ALLOC_PRIM_adm;
+ I_ tot_gds_wds = /* total number of words of ``good stuff'' allocated */
+ ALLOC_FUN_gds + ALLOC_THK_gds + ALLOC_CON_gds + ALLOC_TUP_gds +
+ ALLOC_BH_gds /*+ ALLOC_PAP_gds*/ /*+ ALLOC_UPD_CON_gds*/ + ALLOC_UPD_PAP_gds +
+ ALLOC_PRIM_gds;
+ I_ tot_slp_wds = /* total number of ``slop'' words allocated */
+ ALLOC_FUN_slp + ALLOC_THK_slp + ALLOC_CON_slp + ALLOC_TUP_slp +
+ ALLOC_BH_slp /*+ ALLOC_PAP_slp*/ /*+ ALLOC_UPD_CON_slp*/ + ALLOC_UPD_PAP_slp +
+ ALLOC_PRIM_slp;
+ I_ tot_wds = /* total words */
+ tot_adm_wds + tot_gds_wds + tot_slp_wds;
+ if (ALLOC_HEAP_tot != tot_wds) {
+ fprintf(stderr, "Eek! %ld != %ld, %s, %d\n",ALLOC_HEAP_tot, tot_wds, file, line);
+ } else {
+ fprintf(stderr, "OK. %ld != %ld, %s, %d\n",ALLOC_HEAP_tot, tot_wds, file, line);
+ }
+}
+#endif /* 0 */
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[RednCounts-print]{Print out all the counters}
+%* *
+%************************************************************************
+
+\begin{code}
+extern void printRegisteredCounterInfo (STG_NO_ARGS); /* fwd decl */
+
+#define INTAVG(a,b) ((b == 0) ? 0.0 : ((StgDouble) (a) / (StgDouble) (b)))
+#define PC(a) (100.0 * a)
+
+#define AVG(thing) \
+ StgDouble CAT2(avg,thing) = INTAVG(CAT2(tot,thing),CAT2(ctr,thing))
+
+void
+PrintRednCountInfo()
+{
+ I_ tot_allocs = /* total number of things allocated */
+ ALLOC_FUN_ctr + ALLOC_THK_ctr + ALLOC_CON_ctr + ALLOC_TUP_ctr +
+#ifdef CONCURRENT
+ ALLOC_STK_ctr + ALLOC_TSO_ctr +
+#ifdef PAR
+ ALLOC_FMBQ_ctr + ALLOC_FME_ctr + ALLOC_BF_ctr +
+#endif
+#endif
+ ALLOC_BH_ctr /*+ ALLOC_PAP_ctr*/ /*+ ALLOC_UPD_CON_ctr*/ + ALLOC_UPD_PAP_ctr +
+ ALLOC_PRIM_ctr;
+ I_ tot_adm_wds = /* total number of admin words allocated */
+ ALLOC_FUN_adm + ALLOC_THK_adm + ALLOC_CON_adm + ALLOC_TUP_adm +
+#ifdef CONCURRENT
+ ALLOC_STK_adm + ALLOC_TSO_adm +
+#ifdef PAR
+ ALLOC_FMBQ_adm + ALLOC_FME_adm + ALLOC_BF_adm +
+#endif
+#endif
+ ALLOC_BH_adm /*+ ALLOC_PAP_adm*/ /*+ ALLOC_UPD_CON_adm*/ + ALLOC_UPD_PAP_adm +
+ ALLOC_PRIM_adm;
+ I_ tot_gds_wds = /* total number of words of ``good stuff'' allocated */
+ ALLOC_FUN_gds + ALLOC_THK_gds + ALLOC_CON_gds + ALLOC_TUP_gds +
+#ifdef CONCURRENT
+ ALLOC_STK_gds + ALLOC_TSO_gds +
+#ifdef PAR
+ ALLOC_FMBQ_gds + ALLOC_FME_gds + ALLOC_BF_gds +
+#endif
+#endif
+ ALLOC_BH_gds /*+ ALLOC_PAP_gds*/ /*+ ALLOC_UPD_CON_gds*/ + ALLOC_UPD_PAP_gds +
+ ALLOC_PRIM_gds;
+ I_ tot_slp_wds = /* total number of ``slop'' words allocated */
+ ALLOC_FUN_slp + ALLOC_THK_slp + ALLOC_CON_slp + ALLOC_TUP_slp +
+#ifdef CONCURRENT
+ ALLOC_STK_slp + ALLOC_TSO_slp +
+#ifdef PAR
+ ALLOC_FMBQ_slp + ALLOC_FME_slp + ALLOC_BF_slp +
+#endif
+#endif
+ ALLOC_BH_slp /*+ ALLOC_PAP_slp*/ /*+ ALLOC_UPD_CON_slp*/ + ALLOC_UPD_PAP_slp +
+ ALLOC_PRIM_slp;
+ I_ tot_wds = /* total words */
+ tot_adm_wds + tot_gds_wds + tot_slp_wds;
+
+ I_ tot_enters =
+ ENT_CON_ctr + ENT_FUN_DIRECT_ctr +
+ ENT_IND_ctr + ENT_PAP_ctr + ENT_THK_ctr;
+ I_ jump_direct_enters =
+ tot_enters - ENT_VIA_NODE_ctr;
+ I_ bypass_enters =
+ ENT_FUN_DIRECT_ctr -
+ (ENT_FUN_STD_ctr - UPD_PAP_IN_PLACE_ctr - UPD_PAP_IN_NEW_ctr);
+
+ I_ tot_returns_in_regs =
+ RET_NEW_IN_REGS_ctr + RET_OLD_IN_REGS_ctr + RET_SEMI_IN_REGS_ctr;
+ I_ tot_returns_in_heap =
+ RET_NEW_IN_HEAP_ctr + RET_OLD_IN_HEAP_ctr + RET_SEMI_IN_HEAP_ctr + RET_SEMI_BY_DEFAULT_ctr/*???*/;
+ I_ tot_returns_of_new =
+ RET_NEW_IN_REGS_ctr + RET_NEW_IN_HEAP_ctr;
+ I_ tot_returns_of_old = /* NB: NOT USED ???! 94/05 WDP */
+ RET_OLD_IN_REGS_ctr + RET_OLD_IN_HEAP_ctr +
+ RET_SEMI_BY_DEFAULT_ctr + RET_SEMI_IN_HEAP_ctr + RET_SEMI_IN_REGS_ctr /*???*/;
+
+ I_ tot_returns =
+ tot_returns_in_regs + tot_returns_in_heap;
+
+ I_ tot_upd_frames =
+ UPDF_STD_PUSHED_ctr + UPDF_CON_PUSHED_ctr; /*DBH*/
+
+ I_ con_updates =
+ UPD_CON_W_NODE_ctr + UPD_CON_IN_PLACE_ctr + UPD_CON_IN_NEW_ctr;
+ I_ pap_updates =
+ UPD_PAP_IN_PLACE_ctr + UPD_PAP_IN_NEW_ctr;
+ I_ tot_updates =
+ UPD_EXISTING_ctr + con_updates + pap_updates;
+ I_ tot_in_place_updates =
+ UPD_CON_IN_PLACE_ctr + UPD_PAP_IN_PLACE_ctr;
+
+ I_ tot_new_updates =
+ UPD_NEW_IN_PLACE_NOPTRS_ctr + UPD_NEW_IN_PLACE_PTRS_ctr + UPD_NEW_IND_ctr;
+ I_ tot_old_updates =
+ UPD_OLD_IN_PLACE_NOPTRS_ctr + UPD_OLD_IN_PLACE_PTRS_ctr + UPD_OLD_IND_ctr;
+ I_ tot_gengc_updates =
+ tot_new_updates + tot_old_updates;
+
+ fprintf(tickyfile,"\n\nALLOCATIONS: %ld (%ld words total: %ld admin, %ld goods, %ld slop)\n",
+ tot_allocs, tot_wds, tot_adm_wds, tot_gds_wds, tot_slp_wds);
+ fprintf(tickyfile,"\t\t\t\ttotal words:\t 2 3 4 5 6+\n");
+
+#define ALLOC_HISTO_MAGIC(categ) \
+ (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[0], CAT3(ALLOC_,categ,_ctr)))), \
+ (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[1], CAT3(ALLOC_,categ,_ctr)))), \
+ (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[2], CAT3(ALLOC_,categ,_ctr)))), \
+ (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[3], CAT3(ALLOC_,categ,_ctr)))), \
+ (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[4], CAT3(ALLOC_,categ,_ctr))))
+
+ fprintf(tickyfile,"%7ld (%5.1f%%) function values",
+ ALLOC_FUN_ctr,
+ PC(INTAVG(ALLOC_FUN_ctr, tot_allocs)));
+ if (ALLOC_FUN_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FUN));
+
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) thunks",
+ ALLOC_THK_ctr,
+ PC(INTAVG(ALLOC_THK_ctr, tot_allocs)));
+ if (ALLOC_THK_ctr != 0)
+ fprintf(tickyfile,"\t\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(THK));
+
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) data values",
+ ALLOC_CON_ctr,
+ PC(INTAVG(ALLOC_CON_ctr, tot_allocs)));
+ if (ALLOC_CON_ctr != 0)
+ fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(CON));
+
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) big tuples",
+ ALLOC_TUP_ctr,
+ PC(INTAVG(ALLOC_TUP_ctr, tot_allocs)));
+ if (ALLOC_TUP_ctr != 0)
+ fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TUP));
+
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) black holes",
+ ALLOC_BH_ctr,
+ PC(INTAVG(ALLOC_BH_ctr, tot_allocs)));
+ if (ALLOC_BH_ctr != 0)
+ fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BH));
+
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) prim things",
+ ALLOC_PRIM_ctr,
+ PC(INTAVG(ALLOC_PRIM_ctr, tot_allocs)));
+ if (ALLOC_PRIM_ctr != 0)
+ fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PRIM));
+
+#if 0
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) partial applications",
+ ALLOC_PAP_ctr,
+ PC(INTAVG(ALLOC_PAP_ctr, tot_allocs)));
+ if (ALLOC_PAP_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PAP));
+#endif /* 0 */
+
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) partial applications",
+ ALLOC_UPD_PAP_ctr,
+ PC(INTAVG(ALLOC_UPD_PAP_ctr, tot_allocs)));
+ if (ALLOC_UPD_PAP_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(UPD_PAP));
+
+#if 0
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) data-value updates",
+ ALLOC_UPD_CON_ctr,
+ PC(INTAVG(ALLOC_UPD_CON_ctr, tot_allocs)));
+ if (ALLOC_UPD_CON_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(UPD_CON));
+#endif /* 0 */
+
+#ifdef CONCURRENT
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) stack objects",
+ ALLOC_STK_ctr,
+ PC(INTAVG(ALLOC_STK_ctr, tot_allocs)));
+ if (ALLOC_STK_ctr != 0)
+ fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(STK));
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_TSO_ctr,
+ PC(INTAVG(ALLOC_TSO_ctr, tot_allocs)));
+ if (ALLOC_TSO_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TSO));
+#ifdef PAR
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_FMBQ_ctr,
+ PC(INTAVG(ALLOC_FMBQ_ctr, tot_allocs)));
+ if (ALLOC_FMBQ_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FMBQ));
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_FME_ctr,
+ PC(INTAVG(ALLOC_FME_ctr, tot_allocs)));
+ if (ALLOC_FME_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FME));
+ fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_BF_ctr,
+ PC(INTAVG(ALLOC_BF_ctr, tot_allocs)));
+ if (ALLOC_BF_ctr != 0)
+ fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BF));
+#endif
+#endif
+ fprintf(tickyfile,"\n");
+
+ fprintf(tickyfile,"\nTotal storage-manager allocations: %ld (%ld words)\n\t[%ld words lost to speculative heap-checks]\n", ALLOC_HEAP_ctr, ALLOC_HEAP_tot, ALLOC_HEAP_tot - tot_wds);
+
+ fprintf(tickyfile,"\nSTACK USAGE:\n"); /* NB: some bits are direction sensitive */
+ fprintf(tickyfile,"\tA stack slots stubbed: %ld\n", A_STK_STUB_ctr);
+/* not used at all
+ fprintf(tickyfile,"\tA stack slots re-used: %ld\n", A_STK_REUSE_ctr);
+ fprintf(tickyfile,"\tB stack slots re-used: %ld\n", B_STK_REUSE_ctr);
+*/
+#ifndef CONCURRENT
+ fprintf(tickyfile,"\tA stack max. depth: %ld words\n",
+ (I_) (stackInfo.botA - max_SpA));
+ fprintf(tickyfile,"\tB stack max. depth: %ld words\n",
+ (I_) (max_SpB - stackInfo.botB)); /* And cheating, too (ToDo) */
+#endif
+
+ fprintf(tickyfile,"\nENTERS: %ld of which %ld (%.1f%%) direct to the entry code\n\t\t [the rest indirected via Node's info ptr]\n",
+ tot_enters,
+ jump_direct_enters,
+ PC(INTAVG(jump_direct_enters,tot_enters)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) thunks\n",
+ ENT_THK_ctr,
+ PC(INTAVG(ENT_THK_ctr,tot_enters)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) data values\n",
+ ENT_CON_ctr,
+ PC(INTAVG(ENT_CON_ctr,tot_enters)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) function values\n\t\t [of which %ld (%.1f%%) bypassed arg-satisfaction chk]\n",
+ ENT_FUN_DIRECT_ctr,
+ PC(INTAVG(ENT_FUN_DIRECT_ctr,tot_enters)),
+ bypass_enters,
+ PC(INTAVG(bypass_enters,ENT_FUN_DIRECT_ctr)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) partial applications\n",
+ ENT_PAP_ctr,
+ PC(INTAVG(ENT_PAP_ctr,tot_enters)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) indirections\n",
+ ENT_IND_ctr,
+ PC(INTAVG(ENT_IND_ctr,tot_enters)));
+
+ fprintf(tickyfile,"\nRETURNS: %ld\n", tot_returns);
+ fprintf(tickyfile,"%7ld (%5.1f%%) in registers [the rest in the heap]\n",
+ tot_returns_in_regs,
+ PC(INTAVG(tot_returns_in_regs,tot_returns)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) from entering a new constructor\n\t\t [the rest from entering an existing constructor]\n",
+ tot_returns_of_new,
+ PC(INTAVG(tot_returns_of_new,tot_returns)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) vectored [the rest unvectored]\n",
+ VEC_RETURN_ctr,
+ PC(INTAVG(VEC_RETURN_ctr,tot_returns)));
+
+ fprintf(tickyfile,"\nUPDATE FRAMES: %ld (%ld omitted from thunks)\n",
+ tot_upd_frames,
+ UPDF_OMITTED_ctr);
+ fprintf(tickyfile,"%7ld (%5.1f%%) standard frames\n",
+ UPDF_STD_PUSHED_ctr,
+ PC(INTAVG(UPDF_STD_PUSHED_ctr,tot_upd_frames)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) constructor frames\n",
+ UPDF_CON_PUSHED_ctr,
+ PC(INTAVG(UPDF_CON_PUSHED_ctr,tot_upd_frames)));
+ fprintf(tickyfile,"\t\t [of which %ld (%.1f%%) were for black-holes]\n",
+ UPDF_HOLE_PUSHED_ctr,
+ PC(INTAVG(UPDF_HOLE_PUSHED_ctr,UPDF_CON_PUSHED_ctr))); /*DBH*/
+
+ if (UPDF_RCC_PUSHED_ctr != 0)
+ fprintf(tickyfile,"%7ld restore cost centre frames (%ld omitted)\n",
+ UPDF_RCC_PUSHED_ctr,
+ UPDF_RCC_OMITTED_ctr);
+
+ fprintf(tickyfile,"\nUPDATES: %ld\n", tot_updates);
+ fprintf(tickyfile,"%7ld (%5.1f%%) data values\n\t\t [%ld in place, %ld allocated new space, %ld with Node]\n",
+ con_updates,
+ PC(INTAVG(con_updates,tot_updates)),
+ UPD_CON_IN_PLACE_ctr, UPD_CON_IN_NEW_ctr, UPD_CON_W_NODE_ctr);
+ fprintf(tickyfile,"%7ld (%5.1f%%) partial applications\n\t\t [%ld in place, %ld allocated new space]\n",
+ pap_updates,
+ PC(INTAVG(pap_updates,tot_updates)),
+ UPD_PAP_IN_PLACE_ctr, UPD_PAP_IN_NEW_ctr);
+ fprintf(tickyfile,"%7ld (%5.1f%%) updates to existing heap objects\n",
+ UPD_EXISTING_ctr,
+ PC(INTAVG(UPD_EXISTING_ctr,tot_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) in-place updates copied\n",
+ UPD_IN_PLACE_COPY_ctr,
+ PC(INTAVG(UPD_IN_PLACE_COPY_ctr,tot_in_place_updates)));
+ if (UPD_ENTERED_ctr != 0) {
+ fprintf(tickyfile,"%7ld (%5.1f%%) subsequently entered\n",
+ UPD_ENTERED_ctr,
+ PC(INTAVG(UPD_ENTERED_ctr,tot_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) subsequently entered more than once\n",
+ UPD_ENTERED_AGAIN_ctr,
+ PC(INTAVG(UPD_ENTERED_AGAIN_ctr,tot_updates)));
+ }
+
+ if (tot_gengc_updates != 0) {
+ fprintf(tickyfile,"\nNEW GEN UPDATES: %ld (%5.1f%%)\n",
+ tot_new_updates,
+ PC(INTAVG(tot_new_updates,tot_gengc_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) indirections\n",
+ UPD_NEW_IND_ctr,
+ PC(INTAVG(UPD_NEW_IND_ctr,tot_gengc_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) inplace with ptrs\n",
+ UPD_NEW_IN_PLACE_PTRS_ctr,
+ PC(INTAVG(UPD_NEW_IN_PLACE_PTRS_ctr,tot_gengc_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) inplace without ptrs\n",
+ UPD_NEW_IN_PLACE_NOPTRS_ctr,
+ PC(INTAVG(UPD_NEW_IN_PLACE_NOPTRS_ctr,tot_gengc_updates)));
+ fprintf(tickyfile,"\nOLD GEN UPDATES: %ld (%5.1f%%)\n",
+ tot_old_updates,
+ PC(INTAVG(tot_old_updates,tot_gengc_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) indirections\n",
+ UPD_OLD_IND_ctr,
+ PC(INTAVG(UPD_OLD_IND_ctr,tot_gengc_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) inplace with ptrs\n",
+ UPD_OLD_IN_PLACE_PTRS_ctr,
+ PC(INTAVG(UPD_OLD_IN_PLACE_PTRS_ctr,tot_gengc_updates)));
+ fprintf(tickyfile,"%7ld (%5.1f%%) inplace without ptrs\n",
+ UPD_OLD_IN_PLACE_NOPTRS_ctr,
+ PC(INTAVG(UPD_OLD_IN_PLACE_NOPTRS_ctr,tot_gengc_updates)));
+ }
+
+ printRegisteredCounterInfo();
+
+ fprintf(tickyfile,"\n**************************************************\n");
+ fprintf(tickyfile,"%6ld ALLOC_HEAP_ctr\n", ALLOC_HEAP_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_HEAP_tot\n", ALLOC_HEAP_tot);
+
+#ifndef CONCURRENT
+ fprintf(tickyfile,"%6ld HWM_SpA\n", (I_) (stackInfo.botA - max_SpA));
+ fprintf(tickyfile,"%6ld HWM_SpB\n", (I_) (max_SpB - stackInfo.botB));
+#endif
+
+ fprintf(tickyfile,"%6ld ALLOC_FUN_ctr\n", ALLOC_FUN_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_FUN_adm\n", ALLOC_FUN_adm);
+ fprintf(tickyfile,"%6ld ALLOC_FUN_gds\n", ALLOC_FUN_gds);
+ fprintf(tickyfile,"%6ld ALLOC_FUN_slp\n", ALLOC_FUN_slp);
+ fprintf(tickyfile,"%6ld ALLOC_THK_ctr\n", ALLOC_THK_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_THK_adm\n", ALLOC_THK_adm);
+ fprintf(tickyfile,"%6ld ALLOC_THK_gds\n", ALLOC_THK_gds);
+ fprintf(tickyfile,"%6ld ALLOC_THK_slp\n", ALLOC_THK_slp);
+ fprintf(tickyfile,"%6ld ALLOC_CON_ctr\n", ALLOC_CON_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_CON_adm\n", ALLOC_CON_adm);
+ fprintf(tickyfile,"%6ld ALLOC_CON_gds\n", ALLOC_CON_gds);
+ fprintf(tickyfile,"%6ld ALLOC_CON_slp\n", ALLOC_CON_slp);
+ fprintf(tickyfile,"%6ld ALLOC_TUP_ctr\n", ALLOC_TUP_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_TUP_adm\n", ALLOC_TUP_adm);
+ fprintf(tickyfile,"%6ld ALLOC_TUP_gds\n", ALLOC_TUP_gds);
+ fprintf(tickyfile,"%6ld ALLOC_TUP_slp\n", ALLOC_TUP_slp);
+ fprintf(tickyfile,"%6ld ALLOC_BH_ctr\n", ALLOC_BH_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_BH_adm\n", ALLOC_BH_adm);
+ fprintf(tickyfile,"%6ld ALLOC_BH_gds\n", ALLOC_BH_gds);
+ fprintf(tickyfile,"%6ld ALLOC_BH_slp\n", ALLOC_BH_slp);
+/*
+ fprintf(tickyfile,"%6ld ALLOC_PAP_ctr\n", ALLOC_PAP_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_PAP_adm\n", ALLOC_PAP_adm);
+ fprintf(tickyfile,"%6ld ALLOC_PAP_gds\n", ALLOC_PAP_gds);
+ fprintf(tickyfile,"%6ld ALLOC_PAP_slp\n", ALLOC_PAP_slp);
+*/
+ fprintf(tickyfile,"%6ld ALLOC_PRIM_ctr\n", ALLOC_PRIM_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_PRIM_adm\n", ALLOC_PRIM_adm);
+ fprintf(tickyfile,"%6ld ALLOC_PRIM_gds\n", ALLOC_PRIM_gds);
+ fprintf(tickyfile,"%6ld ALLOC_PRIM_slp\n", ALLOC_PRIM_slp);
+/*
+ fprintf(tickyfile,"%6ld ALLOC_UPD_CON_ctr\n", ALLOC_UPD_CON_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_UPD_CON_adm\n", ALLOC_UPD_CON_adm);
+ fprintf(tickyfile,"%6ld ALLOC_UPD_CON_gds\n", ALLOC_UPD_CON_gds);
+ fprintf(tickyfile,"%6ld ALLOC_UPD_CON_slp\n", ALLOC_UPD_CON_slp);
+*/
+ fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_ctr\n", ALLOC_UPD_PAP_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_adm\n", ALLOC_UPD_PAP_adm);
+ fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_gds\n", ALLOC_UPD_PAP_gds);
+ fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_slp\n", ALLOC_UPD_PAP_slp);
+
+#ifdef CONCURRENT
+ fprintf(tickyfile,"%6ld ALLOC_STK_ctr\n", ALLOC_STK_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_STK_adm\n", ALLOC_STK_adm);
+ fprintf(tickyfile,"%6ld ALLOC_STK_gds\n", ALLOC_STK_gds);
+ fprintf(tickyfile,"%6ld ALLOC_STK_slp\n", ALLOC_STK_slp);
+ fprintf(tickyfile,"%6ld ALLOC_TSO_ctr\n", ALLOC_TSO_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_TSO_adm\n", ALLOC_TSO_adm);
+ fprintf(tickyfile,"%6ld ALLOC_TSO_gds\n", ALLOC_TSO_gds);
+ fprintf(tickyfile,"%6ld ALLOC_TSO_slp\n", ALLOC_TSO_slp);
+#ifdef PAR
+ fprintf(tickyfile,"%6ld ALLOC_FMBQ_ctr\n", ALLOC_FMBQ_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_FMBQ_adm\n", ALLOC_FMBQ_adm);
+ fprintf(tickyfile,"%6ld ALLOC_FMBQ_gds\n", ALLOC_FMBQ_gds);
+ fprintf(tickyfile,"%6ld ALLOC_FMBQ_slp\n", ALLOC_FMBQ_slp);
+ fprintf(tickyfile,"%6ld ALLOC_FME_ctr\n", ALLOC_FME_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_FME_adm\n", ALLOC_FME_adm);
+ fprintf(tickyfile,"%6ld ALLOC_FME_gds\n", ALLOC_FME_gds);
+ fprintf(tickyfile,"%6ld ALLOC_FME_slp\n", ALLOC_FME_slp);
+ fprintf(tickyfile,"%6ld ALLOC_BF_ctr\n", ALLOC_BF_ctr);
+ fprintf(tickyfile,"%6ld ALLOC_BF_adm\n", ALLOC_BF_adm);
+ fprintf(tickyfile,"%6ld ALLOC_BF_gds\n", ALLOC_BF_gds);
+ fprintf(tickyfile,"%6ld ALLOC_BF_slp\n", ALLOC_BF_slp);
+#endif
+#endif
+
+ fprintf(tickyfile,"%6ld ENT_VIA_NODE_ctr\n", ENT_VIA_NODE_ctr);
+ fprintf(tickyfile,"%6ld ENT_CON_ctr\n", ENT_CON_ctr);
+ fprintf(tickyfile,"%6ld ENT_FUN_STD_ctr\n", ENT_FUN_STD_ctr);
+ fprintf(tickyfile,"%6ld ENT_FUN_DIRECT_ctr\n", ENT_FUN_DIRECT_ctr);
+ fprintf(tickyfile,"%6ld ENT_IND_ctr\n", ENT_IND_ctr);
+ fprintf(tickyfile,"%6ld ENT_PAP_ctr\n", ENT_PAP_ctr);
+ fprintf(tickyfile,"%6ld ENT_THK_ctr\n", ENT_THK_ctr);
+
+ fprintf(tickyfile,"%6ld RET_NEW_IN_HEAP_ctr\n", RET_NEW_IN_HEAP_ctr);
+ fprintf(tickyfile,"%6ld RET_NEW_IN_REGS_ctr\n", RET_NEW_IN_REGS_ctr);
+ fprintf(tickyfile,"%6ld RET_OLD_IN_HEAP_ctr\n", RET_OLD_IN_HEAP_ctr);
+ fprintf(tickyfile,"%6ld RET_OLD_IN_REGS_ctr\n", RET_OLD_IN_REGS_ctr);
+ fprintf(tickyfile,"%6ld RET_SEMI_BY_DEFAULT_ctr\n", RET_SEMI_BY_DEFAULT_ctr);
+ fprintf(tickyfile,"%6ld RET_SEMI_IN_HEAP_ctr\n", RET_SEMI_IN_HEAP_ctr);
+ fprintf(tickyfile,"%6ld RET_SEMI_IN_REGS_ctr\n", RET_SEMI_IN_REGS_ctr);
+ fprintf(tickyfile,"%6ld VEC_RETURN_ctr\n", VEC_RETURN_ctr);
+
+ fprintf(tickyfile,"%6ld UPDF_OMITTED_ctr\n", UPDF_OMITTED_ctr);
+ fprintf(tickyfile,"%6ld UPDF_STD_PUSHED_ctr\n", UPDF_STD_PUSHED_ctr);
+ fprintf(tickyfile,"%6ld UPDF_CON_PUSHED_ctr\n", UPDF_CON_PUSHED_ctr);
+ fprintf(tickyfile,"%6ld UPDF_HOLE_PUSHED_ctr\n", UPDF_HOLE_PUSHED_ctr);
+
+ fprintf(tickyfile,"%6ld UPDF_RCC_PUSHED_ctr\n", UPDF_RCC_PUSHED_ctr);
+ fprintf(tickyfile,"%6ld UPDF_RCC_OMITTED_ctr\n", UPDF_RCC_OMITTED_ctr);
+
+ fprintf(tickyfile,"%6ld UPD_EXISTING_ctr\n", UPD_EXISTING_ctr);
+ fprintf(tickyfile,"%6ld UPD_CON_W_NODE_ctr\n", UPD_CON_W_NODE_ctr);
+ fprintf(tickyfile,"%6ld UPD_CON_IN_PLACE_ctr\n", UPD_CON_IN_PLACE_ctr);
+ fprintf(tickyfile,"%6ld UPD_CON_IN_NEW_ctr\n", UPD_CON_IN_NEW_ctr);
+ fprintf(tickyfile,"%6ld UPD_PAP_IN_PLACE_ctr\n", UPD_PAP_IN_PLACE_ctr);
+ fprintf(tickyfile,"%6ld UPD_PAP_IN_NEW_ctr\n", UPD_PAP_IN_NEW_ctr);
+ fprintf(tickyfile,"%6ld UPD_ENTERED_ctr\n", UPD_ENTERED_ctr);
+ fprintf(tickyfile,"%6ld UPD_ENTERED_AGAIN_ctr\n",UPD_ENTERED_AGAIN_ctr);
+
+ fprintf(tickyfile,"%6ld UPD_NEW_IND_ctr\n", UPD_NEW_IND_ctr);
+ fprintf(tickyfile,"%6ld UPD_NEW_IN_PLACE_PTRS_ctr\n", UPD_NEW_IN_PLACE_PTRS_ctr);
+ fprintf(tickyfile,"%6ld UPD_NEW_IN_PLACE_NOPTRS_ctr\n", UPD_NEW_IN_PLACE_NOPTRS_ctr);
+ fprintf(tickyfile,"%6ld UPD_OLD_IND_ctr\n", UPD_OLD_IND_ctr);
+ fprintf(tickyfile,"%6ld UPD_OLD_IN_PLACE_PTRS_ctr\n", UPD_OLD_IN_PLACE_PTRS_ctr);
+ fprintf(tickyfile,"%6ld UPD_OLD_IN_PLACE_NOPTRS_ctr\n", UPD_OLD_IN_PLACE_NOPTRS_ctr);
+}
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[RednCounts-ent-counters]{Handle named entry counters}
+%* *
+%************************************************************************
+
+Data structure used in ``registering'' one of these counters.
+\begin{code}
+struct ent_counter *ListOfEntryCtrs = NULL; /* root of list of them */
+\end{code}
+
+To print out all the registered-counter info:
+\begin{code}
+void
+printRegisteredCounterInfo ( STG_NO_ARGS )
+{
+    struct ent_counter *ec;
+
+    /* Print a separator line only when at least one counter is registered. */
+    if ( ListOfEntryCtrs != NULL ) {
+	fprintf(tickyfile,"\n**************************************************\n");
+    }
+
+    /* One line per registered entry counter. */
+    for (ec = ListOfEntryCtrs; ec != NULL; ec = ec->link) {
+	/* Fields common to every counter... */
+	fprintf(tickyfile, "%-40s%u\t%u\t%u\t%-16s%ld",
+		ec->f_str,
+		ec->arity,
+		ec->Astk_args,
+		ec->Bstk_args,
+		ec->f_arg_kinds,
+		ec->ctr);
+
+	/* ...then the wrapper info, when the counter has any. */
+	if ( ec->wrap_str != NULL ) {
+	    fprintf(tickyfile, "\t%s\t%s\n",
+		    ec->wrap_str,
+		    ec->wrap_arg_kinds);
+	} else {
+	    fprintf(tickyfile, "\n");
+	}
+    }
+}
+\end{code}
+
+That's all, folks.
+\begin{code}
+#endif /* DO_REDN_COUNTING */
+\end{code}
diff --git a/ghc/runtime/main/SMRep.lc b/ghc/runtime/main/SMRep.lc
new file mode 100644
index 0000000000..2609195521
--- /dev/null
+++ b/ghc/runtime/main/SMRep.lc
@@ -0,0 +1,204 @@
+%
+% (c) The AQUA Project, Glasgow University, 1994
+%
+
+% Guilty party: BOS
+
+%************************************************************************
+%* *
+\section[SMRep.lc]{Global rep tables}
+%* *
+%************************************************************************
+
+These are the single, global static instances of each rep table type.
+
+\begin{code}
+#define COMPILING_REP_LC
+
+#include "rtsdefs.h"
+#include "../storage/SMinternal.h"
+
+EXTFUN(_PRIn_0);
+EXTFUN(_PRIn_1);
+EXTFUN(_PRIn_2);
+EXTFUN(_PRIn_3);
+EXTFUN(_PRIn_4);
+EXTFUN(_PRIn_5);
+EXTFUN(_PRIn_6);
+EXTFUN(_PRIn_7);
+EXTFUN(_PRIn_8);
+EXTFUN(_PRIn_9);
+EXTFUN(_PRIn_10);
+EXTFUN(_PRIn_11);
+EXTFUN(_PRIn_12);
+
+/* SPEC_x_RTBL(size,ptrs) */
+
+SPEC_N_RTBL(1,0);
+SPEC_N_RTBL(1,1);
+SPEC_N_RTBL(2,0);
+SPEC_N_RTBL(2,1);
+SPEC_N_RTBL(2,2);
+SPEC_N_RTBL(3,0);
+SPEC_N_RTBL(3,1);
+SPEC_N_RTBL(3,2);
+SPEC_N_RTBL(3,3);
+SPEC_N_RTBL(4,0);
+SPEC_N_RTBL(4,4);
+SPEC_N_RTBL(5,0);
+SPEC_N_RTBL(5,5);
+SPEC_N_RTBL(6,6);
+SPEC_N_RTBL(7,7);
+SPEC_N_RTBL(8,8);
+SPEC_N_RTBL(9,9);
+SPEC_N_RTBL(10,10);
+SPEC_N_RTBL(11,11);
+SPEC_N_RTBL(12,12);
+
+SPEC_S_RTBL(1,0);
+SPEC_S_RTBL(1,1);
+SPEC_S_RTBL(2,0);
+SPEC_S_RTBL(2,1);
+SPEC_S_RTBL(2,2);
+SPEC_S_RTBL(3,0);
+SPEC_S_RTBL(3,1);
+SPEC_S_RTBL(3,2);
+SPEC_S_RTBL(3,3);
+SPEC_S_RTBL(4,0);
+SPEC_S_RTBL(4,4);
+SPEC_S_RTBL(5,0);
+SPEC_S_RTBL(5,5);
+SPEC_S_RTBL(6,6);
+SPEC_S_RTBL(7,7);
+SPEC_S_RTBL(8,8);
+SPEC_S_RTBL(9,9);
+SPEC_S_RTBL(10,10);
+SPEC_S_RTBL(11,11);
+SPEC_S_RTBL(12,12);
+
+SPEC_U_RTBL(1,0);
+SPEC_U_RTBL(1,1);
+SPEC_U_RTBL(2,0);
+SPEC_U_RTBL(2,1);
+SPEC_U_RTBL(2,2);
+SPEC_U_RTBL(3,0);
+SPEC_U_RTBL(3,1);
+SPEC_U_RTBL(3,2);
+SPEC_U_RTBL(3,3);
+SPEC_U_RTBL(4,0);
+SPEC_U_RTBL(4,4);
+SPEC_U_RTBL(5,0);
+SPEC_U_RTBL(5,5);
+SPEC_U_RTBL(6,6);
+SPEC_U_RTBL(7,7);
+SPEC_U_RTBL(8,8);
+SPEC_U_RTBL(9,9);
+SPEC_U_RTBL(10,10);
+SPEC_U_RTBL(11,11);
+SPEC_U_RTBL(12,12);
+
+/* SELECT_RTBL(size,ptrs,select_word_i) */
+
+SELECT_RTBL(2,1,0);
+SELECT_RTBL(2,1,1);
+SELECT_RTBL(2,1,2);
+SELECT_RTBL(2,1,3);
+SELECT_RTBL(2,1,4);
+SELECT_RTBL(2,1,5);
+SELECT_RTBL(2,1,6);
+SELECT_RTBL(2,1,7);
+SELECT_RTBL(2,1,8);
+SELECT_RTBL(2,1,9);
+SELECT_RTBL(2,1,10);
+SELECT_RTBL(2,1,11);
+SELECT_RTBL(2,1,12);
+
+GEN_N_RTBL();
+GEN_S_RTBL();
+GEN_U_RTBL();
+DYN_RTBL();
+TUPLE_RTBL();
+DATA_RTBL();
+MUTUPLE_RTBL();
+IMMUTUPLE_RTBL();
+STATIC_RTBL();
+
+/* Rep table for MallocPtr (foreign object) closures -- sequential RTS only.
+   NOTE(review): macro semantics live in the SM headers; grouping comments
+   below are organisational, not behavioural claims. */
+#ifndef PAR
+MallocPtr_RTBL();
+#endif
+
+/* Black-hole rep tables (N and U variants -- see the macro definition). */
+BH_RTBL(N);
+BH_RTBL(U);
+
+/* Indirections, CAFs and small "boxed literal" closures. */
+IND_RTBL();
+PERM_IND_RTBL();
+CAF_RTBL();
+CONST_RTBL();
+CHARLIKE_RTBL();
+INTLIKE_RTBL();
+
+CAF_EVAC_UPD_RTBL();
+
+/* Forward-reference rep tables used during garbage collection; the
+   generational collector (GCgn) additionally needs the old/new/old-root
+   variants. */
+#ifdef GCgn
+FORWARDREF_RTBL(_Evacuate_Old_Forward_Ref);
+FORWARDREF_RTBL(_Evacuate_New_Forward_Ref);
+FORWARDREF_RTBL(_Evacuate_OldRoot_Forward);
+#endif
+FORWARDREF_RTBL(_Evacuate_Forward_Ref);
+
+#ifdef _INFO_MARKING
+DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextRoot,_Dummy_PRReturn_entry);
+DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextCAF,_Dummy_PRReturn_entry);
+# ifdef CONCURRENT
+DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextSpark,_Dummy_PRReturn_entry);
+# endif
+# ifdef PAR
+DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextGA,_Dummy_PRReturn_entry);
+# else
+DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextAStack,_Dummy_PRReturn_entry);
+DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextBStack,_Dummy_PRReturn_entry);
+# endif
+#endif
+
+#ifdef GCgn
+OLDROOT_RTBL();
+#endif
+
+#ifdef CONCURRENT
+TSO_RTBL();
+STKO_RTBL();
+BQ_RTBL();
+# ifndef PAR
+STKO_STATIC_RTBL();
+# else
+FETCHME_RTBL();
+FMBQ_RTBL();
+BF_RTBL();
+# endif
+#endif
+
+#ifdef PAR
+SPEC_RBH_RTBL(2,0);
+SPEC_RBH_RTBL(2,1);
+SPEC_RBH_RTBL(2,2);
+SPEC_RBH_RTBL(3,0);
+SPEC_RBH_RTBL(3,1);
+SPEC_RBH_RTBL(3,2);
+SPEC_RBH_RTBL(3,3);
+SPEC_RBH_RTBL(4,0);
+SPEC_RBH_RTBL(4,4);
+SPEC_RBH_RTBL(5,0);
+SPEC_RBH_RTBL(5,5);
+SPEC_RBH_RTBL(6,6);
+SPEC_RBH_RTBL(7,7);
+SPEC_RBH_RTBL(8,8);
+SPEC_RBH_RTBL(9,9);
+SPEC_RBH_RTBL(10,10);
+SPEC_RBH_RTBL(11,11);
+SPEC_RBH_RTBL(12,12);
+
+GEN_RBH_RTBL();
+#endif
+
+
+\end{code}
diff --git a/ghc/runtime/main/Select.lc b/ghc/runtime/main/Select.lc
new file mode 100644
index 0000000000..1f10c7ac85
--- /dev/null
+++ b/ghc/runtime/main/Select.lc
@@ -0,0 +1,123 @@
+%
+% (c) The AQUA Project, Glasgow University, 1995
+%
+%************************************************************************
+%* *
+\section[Select.lc]{Select Available File Descriptors}
+%* *
+%************************************************************************
+
+\begin{code}
+
+#ifdef CONCURRENT
+
+/* #define STK_CHK_DEBUG */
+
+#define NULL_REG_MAP
+#define NON_POSIX_SOURCE
+/* Should there be a POSIX alternative based on poll()? */
+#include "stgdefs.h"
+
+# if defined(HAVE_SYS_TYPES_H)
+# include <sys/types.h>
+# endif
+
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# endif
+
+void
+AwaitEvent(delta)
+I_ delta;
+{
+    /* Wait until either a file descriptor some thread is waiting on becomes
+       readable or the earliest thread delay expires, then move every thread
+       whose event has happened onto the runnable (or GranSim thread) queue.
+
+       delta is the number of microseconds notionally already elapsed;
+       delta == 0 means "sleep until the earliest event".
+       TSO_EVENT(tso) encodes the event: > 0 is a remaining delay in
+       microseconds, <= 0 is the negated file descriptor being waited on. */
+    P_ tso, prev, next;
+    rtsBool ready;
+    fd_set rfd;
+    I_ us;
+    I_ min;
+    struct timeval tv;
+
+    /* With no elapsed time supplied we wait as long as necessary (min is
+       lowered below to the smallest outstanding delay); otherwise min stays
+       0 and the select below just polls. */
+    min = delta == 0 ? 0x7fffffff : 0;
+
+    /*
+     * Collect all of the fd's that we're interested in, and capture
+     * the minimum waiting time for the delayed threads.
+     */
+    FD_ZERO(&rfd);
+    for(tso = WaitingThreadsHd; tso != Nil_closure; tso = TSO_LINK(tso)) {
+	us = (I_) TSO_EVENT(tso);
+	if (us > 0) {
+	    /* Looking at a delay event */
+	    if (us < min)
+		min = us;
+	} else {
+	    /* Looking at a wait event */
+	    FD_SET((-us), &rfd);
+	}
+    }
+
+    /* Check for any interesting events */
+
+    tv.tv_sec = min / 1000000;
+    tv.tv_usec = min % 1000000;
+
+    /* Retry on EINTR; any other select failure is fatal.
+       NOTE(review): the retry re-uses rfd and tv, which select may already
+       have modified -- confirm this is acceptable on the target platforms. */
+    while (select(FD_SETSIZE, &rfd, NULL, NULL, &tv) < 0) {
+	if (errno != EINTR) {
+	    fflush(stdout);
+	    fprintf(stderr, "AwaitEvent: select failed\n");
+	    EXIT(EXIT_FAILURE);
+	}
+    }
+
+    /* If we blocked for the minimum delay, that is the time to charge
+       against every delayed thread below. */
+    if (delta == 0)
+	delta = min;
+
+    /* Second pass: wake the threads whose event happened, rebuild the
+       waiting queue from the rest. */
+    prev = NULL;
+    for(tso = WaitingThreadsHd; tso != Nil_closure; tso = next) {
+	next = TSO_LINK(tso);
+	us = (I_) TSO_EVENT(tso);
+	if (us > 0) {
+	    /* Looking at a delay event */
+	    us -= delta;
+	    ready = (us <= 0);
+	    if (!ready)
+		TSO_EVENT(tso) = (W_) us;	/* store remaining delay */
+	} else {
+	    /* Looking at a wait event */
+	    ready = FD_ISSET((-us), &rfd);
+	}
+	if (ready) {
+
+	    /* Append the awakened thread to the run queue. */
+#if defined(GRAN)
+	    if (ThreadQueueTl == Nil_closure)
+		ThreadQueueHd = tso;
+	    else
+		TSO_LINK(ThreadQueueTl) = tso;
+	    ThreadQueueTl = tso;
+	    TSO_LINK(tso) = Nil_closure;
+#else
+	    if (RunnableThreadsTl == Nil_closure)
+		RunnableThreadsHd = tso;
+	    else
+		TSO_LINK(RunnableThreadsTl) = tso;
+	    RunnableThreadsTl = tso;
+	    TSO_LINK(tso) = Nil_closure;
+#endif
+	} else {
+	    /* Still waiting: keep it on the rebuilt waiting queue. */
+	    if (prev == NULL)
+		WaitingThreadsHd = tso;
+	    else
+		TSO_LINK(prev) = tso;
+	    prev = tso;
+	}
+    }
+    /* Terminate the rebuilt waiting queue. */
+    if (prev == NULL)
+	WaitingThreadsHd = WaitingThreadsTl = Nil_closure;
+    else {
+	TSO_LINK(prev) = Nil_closure;
+	WaitingThreadsTl = prev;
+    }
+}
+
+#endif /* CONCURRENT */
+\end{code}
diff --git a/ghc/runtime/main/Signals.lc b/ghc/runtime/main/Signals.lc
new file mode 100644
index 0000000000..3796f9965d
--- /dev/null
+++ b/ghc/runtime/main/Signals.lc
@@ -0,0 +1,588 @@
+%
+% (c) The AQUA Project, Glasgow University, 1995
+%
+%************************************************************************
+%* *
+\section[Signals.lc]{Signal Handlers}
+%* *
+%************************************************************************
+
+There are two particular signals that we find interesting in the RTS:
+segmentation faults (for cheap stack overflow checks) and virtual
+timer alarms (for profiling and thread context switching). POSIX
+compliance is supposed to make this kind of thing easy, but it
+doesn't. Expect every new target platform to require gory hacks to
+get this stuff to work.
+
+Then, there are the user-specified signal handlers to cope with.
+Since they're pretty rudimentary, they shouldn't actually cause as
+much pain.
+
+\begin{code}
+
+#include "platform.h"
+
+#if defined(sunos4_TARGET_OS)
+ /* The sigaction in SunOS 4.1.X does not grok SA_SIGINFO */
+# define NON_POSIX_SOURCE
+#endif
+
+#if defined(osf1_TARGET_OS)
+ /* The include files for OSF1 do not normally define SA_SIGINFO */
+# define _OSF_SOURCE 1
+#endif
+
+#if defined(linuxaout_TARGET_OS) || defined(linux_TARGET_OS)
+ /* I have no idea why this works (WDP 95/03) */
+# define _BSD_SOURCE 1
+#endif
+
+#include "rtsdefs.h"
+
+#if defined(HAVE_SYS_TYPES_H)
+# include <sys/types.h>
+#endif
+
+#if defined(HAVE_SIGNAL_H)
+# include <signal.h>
+#endif
+#if irix_TARGET_OS
+/* SIGVTALRM not avail w/ POSIX_SOURCE, but worse things happen without */
+/* SIGH: triple SIGH (WDP 95/07) */
+# define SIGVTALRM 28
+#endif
+
+#if defined(HAVE_SIGINFO_H)
+ /* DEC OSF1 seems to need this explicitly. Maybe others do as well? */
+# include <siginfo.h>
+#endif
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Stack-check by protected-memory-faulting}
+%* *
+%************************************************************************
+
+If we are checking stack overflow by page faulting, then we need to be
+able to install a @SIGSEGV@ handler, preferably one which can
+determine where the fault occurred, so that we can satisfy ourselves
+that it really was a stack overflow and not some random segmentation
+fault.
+
+\begin{code}
+
+#if STACK_CHECK_BY_PAGE_FAULT
+
+extern P_ stks_space; /* Where the stacks live, from SMstacks.lc */
+extern I_ SM_word_stk_size; /* How big they are (ditto) */
+
+\end{code}
+
+SunOS 4.x is too old to have @SA_SIGINFO@ as a flag to @sigaction@, so
+we use the older @signal@ call instead. This means that we also have
+to set up the handler to expect a different collection of arguments.
+Fun, eh?
+
+\begin{code}
+
+# if defined(sunos4_TARGET_OS) || defined(linuxaout_TARGET_OS) || defined(linux_TARGET_OS)
+
+static void
+segv_handler(sig, code, scp, addr)
+    int sig;
+    int code;
+    struct sigcontext *scp;
+    caddr_t addr;
+{
+    /* SIGSEGV handler (old-style signal() variant for SunOS 4 / Linux):
+       a fault whose address lies inside the RTS stack area means a
+       page-fault stack check fired, so divert to StackOverflow (which does
+       not return); any other fault is a genuine segfault, so report and
+       abort. */
+    extern void StackOverflow(STG_NO_ARGS) STG_NORETURN;
+
+    if (addr >= (caddr_t) stks_space
+      && addr < (caddr_t) (stks_space + SM_word_stk_size))
+	StackOverflow();
+
+    fflush(stdout);
+    fprintf(stderr, "Segmentation fault caught, address = %lx\n", (W_) addr);
+    abort();
+}
+
+int
+install_segv_handler()
+{
+    /* Install segv_handler for SIGSEGV via the old signal() interface.
+       Returns non-zero on failure.
+       Fixed: signal() reports failure by returning SIG_ERR, not -1; the
+       previous "(int) signal(...) == -1" truncated the returned handler
+       pointer to an int, which is unreliable where pointers are wider
+       than int. */
+    return signal(SIGSEGV, segv_handler) == SIG_ERR;
+}
+
+# else /* Not SunOS 4 */
+
+# if defined(irix_TARGET_OS)
+ /* certainly BOGUS (WDP 94/05) -- copied from /usr/include/sys/siginfo.h */
+# define si_addr _data._fault._addr
+# endif
+
+static void
+segv_handler(sig, sip)
+    int sig;
+    siginfo_t *sip;
+{
+    /* SIGSEGV handler (SA_SIGINFO variant): if the faulting address lies
+       inside the RTS stack area, treat the fault as a page-fault stack
+       check and enter the stack-overflow path; otherwise report the
+       address (when known) and abort. */
+    fflush(stdout);
+    if (sip == NULL) {
+	fprintf(stderr, "Segmentation fault caught, address unknown\n");
+    } else {
+	if (sip->si_addr >= (caddr_t) stks_space
+	  && sip->si_addr < (caddr_t) (stks_space + SM_word_stk_size))
+	    StackOverflow();
+
+	fprintf(stderr, "Segmentation fault caught, address = %08lx\n", (W_) sip->si_addr);
+    }
+    abort();
+}
+
+int
+install_segv_handler()
+{
+    /* Install segv_handler for SIGSEGV via sigaction, requesting
+       SA_SIGINFO so the handler receives the faulting address.
+       Returns the sigaction result (non-zero on failure).
+       NOTE(review): with SA_SIGINFO the handler is normally installed via
+       sa_sigaction with a three-argument prototype; assigning sa_handler
+       here relies on platform-specific layout -- confirm per target. */
+    struct sigaction action;
+
+    action.sa_handler = segv_handler;
+    sigemptyset(&action.sa_mask);
+    action.sa_flags = SA_SIGINFO;
+    return sigaction(SIGSEGV, &action, NULL);
+}
+
+# endif /* not SunOS 4 */
+
+#endif /* STACK_CHECK_BY_PAGE_FAULT */
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Virtual-timer alarm (for profiling, etc.)}
+%* *
+%************************************************************************
+
+The timer interrupt is somewhat simpler, and we could probably use
+sigaction across the board, but since we have committed ourselves to
+the non-POSIX signal under SunOS 4.1.X, we adopt the same approach
+here.
+
+\begin{code}
+#if (defined(USE_COST_CENTRES) || defined(CONCURRENT)) && !defined(GRAN)
+
+# if defined(USE_COST_CENTRES)
+extern I_ heap_profiling_req;
+# endif
+
+# ifdef CONCURRENT
+
+# if defined(USE_COST_CENTRES) || defined(GUM)
+I_ contextSwitchTicks;
+I_ profilerTicks;
+# endif
+
+# ifdef PAR
+extern P_ CurrentTSO;
+# endif
+extern I_ contextSwitchTime;
+
+/* SIGVTALRM handler for the concurrent RTS: drives profiler ticks,
+   polls waiting threads, and requests a context switch when there is
+   runnable work (or, under PAR, pending sparks/messages). */
+static void
+vtalrm_handler(sig)
+    int sig;
+{
+/*
+   For the parallel world, currentTSO is set if there is any work
+   on the current PE. In this case we DO want to context switch,
+   in case other PEs have sent us messages which must be processed.
+*/
+
+# if defined(USE_COST_CENTRES) || defined(GUM)
+    static I_ csTicks = 0, pTicks = 0;
+
+    /* Tick the profiler every profilerTicks alarms; only consider a
+       context switch every contextSwitchTicks alarms. */
+    if (time_profiling) {
+	if (++pTicks % profilerTicks == 0) {
+# if ! defined(USE_COST_CENTRES)
+	    handle_tick_serial();
+# else
+	    if (cc_profiling > 1 || heap_profiling_req != HEAP_NO_PROFILING)
+		handle_tick_serial();
+	    else
+		handle_tick_noserial();
+# endif
+	}
+	if (++csTicks % contextSwitchTicks != 0)
+	    return;
+    }
+# endif
+
+    /* Let threads blocked on I/O or timers make progress. */
+    if (WaitingThreadsHd != Nil_closure)
+	AwaitEvent(contextSwitchTime);
+
+# ifdef PAR
+    /* When a spark pool has filled up, prune it and pull the tail pointer
+       back to half the pool's limit to make room again. */
+    if (PendingSparksTl[REQUIRED_POOL] == PendingSparksLim[REQUIRED_POOL] ||
+      PendingSparksTl[ADVISORY_POOL] == PendingSparksLim[ADVISORY_POOL]) {
+	PruneSparks();
+	if (PendingSparksTl[REQUIRED_POOL] == PendingSparksLim[REQUIRED_POOL])
+	    PendingSparksTl[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] +
+	      SparkLimit[REQUIRED_POOL] / 2;
+	if (PendingSparksTl[ADVISORY_POOL] == PendingSparksLim[ADVISORY_POOL])
+	    PendingSparksTl[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] +
+	      SparkLimit[ADVISORY_POOL] / 2;
+    }
+
+    if (CurrentTSO != NULL ||
+# else
+    if (RunnableThreadsHd != Nil_closure ||
+# endif
+      PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
+      PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL]) {
+	/* ToDo: anything else for GRAN? WDP */
+	context_switch = 1;
+    }
+}
+
+# endif
+
+# if defined(sunos4_TARGET_OS) || defined(linuxaout_TARGET_OS) || defined(linux_TARGET_OS)
+
+int
+install_vtalrm_handler()
+{
+    /* Install the SIGVTALRM handler via the old signal() interface
+       (SunOS 4 / Linux): the concurrent RTS gets the context-switch
+       handler, the sequential RTS ticks the profiler directly.
+       Returns non-zero on failure.
+       Fixed: signal() reports failure by returning SIG_ERR, not -1;
+       casting the returned handler pointer to int truncates it where
+       pointers are wider than int. */
+    void (*old)();
+
+# ifdef CONCURRENT
+    old = signal(SIGVTALRM, vtalrm_handler);
+# else
+    if (cc_profiling > 1 || heap_profiling_req != HEAP_NO_PROFILING)
+	old = signal(SIGVTALRM, handle_tick_serial);
+    else
+	old = signal(SIGVTALRM, handle_tick_noserial);
+# endif
+    return old == (void (*)()) SIG_ERR;
+}
+
+static int vtalrm_mask;		/* mask saved by blockVtAlrmSignal() */
+
+/* Block SIGVTALRM (BSD-style sigblock), saving the previous signal mask. */
+void
+blockVtAlrmSignal(STG_NO_ARGS)
+{
+    vtalrm_mask = sigblock(sigmask(SIGVTALRM));
+}
+
+/* Restore the mask saved by the matching blockVtAlrmSignal(). */
+void
+unblockVtAlrmSignal(STG_NO_ARGS)
+{
+    (void) sigsetmask(vtalrm_mask);
+}
+
+# else /* Not SunOS 4 */
+
+int
+install_vtalrm_handler(STG_NO_ARGS)
+{
+    /* Install the SIGVTALRM handler with sigaction: the context switcher
+       in the concurrent RTS, otherwise the appropriate profiler tick
+       routine.  Returns the sigaction result (non-zero on failure). */
+    struct sigaction sa;
+
+    sigemptyset(&sa.sa_mask);
+    sa.sa_flags = 0;
+
+# ifdef CONCURRENT
+    sa.sa_handler = vtalrm_handler;
+# else
+    if (cc_profiling > 1 || heap_profiling_req != HEAP_NO_PROFILING)
+	sa.sa_handler = handle_tick_serial;
+    else
+	sa.sa_handler = handle_tick_noserial;
+# endif
+
+    return sigaction(SIGVTALRM, &sa, NULL);
+}
+
+/* Add SIGVTALRM to the set of blocked signals. */
+void
+blockVtAlrmSignal(STG_NO_ARGS)
+{
+    sigset_t vt;
+
+    sigemptyset(&vt);
+    sigaddset(&vt, SIGVTALRM);
+    (void) sigprocmask(SIG_BLOCK, &vt, NULL);
+}
+
+/* Remove SIGVTALRM from the set of blocked signals. */
+void
+unblockVtAlrmSignal(STG_NO_ARGS)
+{
+    sigset_t vt;
+
+    sigemptyset(&vt);
+    sigaddset(&vt, SIGVTALRM);
+    (void) sigprocmask(SIG_UNBLOCK, &vt, NULL);
+}
+
+# endif /* SunOS 4 */
+
+#endif /* USE_COST_CENTRES || CONCURRENT (but not GRAN) */
+
+\end{code}
+
+Signal handling support for user-specified signal handlers. Since we
+need stable pointers to do this properly, we just refuse to try in the
+parallel world. Sorry.
+
+\begin{code}
+
+#ifdef PAR
+
+/* User-signal handling is unsupported in the parallel RTS: nothing to block. */
+void
+blockUserSignals()
+{
+}
+
+/* User-signal handling is unsupported in the parallel RTS: nothing to unblock. */
+void
+unblockUserSignals()
+{
+}
+
+I_
+# ifdef _POSIX_SOURCE
+sig_install(sig, spi, mask)
+    sigset_t *mask;
+# else
+ sig_install(sig, spi)
+# endif
+    I_ sig;
+    I_ spi;
+{
+    /* User-installed signal handlers need stable pointers, which the
+       parallel RTS does not provide, so any attempt is a fatal error.
+       (EXIT does not return, hence no return statement despite the I_
+       return type.) */
+    fflush(stdout);
+    fprintf(stderr,"No signal handling support in a parallel implementation.\n");
+    EXIT(EXIT_FAILURE);
+}
+
+#else /* !PAR */
+
+# include <setjmp.h>
+
+extern StgPtr deRefStablePointer PROTO((StgStablePtr));
+extern void freeStablePointer PROTO((I_));
+extern jmp_buf restart_main;
+
+static I_ *handlers = NULL; /* Dynamically grown array of signal handlers */
+static I_ nHandlers = 0; /* Size of handlers array */
+
+static void
+more_handlers(sig)
+    I_ sig;
+{
+    /* Ensure the handlers[] array has a slot for signal number sig,
+       growing it if necessary; newly created slots get STG_SIG_DFL.
+       Exits the program if the allocation fails. */
+    I_ i;
+
+    if (sig < nHandlers)
+	return;
+
+    /* realloc(NULL, n) is defined to behave as malloc(n), so a single
+       call covers both the initial allocation and later growth. */
+    handlers = (I_ *) realloc(handlers, (sig + 1) * sizeof(I_));
+
+    if (handlers == NULL) {
+	fflush(stdout);
+	fprintf(stderr, "VM exhausted\n");
+	EXIT(EXIT_FAILURE);
+    }
+    for(i = nHandlers; i <= sig; i++)
+	/* Fill in the new slots with default actions */
+	handlers[i] = STG_SIG_DFL;
+
+    nHandlers = sig + 1;
+}
+
+# ifdef _POSIX_SOURCE
+
+static void
+generic_handler(sig)
+{
+    /* Trampoline for all user-installed Haskell signal handlers (POSIX
+       variant): reset the heap pointer, reinitialise the STG stacks, make
+       the handler closure (retrieved via its stable pointer) the new
+       top-level closure, re-enable the signal, and restart main via
+       longjmp.  The sig parameter is implicitly int (K&R definition). */
+    sigset_t signals;
+
+    SAVE_Hp = SAVE_HpLim;	/* Just to be safe */
+    if (initStacks(&StorageMgrInfo) != 0) {
+	fflush(stdout);
+	fprintf(stderr, "initStacks failed!\n");
+	EXIT(EXIT_FAILURE);
+    }
+    TopClosure = deRefStablePointer(handlers[sig]);
+    sigemptyset(&signals);
+    sigaddset(&signals, sig);
+    sigprocmask(SIG_UNBLOCK, &signals, NULL);
+    longjmp(restart_main, sig);
+}
+
+static sigset_t userSignals;	/* set of signals with user handlers */
+static sigset_t savedSignals;	/* mask saved by blockUserSignals() */
+
+/* Start with no user-handled signals. */
+void
+initUserSignals()
+{
+    sigemptyset(&userSignals);
+}
+
+/* Replace the process mask so exactly the user-handled signals are
+   blocked, saving the previous mask for unblockUserSignals(). */
+void
+blockUserSignals()
+{
+    sigprocmask(SIG_SETMASK, &userSignals, &savedSignals);
+}
+
+/* Restore the mask saved by the matching blockUserSignals(). */
+void
+unblockUserSignals()
+{
+    sigprocmask(SIG_SETMASK, &savedSignals, NULL);
+}
+
+
+/* When non-zero, sig_install() passes SA_NOCLDSTOP for SIGCHLD. */
+I_ nocldstop = 0;
+
+I_
+sig_install(sig, spi, mask)
+    I_ sig;
+    I_ spi;
+    sigset_t *mask;
+{
+    /* Install a signal "action" for signal sig (POSIX variant):
+         spi == STG_SIG_IGN / STG_SIG_DFL select the usual dispositions;
+         any other spi is treated as a stable pointer to a Haskell handler,
+         dispatched through generic_handler.
+       mask, when non-NULL, becomes the handler's signal mask.
+       Returns the previous spi for this signal, or STG_SIG_ERR on failure. */
+    sigset_t signals;
+    struct sigaction action;
+    I_ previous_spi;
+
+    /* Block the signal until we figure out what to do */
+    /* Count on this to fail if the signal number is invalid */
+    if(sig < 0 || sigemptyset(&signals) || sigaddset(&signals, sig) ||
+      sigprocmask(SIG_BLOCK, &signals, NULL))
+	return STG_SIG_ERR;
+
+    more_handlers(sig);
+
+    previous_spi = handlers[sig];
+
+    switch(spi) {
+    case STG_SIG_IGN:
+	handlers[sig] = STG_SIG_IGN;
+	sigdelset(&userSignals, sig);
+	action.sa_handler = SIG_IGN;
+	break;
+
+    case STG_SIG_DFL:
+	handlers[sig] = STG_SIG_DFL;
+	sigdelset(&userSignals, sig);
+	action.sa_handler = SIG_DFL;
+	break;
+    default:
+	/* spi is a stable pointer to the user's handler closure */
+	handlers[sig] = spi;
+	sigaddset(&userSignals, sig);
+	action.sa_handler = generic_handler;
+	break;
+    }
+
+    if (mask != NULL)
+	action.sa_mask = *mask;
+    else
+	sigemptyset(&action.sa_mask);
+
+    action.sa_flags = sig == SIGCHLD && nocldstop ? SA_NOCLDSTOP : 0;
+    /* On failure, release the freshly stored stable pointer.
+       NOTE(review): the guard tests previous_spi but frees the NEW
+       handlers[sig] -- confirm the intended condition. */
+    if (sigaction(sig, &action, NULL) || sigprocmask(SIG_UNBLOCK, &signals, NULL)) {
+	if (previous_spi)
+	    freeStablePointer(handlers[sig]);
+	return STG_SIG_ERR;
+    }
+
+    return previous_spi;
+}
+
+# else /* !POSIX */
+
+static void
+generic_handler(sig)
+{
+    /* Non-POSIX trampoline for user-installed Haskell signal handlers:
+       reset the heap pointer, reinitialise the STG stacks, make the
+       handler closure (via its stable pointer) the new top-level closure,
+       clear the signal mask, and restart main via longjmp.  The sig
+       parameter is implicitly int (K&R definition). */
+    SAVE_Hp = SAVE_HpLim;	/* Just to be safe */
+    if (initStacks(&StorageMgrInfo) != 0) {
+	fflush(stdout);
+	fprintf(stderr, "initStacks failed!\n");
+	EXIT(EXIT_FAILURE);
+    }
+    TopClosure = deRefStablePointer(handlers[sig]);
+    sigsetmask(0);
+    longjmp(restart_main, sig);
+}
+
+static int userSignals;		/* BSD-style mask of user-handled signals */
+static int savedSignals;	/* mask saved by blockUserSignals() */
+
+/* Start with no user-handled signals. */
+void
+initUserSignals()
+{
+    userSignals = 0;
+}
+
+/* Set the process mask to exactly the user-handled signals, saving the
+   previous mask for unblockUserSignals(). */
+void
+blockUserSignals()
+{
+    savedSignals = sigsetmask(userSignals);
+}
+
+/* Restore the mask saved by the matching blockUserSignals(). */
+void
+unblockUserSignals()
+{
+    sigsetmask(savedSignals);
+}
+
+I_
+sig_install(sig, spi)
+    I_ sig;
+    I_ spi;
+{
+    /* Install a signal "action" for signal sig (non-POSIX variant using
+       signal()/sigblock()/sigsetmask()):
+         spi == STG_SIG_IGN / STG_SIG_DFL select the usual dispositions;
+         any other spi is treated as a stable pointer to a Haskell handler,
+         dispatched through generic_handler.
+       Returns the previous spi for this signal, or STG_SIG_ERR on failure. */
+    I_ previous_spi;
+    int mask;
+    void (*handler)();
+
+    /* Block the signal until we figure out what to do */
+    /* Count on this to fail if the signal number is invalid */
+    if(sig < 0 || (mask = sigmask(sig)) == 0)
+	return STG_SIG_ERR;
+
+    mask = sigblock(mask);
+
+    more_handlers(sig);
+
+    previous_spi = handlers[sig];
+
+    switch(spi) {
+    case STG_SIG_IGN:
+	handlers[sig] = STG_SIG_IGN;
+	userSignals &= ~sigmask(sig);
+	handler = SIG_IGN;
+	break;
+
+    case STG_SIG_DFL:
+	handlers[sig] = STG_SIG_DFL;
+	userSignals &= ~sigmask(sig);
+	handler = SIG_DFL;
+	break;
+    default:
+	/* spi is a stable pointer to the user's handler closure */
+	handlers[sig] = spi;
+	userSignals |= sigmask(sig);
+	handler = generic_handler;
+	break;
+    }
+
+    /* Fixed: signal() reports failure by returning SIG_ERR; the previous
+       ordered comparison "signal(...) < 0" on a function pointer is not
+       valid C and does not detect errors reliably. */
+    if (signal(sig, handler) == SIG_ERR) {
+	if (previous_spi)
+	    freeStablePointer(handlers[sig]);
+	sigsetmask(mask);
+	return STG_SIG_ERR;
+    }
+
+    sigsetmask(mask);
+    return previous_spi;
+}
+
+# endif /* POSIX */
+
+#endif /* PAR */
+
+\end{code}
diff --git a/ghc/runtime/main/StgOverflow.lc b/ghc/runtime/main/StgOverflow.lc
new file mode 100644
index 0000000000..720f243f58
--- /dev/null
+++ b/ghc/runtime/main/StgOverflow.lc
@@ -0,0 +1,450 @@
+\section[stk-overflow]{Stack overflow routine}
+
+%************************************************************************
+%* *
+\subsection[arity-error]{Arity error has nothing to do with stack overflow}
+%* *
+%************************************************************************
+
+\begin{code}
+
+#include "rtsdefs.h"
+
+extern void PrintRednCountInfo(STG_NO_ARGS);
+extern I_ showRednCountStats;
+
+#ifdef __DO_ARITY_CHKS__
+I_ ExpectedArity;
+
+/*
+ * Report a fatal arity mismatch and exit: the function was entered with
+ * "n" arguments while its entry code expected "ExpectedArity" of them
+ * (the callee stores its arity in the global before calling us).
+ */
+void
+ArityError(n)
+  I_ n;
+{
+    fflush(stdout);
+    /* The actual argument count comes first in the message; the
+       expected count second.  (The two were previously swapped.) */
+    fprintf(stderr, "Arity error: called with %ld args, should have been %ld\n",
+		n, ExpectedArity);
+
+#if defined(DO_REDN_COUNTING)
+    if (showRednCountStats) {
+	PrintRednCountInfo();
+    }
+#endif
+
+    EXIT(EXIT_FAILURE);
+}
+
+#endif /* __DO_ARITY_CHKS__ */
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[stk-oflow-seq]{Boring sequential stack overflow}
+%* *
+%************************************************************************
+
+\begin{code}
+#ifndef CONCURRENT
+
+/*
+ * Sequential (non-CONCURRENT) stack overflow: there is a single fixed
+ * stack, so all we can do is run the user-replaceable hook, dump any
+ * reduction-count statistics, and exit.
+ */
+void
+StackOverflow(STG_NO_ARGS)
+{
+    fflush(stdout);
+    StackOverflowHook(SM_word_stk_size * sizeof(W_)); /*msg*/
+
+#if defined(DO_REDN_COUNTING)
+    if (showRednCountStats) {
+	PrintRednCountInfo();
+    }
+#endif
+
+    EXIT(EXIT_FAILURE);
+}
+#endif
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[stk-squeeze]{Code for squeezing out update frames}
+%* *
+%************************************************************************
+
+Code for squeezing out vacuous update frames. Updatees of squeezed frames
+are turned into indirections to the common black hole (or blocking queue).
+
+\begin{code}
+
+I_ squeeze_upd_frames = 1; /* now ON by default */
+
+/*
+ * Squeeze out vacuous update frames on the B stack between "bottom" and
+ * the topmost update frame "frame" (top points at the top-of-stack
+ * word).  Adjacent frames that would update with the same value are
+ * merged: the bypassed updatee becomes an indirection to the kept one.
+ * Returns the displacement (in B-stack words) by which the surviving
+ * stack contents were slid towards bottom; the caller adjusts SpB/SuB
+ * by this amount.
+ */
+I_
+SqueezeUpdateFrames(bottom, top, frame)
+P_ bottom;
+P_ top;
+P_ frame;
+{
+    I_ displacement = 0;
+    P_ next_frame = NULL;	/* Temporally next */
+    P_ prev_frame;		/* Temporally previous */
+
+    /*
+     * If we have no update frames, there is nothing to do.
+     */
+
+    if (frame <= bottom)
+	return 0;
+
+    if ((prev_frame = GRAB_SuB(frame)) <= bottom) {
+#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE)
+	if (!noBlackHoles)
+	    UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info);
+#endif
+	return 0;
+    }
+
+    /*
+     * Walk down the stack, reversing the SuB pointers so that we can walk back up
+     * as we squeeze from the bottom. Note that next_frame and prev_frame refer to
+     * next and previous as they were added to the stack, rather than the way we see
+     * them in this walk. (It makes the next loop less confusing.)
+     */
+
+    while (prev_frame > bottom) {
+	PUSH_SuB(frame, next_frame);
+	next_frame = frame;
+	frame = prev_frame;
+	prev_frame = GRAB_SuB(frame);
+    }
+
+    /*
+     * Now, we're at the bottom. Frame points to the lowest update frame on the
+     * stack, and its saved SuB actually points to the frame above. We have to walk
+     * back up the stack, squeezing out empty update frames and turning the pointers
+     * back around on the way back up.
+     */
+
+    /*
+     * The bottom-most frame has not been altered, and we never want to eliminate it
+     * anyway. Just black hole the updatee and walk one step up
+     * before starting to squeeze. When you get to the topmost frame,
+     * remember that there are still some words above it that might
+     * have to be moved.
+     */
+
+#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE)
+    if (!noBlackHoles)
+	UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info);
+#endif
+    prev_frame = frame;
+    frame = next_frame;
+
+    /*
+     * Loop through all of the middle frames (everything except the very
+     * bottom and the very top).
+     */
+    while ((next_frame = GRAB_SuB(frame)) != NULL) {
+	P_ sp;
+	P_ frame_bottom = frame + BREL(STD_UF_SIZE);
+
+	/* Check to see if the current frame is empty (both A and B) */
+	if (prev_frame == frame_bottom + BREL(displacement) &&
+	  GRAB_SuA(next_frame) == GRAB_SuA(frame)) {
+
+	    /* Now squeeze out the current frame */
+	    P_ updatee_keep = GRAB_UPDATEE(prev_frame);
+	    P_ updatee_bypass = GRAB_UPDATEE(frame);
+
+	    /*
+	     fprintf(stderr, "squeezing frame at %lx, ret %lx\n", frame,
+	     GRAB_RET(frame));
+	     */
+
+#ifdef CONCURRENT
+	    /* Check for a blocking queue on the node that's going away */
+	    if (INFO_PTR(updatee_bypass) == (W_) BQ_info) {
+		/* Sigh. It has one. Don't lose those threads! */
+		if (INFO_PTR(updatee_keep) == (W_) BQ_info) {
+		    /* Urgh. Two queues. Merge them. */
+		    P_ tso = (P_) BQ_ENTRIES(updatee_keep);
+
+		    while (TSO_LINK(tso) != Nil_closure)
+			tso = TSO_LINK(tso);
+
+		    TSO_LINK(tso) = (P_) BQ_ENTRIES(updatee_bypass);
+		} else {
+		    /* For simplicity, just swap the BQ for the BH */
+		    P_ temp = updatee_keep;
+
+		    updatee_keep = updatee_bypass;
+		    updatee_bypass = temp;
+
+		    /* Record the swap in the kept frame (below) */
+		    PUSH_UPDATEE(prev_frame, updatee_keep);
+		}
+	    }
+#endif
+
+	    UPD_EXISTING();	/* ticky stuff (NB: nothing for spat-profiling) */
+	    UPD_IND(updatee_bypass, updatee_keep);
+
+	    sp = frame - BREL(1);	/* Toss the current frame */
+	    displacement += STD_UF_SIZE;
+
+	} else {
+#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE)
+	    if (!noBlackHoles)
+		UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info);
+#endif
+
+	    /* No squeeze for this frame */
+	    sp = frame_bottom - BREL(1);	/* Keep the current frame */
+
+	    /* Fix the SuB in the current frame (should point to the frame below) */
+	    PUSH_SuB(frame, prev_frame);
+	}
+
+	/* Now slide all words from sp up to the next frame */
+
+	if (displacement > 0) {
+	    P_ next_frame_bottom = next_frame + BREL(STD_UF_SIZE);
+
+	    /*
+	     fprintf(stderr, "sliding [%lx, %lx] by %d\n", sp, next_frame_bottom,
+	     displacement);
+	     */
+
+	    while (sp <= next_frame_bottom) {
+		sp[BREL(displacement)] = *sp;
+		sp -= BREL(1);
+	    }
+	}
+	prev_frame = frame + BREL(displacement);
+	frame = next_frame;
+    }
+
+    /*
+     * Now handle the topmost frame. Patch SuB, black hole the updatee,
+     * and slide down.
+     */
+
+    PUSH_SuB(frame, prev_frame);
+
+#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE)
+    if (!noBlackHoles)
+	UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info);
+#endif
+
+    if (displacement > 0) {
+	P_ sp = frame + BREL(STD_UF_SIZE) - BREL(1);
+
+	/*
+	 fprintf(stderr, "sliding [%lx, %lx] by %d\n", sp, top, displacement);
+	 */
+
+	while (sp <= top) {
+	    sp[BREL(displacement)] = *sp;
+	    sp -= BREL(1);
+	}
+    }
+    return displacement;
+}
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[stk-ouflow-par]{Rather exciting parallel stack overflow and underflow}
+%* *
+%************************************************************************
+
+\begin{code}
+#ifdef CONCURRENT
+\end{code}
+
+StackOverflow: called inside a nice ``callwrapper'' when stack
+overflow occurs. The state is already saved in the TSO, and the stack
+is in a tidy saved state.
+
+\begin{code}
+EXTDATA_RO(StkO_info); /* boring extern decl */
+EXTFUN(EnterNodeCode); /* For reentering node after potential GC */
+
+#ifdef PAR
+EXTDATA_RO(FetchMe_info);
+#endif
+
+/*
+ * Concurrent stack overflow: grow the stack by allocating a fresh,
+ * larger StkO chunk in the heap and linking it above the old one.
+ * First tries squeezing update frames on the current chunk; only if
+ * that does not free enough headroom is a new chunk built, the live
+ * argument words copied across, and an Underflow return set up so that
+ * falling off the new chunk re-enters the old one.  Returns nonzero iff
+ * the caller must re-enter Node (a GC may have shorted it out).
+ */
+I_
+StackOverflow(args1, args2)
+W_ args1;
+W_ args2;
+{
+    I_ i;
+    P_ old_stko, new_stko;
+    W_ headroom = STACK_OVERFLOW_HEADROOM(args1, args2);
+    I_ cts_size;
+
+#ifdef PAR
+    W_ is_prim_return = STACK_OVERFLOW_PRIM_RETURN(args1, args2);
+#endif
+    W_ reenter = STACK_OVERFLOW_REENTER(args1, args2);
+    W_ words_of_a = STACK_OVERFLOW_AWORDS(args1, args2);
+    W_ words_of_b = STACK_OVERFLOW_BWORDS(args1, args2);
+    W_ liveness = STACK_OVERFLOW_LIVENESS(args1, args2);
+    I_ really_reenter_node = 0;
+
+    SET_TASK_ACTIVITY(ST_OVERHEAD);
+
+
+    /*
+     * fprintf(stderr,"StackOverflow:liveness=%lx,a=%lx,b=%lx\n",
+     * liveness,words_of_a,words_of_b);
+     */
+
+    old_stko = SAVE_StkO;
+
+    /*
+     * fprintf(stderr, "SpA %lx SuA %lx SpB %lx SuB %lx\n", STKO_SpA(old_stko),
+     * STKO_SuA(old_stko), STKO_SpB(old_stko), STKO_SuB(old_stko));
+     */
+
+    if (squeeze_upd_frames) {
+	i = SqueezeUpdateFrames(STKO_BSTK_BOT(old_stko), STKO_SpB(old_stko),
+	  STKO_SuB(old_stko));
+	STKO_SuB(old_stko) += BREL(i);
+	STKO_SpB(old_stko) += BREL(i);
+	if ((P_) STKO_SpA(old_stko) - AREL(headroom) > STKO_SpB(old_stko)) {
+
+	    /*
+	     * fprintf(stderr, "SpA %lx SpB %lx headroom %d\n", STKO_SpA(old_stko),
+	     * STKO_SpB(old_stko), headroom);
+	     */
+
+	    /* We saved enough space to continue on the old StkO */
+	    return 0;
+	}
+    }
+    SAVE_Liveness = liveness;
+
+    /* Double the stack chunk size each time we grow the stack */
+    cts_size = STKO_CLOSURE_CTS_SIZE(old_stko) * 2;
+
+    if (SAVE_Hp + STKO_HS + cts_size > SAVE_HpLim) {
+	if (reenter) {
+	    /*
+	     * Even in the uniprocessor world, we may have to reenter node in case
+	     * node is a selector shorted out by GC.
+	     */
+	    assert(liveness & LIVENESS_R1);
+	    TSO_PC2(CurrentTSO) = EnterNodeCode;
+	    really_reenter_node = 1;
+	}
+	ReallyPerformThreadGC(STKO_HS + cts_size, rtsFalse);
+	old_stko = SAVE_StkO;
+    }
+    ALLOC_STK(STKO_HS, cts_size, 0);
+    new_stko = SAVE_Hp + 1;
+    SAVE_Hp += STKO_HS + cts_size;
+    SET_STKO_HDR(new_stko, StkO_info, CCC);
+
+    /* Initialize the StkO, as in NewThread */
+    STKO_SIZE(new_stko) = cts_size + STKO_VHS;
+    STKO_SpB(new_stko) = STKO_SuB(new_stko) = STKO_BSTK_BOT(new_stko) + BREL(1);
+    STKO_SpA(new_stko) = STKO_SuA(new_stko) = STKO_ASTK_BOT(new_stko) + AREL(1);
+    STKO_LINK(new_stko) = old_stko;
+
+    STKO_RETURN(new_stko) = SAVE_Ret;
+
+#ifdef PAR
+
+    /*
+     * When we fall off of the top stack segment, we will either be
+     * returning an algebraic data type, in which case R2 holds a
+     * valid info ptr, or we will be returning a primitive
+     * (e.g. int#), in which case R2 is garbage. If we need to perform
+     * GC to pull in the lower stack segment (this should only happen
+     * because of task migration), then we need to know the register
+     * liveness for the algebraic returns. We get the liveness out of
+     * the info table. Now, we could set up the primitive returns
+     * with a bogus infoptr, which has a NO_LIVENESS field in the info
+     * table, but that would involve a lot more overhead than the
+     * current approach. At present, we set up RetReg to point to
+     * *either* a polymorphic algebraic return point, or a primitive
+     * return point.
+     */
+
+    SAVE_Ret = is_prim_return ? (P_) PrimUnderflow : (P_) vtbl_Underflow;
+#else
+    SAVE_Ret = (P_) vtbl_Underflow;
+#endif
+
+    STKO_SpA(old_stko) += AREL(words_of_a);
+    STKO_SpB(old_stko) += BREL(words_of_b);
+
+#ifdef DO_REDN_COUNTING
+    /* Record the stack depths in chunks below the new stack object */
+
+    STKO_ADEP(new_stko) = STKO_ADEP(old_stko) +
+      AREL((I_) STKO_ASTK_BOT(old_stko) - (I_) STKO_SpA(old_stko));
+    STKO_BDEP(new_stko) = STKO_BDEP(old_stko) +
+      BREL((I_) STKO_BSTK_BOT(old_stko) - (I_) STKO_SpB(old_stko));
+#endif
+
+    if (STKO_SpB(old_stko) < STKO_BSTK_BOT(old_stko)) {
+
+	/*
+	 * This _should_ only happen if PAP_entry fails a stack check and there is
+	 * no update frame on the current stack. We can deal with this by storing a
+	 * function's argument requirements in its info table, peering into the PAP
+	 * (it had better be in R1) for the function pointer and taking only the
+	 * necessary number of arguments, but this would be hard, so we haven't done
+	 * it.
+	 */
+	fflush(stdout);
+	fprintf(stderr, "StackOverflow too deep. Probably a PAP with no update frame.\n");
+	abort();	/* an 'abort' may be overkill WDP 95/04 */
+    }
+    /* Move A stack words from old StkO to new StkO */
+    for (i = 1; i <= words_of_a; i++) {
+	STKO_SpA(new_stko)[-AREL(i)] = STKO_SpA(old_stko)[-AREL(i)];
+    }
+    STKO_SpA(new_stko) -= AREL(words_of_a);
+
+    /* Move B stack words from old StkO to new StkO */
+    for (i = 1; i <= words_of_b; i++) {
+	STKO_SpB(new_stko)[-BREL(i)] = STKO_SpB(old_stko)[-BREL(i)];
+    }
+    STKO_SpB(new_stko) -= BREL(words_of_b);
+
+    /* Now, handle movement of a single update frame */
+    /* ToDo: Make this more efficient. (JSM) */
+    if (STKO_SpB(old_stko) < STKO_SuB(old_stko)) {
+	/* Yikes! PAP_entry stole an update frame. Fix the world! */
+	P_ frame = STKO_SuB(new_stko) - BREL(STD_UF_SIZE);
+
+	/*
+	 * fprintf(stderr, "Stolen update frame: (old %lx, new %lx) SuA %lx, SuB
+	 * %lx, return %lx\n", old_stko, new_stko, GRAB_SuA(frame), GRAB_SuB(frame),
+	 * GRAB_RET(frame));
+	 */
+
+	STKO_SuA(old_stko) = GRAB_SuA(frame);
+	STKO_SuB(old_stko) = GRAB_SuB(frame);
+
+	SAVE_Ret = STKO_RETURN(new_stko);
+	STKO_RETURN(new_stko) = GRAB_RET(frame);
+
+	PUSH_SuA(frame, STKO_SuA(new_stko));
+	PUSH_SuB(frame, STKO_SuB(new_stko));
+	PUSH_RET(frame, vtbl_Underflow);
+
+	STKO_SuB(new_stko) = frame;
+    }
+    SAVE_StkO = new_stko;
+    return really_reenter_node;
+}
+\end{code}
+
+Underflow things are all done in the threaded world. The code is in
+main/StgThreads.lhc.
+
+\begin{code}
+#endif /* CONCURRENT */
+\end{code}
diff --git a/ghc/runtime/main/StgStartup.lhc b/ghc/runtime/main/StgStartup.lhc
new file mode 100644
index 0000000000..9728711982
--- /dev/null
+++ b/ghc/runtime/main/StgStartup.lhc
@@ -0,0 +1,662 @@
+%/****************************************************************
+%* *
+%* Basic Continuations required by the STG Machine runtime *
+%* *
+%****************************************************************/
+
+
+First continuation called by the mini-interpreter is
+evaluateTopClosure. It has to set up return and jump to the user's
+@main@ closure. If @errorIO@ is called, we will be back here, doing
+the same thing for the specified continuation.
+
+\begin{code}
+#define MAIN_REG_MAP /* STG world */
+#include "rtsdefs.h"
+
+#if 0
+#ifdef PAR
+#include "Statistics.h"
+#endif
+#endif
+
+/* ptr to the user's "main" closure (or "errorIO" arg closure),
+ to which we hope to be linked
+*/
+extern P_ TopClosure;
+
+EXTFUN(stopThreadDirectReturn);
+UNVECTBL(,vtbl_stopStgWorld,stopThreadDirectReturn)
+
+/* Well, we have to put the ArrayOfData and ArrayOfPtrs info tables
+ somewhere...
+*/
+
+/* Array of data -- mutable.  The entry code is a stub: data arrays are
+   never entered, so reaching it indicates a compiler/runtime bug. */
+STATICFUN(ArrayOfData_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered a primitive array (of data)---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+DATA_ITBL(ArrayOfData_info,ArrayOfData_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"DATA-ARRAY","ARRAY");
+/* ToDo: could put a useful tag in there!!! */
+
+/* Array of pointers -- mutable.  Entry stub: pointer arrays are never
+   entered, so landing here means something went badly wrong. */
+STATICFUN(ArrayOfPtrs_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered a primitive array (of pointers)---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+MUTUPLE_ITBL(ArrayOfPtrs_info,ArrayOfPtrs_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"PTR-ARRAY(mut)","ARRAY");
+/* ToDo: could put a useful tag in there!!! */
+
+/* Entry stub for a full synchronising variable (SVar): these closures
+   are operated on via primitives, never entered directly. */
+STATICFUN(FullSVar_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered a full SVar---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+MUTUPLE_ITBL(FullSVar_info,FullSVar_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"FullSVar","ARRAY");
+/* ToDo: could put a useful tag in there!!! */
+
+/* Entry stub for an empty synchronising variable (SVar): these closures
+   are operated on via primitives, never entered directly. */
+STATICFUN(EmptySVar_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered an empty SVar---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+MUTUPLE_ITBL(EmptySVar_info,EmptySVar_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"EmptySVar","ARRAY");
+/* ToDo: could put a useful tag in there!!! */
+
+/* Array of pointers -- immutable.  Entry stub: never entered; reaching
+   this code indicates a compiler/runtime bug. */
+STATICFUN(ImMutArrayOfPtrs_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered a primitive array (immutable, pointers)---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+IMMUTUPLE_ITBL(ImMutArrayOfPtrs_info,ImMutArrayOfPtrs_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"PTR-ARRAY(immut)","ARRAY");
+/* ToDo: could put a useful tag in there!!! */
+
+/* (end of Array whatnot) */
+
+/* Question for Will: There seem to be a lot of these static things
+now - worth putting them in a file by themselves?? [ADR] */
+
+
+#ifndef PAR
+
+/* Ditto for Malloc Pointer entry point and info tables. [ADR]
+
+ BTW Will, I copied most of this blindly from above - what's with
+ this TAG stuff? And what kind of description/ type is wanted here?
+*/
+
+/* Entry stub for a Malloc Pointer closure: these wrap foreign data and
+   must never be entered as code. */
+STATICFUN(MallocPtr_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Compiler bug: Entered a Malloc Pointer---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+MallocPtr_ITBL(MallocPtr_info,MallocPtr_entry,UpdErr,0,INFO_OTHER_TAG,,,const,EF_,MallocPtr_K,"MALLOC PTR","MallocPtr");
+
+/* End of MallocPtr stuff */
+
+/* Ditto for the unused Stable Pointer info table. [ADR]
+*/
+
+extern void raiseError PROTO((StgStablePtr));
+extern StgStablePtr errorHandler;
+
+/* Unused Stable Pointer (ie unused slot in a stable pointer table) */
+/* Unlike the other entry stubs this one does not abort(): the fault may
+   be a user error (using a stable pointer after freeing it), so it
+   raises the registered Haskell error handler instead.  The stdio calls
+   go through the SAFESTGCALL wrappers because we stay in STG land. */
+STATICFUN(UnusedSP_entry)
+{
+    FB_
+    (void) SAFESTGCALL1(I_,(void *, FILE *),fflush,stdout);
+    (void) SAFESTGCALL2(I_,(void *, FILE *, char *),fprintf,stderr, "Entered an unused Stable Pointer---this shouldn't happen!\n(This could be program error (using stable pointer after freeing) or compiler bug.)\n");
+
+    (void) STGCALL1(void,(void *, StgStablePtr), raiseError, errorHandler);
+    FE_
+}
+
+STATIC_ITBL(UnusedSP_static_info,UnusedSP_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,IF_,CON_K,"UNUSED STABLE PTR","USP");
+
+SET_STATIC_HDR(UnusedSP_closure,UnusedSP_static_info,CC_SUBSUMED,,ED_RO_)
+};
+
+/* Entry point and Info table for Stable Pointer Table. */
+
+/* Entry stub for the stable pointer table itself: it is a heap object
+   only so the GC can see it, and must never be entered. */
+STATICFUN(StablePointerTable_entry)
+{
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered the stable pointer table---this shouldn't happen!\n", stderr);
+    abort();
+    FE_
+}
+
+STATIC_ITBL(EmptyStablePointerTable_static_info,StablePointerTable_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,IF_,SPT_K,"SP_TABLE","SP_TABLE");
+/* ToDo: could put a useful tag in there!!! */
+
+DYN_ITBL(StablePointerTable_info,StablePointerTable_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,IF_,SPT_K,"SP_TABLE","SP_TABLE");
+/* ToDo: could put a useful tag in there!!! */
+
+
+/* To ease initialisation of the heap, we start with an empty stable
+ pointer table. When we try to create the first stable pointer, the
+ overflow will trigger creation of a table of useful size.
+*/
+
+SET_STATIC_HDR(EmptySPTable_closure,EmptyStablePointerTable_static_info,CC_SUBSUMED,,ED_RO_)
+, (W_) DYN_VHS + 0 + 1 + 0 /* size = DYN_VHS + n + 1 + n with n = 0 */
+, (W_) 0 /* number of ptrs */
+, (W_) 0 /* top of stack */
+};
+
+/* End of SP stuff */
+#endif /* !PAR */
+
+
+/* the IoWorld token to start the whole thing off */
+/* Question: this is just an amusing hex code isn't it
+ -- or does it mean something? ADR */
+P_ realWorldZh_closure = (P_) 0xbadbadbaL;
+
+SET_STATIC_HDR(WorldStateToken_closure,SZh_static_info,CC_SUBSUMED/*harmless*/,,ED_RO_)
+, (W_) 0xbadbadbaL
+};
+
+#ifndef CONCURRENT
+
+/*
+ * Sequential entry into the STG world: load the STG registers from
+ * smInfo, push the stop-the-world return address and the IoWorld token,
+ * then enter TopClosure.
+ */
+STGFUN(startStgWorld)
+{
+    FB_
+    /* At this point we are in the threaded-code world.
+
+       TopClosure points to a closure of type PrimIO (), which should be
+       performed (by applying it to the state of the world).
+
+       The smInfo storage-management info block is assumed to be
+       up to date, and is used to load the STG registers.
+    */
+
+#if defined (DO_SPAT_PROFILING)
+    SET_ACTIVITY(ACT_REDN); /* init: do this first, so we count the restore insns */
+#endif
+
+    RestoreAllStgRegs(); /* inline! */
+
+    /* ------- STG registers are now valid! -------------------------*/
+
+    /* Put a suitable return address on the B stack */
+    RetReg = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
+
+    /* Put an IoWorld token on the A stack */
+    SpA -= AREL(1);
+    *SpA = (P_) WorldStateToken_closure;
+
+    Node = (P_) TopClosure; /* Point to the closure for main/errorIO-arg */
+    ENT_VIA_NODE();
+    InfoPtr=(D_)(INFO_PTR(Node));
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+}
+#endif /* ! CONCURRENT */
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[thread-return]{Polymorphic end-of-thread code}
+%* *
+%************************************************************************
+
+\begin{code}
+
+/*
+ Here's the polymorphic return for the end of a thread.
+
+ NB: For direct returns to work properly, the name of the routine must be
+ the same as the name of the vector table with vtbl_ removed and DirectReturn
+ appended. This is all the mangler understands.
+*/
+
+const W_
+vtbl_stopThread[] = {
+ /* at least "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn,
+ (W_) stopThreadDirectReturn
+};
+
+/*
+ * Direct-return target reached when the top-level closure has finished:
+ * save the STG registers back into smInfo, then either end the thread
+ * (CONCURRENT) or resume the mini-interpreter.
+ */
+STGFUN(stopThreadDirectReturn)
+{
+    FB_
+    /* The final exit.
+
+       The top-top-level closures (e.g., "main") are of type "IO ()".
+       When entered, they perform an IO action and return a () --
+       essentially, TagReg is set to 1. Here, we don't need to do
+       anything with that.
+
+       We just tidy up the register stuff (real regs in *_SAVE, then
+       *_SAVE -> smInfo locs).
+    */
+
+#ifdef CONCURRENT
+    SET_TASK_ACTIVITY(ST_OVERHEAD);
+#endif
+
+    SaveAllStgRegs(); /* inline! */
+
+#ifdef CONCURRENT
+    EndThread();
+#else
+    RESUME_(miniInterpretEnd);
+#endif
+    FE_
+}
+
+\end{code}
+
+\begin{code}
+I_ ErrorIO_call_count = 0;
+
+#ifdef CONCURRENT
+EXTFUN(EnterNodeCode);
+
+/*
+ * Concurrent version of the `error' continuation: reset the current
+ * thread's stack to a top-level state and enter TopClosure (the error
+ * continuation).  Advisory threads defer to the main thread instead.
+ * ErrorIO_call_count guards against unbounded nested calls to error.
+ */
+STGFUN(ErrorIO_innards)
+    /* Assumes that "TopClosure" has been set already */
+{
+    FB_
+    if (ErrorIO_call_count >= 16 /* MAGIC CONSTANT */ ) {
+	/* Don't wrap the calls; we're done with STG land */
+	fflush(stdout);
+	fprintf(stderr, "too many nested calls to `error'\n");
+	EXIT(EXIT_FAILURE);
+    }
+    ErrorIO_call_count++; /* NB: undo later if decide to let someone else handle it */
+
+    /* Unlock all global closures held by this thread! (ToDo) --JSM */
+
+    switch(TSO_TYPE(CurrentTSO)) {
+    case T_MAIN:
+	/* Re-initialize stack pointers (cf. NewThread) */
+#ifdef PAR
+	SpB = SuB = STKO_BSTK_BOT(StkOReg) + BREL(1);
+	SuA = STKO_ASTK_BOT(StkOReg) + AREL(1);
+#else
+	SuA = stackInfo.botA + AREL(1);
+	SuB = stackInfo.botB + BREL(1);
+#endif
+	break;
+
+    case T_REQUIRED:
+	/* Re-initialize stack pointers (cf. NewThread) */
+	SpB = SuB = STKO_BSTK_BOT(StkOReg) + BREL(1);
+	SuA = STKO_ASTK_BOT(StkOReg) + AREL(1);
+	break;
+
+    case T_ADVISORY:
+	ErrorIO_call_count--; /* undo the damage, as someone else will deal with it */
+	/* Let the main thread eventually handle it */
+	JMP_(stopThreadDirectReturn);
+
+    case T_FAIL:
+	EXIT(EXIT_FAILURE);
+
+    default:
+	/* Don't wrap the calls; we're done with STG land */
+	fflush(stdout);
+	fprintf(stderr,"ErrorIO: %x unknown\n", TSO_TYPE(CurrentTSO));
+	EXIT(EXIT_FAILURE);
+    }
+
+    /* Finish stack setup as if for a top-level task and enter the error node */
+
+    SpA = SuA - AREL(1);
+
+    *SpA = (P_) WorldStateToken_closure;
+
+    STKO_LINK(StkOReg) = Nil_closure;
+    STKO_RETURN(StkOReg) = NULL;
+
+#ifdef DO_REDN_COUNTING
+    STKO_ADEP(StkOReg) = STKO_BDEP(StkOReg) = 0;
+#endif
+
+    /* Go! */
+    Node = (P_) TopClosure;
+    RetReg = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
+    JMP_(EnterNodeCode);
+
+    FE_
+}
+\end{code}
+
+We cannot afford to call @error@ too many times
+(e.g., \tr{error x where x = error x}), so we keep count.
+
+\begin{code}
+#else /* !CONCURRENT */
+
+/*
+ * Sequential version of the `error' continuation: save the STG
+ * registers, reinitialise the stacks, and restart the STG world, which
+ * will enter TopClosure (the error continuation).  Bails out after 16
+ * nested calls to `error' (e.g. "error x where x = error x").
+ */
+StgFunPtr
+ErrorIO_innards(STG_NO_ARGS)
+    /* Assumes that "TopClosure" has been set already */
+{
+    FB_
+    if (ErrorIO_call_count >= 16 /* MAGIC CONSTANT */ ) {
+	/* Don't wrap the calls; we're done with STG land */
+	fflush(stdout);
+	fprintf(stderr, "too many nested calls to `error'\n");
+	EXIT(EXIT_FAILURE);
+    }
+    ErrorIO_call_count++;
+
+    /* Copy the heap-related registers into smInfo.  (Other registers get
+       saved in this process, but we aren't interested in them.)
+
+       Get a new stack (which re-initialises the smInfo stack stuff),
+       and start the world again.
+    */
+    /* ToDo: chk this has been handled in parallel world */
+
+    SaveAllStgRegs(); /* inline! */
+
+    if ( initStacks( &StorageMgrInfo ) != 0) {
+	/* Don't wrap the calls; we're done with STG land */
+	fflush(stdout);
+	fprintf(stderr, "initStacks failed!\n");
+	EXIT(EXIT_FAILURE);
+    }
+
+    JMP_( startStgWorld );
+    FE_
+}
+
+#endif /* !CONCURRENT */
+\end{code}
+
+\begin{code}
+#ifdef PAR
+
+/* Entry stubs for the three "RBH save" closure flavours (0, 1 and 2
+   pointer words).  These hold words displaced by revertible black
+   holes and must never be entered. */
+STATICFUN(RBH_Save_0_entry)
+{
+    FB_
+    fputs("Oops, entered an RBH save\n", stderr);
+    EXIT(EXIT_FAILURE);
+    FE_
+}
+
+STATICFUN(RBH_Save_1_entry)
+{
+    FB_
+    fputs("Oops, entered an RBH save\n", stderr);
+    EXIT(EXIT_FAILURE);
+    FE_
+}
+
+STATICFUN(RBH_Save_2_entry)
+{
+    FB_
+    fputs("Oops, entered an RBH save\n", stderr);
+    EXIT(EXIT_FAILURE);
+    FE_
+}
+
+SPEC_N_ITBL(RBH_Save_0_info,RBH_Save_0_entry,UpdErr,0,INFO_OTHER_TAG,2,0,,IF_,INTERNAL_KIND,"RBH-SAVE","RBH_Save_0");
+SPEC_N_ITBL(RBH_Save_1_info,RBH_Save_1_entry,UpdErr,0,INFO_OTHER_TAG,2,1,,IF_,INTERNAL_KIND,"RBH-SAVE","RBH_Save_1");
+SPEC_N_ITBL(RBH_Save_2_info,RBH_Save_2_entry,UpdErr,0,INFO_OTHER_TAG,2,2,,IF_,INTERNAL_KIND,"RBH-SAVE","RBH_Save_2");
+
+#endif /* PAR */
+\end{code}
+
+
+%/****************************************************************
+%* *
+%* Other Bits and Pieces *
+%* *
+%****************************************************************/
+
+\begin{code}
+/* If we don't need the slow entry code for a closure, we put in a
+ pointer to this in the closure's slow entry code pointer instead.
+ */
+
+/* Placeholder installed as a closure's slow-entry code when no real
+   slow entry exists; calling it is always a bug. */
+STGFUN(__std_entry_error__) {
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Called non-existent slow-entry code!!!\n", stderr);
+    abort();
+    JMP_(0); /* not reached; keeps the code-generator happy */
+    FE_
+}
+
+/* entry code -- for the closure used to stub dead A-stack slots;
+   entering a stubbed slot is always a bug. */
+STGFUN(STK_STUB_entry) {
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered from a stubbed stack slot!\n", stderr);
+    abort();
+    JMP_(0); /* not reached; keeps the code-generator happy */
+    FE_
+}
+
+/* info table */
+STATIC_ITBL(STK_STUB_static_info,STK_STUB_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,EF_,INTERNAL_KIND,"STK_STUB","STK_STUB");
+
+/* closure */
+SET_STATIC_HDR(STK_STUB_closure,STK_STUB_static_info,CC_SUBSUMED,,EXTDATA_RO)
+  , (W_)0, (W_)0
+};
+\end{code}
+
+\begin{code}
+#ifdef GRAN
+
+/* GranSim event-queue closure entry stub: event queues are bookkeeping
+   objects and must never be entered as code. */
+STGFUN(Event_Queue_entry) {
+    FB_
+    /* Plain stdio is fine here; we are leaving STG land for good */
+    fflush(stdout);
+    fputs("Entered from an event queue!\n", stderr);
+    abort();
+    JMP_(0); /* not reached; keeps the code-generator happy */
+    FE_
+}
+
+GEN_N_ITBL(Event_Queue_info,Event_Queue_entry,UpdErr,0,INFO_OTHER_TAG,5,2,const,EF_,INTERNAL_KIND,"EventQ","EventQ");
+
+#endif /* GRAN */
+\end{code}
+
+
+
+%/****************************************************************
+%* *
+%* Some GC info tables *
+%* *
+%****************************************************************/
+
+These have to be in a .lhc file, so they will be reversed correctly.
+
+\begin{code}
+#include "../storage/SMinternal.h"
+
+#if defined(_INFO_COPYING)
+
+/* Entry stub for the copying-GC CAF evacuate-and-update info table:
+   only the GC should ever look at this closure, never the mutator. */
+STGFUN(Caf_Evac_Upd_entry) {
+    FB_
+    /* Don't wrap the calls; we're done with STG land */
+    fflush(stdout);
+    fprintf(stderr,"Entered Caf_Evac_Upd %lx: Should never occur!\n", (W_) Node);
+    abort();
+    FE_
+}
+
+CAF_EVAC_UPD_ITBL(Caf_Evac_Upd_info,Caf_Evac_Upd_entry,const/*not static*/);
+
+#if defined(GCgn)
+
+/* Generational-GC forwarding-reference entry stubs: a forwarding
+   reference only exists transiently during garbage collection, so the
+   mutator must never enter one. */
+STGFUN(Forward_Ref_New_entry) {
+    FB_
+    /* Don't wrap the calls; we're done with STG land */
+    fflush(stdout);
+    fprintf(stderr,"Entered Forward_Ref_New %lx: Should never occur!\n", (W_) Node);
+    abort();
+    FE_
+}
+FORWARDREF_ITBL(Forward_Ref_New_info,Forward_Ref_New_entry,const/*not static*/,_Evacuate_Old_Forward_Ref);
+
+STGFUN(Forward_Ref_Old_entry) {
+    FB_
+    /* Don't wrap the calls; we're done with STG land */
+    fflush(stdout);
+    fprintf(stderr,"Entered Forward_Ref_Old %lx: Should never occur!\n", (W_) Node);
+    abort();
+    FE_
+}
+FORWARDREF_ITBL(Forward_Ref_Old_info,Forward_Ref_Old_entry,const/*not static*/,_Evacuate_New_Forward_Ref);
+
+STGFUN(OldRoot_Forward_Ref_entry) {
+    FB_
+    /* Don't wrap the calls; we're done with STG land */
+    fflush(stdout);
+    fprintf(stderr,"Entered OldRoot_Forward_Ref %lx: Should never occur!\n", (W_) Node);
+    abort();
+    FE_
+}
+FORWARDREF_ITBL(OldRoot_Forward_Ref_info,OldRoot_Forward_Ref_entry,const/*not static*/,_Evacuate_OldRoot_Forward);
+#else /* ! GCgn */
+
+/* Forwarding-reference entry stub (non-generational copying GC): a
+   forwarding reference only exists during GC and must never be entered
+   by the mutator. */
+STGFUN(Forward_Ref_entry) {
+    FB_
+    /* Don't wrap the calls; we're done with STG land */
+    fflush(stdout);
+    fprintf(stderr,"Entered Forward_Ref %lx: Should never occur!\n", (W_) Node);
+    abort();
+    FE_
+}
+FORWARDREF_ITBL(Forward_Ref_info,Forward_Ref_entry,const/*not static*/,_Evacuate_Forward_Ref);
+#endif /* ! GCgn */
+
+#endif /* _INFO_COPYING */
+
+#if defined(GCgn)
+OLDROOT_ITBL(OldRoot_info,Ind_Entry,const,EF_);
+#endif /* GCgn */
+\end{code}
+
+
+%/***************************************************************
+%* *
+%* Cost Centre stuff ... *
+%* *
+%****************************************************************/
+
+For cost centres we need prelude cost centres and register routine.
+
+N.B. ALL prelude cost centres should be declared here as none will
+ be declared when the prelude is compiled.
+
+ToDo: Explicit cost centres in prelude for Input and Output costs.
+
+\begin{code}
+#if defined(USE_COST_CENTRES)
+
+/* Kick off cost-centre registration: seed the register stack with a
+   NULL end-marker and jump to the module-registration chain. */
+STGFUN(startCcRegisteringWorld)
+{
+    FB_
+    /*
+     * We used to push miniInterpretEnd on the register stack, but
+     * miniInterpretEnd must only be entered with the RESUME_ macro,
+     * whereas the other addresses on the register stack must only be
+     * entered with the JMP_ macro. Now, we push NULL and test for
+     * it explicitly at each pop.
+     */
+    PUSH_REGISTER_STACK(NULL);
+    JMP_(_regMain);
+    FE_
+}
+
+CC_DECLARE(CC_CAFs, "CAFs_in_...", "PRELUDE", "PRELUDE", CC_IS_CAF,/*not static*/);
+CC_DECLARE(CC_DICTs, "DICTs_in_...", "PRELUDE", "PRELUDE", CC_IS_DICT,/*not static*/);
+
+START_REGISTER_PRELUDE(_regPrelude);
+REGISTER_CC(CC_CAFs);
+REGISTER_CC(CC_DICTs);
+END_REGISTER_CCS()
+\end{code}
+
+We also need cost centre declarations and registering routines for other
+built-in prelude-like modules.
+
+ToDo: What built-in prelude-like modules exist ?
+
+\begin{code}
+START_REGISTER_PRELUDE(_regByteOps); /* used in Glasgow tests only? */
+END_REGISTER_CCS()
+
+/* _regPrelude is above */
+
+START_REGISTER_PRELUDE(_regPreludeArray);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludeCore);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludeDialogueIO);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludeGlaMisc);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludeGlaST);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludeIOError);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludePS);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludePrimIO);
+END_REGISTER_CCS()
+
+START_REGISTER_PRELUDE(_regPreludeStdIO);
+END_REGISTER_CCS()
+#endif
+\end{code}
diff --git a/ghc/runtime/main/StgThreads.lhc b/ghc/runtime/main/StgThreads.lhc
new file mode 100644
index 0000000000..b3f9f28d0c
--- /dev/null
+++ b/ghc/runtime/main/StgThreads.lhc
@@ -0,0 +1,496 @@
+%
+% (c) The AQUA Project, Glasgow University, 1994
+%
+%************************************************************************
+%* *
+\section[StgThreads.lhc]{Threaded Threads Support}
+%* *
+%************************************************************************
+
+Some of the threads support is done in threaded code. How's that for ambiguous
+overloading?
+
+\begin{code}
+
+#ifdef CONCURRENT
+
+#define MAIN_REG_MAP /* STG world */
+#include "rtsdefs.h"
+
+#if 0
+#ifdef PAR
+#include "Statistics.h"
+#endif
+#endif
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[thread-objects]{Special objects for thread support}
+%* *
+%************************************************************************
+
+TSO's are Thread State Objects, where the thread context is stored when the
+thread is sleeping, and where we have slots for STG registers that don't
+live in real machine registers.
+
+\begin{code}
+
+/* Info table for Thread State Objects. */
+TSO_ITBL();
+
+/* TSOs are data and must never be entered as closures; reaching this
+   code means the scheduler state is corrupt, so bail out hard. */
+STGFUN(TSO_entry)
+{
+    FB_
+    fflush(stdout);
+    fprintf(stderr, "TSO Entry: panic");
+    abort();
+    FE_
+}
+
+\end{code}
+
+Stack objects are chunks of stack words allocated out of the heap and
+linked together in a chain.
+
+\begin{code}
+
+/* Info table for heap-allocated stack objects. */
+STKO_ITBL();
+
+/* Stack objects are data, never entered; entering one is a panic. */
+STGFUN(StkO_entry)
+{
+    FB_
+    fflush(stdout);
+    fprintf(stderr, "StkO Entry: panic");
+    abort();
+    FE_
+
+}
+
+#ifndef PAR
+
+/* Static variant, sequential world only. */
+STKO_STATIC_ITBL();
+
+/* As above: entering a static stack object is a panic. */
+STGFUN(StkO_static_entry)
+{
+    FB_
+    fflush(stdout);
+    fprintf(stderr, "StkO_static Entry: panic");
+    abort();
+    FE_
+
+}
+
+#endif
+
+\end{code}
+
+Blocking queues are essentially black holes with threads attached. These
+are the threads to be awakened when the closure is updated.
+
+\begin{code}
+
+EXTFUN(EnterNodeCode);
+
+/* Entry code for a blocking queue (a black hole with threads attached).
+   The entering thread links itself onto the queue hanging off Node,
+   saves its state into its TSO (arranging to re-enter Node on wake-up
+   via EnterNodeCode), and gives up the processor. */
+STGFUN(BQ_entry)
+{
+    FB_
+
+#if defined(GRAN)
+    STGCALL0(void,(),GranSimBlock);	/* Before overwriting TSO_LINK */
+#endif
+
+    /* Push CurrentTSO onto the front of Node's blocking queue. */
+    TSO_LINK(CurrentTSO) = (P_) BQ_ENTRIES(Node);
+    BQ_ENTRIES(Node) = (W_) CurrentTSO;
+
+    LivenessReg = LIVENESS_R1;
+    SaveAllStgRegs();
+    TSO_PC1(CurrentTSO) = EnterNodeCode;
+
+    if (DO_QP_PROF) {
+        QP_Event1("GR", CurrentTSO);
+    }
+#ifdef PAR
+    if(do_gr_profile) {
+        /* Note that CURRENT_TIME may perform an unsafe call */
+        TIME now = CURRENT_TIME;
+        TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
+        TSO_BLOCKCOUNT(CurrentTSO)++;
+        TSO_QUEUE(CurrentTSO) = Q_BLOCKED;
+        TSO_BLOCKEDAT(CurrentTSO) = now;
+        DumpGranEvent(GR_BLOCK, CurrentTSO);
+    }
+#endif
+#if defined(GRAN)
+    ReSchedule(NEW_THREAD);
+#else
+    ReSchedule(0);
+#endif
+    FE_
+}
+
+/* Info table for blocking queues. */
+BQ_ITBL();
+
+\end{code}
+
+Revertible black holes are needed in the parallel world, to handle
+negative acknowledgements of messages containing updatable closures.
+The idea is that when the original message is transmitted, the closure
+is turned into a revertible black hole...an object which acts like a
+black hole when local threads try to enter it, but which can be
+reverted back to the original closure if necessary.
+
+It's actually a lot like a blocking queue (BQ) entry, because
+revertible black holes are initially set up with an empty blocking
+queue.
+
+The combination of GrAnSim with revertible black holes has not been
+checked, yet. -- HWL
+
+\begin{code}
+
+#ifdef PAR
+
+/* Entry code for a revertible black hole: behaves like a blocking queue
+   (the entering thread queues itself and blocks), but the closure keeps
+   its SPEC/GEN identity so it can be reverted to the original closure
+   if the outstanding message is negatively acknowledged. */
+STGFUN(RBH_entry)
+{
+    FB_
+
+#if defined(GRAN)
+    STGCALL0(void, (), GranSimBlock);	/* Before overwriting TSO_LINK */
+#endif
+
+    /* The blocking queue lives at different offsets in SPEC and GEN
+       closures, so dispatch on the info type. */
+    switch (INFO_TYPE(InfoPtr)) {
+    case INFO_SPEC_RBH_TYPE:
+        TSO_LINK(CurrentTSO) = (P_) SPEC_RBH_BQ(Node);
+        SPEC_RBH_BQ(Node) = (W_) CurrentTSO;
+        break;
+    case INFO_GEN_RBH_TYPE:
+        TSO_LINK(CurrentTSO) = (P_) GEN_RBH_BQ(Node);
+        GEN_RBH_BQ(Node) = (W_) CurrentTSO;
+        break;
+    default:
+        fflush(stdout);
+        fprintf(stderr, "Panic: non-{SPEC,GEN} RBH %#lx (IP %#lx)\n", Node, InfoPtr);
+        EXIT(EXIT_FAILURE);
+    }
+
+    /* Save the thread so it re-enters Node when awakened. */
+    LivenessReg = LIVENESS_R1;
+    SaveAllStgRegs();
+    TSO_PC1(CurrentTSO) = EnterNodeCode;
+
+    if (DO_QP_PROF) {
+        QP_Event1("GR", CurrentTSO);
+    }
+
+    if(do_gr_profile) {
+        /* Note that CURRENT_TIME may perform an unsafe call */
+        TIME now = CURRENT_TIME;
+        TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
+        TSO_BLOCKCOUNT(CurrentTSO)++;
+        TSO_QUEUE(CurrentTSO) = Q_BLOCKED;
+        TSO_BLOCKEDAT(CurrentTSO) = now;
+        DumpGranEvent(GR_BLOCK, CurrentTSO);
+    }
+
+#if defined(GRAN)
+    ReSchedule(NEW_THREAD);
+#else
+    ReSchedule(0);
+#endif
+
+    FE_
+}
+
+#endif
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[thread-entrypoints]{Scheduler-Thread Interfaces}
+%* *
+%************************************************************************
+
+The normal way of entering a thread is through resumeThread, which
+short-circuits and indirections to the TSO and StkO, sets up STG registers,
+and jumps to the saved PC.
+
+\begin{code}
+
+/* Standard thread-resumption entry: short-circuit indirections to the
+   TSO and its stack object, restore the STG registers from the TSO,
+   and jump to the thread's saved PC (TSO_PC1). */
+STGFUN(resumeThread)
+{
+    FB_
+
+    /* The TSO may have been overwritten with an indirection; chase it. */
+    while((P_) INFO_PTR(CurrentTSO) == Ind_info) {
+        CurrentTSO = (P_) IND_CLOSURE_PTR(CurrentTSO);
+    }
+
+#ifdef PAR
+    if (do_gr_profile) {
+        TSO_QUEUE(CurrentTSO) = Q_RUNNING;
+        /* Note that CURRENT_TIME may perform an unsafe call */
+        TSO_BLOCKEDAT(CurrentTSO) = CURRENT_TIME;
+    }
+#endif
+
+    CurrentRegTable = TSO_INTERNAL_PTR(CurrentTSO);
+
+    /* The saved stack object may likewise be an indirection; chase it. */
+    while((P_) INFO_PTR(SAVE_StkO) == Ind_info) {
+        SAVE_StkO = (P_) IND_CLOSURE_PTR(SAVE_StkO);
+    }
+    RestoreAllStgRegs();
+
+    SET_TASK_ACTIVITY(ST_REDUCING);
+    SET_ACTIVITY(ACT_REDN);	/* back to normal reduction */
+    RESTORE_CCC(TSO_CCC(CurrentTSO));
+    JMP_(TSO_PC1(CurrentTSO));
+    FE_
+}
+
+\end{code}
+
+Since we normally context switch during a heap check, it is possible
+that we will return to a previously suspended thread without
+sufficient heap for the thread to continue. However, we have cleverly
+stashed away the heap requirements in @TSO_ARG1@ so that we can decide
+whether or not to perform a garbage collection before resuming the
+thread. The actual thread resumption address (either @EnterNodeCode@
+or elsewhere) is stashed in TSO_PC2.
+
+\begin{code}
+
+/* Resumption point for a thread suspended at a heap check: the word
+   count it needed is stashed in TSO_ARG1.  If there still is not
+   enough heap, GC and come back via resumeThread; otherwise resume at
+   the address stashed in TSO_PC2. */
+STGFUN(CheckHeapCode)
+{
+    FB_
+
+    ALLOC_HEAP(TSO_ARG1(CurrentTSO)); /* ticky profiling */
+    SET_ACTIVITY(ACT_HEAP_CHK);	/* SPAT counting */
+    if ((Hp += TSO_ARG1(CurrentTSO)) > HpLim) {
+        ReallyPerformThreadGC(TSO_ARG1(CurrentTSO), rtsFalse);
+        JMP_(resumeThread);
+    }
+    SET_TASK_ACTIVITY(ST_REDUCING);
+    SET_ACTIVITY(ACT_REDN);	/* back to normal reduction */
+    RESUME_(TSO_PC2(CurrentTSO));
+    FE_
+}
+
+\end{code}
+
+Often, a thread starts (or rather, resumes) by entering the closure
+that Node points to. Here's a tiny code fragment to do just that.
+The saved PC in the TSO can be set to @EnterNodeCode@ whenever we
+want this to happen upon resumption of the thread.
+
+\begin{code}
+
+/* Resume a thread by (re-)entering the closure Node points to; used as
+   the saved PC whenever a sleeping thread should restart via Node. */
+STGFUN(EnterNodeCode)
+{
+    FB_
+    ENT_VIA_NODE();
+    InfoPtr=(D_)(INFO_PTR(Node));
+    GRAN_EXEC(5,1,2,0,0);
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+}
+
+\end{code}
+
+Then, there are the occasions when we just want to pick up where we left off.
+We use RESUME_ here instead of JMP_, because when we return to a call site,
+the Alpha is going to try to load %gp from %ra rather than %pv, and JMP_ only
+sets %pv. Resuming to the start of a function is currently okay, but an
+extremely bad practice. As we add support for more architectures, we can expect
+the difference between RESUME_ and JMP_ to become more acute.
+
+\begin{code}
+
+/* Resume a thread exactly where it left off (saved PC in TSO_PC2).
+   RESUME_ rather than JMP_ -- see the literate text above. */
+STGFUN(Continue)
+{
+    FB_
+
+    SET_TASK_ACTIVITY(ST_REDUCING);
+    SET_ACTIVITY(ACT_REDN);	/* back to normal reduction */
+    RESUME_(TSO_PC2(CurrentTSO));
+    FE_
+}
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[stack-chunk-underflow-code]{Underflow code for stack chunks}
+%* *
+%************************************************************************
+
+\begin{code}
+
+extern P_ AvailableStack;
+
+#ifndef PAR
+
+\end{code}
+
+On a uniprocessor, stack underflow causes us no great headaches. The
+old value of RetReg is squirreled away at the base of the top stack
+object (the one that's about to get blown away). We just yank it
+outta there and perform the same kind of return that got us here in
+the first place.
+
+This simplicity is due to the fact that we never have to fetch a stack
+object on underflow.
+
+\begin{code}
+
+/* Uniprocessor stack-chunk underflow: the previous RetReg was saved at
+   the base of the dying chunk; recover it, unlink the chunk, restore
+   the stack registers, and return through the given continuation.
+   One instantiation per return convention (direct + 8 vectored). */
+#define DO_RETURN_TEMPLATE(label, cont)		\
+    STGFUN(label)				\
+    {						\
+    P_ temp;					\
+	FB_					\
+	temp = STKO_LINK(StkOReg);		\
+	RetReg = STKO_RETURN(StkOReg);		\
+	StkOReg = temp;				\
+	RestoreStackStgRegs();			\
+	JMP_(cont);				\
+	FE_					\
+    }
+
+DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg)))
+DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)])
+DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)])
+DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)])
+DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)])
+DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)])
+
+DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)])
+DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)])
+DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)])
+
+/* Underflow that re-enters Node instead of returning through RetReg. */
+DO_RETURN_TEMPLATE(StackUnderflowEnterNode, EnterNodeCode)
+
+#else
+
+\end{code}
+
+In the parallel world, we may have to fetch the StkO from a remote
+location before we can load up the stack registers and perform the
+return. Our convention is that we load RetReg up with the exact
+continuation address (after a vector table lookup, if necessary),
+and tail-call the code to fetch the stack object. (Of course, if
+the stack object is already local, we then just jump to the
+continuation address.)
+
+\begin{code}
+
+/* Shared tail of every parallel-world underflow: unlink the dead stack
+   chunk and jump through RetReg, which the caller has already set to
+   the exact continuation address.  Fetching a remote stack object
+   before the restore is still a ToDo (see below). */
+STGFUN(CommonUnderflow)
+{
+    P_ temp;
+
+    FB_
+    temp = STKO_LINK(StkOReg);
+    StkOReg = temp;
+    /* ToDo: Fetch the remote stack object here! */
+    RestoreStackStgRegs();
+    JMP_(RetReg);
+    FE_
+}
+
+/* Parallel-world underflow: first restore RetReg from the frame, then
+   overwrite it with the exact continuation address (cont may index
+   through the just-restored RetReg, so the order matters), and hand
+   off to CommonUnderflow. */
+#define DO_RETURN_TEMPLATE(label, cont)		\
+    STGFUN(label)				\
+    {						\
+	FB_					\
+	RetReg = STKO_RETURN(StkOReg);		\
+	RetReg = (StgRetAddr)(cont);		\
+	LivenessReg = INFO_LIVENESS(InfoPtr);	\
+	JMP_(CommonUnderflow);			\
+	FE_					\
+    }
+
+DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg)))
+DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)])
+DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)])
+DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)])
+DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)])
+DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)])
+DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)])
+DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)])
+DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)])
+
+/* Underflow for primitive (direct, unvectored) returns: no pointer
+   registers are live across the chunk switch. */
+STGFUN(PrimUnderflow)
+{
+    FB_
+    RetReg = STKO_RETURN(StkOReg);
+    RetReg = (StgRetAddr)DIRECT(((P_)RetReg));
+    LivenessReg = NO_LIVENESS;
+    JMP_(CommonUnderflow);
+    FE_
+}
+
+/*
+ * This one is similar, but isn't part of the return vector. It's only used
+ * when we fall off of a stack chunk and want to enter Node rather than
+ * returning through RetReg. (This occurs during UpdatePAP, when the updatee
+ * isn't on the current stack chunk.) It can't be done with the template,
+ * because R2 is dead, and R1 points to a PAP. Only R1 is live.
+ */
+
+/* Underflow that re-enters Node rather than returning through RetReg
+   (see the comment above); only R1 (Node) is live. */
+STGFUN(StackUnderflowEnterNode)
+{
+    FB_
+    RetReg = (StgRetAddr)(EnterNodeCode);
+    LivenessReg = LIVENESS_R1;
+    JMP_(CommonUnderflow);
+    FE_
+}
+
+#endif
+
+/* Return vector installed at the base of each stack chunk; vectored
+   returns that fall off a chunk land in the matching Underflow entry. */
+const W_
+vtbl_Underflow[] = {
+    /* "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */
+    (W_) UnderflowVect0,
+    (W_) UnderflowVect1,
+    (W_) UnderflowVect2,
+    (W_) UnderflowVect3,
+    (W_) UnderflowVect4,
+    (W_) UnderflowVect5,
+    (W_) UnderflowVect6,
+    (W_) UnderflowVect7
+};
+
+\end{code}
+
+\begin{code}
+
+/* Return code for `seq': pop the saved RetReg and the real continuation
+   from the B stack, then jump to that continuation. */
+IFN_(seqDirectReturn) {
+    void *cont;
+
+    FB_
+    RetReg = (StgRetAddr) SpB[BREL(0)];
+    cont = (void *) SpB[BREL(1)];
+    SpB += BREL(2);
+/* GRAN_EXEC(1,1,2,0,0); -- ToDo: RE-CHECK (WDP) */
+    JMP_(cont);
+    FE_
+}
+
+/*
+  NB: For direct returns to work properly, the name of the routine must be
+  the same as the name of the vector table with vtbl_ removed and DirectReturn
+  appended.  This is all the mangler understands.
+ */
+
+/* Return vector for seq frames: every convention funnels into
+   seqDirectReturn (the constructor itself is irrelevant to seq). */
+const W_
+vtbl_seq[] = {
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn,
+    (W_) seqDirectReturn
+};
+
+#endif /* CONCURRENT */
+\end{code}
diff --git a/ghc/runtime/main/StgTrace.lc b/ghc/runtime/main/StgTrace.lc
new file mode 100644
index 0000000000..0c4ab4ca33
--- /dev/null
+++ b/ghc/runtime/main/StgTrace.lc
@@ -0,0 +1,74 @@
+\begin{code}
+
+#include "rtsdefs.h"
+
+#if defined(DO_RUNTIME_TRACE_UPDATES)
+
+/********** Debugging Tracing of Updates ***********/
+
+/* These will only be called if StgUpdate.h macro calls
+ compiled with -DDO_RUNTIME_TRACE_UPDATES
+ */
+
+extern I_ traceUpdates; /* a Bool, essentially */
+
+/* Trace an update-with-indirection from updclosure to heapptr.  Under
+   appel-style generational GC (GCap) the report also says which
+   generation the updatee is in (old iff at or below OldLim). */
+void
+TRACE_UPDATE_Ind(updclosure,heapptr)
+P_ updclosure,heapptr;
+{
+#if defined(GCap)
+    if (traceUpdates) {
+        fprintf(stderr,"Upd Ind %s Gen: 0x%lx -> 0x%lx\n",
+                (updclosure) <= StorageMgrInfo.OldLim ? "Old" : "New",
+                (W_) updclosure, (W_) heapptr);
+    }
+#else
+    if (traceUpdates) {
+        fprintf(stderr,"Upd Ind: 0x%lx -> 0x%lx\n",
+                (W_) updclosure, (W_) heapptr);
+    }
+#endif
+}
+
+/* Trace an in-place update of a pointer-free closure; with GCap the
+   updatee's generation is reported as well. */
+void
+TRACE_UPDATE_Inplace_NoPtrs(updclosure)
+P_ updclosure;
+{
+#if defined(GCap)
+    if (traceUpdates) {
+        fprintf(stderr,"Upd Inplace %s Gen: 0x%lx\n",
+                (updclosure) <= StorageMgrInfo.OldLim ? "Old" : "New",
+                (W_) updclosure);
+    }
+#else
+    if (traceUpdates) {
+        fprintf(stderr,"Upd Inplace: 0x%lx\n", (W_) updclosure);
+    }
+#endif
+}
+
+/* Trace an in-place update of a closure that contains pointers.  With
+   generational collection (GCap) an old-generation updatee is actually
+   redirected to the new copy at hp+1, so the two cases are reported
+   differently; otherwise it is a plain in-place update. */
+void
+TRACE_UPDATE_Inplace_Ptrs(updclosure, hp)
+P_ updclosure;
+P_ hp;
+{
+#if defined(GCap)
+    if (traceUpdates) {
+        if ((updclosure) <= StorageMgrInfo.OldLim) {
+            fprintf(stderr,"Upd Redirect Old Gen (Ptrs): 0x%lx -> 0x%lx\n",
+                    (W_) updclosure,
+                    (W_) (hp + 1));
+        } else {
+            fprintf(stderr,"Upd Inplace New Gen (Ptrs): 0x%lx\n", (W_) updclosure);
+        }
+    }
+#else
+    if (traceUpdates) {
+        /* "Upd" (not "Update") for consistency with the other trace routines */
+        fprintf(stderr,"Upd Inplace: 0x%lx\n", (W_) updclosure);
+    }
+#endif
+}
+
+#endif /* DO_RUNTIME_TRACE_UPDATES */
+
+\end{code}
diff --git a/ghc/runtime/main/StgUpdate.lhc b/ghc/runtime/main/StgUpdate.lhc
new file mode 100644
index 0000000000..904f637124
--- /dev/null
+++ b/ghc/runtime/main/StgUpdate.lhc
@@ -0,0 +1,730 @@
+%************************************************************************
+%* *
+\section[update-code]{Code required for update abstraction}
+%* *
+%************************************************************************
+
+This code is required by the update interface which sits on top of the
+storage manager interface (See \tr{SMupdate.lh}).
+
+Some of this stuff has been separated (correctly!) into StgThreads.lhc
+for version 0.23. Could someone (Hans?) bring us up to date, please!
+KH.
+
+\begin{itemize}
+\item Indirection entry code and info table.
+\item Black Hole entry code and info table.
+\item Update frame code and return vectors.
+\item PAP update code.
+\item PAP entry code and info table.
+\end{itemize}
+
+System-wide constants need to be included:
+\begin{code}
+#define MAIN_REG_MAP /* STG world */
+
+#include "rtsdefs.h"
+#include "SMupdate.h"
+#if 0
+#ifdef PAR
+# include "Statistics.h"
+#endif
+#endif
+
+EXTDATA(Nil_closure);
+
+#if defined(DO_REDN_COUNTING)
+extern void PrintRednCountInfo(STG_NO_ARGS);
+extern I_ showRednCountStats;
+#endif
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[indirection-code]{Indirection code}
+%* *
+%************************************************************************
+
+The entry code for indirections and the indirection info-table.
+\begin{code}
+/* Entry code for an indirection: follow the pointer in the closure and
+   enter the target. */
+STGFUN(Ind_entry)
+{
+    FB_
+    ENT_IND(Node);	/* Ticky-ticky profiling info */
+    SET_ACTIVITY(ACT_INDIRECT);	/* SPAT profiling */
+
+    Node = (P_) IND_CLOSURE_PTR((P_) Node);
+    ENT_VIA_NODE();
+    InfoPtr=(D_)(INFO_PTR(Node));
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+}
+
+IND_ITBL(Ind_info,Ind_entry,const,EF_);
+
+\end{code}
+
+We also need a special @CAF@ indirection info table which is used to
+indirect @CAF@s to evaluated results in the heap.
+\begin{code}
+/* Entry code for a CAF indirection (a CAF overwritten with a pointer
+   to its evaluated result in the heap); code is identical to Ind_entry,
+   only the info table differs. */
+STGFUN(Caf_entry)			/* same as Ind_entry */
+{
+    FB_
+    ENT_IND(Node);
+    SET_ACTIVITY(ACT_INDIRECT);	/* SPAT profiling */
+
+    Node = (P_) IND_CLOSURE_PTR((P_) Node);
+    ENT_VIA_NODE();
+    InfoPtr=(D_)(INFO_PTR(Node));
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+}
+
+CAF_ITBL(Caf_info,Caf_entry,const,EF_);
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[black-hole-code]{Black Hole code}
+%* *
+%************************************************************************
+
+The entry code for black holes abort indicating a cyclic data dependency.
+It is used to overwrite closures currently being evaluated.
+
+In the concurrent world, black holes are synchronization points, and they
+are turned into blocking queues when there are threads waiting for the
+evaluation of the closure to finish.
+
+\begin{code}
+#ifdef CONCURRENT
+EXTFUN(EnterNodeCode);
+EXTFUN(StackUnderflowEnterNode);
+EXTDATA_RO(BQ_info);
+#else
+extern StgStablePtr errorHandler;
+extern void raiseError PROTO((StgStablePtr));
+#endif
+
+/* Entry code for an updatable black hole.
+   Sequential world: entering a black hole means a cyclic data
+   dependency -- report it and raise the error handler.
+   Concurrent world: turn the black hole into a blocking queue with the
+   current thread as its only member, save the thread (to re-enter Node
+   on wake-up), and reschedule. */
+STGFUN(BH_UPD_entry)
+{
+#ifndef CONCURRENT
+    FB_
+    (void) STGCALL1(int,(void *, FILE *),fflush,stdout);
+    (void) STGCALL2(int,(),fprintf,stderr,"Entered a `black hole': the program has a cyclic data dependency.\n");
+
+# if defined(USE_COST_CENTRES)
+    {
+        CostCentre cc = (CostCentre) CC_HDR(Node);
+        (void) STGCALL5(int,(),fprintf,stderr,"Cost Centre: %s Module: %s Group %s\n",cc->label, cc->module, cc->group);
+    }
+# endif
+
+# if defined(DO_REDN_COUNTING)
+    if (showRednCountStats) {
+        (void) STGCALL0(void,(),PrintRednCountInfo);
+    }
+# endif
+
+    (void) STGCALL1(void,(void *, StgStablePtr), raiseError, errorHandler);
+    FE_
+
+#else /* threads! */
+
+    FB_
+
+# if 0
+    if ( debug & 0x80 )
+      (void) STGCALL4(int,(),fprintf,stderr,"GRAN_CHECK in BH_UPD_entry: Entered a `black hole' @ 0x%x (CurrentTSO @ 0x%x\n ",Node,CurrentTSO);
+#endif
+
+# if defined(GRAN)
+    STGCALL0(void,(),GranSimBlock);	/* Do this before losing its TSO_LINK */
+# endif
+
+    /* Convert the black hole into a blocking queue holding this thread. */
+    TSO_LINK(CurrentTSO) = Nil_closure;
+    SET_INFO_PTR(Node, BQ_info);
+    BQ_ENTRIES(Node) = (W_) CurrentTSO;
+
+# if defined(GCap) || defined(GCgn)
+    /* If we modify a black hole in the old generation,
+       we have to make sure it goes on the mutables list */
+
+    if(Node <= StorageMgrInfo.OldLim) {
+        MUT_LINK(Node) = (W_) StorageMgrInfo.OldMutables;
+        StorageMgrInfo.OldMutables = Node;
+    } else
+        MUT_LINK(Node) = MUT_NOT_LINKED;
+# endif
+
+    LivenessReg = LIVENESS_R1;
+    SaveAllStgRegs();
+    TSO_PC1(CurrentTSO) = EnterNodeCode;
+
+    if (DO_QP_PROF) {
+        QP_Event1("GR", CurrentTSO);
+    }
+
+# ifdef PAR
+    if(do_gr_profile) {
+        /* Note that CURRENT_TIME may perform an unsafe call */
+        TIME now = CURRENT_TIME;
+        TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
+        TSO_BLOCKCOUNT(CurrentTSO)++;
+        TSO_QUEUE(CurrentTSO) = Q_BLOCKED;
+        TSO_BLOCKEDAT(CurrentTSO) = now;
+        DumpGranEvent(GR_BLOCK, CurrentTSO);
+    }
+# endif
+
+# if defined(GRAN)
+    /* CurrentTSO = Nil_closure; */
+    ReSchedule(NEW_THREAD);
+# else
+    ReSchedule(0);
+# endif
+
+    FE_
+#endif /* threads */
+}
+
+/* made external so that debugger can get at it more effectively */
+/* Entry code for a single-entry black hole: either the compiler wrongly
+   marked a closure single-entry, or there is a cyclic dependency.
+   Sequential world raises the error handler; concurrent world exits. */
+STGFUN(BH_SINGLE_entry)
+{
+    FB_
+
+    (void) STGCALL1(int,(void *, FILE *),fflush,stdout);
+    (void) STGCALL2(int,(),fprintf,stderr,"Entered a single-entry `black hole' --\n");
+    (void) STGCALL2(int,(),fprintf,stderr,"either the compiler made a mistake on single-entryness,\n");
+    (void) STGCALL2(int,(),fprintf,stderr,"or the program has a cyclic data dependency.\n");
+
+#if defined(USE_COST_CENTRES)
+    {
+        CostCentre cc = (CostCentre) CC_HDR(Node);
+        (void) STGCALL5(int,(),fprintf,stderr, "Cost Centre: %s  Module: %s  Group %s\n",cc->label, cc->module, cc->group);
+    }
+#endif
+
+# if defined(DO_REDN_COUNTING)
+    if (showRednCountStats) {
+        (void) STGCALL0(void,(),PrintRednCountInfo);
+    }
+# endif
+
+#ifndef CONCURRENT
+    (void) STGCALL1(void,(void *, StgStablePtr), raiseError, errorHandler);
+#else
+    EXIT(EXIT_FAILURE);
+#endif
+
+    FE_
+}
+\end{code}
+
+Updatable closures are overwritten with a black hole of a fixed size,
+@MIN_UPD_SIZE@.
+
+\begin{code}
+/* All black holes share one closure category for profiling. */
+CAT_DECLARE(BH,BH_K,"BH","BH") /* just one, shared */
+
+/* Info table for the updatable black hole (MIN_UPD_SIZE words). */
+BH_ITBL(BH_UPD_info,BH_UPD_entry,U,const,EF_);
+\end{code}
+
+Single-Entry closures, which are not updated, are also overwritten
+with a black hole. They have size @MIN_NONUPD_SIZE@.
+
+\begin{code}
+/* Info table for the non-updatable, single-entry black hole. */
+BH_ITBL(BH_SINGLE_info,BH_SINGLE_entry,N,const,EF_);
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[static-update-code]{Static update code in update frames}
+%* *
+%************************************************************************
+
+This code is pointed to from update frames. It has to cope with
+any kind of algebraic return: vectored or unvectored.
+
+See \tr{SMupdate.lh} for a description of the various update frames
+and the macros defining their layout.
+
+On entry to this code:
+\begin{itemize}
+\item @R1@ points to a recently created heap object (return in heap) or
+is dead (return in regs).
+\item @R2@ points to the info table for the constructor.
+\item When returning in regs, any of the return-regs (@R3@...) may be live,
+but aren't used by this code. They must be preserved.
+\item @SpB@ points to the topmost word of the update frame.
+\end{itemize}
+
+NEW update mechanism (Jan '94):
+
+When returning to an update frame, we want to jump directly to the
+update code for the constructor in hand. Because of the various
+possible return conventions (all of which must be handled by the
+generic update frame), we actually end up with a somewhat indirect
+jump.
+
+\begin{code}
+
+/* Direct return into a standard update frame: jump straight to the
+   returning constructor's own update code (found via its info table). */
+STGFUN(StdUpdFrameDirectReturn)
+{
+    FB_
+    JMP_(UPDATE_CODE(InfoPtr));
+    FE_
+}
+
+/*
+ NB: For direct returns to work properly, the name of the routine must be
+ the same as the name of the vector table with vtbl_ removed and DirectReturn
+ appended. This is all the mangler understands.
+*/
+
+/* Return vector for standard update frames: every vectored return
+   funnels into StdUpdFrameDirectReturn, which dispatches on InfoPtr. */
+const
+W_
+vtbl_StdUpdFrame[] = {
+    /* at least "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */
+    (W_) StdUpdFrameDirectReturn/*0*/,
+    (W_) StdUpdFrameDirectReturn/*1*/,
+    (W_) StdUpdFrameDirectReturn/*2*/,
+    (W_) StdUpdFrameDirectReturn/*3*/,
+    (W_) StdUpdFrameDirectReturn/*4*/,
+    (W_) StdUpdFrameDirectReturn/*5*/,
+    (W_) StdUpdFrameDirectReturn/*6*/,
+    (W_) StdUpdFrameDirectReturn/*7*/
+};
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[existing-con-update-code]{Update code for existing constructors}
+%* *
+%************************************************************************
+
+Here is the standard update code for objects that are returned in the heap
+(or those which are initially returned in registers, but have already been
+allocated in the heap earlier in the update chain.) In either case, Node
+points to the heap object. The update code grabs the address of the updatee
+out of the partial update frame (the return address has already been popped),
+makes the updatee an indirection to Node, and returns according to the convention
+for the constructor.
+
+\begin{code}
+/* Standard update for a constructor already in the heap: overwrite the
+   updatee in the current update frame with an indirection to Node, pop
+   the frame, and return by the appropriate convention.  One
+   instantiation per return convention (direct + 8 vectored). */
+#define IND_UPD_TEMPLATE(label, retvector)			\
+    STGFUN(label)						\
+    {								\
+	FB_							\
+	UPD_EXISTING();	/* Ticky-ticky profiling info */	\
+	/* Update thing off stk with an indirection to Node */	\
+	UPD_IND(GRAB_UPDATEE(SpB), Node);			\
+	/* Pop the standard update frame */			\
+	POP_STD_UPD_FRAME()					\
+								\
+	JMP_(retvector);					\
+	FE_							\
+    }
+
+IND_UPD_TEMPLATE(IndUpdRetDir, DIRECT(((P_)RetReg)))
+IND_UPD_TEMPLATE(IndUpdRetV0, ((P_)RetReg)[RVREL(0)])
+IND_UPD_TEMPLATE(IndUpdRetV1, ((P_)RetReg)[RVREL(1)])
+IND_UPD_TEMPLATE(IndUpdRetV2, ((P_)RetReg)[RVREL(2)])
+IND_UPD_TEMPLATE(IndUpdRetV3, ((P_)RetReg)[RVREL(3)])
+IND_UPD_TEMPLATE(IndUpdRetV4, ((P_)RetReg)[RVREL(4)])
+IND_UPD_TEMPLATE(IndUpdRetV5, ((P_)RetReg)[RVREL(5)])
+IND_UPD_TEMPLATE(IndUpdRetV6, ((P_)RetReg)[RVREL(6)])
+IND_UPD_TEMPLATE(IndUpdRetV7, ((P_)RetReg)[RVREL(7)])
+
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[no-update-code]{Code for Erroneous Updates}
+%* *
+%************************************************************************
+
+\begin{code}
+
+/* Update-code slot for info tables whose closures should never be the
+   subject of an update; reaching it is a compiler/RTS bug. */
+STGFUN(UpdErr)
+{
+    FB_
+
+    fflush(stdout);
+    fprintf(stderr, "Update error: not a constructor!\n");
+    abort();
+
+    FE_
+}
+
+/* Catch-all entry for info-table slots that must never be reached. */
+STGFUN(StdErrorCode)
+{
+    FB_
+
+    fflush(stdout);
+    fprintf(stderr, "Standard error: should never happen!\n");
+    abort();
+
+    FE_
+}
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[permanent-indirections]{Lexical Scoping Updates}
+%* *
+%************************************************************************
+
+A function entered without any arguments is updated with an
+indirection. For lexically scoped profiling we still need to set the
+cost centre if we enter the PAP. As the indirection is removed by the
+garbage collector this would not be possible.
+
+To solve this problem we introduce a permanent indirection which sets
+the cost centre when entered. The heap profiler ignores the space
+occupied by it as it would not reside in the heap during normal
+execution.
+
+\begin{code}
+#if defined(USE_COST_CENTRES)
+
+/* Permanent indirection: like Ind_entry, but is never short-circuited
+   by the GC and sets the PAP's cost centre on the way through, so
+   lexically-scoped profiling still attributes costs correctly (see
+   the literate text above). */
+STGFUN(Perm_Ind_entry)
+{
+    FB_
+
+    /* Don't add INDs to granularity cost */
+
+    ENT_IND(Node);	/* Ticky-ticky profiling info */
+
+    /* Enter PAP cost centre -- lexical scoping only */
+    ENTER_CC_PAP_CL(Node);
+
+    Node = (P_) IND_CLOSURE_PTR((P_) Node);
+    ENT_VIA_NODE();	/* Ticky-ticky profiling info */
+
+    InfoPtr=(D_)(INFO_PTR(Node));
+# if defined(GRAN)
+    GRAN_EXEC(1,1,2,0,0);
+# endif
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+}
+
+PERM_IND_ITBL(Perm_Ind_info,Perm_Ind_entry,const,EF_);
+
+#endif /* USE_COST_CENTRES */
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[partial-application-updates]{Partial applications}
+%* *
+%************************************************************************
+
+See STG paper implementation section of Partial application updates.
+
+We jump here when the current function fails an argument satisfaction
+check. There can be two reasons for this. In the usual case, there
+is an update frame blocking our access to anything deeper on the
+stack. We then update the updatee in the frame with a partial
+application node and squeeze out the update frame. The other
+possibility is that we are running threaded code, and we are sitting
+on the bottom of a stack chunk. In this case, we still build the
+partial application, but we have nothing in our hands to update, so we
+underflow the stack (awakening the previous chunk) and enter the
+partial application node just built.
+
+On entry to @UpdatePAP@, we assume the following:
+\begin{itemize}
+\item SuB points to topmost word of an update frame or to the bottom of a
+stack chunk.
+\item SpA and SpB point to the topmost words of their respective stacks.
+\item Node points to the closure which needs more arguments than are there.
+\end{itemize}
+
+\begin{code}
+
+/* Called when an argument-satisfaction check fails: build a PAP from
+   Node plus the pending arguments on the A and B stacks, update the
+   updatee in the enclosing update frame with it (or, in the threaded
+   world, underflow if we are at the bottom of a stack chunk), squeeze
+   the update frame out of the B stack, and re-enter Node.  Entry
+   conditions are listed in the literate text above. */
+STGFUN(UpdatePAP)
+{
+    /*
+     * Use STG registers for these locals which must survive the HEAP_CHK.
+     * Don't squash Node (R1), because it's an implicit argument.
+     */
+
+#define NNonPtrWords (R2.i)
+#define NPtrWords (R3.i)
+#define NArgWords (R4.i)
+#define PapSize (R5.i)
+#if defined(USE_COST_CENTRES)
+# define CC_pap ((CostCentre)(R7.p))
+#endif
+
+    /* These other locals do not have to survive a HEAP_CHK */
+
+    P_ PapClosure;
+    P_ Updatee;
+    P_ p;
+    I_ i;
+
+    FB_
+
+#if defined(COUNT)
+    ++nPAPs;
+#endif
+
+    SET_ACTIVITY(ACT_UPDATE_PAP); /* SPAT profiling */
+
+    /* Pending arguments are everything between the stack pointers and
+       the enclosing update frame. */
+    NPtrWords = AREL(SuA - SpA);
+    NNonPtrWords = BREL(SuB - SpB);
+
+    ASSERT(NPtrWords >= 0);
+    ASSERT(NNonPtrWords >= 0);
+
+    NArgWords = NPtrWords + NNonPtrWords + 1; /* +1 for Node */
+
+#if defined(USE_COST_CENTRES)
+    /* set "CC_pap" to go in the updatee (see Sansom thesis, p 183) */
+
+    CC_pap /*really cc_enter*/ = (CostCentre) CC_HDR(Node);
+    if (IS_SUBSUMED_CC(CC_pap) /*really cc_enter*/)
+        CC_pap = CCC;
+#endif
+
+    if (NArgWords == 1) {
+
+        /*
+         * No arguments, only Node.  Skip building the PAP and
+         * just plan to update with an indirection.
+         */
+
+        PapClosure = Node;
+
+    } else {
+
+        /* Build the PAP.  A generic PAP closure is laid out thus:
+         * 	code ptr, size, no of words of ptrs, Node, ptrs, non-ptrs
+         * (i.e. a DYN closure)
+         * ToDo: add stuff for special cases, to omit size and no. of ptrs
+         *	 (Still ToDo?  (JSM))
+         */
+
+        PapSize = NArgWords + DYN_HS;
+
+        ALLOC_UPD_PAP(DYN_HS, NArgWords, 0, PapSize);
+        CC_ALLOC(CC_pap, PapSize, PAP_K);
+
+        /* Allocate PapClosure -- Only Node (R1) is live */
+        HEAP_CHK(LIVENESS_R1, PapSize, 0);
+
+        SET_ACTIVITY(ACT_UPDATE_PAP); /* back to it (for SPAT profiling) */
+
+        PapClosure = Hp + 1 - PapSize;	/* The new PapClosure */
+
+        SET_DYN_HDR(PapClosure, PAP_info, CC_pap, NArgWords + DYN_VHS, NPtrWords + 1);
+
+        /* Now fill in the closure fields */
+
+        p = Hp;
+        for (i = NNonPtrWords - 1; i >= 0; i--) *p-- = (W_) SpB[BREL(i)];
+        for (i = NPtrWords - 1; i >= 0; i--) *p-- = (W_) SpA[AREL(i)];
+        *p = (W_) Node;
+    }
+
+    /*
+     * Finished constructing PAP closure; now update the updatee.
+     * But wait!  What if there is no updatee?  Then we fall off the stack.
+     */
+
+#ifdef CONCURRENT
+    if (SuB < STKO_BSTK_BOT(StkOReg)) {
+        Node = PapClosure;
+# ifdef PAR
+        LivenessReg = LIVENESS_R1;
+# endif
+        JMP_(StackUnderflowEnterNode);
+    }
+#endif
+
+    /*
+     * Now we have a standard update frame, so we update the updatee with
+     * either the new PAP or Node.
+     *
+     * Supposedly, it is not possible to get a constructor update frame,
+     * (Why not? (JSM))
+     * (Because they have *never* been implemented. (WDP))
+     */
+
+    Updatee = GRAB_UPDATEE(SuB);
+    UPD_IND(Updatee, PapClosure); /* Indirect Updatee to PapClosure */
+
+    if (NArgWords != 1) {
+        UPD_PAP_IN_NEW();
+
+    } else {
+        UPD_PAP_IN_PLACE();
+
+#if defined(USE_COST_CENTRES)
+        /*
+         * Lexical scoping requires a *permanent* indirection, and we
+         * also have to set the cost centre for the indirection.
+         */
+        INFO_PTR(Updatee) = (W_) Perm_Ind_info;
+        SET_CC_HDR(Updatee, CC_pap);
+
+#endif /* USE_COST_CENTRES */
+    }
+
+#if defined(USE_COST_CENTRES)
+    /*
+     * Restore the Cost Centre too (if required); again see Sansom thesis p 183.
+     * Take the CC out of the update frame if a CAF/DICT.
+     */
+
+    CCC = (IS_CAF_OR_DICT_CC(CC_pap)) ? GRAB_COST_CENTRE(SuB) : CC_pap;
+
+#endif /* USE_COST_CENTRES */
+
+    /* Restore SuA, SuB, RetReg */
+    RetReg = GRAB_RET(SuB);
+    SuA = GRAB_SuA(SuB);
+    SuB = GRAB_SuB(SuB);
+
+    /*
+     * Squeeze out update frame from B stack.  Note that despite our best
+     * efforts with [AB]REL and friends, the loop order depends on the B
+     * stack growing up.
+     */
+    for (i = NNonPtrWords - 1; i >= 0; i--)
+        SpB[BREL(i+STD_UF_SIZE)] = SpB[BREL(i)];
+
+    SpB += BREL(STD_UF_SIZE);
+
+    /*
+     * All done!  Restart by re-entering Node
+     * Don't count this entry for ticky-ticky profiling.
+     */
+
+#if defined(GRAN)
+    GRAN_EXEC(16,4,7,4,0);
+#endif
+    InfoPtr=(D_)(INFO_PTR(Node));
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+
+#undef NNonPtrWords
+#undef NPtrWords
+#undef NArgWords
+#undef PapSize
+#ifdef USE_COST_CENTRES
+# undef CC_pap
+#endif
+}
+\end{code}
+
+The entry code for a generic PAP. @Node@ points to the PAP closure.
+Reload the stacks from the PAP, and enter the closure stored in the
+PAP. PAPs are in HNF so no update frame is needed.
+
+\begin{code}
+/* Entry code for a generic PAP.  A PAP is already in HNF, so any update
+   frames sitting directly on top of us can be committed immediately
+   (the while loop); then the PAP's saved arguments are unpacked back
+   onto the A and B stacks and the function closure stored in the PAP
+   is entered.  No new update frame is needed. */
+STGFUN(PAP_entry)
+{
+    /* Use STG registers for these locals which must survive the STK_CHK */
+#define NPtrWords (R2.i)
+#define NNonPtrWords (R3.i)
+#if defined(USE_COST_CENTRES)
+# define CC_pap ((CostCentre)(R7.p))
+#endif
+
+    /* These locals don't have to survive a HEAP_CHK */
+    P_ Updatee;
+    P_ p;
+    I_ i;
+    I_ aWords, bWords;
+
+    FB_
+
+    SET_ACTIVITY(ACT_UPDATE_PAP); /* SPAT profiling */
+
+    /* Commit this PAP to every update frame immediately above us. */
+    while (AREL(SuA - SpA) == 0 && BREL(SuB - SpB) == 0) {
+#ifdef CONCURRENT
+        if (SuB < STKO_BSTK_BOT(StkOReg)) {
+# ifdef PAR
+            LivenessReg = LIVENESS_R1;
+# endif
+            JMP_(StackUnderflowEnterNode);
+        }
+#endif
+
+        /* We're sitting on top of an update frame, so let's do the business */
+
+        Updatee = GRAB_UPDATEE(SuB);
+        UPD_IND(Updatee, Node);
+
+#if defined(USE_COST_CENTRES)
+        /*
+         * Restore the Cost Centre too (if required); again see Sansom thesis p 183.
+         * Take the CC out of the update frame if a CAF/DICT.
+         */
+
+        CC_pap = (CostCentre) CC_HDR(Node);
+        CCC = (IS_CAF_OR_DICT_CC(CC_pap)) ? GRAB_COST_CENTRE(SuB) : CC_pap;
+
+#endif /* USE_COST_CENTRES */
+
+        RetReg = GRAB_RET(SuB);
+        SuA = GRAB_SuA(SuB);
+        SuB = GRAB_SuB(SuB);
+        SpB += BREL(STD_UF_SIZE);
+    }
+
+    NPtrWords = DYN_CLOSURE_NoPTRS(Node) - 1; /* The saved Node counts as one */
+    NNonPtrWords = DYN_CLOSURE_NoNONPTRS(Node);
+
+    /* Ticky-ticky profiling info */
+    ENT_PAP(Node);
+
+    /* Enter PAP cost centre -- lexical scoping only */
+    ENTER_CC_PAP_CL(Node);
+
+    /*
+     * Check for stack overflow.  Ask to take all of the current frame with
+     * us to the new world.  If there is no update frame on the current stack,
+     * bWords will exceed the size of the B stack, but StackOverflow will deal
+     * with it.
+     */
+
+    aWords = AREL(SuA - SpA);
+    bWords = BREL(SuB - SpB) + STD_UF_SIZE;
+
+    STK_CHK(LIVENESS_R1, NPtrWords, NNonPtrWords, aWords, bWords, 0, 0);
+
+    SpA -= AREL(NPtrWords);
+    SpB -= BREL(NNonPtrWords);
+
+    /* Reload Node */
+    p = Node + DYN_HS;		/* Point to first pointer word */
+    Node = (P_) *p++;
+
+    /* Reload the stacks */
+
+    for (i=0; i<NPtrWords; i++) SpA[AREL(i)] = (P_) *p++;
+    for (i=0; i<NNonPtrWords; i++) SpB[BREL(i)] = *p++;
+
+    /* Off we go! */
+    ENT_VIA_NODE();
+    InfoPtr=(D_)(INFO_PTR(Node));
+    JMP_(ENTRY_CODE(InfoPtr));
+    FE_
+
+#undef NPtrWords
+#undef NNonPtrWords
+#ifdef USE_COST_CENTRES
+# undef CC_pap
+#endif
+}
+\end{code}
+
+The info table for a generic PAP:
+\begin{code}
+DYN_ITBL(PAP_info,PAP_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,EF_,PAP_K,"PAP","->");
+\end{code}
diff --git a/ghc/runtime/main/Threads.lc b/ghc/runtime/main/Threads.lc
new file mode 100644
index 0000000000..a767ec940a
--- /dev/null
+++ b/ghc/runtime/main/Threads.lc
@@ -0,0 +1,3749 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
+%
+%************************************************************************
+%* *
+\section[Threads.lc]{Thread Control Routines}
+%* *
+%************************************************************************
+
+%************************************************************************
+%
+\subsection[thread-overview]{Overview of the Thread Management System}
+%
+%************************************************************************
+
+%************************************************************************
+%
+\subsection[thread-decls]{Thread Declarations}
+%
+%************************************************************************
+
+% I haven't checked if GRAN can work with QP profiling. But as we use our
+% own profiling (GR profiling) that should be irrelevant. -- HWL
+
+\begin{code}
+
+#if defined(CONCURRENT)
+
+# define NON_POSIX_SOURCE /* so says Solaris */
+
+# include "rtsdefs.h"
+# include <setjmp.h>
+
+#include "LLC.h"
+#include "HLC.h"
+
+static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
+\end{code}
+
+@AvailableStack@ is used to determine whether an existing stack can be
+reused without new allocation, so reducing garbage collection, and
+stack setup time. At present, it is only used for the first stack
+chunk of a thread, the one that's got @StkOChunkSize@ words.
+
+\begin{code}
+P_ AvailableStack = Nil_closure;
+P_ AvailableTSO = Nil_closure;
+\end{code}
+
+Macros for dealing with the new and improved GA field for simulating
+parallel execution. Based on @CONCURRENT@ package. The GA field now
+contains a mask, where the n-th bit stands for the n-th processor,
+where this data can be found. In case of multiple copies, several bits
+are set. The total number of processors is bounded by @MAX_PROC@,
+which should be <= the length of a word in bits. -- HWL
+
+\begin{code}
+/* mattson thinks this is obsolete */
+
+# if 0 && defined(GRAN)
+extern FILE *main_statsfile; /* Might be of general interest HWL */
+
+typedef unsigned long TIME;
+typedef unsigned char PROC;
+typedef unsigned char EVTTYPE;
+
+
+# undef max
+# define max(a,b) (a>b?a:b)
+
+/* Map a GA bit-mask to the lowest-numbered PE whose bit is set.
+   Returns MAX_PROC when no bit is set.
+   NB: this region is compiled out ("#if 0 && defined(GRAN)"). */
+static PROC
+ga_to_proc(W_ ga)
+{ PROC i;
+
+ for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ;
+
+ return (i);
+}
+
+/* NB: This takes a *node* rather than just a ga as input */
+/* Lowest-numbered PE holding a copy of `node' (looks at its GA mask).
+   NB: compiled out ("#if 0"). */
+static PROC
+where_is(P_ node)
+{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
+
+/* Count how many PEs hold a copy of `node', by counting set bits in its
+   GA mask.  NB: compiled out ("#if 0"); the stray second ";" after n++
+   is an empty statement and harmless. */
+static PROC
+no_of_copies(P_ node) /* currently unused (original note was Klingon: "DaH lo'lu'Qo'") */
+{ PROC i, n;
+
+ for (i=0, n=0; i<MAX_PROC; i++)
+ if (IS_LOCAL_TO(PROCS(node),i))
+ n++;;
+
+ return (n);
+}
+
+# endif /* GRAN ; HWL */
+\end{code}
+
+%****************************************************************
+%* *
+\subsection[thread-getthread]{The Thread Scheduler}
+%* *
+%****************************************************************
+
+This is the heart of the thread scheduling code.
+
+\begin{code}
+# if defined(GRAN_CHECK) && defined(GRAN)
+W_ debug = 0;
+# endif
+
+W_ event_trace = 0;
+W_ event_trace_all = 0;
+
+STGRegisterTable *CurrentRegTable = NULL;
+P_ CurrentTSO = NULL;
+
+# if defined(GRAN) /* HWL */
+
+unsigned CurrentProc = 0;
+W_ IdleProcs = ~0L, Idlers = MAX_PROC;
+
+# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+# define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
+ /* normally */
+# endif
+
+I_ DoFairSchedule = 0;
+I_ DoReScheduleOnFetch = 0;
+I_ DoStealThreadsFirst = 0;
+I_ SimplifiedFetch = 0;
+I_ DoAlwaysCreateThreads = 0;
+I_ DoGUMMFetching = 0;
+I_ DoThreadMigration = 0;
+I_ FetchStrategy = 4;
+I_ PreferSparksOfLocalNodes = 0;
+
+# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+I_ NoForward = 0;
+I_ PrintFetchMisses = 0, fetch_misses = 0;
+# endif
+
+# if defined(COUNT)
+I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
+ BQ_lens = 0;
+# endif
+
+I_ do_gr_binary = 0;
+I_ do_gr_profile = 0; /* Full .gr profile or only END events? */
+I_ no_gr_profile = 0; /* Don't create any .gr file at all? */
+I_ do_sp_profile = 0;
+I_ do_gr_migration = 0;
+
+P_ RunnableThreadsHd[MAX_PROC];
+P_ RunnableThreadsTl[MAX_PROC];
+
+P_ WaitThreadsHd[MAX_PROC];
+P_ WaitThreadsTl[MAX_PROC];
+
+sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
+sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
+
+W_ CurrentTime[MAX_PROC]; /* Per PE clock */
+
+# if defined(GRAN_CHECK) && defined(GRAN)
+P_ BlockedOnFetch[MAX_PROC]; /* HWL-CHECK */
+# endif
+
+I_ OutstandingFetches[MAX_PROC];
+
+W_ SparksAvail = 0; /* How many sparks are available */
+W_ SurplusThreads = 0; /* How many excess threads are there */
+
+StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */
+
+/* Communication Cost Variables -- set in main program */
+
+W_ gran_latency = LATENCY, gran_additional_latency = ADDITIONAL_LATENCY,
+ gran_fetchtime = FETCHTIME,
+ gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime = GLOBALUNBLOCKTIME,
+ gran_mpacktime = MSGPACKTIME, gran_munpacktime = MSGUNPACKTIME,
+ gran_mtidytime = 0;
+
+W_ gran_threadcreatetime = THREADCREATETIME,
+ gran_threadqueuetime = THREADQUEUETIME,
+ gran_threaddescheduletime = THREADDESCHEDULETIME,
+ gran_threadscheduletime = THREADSCHEDULETIME,
+ gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
+
+/* Instruction Cost Variables -- set in main program */
+
+W_ gran_arith_cost = ARITH_COST, gran_branch_cost = BRANCH_COST,
+ gran_load_cost = LOAD_COST, gran_store_cost = STORE_COST,
+ gran_float_cost = FLOAT_COST, gran_heapalloc_cost = 0;
+
+W_ max_proc = MAX_PROC;
+
+/* Granularity event types' names for output */
+
+char *event_names[] =
+ { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
+ "MOVESPARK", "MOVETHREAD", "FINDWORK",
+ "FETCHNODE", "FETCHREPLY"
+ };
+
+# if defined(GRAN)
+/* Prototypes of GrAnSim debugging functions */
+void DEBUG_PRINT_NODE PROTO((P_));
+void DEBUG_TREE PROTO((P_));
+void DEBUG_INFO_TABLE PROTO((P_));
+void DEBUG_CURR_THREADQ PROTO((I_));
+void DEBUG_THREADQ PROTO((P_, I_));
+void DEBUG_TSO PROTO((P_, I_));
+void DEBUG_EVENT PROTO((eventq, I_));
+void DEBUG_SPARK PROTO((sparkq, I_));
+void DEBUG_SPARKQ PROTO((sparkq, I_));
+void DEBUG_CURR_SPARKQ PROTO((I_));
+void DEBUG_PROC PROTO((I_, I_));
+void DCT(STG_NO_ARGS);
+void DCP(STG_NO_ARGS);
+void DEQ(STG_NO_ARGS);
+void DSQ(STG_NO_ARGS);
+
+void HandleFetchRequest PROTO((P_, PROC, P_));
+# endif /* GRAN ; HWL */
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;
+
+static I_ noOfEvents = 0;
+static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+#endif
+
+TIME SparkStealTime();
+
+/* Fcts for manipulating event queues have been deleted -- HWL */
+/* ---------------------------------- */
+
+/*
+ * Print one spark descriptor to stderr (debugging aid).
+ * Fix: Prev/Next are pointers; print them with %lx and a (W_) cast
+ * (as the Node field already does) instead of %x, which is undefined
+ * behaviour wherever int and pointers differ in size.
+ */
+static void
+print_spark(spark)
+  sparkq spark;
+{
+
+  if (spark==NULL)
+    fprintf(stderr,"Spark: NIL\n");
+  else
+    fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%lx, Next 0x%lx\n",
+            (W_) SPARK_NODE(spark), SPARK_NAME(spark),
+            ((SPARK_EXPORTED(spark))?"True":"False"),
+            (W_) SPARK_PREV(spark), (W_) SPARK_NEXT(spark) );
+}
+
+/*
+ * Print every spark in the queue rooted at `hd' (debugging aid).
+ * Fixes: give the function an explicit `void' return type (it was
+ * implicit int but never returned a value), and print the pointer
+ * with %lx/(W_) rather than %x.
+ */
+static void
+print_sparkq(hd)
+sparkq hd;
+{
+ sparkq x;
+
+ fprintf(stderr,"Spark Queue with root at %lx:\n",(W_) hd);
+ for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
+ print_spark(x);
+ }
+}
+
+/*
+ * Print one event descriptor to stderr (debugging aid).
+ * Fixes: explicit `void' return type (was implicit int with no return
+ * value), and (W_) casts on the TSO/node pointer arguments so they
+ * actually match the %lx conversions in the format string.
+ */
+static void
+print_event(event)
+eventq event;
+{
+
+ if (event==NULL)
+ fprintf(stderr,"Evt: NIL\n");
+ else
+ fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n",
+ event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
+ EVENT_PROC(event), EVENT_CREATOR(event),
+ EVENT_TIME(event), (W_) EVENT_TSO(event), (W_) EVENT_NODE(event) /*,
+ EVENT_SPARK(event), EVENT_NEXT(event)*/ );
+
+}
+
+/*
+ * Print every event in the queue rooted at `hd' (debugging aid).
+ * Fixes: explicit `void' return type, and %lx/(W_) for the pointer
+ * (was %x with a pointer argument -- undefined behaviour on LP64).
+ */
+static void
+print_eventq(hd)
+eventq hd;
+{
+ eventq x;
+
+ fprintf(stderr,"Event Queue with root at %lx:\n",(W_) hd);
+ for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
+ print_event(x);
+ }
+}
+
+/* ---------------------------------- */
+
+#if 0 /* moved */
+/*
+ * Dequeue and return the head of the global event queue.  The entry
+ * returned by the PREVIOUS call is freed here, so callers must not keep
+ * it across calls.  Exits the program if the queue is empty.
+ * NB: this whole region is compiled out ("#if 0" -- moved elsewhere).
+ */
+static eventq getnextevent()
+{
+ static eventq entry = NULL;
+
+ if(EventHd == NULL)
+ {
+ fprintf(stderr,"No next event\n");
+ exit(EXIT_FAILURE); /* ToDo: abort()? EXIT??? */
+ }
+
+ if(entry != NULL)
+ free((char *)entry);
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x20) { /* count events */
+ noOfEvents++;
+ event_counts[EVENT_TYPE(EventHd)]++;
+ }
+#endif
+
+ entry = EventHd;
+ EventHd = EVENT_NEXT(EventHd);
+ return(entry);
+}
+
+/* ToDo: replace malloc/free with a free list */
+
+/*
+ * Insert `newentry' into the global event queue, keeping it ordered by
+ * event time.  Ties are broken so that FINDWORK sorts before, and
+ * CONTINUETHREAD after, other events of the same time (see comment
+ * below for why).  NB: compiled out ("#if 0" -- moved elsewhere).
+ */
+static insert_event(newentry)
+eventq newentry;
+{
+ EVTTYPE evttype = EVENT_TYPE(newentry);
+ eventq event, *prev;
+
+ /* Search the queue and insert at the right point:
+ FINDWORK before everything, CONTINUETHREAD after everything.
+
+ This ensures that we find any available work after all threads have
+ executed the current cycle. This level of detail would normally be
+ irrelevant, but matters for ridiculously low latencies...
+ */
+
+ if(EventHd == NULL)
+ EventHd = newentry;
+ else
+ {
+ for (event = EventHd, prev=&EventHd; event != NULL;
+ prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event))
+ {
+ if(evttype == FINDWORK ? (EVENT_TIME(event) >= EVENT_TIME(newentry)) :
+ evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) :
+ (EVENT_TIME(event) > EVENT_TIME(newentry) ||
+ (EVENT_TIME(event) == EVENT_TIME(newentry) &&
+ EVENT_TYPE(event) != FINDWORK )))
+ {
+ *prev = newentry;
+ EVENT_NEXT(newentry) = event;
+ break;
+ }
+ }
+ if (event == NULL)
+ *prev = newentry;
+ }
+}
+
+/*
+ * Allocate a fresh event with the given fields and splice it into the
+ * global event queue via insert_event.  Memory comes from xmalloc and
+ * is released later by getnextevent.
+ * NB: compiled out ("#if 0" -- moved elsewhere).
+ */
+static newevent(proc,creator,time,evttype,tso,node,spark)
+PROC proc, creator;
+TIME time;
+EVTTYPE evttype;
+P_ tso, node;
+sparkq spark;
+{
+ extern P_ xmalloc();
+ eventq newentry = (eventq) xmalloc(sizeof(struct event));
+
+ EVENT_PROC(newentry) = proc;
+ EVENT_CREATOR(newentry) = creator;
+ EVENT_TIME(newentry) = time;
+ EVENT_TYPE(newentry) = evttype;
+ EVENT_TSO(newentry) = tso;
+ EVENT_NODE(newentry) = node;
+ EVENT_SPARK(newentry) = spark;
+ EVENT_NEXT(newentry) = NULL;
+
+ insert_event(newentry);
+}
+#endif /* 0 moved */
+
+# else /* !GRAN */
+
+P_ RunnableThreadsHd = Nil_closure;
+P_ RunnableThreadsTl = Nil_closure;
+
+P_ WaitingThreadsHd = Nil_closure;
+P_ WaitingThreadsTl = Nil_closure;
+
+PP_ PendingSparksBase[SPARK_POOLS];
+PP_ PendingSparksLim[SPARK_POOLS];
+
+PP_ PendingSparksHd[SPARK_POOLS];
+PP_ PendingSparksTl[SPARK_POOLS];
+
+# endif /* GRAN ; HWL */
+
+static jmp_buf scheduler_loop;
+
+I_ MaxThreads = DEFAULT_MAX_THREADS;
+I_ required_thread_count = 0;
+I_ advisory_thread_count = 0;
+
+EXTFUN(resumeThread);
+
+P_ NewThread PROTO((P_, W_));
+
+I_ context_switch = 0;
+
+I_ contextSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */
+
+#if !defined(GRAN)
+
+I_ threadId = 0;
+
+I_ MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS;
+I_ SparkLimit[SPARK_POOLS];
+
+extern I_ doSanityChks;
+extern void checkAStack(STG_NO_ARGS);
+
+/*
+ * Allocate the REQUIRED and ADVISORY spark pools, each with room for
+ * `size' spark entries, and set the corresponding limits.
+ * Returns rtsFalse on allocation failure.
+ * Fix: if the second allocation fails, free the first pool instead of
+ * leaking it (the caller treats rtsFalse as fatal-ish, but the RTS may
+ * retry with a smaller size).
+ */
+rtsBool
+initThreadPools(size)
+I_ size;
+{
+ SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
+ if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
+ return rtsFalse;
+ if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL) {
+ free((char *) PendingSparksBase[ADVISORY_POOL]);
+ PendingSparksBase[ADVISORY_POOL] = NULL;
+ return rtsFalse;
+ }
+ PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
+ PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
+ return rtsTrue;
+}
+#endif
+
+#ifdef PAR
+rtsBool sameThread;
+#endif
+
+/*
+ * ScheduleThreads: top-level entry point of the thread scheduler.
+ * Starts the virtual timer (for preemptive context switches), creates
+ * the main thread from `topClosure' (forcing a GC first if the heap is
+ * too full), initialises the per-PE queues in the GRAN build, and then
+ * enters the main scheduler loop.  ReSchedule longjmps back to
+ * `scheduler_loop'; a negative longjmp value means "all done" and makes
+ * this function return.
+ * NB: the #if/#else around the timer setup deliberately splits an `if'
+ * across preprocessor branches -- both arms open the same two braces.
+ */
+void
+ScheduleThreads(topClosure)
+P_ topClosure;
+{
+ I_ i;
+ P_ tso;
+
+#if defined(USE_COST_CENTRES) || defined(GUM)
+ if (time_profiling || contextSwitchTime > 0) {
+ if (initialize_virtual_timer(tick_millisecs)) {
+#else
+ if (contextSwitchTime > 0) {
+ if (initialize_virtual_timer(contextSwitchTime)) {
+#endif
+ fflush(stdout);
+ fprintf(stderr, "Can't initialize virtual timer.\n");
+ EXIT(EXIT_FAILURE);
+ }
+ } else
+ context_switch = 0 /* 1 HWL */;
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
+ if ( debug & 0x40 ) {
+ fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
+ }
+#endif
+
+#if defined(GRAN) /* KH */
+ /* Reset every PE's runnable/waiting queues, spark pools and fetch
+ counters before any thread is created. */
+ for (i=0; i<max_proc; i++)
+ {
+ RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
+ WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
+ PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] =
+ PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] =
+ NULL;
+
+# if defined(GRAN_CHECK)
+ if (debug & 0x04)
+ BlockedOnFetch[i] = 0; /*- StgFalse; -*/ /* HWL-CHECK */
+# endif
+ OutstandingFetches[i] = 0;
+ }
+
+ CurrentProc = MainProc;
+#endif /* GRAN */
+
+ if (DO_QP_PROF)
+ init_qp_profiling();
+
+ /*
+ * We perform GC so that a signal handler can install a new TopClosure and start
+ * a new main thread.
+ */
+#ifdef PAR
+ if (IAmMainThread) {
+#endif
+ if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
+ /* kludge to save the top closure as a root */
+ CurrentTSO = topClosure;
+ ReallyPerformThreadGC(0, rtsTrue);
+ topClosure = CurrentTSO;
+ if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
+ fflush(stdout);
+ fprintf(stderr, "Not enough heap for main thread\n");
+ EXIT(EXIT_FAILURE);
+ }
+ }
+#ifndef GRAN
+ RunnableThreadsHd = RunnableThreadsTl = tso;
+#else
+ /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
+ ThreadQueueHd = ThreadQueueTl = tso;
+
+# if defined(GRAN_CHECK)
+ if ( debug & 0x40 ) {
+ fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
+ }
+# endif
+#endif
+
+#ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_START, tso);
+ sameThread = rtsTrue;
+ }
+#endif
+
+#if defined(GRAN)
+ MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
+#endif
+
+ required_thread_count = 1;
+ advisory_thread_count = 0;
+#ifdef PAR
+ } /*if IAmMainThread ...*/
+#endif
+
+ /* ----------------------------------------------------------------- */
+ /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
+ /* ----------------------------------------------------------------- */
+
+ if(setjmp(scheduler_loop) < 0)
+ return;
+
+#if defined(GRAN) && defined(GRAN_CHECK)
+ if ( debug & 0x80 ) {
+ fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
+ DEBUG_TSO(ThreadQueueHd,1);
+ /* if (ThreadQueueHd == MainTSO) {
+ fprintf(stderr,"D> Event Queue is now:\n");
+ DEQ();
+ } */
+ }
+#endif
+
+#ifdef PAR
+ if (PendingFetches != Nil_closure) {
+ processFetches();
+ }
+
+#elif defined(GRAN)
+ if (ThreadQueueHd == Nil_closure) {
+ fprintf(stderr, "No runnable threads!\n");
+ EXIT(EXIT_FAILURE);
+ }
+ if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
+ QP_Event1("AG", ThreadQueueHd);
+ }
+#endif
+
+#ifndef PAR
+ /* Sequential/concurrent build: block in AwaitEvent until some waiting
+ thread becomes runnable; no runnable AND no waiting threads is fatal
+ (deadlock). */
+ while (RunnableThreadsHd == Nil_closure) {
+ /* If we've no work */
+ if (WaitingThreadsHd == Nil_closure) {
+ fflush(stdout);
+ fprintf(stderr, "No runnable threads!\n");
+ EXIT(EXIT_FAILURE);
+ }
+ AwaitEvent(0);
+ }
+#else
+ if (RunnableThreadsHd == Nil_closure) {
+ if (advisory_thread_count < MaxThreads &&
+ (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
+ PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
+ /*
+ * If we're here (no runnable threads) and we have pending sparks,
+ * we must have a space problem. Get enough space to turn one of
+ * those pending sparks into a thread...ReallyPerformGC doesn't
+ * return until the space is available, so it may force global GC.
+ * ToDo: Is this unnecessary here? Duplicated in ReSchedule()? --JSM
+ */
+ ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
+ SAVE_Hp -= THREAD_SPACE_REQUIRED;
+ } else {
+ /*
+ * We really have absolutely no work. Send out a fish (there may be
+ * some out there already), and wait for something to arrive. We
+ * clearly can't run any threads until a SCHEDULE or RESUME arrives,
+ * and so that's what we're hoping to see. (Of course, we still have
+ * to respond to other types of messages.)
+ */
+ if (!fishing)
+ sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
+ NEW_FISH_HUNGER);
+ processMessages();
+ }
+ ReSchedule(0);
+ } else if (PacketsWaiting()) { /* Look for incoming messages */
+ processMessages();
+ }
+#endif /* PAR */
+
+ if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
+ QP_Event1("AG", RunnableThreadsHd);
+ }
+
+#ifdef PAR
+ if (do_gr_profile && !sameThread)
+ DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
+#endif
+
+#if !GRAN /* ROUND_ROBIN */
+ /* Pop the head of the runnable queue; it becomes the current thread. */
+ CurrentTSO = RunnableThreadsHd;
+ RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
+ TSO_LINK(CurrentTSO) = Nil_closure;
+
+ if (RunnableThreadsHd == Nil_closure)
+ RunnableThreadsTl = Nil_closure;
+
+#else /* GRAN */
+ /* This used to be Round Robin. KH.
+ I think we can ignore that, and move it down to ReSchedule instead.
+ */
+ CurrentTSO = ThreadQueueHd;
+ /* TSO_LINK(CurrentTSO) = Nil_closure; humbug */
+#endif
+
+ /* If we're not running a timer, just leave the flag on */
+ if (contextSwitchTime > 0)
+ context_switch = 0;
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (CurrentTSO == Nil_closure) {
+ fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
+ CurrentProc,CurrentTime[CurrentProc]);
+ exit(99);
+ }
+
+ if (debug & 0x04) {
+ if (BlockedOnFetch[CurrentProc]) {
+ fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
+ CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
+ exit(99);
+ }
+ }
+
+ if ( (debug & 0x10) &&
+ (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
+ fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
+ CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
+ exit(99);
+ }
+#endif
+
+ /* Hand control to the STG machine; resumeThread restores the thread's
+ registers and jumps into compiled code. */
+# if defined(__STG_TAILJUMPS__)
+ miniInterpret((StgFunPtr)resumeThread);
+# else
+ if (doSanityChks)
+ miniInterpret_debug((StgFunPtr)resumeThread, checkAStack);
+ else
+ miniInterpret((StgFunPtr)resumeThread);
+# endif /* __STG_TAILJUMPS__ */
+}
+\end{code}
+
+% Some remarks on GrAnSim -- HWL
+
+The ReSchedule fct is the heart of GrAnSim. Based on its parameter it issues a
+CONTINUETHREAD to carry on executing the current thread in due course or it
+watches out for new work (e.g. when called from EndThread).
+
+Then it picks the next event (getnextevent) and handles it appropriately
+(see switch construct). Note that a continue in the switch causes the next
+event to be handled and a break causes a jmp to the scheduler_loop where
+the TSO at the head of the current processor's runnable queue is executed.
+
+ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
+itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
+
+\begin{code}
+#if defined(GRAN)
+
+/*
+ * ReSchedule (GrAnSim version): the heart of the simulator.
+ * Depending on `what_next' (SAME_THREAD / NEW_THREAD / CHANGE_THREAD /
+ * FIND_THREAD) it first queues an appropriate event for the current
+ * processor, then enters the event-handling loop below.  Inside the
+ * switch, `continue' means "handle the next event"; `break' falls out
+ * to the _longjmp back into ScheduleThreads' scheduler_loop, which runs
+ * the TSO at the head of the current PE's runnable queue.
+ */
+void
+ReSchedule(what_next)
+int what_next; /* Run the current thread again? */
+{
+ sparkq spark, nextspark;
+ P_ tso;
+ P_ node;
+ eventq event;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x80 ) {
+ fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next);
+ DEBUG_TSO(ThreadQueueHd,1);
+ }
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( (debug & 0x80) || (debug & 0x40 ) )
+ if (what_next<FIND_THREAD || what_next>CHANGE_THREAD)
+ fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n",
+ what_next);
+#endif
+
+ /* Run the current thread again (if there is one) */
+ if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure)
+ {
+ /* A bit of a hassle if the event queue is empty, but ... */
+ CurrentTSO = ThreadQueueHd;
+
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,CurrentTSO,Nil_closure,NULL);
+
+ /* This code does round-Robin, if preferred. */
+ if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure)
+ {
+ if(do_gr_profile)
+ DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
+ ThreadQueueHd = TSO_LINK(CurrentTSO);
+ TSO_LINK(ThreadQueueTl) = CurrentTSO;
+ ThreadQueueTl = CurrentTSO;
+ TSO_LINK(CurrentTSO) = Nil_closure;
+ if (do_gr_profile)
+ DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+ CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
+ }
+ }
+ /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
+ /* has been updated before that already. */
+ else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure)
+ {
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if(DoReScheduleOnFetch)
+ {
+ fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n");
+ exit(99);
+ }
+#endif
+
+ if(do_gr_profile)
+ DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+
+ CurrentTSO = ThreadQueueHd;
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+
+ CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
+ }
+
+ /* We go in here if the current thread is blocked on fetch => don't CONT */
+ else if(what_next==CHANGE_THREAD)
+ {
+ /* just fall into event handling loop for next event */
+ }
+
+ /* We go in here if we have no runnable threads or what_next==0 */
+ else
+ {
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+ CurrentTSO = Nil_closure;
+ }
+
+ /* ----------------------------------------------------------------- */
+ /* This part is the EVENT HANDLING LOOP */
+ /* ----------------------------------------------------------------- */
+
+ do {
+ /* Choose the processor with the next event */
+ event = getnextevent();
+ CurrentProc = EVENT_PROC(event);
+ if(EVENT_TIME(event) > CurrentTime[CurrentProc])
+ CurrentTime[CurrentProc] = EVENT_TIME(event);
+
+ MAKE_BUSY(CurrentProc);
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x80)
+ fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n");
+#endif
+
+ /* Deal with the idlers */
+ HandleIdlePEs();
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (event_trace &&
+ (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD ||
+ (debug & 0x80) ))
+ print_event(event);
+#endif
+
+ switch (EVENT_TYPE(event))
+ {
+ /* Should just be continuing execution */
+ case CONTINUETHREAD:
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) {
+ fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
+ CurrentProc,CurrentTime[CurrentProc]);
+ print_event(event);
+ continue;
+ }
+#endif
+ if(ThreadQueueHd==Nil_closure)
+ {
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+ continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
+ }
+ else
+ break; /* fall into scheduler loop */
+
+ case FETCHNODE:
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (SimplifiedFetch) {
+ fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n");
+ exit (99);
+ }
+#endif
+
+ CurrentTime[CurrentProc] += gran_munpacktime;
+ HandleFetchRequest(EVENT_NODE(event),
+ EVENT_CREATOR(event),
+ EVENT_TSO(event));
+ continue;
+
+ case FETCHREPLY:
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (SimplifiedFetch) {
+ fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n");
+ exit (99);
+ }
+
+ if (debug & 0x10) {
+ if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
+ TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
+ } else {
+ fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ exit(99);
+ }
+ }
+
+ if (debug & 0x04) {
+ if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
+ fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n",
+ CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]);
+ exit(99);
+ } else {
+ BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/
+ }
+ }
+#endif
+
+ /* Copy or move node to CurrentProc */
+ if (FetchNode(EVENT_NODE(event),
+ EVENT_CREATOR(event),
+ EVENT_PROC(event)) ) {
+ /* Fetch has failed i.e. node has been grabbed by another PE */
+ P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
+ PROC p = where_is(node);
+ TIME fetchtime;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (PrintFetchMisses) {
+ fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n",
+ CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
+ fetch_misses++;
+ }
+#endif /* GRAN_CHECK */
+
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ /* Count fetch again !? */
+ ++TSO_FETCHCOUNT(tso);
+ TSO_FETCHTIME(tso) += gran_fetchtime;
+
+ fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ gran_latency;
+
+ /* Chase the grabbed node */
+ newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (debug & 0x04)
+ BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/
+
+ if (debug & 0x10)
+ TSO_TYPE(tso) |= FETCH_MASK_TSO;
+#endif
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+
+ continue; /* NB: no REPLy has been processed; tso still sleeping */
+ }
+
+ /* -- Qapla'! Fetch has been successful; node is here, now */
+ ++TSO_FETCHCOUNT(EVENT_TSO(event));
+ TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime;
+
+ if (do_gr_profile)
+ DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event),
+ EVENT_NODE(event),EVENT_CREATOR(event));
+
+ --OutstandingFetches[CurrentProc];
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (OutstandingFetches[CurrentProc] < 0) {
+ fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc);
+ exit (99);
+ }
+#endif
+
+ if (!DoReScheduleOnFetch) {
+ CurrentTSO = EVENT_TSO(event); /* awaken blocked thread */
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+ TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
+ TSO_BLOCKEDAT(EVENT_TSO(event));
+ if(do_gr_profile)
+ DumpGranEvent(GR_RESUME,EVENT_TSO(event));
+ continue;
+ } else {
+ /* fall through to RESUMETHREAD */
+ }
+
+ case RESUMETHREAD: /* Move from the blocked queue to the tail of */
+ /* the runnable queue ( i.e. Qu' SImqa'lu') */
+ TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
+ TSO_BLOCKEDAT(EVENT_TSO(event));
+ StartThread(event,GR_RESUME);
+ continue;
+
+ case STARTTHREAD:
+ StartThread(event,GR_START);
+ continue;
+
+ case MOVETHREAD:
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (!DoThreadMigration) {
+ fprintf(stderr,"MOVETHREAD events should never occur without -bM\n");
+ exit (99);
+ }
+#endif
+ CurrentTime[CurrentProc] += gran_munpacktime;
+ StartThread(event,GR_STOLEN);
+ continue; /* to the next event */
+
+ case MOVESPARK:
+ CurrentTime[CurrentProc] += gran_munpacktime;
+ spark = EVENT_SPARK(event);
+
+ ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg.
+ so the assignment above is needed. */
+
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_ACQUIRED,spark);
+
+ ++SparksAvail; /* Probably Temporarily */
+ /* Drop into FINDWORK */
+
+ if (!DoReScheduleOnFetch &&
+ (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */
+ continue; /* to next event (i.e. leave */
+ } /* spark in sparkq for now) */
+
+ /* NB: deliberate fallthrough from MOVESPARK into FINDWORK */
+ case FINDWORK:
+ if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads)
+ && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0))
+ {
+ W_ found = 0;
+ sparkq spark_of_non_local_node = NULL;
+
+ /* Choose a spark from the local spark queue */
+ spark = SparkQueueHd;
+
+ while (spark != NULL && !found)
+ {
+ node = SPARK_NODE(spark);
+ if (!SHOULD_SPARK(node))
+ {
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED,spark);
+
+ assert(spark != NULL);
+
+ SparkQueueHd = SPARK_NEXT(spark);
+ if(SparkQueueHd == NULL)
+ SparkQueueTl = NULL;
+
+ DisposeSpark(spark);
+
+ spark = SparkQueueHd;
+ }
+ /* -- node should eventually be sparked */
+ else if (PreferSparksOfLocalNodes &&
+ !IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ /* We have seen this spark before => no local sparks */
+ if (spark==spark_of_non_local_node) {
+ found = 1;
+ break;
+ }
+
+ /* Remember first non-local node */
+ if (spark_of_non_local_node==NULL)
+ spark_of_non_local_node = spark;
+
+ /* Special case: 1 elem sparkq with non-local spark */
+ if (spark==SparkQueueTl) {
+ found = 1;
+ break;
+ }
+
+ /* Put spark (non-local!) at the end of the sparkq */
+ SPARK_NEXT(SparkQueueTl) = spark;
+ SparkQueueHd = SPARK_NEXT(spark);
+ SPARK_NEXT(spark) = NULL;
+ SparkQueueTl = spark;
+
+ spark = SparkQueueHd;
+ }
+ else
+ {
+ found = 1;
+ }
+ }
+
+ /* We've found a node; now, create thread (DaH Qu' yIchen) */
+ if (found)
+ {
+ CurrentTime[CurrentProc] += gran_threadcreatetime;
+
+ node = SPARK_NODE(spark);
+ if((tso = NewThread(node, T_REQUIRED))==NULL)
+ {
+ /* Some kind of backoff needed here in case there's too little heap */
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+ ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue);
+ spark = NULL;
+ continue; /* to the next event, eventually */
+ }
+
+ TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
+ TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
+ TSO_SPARKNAME(tso) = SPARK_NAME(spark);
+
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ STARTTHREAD,tso,Nil_closure,NULL);
+
+ assert(spark != NULL);
+
+ SparkQueueHd = SPARK_NEXT(spark);
+ if(SparkQueueHd == NULL)
+ SparkQueueTl = NULL;
+
+ DisposeSpark(spark);
+ }
+ else
+ /* Make the PE idle if nothing sparked and we have no threads. */
+ {
+ if(ThreadQueueHd == Nil_closure)
+#if defined(GRAN_CHECK) && defined(GRAN)
+ {
+ MAKE_IDLE(CurrentProc);
+ if ( (debug & 0x40) || (debug & 0x80) ) {
+ fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
+ }
+ }
+#else
+ MAKE_IDLE(CurrentProc);
+#endif /* GRAN_CHECK */
+ else
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+ }
+
+ continue; /* to the next event */
+ }
+ else
+ {
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if ( (debug & 0x04) &&
+ (!DoReScheduleOnFetch && ThreadQueueHd != Nil_closure)
+ ) {
+ fprintf(stderr,"Waning in FINDWORK handling:\n");
+ fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch");
+ }
+#endif
+ if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0)
+ continue; /* to next event */
+ else
+ break; /* run ThreadQueueHd */
+ }
+ /* never reached */
+
+ default:
+ fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
+ continue;
+ }
+ /* `break' from the switch lands here: resume the scheduler loop */
+ _longjmp(scheduler_loop, 1);
+ } while(1);
+ }
+\end{code}
+
+Here follows the non-GRAN @ReSchedule@.
+\begin{code}
+#else /* !GRAN */
+
+/*
+ * ReSchedule (non-GrAnSim version).  Re-queues the current thread if
+ * `again' is set (round-robin in the sequential build, LIFO in PAR),
+ * turns pending REQUIRED then ADVISORY sparks into threads while heap
+ * and thread limits allow, and finally longjmps back into
+ * ScheduleThreads' scheduler_loop (negative value = program finished).
+ */
+void
+ReSchedule(again)
+int again; /* Run the current thread again? */
+{
+ P_ spark;
+ PP_ sparkp;
+ P_ tso;
+
+#ifdef PAR
+ /*
+ * In the parallel world, we do unfair scheduling for the moment.
+ * Ultimately, this should all be merged with the more sophisticated
+ * GrAnSim scheduling options. (Of course, some provision should be
+ * made for *required* threads to make sure that they don't starve,
+ * but for now we assume that no one is running concurrent Haskell on
+ * a multi-processor platform.)
+ */
+
+ sameThread = again;
+
+ if (again) {
+ if (RunnableThreadsHd == Nil_closure)
+ RunnableThreadsTl = CurrentTSO;
+ TSO_LINK(CurrentTSO) = RunnableThreadsHd;
+ RunnableThreadsHd = CurrentTSO;
+ }
+
+#else
+
+ /*
+ * In the sequential world, we assume that the whole point of running
+ * the threaded build is for concurrent Haskell, so we provide round-robin
+ * scheduling.
+ */
+
+ if (again) {
+ if(RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = CurrentTSO;
+ } else {
+ TSO_LINK(RunnableThreadsTl) = CurrentTSO;
+ if (DO_QP_PROF > 1) {
+ QP_Event1("GA", CurrentTSO);
+ }
+ }
+ RunnableThreadsTl = CurrentTSO;
+ }
+#endif
+
+#if 1
+ /*
+ * Debugging code, which is useful enough (and cheap enough) to compile
+ * in all the time. This makes sure that we don't access saved registers,
+ * etc. in threads which are supposed to be sleeping.
+ */
+ CurrentTSO = Nil_closure;
+ CurrentRegTable = NULL;
+#endif
+
+ /* First the required sparks */
+
+ for (sparkp = PendingSparksHd[REQUIRED_POOL];
+ sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
+ spark = *sparkp;
+ if (SHOULD_SPARK(spark)) {
+ if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
+ break; /* out of heap: leave remaining sparks pending */
+ if (RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = tso;
+#ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_START, tso);
+ sameThread = rtsTrue;
+ }
+#endif
+ } else {
+ TSO_LINK(RunnableThreadsTl) = tso;
+#ifdef PAR
+ if (do_gr_profile)
+ DumpGranEvent(GR_STARTQ, tso);
+#endif
+ }
+ RunnableThreadsTl = tso;
+ } else {
+ if (DO_QP_PROF)
+ QP_Event0(threadId++, spark);
+#ifdef PAR
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED, threadId++);
+#endif
+ }
+ }
+ PendingSparksHd[REQUIRED_POOL] = sparkp;
+
+ /* Now, almost the same thing for advisory sparks */
+
+ for (sparkp = PendingSparksHd[ADVISORY_POOL];
+ sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
+ spark = *sparkp;
+ if (SHOULD_SPARK(spark)) {
+ if (
+#ifdef PAR
+ /* In the parallel world, don't create advisory threads if we are
+ * about to rerun the same thread, or already have runnable threads,
+ * or the main thread has terminated */
+ (RunnableThreadsHd != Nil_closure ||
+ (required_thread_count == 0 && IAmMainThread)) ||
+#endif
+ advisory_thread_count == MaxThreads ||
+ (tso = NewThread(spark, T_ADVISORY)) == NULL)
+ break;
+ advisory_thread_count++;
+ if (RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = tso;
+#ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_START, tso);
+ sameThread = rtsTrue;
+ }
+#endif
+ } else {
+ TSO_LINK(RunnableThreadsTl) = tso;
+#ifdef PAR
+ if (do_gr_profile)
+ DumpGranEvent(GR_STARTQ, tso);
+#endif
+ }
+ RunnableThreadsTl = tso;
+ } else {
+ if (DO_QP_PROF)
+ QP_Event0(threadId++, spark);
+#ifdef PAR
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED, threadId++);
+#endif
+ }
+ }
+ PendingSparksHd[ADVISORY_POOL] = sparkp;
+
+ /* Negative longjmp value terminates ScheduleThreads */
+#ifndef PAR
+ longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
+#else
+ longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
+#endif
+}
+
+#endif /* !GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[thread-gransim-execution]{Starting, Idling and Migrating
+ Threads (GrAnSim only)}
+%
+%****************************************************************************
+
+Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
+processors).
+
+\begin{code}
+#if defined(GRAN)
+
+/* Put the thread carried by `event' on this processor's thread queue and
+   charge the queueing overhead.  If the queue was empty the thread starts
+   running immediately (a CONTINUETHREAD event is scheduled for it);
+   otherwise it is appended at the tail.  event_type is the profiling tag
+   for the "starts running" case; event_type+1 is used for the "queued"
+   case. */
+StartThread(event,event_type)
+eventq event;
+enum gran_event_types event_type;
+{
+  if (ThreadQueueHd != Nil_closure)
+    {
+      /* Queue non-empty: append at the tail. */
+      TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
+      ThreadQueueTl = EVENT_TSO(event);
+
+      if (DoThreadMigration)
+        ++SurplusThreads;
+
+      if (do_gr_profile)
+        DumpGranEvent(event_type+1,EVENT_TSO(event));
+    }
+  else
+    {
+      /* Queue empty: thread becomes the running one straight away. */
+      CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event);
+      newevent(CurrentProc,CurrentProc,
+               CurrentTime[CurrentProc]+gran_threadqueuetime,
+               CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+      if (do_gr_profile)
+        DumpGranEvent(event_type,EVENT_TSO(event));
+    }
+  CurrentTime[CurrentProc] += gran_threadqueuetime;
+}
+\end{code}
+
+Export work to idle PEs.
+
+\begin{code}
+/* Export work to idle PEs: scan all processors and, for each idle one,
+   try to steal a thread and/or a spark on its behalf (subject to the
+   FetchStrategy/OutstandingFetches constraints).  Does nothing when no
+   PE is idle or there is neither a surplus thread nor a spark to give. */
+HandleIdlePEs()
+{
+  PROC p;
+
+  if (!ANY_IDLE || (SparksAvail <= 0l && SurplusThreads <= 0l))
+    return;
+
+  for (p = 0; p < max_proc; p++)
+    {
+      if (!IS_IDLE(p))
+        continue;
+
+      if (DoStealThreadsFirst &&
+          (FetchStrategy >= 4 || OutstandingFetches[p] == 0))
+        {
+          if (SurplusThreads > 0l)              /* Steal a thread */
+            StealThread(p);
+
+          if (!IS_IDLE(p))
+            break;
+        }
+
+      if (SparksAvail > 0l &&
+          (FetchStrategy >= 3 || OutstandingFetches[p] == 0))  /* Steal a spark */
+        StealSpark(p);
+
+      if (IS_IDLE(p) && SurplusThreads > 0l &&
+          (FetchStrategy >= 4 || OutstandingFetches[p] == 0))  /* Steal a thread */
+        StealThread(p);
+    }
+}
+\end{code}
+
+Steal a spark and schedule moving it to processor proc. We want to look at
+PEs in clock order -- the most lagging (smallest local clock) first.
+Currently sparks are only stolen from the @ADVISORY_POOL@, never from the
+@REQUIRED_POOL@. Eventually, this should be changed to first steal from the
+former and then from the latter.
+
+\begin{code}
+/* Steal one exportable spark from another PE's ADVISORY_POOL and schedule
+ * a MOVESPARK event delivering it to processor proc.  Candidate PEs are
+ * those whose local clock is not ahead of CurrentProc's; they are tried
+ * in clock order, earliest first.  Local (non-global) sparks are never
+ * stolen; sparks whose node fails SHOULD_SPARK are pruned on the way.
+ *
+ * NOTE(review): `times' is declared TIME[] but actually stores PROC
+ * indices (and is swapped via `unsigned temp') -- confirm the intended
+ * element type.
+ */
+StealSpark(proc)
+PROC proc;
+{
+  PROC p;
+  sparkq spark, prev, next;
+  int stolen = 0;
+  TIME times[MAX_PROC], stealtime;
+  unsigned ntimes=0, i, j;
+
+  /* times shall contain processors from which we may steal sparks */
+  for(p=0; p < max_proc; ++p)
+    if(proc != p &&
+       PendingSparksHd[p][ADVISORY_POOL] != NULL &&
+       CurrentTime[p] <= CurrentTime[CurrentProc])
+      times[ntimes++] = p;
+
+  /* sort times (simple selection-style swap sort; ntimes is small) */
+  for(i=0; i < ntimes; ++i)
+    for(j=i+1; j < ntimes; ++j)
+      if(CurrentTime[times[i]] > CurrentTime[times[j]])
+	{
+	  unsigned temp = times[i];
+	  times[i] = times[j];
+	  times[j] = temp;
+	}
+
+  for(i=0; i < ntimes && !stolen; ++i)
+    {
+      p = times[i];
+
+      /* Walk p's advisory spark queue.  `prev' only advances past sparks
+         we keep (local ones); stolen or pruned sparks are unlinked below. */
+      for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
+	  spark != NULL && !stolen;
+	  spark=next)
+	{
+	  next = SPARK_NEXT(spark);
+
+	  if(SHOULD_SPARK(SPARK_NODE(spark)))
+	    {
+	      /* Don't Steal local sparks */
+	      if(!SPARK_GLOBAL(spark))
+		{
+		  prev=spark;
+		  continue;
+		}
+
+	      /* Detach the spark and schedule its arrival at proc; the
+	         steal costs packing time on p plus SparkStealTime(). */
+	      SPARK_NEXT(spark) = NULL;
+	      CurrentTime[p] += gran_mpacktime;
+
+	      stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
+		           + SparkStealTime();
+
+	      newevent(proc,p /* CurrentProc */,stealtime,
+		       MOVESPARK,Nil_closure,Nil_closure,spark);
+
+	      /* proc no longer counts as idle once work is on the way. */
+	      MAKE_BUSY(proc);
+	      stolen = 1;
+	      ++SPARK_GLOBAL(spark);
+
+	      if(do_sp_profile)
+		DumpSparkGranEvent(SP_EXPORTED,spark);
+
+	      CurrentTime[p] += gran_mtidytime;
+
+	      --SparksAvail;
+	    }
+	  else
+	    {
+	      /* Prune: the node is no longer worth sparking.  NB:
+	         DisposeSpark decrements SparksAvail itself. */
+	      if(do_sp_profile)
+		DumpSparkGranEvent(SP_PRUNED,spark);
+	      DisposeSpark(spark);
+	    }
+
+	  /* Unlink the removed spark from p's queue (head or interior). */
+	  if(spark == PendingSparksHd[p][ADVISORY_POOL])
+	    PendingSparksHd[p][ADVISORY_POOL] = next;
+
+	  if(prev!=NULL)
+	    SPARK_NEXT(prev) = next;
+	}
+
+      if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
+	PendingSparksTl[p][ADVISORY_POOL] = NULL;
+    }
+}
+\end{code}
+
+Steal a thread and schedule moving it to processor proc.
+
+\begin{code}
+/* Steal one runnable, unlocked thread from another PE and schedule a
+ * MOVETHREAD event delivering it to processor proc.  Candidate PEs (those
+ * whose clock is not ahead of CurrentProc's) are tried in clock order.
+ * The first thread in a queue is skipped -- presumably it is the one
+ * currently running on that PE (TODO confirm) -- so only the second and
+ * later exportable threads are candidates.
+ *
+ * NOTE(review): as in StealSpark, `times' is TIME[] but holds PROC ids.
+ */
+StealThread(proc)
+PROC proc;
+{
+  PROC p;
+  P_ thread, prev;
+  TIME times[MAX_PROC], stealtime;
+  unsigned ntimes=0, i, j;
+
+  /* Hunt for a thread */
+
+  /* times shall contain processors from which we may steal threads */
+  for(p=0; p < max_proc; ++p)
+    if(proc != p && RunnableThreadsHd[p] != Nil_closure &&
+       CurrentTime[p] <= CurrentTime[CurrentProc])
+      times[ntimes++] = p;
+
+  /* sort times */
+  for(i=0; i < ntimes; ++i)
+    for(j=i+1; j < ntimes; ++j)
+      if(CurrentTime[times[i]] > CurrentTime[times[j]])
+	{
+	  unsigned temp = times[i];
+	  times[i] = times[j];
+	  times[j] = temp;
+	}
+
+  for(i=0; i < ntimes; ++i)
+    {
+      p = times[i];
+
+      /* Steal the first exportable thread in the runnable queue after the */
+      /* first one */
+
+      if(RunnableThreadsHd[p] != Nil_closure)
+	{
+	  /* Skip locked threads (TSO_LOCKED); `prev' trails `thread'. */
+	  for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
+	      thread != Nil_closure && TSO_LOCKED(thread);
+	      prev = thread, thread = TSO_LINK(thread))
+	    /* SKIP */;
+
+	  if(thread != Nil_closure)   /* Take thread out of runnable queue */
+	    {
+	      TSO_LINK(prev) = TSO_LINK(thread);
+
+	      TSO_LINK(thread) = Nil_closure;
+
+	      if(RunnableThreadsTl[p] == thread)
+		RunnableThreadsTl[p] = prev;
+
+	      /* Turn magic constants into params !? -- HWL */
+
+	      CurrentTime[p] += 5l * gran_mpacktime;
+
+	      stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
+		          + SparkStealTime() + 4l * gran_additional_latency
+			  + 5l * gran_munpacktime;
+
+	      /* Move the thread */
+	      SET_PROCS(thread,PE_NUMBER(proc));
+
+	      /* Move from one queue to another */
+	      newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL);
+	      MAKE_BUSY(proc);
+	      --SurplusThreads;
+
+	      if(do_gr_profile)
+		DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread));
+
+	      CurrentTime[p] += 5l * gran_mtidytime;
+
+	      /* Found one */
+	      break;
+	    }
+	}
+    }
+}
+
+/* Model of the time a spark steal takes: a "fish" delay that shrinks
+   linearly from max_proc/2 (no idlers) towards 1 (all PEs idle), scaled
+   by the communication latency.
+   NOTE(review): with max_proc == 1 the (max_proc-1) divisor is zero;
+   presumably stealing never happens on a single PE -- confirm. */
+TIME SparkStealTime()
+{
+  double fish, sparkd, latd;
+
+  fish   = (double)max_proc/2;
+  sparkd = fish - ((fish-1)/(double)(max_proc-1))*(double)Idlers;
+  latd   = sparkd*((double)gran_latency);
+
+  return((TIME)latd);
+}
+#endif /* GRAN ; HWL */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[thread-execution]{Executing Threads}
+%
+%****************************************************************************
+
+\begin{code}
+EXTDATA_RO(StkO_info);
+EXTDATA_RO(TSO_info);
+EXTDATA_RO(WorldStateToken_closure);
+
+EXTFUN(EnterNodeCode);
+UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
+
+#if defined(GRAN)
+
+/* Slow but relatively reliable method uses xmalloc */
+/* Eventually change that to heap allocated sparks. */
+
+/* Allocate and initialise a fresh spark record for `node'.
+   `local' != 0 marks it as a local spark (SPARK_GLOBAL == 0), which
+   StealSpark will refuse to export.  Storage comes from xmalloc (slow
+   but reliable; should eventually become heap-allocated). */
+sparkq
+NewSpark(node,name,local)
+P_ node;
+I_ name, local;
+{
+  extern P_ xmalloc();
+  sparkq sp;
+
+  sp = (sparkq) xmalloc(sizeof(struct spark));
+  SPARK_PREV(sp) = NULL;
+  SPARK_NEXT(sp) = NULL;
+  SPARK_NODE(sp) = node;
+  SPARK_NAME(sp) = name;
+  SPARK_GLOBAL(sp) = !local;
+  return(sp);
+}
+
+/* Release one spark record and decrement the global SparksAvail count.
+   NB: the count is decremented even for a NULL argument -- NOTE(review):
+   presumably callers never pass NULL unintentionally; confirm before
+   changing this. */
+void
+DisposeSpark(spark)
+sparkq spark;
+{
+   --SparksAvail;
+
+   if (spark != NULL)
+     free(spark);
+}
+
+/* Free an entire spark queue (linked through SPARK_NEXT), tail first.
+   Does not touch SparksAvail itself; the GRAN_CHECK test below only
+   reports if the counter has already gone negative. */
+DisposeSparkQ(sq)
+sparkq sq;
+{
+  if (sq != NULL) {
+    DisposeSparkQ(SPARK_NEXT(sq));
+
+#ifdef GRAN_CHECK
+    if (SparksAvail < 0)
+      fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", sq);
+#endif
+
+    free(sq);
+  }
+}
+
+#endif
+
+/* Size (in words) of each stack-object chunk allocated by NewThread. */
+I_ StkOChunkSize = DEFAULT_STKO_CHUNK_SIZE;
+
+/* Create a new TSO, with the specified closure to enter and thread type
+ * (T_MAIN / T_REQUIRED / T_ADVISORY).
+ *
+ * Returns NULL when there is not enough free heap for the TSO or its
+ * stack object; callers treat that as a heap-check failure and stop
+ * creating threads.  Dead TSOs and stack objects are recycled from the
+ * AvailableTSO / AvailableStack free lists before new heap is used.
+ *
+ * NOTE(review): if the TSO is freshly heap-allocated but the subsequent
+ * stack-object allocation fails, SAVE_Hp has already been bumped for the
+ * TSO -- presumably the next GC reclaims it; confirm.
+ */
+P_
+NewThread(topClosure, type)
+P_ topClosure;
+W_ type;
+{
+    P_ stko, tso;
+
+    /* Reuse a dead TSO if one is available, else allocate from the heap. */
+    if (AvailableTSO != Nil_closure) {
+	tso = AvailableTSO;
+#if defined(GRAN)
+	SET_PROCS(tso,ThisPE); /* Allocate it locally! */
+#endif
+	AvailableTSO = TSO_LINK(tso);
+    } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
+	return(NULL);		/* no room for a TSO: heap-check failure */
+    } else {
+	ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
+		  BYTES_TO_STGWORDS(sizeof(StgDouble)));
+	tso = SAVE_Hp + 1;
+	SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
+	SET_TSO_HDR(tso, TSO_info, CCC);
+    }
+
+    /* Initialise the TSO's bookkeeping fields. */
+    TSO_LINK(tso) = Nil_closure;
+    TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
+    TSO_NAME(tso) = (P_) INFO_PTR(topClosure);	/* A string would be nicer -- JSM */
+    TSO_ID(tso) = threadId++;
+    TSO_TYPE(tso) = type;
+    TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
+    TSO_ARG1(tso) = TSO_EVENT(tso) = 0;
+    TSO_SWITCH(tso) = NULL;
+
+#ifdef DO_REDN_COUNTING
+    TSO_AHWM(tso) = 0;
+    TSO_BHWM(tso) = 0;
+#endif
+
+#if defined(GRAN) || defined(PAR)
+    /* Per-thread profiling counters start from zero. */
+    TSO_SPARKNAME(tso) = 0;
+#if defined(GRAN)
+    TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
+#else
+    TSO_STARTEDAT(tso) = CURRENT_TIME;
+#endif
+    TSO_EXPORTED(tso) = 0;
+    TSO_BASICBLOCKS(tso) = 0;
+    TSO_ALLOCS(tso) = 0;
+    TSO_EXECTIME(tso) = 0;
+    TSO_FETCHTIME(tso) = 0;
+    TSO_FETCHCOUNT(tso) = 0;
+    TSO_BLOCKTIME(tso) = 0;
+    TSO_BLOCKCOUNT(tso) = 0;
+    TSO_BLOCKEDAT(tso) = 0;
+    TSO_GLOBALSPARKS(tso) = 0;
+    TSO_LOCALSPARKS(tso) = 0;
+#endif
+    /*
+     * set pc, Node (R1), liveness
+     */
+    CurrentRegTable = TSO_INTERNAL_PTR(tso);
+    SAVE_Liveness = LIVENESS_R1;
+    SAVE_R1.p = topClosure;
+
+# ifndef PAR
+    /* The (sequential) main thread runs on the pre-allocated MainStkO;
+       everyone else gets a stack object from the free list or the heap. */
+    if (type == T_MAIN) {
+	stko = MainStkO;
+    } else {
+# endif
+	if (AvailableStack != Nil_closure) {
+	    stko = AvailableStack;
+#if defined(GRAN)
+	    SET_PROCS(stko,ThisPE);
+#endif
+	    AvailableStack = STKO_LINK(AvailableStack);
+	} else if (SAVE_Hp + STKO_HS + StkOChunkSize > SAVE_HpLim) {
+	    return(NULL);	/* no room for a stack object */
+	} else {
+	    ALLOC_STK(STKO_HS,StkOChunkSize,0);
+	    stko = SAVE_Hp + 1;
+	    SAVE_Hp += STKO_HS + StkOChunkSize;
+	    SET_STKO_HDR(stko, StkO_info, CCC);
+	}
+	/* Empty A and B stacks, one word above/below the respective base. */
+	STKO_SIZE(stko) = StkOChunkSize + STKO_VHS;
+	STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
+	STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
+	STKO_LINK(stko) = Nil_closure;
+	STKO_RETURN(stko) = NULL;
+# ifndef PAR
+    }
+# endif
+
+#ifdef DO_REDN_COUNTING
+    STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
+#endif
+
+    /* The main thread gets the world-state token pushed on its A stack. */
+    if (type == T_MAIN) {
+	STKO_SpA(stko) -= AREL(1);
+	*STKO_SpA(stko) = (P_) WorldStateToken_closure;
+    }
+
+    SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
+    SAVE_StkO = stko;
+
+    if (DO_QP_PROF) {
+	QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
+    }
+    return tso;
+}
+\end{code}
+
+\begin{code}
+
+/* EndThread: called when the current thread (CurrentTSO) finishes.
+ *
+ * Dumps per-thread statistics (ticky / GRAN / PAR profiles), adjusts the
+ * required/advisory thread counts, recycles the TSO and its stack object
+ * onto the AvailableTSO / AvailableStack free lists, and re-enters the
+ * scheduler via ReSchedule.  Never returns.
+ */
+void
+EndThread(STG_NO_ARGS)
+{
+#ifdef PAR
+    TIME now = CURRENT_TIME;
+#endif
+#ifdef DO_REDN_COUNTING
+    extern FILE *tickyfile;
+
+    /* Report the thread's stack high-water marks. */
+    if (tickyfile != NULL) {
+	fprintf(tickyfile, "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
+	  TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
+	fprintf(tickyfile, "\tB stack max. depth: %ld words\n",
+	  TSO_BHWM(CurrentTSO));
+    }
+#endif
+
+    if (DO_QP_PROF) {
+	QP_Event1("G*", CurrentTSO);
+    }
+
+#if defined(GRAN)
+    /* The dying thread must be the one at the head of the thread queue. */
+    assert(CurrentTSO == ThreadQueueHd);
+    ThreadQueueHd = TSO_LINK(CurrentTSO);
+
+    if(ThreadQueueHd == Nil_closure)
+      ThreadQueueTl = Nil_closure;
+
+    else if (DoThreadMigration)
+      --SurplusThreads;
+
+    if (do_gr_sim)
+      {
+	if(TSO_TYPE(CurrentTSO)==T_MAIN)
+	  {
+	    /* Main thread done: drain every PE's runnable queue for the
+	       benefit of the profile, then empty this PE's queue. */
+	    int i;
+	    for(i=0; i < max_proc; ++i) {
+	      StgBool is_first = StgTrue;
+	      while(RunnableThreadsHd[i] != Nil_closure)
+		{
+		  /* We schedule runnable threads before killing them to */
+		  /* make the job of bookkeeping the running, runnable, */
+		  /* blocked threads easier for scripts like gr2ps  -- HWL */
+
+		  if (do_gr_profile && !is_first)
+		    DumpRawGranEvent(i,GR_SCHEDULE,
+				     TSO_ID(RunnableThreadsHd[i]));
+		  if (!no_gr_profile)
+		    DumpGranInfo(i,RunnableThreadsHd[i],StgTrue);
+		  RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
+		  is_first = StgFalse;
+		}
+	    }
+
+	    ThreadQueueHd = Nil_closure;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+	    /* Print event stats */
+	    if (debug & 0x20) {
+	      int i;
+
+	      fprintf(stderr,"Statistics of events (total=%d):\n",
+		      noOfEvents);
+	      for (i=0; i<=7; i++) {
+		fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n",
+			event_names[i],i,event_counts[i],
+			(float)(100*event_counts[i])/(float)(noOfEvents) );
+	      }
+	    }
+#endif
+
+	  }
+
+	if (!no_gr_profile)
+	  DumpGranInfo(CurrentProc,CurrentTSO,
+		       TSO_TYPE(CurrentTSO) != T_ADVISORY);
+
+	/* Note ThreadQueueHd is Nil when the main thread terminates */
+	if(ThreadQueueHd != Nil_closure)
+	  {
+	    if (do_gr_profile && !no_gr_profile)
+	      DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+	    CurrentTime[CurrentProc] += gran_threadscheduletime;
+	  }
+
+	else if (do_gr_binary && TSO_TYPE(CurrentTSO)==T_MAIN &&
+		 !no_gr_profile)
+	  grterminate(CurrentTime[CurrentProc]);
+      }
+#endif  /* GRAN */
+
+#ifdef PAR
+    if (do_gr_profile) {
+	TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
+	DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
+    }
+#endif
+
+    switch (TSO_TYPE(CurrentTSO)) {
+    case T_MAIN:
+	required_thread_count--;
+#ifdef PAR
+	if (do_gr_binary)
+	    grterminate(now);
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+	if ( (debug & 0x80) || (debug & 0x40) )
+	  fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n");
+
+	/* I've stolen that from the end of ReSchedule (!GRAN).  HWL */
+	longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1);
+#else
+	/* No break needed: ReSchedule longjmps and never returns. */
+	ReSchedule(0);
+#endif  /* GRAN */
+
+    case T_REQUIRED:
+	required_thread_count--;
+	break;
+
+    case T_ADVISORY:
+	advisory_thread_count--;
+	break;
+
+    case T_FAIL:
+	EXIT(EXIT_FAILURE);
+
+    default:
+	fflush(stdout);
+	fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO));
+	EXIT(EXIT_FAILURE);
+    }
+
+    /* Reuse stack object space */
+    ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure);
+    STKO_LINK(SAVE_StkO) = AvailableStack;
+    AvailableStack = SAVE_StkO;
+    /* Reuse TSO */
+    TSO_LINK(CurrentTSO) = AvailableTSO;
+    AvailableTSO = CurrentTSO;
+    CurrentTSO = Nil_closure;
+    CurrentRegTable = NULL;
+
+#if defined(GRAN)
+    /* NB: Now ThreadQueueHd is either the next runnable thread on this */
+    /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */
+    /* issued by ReSchedule. */
+    ReSchedule(SAME_THREAD);                /* back for more! */
+#else
+    ReSchedule(0);                          /* back for more! */
+#endif
+}
+\end{code}
+
+%****************************************************************************
+%
+\subsection[thread-blocking]{Local Blocking}
+%
+%****************************************************************************
+
+\begin{code}
+
+#if defined(COUNT)
+/* Trivial bump functions for the update/PAP statistics counters. */
+void CountnUPDs()     { nUPDs     += 1; }
+void CountnUPDs_old() { nUPDs_old += 1; }
+void CountnUPDs_new() { nUPDs_new += 1; }
+
+void CountnPAPs()     { nPAPs     += 1; }
+#endif
+
+EXTDATA_RO(BQ_info);
+
+#ifndef GRAN
+/* NB: non-GRAN version ToDo
+ *
+ * AwakenBlockingQueue awakens a list of TSOs and FBQs.
+ */
+
+/* (PAR) FetchBlocks unlinked from blocking queues are collected on this
+   list; presumably the scheduler processes them later -- TODO confirm. */
+P_ PendingFetches = Nil_closure;
+
+/* Wake up every entry of a blocking queue (non-GRAN version).
+ *
+ * TSOs in the queue are appended, in order, to the runnable queue; under
+ * PAR, FetchBlocks (INFO_BF_TYPE) are instead moved onto PendingFetches
+ * and spliced out of the chain.  Per-TSO block/fetch times are charged
+ * when gr-profiling.
+ *
+ * NOTE(review): the brace structure below only balances within a single
+ * preprocessor branch (the PAR loop is a while+switch, the non-PAR loop a
+ * plain while) -- edit with care.
+ */
+void
+AwakenBlockingQueue(bqe)
+  P_ bqe;
+{
+    P_ last_tso = NULL;
+
+# ifdef PAR
+    P_ next;
+    TIME now = CURRENT_TIME;
+
+# endif
+
+# ifndef PAR
+    while (bqe != Nil_closure) {
+# else
+    while (IS_MUTABLE(INFO_PTR(bqe))) {
+	switch (INFO_TYPE(INFO_PTR(bqe))) {
+	case INFO_TSO_TYPE:
+# endif
+	    if (DO_QP_PROF) {
+		QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
+	    }
+# ifdef PAR
+	    /* Charge the time spent blocked/fetching to the woken TSO. */
+	    if (do_gr_profile) {
+		DumpGranEvent(GR_RESUMEQ, bqe);
+		switch (TSO_QUEUE(bqe)) {
+		case Q_BLOCKED:
+		    TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
+		    break;
+		case Q_FETCHING:
+		    TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
+		    break;
+		default:
+		    fflush(stdout);
+		    fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
+		    EXIT(EXIT_FAILURE);
+		}
+	    }
+# endif
+	    /* First woken TSO gets attached to the runnable queue; the
+	       rest of the chain is already linked behind it. */
+	    if (last_tso == NULL) {
+		if (RunnableThreadsHd == Nil_closure) {
+		    RunnableThreadsHd = bqe;
+		} else {
+		    TSO_LINK(RunnableThreadsTl) = bqe;
+		}
+	    }
+	    last_tso = bqe;
+	    bqe = TSO_LINK(bqe);
+# ifdef PAR
+	    break;
+	case INFO_BF_TYPE:
+	    /* FetchBlock: splice it out onto PendingFetches. */
+	    next = BF_LINK(bqe);
+	    BF_LINK(bqe) = PendingFetches;
+	    PendingFetches = bqe;
+	    bqe = next;
+	    if (last_tso != NULL)
+		TSO_LINK(last_tso) = next;
+	    break;
+	default:
+	    fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
+	      INFO_PTR(bqe), (W_) bqe);
+	    EXIT(EXIT_FAILURE);
+	}
+    }
+#else
+    }
+# endif
+    /* Fix up the runnable-queue tail (and terminate the chain under PAR). */
+    if (last_tso != NULL) {
+	RunnableThreadsTl = last_tso;
+# ifdef PAR
+	TSO_LINK(last_tso) = Nil_closure;
+# endif
+    }
+}
+
+#ifdef GRAN
+
+/* NB: GRAN version only ToDo
+ *
+ * AwakenBlockingQueue returns True if we are on the oldmutables list,
+ * so that the update code knows what to do next.
+ */
+
+/* Wake up the blocking queue hanging off `node' (GRAN version).
+ *
+ * Under do_gr_sim, each TSO gets an individual RESUMETHREAD event
+ * targeted at the PE it lives on (where_is), with local/global unblock
+ * costs; the chain is dismantled as we go.  Otherwise the whole chain is
+ * appended to this PE's thread queue in one splice.
+ *
+ * Returns non-zero iff the node is still on the oldmutables list
+ * (MUT_LINK != MUT_NOT_LINKED), so that the update code knows what to do
+ * next.
+ *
+ * NOTE(review): the non-sim branch dereferences TSO_LINK(tso) without a
+ * Nil check first -- presumably a blocking queue is never empty here;
+ * confirm.
+ */
+I_
+AwakenBlockingQueue(node)
+  P_ node;
+{
+    P_ tso = (P_) BQ_ENTRIES(node);
+    P_ prev;
+
+    if(do_gr_sim)
+      {
+	W_ notifytime;
+
+# if defined(COUNT)
+	++nUPDs;
+	if (tso != Nil_closure)
+	  ++nUPDs_BQ;
+# endif
+
+	while(tso != Nil_closure) {
+	  W_ proc;
+	  /* Every queued TSO must be blocked on this very node. */
+	  assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
+
+# if defined(COUNT)
+	  ++BQ_lens;
+# endif
+
+	  /* Find where the tso lives */
+	  proc = where_is(tso);
+
+	  if(proc == CurrentProc)
+	    notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
+	  else
+	    {
+	      /* Remote wake-up: pay message pack/tidy costs here. */
+	      CurrentTime[CurrentProc] += gran_mpacktime;
+	      notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
+	      CurrentTime[CurrentProc] += gran_mtidytime;
+	    }
+
+	  /* and create a resume message */
+	  newevent(proc, CurrentProc, notifytime,
+		   RESUMETHREAD,tso,Nil_closure,NULL);
+
+	  prev = tso;
+	  tso = TSO_LINK(tso);
+	  TSO_LINK(prev) = Nil_closure;
+	}
+      }
+    else
+      {
+	/* Splice the whole chain onto the thread queue. */
+	if (ThreadQueueHd == Nil_closure)
+	  ThreadQueueHd = tso;
+	else
+	  TSO_LINK(ThreadQueueTl) = tso;
+
+	/* Walk to the last TSO, logging each wake-up. */
+	while(TSO_LINK(tso) != Nil_closure) {
+	  assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
+	  if (DO_QP_PROF) {
+	    QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
+	  }
+	  tso = TSO_LINK(tso);
+	}
+
+	assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
+	if (DO_QP_PROF) {
+	  QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
+	}
+
+	ThreadQueueTl = tso;
+      }
+
+    return MUT_LINK(node) != MUT_NOT_LINKED;
+}
+
+#endif /* GRAN only */
+
+EXTFUN(Continue);
+
+/* Voluntarily give up the processor.
+   args packs two things: bit 0 is the "run me again" flag handed on to
+   ReSchedule, the remaining bits are the liveness mask to save.  The
+   thread will resume at Continue when rescheduled. */
+void
+Yield(args)
+W_ args;
+{
+    SAVE_Liveness = args >> 1;
+    TSO_PC1(CurrentTSO) = Continue;	/* resume point */
+
+    if (DO_QP_PROF)
+	QP_Event1("GR", CurrentTSO);
+
+#ifdef PAR
+    /* Charge the elapsed execution time to this thread before it sleeps.
+       Note that CURRENT_TIME may perform an unsafe call. */
+    if (do_gr_profile)
+	TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
+#endif
+
+    ReSchedule(args & 1);
+}
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
+%
+%****************************************************************************
+
+The following GrAnSim routines simulate the fetching of nodes from a remote
+processor. We use a 1 word bitmask to indicate on which processor a node is
+lying. Thus, moving or copying a node from one processor to another just
+requires an appropriate change in this bitmask (using @SET_GA@).
+Additionally, the clocks have to be updated.
+
+A special case arises when the node that is needed by processor A has been
+moved from a processor B to a processor C between sending out a @FETCH@
+(from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
+to C.
+
+Currently, we only support GRIP-like single closure fetching. We plan to
+incorporate GUM-like packet fetching in the near future.
+
+\begin{code}
+#if defined(GRAN)
+
+/* Fetch node "node" to processor "p" */
+
+/* Simulate fetching `node' from PE `from' to PE `to' (== CurrentProc).
+   Normal-form nodes are copied (bit added to PROCS mask); thunks are
+   moved (mask replaced).  Returns 1 when the node is on neither PE --
+   i.e. it migrated in the meantime and the fetch must be forwarded --
+   and 0 on success.
+   NOTE(review): GranSimFetch's SimplifiedFetch path calls this with only
+   two arguments -- confirm the intended `to' there. */
+int
+FetchNode(node,from,to)
+P_ node;
+PROC from, to;
+{
+  assert(to==CurrentProc);
+
+  if (!IS_LOCAL_TO(PROCS(node),from) && !IS_LOCAL_TO(PROCS(node),to))
+    return 1;
+
+  if (IS_NF(INFO_PTR(node)))		/* Old: || IS_BQ(node) */
+    PROCS(node) |= PE_NUMBER(to);	/* normal form: copy */
+  else
+    PROCS(node) = PE_NUMBER(to);	/* thunk: move */
+
+  /* GUM-style packet fetching of the children is not implemented yet. */
+  if (DoGUMMFetching)
+    fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n");
+
+  return 0;
+}
+
+/* --------------------------------------------------
+ Cost of sending a packet of size n = C + P*n
+ where C = packet construction constant,
+ P = cost of packing one word into a packet
+ [Should also account for multiple packets].
+ -------------------------------------------------- */
+
+/* Handle a FETCHNODE request: PE `p' (running `tso') wants `node', which
+ * is supposed to live on CurrentProc.  Three cases:
+ *   1. node is already local to p  => just send the FETCHREPLY so tso
+ *      can restart;
+ *   2. node is still here          => pay pack/tidy costs and send a
+ *      FETCHREPLY after the latency (the actual move/copy happens on
+ *      arrival, see FETCHREPLY handling);
+ *   3. node has moved elsewhere    => forward the FETCHNODE to its new
+ *      home (unless NoForward is set under GRAN_CHECK).
+ */
+void
+HandleFetchRequest(node,p,tso)
+P_ node, tso;
+PROC p;
+{
+  if (IS_LOCAL_TO(PROCS(node),p) )  /* Somebody else moved node already => */
+    {                               /* start tso */
+      newevent(p,CurrentProc,
+	       CurrentTime[CurrentProc] /* +gran_latency */,
+	       FETCHREPLY,tso,node,NULL);        /* node needed ?? */
+      CurrentTime[CurrentProc] += gran_mtidytime;
+    }
+  else if (IS_LOCAL_TO(PROCS(node),CurrentProc) )   /* Is node still here? */
+    {
+      /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+      /* Send a reply to the originator */
+      CurrentTime[CurrentProc] += gran_mpacktime;
+
+      newevent(p,CurrentProc,
+	       CurrentTime[CurrentProc]+gran_latency,
+	       FETCHREPLY,tso,node,NULL);        /* node needed ?? */
+
+      CurrentTime[CurrentProc] += gran_mtidytime;
+    }
+  else
+    {  /* Qu'vatlh! node has been grabbed by another proc => forward */
+      PROC p_new = where_is(node);
+      TIME fetchtime;
+
+#if defined(GRAN_CHECK) && defined(GRAN)   /* Just for testing */
+      /* NoForward: reply directly instead of forwarding (test mode). */
+      if (NoForward) {
+	newevent(p,p_new,
+		 max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency,
+		 FETCHREPLY,tso,node,NULL);      /* node needed ?? */
+	CurrentTime[CurrentProc] += gran_mtidytime;
+	return;
+      }
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)   /* Just for testing */
+      if (debug & 0x2)    /* 0x2 should be something like DBG_PRINT_FWD */
+	fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n",
+		node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
+#endif
+      /* Prepare FORWARD message to proc p_new */
+      CurrentTime[CurrentProc] += gran_mpacktime;
+
+      fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) +
+	          gran_latency;
+
+      newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
+
+      CurrentTime[CurrentProc] += gran_mtidytime;
+    }
+}
+#endif
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gr-simulation]{Granularity Simulation}
+%
+%****************************************************************************
+
+\begin{code}
+#if 0 /* moved to GranSim.lc */
+#if defined(GRAN)
+/* Dead code: these globals (and the two functions below) now live in
+   GranSim.lc; kept here under #if 0 for reference only. */
+I_ do_gr_sim = 0;	/* granularity simulation on/off */
+FILE *gr_file = NULL;	/* granularity report output stream */
+char gr_filename[32];	/* name of the report file */
+
+/* Open the granularity-simulation report file (<prog>.gr, or <prog>.gb
+ * for binary traces) and write its header: the command line, any +RTS
+ * options, and the main simulation parameters.  Exits on fopen failure.
+ * Also initialises Idlers.  Returns 0.
+ * (Dead code under #if 0 -- superseded by GranSim.lc -- but the fixes
+ * below are recorded for the migrated copy too.)
+ */
+init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+  I_ i;
+
+  if(do_gr_sim)
+    {
+      char *extension = do_gr_binary? "gb": "gr";
+      /* "%.28s" bounds the name so 28 + '.' + 2 + NUL fits gr_filename[32].
+	 (BUG FIX: the old "%0.28s"/"%0.2s" used the '0' flag, which is
+	 undefined for %s conversions.) */
+      sprintf(gr_filename, "%.28s.%.2s", prog_argv[0],extension);
+
+      if ((gr_file = fopen(gr_filename,"w")) == NULL )
+	{
+	  fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
+	  exit(EXIT_FAILURE);
+	}
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+      /* Unbuffered output makes partial traces usable after a crash. */
+      if(DoReScheduleOnFetch)
+	setbuf(gr_file,NULL);
+#endif
+
+      /* Header: program name and arguments... */
+      fputs("Granularity Simulation for ",gr_file);
+      for(i=0; i < prog_argc; ++i)
+	{
+	  fputs(prog_argv[i],gr_file);
+	  fputc(' ',gr_file);
+	}
+
+      /* ...and the RTS options, if any. */
+      if(rts_argc > 0)
+	{
+	  fputs("+RTS ",gr_file);
+
+	  for(i=0; i < rts_argc; ++i)
+	    {
+	      fputs(rts_argv[i],gr_file);
+	      fputc(' ',gr_file);
+	    }
+	}
+
+      fputs("\n\n--------------------\n\n",gr_file);
+
+      fputs("General Parameters:\n\n",gr_file);
+
+      /* BUG FIX: the format string previously had only four conversions
+	 for five arguments, so the DoReScheduleOnFetch setting (last
+	 argument) was silently never printed. */
+      fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s, %sReSchedule on Fetch\n",
+	      max_proc,DoFairSchedule?"Fair":"Unfair",
+	      DoThreadMigration?"":"Don't ",
+	      DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"",
+	      DoReScheduleOnFetch?"":"Don't ");
+
+      fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
+	      SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"),
+	      DoGUMMFetching?"Many Closures":"Exactly One Closure");
+      fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n",
+	      FetchStrategy,
+	      FetchStrategy==1?"only run runnable threads (don't create new ones":
+	      FetchStrategy==2?"create threads only from local sparks":
+	      FetchStrategy==3?"create threads from local or global sparks":
+	      FetchStrategy==4?"create sparks and steal threads if necessary":
+	      "unknown");
+
+      fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
+	      gran_threadcreatetime,gran_threadqueuetime);
+      fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
+	      gran_threaddescheduletime,gran_threadscheduletime);
+      fprintf(gr_file, "Thread Context-Switch Time %lu\n",
+	      gran_threadcontextswitchtime);
+      fputs("\n\n--------------------\n\n",gr_file);
+
+      fputs("Communication Metrics:\n\n",gr_file);
+      fprintf(gr_file,
+	      "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
+	      gran_latency, gran_additional_latency, gran_fetchtime,
+	      gran_gunblocktime, gran_lunblocktime);
+      fprintf(gr_file,
+	      "Message Creation %lu (+ %lu after send), Message Read %lu\n",
+	      gran_mpacktime, gran_mtidytime, gran_munpacktime);
+      fputs("\n\n--------------------\n\n",gr_file);
+
+      fputs("Instruction Metrics:\n\n",gr_file);
+      fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
+	      gran_arith_cost, gran_branch_cost,
+	      gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost);
+      fputs("\n\n++++++++++++++++++++\n\n",gr_file);
+    }
+
+  if(do_gr_binary)
+    grputw(sizeof(TIME));
+
+  Idlers = max_proc;
+  return(0);
+}
+
+/* Finish the granularity simulation: close the report file and tell the
+   user where to find it.  No-op unless simulation was on. */
+void end_gr_simulation() {
+    if (!do_gr_sim)
+	return;
+
+    fprintf(stderr,"The simulation is finished. Look at %s for details.\n",
+	    gr_filename);
+    fclose(gr_file);
+}
+#endif /*0*/
+\end{code}
+
+%****************************************************************************
+%
+\subsection[qp-profile]{Quasi-Parallel Profiling}
+%
+%****************************************************************************
+
+\begin{code}
+#ifndef PAR
+
+I_ do_qp_prof;		/* quasi-parallel profiling level (0 = off; >1 adds detail) */
+FILE *qp_file;		/* output stream for the <prog>.qp profile */
+
+/* *Virtual* Time in milliseconds */
+/* Virtual elapsed time for QP events: the user-time clock (seconds,
+   as a StgDouble) converted to whole milliseconds. */
+long
+qp_elapsed_time(STG_NO_ARGS)
+{
+    extern StgDouble usertime();
+
+    return (long) (usertime() * 1e3);
+}
+
+/* Open <prog>.qp and write its header (command line, +RTS settings,
+ * timestamp).  On fopen failure, QP profiling is simply disabled
+ * (do_qp_prof = 0) rather than aborting the program.
+ */
+static void
+init_qp_profiling(STG_NO_ARGS)
+{
+    I_ i;
+    char qp_filename[32];
+
+    /* "%.24s" bounds the program name so name + ".qp" + NUL fit in the
+       32-byte buffer.  (BUG FIX: the old "%0.24s" used the '0' flag,
+       which is undefined behaviour for %s conversions.) */
+    sprintf(qp_filename, "%.24s.qp", prog_argv[0]);
+    if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
+	fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
+	    qp_filename);
+	do_qp_prof = 0;
+    } else {
+	/* Header: the full command line... */
+	fputs(prog_argv[0], qp_file);
+	for(i = 1; prog_argv[i]; i++) {
+	    fputc(' ', qp_file);
+	    fputs(prog_argv[i], qp_file);
+	}
+	/* ...the relevant RTS settings, and a timestamp. */
+	fprintf(qp_file, " +RTS -C%d -t%d\n", contextSwitchTime, MaxThreads);
+	fputs(time_str(), qp_file);
+	fputc('\n', qp_file);
+    }
+}
+
+/* Log a discarded-spark event: virtual time, "**" marker, pseudo thread
+   id, and the info pointer of the spark's node (see its use when sparks
+   are pruned in ReSchedule). */
+void
+QP_Event0(tid, node)
+I_ tid;
+P_ node;
+{
+    fprintf(qp_file, "%lu ** %lu 0x%lx\n",
+	    qp_elapsed_time(), tid, INFO_PTR(node));
+}
+
+/* Log a one-thread QP event: virtual time, event tag, thread id and
+   thread name. */
+void
+QP_Event1(event, tso)
+char *event;
+P_ tso;
+{
+    fprintf(qp_file, "%lu %s %lu 0x%lx\n",
+	    qp_elapsed_time(), event, TSO_ID(tso), TSO_NAME(tso));
+}
+
+/* Log a two-thread QP event: virtual time, event tag, then id/name of
+   both threads involved. */
+void
+QP_Event2(event, tso1, tso2)
+char *event;
+P_ tso1, tso2;
+{
+    fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n",
+	    qp_elapsed_time(), event,
+	    TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
+}
+
+#endif /* !PAR */
+\end{code}
+
+%****************************************************************************
+%
+\subsection[entry-points]{Routines directly called from Haskell world}
+%
+%****************************************************************************
+
+The @GranSim...@ routines in here are directly called via macros from the
+threaded world.
+
+First some auxiliary routines.
+
+\begin{code}
+#ifdef GRAN
+/* Take the current thread off the thread queue and thereby activate the */
+/* next thread. It's assumed that the next ReSchedule after this uses */
+/* NEW_THREAD as param. */
+/* This fct is called from GranSimBlock and GranSimFetch */
+
+/* Pop the current thread off this processor's thread queue, making the
+   next queued thread (if any) current; called from GranSimBlock and
+   GranSimFetch.  Assumes ThreadQueueHd == CurrentTSO on entry.  If the
+   queue runs dry the processor is marked idle. */
+void
+ActivateNextThread ()
+{
+#if defined(GRAN_CHECK) && defined(GRAN)
+  if (ThreadQueueHd != CurrentTSO) {
+    fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
+    exit(99);
+  }
+#endif
+
+  ThreadQueueHd = TSO_LINK(ThreadQueueHd);
+  if (ThreadQueueHd == Nil_closure) {
+    ThreadQueueTl = Nil_closure;
+    MAKE_IDLE(CurrentProc);
+  } else if (do_gr_profile) {
+    /* NOTE(review): the context-switch cost is only charged when
+       profiling is on -- confirm this is intended. */
+    CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
+    DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+  }
+}
+\end{code}
+
+Now the main stg-called routines:
+
+\begin{code}
+/* ------------------------------------------------------------------------ */
+/* The following GranSim... fcts are stg-called from the threaded world. */
+/* ------------------------------------------------------------------------ */
+
+/* Called from HEAP_CHK -- NB: node and liveness are junk here now.
+ They are left temporarily to avoid complete recompilation.
+ KH
+*/
+/* Called from HEAP_CHK: charge the current thread for allocating n words
+   of heap (per-thread counters plus this PE's clock).  node and liveness
+   are junk here now -- left only to avoid complete recompilation (KH). */
+void
+GranSimAllocate(n,node,liveness)
+I_ n;
+P_ node;
+W_ liveness;
+{
+  TSO_ALLOCS(CurrentTSO) += n;
+  TSO_BASICBLOCKS(CurrentTSO) += 1;
+
+  CurrentTime[CurrentProc] += gran_heapalloc_cost;
+  TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost;
+}
+
+/*
+ Subtract the values added above, if a heap check fails and
+ so has to be redone.
+*/
+/* Undo exactly the charges made by GranSimAllocate, for when a heap
+   check fails and the allocation has to be redone.  node and liveness
+   are unused, as above. */
+void
+GranSimUnallocate(n,node,liveness)
+W_ n;
+P_ node;
+W_ liveness;
+{
+  TSO_ALLOCS(CurrentTSO) -= n;
+  TSO_BASICBLOCKS(CurrentTSO) -= 1;
+
+  CurrentTime[CurrentProc] -= gran_heapalloc_cost;
+  TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost;
+}
+
+/* Charge the current thread (and this PE's clock) for a basic block:
+   the weighted sum of its arithmetic ops, branches, loads, stores and
+   floating-point ops. */
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+W_ ariths,branches,loads,stores,floats;
+{
+  W_ t;
+
+  t  = gran_arith_cost  * ariths;
+  t += gran_branch_cost * branches;
+  t += gran_load_cost   * loads;
+  t += gran_store_cost  * stores;
+  t += gran_float_cost  * floats;
+
+  TSO_EXECTIME(CurrentTSO) += t;
+  CurrentTime[CurrentProc] += t;
+}
+
+
+/*
+ Fetch the node if it isn't local
+ -- result indicates whether fetch has been done.
+
+ This is GRIP-style single item fetching.
+*/
+
/* Simulate fetching `node' to CurrentProc when it is not local.
   Returns 1 if a fetch was initiated (the caller must then reschedule,
   see NeedToReSchedule below) and 0 if the node was already local.
   This is GRIP-style single-item fetching. */
I_
GranSimFetch(node /* , liveness_mask */ )
P_ node;
/* I_ liveness_mask; */
{
  /* Note: once a node has been fetched, this test will be passed */
  if(!IS_LOCAL_TO(PROCS(node),CurrentProc) )
    {
      /* I suppose we shouldn't do this for CAFs? -- KH */
      /* Should reschedule if the latency is high */
      /* We should add mpacktime to the remote PE for the reply,
	 but we don't know who owns the node
      */
      /* if(DYNAMIC_POINTER(node)) */	    /* For 0.22; gone in 0.23 !!! */
	{
	  PROC p = where_is(node);
	  TIME fetchtime;

#ifdef GRAN_CHECK
	  if ( ( debug & 0x40 ) &&
	       p == CurrentProc )
	    fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
#endif  /* GRAN_CHECK */

	  /* Charge for packing the fetch request. */
	  CurrentTime[CurrentProc] += gran_mpacktime;

	  ++TSO_FETCHCOUNT(CurrentTSO);
	  TSO_FETCHTIME(CurrentTSO) += gran_fetchtime;

	  /* Fast path: move the node synchronously, charge a lump cost,
	     and skip the whole event-queue machinery. */
	  if (SimplifiedFetch)
	    {
	      FetchNode(node,CurrentProc);
	      CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+
		gran_munpacktime;
	      return(1);
	    }

	  /* The request can be serviced no earlier than both PEs'
	     clocks, plus the communication latency. */
	  fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
	    gran_latency;

	  /* Schedule a FETCHNODE event on the owning PE; the blocked
	     CurrentTSO travels with the event. */
	  newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
	  ++OutstandingFetches[CurrentProc];

	  /* About to block */
	  TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p];

	  if (DoReScheduleOnFetch)
	    {

	      /* Remove CurrentTSO from the queue
		 -- assumes head of queue == CurrentTSO */
	      if(!DoFairSchedule)
		{
		  if(do_gr_profile)
		    DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);

		  ActivateNextThread();

#if defined(GRAN_CHECK) && defined(GRAN)  /* Just for testing */
		  if (debug & 0x10) {
		    if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
		      fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
			      CurrentTSO,CurrentTime[CurrentProc]);
		      exit (99);
		    } else {
		      /* Mark the TSO as sleeping on a fetch. */
		      TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
		    }

		  }
#endif

		  TSO_LINK(CurrentTSO) = Nil_closure;
		  /* CurrentTSO = Nil_closure; */

		  /* ThreadQueueHd is now the next TSO to schedule or NULL */
		  /* CurrentTSO is pointed to by the FETCHNODE event */
		}
	      else /* DoFairSchedule */
		{
		  /* Remove from the tail of the thread queue */
		  fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
		  exit(99);
		}
	    }
	  else /* !DoReScheduleOnFetch */
	    {
	      /* Note: CurrentProc is still busy as it's blocked on fetch */
	      if(do_gr_profile)
		DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);

#if defined(GRAN_CHECK) && defined(GRAN)  /* Just for testing */
	      if (debug & 0x04)
		BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/

	      if (debug & 0x10) {
		if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
		  fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
			  CurrentTSO,CurrentTime[CurrentProc]);
		  exit (99);
		} else {
		  TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
		}

		CurrentTSO = Nil_closure;
	      }
#endif
	    }

	  /* Charge for tidying up after sending the request. */
	  CurrentTime[CurrentProc] += gran_mtidytime;

	  /* Rescheduling is necessary */
	  NeedToReSchedule = StgTrue;

	  return(1);
	}
    }
  return(0);
}
+
/* Account for a spark just created for `node' by the current thread.
   `local' is non-zero when the spark stays on the current PE. */
void
GranSimSpark(local,node)
W_ local;
P_ node;
{
  ++SparksAvail;
  if(do_sp_profile)
    DumpSparkGranEvent(SP_SPARK,node);

  /* Force the PE to take notice of the spark */
  if(DoAlwaysCreateThreads)
    newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
	     FINDWORK,Nil_closure,Nil_closure,NULL);

  if(local)
    ++TSO_LOCALSPARKS(CurrentTSO);
  else
    ++TSO_GLOBALSPARKS(CurrentTSO);
}
+
+void
+GranSimSparkAt(spark,where,identifier)
+sparkq spark;
+P_ where; /* This should be a node; alternatively could be a GA */
+I_ identifier;
+{
+ PROC p = where_is(where);
+ TIME exporttime;
+
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark));
+
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]?
+ CurrentTime[p]: CurrentTime[CurrentProc])
+ + gran_latency;
+
+ newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark);
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+
+ ++TSO_GLOBALSPARKS(CurrentTSO);
+}
+
/* The current thread blocks: log the event, bump its block counter,
   remember when it blocked, and hand the PE to the next runnable
   thread. */
void
GranSimBlock()
{
  if(do_gr_profile)
    DumpGranEvent(GR_BLOCK,CurrentTSO);

  ++TSO_BLOCKCOUNT(CurrentTSO);
  TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
  ActivateNextThread();
}
+
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
+%
+%****************************************************************************
+
+Garbage collection code for the event queue. We walk the event queue
+so that if the only reference to a TSO is in some event (e.g. RESUME),
+the TSO is still preserved.
+
+\begin{code}
+#ifdef GRAN
+
+extern smInfo StorageMgrInfo;
+
+I_
+SaveEventRoots(num_ptr_roots)
+I_ num_ptr_roots;
+{
+ eventq event = EventHd;
+ while(event != NULL)
+ {
+ if(EVENT_TYPE(event) == RESUMETHREAD ||
+ EVENT_TYPE(event) == MOVETHREAD ||
+ EVENT_TYPE(event) == STARTTHREAD )
+ StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
+
+ else if(EVENT_TYPE(event) == MOVESPARK)
+ StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
+
+ else if (EVENT_TYPE(event) == FETCHNODE ||
+ EVENT_TYPE(event) == FETCHREPLY )
+ {
+ StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
+ StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
+ }
+
+ event = EVENT_NEXT(event);
+ }
+ return(num_ptr_roots);
+}
+
/* Push the nodes of all pending sparks (over all PEs and spark pools)
   onto the storage manager's root array, but keep at most MAX_SPARKS
   of them.  Sparks beyond that limit are pruned: their nodes are
   overwritten with Nil_closure, the excess tail of the queue is cut
   off and disposed.  Returns the new number of roots.  Restore order
   is handled by RestoreSpkRoots/RestoreSparkRoots. */
I_
SaveSparkRoots(num_ptr_roots)
I_ num_ptr_roots;
{
  sparkq spark, /* prev, */ disposeQ=NULL;
  PROC proc;
  I_ i, sparkroots=0, prunedSparks=0;

#if defined(GRAN_CHECK) && defined(GRAN)
  if ( debug & 0x40 )
    fprintf(stderr,"D> Saving spark roots for GC ...\n");
#endif

  for(proc = 0; proc < max_proc; ++proc) {
    for(i = 0; i < SPARK_POOLS; ++i) {
      for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
	  spark != NULL;
	  /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
	{
	  if(++sparkroots <= MAX_SPARKS)
	    {
#if defined(GRAN_CHECK) && defined(GRAN)
	      if ( debug & 0x40 )
		fprintf(main_statsfile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n",
			num_ptr_roots,proc,i,SPARK_NODE(spark));
#endif
	      StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
	    }
	  else
	    {
	      /* Over the limit: drop this spark.  disposeQ remembers the
		 first spark of the pruned tail. */
	      SPARK_NODE(spark) = Nil_closure;
	      if (prunedSparks==0) {
		disposeQ = spark;
		/*
		   *prev = NULL;
		*/
	      }
	      prunedSparks++;
	    }
	}  /* forall spark ... */
	/* Detach and free the pruned tail of this pool's queue. */
	if (prunedSparks>0) {
	  fprintf(main_statsfile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
		  prunedSparks,MAX_SPARKS,proc);
	  if (disposeQ == PendingSparksHd[proc][i])
	    PendingSparksHd[proc][i] = NULL;
	  else
	    SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
	  DisposeSparkQ(disposeQ);
	  prunedSparks = 0;
	  disposeQ = NULL;
	}
    }  /* forall i ... */
  }    /*forall proc .. */

  return(num_ptr_roots);
}
+
+/*
+ GC roots must be restored in *reverse order*.
+ The recursion is a little ugly, but is better than
+ in-place pointer reversal.
+*/
+
/*
   GC roots must be restored in *reverse order*.
   The recursion is a little ugly, but is better than
   in-place pointer reversal.
*/

/* Pop the roots pushed by SaveEventRoots back into the event queue.
   Recurses to the end of the queue first so that pops happen in the
   reverse of the push order; the per-event case analysis (and, for
   fetch events, the two-root pop order) must stay the exact mirror of
   SaveEventRoots. */
static I_
RestoreEvtRoots(event,num_ptr_roots)
eventq event;
I_ num_ptr_roots;
{
  if(event != NULL)
    {
      num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);

      if(EVENT_TYPE(event) == RESUMETHREAD ||
	 EVENT_TYPE(event) == MOVETHREAD ||
	 EVENT_TYPE(event) == STARTTHREAD )
	EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];

      else if(EVENT_TYPE(event) == MOVESPARK )
	SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];

      else if (EVENT_TYPE(event) == FETCHNODE ||
	       EVENT_TYPE(event) == FETCHREPLY )
	{
	  /* NODE was pushed last, so it is popped first. */
	  EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
	  EVENT_TSO(event) =  StorageMgrInfo.roots[--num_ptr_roots];
	}
    }

  return(num_ptr_roots);
}
+
+I_
+RestoreEventRoots(num_ptr_roots)
+I_ num_ptr_roots;
+{
+ return(RestoreEvtRoots(EventHd,num_ptr_roots));
+}
+
+static I_
+RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
+sparkq spark;
+I_ num_ptr_roots, sparkroots;
+{
+ if(spark != NULL)
+ {
+ num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
+ if(sparkroots <= MAX_SPARKS)
+ {
+ P_ n = SPARK_NODE(spark);
+ SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x40 )
+ fprintf(main_statsfile,"Restoring Spark Root %d -- new: 0x%lx \n",
+ num_ptr_roots,SPARK_NODE(spark));
+#endif
+ }
+ else
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x40 )
+ fprintf(main_statsfile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n",
+ num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
+#endif
+
+ }
+
+ return(num_ptr_roots);
+}
+
/* Restore the spark roots for every PE and every spark pool, iterating
   in the reverse order of SaveSparkRoots so pops mirror pushes. */
I_
RestoreSparkRoots(num_ptr_roots)
I_ num_ptr_roots;
{
  PROC proc;
  I_ i;

  /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
  /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
  /* of the for loop. For i that is currently not necessary. C is really */
  /* impressive in datatype abstraction!   -- HWL */

  /* The `proc < max_proc' clause is what actually terminates the loop:
     when proc wraps around below 0 it becomes a huge unsigned value. */
  for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) {
    for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
      num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
    }
  }
  return(num_ptr_roots);
}
+
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
+%
+%****************************************************************************
+
+Event dumping routines.
+
+\begin{code}
+#ifdef GRAN
+
/* Log a granularity event for `tso' on the current PE.  (Implicit int
   return type, pre-ANSI style; the result is never used.) */
DumpGranEvent(name,tso)
enum gran_event_types name;
P_ tso;
{
  DumpRawGranEvent(CurrentProc,name,TSO_ID(tso));
}
+
/* Log a spark-related event; `id' is used directly (usually a node
   address or spark id) instead of a TSO id. */
DumpSparkGranEvent(name,id)
enum gran_event_types name;
W_ id;
{
  DumpRawGranEvent(CurrentProc,name,id);
}
+
/* Log an event together with the node involved and the partner PE
   (e.g. GR_FETCH).  NOTE(review): in binary mode (do_gr_binary) the
   node and proc are not written at all -- only the text format carries
   them; presumably intentional, but worth confirming. */
DumpGranEventAndNode(name,tso,node,proc)
enum gran_event_types name;
P_ tso, node;
PROC proc;
{
  PROC pe = CurrentProc;
  W_ id = TSO_ID(tso);

  /* Clamp unknown event codes onto the last valid name. */
  if(name > GR_EVENT_MAX)
    name = GR_EVENT_MAX;

  if(do_gr_binary)
    {
      grputw(name);
      grputw(pe);
      grputw(CurrentTime[CurrentProc]);
      grputw(id);
    }
  else
    fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n",
	    pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc);
}
+
/* Core event logger: write one event record (binary via grputw, or one
   text line) for PE `pe' with payload `id', timestamped with the
   *current* PE's clock. */
DumpRawGranEvent(pe,name,id)
PROC pe;
enum gran_event_types name;
W_ id;
{
  /* Clamp unknown event codes onto the last valid name. */
  if(name > GR_EVENT_MAX)
    name = GR_EVENT_MAX;

  if(do_gr_binary)
    {
      grputw(name);
      grputw(pe);
      grputw(CurrentTime[CurrentProc]);
      grputw(id);
    }
  else
    fprintf(gr_file,"PE %2u [%lu]: %s %lx\n",
	    pe,CurrentTime[CurrentProc],gran_event_names[name],id);
}
+
/* Emit the end-of-thread (GR_END) record with the thread's full set of
   statistics -- spark name, start time, export flag, basic blocks,
   allocations, exec/block/fetch times and counts, spark counts and
   whether it was a mandatory thread. */
DumpGranInfo(pe,tso,mandatory_thread)
PROC pe;
P_ tso;
I_ mandatory_thread;
{
  if(do_gr_binary)
    {
      grputw(GR_END);
      grputw(pe);
      grputw(CurrentTime[CurrentProc]);
      grputw(TSO_ID(tso));
      grputw(TSO_SPARKNAME(tso));
      grputw(TSO_STARTEDAT(tso));
      grputw(TSO_EXPORTED(tso));
      grputw(TSO_BASICBLOCKS(tso));
      grputw(TSO_ALLOCS(tso));
      grputw(TSO_EXECTIME(tso));
      grputw(TSO_BLOCKTIME(tso));
      grputw(TSO_BLOCKCOUNT(tso));
      grputw(TSO_FETCHTIME(tso));
      grputw(TSO_FETCHCOUNT(tso));
      grputw(TSO_LOCALSPARKS(tso));
      grputw(TSO_GLOBALSPARKS(tso));
      grputw(mandatory_thread);
    }
  else
    {
      /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */
      fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
	      ,pe
	      ,CurrentTime[CurrentProc]
	      ,TSO_ID(tso)
	      ,TSO_SPARKNAME(tso)
	      ,TSO_STARTEDAT(tso)
	      ,TSO_EXPORTED(tso)?'T':'F'
	      ,TSO_BASICBLOCKS(tso)
	      ,TSO_ALLOCS(tso)
	      ,TSO_EXECTIME(tso)
	      ,TSO_BLOCKTIME(tso)
	      ,TSO_BLOCKCOUNT(tso)
	      ,TSO_FETCHTIME(tso)
	      ,TSO_FETCHCOUNT(tso)
	      ,TSO_LOCALSPARKS(tso)
	      ,TSO_GLOBALSPARKS(tso)
	      ,mandatory_thread?'T':'F'
	      );
    }
}
+
+DumpTSO(tso)
+P_ tso;
+{
+ fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
+ ,tso
+ ,TSO_NAME(tso)
+ ,TSO_ID(tso)
+ ,TSO_LINK(tso)
+ ,TSO_TYPE(tso)==T_MAIN?"MAIN":
+ TSO_TYPE(tso)==T_FAIL?"FAIL":
+ TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
+ TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
+ "???"
+ );
+
+ fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n"
+ ,TSO_PC1(tso)
+ ,TSO_PC2(tso)
+ ,TSO_ARG1(tso)
+ ,TSO_ARG2(tso)
+ ,TSO_SWITCH(tso)
+ );
+
+ fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
+ ,TSO_SPARKNAME(tso)
+ ,TSO_STARTEDAT(tso)
+ ,TSO_EXPORTED(tso)?'T':'F'
+ ,TSO_BASICBLOCKS(tso)
+ ,TSO_ALLOCS(tso)
+ ,TSO_EXECTIME(tso)
+ ,TSO_BLOCKTIME(tso)
+ ,TSO_BLOCKCOUNT(tso)
+ ,TSO_FETCHTIME(tso)
+ ,TSO_FETCHCOUNT(tso)
+ ,TSO_LOCALSPARKS(tso)
+ ,TSO_GLOBALSPARKS(tso)
+ );
+}
+
+/*
+ Output a terminate event and an 8-byte time.
+*/
+
/* Output a terminate event and an 8-byte (big-endian) time.  When TIME
   is only 4 bytes wide the high 4 bytes are written as zeros so the
   record layout stays fixed. */
grterminate(v)
TIME v;
{
  DumpGranEvent(GR_TERMINATE,0);

  if(sizeof(TIME)==4)
    {
      /* Pad the high word with zero bytes. */
      putc('\0',gr_file);
      putc('\0',gr_file);
      putc('\0',gr_file);
      putc('\0',gr_file);
    }
  else
    {
      putc(v >> 56l,gr_file);
      putc((v >> 48l)&0xffl,gr_file);
      putc((v >> 40l)&0xffl,gr_file);
      putc((v >> 32l)&0xffl,gr_file);
    }
  /* Low 4 bytes, most significant first. */
  putc((v >> 24l)&0xffl,gr_file);
  putc((v >> 16l)&0xffl,gr_file);
  putc((v >> 8l)&0xffl,gr_file);
  putc(v&0xffl,gr_file);
}
+
+/*
+ Length-coded output: first 3 bits contain length coding
+
+ 00x 1 byte
+ 01x 2 bytes
+ 10x 4 bytes
+ 110 8 bytes
+ 111 5 or 9 bytes
+*/
+
/* Write `v' in the length-coded format described above: the top bits of
   the first byte select a 1-, 2-, 4-, 8- or 5/9-byte encoding, and the
   remaining bits of that byte hold the value's high bits. */
grputw(v)
TIME v;
{
  if(v <= 0x3fl)                        /* 6 bits: tag 00 */
    {
      fputc(v & 0x3f,gr_file);
    }

  else if (v <= 0x3fffl)                /* 14 bits: tag 01 */
    {
      fputc((v >> 8l)|0x40l,gr_file);
      fputc(v&0xffl,gr_file);
    }

  else if (v <= 0x3fffffffl)            /* 30 bits: tag 10 */
    {
      fputc((v >> 24l)|0x80l,gr_file);
      fputc((v >> 16l)&0xffl,gr_file);
      fputc((v >> 8l)&0xffl,gr_file);
      fputc(v&0xffl,gr_file);
    }

  else if (sizeof(TIME) == 4)           /* full 32-bit escape: tag 111 */
    {
      fputc(0x70,gr_file);
      fputc((v >> 24l)&0xffl,gr_file);
      fputc((v >> 16l)&0xffl,gr_file);
      fputc((v >> 8l)&0xffl,gr_file);
      fputc(v&0xffl,gr_file);
    }

  else
    {
      if (v <= 0x3fffffffffffffl)       /* 54 bits: tag 110, 8 bytes */
	putc((v >> 56l)|0x60l,gr_file);
      else                              /* full 64-bit escape: tag 111, 9 bytes */
	{
	  putc(0x70,gr_file);
	  putc((v >> 56l)&0xffl,gr_file);
	}

      putc((v >> 48l)&0xffl,gr_file);
      putc((v >> 40l)&0xffl,gr_file);
      putc((v >> 32l)&0xffl,gr_file);
      putc((v >> 24l)&0xffl,gr_file);
      putc((v >> 16l)&0xffl,gr_file);
      putc((v >> 8l)&0xffl,gr_file);
      putc(v&0xffl,gr_file);
    }
}
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[GrAnSim-debug]{Debugging routines for GrAnSim}
+%
+%****************************************************************************
+
+Debugging routines, mainly for GrAnSim. They should really be in a separate file.
+
+The first couple of routines are general ones (look also into
+c-as-asm/StgDebug.lc).
+
+\begin{code}
+
+#define NULL_REG_MAP /* Not threaded */
+#include "stgdefs.h"
+
/* Map an info pointer to a short human-readable closure-type name for
   debug output.  Sequential builds only have the info *tag* available;
   PAR/GRAN builds use the richer INFO_TYPE field. */
char *
info_hdr_type(info_ptr)
W_ info_ptr;
{
#if ! defined(PAR) && !defined(GRAN)
  switch (INFO_TAG(info_ptr))
    {
      case INFO_OTHER_TAG:
	return("OTHER_TAG");
/*    case INFO_IND_TAG:
	return("IND_TAG");
*/    default:
	return("TAG<n>");
    }
#else /* PAR */
  switch(INFO_TYPE(info_ptr))
    {
      case INFO_SPEC_U_TYPE:
	return("SPECU");

      case INFO_SPEC_N_TYPE:
	return("SPECN");

      case INFO_GEN_U_TYPE:
	return("GENU");

      case INFO_GEN_N_TYPE:
	return("GENN");

      case INFO_DYN_TYPE:
	return("DYN");

      /* 
      case INFO_DYN_TYPE_N:
	return("DYNN");

      case INFO_DYN_TYPE_U:
	return("DYNU");
      */

      case INFO_TUPLE_TYPE:
	return("TUPLE");

      case INFO_DATA_TYPE:
	return("DATA");

      case INFO_MUTUPLE_TYPE:
	return("MUTUPLE");

      case INFO_IMMUTUPLE_TYPE:
	return("IMMUTUPLE");

      case INFO_STATIC_TYPE:
	return("STATIC");

      case INFO_CONST_TYPE:
	return("CONST");

      case INFO_CHARLIKE_TYPE:
	return("CHAR");

      case INFO_INTLIKE_TYPE:
	return("INT");

      case INFO_BH_TYPE:
	return("BHOLE");

      case INFO_IND_TYPE:
	return("IND");

      case INFO_CAF_TYPE:
	return("CAF");

      case INFO_FETCHME_TYPE:
	return("FETCHME");

      case INFO_BQ_TYPE:
	return("BQ");

      /*
      case INFO_BQENT_TYPE:
	return("BQENT");
      */

      case INFO_TSO_TYPE:
	return("TSO");

      case INFO_STKO_TYPE:
	return("STKO");

      default:
	fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
	return("??");
      }
#endif /* PAR */
}
+
+/*
+@var_hdr_size@ computes the size of the variable header for a closure.
+*/
+
/* Return the size (in words) of the variable header for `node's
   closure kind; 0 for kinds with no variable header.  Unknown info
   types produce a diagnostic and 0. */
I_
var_hdr_size(node)
P_ node;
{
  switch(INFO_TYPE(INFO_PTR(node)))
    {
      case INFO_SPEC_U_TYPE:	return(0);	/* by decree */
      case INFO_SPEC_N_TYPE:	return(0);
      case INFO_GEN_U_TYPE:	return(GEN_VHS);
      case INFO_GEN_N_TYPE:	return(GEN_VHS);
      case INFO_DYN_TYPE:	return(DYN_VHS);
      /*
      case INFO_DYN_TYPE_N:	return(DYN_VHS);
      case INFO_DYN_TYPE_U:	return(DYN_VHS);
      */
      case INFO_TUPLE_TYPE:	return(TUPLE_VHS);
      case INFO_DATA_TYPE:	return(DATA_VHS);
      case INFO_MUTUPLE_TYPE:	return(MUTUPLE_VHS);
      case INFO_IMMUTUPLE_TYPE:	return(MUTUPLE_VHS); /* same layout */
      case INFO_STATIC_TYPE:	return(STATIC_VHS);
      case INFO_CONST_TYPE:	return(0);
      case INFO_CHARLIKE_TYPE:	return(0);
      case INFO_INTLIKE_TYPE:	return(0);
      case INFO_BH_TYPE:	return(0);
      case INFO_IND_TYPE:	return(0);
      case INFO_CAF_TYPE:	return(0);
      case INFO_FETCHME_TYPE:	return(0);
      case INFO_BQ_TYPE:	return(0);
      /*
      case INFO_BQENT_TYPE:	return(0);
      */
      case INFO_TSO_TYPE:	return(TSO_VHS);
      case INFO_STKO_TYPE:	return(STKO_VHS);
      default:
	fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
	  INFO_TYPE(INFO_PTR(node)));
	return(0);
    }
}
+
+
+/* Determine the size and number of pointers for this kind of closure */
+void
+size_and_ptrs(node,size,ptrs)
+P_ node;
+W_ *size, *ptrs;
+{
+ switch(INFO_TYPE(INFO_PTR(node)))
+ {
+ case INFO_SPEC_U_TYPE:
+ case INFO_SPEC_N_TYPE:
+ *size = INFO_SIZE(INFO_PTR(node)); /* New for 0.24; check */
+ *ptrs = INFO_NoPTRS(INFO_PTR(node)); /* that! -- HWL */
+ /*
+ *size = SPEC_CLOSURE_SIZE(node);
+ *ptrs = SPEC_CLOSURE_NoPTRS(node);
+ */
+ break;
+
+ case INFO_GEN_U_TYPE:
+ case INFO_GEN_N_TYPE:
+ *size = GEN_CLOSURE_SIZE(node);
+ *ptrs = GEN_CLOSURE_NoPTRS(node);
+ break;
+
+ /*
+ case INFO_DYN_TYPE_U:
+ case INFO_DYN_TYPE_N:
+ */
+ case INFO_DYN_TYPE:
+ *size = DYN_CLOSURE_SIZE(node);
+ *ptrs = DYN_CLOSURE_NoPTRS(node);
+ break;
+
+ case INFO_TUPLE_TYPE:
+ *size = TUPLE_CLOSURE_SIZE(node);
+ *ptrs = TUPLE_CLOSURE_NoPTRS(node);
+ break;
+
+ case INFO_DATA_TYPE:
+ *size = DATA_CLOSURE_SIZE(node);
+ *ptrs = DATA_CLOSURE_NoPTRS(node);
+ break;
+
+ case INFO_IND_TYPE:
+ *size = IND_CLOSURE_SIZE(node);
+ *ptrs = IND_CLOSURE_NoPTRS(node);
+ break;
+
+/* ToDo: more (WDP) */
+
+ /* Don't know about the others */
+ default:
+ *size = *ptrs = 0;
+ break;
+ }
+}
+
/* Print one closure to stderr: address, optional GA/cost-centre/
   GrAnSim-bitmask headers, info pointer with decoded type, then the
   payload words six per line, tagged [P] for pointers and [D] for
   non-pointer data. */
void
DEBUG_PRINT_NODE(node)
P_ node;
{
   W_ info_ptr = INFO_PTR(node);
   I_ size = 0, ptrs = 0, i, vhs = 0;
   char *info_type = info_hdr_type(info_ptr);

   size_and_ptrs(node,&size,&ptrs);
   vhs = var_hdr_size(node);

   fprintf(stderr,"Node: 0x%lx", (W_) node);

#if defined(PAR)
   fprintf(stderr," [GA: 0x%lx]",GA(node));
#endif

#if defined(USE_COST_CENTRES)
   fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
#endif

#if defined(GRAN)
   fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
#endif

   fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
	   info_ptr,info_type,size,ptrs);

   /* For now, we ignore the variable header */

   for(i=0; i < size; ++i)
     {
       if(i == 0)
	 fprintf(stderr,"Data: ");

       else if(i % 6 == 0)
	 fprintf(stderr,"\n      ");

       /* Payload starts after the fixed (_FHS) and variable headers. */
       if(i < ptrs)
	 fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
       else
	 fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
     }
   fprintf(stderr, "\n");
}
+
+
+#define INFO_MASK 0x80000000
+
/* Recursively print the closure graph rooted at `node'.  Cycles are
   broken by temporarily setting INFO_MASK (the top bit) in the node's
   info pointer as a visited mark and clearing it on the way out --
   destructive, so only safe in a stopped-world debug context.
   (`size' is computed but only `ptrs' is used here.) */
void
DEBUG_TREE(node)
P_ node;
{
  W_ size = 0, ptrs = 0, i, vhs = 0;

  /* Don't print cycles */
  if((INFO_PTR(node) & INFO_MASK) != 0)
    return;

  size_and_ptrs(node,&size,&ptrs);
  vhs = var_hdr_size(node);

  DEBUG_PRINT_NODE(node);
  fprintf(stderr, "\n");

  /* Mark the node -- may be dangerous */
  INFO_PTR(node) |= INFO_MASK;

  for(i = 0; i < ptrs; ++i)
    DEBUG_TREE((P_)node[i+vhs+_FHS]);

  /* Unmark the node */
  INFO_PTR(node) &= ~INFO_MASK;
}
+
+
/* Print the info-table contents for `node's closure: type name, entry
   code, size/pointer counts, plus whichever GC/parallelism-specific
   entries this build compiled in. */
void
DEBUG_INFO_TABLE(node)
P_ node;
{
  W_ info_ptr = INFO_PTR(node);
  char *ip_type = info_hdr_type(info_ptr);

  fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
                 ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
#if defined(PAR)
  fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
#endif

#if defined(USE_COST_CENTRES)
  fprintf(stderr,"Cost Centre (???):       0x%lx\n",INFO_CAT(info_ptr));
#endif

#if defined(_INFO_COPYING)
  fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
          INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
#endif

#if defined(_INFO_COMPACTING)
  fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
          (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
  fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
          (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
#if 0 /* avoid INFO_TYPE */
  if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
    fprintf(stderr,"plus specialised code\n");
  else
    fprintf(stderr,"Marking:  0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
#endif /* 0 */
#endif
}
+#endif /* GRAN */
+
+\end{code}
+
+The remaining debugging routines are more or less specific for GrAnSim.
+
+\begin{code}
+#if defined(GRAN) && defined(GRAN_CHECK)
/* Print the runnable-thread queue of the current PE; `verbose'
   selects full TSO dumps instead of bare addresses. */
void 
DEBUG_CURR_THREADQ(verbose) 
I_ verbose;
{ 
  fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
  DEBUG_THREADQ(ThreadQueueHd, verbose);
}
+
/* Walk a TSO queue starting at `closure', printing either a full dump
   (verbose) or just the address of each TSO; prints "NIL" for an
   empty queue. */
void 
DEBUG_THREADQ(closure, verbose) 
P_ closure;
I_ verbose;
{
 P_ x;

 fprintf(stderr,"Thread Queue: ");
 for (x=closure; x!=Nil_closure; x=TSO_LINK(x))
   if (verbose) 
     DEBUG_TSO(x,0);
   else
     fprintf(stderr," 0x%x",x);

 if (closure==Nil_closure)
   fprintf(stderr,"NIL\n");
 else
   fprintf(stderr,"\n");
}
+
+/* Check with Threads.lh */
+static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"};
+
+void
+DEBUG_TSO(closure,verbose)
+P_ closure;
+I_ verbose;
+{
+
+ if (closure==Nil_closure) {
+ fprintf(stderr,"TSO at 0x%x is Nil_closure!\n");
+ return;
+ }
+
+ fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure);
+
+ fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure));
+ fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure));
+ fprintf(stderr,"> Id: 0x%x",TSO_ID(closure));
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x10)
+ fprintf(stderr,"\tType: %s %s\n",
+ type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO],
+ (TSO_TYPE(closure) & FETCH_MASK_TSO) ? "SLEEPING" : "");
+ else
+ fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
+#else
+ fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
+#endif
+ fprintf(stderr,"> PC1: 0x%x",TSO_PC1(closure));
+ fprintf(stderr,"\tPC2: 0x%x\n",TSO_PC2(closure));
+ fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure));
+ fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure));
+ fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure));
+
+ if (verbose) {
+ fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure));
+ fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure));
+ fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure));
+ fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure));
+ fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure));
+ fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure));
+ fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure));
+ fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure));
+ fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure));
+ fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure));
+ fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure));
+ fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure));
+ fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure));
+ fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure));
+ }
+}
+
/* Print one event: a full record via print_event when `verbose',
   otherwise just its address. */
void 
DEBUG_EVENT(event, verbose) 
eventq event;
I_ verbose;
{
  if (verbose) {
    print_event(event);
  }else{
    fprintf(stderr," 0x%x",event);
  }
}
+
/* Print the whole global event queue, head first; "NIL" when empty. */
void
DEBUG_EVENTQ(verbose)
I_ verbose;
{
 eventq x;

 fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd);
 for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) {
   DEBUG_EVENT(x,verbose);
 }
 if (EventHd==NULL) 
   fprintf(stderr,"NIL\n");
 else
   fprintf(stderr,"\n");
}
+
/* Print one spark: a full record via print_spark when `verbose',
   otherwise just its address. */
void 
DEBUG_SPARK(spark, verbose)
sparkq spark;
I_ verbose;
{
  if (verbose)
    print_spark(spark);
  else
    fprintf(stderr," 0x%x",spark);
}
+
/* Print a spark queue starting at `spark'; "NIL" when empty. */
void 
DEBUG_SPARKQ(spark,verbose)
sparkq spark;
I_ verbose;
{
 sparkq x;

 fprintf(stderr,"Sparkq (hd @0x%x):\n",spark);
 for (x=spark; x!=NULL; x=SPARK_NEXT(x)) {
   DEBUG_SPARK(x,verbose);
 }
 if (spark==NULL) 
   fprintf(stderr,"NIL\n");
 else
   fprintf(stderr,"\n");
}
+
/* Print the current PE's spark queue. */
void 
DEBUG_CURR_SPARKQ(verbose)
I_ verbose;
{
  DEBUG_SPARKQ(SparkQueueHd,verbose);
}
+
/* Summarise one simulated PE: clock and active/inactive status, its
   runnable-thread queue (full dumps when bit 0x2 of `verbose' is set),
   the current TSO if this is the active PE, the next pending event,
   and -- when bit 0x1 is set -- both spark pools. */
void 
DEBUG_PROC(proc,verbose)
I_ proc;
I_ verbose;
{
  fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
          proc,CurrentTime[proc],CurrentTime[proc],
          (CurrentProc==proc)?"ACTIVE":"INACTIVE");
  DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
  if ( (CurrentProc==proc) )
    DEBUG_TSO(CurrentTSO,1);

  if (EventHd!=NULL)
    fprintf(stderr,"Next event (%s) is on proc %d\n",
            event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));

  if (verbose & 0x1) {
    fprintf(stderr,"\nREQUIRED sparks: ");
    DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
    fprintf(stderr,"\nADVISORY_sparks: ");
    DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
  }
}
+
+/* Debug CurrentTSO */
/* Debug CurrentTSO -- interactive-debugger shorthand. */
void 
DCT(){ 
  fprintf(stderr,"Current Proc: %d\n",CurrentProc);
  DEBUG_TSO(CurrentTSO,1);
}
+
/* Debug Current Processor -- shorthand (threads + sparks: verbose 2). */
void 
DCP(){ DEBUG_PROC(CurrentProc,2); }

/* Shorthand for debugging event queue */
void 
DEQ() { DEBUG_EVENTQ(1); }

/* Shorthand for debugging spark queue */
void 
DSQ() { DEBUG_CURR_SPARKQ(1); }

/* Shorthand for printing a node */
void
DN(P_ node) { DEBUG_PRINT_NODE(node); }
+
+#endif /* GRAN */
+\end{code}
+
+
+%****************************************************************************
+%
+\subsection[qp-profile]{Quasi-Parallel Profiling}
+%
+%****************************************************************************
+
+\begin{code}
+#ifndef GRAN
+I_ do_qp_prof;
+FILE *qp_file;
+
+/* *Virtual* Time in milliseconds */
/* *Virtual* Time in milliseconds: user CPU time (usertime()) scaled
   from seconds to ms. */
long 
qp_elapsed_time()
{
    return ((long) (usertime() * 1e3));
}
+
+static void
+init_qp_profiling(STG_NO_ARGS)
+{
+ I_ i;
+ char qp_filename[32];
+
+ sprintf(qp_filename, "%0.24s.qp", prog_argv[0]);
+ if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
+ fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
+ qp_filename);
+ do_qp_prof = 0;
+ } else {
+ fputs(prog_argv[0], qp_file);
+ for(i = 1; prog_argv[i]; i++) {
+ fputc(' ', qp_file);
+ fputs(prog_argv[i], qp_file);
+ }
+ fprintf(qp_file, "+RTS -C%ld -t%ld\n", contextSwitchTime, MaxThreads);
+ fputs(time_str(), qp_file);
+ fputc('\n', qp_file);
+ }
+}
+
/* Log a "start/creation" qp event: time, thread id and the info
   pointer of the node being evaluated. */
void
QP_Event0(tid, node)
I_ tid;
P_ node;
{
    fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
}
+
/* Log a one-thread qp event (`event' is a short tag string). */
void
QP_Event1(event, tso)
char *event;
P_ tso;
{
    fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
	    TSO_ID(tso), (W_) TSO_NAME(tso));
}
+
/* Log a two-thread qp event (e.g. one thread acting on another). */
void
QP_Event2(event, tso1, tso2)
char *event;
P_ tso1, tso2;
{
    fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
	    TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2));
}
+#endif /* 0 */
+#endif /* GRAN */
+
+#if defined(CONCURRENT) && !defined(GRAN)
+/* romoluSnganpu' SamuS! */
+
+unsigned CurrentProc = 0;
+W_ IdleProcs = ~0l, Idlers = 32;
+
/* No-op stubs for the plain concurrent (non-GranSim) RTS: the compiled
   code calls these unconditionally, so they must exist but do nothing
   here. */
void 
GranSimAllocate(n,node,liveness)
I_ n;
P_ node;
W_ liveness;
{ }

void 
GranSimUnallocate(n,node,liveness)
W_ n; 
P_ node;
W_ liveness;
{ }


void 
GranSimExec(ariths,branches,loads,stores,floats)
W_ ariths,branches,loads,stores,floats;
{ }
+
+I_
+GranSimFetch(node /* , liveness_mask */ )
+P_ node;
+/* I_ liveness_mask; */
+{ }
+
/* Remaining no-op stubs for the non-GranSim concurrent RTS.
   GranSimSparkAt is disabled entirely (#if 0): sparkq is a
   GranSim-only type, so even a stub would not compile here. */
void
GranSimSpark(local,node)
W_ local;
P_ node;
{ }

#if 0
void
GranSimSparkAt(spark,where,identifier)
sparkq spark;
P_ where;         /* This should be a node; alternatively could be a GA */
I_ identifier;
{ }
#endif

void 
GranSimBlock()
{ }
+#endif
+
+\end{code}
+
diff --git a/ghc/runtime/main/TopClosure.lc b/ghc/runtime/main/TopClosure.lc
new file mode 100644
index 0000000000..e2d670c3db
--- /dev/null
+++ b/ghc/runtime/main/TopClosure.lc
@@ -0,0 +1,8 @@
+/* this one will be linked in for Haskell 1.2 */
+\begin{code}
+#include "rtsdefs.h"
+
EXTDATA(Main_mainPrimIO_closure);

/* The closure the RTS starts evaluating -- Haskell 1.2 entry point
   (see TopClosure13.lc for the Haskell 1.3 variant; exactly one of
   the two is linked in). */
P_ TopClosure = Main_mainPrimIO_closure;
+\end{code}
diff --git a/ghc/runtime/main/TopClosure13.lc b/ghc/runtime/main/TopClosure13.lc
new file mode 100644
index 0000000000..07792c2198
--- /dev/null
+++ b/ghc/runtime/main/TopClosure13.lc
@@ -0,0 +1,8 @@
+/* this one will be linked in for Haskell 1.3 */
+\begin{code}
+#include "rtsdefs.h"
+
EXTDATA(Main_mainPrimIO13_closure);

/* The closure the RTS starts evaluating -- Haskell 1.3 entry point
   (see TopClosure.lc for the Haskell 1.2 variant; exactly one of the
   two is linked in). */
P_ TopClosure = Main_mainPrimIO13_closure;
+\end{code}
diff --git a/ghc/runtime/main/main.lc b/ghc/runtime/main/main.lc
new file mode 100644
index 0000000000..98002a3bd9
--- /dev/null
+++ b/ghc/runtime/main/main.lc
@@ -0,0 +1,1355 @@
+%/****************************************************************
+%* *
+%* This is where everything starts *
+%* *
+%****************************************************************/
+
+\begin{code}
+#if defined(USE_COST_CENTRES) || defined(GUM) || defined(CONCURRENT)
+#define NON_POSIX_SOURCE /* time things on Solaris -- sigh */
+#endif
+
+#include "rtsdefs.h"
+#include <setjmp.h>
+
+#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
+# include <string.h>
+/* An ANSI string.h and pre-ANSI memory.h might conflict. */
+# if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
+# include <memory.h>
+# endif /* not STDC_HEADERS and HAVE_MEMORY_H */
+# define index strchr
+# define rindex strrchr
+# define bcopy(s, d, n) memcpy ((d), (s), (n))
+# define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+# define bzero(s, n) memset ((s), 0, (n))
+#else /* not STDC_HEADERS and not HAVE_STRING_H */
+# include <strings.h>
+/* memory.h and strings.h conflict on some systems. */
+#endif /* not STDC_HEADERS and not HAVE_STRING_H */
+
+#if defined(USE_COST_CENTRES) || defined(GUM)
+/* need some "time" things */
+
+/* ToDo: This is a mess! Improve ? */
+
+# ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+# endif
+
+# ifdef HAVE_SYS_TIMES_H
+# include <sys/times.h>
+# endif
+
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# endif
+#endif /* USE_COST_CENTRES || GUM */
+
+#ifndef PAR
+STGRegisterTable MainRegTable;
+#endif
+
+/* fwd decls */
+void setupRtsFlags PROTO((int *argc, char *argv[], I_ *rtsc, char *rtsv[]));
+void shutdownHaskell(STG_NO_ARGS);
+
+EXTFUN(startStgWorld);
+extern void PrintRednCountInfo(STG_NO_ARGS);
+extern void checkAStack(STG_NO_ARGS);
+
+/* a real nasty Global Variable */
+/* moved to main/TopClosure(13)?.lc -- *one* of them will get linked in
+P_ TopClosure = Main_mainPrimIO_closure;
+ */
+
+/* structure to carry around info about the storage manager */
+smInfo StorageMgrInfo;
+
+FILE *main_statsfile = NULL;
+#if defined(DO_REDN_COUNTING)
+FILE *tickyfile = NULL;
+#endif
+#if defined(SM_DO_BH_UPDATE)
+I_ noBlackHoles = 0;
+#endif
+I_ doSanityChks = 0;
+I_ showRednCountStats = 0;
+I_ traceUpdates = 0;
+extern I_ squeeze_upd_frames;
+
+#ifdef PAR
+extern I_ OkToGC, buckets, average_stats();
+extern rtsBool TraceSparks, OutputDisabled, DelaySparks,
+ DeferGlobalUpdates, ParallelStats;
+
+extern void RunParallelSystem PROTO((P_));
+extern void initParallelSystem(STG_NO_ARGS);
+extern void SynchroniseSystem(STG_NO_ARGS);
+
+extern void SetTrace PROTO((W_ address, I_ level/*?*/));
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+extern W_ debug;
+extern W_ event_trace ;
+extern W_ event_trace_all ;
+#endif
+
+extern void *stgAllocForGMP PROTO((size_t));
+extern void *stgReallocForGMP PROTO ((void *, size_t, size_t));
+extern void stgDeallocForGMP PROTO ((void *, size_t));
+
+#if defined (DO_SPAT_PROFILING) && sparc_TARGET_ARCH
+ /* NOTE: I, WDP, do not use this in my SPAT profiling */
+W_ KHHP, KHHPLIM, KHSPA, KHSPB;
+#endif
+
+/* NeXTs can't just reach out and touch "end", to use in
+ distinguishing things in static vs dynamic (malloc'd) memory.
+*/
+#if nextstep2_TARGET_OS || nextstep3_TARGET_OS /* ToDo: use END_BY_FUNNY_MEANS or something */
+void *get_end_result;
+#endif
+
+I_ prog_argc;
+char **prog_argv;
+I_ rts_argc;
+char *rts_argv[MAX_RTS_ARGS];
+
+#ifndef PAR
+jmp_buf restart_main; /* For restarting after a signal */
+#endif
+
+#if defined(PVM)
+unsigned nPEs = 0, nIMUs = 0;
+#endif
+
+#if defined(GUM)
+int nPEs = 0;
+#endif
+
+int /* return type of "main" is defined by the C standard */
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+\end{code}
+
+The very first thing we do is grab the start time...just in case we're
+collecting timing statistics.
+
+\begin{code}
+
+ start_time();
+
+\end{code}
+
+The parallel system needs to be initialised and synchronised before
+the program is run. This is done {\em before} heap allocation, so we
+can grab all remaining heap without needing to consider the System
+Manager's requirements.
+
+\begin{code}
+#ifdef PAR
+ /*
+ * Grab the number of PEs out of the argument vector, and eliminate it
+ * from further argument processing
+ */
+ /* NOTE(review): argc is not checked before argv[1] is read; the PE
+ count is assumed to always be supplied by the parallel launch
+ machinery -- confirm against the PVM startup script. */
+ nPEs = atoi(argv[1]);
+ argv[1] = argv[0];
+ argv++;
+ argc--;
+
+/* fprintf(stderr, "I'm alive, nPEs = %d \n", nPEs); */
+ SynchroniseSystem();
+#endif
+
+#if defined(USE_COST_CENTRES) || defined(GUM)
+ /* setup string indicating time of run -- only used for profiling */
+ (void) time_str();
+#endif
+
+#if nextstep2_TARGET_OS || nextstep3_TARGET_OS /* ToDo: use END_BY_FUNNY_MEANS or something */
+ get_end_result = get_end();
+#endif
+
+ /*
+ divide the command-line args between pgm and RTS;
+ figure out what statsfile to use (if any);
+ [if so, write the whole cmd-line into it]
+
+ This is unlikely to work well in parallel! KH.
+ */
+ setupRtsFlags(&argc, argv, &rts_argc, rts_argv);
+ prog_argc = argc;
+ prog_argv = argv;
+
+#if defined(PAR)
+ /* Initialise the parallel system -- before initHeap! */
+ initParallelSystem();
+#endif /* PAR */
+
+#if defined(LIFE_PROFILE)
+ if (life_profile_init(rts_argv, prog_argv) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "life_profile_init failed!\n");
+ EXIT(EXIT_FAILURE);
+ }
+#endif
+
+#if defined(USE_COST_CENTRES) || defined(GUM)
+ if (init_cc_profiling(rts_argc, rts_argv, prog_argv) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "init_cc_profiling failed!\n");
+ EXIT(EXIT_FAILURE);
+ }
+#endif
+
+#if defined(CONCURRENT) && defined(GRAN)
+ if (!no_gr_profile)
+ if (init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) != 0) {
+ fprintf(stderr, "init_gr_simulation failed!\n"); EXIT(EXIT_FAILURE);
+ }
+#endif
+
+#ifdef PAR
+ if (do_gr_profile)
+ init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv);
+#endif
+
+ /*
+ initialize the storage manager
+ */
+ if ( initSM(rts_argc, rts_argv, main_statsfile) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "initSM failed!\n");
+ EXIT(EXIT_FAILURE);
+ }
+
+#ifndef PAR
+ if ( initStacks( &StorageMgrInfo ) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "initStacks failed!\n");
+ EXIT(EXIT_FAILURE);
+ }
+#endif
+
+ if ( initHeap( &StorageMgrInfo ) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "initHeap failed!\n"); EXIT(EXIT_FAILURE);
+ }
+
+#if defined(CONCURRENT) && !defined(GRAN)
+ if (!initThreadPools(MaxLocalSparks)) {
+ fflush(stdout);
+ fprintf(stderr, "initThreadPools failed!\n");
+ EXIT(EXIT_FAILURE);
+ }
+#endif
+
+#if defined(USE_COST_CENTRES) || defined(GUM)
+ /* call cost centre registering routine (after heap allocated) */
+ cc_register();
+#endif
+
+/* Information needed by runtime trace analysers -- don't even ask what it does! */
+ /* NOTE: I, WDP, do not use this in my SPAT profiling */
+#if defined (DO_SPAT_PROFILING) && sparc_TARGET_ARCH
+ KHHPLIM = (W_) StorageMgrInfo.hplim;
+ KHHP = (W_) StorageMgrInfo.hp;
+ KHSPA = (W_) SAVE_SpA,
+ KHSPB = (W_) SAVE_SpB;
+
+/* fprintf(stderr,"Hp = %lx, HpLim = %lx, SpA = %lx, SpB = %lx\n",KHHP,KHHPLIM,KHSPA,KHSPB); */
+
+/* NOT ME:
+ __asm__("sethi %%hi(_KHHP),%%o0\n\tld [%%o0+%%lo(_KHHP)],%%g0" : : : "%%o0");
+ __asm__("sethi %%hi(_KHHPLIM),%%o0\n\tld [%%o0+%%lo(_KHHPLIM)],%%g0" : : : "%%o0");
+ __asm__("sethi %%hi(_KHSPA),%%o0\n\tld [%%o0+%%lo(_KHSPA)],%%g0" : : : "%%o0");
+ __asm__("sethi %%hi(_KHSPB),%%o0\n\tld [%%o0+%%lo(_KHSPB)],%%g0" : : : "%%o0");
+*/
+#endif
+
+#if defined(DO_REDN_COUNTING)
+ max_SpA = MAIN_SpA; /* initial high-water marks */
+ max_SpB = MAIN_SpB;
+#endif
+
+ /* Tell GNU multi-precision pkg about our custom alloc functions */
+ mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
+
+ /* Record initialization times */
+ end_init();
+
+#if defined(USE_COST_CENTRES) || defined(CONCURRENT)
+ /*
+ * Both the context-switcher and the cost-center profiler use
+ * a virtual timer.
+ */
+ if (install_vtalrm_handler()) {
+ fflush(stdout);
+ fprintf(stderr, "Can't install VTALRM handler.\n");
+ EXIT(EXIT_FAILURE);
+ }
+#if (defined(CONCURRENT) && defined(USE_COST_CENTRES)) || defined(GUM)
+ if (time_profiling) {
+ if (contextSwitchTime % (1000/TICK_FREQUENCY) == 0)
+ tick_millisecs = TICK_MILLISECS;
+ else
+ tick_millisecs = CS_MIN_MILLISECS;
+
+ contextSwitchTicks = contextSwitchTime / tick_millisecs;
+ profilerTicks = TICK_MILLISECS / tick_millisecs;
+ } else
+ tick_millisecs = contextSwitchTime;
+#endif
+
+#ifndef CONCURRENT
+ START_TIME_PROFILER;
+#endif
+
+#endif /* USE_COST_CENTRES || CONCURRENT */
+
+#ifndef PAR
+ /* Signal handlers longjmp() back to this point to restart the program. */
+ setjmp(restart_main);
+ initUserSignals();
+#endif
+
+#ifdef CONCURRENT
+# if defined(GRAN) /* HWL */
+ /* RunnableThreadsHd etc. are init in ScheduleThreads */
+ /*
+ * I'm not sure about this. Note that this code is for re-initializing
+ * things when a longjmp to restart_main occurs. --JSM
+ */
+
+# else /* !GRAN */
+ AvailableStack = AvailableTSO = Nil_closure;
+ RunnableThreadsHd = RunnableThreadsTl = Nil_closure;
+ WaitingThreadsHd = WaitingThreadsTl = Nil_closure;
+ PendingSparksHd[REQUIRED_POOL] =
+ PendingSparksTl[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL];
+ PendingSparksHd[ADVISORY_POOL] =
+ PendingSparksTl[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL];
+# endif
+
+ /* No Haskell thread is running yet. */
+ CurrentTSO = Nil_closure;
+
+# ifdef PAR
+ RunParallelSystem(TopClosure);
+# else
+ STKO_LINK(MainStkO) = Nil_closure;
+ ScheduleThreads(TopClosure);
+# endif /* PAR */
+
+#else /* not threaded (sequential) */
+
+# if defined(__STG_TAILJUMPS__)
+ miniInterpret((StgFunPtr)startStgWorld);
+# else
+ if (doSanityChks)
+ miniInterpret_debug((StgFunPtr)startStgWorld, checkAStack);
+ else
+ miniInterpret((StgFunPtr)startStgWorld);
+# endif /* not tail-jumping */
+#endif /* !CONCURRENT */
+
+ shutdownHaskell();
+ return(EXIT_SUCCESS); /* don't use EXIT! :-) */
+}
+\end{code}
+
+It should be possible to call @shutdownHaskell@ whenever you want to
+shut a Haskell program down in an orderly way.
+
+Note that some of this code probably depends on the integrity of
+various internal data structures so this should not be called in
+response to detecting a catastrophic error.
+
+\begin{code}
+/* Orderly RTS shutdown: stop the profiling timer, flush and close the
+   storage manager, then emit any requested profiling reports.  Depends
+   on internal data structures being intact, so it must not be called in
+   response to a catastrophic internal error. */
+void
+shutdownHaskell(STG_NO_ARGS)
+{
+ STOP_TIME_PROFILER;
+
+ if (exitSM(&StorageMgrInfo) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "exitSM failed!\n");
+ EXIT(EXIT_FAILURE);
+ }
+#if defined(LIFE_PROFILE)
+ {
+ extern P_ hp_start; /* from the SM -- Hack! */
+ life_profile_finish(StorageMgrInfo.hp - hp_start, prog_argv);
+ }
+#endif
+
+#if defined(USE_COST_CENTRES)
+ heap_profile_finish();
+#endif
+#if defined(USE_COST_CENTRES) || defined(GUM)
+ report_cc_profiling(1 /* final */ );
+#endif
+
+#if defined(DO_REDN_COUNTING)
+ if (showRednCountStats) {
+ PrintRednCountInfo();
+ }
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (PrintFetchMisses)
+ fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses);
+
+# if defined(COUNT)
+ fprintf(stderr,"COUNT statistics:\n");
+ fprintf(stderr," Total number of updates: %u\n",nUPDs);
+ fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
+ nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
+ fprintf(stderr," Number of PAPs: %u\n",nPAPs);
+# endif
+
+ if (!no_gr_profile)
+ end_gr_simulation();
+#endif
+
+ fflush(stdout);
+ /* This fflush is important, because: if "main" just returns,
+ then we will end up in pre-supplied exit code that will close
+ streams and flush buffers. In particular we have seen: it
+ will close fd 0 (stdin), then flush fd 1 (stdout), then <who
+ cares>...
+
+ But if you're playing with sockets, that "close fd 0" might
+ suggest to the daemon that all is over, only to be presented
+ with more stuff on "fd 1" at the flush.
+
+ The fflush avoids this sad possibility.
+ */
+}
+\end{code}
+
+%/****************************************************************
+%* *
+%* Getting default settings for RTS parameters *
+%* *
+%* +RTS indicates following arguments destined for RTS *
+%* -RTS indicates following arguments destined for program *
+%* *
+%****************************************************************/
+\begin{code}
+
+/* Usage/help text printed (one line per entry, NULL-terminated) when RTS
+   flag parsing fails or -?/-f is given.  Fix: "liftime" -> "lifetime" in
+   the -l help line. */
+char *flagtext[] = {
+"",
+"Usage: <prog> <args> [+RTS <rtsopts> | -RTS <args>] ... --RTS <args>",
+"",
+" +RTS Indicates run time system options follow",
+" -RTS Indicates program arguments follow",
+" --RTS Indicates that ALL subsequent arguments will be given to the",
+" program (including any of these RTS flags)",
+"",
+"The following run time system options are available:",
+"",
+" -? -f Prints this message and exits; the program is not executed",
+"",
+" -K<size> Sets the stack size (default 64k) Egs: -K32k -K512k",
+" -H<size> Sets the heap size (default 4M) -H512k -H16M",
+" -s<file> Summary GC statistics (default file: <program>.stat)",
+" -S<file> Detailed GC statistics (with -Sstderr going to stderr)",
+"",
+#if defined(GCap)
+" -M<n>% Sets minimum size of alloc area as % of heap (default 3%)",
+" -A<size> Fixes size of alloc area, overriding any minimum (-A gives 64k)",
+" -G<size> Fixes size of major generation (default is dynamic threshold)",
+" -F2s Forces program compiled for Appel gc to use 2s collection",
+#else
+# if defined(GCgn)
+" -A<size> Specifies size of alloc area (default 64k)",
+" -G<size> Fixes size of major generation (default is available heap)",
+" -F2s Forces program compiled for Gen gc to use 2s collection",
+# else
+" -M<n>% Minimum % of heap which must be available (default 3%)",
+" -A<size> Fixes size of heap area allocated between GCs (-A gives 64k)",
+# endif
+#endif
+#if defined(FORCE_GC)
+" -j<size> Forces major GC at every <size> bytes allocated",
+#endif /* FORCE_GC */
+#if defined(GCdu)
+" -u<percent> Fixes residency threshold at which mode switches (range 0.0..0.95)",
+#endif
+"",
+#if defined(SM_DO_BH_UPDATE)
+" -N No black-holing (for use when a signal handler is present)",
+#endif
+" -Z Don't squeeze out update frames on stack overflow",
+" -B Sound the bell at the start of each (major) garbage collection",
+#if defined(USE_COST_CENTRES) || defined(GUM)
+"",
+" -p<sort> Produce cost centre time profile (output file <program>.prof)",
+" sort: T = time (default), A = alloc, C = cost centre label",
+" -P<sort> Produce serial time profile (output file <program>.time)",
+" and a -p profile with detailed caf/enter/tick/alloc info",
+#if defined(USE_COST_CENTRES)
+"",
+" -h<break-down> Heap residency profile (output file <program>.hp)",
+" break-down: C = cost centre (default), M = module, G = group",
+" D = closure description, Y = type description",
+" T<ints>,<start> = time closure created",
+" ints: no. of interval bands plotted (default 18)",
+" start: seconds after which intervals start (default 0.0)",
+" A subset of closures may be selected by the attached cost centre using:",
+" -c{mod:lab,mod:lab...}, specific module:label cost centre(s)",
+" -m{mod,mod...} all cost centres from the specified modules(s)",
+" -g{grp,grp...} all cost centres from the specified group(s)",
+" Selections can also be made by description, type, kind and age:",
+" -d{des,des...} closures with specified closure descriptions",
+" -y{typ,typ...} closures with specified type descriptions",
+" -k{knd,knd...} closures of the specified kinds",
+" -a<age> closures which survived <age> complete intervals",
+" The selection logic used is summarised as follows:",
+" ([-c] or [-m] or [-g]) and ([-d] or [-y] or [-k]) and [-a]",
+" where an option is true if not specified",
+#endif
+"",
+" -z<tbl><size> set hash table <size> for <tbl> (C, M, G, D or Y)",
+"",
+" -i<secs> Number of seconds in a profiling interval (default 1.0):",
+" heap profile (-h) and/or serial time profile (-P) frequency",
+#endif /* USE_COST_CENTRES */
+#if defined(LIFE_PROFILE)
+"",
+" -l<res> Produce lifetime and update profile (output file <program>.life)",
+" res: the age resolution in bytes allocated (default 10,000)",
+#endif /* LIFE_PROFILE */
+"",
+#if defined(DO_REDN_COUNTING)
+" -r<file> Produce reduction profiling statistics (with -rstderr for stderr)",
+"",
+#endif
+" -I Use debugging miniInterpret with stack and heap sanity-checking.",
+" -T<level> Trace garbage collection execution (debugging)",
+#ifdef CONCURRENT
+"",
+# ifdef PAR
+" -N<n> Use <n> PVMish processors in parallel (default: 2)",
+/* NB: the -N<n> is implemented by the driver!! */
+# endif
+" -C<secs> Context-switch interval in seconds",
+" (0 or no argument means switch as often as possible)",
+" the default is .01 sec; resolution is .01 sec",
+" -e<size> Size of spark pools (default 100)",
+# ifdef PAR
+" -q Enable activity profile (output files in ~/<program>*.gr)",
+" -qb Enable binary activity profile (output file /tmp/<program>.gb)",
+#else
+" -q[v] Enable quasi-parallel profile (output file <program>.qp)",
+# endif
+" -t<num> Set maximum number of advisory threads per PE (default 32)",
+" -o<num> Set stack chunk size (default 1024)",
+# ifdef PAR
+" -d Turn on PVM-ish debugging",
+" -O Disable output for performance measurement",
+# endif /* PAR */
+#endif /* CONCURRENT */
+"",
+"Other RTS options may be available for programs compiled a different way.",
+"The GHC User's Guide has full details.",
+"",
+0
+};
+
+/* Argument-scanning modes for setupRtsFlags: which side of the
+   +RTS / -RTS markers the scanner is currently on. */
+#define RTS 1
+#define PGM 0
+
+#ifndef atof
+extern double atof();
+/* no proto because some machines use const and some do not */
+#endif
+
+/* Split the command line at the +RTS / -RTS / --RTS markers into program
+   arguments (left in argv/argc) and RTS arguments (copied to
+   rts_argv/rts_argc), then scan the RTS part -- mainly to open the
+   stats/ticky files and set the simple global flags.  On any bad flag the
+   usage text is printed and the process exits.
+   Fixes relative to the original: overlapping strcpy on argv[0] replaced
+   by memmove (overlapping strcpy is undefined behaviour); two error cases
+   that silently fell through into the SM-ignore cases now break
+   explicitly; a copy-pasted comment on PrintFetchMisses corrected. */
+void
+setupRtsFlags(argc, argv, rts_argc, rts_argv)
+int *argc;
+I_ *rts_argc;
+char *argv[], *rts_argv[];
+{
+ I_ error = 0;
+ I_ mode;
+ I_ arg, total_arg;
+ char *last_slash;
+
+ /* Remove directory from argv[0] -- default files in current directory */
+
+ if ((last_slash = (char *) rindex(argv[0], '/')) != NULL)
+ /* The basename overlaps argv[0] itself; strcpy on overlapping
+ regions is undefined behaviour, so shift it down with memmove. */
+ memmove(argv[0], last_slash+1, strlen(last_slash+1)+1);
+
+ /* Split arguments (argv) into PGM (argv) and RTS (rts_argv) parts */
+ /* argv[0] must be PGM argument -- leave in argv */
+
+ total_arg = *argc;
+ arg = 1;
+
+ *argc = 1;
+ *rts_argc = 0;
+
+ for (mode = PGM; arg < total_arg && strcmp("--RTS", argv[arg]) != 0; arg++) {
+ if (strcmp("+RTS", argv[arg]) == 0) {
+ mode = RTS;
+ }
+ else if (strcmp("-RTS", argv[arg]) == 0) {
+ mode = PGM;
+ }
+ else if (mode == RTS && *rts_argc < MAX_RTS_ARGS-1) {
+ rts_argv[(*rts_argc)++] = argv[arg];
+ }
+ else if (mode == PGM) {
+ argv[(*argc)++] = argv[arg];
+ }
+ else {
+ fflush(stdout);
+ fprintf(stderr, "setupRtsFlags: Too many RTS arguments (max %d)\n",
+ MAX_RTS_ARGS-1);
+ EXIT(EXIT_FAILURE);
+ }
+ }
+ if (arg < total_arg) {
+ /* arg must be --RTS; process remaining program arguments */
+ while (++arg < total_arg) {
+ argv[(*argc)++] = argv[arg];
+ }
+ }
+ argv[*argc] = (char *) 0;
+ rts_argv[*rts_argc] = (char *) 0;
+
+ /* Process RTS (rts_argv) part: mainly to determine statsfile */
+
+ for (arg = 0; arg < *rts_argc; arg++) {
+ if (rts_argv[arg][0] == '-') {
+ switch(rts_argv[arg][1]) {
+ case '?':
+ case 'f':
+ error = 1;
+ break;
+
+ case 'Z': /* Don't squeeze out update frames */
+ squeeze_upd_frames = 0;
+ break;
+
+#if defined(SM_DO_BH_UPDATE)
+ case 'N':
+ noBlackHoles++;
+ break;
+#endif
+
+ case 'I':
+ doSanityChks++;
+#if defined(__STG_TAILJUMPS__)
+ /* Blech -- too many errors if run in parallel -- KH */
+ fprintf(stderr, "setupRtsFlags: Using Tail Jumps: Sanity checks not possible: %s\n", rts_argv[arg]);
+ error = 1;
+#endif
+ break;
+
+ case 'U':
+ traceUpdates++;
+#if ! defined(DO_RUNTIME_TRACE_UPDATES)
+ fprintf(stderr, "setupRtsFlags: Update Tracing not compiled in: %s\n", rts_argv[arg]);
+ error = 1;
+#endif
+ break;
+
+ case 'r': /* Basic profiling stats */
+ showRednCountStats++;
+#if ! defined(DO_REDN_COUNTING)
+ fprintf(stderr, "setupRtsFlags: Reduction counting not compiled in: %s\n", rts_argv[arg]);
+ error = 1;
+
+#else /* ticky-ticky! */
+ if (strcmp(rts_argv[arg]+2, "stderr") == 0) /* use real stderr */
+ tickyfile = stderr;
+ else if (rts_argv[arg][2] != '\0') /* ticky file specified */
+ tickyfile = fopen(rts_argv[arg]+2,"w");
+ else {
+ char stats_filename[STATS_FILENAME_MAXLEN]; /* default <program>.ticky */
+ sprintf(stats_filename, TICKY_FILENAME_FMT, argv[0]);
+ tickyfile = fopen(stats_filename,"w");
+ }
+ if (tickyfile == NULL) {
+ fprintf(stderr, "Can't open tickyfile %s (default %0.24s.ticky)\n",
+ rts_argv[arg]+2, argv[0]);
+ error = 1;
+ } else {
+ /* Write argv and rtsv into start of ticky file */
+ I_ count;
+ for(count = 0; count < *argc; count++)
+ fprintf(tickyfile, "%s ", argv[count]);
+ fprintf(tickyfile, "+RTS ");
+ for(count = 0; count < *rts_argc; count++)
+ fprintf(tickyfile, "%s ", rts_argv[count]);
+ fprintf(tickyfile, "\n");
+ }
+#endif /* ticky-ticky! */
+ break;
+
+ case 's': /* Also used by GC -- open file here */
+ case 'S':
+#ifdef PAR
+ /* Opening all those files would almost certainly fail... */
+ ParallelStats = rtsTrue;
+ main_statsfile = stderr; /* temporary; ToDo: rm */
+#else
+ if (strcmp(rts_argv[arg]+2, "stderr") == 0) /* use real stderr */
+ main_statsfile = stderr;
+ else if (rts_argv[arg][2] != '\0') /* stats file specified */
+ main_statsfile = fopen(rts_argv[arg]+2,"w");
+ else {
+ char stats_filename[STATS_FILENAME_MAXLEN]; /* default <program>.stat */
+ sprintf(stats_filename, STAT_FILENAME_FMT, argv[0]);
+ main_statsfile = fopen(stats_filename,"w");
+ }
+ if (main_statsfile == NULL) {
+ fprintf(stderr, "Can't open statsfile %s (default %0.24s.stat)\n", rts_argv[arg]+2, argv[0]);
+ error = 1;
+ } else {
+ /* Write argv and rtsv into start of stats file */
+ I_ count;
+ for(count = 0; count < *argc; count++)
+ fprintf(main_statsfile, "%s ", argv[count]);
+ fprintf(main_statsfile, "+RTS ");
+ for(count = 0; count < *rts_argc; count++)
+ fprintf(main_statsfile, "%s ", rts_argv[count]);
+ fprintf(main_statsfile, "\n");
+ }
+#endif
+ break;
+
+ case 'P': /* detailed cost centre profiling (time/alloc) */
+ case 'p': /* cost centre profiling (time/alloc) */
+ case 'i': /* serial profiling -- initial timer interval */
+#if ! (defined(USE_COST_CENTRES) || defined(GUM))
+ fprintf(stderr, "setupRtsFlags: Not built for cost centre profiling: %s\n", rts_argv[arg]);
+ error = 1;
+#endif /* ! (USE_COST_CENTRES || GUM) */
+ break;
+ case 'h': /* serial heap profile */
+ case 'z': /* size of index tables */
+ case 'c': /* cost centre label select */
+ case 'm': /* cost centre module select */
+ case 'g': /* cost centre group select */
+ case 'd': /* closure descr select */
+ case 'y': /* closure type select */
+ case 'k': /* closure kind select */
+ case 'a': /* closure age select */
+#if ! defined(USE_COST_CENTRES)
+ fprintf(stderr, "setupRtsFlags: Not built for cost centre profiling: %s\n", rts_argv[arg]);
+ error = 1;
+#endif /* ! USE_COST_CENTRES */
+ break;
+
+ case 'j': /* force GC option */
+#if defined(FORCE_GC)
+ force_GC++;
+ if (rts_argv[arg][2]) {
+ GCInterval = decode(rts_argv[arg]+2) / sizeof(W_);
+ }
+#else /* ! FORCE_GC */
+ fprintf(stderr, "setupRtsFlags: Not built for forcing GC: %s\n", rts_argv[arg]);
+ error = 1;
+#endif /* ! FORCE_GC */
+ break;
+
+ case 'l': /* life profile option */
+#if defined(LIFE_PROFILE)
+ do_life_prof++;
+ if (rts_argv[arg][2]) {
+ LifeInterval = decode(rts_argv[arg]+2) / sizeof(W_);
+ }
+#else /* ! LIFE_PROFILE */
+ fprintf(stderr, "setupRtsFlags: Not built for lifetime profiling: %s\n", rts_argv[arg]);
+ error = 1;
+#endif /* ! LIFE_PROFILE */
+ break;
+
+ /* Flags for the threaded RTS */
+
+#ifdef CONCURRENT
+ case 'C': /* context switch interval */
+ if (rts_argv[arg][2] != '\0') {
+ /* Convert to milliseconds */
+ contextSwitchTime = (I_) ((atof(rts_argv[arg]+2) * 1000));
+ contextSwitchTime = (contextSwitchTime / CS_MIN_MILLISECS)
+ * CS_MIN_MILLISECS;
+ if (contextSwitchTime < CS_MIN_MILLISECS)
+ contextSwitchTime = CS_MIN_MILLISECS;
+ } else
+ contextSwitchTime = 0;
+ break;
+#if !defined(GRAN)
+ case 'e':
+ if (rts_argv[arg][2] != '\0') {
+ MaxLocalSparks = strtol(rts_argv[arg]+2, (char **) NULL, 10);
+ if (MaxLocalSparks <= 0) {
+ fprintf(stderr, "setupRtsFlags: bad value for -e\n");
+ error = 1;
+ }
+ } else
+ MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS;
+ break;
+#endif
+#ifdef PAR
+ case 'q': /* activity profile option */
+ if (rts_argv[arg][2] == 'b')
+ do_gr_binary++;
+ else
+ do_gr_profile++;
+ break;
+#else
+ case 'q': /* quasi-parallel profile option */
+ if (rts_argv[arg][2] == 'v')
+ do_qp_prof = 2;
+ else
+ do_qp_prof++;
+ break;
+#endif
+ case 't':
+ if (rts_argv[arg][2] != '\0') {
+ MaxThreads = strtol(rts_argv[arg]+2, (char **) NULL, 10);
+ } else {
+ fprintf(stderr, "setupRtsFlags: missing size for -t\n");
+ error = 1;
+ }
+ break;
+
+ case 'o':
+ if (rts_argv[arg][2] != '\0') {
+ StkOChunkSize = decode(rts_argv[arg]+2);
+ if (StkOChunkSize < MIN_STKO_CHUNK_SIZE)
+ StkOChunkSize = MIN_STKO_CHUNK_SIZE;
+ } else {
+ fprintf(stderr, "setupRtsFlags: missing size for -o\n");
+ error = 1;
+ }
+ break;
+
+# ifdef PAR
+ case 'O':
+ OutputDisabled = rtsTrue;
+ break;
+
+# else /* PAR */
+
+# if !defined(GRAN)
+ case 'b': /* will fall through to disaster */
+# else
+ case 'b':
+ if (rts_argv[arg][2] != '\0') {
+
+ /* Should we emulate hbcpp */
+ if(strcmp((rts_argv[arg]+2),"roken")==0) {
+ ++DoAlwaysCreateThreads;
+ strcpy(rts_argv[arg]+2,"oring");
+ }
+
+ /* or a ridiculously idealised simulator */
+ if(strcmp((rts_argv[arg]+2),"oring")==0) {
+ gran_latency = gran_fetchtime = gran_additional_latency =
+ gran_gunblocktime = gran_lunblocktime
+ = gran_threadcreatetime = gran_threadqueuetime
+ = gran_threadscheduletime = gran_threaddescheduletime
+ = gran_threadcontextswitchtime
+ = 0;
+
+ gran_mpacktime = gran_munpacktime = 0;
+
+ gran_arith_cost = gran_float_cost = gran_load_cost
+ = gran_store_cost = gran_branch_cost = 0;
+
+ gran_heapalloc_cost = 1;
+
+ /* ++DoFairSchedule; */
+ ++DoStealThreadsFirst;
+ ++DoThreadMigration;
+ ++do_gr_profile;
+ }
+
+ /* or a ridiculously idealised simulator */
+ if(strcmp((rts_argv[arg]+2),"onzo")==0) {
+ gran_latency = gran_fetchtime = gran_additional_latency =
+ gran_gunblocktime = gran_lunblocktime
+ = gran_threadcreatetime = gran_threadqueuetime
+ = gran_threadscheduletime = gran_threaddescheduletime
+ = gran_threadcontextswitchtime
+ = 0;
+
+ gran_mpacktime = gran_munpacktime = 0;
+
+ /* Keep default values for these
+ gran_arith_cost = gran_float_cost = gran_load_cost
+ = gran_store_cost = gran_branch_cost = 0;
+ */
+
+ gran_heapalloc_cost = 1;
+
+ /* ++DoFairSchedule; */ /* -b-R */
+ /* ++DoStealThreadsFirst; */ /* -b-T */
+ ++DoReScheduleOnFetch; /* -bZ */
+ ++DoThreadMigration; /* -bM */
+ ++do_gr_profile; /* -bP */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ debug = 0x20; /* print event statistics */
+# endif
+ }
+
+ /* Communication and task creation cost parameters */
+ else switch(rts_argv[arg][2]) {
+ case 'l':
+ if (rts_argv[arg][3] != '\0')
+ {
+ gran_gunblocktime = gran_latency = decode(rts_argv[arg]+3);
+ gran_fetchtime = 2* gran_latency;
+ }
+ else
+ gran_latency = LATENCY;
+ break;
+
+ case 'a':
+ if (rts_argv[arg][3] != '\0')
+ gran_additional_latency = decode(rts_argv[arg]+3);
+ else
+ gran_additional_latency = ADDITIONAL_LATENCY;
+ break;
+
+ case 'm':
+ if (rts_argv[arg][3] != '\0')
+ gran_mpacktime = decode(rts_argv[arg]+3);
+ else
+ gran_mpacktime = MSGPACKTIME;
+ break;
+
+ case 'x':
+ if (rts_argv[arg][3] != '\0')
+ gran_mtidytime = decode(rts_argv[arg]+3);
+ else
+ gran_mtidytime = 0;
+ break;
+
+ case 'r':
+ if (rts_argv[arg][3] != '\0')
+ gran_munpacktime = decode(rts_argv[arg]+3);
+ else
+ gran_munpacktime = MSGUNPACKTIME;
+ break;
+
+ case 'f':
+ if (rts_argv[arg][3] != '\0')
+ gran_fetchtime = decode(rts_argv[arg]+3);
+ else
+ gran_fetchtime = FETCHTIME;
+ break;
+
+ case 'n':
+ if (rts_argv[arg][3] != '\0')
+ gran_gunblocktime = decode(rts_argv[arg]+3);
+ else
+ gran_gunblocktime = GLOBALUNBLOCKTIME;
+ break;
+
+ case 'u':
+ if (rts_argv[arg][3] != '\0')
+ gran_lunblocktime = decode(rts_argv[arg]+3);
+ else
+ gran_lunblocktime = LOCALUNBLOCKTIME;
+ break;
+
+ /* Thread-related metrics */
+ case 't':
+ if (rts_argv[arg][3] != '\0')
+ gran_threadcreatetime = decode(rts_argv[arg]+3);
+ else
+ gran_threadcreatetime = THREADCREATETIME;
+ break;
+
+ case 'q':
+ if (rts_argv[arg][3] != '\0')
+ gran_threadqueuetime = decode(rts_argv[arg]+3);
+ else
+ gran_threadqueuetime = THREADQUEUETIME;
+ break;
+
+ case 'c':
+ if (rts_argv[arg][3] != '\0')
+ gran_threadscheduletime = decode(rts_argv[arg]+3);
+ else
+ gran_threadscheduletime = THREADSCHEDULETIME;
+
+ gran_threadcontextswitchtime = gran_threadscheduletime
+ + gran_threaddescheduletime;
+ break;
+
+ case 'd':
+ if (rts_argv[arg][3] != '\0')
+ gran_threaddescheduletime = decode(rts_argv[arg]+3);
+ else
+ gran_threaddescheduletime = THREADDESCHEDULETIME;
+
+ gran_threadcontextswitchtime = gran_threadscheduletime
+ + gran_threaddescheduletime;
+ break;
+
+ /* Instruction Cost Metrics */
+ case 'A':
+ if (rts_argv[arg][3] != '\0')
+ gran_arith_cost = decode(rts_argv[arg]+3);
+ else
+ gran_arith_cost = ARITH_COST;
+ break;
+
+ case 'F':
+ if (rts_argv[arg][3] != '\0')
+ gran_float_cost = decode(rts_argv[arg]+3);
+ else
+ gran_float_cost = FLOAT_COST;
+ break;
+
+ case 'B':
+ if (rts_argv[arg][3] != '\0')
+ gran_branch_cost = decode(rts_argv[arg]+3);
+ else
+ gran_branch_cost = BRANCH_COST;
+ break;
+
+ case 'L':
+ if (rts_argv[arg][3] != '\0')
+ gran_load_cost = decode(rts_argv[arg]+3);
+ else
+ gran_load_cost = LOAD_COST;
+ break;
+
+ case 'S':
+ if (rts_argv[arg][3] != '\0')
+ gran_store_cost = decode(rts_argv[arg]+3);
+ else
+ gran_store_cost = STORE_COST;
+ break;
+
+ case 'H':
+ if (rts_argv[arg][3] != '\0')
+ gran_heapalloc_cost = decode(rts_argv[arg]+3);
+ else
+ gran_heapalloc_cost = 0;
+ break;
+
+ case 'y':
+ if (rts_argv[arg][3] != '\0')
+ FetchStrategy = decode(rts_argv[arg]+3);
+ else
+ FetchStrategy = 4; /* default: fetch everything */
+ break;
+
+ /* General Parameters */
+ case 'p':
+ if (rts_argv[arg][3] != '\0')
+ {
+ max_proc = decode(rts_argv[arg]+3);
+ if(max_proc > MAX_PROC || max_proc < 1)
+ {
+ fprintf(stderr,"setupRtsFlags: no more than %u processors allowed\n", MAX_PROC);
+ error = 1;
+ }
+ }
+ else
+ max_proc = MAX_PROC;
+ break;
+
+ case 'C':
+ ++DoAlwaysCreateThreads;
+ ++DoThreadMigration;
+ break;
+
+ case 'G':
+ ++DoGUMMFetching;
+ break;
+
+ case 'M':
+ ++DoThreadMigration;
+ break;
+
+ case 'R':
+ ++DoFairSchedule;
+ break;
+
+ case 'T':
+ ++DoStealThreadsFirst;
+ ++DoThreadMigration;
+ break;
+
+ case 'Z':
+ ++DoReScheduleOnFetch;
+ break;
+
+ case 'z':
+ ++SimplifiedFetch;
+ break;
+
+ case 'N':
+ ++PreferSparksOfLocalNodes;
+ break;
+
+ case 'b':
+ ++do_gr_binary;
+ break;
+
+ case 'P':
+ ++do_gr_profile;
+ break;
+
+ case 's':
+ ++do_sp_profile;
+ break;
+
+ case '-':
+ switch(rts_argv[arg][3]) {
+
+ case 'C':
+ DoAlwaysCreateThreads=0;
+ DoThreadMigration=0;
+ break;
+
+ case 'G':
+ DoGUMMFetching=0;
+ break;
+
+ case 'M':
+ DoThreadMigration=0;
+ break;
+
+ case 'R':
+ DoFairSchedule=0;
+ break;
+
+ case 'T':
+ DoStealThreadsFirst=0;
+ DoThreadMigration=0;
+ break;
+
+ case 'Z':
+ DoReScheduleOnFetch=0;
+ break;
+
+ case 'N':
+ PreferSparksOfLocalNodes=0;
+ break;
+
+ case 'P':
+ do_gr_profile=0;
+ no_gr_profile=1;
+ break;
+
+ case 's':
+ do_sp_profile=0;
+ break;
+
+ case 'b':
+ do_gr_binary=0;
+ break;
+
+ default:
+ badoption( rts_argv[arg] );
+ break;
+ }
+ break;
+
+# if defined(GRAN_CHECK) && defined(GRAN)
+ case 'D':
+ switch(rts_argv[arg][3]) {
+ case 'e': /* event trace */
+ fprintf(stderr,"Printing event trace.\n");
+ ++event_trace;
+ break;
+
+ case 'f':
+ fprintf(stderr,"Printing forwarding of FETCHNODES.\n");
+ debug |= 0x2; /* print fwd messages */
+ break;
+
+ case 'z':
+ fprintf(stderr,"Check for blocked on fetch.\n");
+ debug |= 0x4; /* debug non-reschedule-on-fetch */
+ break;
+
+ case 't':
+ fprintf(stderr,"Check for TSO asleep on fetch.\n");
+ debug |= 0x10; /* debug TSO asleep for fetch */
+ break;
+
+ case 'E':
+ fprintf(stderr,"Printing event statistics.\n");
+ debug |= 0x20; /* print event statistics */
+ break;
+
+ case 'F':
+ fprintf(stderr,"Prohibiting forward.\n");
+ NoForward = 1; /* prohibit forwarding */
+ break;
+
+ case 'm':
+ fprintf(stderr,"Printing fetch misses.\n");
+ PrintFetchMisses = 1; /* print fetch misses at end of run */
+ break;
+
+ case 'd':
+ fprintf(stderr,"Debug mode.\n");
+ debug |= 0x40;
+ break;
+
+ case 'D':
+ fprintf(stderr,"Severe debug mode.\n");
+ debug |= 0x80;
+ break;
+
+ case '\0':
+ debug = 1;
+ break;
+
+ default:
+ badoption( rts_argv[arg] );
+ break;
+ }
+ break;
+# endif
+ default:
+ badoption( rts_argv[arg] );
+ break;
+ }
+ }
+ do_gr_sim++;
+ contextSwitchTime = 0;
+ break;
+# endif
+ case 'J':
+ case 'Q':
+ case 'D':
+ case 'R':
+ case 'L':
+ case 'O':
+ fprintf(stderr, "setupRtsFlags: Not built for parallel execution: %s\n", rts_argv[arg]);
+ error = 1;
+ break; /* defensive: don't rely on falling into the SM cases below */
+# endif /* PAR */
+#else /* CONCURRENT */
+ case 't':
+ fprintf(stderr, "setupRtsFlags: Not built for threaded execution: %s\n", rts_argv[arg]);
+ error = 1;
+ break; /* defensive: don't rely on falling into the SM cases below */
+
+#endif /* CONCURRENT */
+ case 'H': /* SM options -- ignore */
+ case 'A':
+ case 'G':
+ case 'F':
+ case 'K':
+ case 'M':
+ case 'B':
+ case 'T':
+#ifdef GCdu
+ case 'u': /* set dual mode threshold */
+#endif
+ break;
+
+ default: /* Unknown option ! */
+ fprintf(stderr, "setupRtsFlags: Unknown RTS option: %s\n", rts_argv[arg]);
+ error = 1;
+ break;
+ }
+ }
+ else {
+ fflush(stdout);
+ fprintf(stderr, "setupRtsFlags: Unexpected RTS argument: %s\n",
+ rts_argv[arg]);
+ error = 1;
+ }
+ }
+ if (error == 1) {
+ char **p;
+ fflush(stdout);
+ for (p = flagtext; *p; p++)
+ fprintf(stderr, "%s\n", *p);
+ EXIT(EXIT_FAILURE);
+ }
+}
+\end{code}
+
+Sets up and returns a string indicating the date/time of the run.
+Successive calls simply return the same string again. Initially
+called by @main.lc@ to initialise the string at the start of the run.
+Only used when profiling or the concurrent system is enabled.
+
+\begin{code}
+#if defined(USE_COST_CENTRES) || defined(CONCURRENT)
+# include <time.h>
+
+/* Return a static string giving the date/time of the start of the run,
+ * e.g. "Fri Jul 14 13:21 1995".  The string is built on the first call
+ * (main.lc calls this at startup); every later call returns the same
+ * cached buffer.
+ */
+char *
+time_str(STG_NO_ARGS)
+{
+    static time_t now = 0;
+    static char nowstr[26];
+
+    if (now == 0) {
+	time(&now);
+	strcpy(nowstr, ctime(&now));
+	/* Drop the ":ss" seconds field by shifting " yyyy\n" left over it.
+	 * The source and destination overlap, so this must be memmove,
+	 * not strcpy (overlapping strcpy is undefined behavior).
+	 */
+	memmove(nowstr+16, nowstr+19, strlen(nowstr+19) + 1);
+	nowstr[21] = '\0';	/* chop the trailing newline from ctime() */
+    }
+    return nowstr;
+}
+#endif /* profiling */
+\end{code}
+
+ToDo: Will this work under threads?
+
+\begin{code}
+StgStablePtr errorHandler = -1;
+
+StgInt getErrorHandler()
+{
+ return (StgInt) errorHandler;
+}
+
+#ifndef PAR
+
+void raiseError( handler )
+StgStablePtr handler;
+{
+ if (handler == -1) {
+ shutdownHaskell();
+ } else {
+ TopClosure = deRefStablePointer( handler );
+ longjmp(restart_main,1);
+ }
+}
+\end{code}
+
+\begin{code}
+StgInt
+catchError( newErrorHandler )
+StgStablePtr newErrorHandler;
+{
+ StgStablePtr oldErrorHandler = errorHandler;
+ errorHandler = newErrorHandler;
+ return oldErrorHandler;
+}
+
+#endif
+\end{code}
+
+If we have installed an error handler, we might want to
+indicate that we have successfully recovered from an error by
+decrementing the counter.
+
+\begin{code}
+void
+decrementErrorCount()
+{
+ ErrorIO_call_count-=1;
+}
+
+\end{code}