diff options
Diffstat (limited to 'ghc/runtime/main')
-rw-r--r-- | ghc/runtime/main/GranSim.lc | 595 | ||||
-rw-r--r-- | ghc/runtime/main/Itimer.lc | 84 | ||||
-rw-r--r-- | ghc/runtime/main/RednCounts.lc | 682 | ||||
-rw-r--r-- | ghc/runtime/main/SMRep.lc | 204 | ||||
-rw-r--r-- | ghc/runtime/main/Select.lc | 123 | ||||
-rw-r--r-- | ghc/runtime/main/Signals.lc | 588 | ||||
-rw-r--r-- | ghc/runtime/main/StgOverflow.lc | 450 | ||||
-rw-r--r-- | ghc/runtime/main/StgStartup.lhc | 662 | ||||
-rw-r--r-- | ghc/runtime/main/StgThreads.lhc | 496 | ||||
-rw-r--r-- | ghc/runtime/main/StgTrace.lc | 74 | ||||
-rw-r--r-- | ghc/runtime/main/StgUpdate.lhc | 730 | ||||
-rw-r--r-- | ghc/runtime/main/Threads.lc | 3749 | ||||
-rw-r--r-- | ghc/runtime/main/TopClosure.lc | 8 | ||||
-rw-r--r-- | ghc/runtime/main/TopClosure13.lc | 8 | ||||
-rw-r--r-- | ghc/runtime/main/main.lc | 1355 |
15 files changed, 9808 insertions, 0 deletions
diff --git a/ghc/runtime/main/GranSim.lc b/ghc/runtime/main/GranSim.lc new file mode 100644 index 0000000000..de603ddbc2 --- /dev/null +++ b/ghc/runtime/main/GranSim.lc @@ -0,0 +1,595 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1995 +% +%************************************************************************ +%* * +\section[GranSim.lc]{Granularity Simulator Routines} +%* * +%************************************************************************ + +Macros for dealing with the new and improved GA field for simulating +parallel execution. Based on @CONCURRENT@ package. The GA field now +contains a mask, where the n-th bit stands for the n-th processor, +where this data can be found. In case of multiple copies, several bits +are set. The total number of processors is bounded by @MAX_PROC@, +which should be <= the length of a word in bits. -- HWL + +\begin{code} +#if defined(GRAN) || defined(PAR) + +#define NON_POSIX_SOURCE /* gettimeofday */ + +#include "rtsdefs.h" + + +#ifdef HAVE_GETCLOCK + +#ifdef HAVE_SYS_TIMERS_H +#define POSIX_4D9 1 +#include <sys/timers.h> +#endif + +#else +#ifdef HAVE_GETTIMEOFDAY + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +#else + +#ifdef HAVE_TIME_H +#include <time.h> +#endif + +#endif +#endif + +void grputw PROTO((TIME v)); + +#if defined(GRAN) +/* Pointer to the event queue; events are currently malloc'ed */ +static eventq EventHd = NULL; + +PROC +ga_to_proc(W_ ga) +{ + PROC i; + + for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++); + + return (i); +} + +/* NB: This takes a *node* rather than just a ga as input */ +PROC +where_is(P_ node) +{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */ + +#if 0 +PROC +no_of_copies(W_ ga) /* DaH lo'lu'Qo'; currently unused */ +{ + PROC i, n; + + for (i = 0, n = 0; i < MAX_PROC; i++) + if (IS_LOCAL_TO(ga, i)) + n++;; + + return (n); +} +#endif + +eventq +getnextevent() +{ + static eventq entry = NULL; + + if(EventHd == NULL) + { + 
fprintf(stderr,"No next event\n"); + exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */ + } + + if(entry != NULL) + free((char *)entry); + +#if defined(GRAN_CHECK) && defined(GRAN) + if (debug & 0x20) { /* count events */ + noOfEvents++; + event_counts[(EVENT_TYPE(EventHd)>=CONTINUETHREAD1) ? + CONTINUETHREAD : + EVENT_TYPE(EventHd)]++; + } +#endif + + entry = EventHd; + EventHd = EVENT_NEXT(EventHd); + return(entry); +} + +/* ToDo: replace malloc/free with a free list */ + +/* NB: newevent unused (WDP 95/07) */ + +static +newevent(proc,creator,time,evttype,tso,node,spark) + PROC proc, creator; + TIME time; + EVTTYPE evttype; + P_ tso, node; + sparkq spark; +{ + eventq newentry = (eventq) xmalloc(sizeof(struct event)); + + EVENT_PROC(newentry) = proc; + EVENT_CREATOR(newentry) = creator; + EVENT_TIME(newentry) = time; + EVENT_TYPE(newentry) = evttype; + EVENT_TSO(newentry) = tso; + EVENT_NODE(newentry) = node; + EVENT_SPARK(newentry) = spark; + EVENT_NEXT(newentry) = NULL; + + insert_event(newentry); +} + +#endif /* GRAN ; HWL */ +\end{code} + +%**************************************************************************** +% +\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim} +% +%**************************************************************************** + +Event dumping routines. + +\begin{code} + +FILE *gr_file = NULL; + +char *gran_event_names[] = { + "START", "START(Q)", + "STEALING", "STOLEN", "STOLEN(Q)", + "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)", + "SCHEDULE", "DESCHEDULE", + "END", + "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED", + "TERMINATE", + "??" +}; + +/* + * If you're not using GNUC and you're on a 32-bit machine, you're + * probably out of luck here. However, since CONCURRENT currently + * requires GNUC, I'm not too worried about it. 
--JSM + */ + +#if !defined(GRAN) + +static ullong startTime = 0; + +ullong +msTime(STG_NO_ARGS) +{ +# ifdef HAVE_GETCLOCK + struct timespec tv; + + if (getclock(TIMEOFDAY, &tv) != 0) { + fflush(stdout); + fprintf(stderr, "Clock failed\n"); + EXIT(EXIT_FAILURE); + } + return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime; +# else +# ifdef HAVE_GETTIMEOFDAY + struct timeval tv; + + if (gettimeofday(&tv, NULL) != 0) { + fflush(stdout); + fprintf(stderr, "Clock failed\n"); + EXIT(EXIT_FAILURE); + } + return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime; +# else + time_t t; + if ((t = time(NULL)) == (time_t) -1) { + fflush(stdout); + fprintf(stderr, "Clock failed\n"); + EXIT(EXIT_FAILURE); + } + return t * LL(1000); +# endif +# endif +} + +#endif /* !GRAN */ + + +void +DumpGranEvent(name, tso) +enum gran_event_types name; +P_ tso; +{ + DumpRawGranEvent(CURRENT_PROC, name, TSO_ID(tso)); +} + +void +DumpSparkGranEvent(name, id) +enum gran_event_types name; +W_ id; +{ + DumpRawGranEvent(CURRENT_PROC, name, id); +} + +void +DumpGranEventAndNode(name, tso, node, proc) +enum gran_event_types name; +P_ tso, node; +PROC proc; +{ + PROC pe = CURRENT_PROC; + W_ id; + + char time_string[500]; /*ToDo: kill magic constant */ + ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + +#ifdef PAR + id = tso == NULL ? 
-1 : TSO_ID(tso); +#else + id = TSO_ID(tso); +#endif + if (name > GR_EVENT_MAX) + name = GR_EVENT_MAX; + + if (do_gr_binary) { + grputw(name); + grputw(pe); + abort(); /* die please: a single word doesn't represent long long times */ + grputw(CURRENT_TIME); /* this line is bound to do the wrong thing */ + grputw(id); + } else + fprintf(gr_file, "PE %2u [%s]: %s %lx \t0x%lx\t(from %2u)\n", + pe, time_string, gran_event_names[name], id, (W_) node, proc); +} + +void +DumpRawGranEvent(pe, name, id) +PROC pe; +enum gran_event_types name; +W_ id; +{ + char time_string[500]; /* ToDo: kill magic constant */ + + if (name > GR_EVENT_MAX) + name = GR_EVENT_MAX; + + ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + + if (do_gr_binary) { + grputw(name); + grputw(pe); + abort(); /* die please: a single word doesn't represent long long times */ + grputw(CURRENT_TIME); /* this line is bound to fail */ + grputw(id); + } else + fprintf(gr_file, "PE %2u [%s]: %s %lx\n", + pe, time_string, gran_event_names[name], id); +} + +void +DumpGranInfo(pe, tso, mandatory_thread) +PROC pe; +P_ tso; +rtsBool mandatory_thread; +{ + char time_string[500]; /* ToDo: kill magic constant */ + ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + + if (do_gr_binary) { + grputw(GR_END); + grputw(pe); + abort(); /* die please: a single word doesn't represent long long times */ + grputw(CURRENT_TIME); /* this line is bound to fail */ + grputw(TSO_ID(tso)); +#ifdef PAR + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); +#else + grputw(TSO_SPARKNAME(tso)); + grputw(TSO_STARTEDAT(tso)); + grputw(TSO_EXPORTED(tso)); + grputw(TSO_BASICBLOCKS(tso)); + grputw(TSO_ALLOCS(tso)); + grputw(TSO_EXECTIME(tso)); + grputw(TSO_BLOCKTIME(tso)); + grputw(TSO_BLOCKCOUNT(tso)); + grputw(TSO_FETCHTIME(tso)); + grputw(TSO_FETCHCOUNT(tso)); + grputw(TSO_LOCALSPARKS(tso)); + 
grputw(TSO_GLOBALSPARKS(tso)); +#endif + grputw(mandatory_thread); + } else { + + /* + * NB: DumpGranEvent cannot be used because PE may be wrong (as well as the + * extra info) + */ + fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n" + ,pe + ,time_string + ,TSO_ID(tso) + ,TSO_SPARKNAME(tso) + ,TSO_STARTEDAT(tso) + ,TSO_EXPORTED(tso) ? 'T' : 'F' + ,TSO_BASICBLOCKS(tso) + ,TSO_ALLOCS(tso) + ,TSO_EXECTIME(tso) + ,TSO_BLOCKTIME(tso) + ,TSO_BLOCKCOUNT(tso) + ,TSO_FETCHTIME(tso) + ,TSO_FETCHCOUNT(tso) + ,TSO_LOCALSPARKS(tso) + ,TSO_GLOBALSPARKS(tso) + ,mandatory_thread ? 'T' : 'F' + ); + } +} + +/* + Output a terminate event and an 8-byte time. +*/ + +void +grterminate(v) +TIME v; +{ + DumpGranEvent(GR_TERMINATE, 0); + + if (sizeof(TIME) == 4) { + putc('\0', gr_file); + putc('\0', gr_file); + putc('\0', gr_file); + putc('\0', gr_file); + } else { + putc(v >> 56l, gr_file); + putc((v >> 48l) & 0xffl, gr_file); + putc((v >> 40l) & 0xffl, gr_file); + putc((v >> 32l) & 0xffl, gr_file); + } + putc((v >> 24l) & 0xffl, gr_file); + putc((v >> 16l) & 0xffl, gr_file); + putc((v >> 8l) & 0xffl, gr_file); + putc(v & 0xffl, gr_file); +} + +/* + Length-coded output: first 3 bits contain length coding + + 00x 1 byte + 01x 2 bytes + 10x 4 bytes + 110 8 bytes + 111 5 or 9 bytes +*/ + +void +grputw(v) +TIME v; +{ + if (v <= 0x3fl) { + fputc(v & 0x3f, gr_file); + } else if (v <= 0x3fffl) { + fputc((v >> 8l) | 0x40l, gr_file); + fputc(v & 0xffl, gr_file); + } else if (v <= 0x3fffffffl) { + fputc((v >> 24l) | 0x80l, gr_file); + fputc((v >> 16l) & 0xffl, gr_file); + fputc((v >> 8l) & 0xffl, gr_file); + fputc(v & 0xffl, gr_file); + } else if (sizeof(TIME) == 4) { + fputc(0x70, gr_file); + fputc((v >> 24l) & 0xffl, gr_file); + fputc((v >> 16l) & 0xffl, gr_file); + fputc((v >> 8l) & 0xffl, gr_file); + fputc(v & 0xffl, gr_file); + } else { + if (v <= 0x3fffffffffffffl) + putc((v >> 56l) | 0x60l, gr_file); 
+ else { + putc(0x70, gr_file); + putc((v >> 56l) & 0xffl, gr_file); + } + + putc((v >> 48l) & 0xffl, gr_file); + putc((v >> 40l) & 0xffl, gr_file); + putc((v >> 32l) & 0xffl, gr_file); + putc((v >> 24l) & 0xffl, gr_file); + putc((v >> 16l) & 0xffl, gr_file); + putc((v >> 8l) & 0xffl, gr_file); + putc(v & 0xffl, gr_file); + } +} + +\end{code} + +%**************************************************************************** +% +\subsection[gr-simulation]{Granularity Simulation} +% +%**************************************************************************** + +\begin{code} +#ifdef GRAN +char gr_filename[32]; /*ToDo: magic short filename constant????? WDP 95/07 */ +I_ do_gr_sim = 0; + +int +init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ + I_ i; + + if (do_gr_sim) { + char *extension = do_gr_binary ? "gb" : "gr"; + + sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0], extension); + + if ((gr_file = fopen(gr_filename, "w")) == NULL) { + fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename); + exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */ + } +#if defined(GRAN_CHECK) && defined(GRAN) + if (DoReScheduleOnFetch) + setbuf(gr_file, NULL); +#endif + + fputs("Granularity Simulation for ", gr_file); + for (i = 0; i < prog_argc; ++i) { + fputs(prog_argv[i], gr_file); + fputc(' ', gr_file); + } + + if (rts_argc > 0) { + fputs("+RTS ", gr_file); + + for (i = 0; i < rts_argc; ++i) { + fputs(rts_argv[i], gr_file); + fputc(' ', gr_file); + } + } + fputs("\n\n--------------------\n\n", gr_file); + + fputs("General Parameters:\n\n", gr_file); + + fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s ????? %s\n", + max_proc, DoFairSchedule ? "Fair" : "Unfair", + DoThreadMigration ? "" : "Don't ", + DoThreadMigration && DoStealThreadsFirst ? " Before Sparks" : "", + DoReScheduleOnFetch ? 
"" : "Don't "); + + fprintf(gr_file, "%s, Fetch %s in Each Packet\n", + SimplifiedFetch ? "Simplified Fetch" : (DoReScheduleOnFetch ? "Reschedule on Fetch" : "Block on Fetch"), + DoGUMMFetching ? "Many Closures" : "Exactly One Closure"); + fprintf(gr_file, "Fetch Strategy(%lu): If outstanding fetches %s\n", + FetchStrategy, + FetchStrategy == 1 ? "only run runnable threads (don't create new ones" : + FetchStrategy == 2 ? "create threads only from local sparks" : + FetchStrategy == 3 ? "create threads from local or global sparks" : + FetchStrategy == 4 ? "create sparks and steal threads if necessary" : + "unknown"); + + fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n", + gran_threadcreatetime, gran_threadqueuetime); + fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n", + gran_threaddescheduletime, gran_threadscheduletime); + fprintf(gr_file, "Thread Context-Switch Time %lu\n", + gran_threadcontextswitchtime); + fputs("\n\n--------------------\n\n", gr_file); + + fputs("Communication Metrics:\n\n", gr_file); + fprintf(gr_file, + "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n", + gran_latency, gran_additional_latency, gran_fetchtime, + gran_gunblocktime, gran_lunblocktime); + fprintf(gr_file, + "Message Creation %lu (+ %lu after send), Message Read %lu\n", + gran_mpacktime, gran_mtidytime, gran_munpacktime); + fputs("\n\n--------------------\n\n", gr_file); + + fputs("Instruction Metrics:\n\n", gr_file); + fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n", + gran_arith_cost, gran_branch_cost, + gran_load_cost, gran_store_cost, gran_float_cost, gran_heapalloc_cost); + fputs("\n\n++++++++++++++++++++\n\n", gr_file); + } + if (do_gr_binary) + grputw(sizeof(TIME)); + + Idlers = max_proc; + return (0); +} + +void +end_gr_simulation(STG_NO_ARGS) +{ + if (do_gr_sim) { + fprintf(stderr, "The simulation is finished. 
Look at %s for details.\n", + gr_filename); + fclose(gr_file); + } +} + +#endif /* GRAN */ + +#ifdef PAR +char gr_filename[50]; /*ToDo: (small) magic constant alert!!!! WDP 95/07 */ + +I_ do_gr_profile = 0; +I_ do_sp_profile = 0; +I_ do_gr_binary = 0; + +void +init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ + int i; + + char *extension = do_gr_binary ? "gb" : "gr"; + + sprintf(gr_filename, "%0.28s.%03d.%0.2s", prog_argv[0], thisPE, extension); + + if ((gr_file = fopen(gr_filename, "w")) == NULL) { + fprintf(stderr, "Can't open activity report file %s\n", gr_filename); + EXIT(EXIT_FAILURE); + } + + for (i = 0; i < prog_argc; ++i) { + fputs(prog_argv[i], gr_file); + fputc(' ', gr_file); + } + + if (rts_argc > 0) { + fputs("+RTS ", gr_file); + + for (i = 0; i < rts_argc; ++i) { + fputs(rts_argv[i], gr_file); + fputc(' ', gr_file); + } + } + fputc('\n', gr_file); + + startTime = CURRENT_TIME; + + if (startTime > LL(1000000000)) { + /* This shouldn't overflow twice */ + fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE, + (TIME) (startTime / LL(1000000000)), + (TIME) (startTime % LL(1000000000))); + } else { + fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime); + } + + if (do_gr_binary) + grputw(sizeof(TIME)); +} +#endif /* PAR */ + +#endif /* GRAN || PAR */ +\end{code} + + diff --git a/ghc/runtime/main/Itimer.lc b/ghc/runtime/main/Itimer.lc new file mode 100644 index 0000000000..87c146064a --- /dev/null +++ b/ghc/runtime/main/Itimer.lc @@ -0,0 +1,84 @@ +% +% (c) The AQUA Project, Glasgow University, 1995 +% +%************************************************************************ +%* * +\section[Itimer.lc]{Interval Timer} +%* * +%************************************************************************ + +The interval timer is used for profiling and for context switching in the +threaded build. 
Though POSIX 1003.4 includes a standard interface for +such things, no one really seems to be implementing them yet. Even +Solaris 2.3 only seems to provide support for @CLOCK_REAL@, whereas we're +keen on getting access to @CLOCK_VIRTUAL@. + +Hence, we use the old-fashioned @setitimer@ that just about everyone seems +to support. So much for standards. + +\begin{code} + +#if defined(USE_COST_CENTRES) || defined(CONCURRENT) + +# include "platform.h" + +# define NON_POSIX_SOURCE + +# include "rtsdefs.h" + +/* As recommended in the autoconf manual */ +# ifdef TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +# else +# ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +# else +# include <time.h> +# endif +# endif + +int +initialize_virtual_timer(ms) +int ms; +{ +# ifndef HAVE_SETITIMER + fprintf(stderr, "No virtual timer on this system\n"); + return -1; +# else + struct itimerval it; + + it.it_value.tv_sec = ms / 1000; + it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec)); + it.it_interval = it.it_value; + return (setitimer(ITIMER_VIRTUAL, &it, NULL)); +# endif +} + +# if 0 +/* This is a potential POSIX version */ +int +initialize_virtual_timer(ms) +int ms; +{ + struct sigevent se; + struct itimerspec it; + timer_t tid; + + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = SIGVTALRM; + se.sigev_value.sival_int = SIGVTALRM; + if (timer_create(CLOCK_VIRTUAL, &se, &tid)) { + fprintf(stderr, "Can't create virtual timer.\n"); + EXIT(EXIT_FAILURE); + } + it.it_value.tv_sec = ms / 1000; + it.it_value.tv_nsec = 1000000 * (ms - 1000 * it.it_value.tv_sec); + it.it_interval = it.it_value; + timer_settime(tid, TIMER_RELTIME, &it, NULL); +} +# endif + +#endif /* USE_COST_CENTRES || CONCURRENT */ + +\end{code} diff --git a/ghc/runtime/main/RednCounts.lc b/ghc/runtime/main/RednCounts.lc new file mode 100644 index 0000000000..142dc8423c --- /dev/null +++ b/ghc/runtime/main/RednCounts.lc @@ -0,0 +1,682 @@ +% +% (c) The GRASP Project, Glasgow University, 1992-1993 
+% +%************************************************************************ +%* * +\section[RednCounts.lc]{Stuff for ``ticky-ticky'' profiling} +%* * +%************************************************************************ + +Goes with \tr{imports/RednCounts.lh}; more documentation there. + +%************************************************************************ +%* * +\subsection[RednCounts-counters]{Declare all the counters} +%* * +%************************************************************************ + +\begin{code} +#define NULL_REG_MAP /* Not threaded */ + +#include "../storage/SMinternal.h" /* Bad boy, Will (ToDo) */ + +#if defined(DO_REDN_COUNTING) + +extern FILE *tickyfile; + +I_ ALLOC_HEAP_ctr = 0; +I_ ALLOC_HEAP_tot = 0; + +PP_ max_SpA; /* set in re_enterable_part_of_main */ +P_ max_SpB; + +/* not used at all +I_ A_STK_REUSE_ctr = 0; +I_ B_STK_REUSE_ctr = 0; +*/ +I_ A_STK_STUB_ctr = 0; + +I_ ALLOC_FUN_ctr = 0; +I_ ALLOC_FUN_adm = 0; +I_ ALLOC_FUN_gds = 0; +I_ ALLOC_FUN_slp = 0; +I_ ALLOC_FUN_hst[5] = {0,0,0,0,0}; +I_ ALLOC_THK_ctr = 0; +I_ ALLOC_THK_adm = 0; +I_ ALLOC_THK_gds = 0; +I_ ALLOC_THK_slp = 0; +I_ ALLOC_THK_hst[5] = {0,0,0,0,0}; +I_ ALLOC_CON_ctr = 0; +I_ ALLOC_CON_adm = 0; +I_ ALLOC_CON_gds = 0; +I_ ALLOC_CON_slp = 0; +I_ ALLOC_CON_hst[5] = {0,0,0,0,0}; +I_ ALLOC_TUP_ctr = 0; +I_ ALLOC_TUP_adm = 0; +I_ ALLOC_TUP_gds = 0; +I_ ALLOC_TUP_slp = 0; +I_ ALLOC_TUP_hst[5] = {0,0,0,0,0}; +I_ ALLOC_BH_ctr = 0; +I_ ALLOC_BH_adm = 0; +I_ ALLOC_BH_gds = 0; +I_ ALLOC_BH_slp = 0; +I_ ALLOC_BH_hst[5] = {0,0,0,0,0}; +/* +I_ ALLOC_PAP_ctr = 0; +I_ ALLOC_PAP_adm = 0; +I_ ALLOC_PAP_gds = 0; +I_ ALLOC_PAP_slp = 0; +I_ ALLOC_PAP_hst[5] = {0,0,0,0,0}; +*/ +I_ ALLOC_PRIM_ctr = 0; +I_ ALLOC_PRIM_adm = 0; +I_ ALLOC_PRIM_gds = 0; +I_ ALLOC_PRIM_slp = 0; +I_ ALLOC_PRIM_hst[5] = {0,0,0,0,0}; +/* +I_ ALLOC_UPD_CON_ctr = 0; +I_ ALLOC_UPD_CON_adm = 0; +I_ ALLOC_UPD_CON_gds = 0; +I_ ALLOC_UPD_CON_slp = 0; +I_ ALLOC_UPD_CON_hst[5] = {0,0,0,0,0}; +*/ +I_ 
ALLOC_UPD_PAP_ctr = 0; +I_ ALLOC_UPD_PAP_adm = 0; +I_ ALLOC_UPD_PAP_gds = 0; +I_ ALLOC_UPD_PAP_slp = 0; +I_ ALLOC_UPD_PAP_hst[5] = {0,0,0,0,0}; + +#ifdef CONCURRENT +I_ ALLOC_STK_ctr = 0; +I_ ALLOC_STK_adm = 0; +I_ ALLOC_STK_gds = 0; +I_ ALLOC_STK_slp = 0; +I_ ALLOC_STK_hst[5] = {0,0,0,0,0}; +I_ ALLOC_TSO_ctr = 0; +I_ ALLOC_TSO_adm = 0; +I_ ALLOC_TSO_gds = 0; +I_ ALLOC_TSO_slp = 0; +I_ ALLOC_TSO_hst[5] = {0,0,0,0,0}; + +#ifdef PAR +I_ ALLOC_FMBQ_ctr = 0; +I_ ALLOC_FMBQ_adm = 0; +I_ ALLOC_FMBQ_gds = 0; +I_ ALLOC_FMBQ_slp = 0; +I_ ALLOC_FMBQ_hst[5] = {0,0,0,0,0}; +I_ ALLOC_FME_ctr = 0; +I_ ALLOC_FME_adm = 0; +I_ ALLOC_FME_gds = 0; +I_ ALLOC_FME_slp = 0; +I_ ALLOC_FME_hst[5] = {0,0,0,0,0}; +I_ ALLOC_BF_ctr = 0; +I_ ALLOC_BF_adm = 0; +I_ ALLOC_BF_gds = 0; +I_ ALLOC_BF_slp = 0; +I_ ALLOC_BF_hst[5] = {0,0,0,0,0}; +#endif +#endif + +I_ ENT_VIA_NODE_ctr = 0; +I_ ENT_CON_ctr = 0; +I_ ENT_FUN_STD_ctr = 0; +I_ ENT_FUN_DIRECT_ctr = 0; +I_ ENT_IND_ctr = 0; +I_ ENT_PAP_ctr = 0; +I_ ENT_THK_ctr = 0; + +I_ RET_NEW_IN_HEAP_ctr = 0; +I_ RET_NEW_IN_REGS_ctr = 0; +I_ RET_OLD_IN_HEAP_ctr = 0; +I_ RET_OLD_IN_REGS_ctr = 0; +I_ RET_SEMI_BY_DEFAULT_ctr = 0; +I_ RET_SEMI_IN_HEAP_ctr = 0; +I_ RET_SEMI_IN_REGS_ctr = 0; +I_ VEC_RETURN_ctr = 0; + +I_ ReturnInRegsNodeValid = 0; /* i.e., False */ + +I_ UPDF_OMITTED_ctr = 0; +I_ UPDF_STD_PUSHED_ctr = 0; +I_ UPDF_CON_PUSHED_ctr = 0; +I_ UPDF_HOLE_PUSHED_ctr = 0; + +I_ UPDF_RCC_PUSHED_ctr = 0; +I_ UPDF_RCC_OMITTED_ctr = 0; + +I_ UPD_EXISTING_ctr = 0; +I_ UPD_CON_W_NODE_ctr = 0; +I_ UPD_CON_IN_PLACE_ctr = 0; +I_ UPD_CON_IN_NEW_ctr = 0; +I_ UPD_PAP_IN_PLACE_ctr = 0; +I_ UPD_PAP_IN_NEW_ctr = 0; + +I_ UPD_ENTERED_ctr = 0; +I_ UPD_ENTERED_AGAIN_ctr = 0; + +I_ UPD_NEW_IND_ctr = 0; +I_ UPD_NEW_IN_PLACE_PTRS_ctr = 0; +I_ UPD_NEW_IN_PLACE_NOPTRS_ctr = 0; +I_ UPD_OLD_IND_ctr = 0; +I_ UPD_OLD_IN_PLACE_PTRS_ctr = 0; +I_ UPD_OLD_IN_PLACE_NOPTRS_ctr = 0; + +I_ UPD_IN_PLACE_COPY_ctr = 0; +\end{code} + +\begin{code} +#if 0 +/* testing only */ +void 
+TICKY_PARANOIA(const char *file, I_ line) +{ + I_ tot_adm_wds = /* total number of admin words allocated */ + ALLOC_FUN_adm + ALLOC_THK_adm + ALLOC_CON_adm + ALLOC_TUP_adm + + ALLOC_BH_adm /*+ ALLOC_PAP_adm*/ /*+ ALLOC_UPD_CON_adm*/ + ALLOC_UPD_PAP_adm + + ALLOC_PRIM_adm; + I_ tot_gds_wds = /* total number of words of ``good stuff'' allocated */ + ALLOC_FUN_gds + ALLOC_THK_gds + ALLOC_CON_gds + ALLOC_TUP_gds + + ALLOC_BH_gds /*+ ALLOC_PAP_gds*/ /*+ ALLOC_UPD_CON_gds*/ + ALLOC_UPD_PAP_gds + + ALLOC_PRIM_gds; + I_ tot_slp_wds = /* total number of ``slop'' words allocated */ + ALLOC_FUN_slp + ALLOC_THK_slp + ALLOC_CON_slp + ALLOC_TUP_slp + + ALLOC_BH_slp /*+ ALLOC_PAP_slp*/ /*+ ALLOC_UPD_CON_slp*/ + ALLOC_UPD_PAP_slp + + ALLOC_PRIM_slp; + I_ tot_wds = /* total words */ + tot_adm_wds + tot_gds_wds + tot_slp_wds; + if (ALLOC_HEAP_tot != tot_wds) { + fprintf(stderr, "Eek! %ld != %ld, %s, %d\n",ALLOC_HEAP_tot, tot_wds, file, line); + } else { + fprintf(stderr, "OK. %ld != %ld, %s, %d\n",ALLOC_HEAP_tot, tot_wds, file, line); + } +} +#endif /* 0 */ +\end{code} + +%************************************************************************ +%* * +\subsection[RednCounts-print]{Print out all the counters} +%* * +%************************************************************************ + +\begin{code} +extern void printRegisteredCounterInfo (STG_NO_ARGS); /* fwd decl */ + +#define INTAVG(a,b) ((b == 0) ? 
0.0 : ((StgDouble) (a) / (StgDouble) (b))) +#define PC(a) (100.0 * a) + +#define AVG(thing) \ + StgDouble CAT2(avg,thing) = INTAVG(CAT2(tot,thing),CAT2(ctr,thing)) + +void +PrintRednCountInfo() +{ + I_ tot_allocs = /* total number of things allocated */ + ALLOC_FUN_ctr + ALLOC_THK_ctr + ALLOC_CON_ctr + ALLOC_TUP_ctr + +#ifdef CONCURRENT + ALLOC_STK_ctr + ALLOC_TSO_ctr + +#ifdef PAR + ALLOC_FMBQ_ctr + ALLOC_FME_ctr + ALLOC_BF_ctr + +#endif +#endif + ALLOC_BH_ctr /*+ ALLOC_PAP_ctr*/ /*+ ALLOC_UPD_CON_ctr*/ + ALLOC_UPD_PAP_ctr + + ALLOC_PRIM_ctr; + I_ tot_adm_wds = /* total number of admin words allocated */ + ALLOC_FUN_adm + ALLOC_THK_adm + ALLOC_CON_adm + ALLOC_TUP_adm + +#ifdef CONCURRENT + ALLOC_STK_adm + ALLOC_TSO_adm + +#ifdef PAR + ALLOC_FMBQ_adm + ALLOC_FME_adm + ALLOC_BF_adm + +#endif +#endif + ALLOC_BH_adm /*+ ALLOC_PAP_adm*/ /*+ ALLOC_UPD_CON_adm*/ + ALLOC_UPD_PAP_adm + + ALLOC_PRIM_adm; + I_ tot_gds_wds = /* total number of words of ``good stuff'' allocated */ + ALLOC_FUN_gds + ALLOC_THK_gds + ALLOC_CON_gds + ALLOC_TUP_gds + +#ifdef CONCURRENT + ALLOC_STK_gds + ALLOC_TSO_gds + +#ifdef PAR + ALLOC_FMBQ_gds + ALLOC_FME_gds + ALLOC_BF_gds + +#endif +#endif + ALLOC_BH_gds /*+ ALLOC_PAP_gds*/ /*+ ALLOC_UPD_CON_gds*/ + ALLOC_UPD_PAP_gds + + ALLOC_PRIM_gds; + I_ tot_slp_wds = /* total number of ``slop'' words allocated */ + ALLOC_FUN_slp + ALLOC_THK_slp + ALLOC_CON_slp + ALLOC_TUP_slp + +#ifdef CONCURRENT + ALLOC_STK_slp + ALLOC_TSO_slp + +#ifdef PAR + ALLOC_FMBQ_slp + ALLOC_FME_slp + ALLOC_BF_slp + +#endif +#endif + ALLOC_BH_slp /*+ ALLOC_PAP_slp*/ /*+ ALLOC_UPD_CON_slp*/ + ALLOC_UPD_PAP_slp + + ALLOC_PRIM_slp; + I_ tot_wds = /* total words */ + tot_adm_wds + tot_gds_wds + tot_slp_wds; + + I_ tot_enters = + ENT_CON_ctr + ENT_FUN_DIRECT_ctr + + ENT_IND_ctr + ENT_PAP_ctr + ENT_THK_ctr; + I_ jump_direct_enters = + tot_enters - ENT_VIA_NODE_ctr; + I_ bypass_enters = + ENT_FUN_DIRECT_ctr - + (ENT_FUN_STD_ctr - UPD_PAP_IN_PLACE_ctr - UPD_PAP_IN_NEW_ctr); + + I_ 
tot_returns_in_regs = + RET_NEW_IN_REGS_ctr + RET_OLD_IN_REGS_ctr + RET_SEMI_IN_REGS_ctr; + I_ tot_returns_in_heap = + RET_NEW_IN_HEAP_ctr + RET_OLD_IN_HEAP_ctr + RET_SEMI_IN_HEAP_ctr + RET_SEMI_BY_DEFAULT_ctr/*???*/; + I_ tot_returns_of_new = + RET_NEW_IN_REGS_ctr + RET_NEW_IN_HEAP_ctr; + I_ tot_returns_of_old = /* NB: NOT USED ???! 94/05 WDP */ + RET_OLD_IN_REGS_ctr + RET_OLD_IN_HEAP_ctr + + RET_SEMI_BY_DEFAULT_ctr + RET_SEMI_IN_HEAP_ctr + RET_SEMI_IN_REGS_ctr /*???*/; + + I_ tot_returns = + tot_returns_in_regs + tot_returns_in_heap; + + I_ tot_upd_frames = + UPDF_STD_PUSHED_ctr + UPDF_CON_PUSHED_ctr; /*DBH*/ + + I_ con_updates = + UPD_CON_W_NODE_ctr + UPD_CON_IN_PLACE_ctr + UPD_CON_IN_NEW_ctr; + I_ pap_updates = + UPD_PAP_IN_PLACE_ctr + UPD_PAP_IN_NEW_ctr; + I_ tot_updates = + UPD_EXISTING_ctr + con_updates + pap_updates; + I_ tot_in_place_updates = + UPD_CON_IN_PLACE_ctr + UPD_PAP_IN_PLACE_ctr; + + I_ tot_new_updates = + UPD_NEW_IN_PLACE_NOPTRS_ctr + UPD_NEW_IN_PLACE_PTRS_ctr + UPD_NEW_IND_ctr; + I_ tot_old_updates = + UPD_OLD_IN_PLACE_NOPTRS_ctr + UPD_OLD_IN_PLACE_PTRS_ctr + UPD_OLD_IND_ctr; + I_ tot_gengc_updates = + tot_new_updates + tot_old_updates; + + fprintf(tickyfile,"\n\nALLOCATIONS: %ld (%ld words total: %ld admin, %ld goods, %ld slop)\n", + tot_allocs, tot_wds, tot_adm_wds, tot_gds_wds, tot_slp_wds); + fprintf(tickyfile,"\t\t\t\ttotal words:\t 2 3 4 5 6+\n"); + +#define ALLOC_HISTO_MAGIC(categ) \ + (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[0], CAT3(ALLOC_,categ,_ctr)))), \ + (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[1], CAT3(ALLOC_,categ,_ctr)))), \ + (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[2], CAT3(ALLOC_,categ,_ctr)))), \ + (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[3], CAT3(ALLOC_,categ,_ctr)))), \ + (PC(INTAVG(CAT3(ALLOC_,categ,_hst)[4], CAT3(ALLOC_,categ,_ctr)))) + + fprintf(tickyfile,"%7ld (%5.1f%%) function values", + ALLOC_FUN_ctr, + PC(INTAVG(ALLOC_FUN_ctr, tot_allocs))); + if (ALLOC_FUN_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", 
ALLOC_HISTO_MAGIC(FUN)); + + fprintf(tickyfile,"\n%7ld (%5.1f%%) thunks", + ALLOC_THK_ctr, + PC(INTAVG(ALLOC_THK_ctr, tot_allocs))); + if (ALLOC_THK_ctr != 0) + fprintf(tickyfile,"\t\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(THK)); + + fprintf(tickyfile,"\n%7ld (%5.1f%%) data values", + ALLOC_CON_ctr, + PC(INTAVG(ALLOC_CON_ctr, tot_allocs))); + if (ALLOC_CON_ctr != 0) + fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(CON)); + + fprintf(tickyfile,"\n%7ld (%5.1f%%) big tuples", + ALLOC_TUP_ctr, + PC(INTAVG(ALLOC_TUP_ctr, tot_allocs))); + if (ALLOC_TUP_ctr != 0) + fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TUP)); + + fprintf(tickyfile,"\n%7ld (%5.1f%%) black holes", + ALLOC_BH_ctr, + PC(INTAVG(ALLOC_BH_ctr, tot_allocs))); + if (ALLOC_BH_ctr != 0) + fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BH)); + + fprintf(tickyfile,"\n%7ld (%5.1f%%) prim things", + ALLOC_PRIM_ctr, + PC(INTAVG(ALLOC_PRIM_ctr, tot_allocs))); + if (ALLOC_PRIM_ctr != 0) + fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PRIM)); + +#if 0 + fprintf(tickyfile,"\n%7ld (%5.1f%%) partial applications", + ALLOC_PAP_ctr, + PC(INTAVG(ALLOC_PAP_ctr, tot_allocs))); + if (ALLOC_PAP_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PAP)); +#endif /* 0 */ + + fprintf(tickyfile,"\n%7ld (%5.1f%%) partial applications", + ALLOC_UPD_PAP_ctr, + PC(INTAVG(ALLOC_UPD_PAP_ctr, tot_allocs))); + if (ALLOC_UPD_PAP_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(UPD_PAP)); + +#if 0 + fprintf(tickyfile,"\n%7ld (%5.1f%%) data-value updates", + ALLOC_UPD_CON_ctr, + PC(INTAVG(ALLOC_UPD_CON_ctr, tot_allocs))); + if (ALLOC_UPD_CON_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(UPD_CON)); +#endif /* 0 */ + +#ifdef CONCURRENT + fprintf(tickyfile,"\n%7ld (%5.1f%%) stack objects", + ALLOC_STK_ctr, + 
PC(INTAVG(ALLOC_STK_ctr, tot_allocs))); + if (ALLOC_STK_ctr != 0) + fprintf(tickyfile,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(STK)); + fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects", + ALLOC_TSO_ctr, + PC(INTAVG(ALLOC_TSO_ctr, tot_allocs))); + if (ALLOC_TSO_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TSO)); +#ifdef PAR + fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects", + ALLOC_FMBQ_ctr, + PC(INTAVG(ALLOC_FMBQ_ctr, tot_allocs))); + if (ALLOC_FMBQ_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FMBQ)); + fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects", + ALLOC_FME_ctr, + PC(INTAVG(ALLOC_FME_ctr, tot_allocs))); + if (ALLOC_FME_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FME)); + fprintf(tickyfile,"\n%7ld (%5.1f%%) thread state objects", + ALLOC_BF_ctr, + PC(INTAVG(ALLOC_BF_ctr, tot_allocs))); + if (ALLOC_BF_ctr != 0) + fprintf(tickyfile,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BF)); +#endif +#endif + fprintf(tickyfile,"\n"); + + fprintf(tickyfile,"\nTotal storage-manager allocations: %ld (%ld words)\n\t[%ld words lost to speculative heap-checks]\n", ALLOC_HEAP_ctr, ALLOC_HEAP_tot, ALLOC_HEAP_tot - tot_wds); + + fprintf(tickyfile,"\nSTACK USAGE:\n"); /* NB: some bits are direction sensitive */ + fprintf(tickyfile,"\tA stack slots stubbed: %ld\n", A_STK_STUB_ctr); +/* not used at all + fprintf(tickyfile,"\tA stack slots re-used: %ld\n", A_STK_REUSE_ctr); + fprintf(tickyfile,"\tB stack slots re-used: %ld\n", B_STK_REUSE_ctr); +*/ +#ifndef CONCURRENT + fprintf(tickyfile,"\tA stack max. depth: %ld words\n", + (I_) (stackInfo.botA - max_SpA)); + fprintf(tickyfile,"\tB stack max. 
depth: %ld words\n", + (I_) (max_SpB - stackInfo.botB)); /* And cheating, too (ToDo) */ +#endif + + fprintf(tickyfile,"\nENTERS: %ld of which %ld (%.1f%%) direct to the entry code\n\t\t [the rest indirected via Node's info ptr]\n", + tot_enters, + jump_direct_enters, + PC(INTAVG(jump_direct_enters,tot_enters))); + fprintf(tickyfile,"%7ld (%5.1f%%) thunks\n", + ENT_THK_ctr, + PC(INTAVG(ENT_THK_ctr,tot_enters))); + fprintf(tickyfile,"%7ld (%5.1f%%) data values\n", + ENT_CON_ctr, + PC(INTAVG(ENT_CON_ctr,tot_enters))); + fprintf(tickyfile,"%7ld (%5.1f%%) function values\n\t\t [of which %ld (%.1f%%) bypassed arg-satisfaction chk]\n", + ENT_FUN_DIRECT_ctr, + PC(INTAVG(ENT_FUN_DIRECT_ctr,tot_enters)), + bypass_enters, + PC(INTAVG(bypass_enters,ENT_FUN_DIRECT_ctr))); + fprintf(tickyfile,"%7ld (%5.1f%%) partial applications\n", + ENT_PAP_ctr, + PC(INTAVG(ENT_PAP_ctr,tot_enters))); + fprintf(tickyfile,"%7ld (%5.1f%%) indirections\n", + ENT_IND_ctr, + PC(INTAVG(ENT_IND_ctr,tot_enters))); + + fprintf(tickyfile,"\nRETURNS: %ld\n", tot_returns); + fprintf(tickyfile,"%7ld (%5.1f%%) in registers [the rest in the heap]\n", + tot_returns_in_regs, + PC(INTAVG(tot_returns_in_regs,tot_returns))); + fprintf(tickyfile,"%7ld (%5.1f%%) from entering a new constructor\n\t\t [the rest from entering an existing constructor]\n", + tot_returns_of_new, + PC(INTAVG(tot_returns_of_new,tot_returns))); + fprintf(tickyfile,"%7ld (%5.1f%%) vectored [the rest unvectored]\n", + VEC_RETURN_ctr, + PC(INTAVG(VEC_RETURN_ctr,tot_returns))); + + fprintf(tickyfile,"\nUPDATE FRAMES: %ld (%ld omitted from thunks)\n", + tot_upd_frames, + UPDF_OMITTED_ctr); + fprintf(tickyfile,"%7ld (%5.1f%%) standard frames\n", + UPDF_STD_PUSHED_ctr, + PC(INTAVG(UPDF_STD_PUSHED_ctr,tot_upd_frames))); + fprintf(tickyfile,"%7ld (%5.1f%%) constructor frames\n", + UPDF_CON_PUSHED_ctr, + PC(INTAVG(UPDF_CON_PUSHED_ctr,tot_upd_frames))); + fprintf(tickyfile,"\t\t [of which %ld (%.1f%%) were for black-holes]\n", + UPDF_HOLE_PUSHED_ctr, + 
PC(INTAVG(UPDF_HOLE_PUSHED_ctr,UPDF_CON_PUSHED_ctr))); /*DBH*/ + + if (UPDF_RCC_PUSHED_ctr != 0) + fprintf(tickyfile,"%7ld restore cost centre frames (%ld omitted)\n", + UPDF_RCC_PUSHED_ctr, + UPDF_RCC_OMITTED_ctr); + + fprintf(tickyfile,"\nUPDATES: %ld\n", tot_updates); + fprintf(tickyfile,"%7ld (%5.1f%%) data values\n\t\t [%ld in place, %ld allocated new space, %ld with Node]\n", + con_updates, + PC(INTAVG(con_updates,tot_updates)), + UPD_CON_IN_PLACE_ctr, UPD_CON_IN_NEW_ctr, UPD_CON_W_NODE_ctr); + fprintf(tickyfile,"%7ld (%5.1f%%) partial applications\n\t\t [%ld in place, %ld allocated new space]\n", + pap_updates, + PC(INTAVG(pap_updates,tot_updates)), + UPD_PAP_IN_PLACE_ctr, UPD_PAP_IN_NEW_ctr); + fprintf(tickyfile,"%7ld (%5.1f%%) updates to existing heap objects\n", + UPD_EXISTING_ctr, + PC(INTAVG(UPD_EXISTING_ctr,tot_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) in-place updates copied\n", + UPD_IN_PLACE_COPY_ctr, + PC(INTAVG(UPD_IN_PLACE_COPY_ctr,tot_in_place_updates))); + if (UPD_ENTERED_ctr != 0) { + fprintf(tickyfile,"%7ld (%5.1f%%) subsequently entered\n", + UPD_ENTERED_ctr, + PC(INTAVG(UPD_ENTERED_ctr,tot_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) subsequently entered more than once\n", + UPD_ENTERED_AGAIN_ctr, + PC(INTAVG(UPD_ENTERED_AGAIN_ctr,tot_updates))); + } + + if (tot_gengc_updates != 0) { + fprintf(tickyfile,"\nNEW GEN UPDATES: %ld (%5.1f%%)\n", + tot_new_updates, + PC(INTAVG(tot_new_updates,tot_gengc_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) indirections\n", + UPD_NEW_IND_ctr, + PC(INTAVG(UPD_NEW_IND_ctr,tot_gengc_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) inplace with ptrs\n", + UPD_NEW_IN_PLACE_PTRS_ctr, + PC(INTAVG(UPD_NEW_IN_PLACE_PTRS_ctr,tot_gengc_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) inplace without ptrs\n", + UPD_NEW_IN_PLACE_NOPTRS_ctr, + PC(INTAVG(UPD_NEW_IN_PLACE_NOPTRS_ctr,tot_gengc_updates))); + fprintf(tickyfile,"\nOLD GEN UPDATES: %ld (%5.1f%%)\n", + tot_old_updates, + 
PC(INTAVG(tot_old_updates,tot_gengc_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) indirections\n", + UPD_OLD_IND_ctr, + PC(INTAVG(UPD_OLD_IND_ctr,tot_gengc_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) inplace with ptrs\n", + UPD_OLD_IN_PLACE_PTRS_ctr, + PC(INTAVG(UPD_OLD_IN_PLACE_PTRS_ctr,tot_gengc_updates))); + fprintf(tickyfile,"%7ld (%5.1f%%) inplace without ptrs\n", + UPD_OLD_IN_PLACE_NOPTRS_ctr, + PC(INTAVG(UPD_OLD_IN_PLACE_NOPTRS_ctr,tot_gengc_updates))); + } + + printRegisteredCounterInfo(); + + fprintf(tickyfile,"\n**************************************************\n"); + fprintf(tickyfile,"%6ld ALLOC_HEAP_ctr\n", ALLOC_HEAP_ctr); + fprintf(tickyfile,"%6ld ALLOC_HEAP_tot\n", ALLOC_HEAP_tot); + +#ifndef CONCURRENT + fprintf(tickyfile,"%6ld HWM_SpA\n", (I_) (stackInfo.botA - max_SpA)); + fprintf(tickyfile,"%6ld HWM_SpB\n", (I_) (max_SpB - stackInfo.botB)); +#endif + + fprintf(tickyfile,"%6ld ALLOC_FUN_ctr\n", ALLOC_FUN_ctr); + fprintf(tickyfile,"%6ld ALLOC_FUN_adm\n", ALLOC_FUN_adm); + fprintf(tickyfile,"%6ld ALLOC_FUN_gds\n", ALLOC_FUN_gds); + fprintf(tickyfile,"%6ld ALLOC_FUN_slp\n", ALLOC_FUN_slp); + fprintf(tickyfile,"%6ld ALLOC_THK_ctr\n", ALLOC_THK_ctr); + fprintf(tickyfile,"%6ld ALLOC_THK_adm\n", ALLOC_THK_adm); + fprintf(tickyfile,"%6ld ALLOC_THK_gds\n", ALLOC_THK_gds); + fprintf(tickyfile,"%6ld ALLOC_THK_slp\n", ALLOC_THK_slp); + fprintf(tickyfile,"%6ld ALLOC_CON_ctr\n", ALLOC_CON_ctr); + fprintf(tickyfile,"%6ld ALLOC_CON_adm\n", ALLOC_CON_adm); + fprintf(tickyfile,"%6ld ALLOC_CON_gds\n", ALLOC_CON_gds); + fprintf(tickyfile,"%6ld ALLOC_CON_slp\n", ALLOC_CON_slp); + fprintf(tickyfile,"%6ld ALLOC_TUP_ctr\n", ALLOC_TUP_ctr); + fprintf(tickyfile,"%6ld ALLOC_TUP_adm\n", ALLOC_TUP_adm); + fprintf(tickyfile,"%6ld ALLOC_TUP_gds\n", ALLOC_TUP_gds); + fprintf(tickyfile,"%6ld ALLOC_TUP_slp\n", ALLOC_TUP_slp); + fprintf(tickyfile,"%6ld ALLOC_BH_ctr\n", ALLOC_BH_ctr); + fprintf(tickyfile,"%6ld ALLOC_BH_adm\n", ALLOC_BH_adm); + fprintf(tickyfile,"%6ld 
ALLOC_BH_gds\n", ALLOC_BH_gds); + fprintf(tickyfile,"%6ld ALLOC_BH_slp\n", ALLOC_BH_slp); +/* + fprintf(tickyfile,"%6ld ALLOC_PAP_ctr\n", ALLOC_PAP_ctr); + fprintf(tickyfile,"%6ld ALLOC_PAP_adm\n", ALLOC_PAP_adm); + fprintf(tickyfile,"%6ld ALLOC_PAP_gds\n", ALLOC_PAP_gds); + fprintf(tickyfile,"%6ld ALLOC_PAP_slp\n", ALLOC_PAP_slp); +*/ + fprintf(tickyfile,"%6ld ALLOC_PRIM_ctr\n", ALLOC_PRIM_ctr); + fprintf(tickyfile,"%6ld ALLOC_PRIM_adm\n", ALLOC_PRIM_adm); + fprintf(tickyfile,"%6ld ALLOC_PRIM_gds\n", ALLOC_PRIM_gds); + fprintf(tickyfile,"%6ld ALLOC_PRIM_slp\n", ALLOC_PRIM_slp); +/* + fprintf(tickyfile,"%6ld ALLOC_UPD_CON_ctr\n", ALLOC_UPD_CON_ctr); + fprintf(tickyfile,"%6ld ALLOC_UPD_CON_adm\n", ALLOC_UPD_CON_adm); + fprintf(tickyfile,"%6ld ALLOC_UPD_CON_gds\n", ALLOC_UPD_CON_gds); + fprintf(tickyfile,"%6ld ALLOC_UPD_CON_slp\n", ALLOC_UPD_CON_slp); +*/ + fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_ctr\n", ALLOC_UPD_PAP_ctr); + fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_adm\n", ALLOC_UPD_PAP_adm); + fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_gds\n", ALLOC_UPD_PAP_gds); + fprintf(tickyfile,"%6ld ALLOC_UPD_PAP_slp\n", ALLOC_UPD_PAP_slp); + +#ifdef CONCURRENT + fprintf(tickyfile,"%6ld ALLOC_STK_ctr\n", ALLOC_STK_ctr); + fprintf(tickyfile,"%6ld ALLOC_STK_adm\n", ALLOC_STK_adm); + fprintf(tickyfile,"%6ld ALLOC_STK_gds\n", ALLOC_STK_gds); + fprintf(tickyfile,"%6ld ALLOC_STK_slp\n", ALLOC_STK_slp); + fprintf(tickyfile,"%6ld ALLOC_TSO_ctr\n", ALLOC_TSO_ctr); + fprintf(tickyfile,"%6ld ALLOC_TSO_adm\n", ALLOC_TSO_adm); + fprintf(tickyfile,"%6ld ALLOC_TSO_gds\n", ALLOC_TSO_gds); + fprintf(tickyfile,"%6ld ALLOC_TSO_slp\n", ALLOC_TSO_slp); +#ifdef PAR + fprintf(tickyfile,"%6ld ALLOC_FMBQ_ctr\n", ALLOC_FMBQ_ctr); + fprintf(tickyfile,"%6ld ALLOC_FMBQ_adm\n", ALLOC_FMBQ_adm); + fprintf(tickyfile,"%6ld ALLOC_FMBQ_gds\n", ALLOC_FMBQ_gds); + fprintf(tickyfile,"%6ld ALLOC_FMBQ_slp\n", ALLOC_FMBQ_slp); + fprintf(tickyfile,"%6ld ALLOC_FME_ctr\n", ALLOC_FME_ctr); + fprintf(tickyfile,"%6ld 
ALLOC_FME_adm\n", ALLOC_FME_adm); + fprintf(tickyfile,"%6ld ALLOC_FME_gds\n", ALLOC_FME_gds); + fprintf(tickyfile,"%6ld ALLOC_FME_slp\n", ALLOC_FME_slp); + fprintf(tickyfile,"%6ld ALLOC_BF_ctr\n", ALLOC_BF_ctr); + fprintf(tickyfile,"%6ld ALLOC_BF_adm\n", ALLOC_BF_adm); + fprintf(tickyfile,"%6ld ALLOC_BF_gds\n", ALLOC_BF_gds); + fprintf(tickyfile,"%6ld ALLOC_BF_slp\n", ALLOC_BF_slp); +#endif +#endif + + fprintf(tickyfile,"%6ld ENT_VIA_NODE_ctr\n", ENT_VIA_NODE_ctr); + fprintf(tickyfile,"%6ld ENT_CON_ctr\n", ENT_CON_ctr); + fprintf(tickyfile,"%6ld ENT_FUN_STD_ctr\n", ENT_FUN_STD_ctr); + fprintf(tickyfile,"%6ld ENT_FUN_DIRECT_ctr\n", ENT_FUN_DIRECT_ctr); + fprintf(tickyfile,"%6ld ENT_IND_ctr\n", ENT_IND_ctr); + fprintf(tickyfile,"%6ld ENT_PAP_ctr\n", ENT_PAP_ctr); + fprintf(tickyfile,"%6ld ENT_THK_ctr\n", ENT_THK_ctr); + + fprintf(tickyfile,"%6ld RET_NEW_IN_HEAP_ctr\n", RET_NEW_IN_HEAP_ctr); + fprintf(tickyfile,"%6ld RET_NEW_IN_REGS_ctr\n", RET_NEW_IN_REGS_ctr); + fprintf(tickyfile,"%6ld RET_OLD_IN_HEAP_ctr\n", RET_OLD_IN_HEAP_ctr); + fprintf(tickyfile,"%6ld RET_OLD_IN_REGS_ctr\n", RET_OLD_IN_REGS_ctr); + fprintf(tickyfile,"%6ld RET_SEMI_BY_DEFAULT_ctr\n", RET_SEMI_BY_DEFAULT_ctr); + fprintf(tickyfile,"%6ld RET_SEMI_IN_HEAP_ctr\n", RET_SEMI_IN_HEAP_ctr); + fprintf(tickyfile,"%6ld RET_SEMI_IN_REGS_ctr\n", RET_SEMI_IN_REGS_ctr); + fprintf(tickyfile,"%6ld VEC_RETURN_ctr\n", VEC_RETURN_ctr); + + fprintf(tickyfile,"%6ld UPDF_OMITTED_ctr\n", UPDF_OMITTED_ctr); + fprintf(tickyfile,"%6ld UPDF_STD_PUSHED_ctr\n", UPDF_STD_PUSHED_ctr); + fprintf(tickyfile,"%6ld UPDF_CON_PUSHED_ctr\n", UPDF_CON_PUSHED_ctr); + fprintf(tickyfile,"%6ld UPDF_HOLE_PUSHED_ctr\n", UPDF_HOLE_PUSHED_ctr); + + fprintf(tickyfile,"%6ld UPDF_RCC_PUSHED_ctr\n", UPDF_RCC_PUSHED_ctr); + fprintf(tickyfile,"%6ld UPDF_RCC_OMITTED_ctr\n", UPDF_RCC_OMITTED_ctr); + + fprintf(tickyfile,"%6ld UPD_EXISTING_ctr\n", UPD_EXISTING_ctr); + fprintf(tickyfile,"%6ld UPD_CON_W_NODE_ctr\n", UPD_CON_W_NODE_ctr); + 
fprintf(tickyfile,"%6ld UPD_CON_IN_PLACE_ctr\n", UPD_CON_IN_PLACE_ctr); + fprintf(tickyfile,"%6ld UPD_CON_IN_NEW_ctr\n", UPD_CON_IN_NEW_ctr); + fprintf(tickyfile,"%6ld UPD_PAP_IN_PLACE_ctr\n", UPD_PAP_IN_PLACE_ctr); + fprintf(tickyfile,"%6ld UPD_PAP_IN_NEW_ctr\n", UPD_PAP_IN_NEW_ctr); + fprintf(tickyfile,"%6ld UPD_ENTERED_ctr\n", UPD_ENTERED_ctr); + fprintf(tickyfile,"%6ld UPD_ENTERED_AGAIN_ctr\n",UPD_ENTERED_AGAIN_ctr); + + fprintf(tickyfile,"%6ld UPD_NEW_IND_ctr\n", UPD_NEW_IND_ctr); + fprintf(tickyfile,"%6ld UPD_NEW_IN_PLACE_PTRS_ctr\n", UPD_NEW_IN_PLACE_PTRS_ctr); + fprintf(tickyfile,"%6ld UPD_NEW_IN_PLACE_NOPTRS_ctr\n", UPD_NEW_IN_PLACE_NOPTRS_ctr); + fprintf(tickyfile,"%6ld UPD_OLD_IND_ctr\n", UPD_OLD_IND_ctr); + fprintf(tickyfile,"%6ld UPD_OLD_IN_PLACE_PTRS_ctr\n", UPD_OLD_IN_PLACE_PTRS_ctr); + fprintf(tickyfile,"%6ld UPD_OLD_IN_PLACE_NOPTRS_ctr\n", UPD_OLD_IN_PLACE_NOPTRS_ctr); +} +\end{code} + +%************************************************************************ +%* * +\subsection[RednCounts-ent-counters]{Handle named entry counters} +%* * +%************************************************************************ + +Data structure used in ``registering'' one of these counters. 
+\begin{code} +struct ent_counter *ListOfEntryCtrs = NULL; /* root of list of them */ +\end{code} + +To print out all the registered-counter info: +\begin{code} +void +printRegisteredCounterInfo ( STG_NO_ARGS ) +{ + struct ent_counter *p; + + if ( ListOfEntryCtrs != NULL ) { + fprintf(tickyfile,"\n**************************************************\n"); + } + + for (p = ListOfEntryCtrs; p != NULL; p = p->link) { + /* common stuff first; then the wrapper info if avail */ + fprintf(tickyfile, "%-40s%u\t%u\t%u\t%-16s%ld", + p->f_str, + p->arity, + p->Astk_args, + p->Bstk_args, + p->f_arg_kinds, + p->ctr); + + if ( p->wrap_str == NULL ) { + fprintf(tickyfile, "\n"); + + } else { + fprintf(tickyfile, "\t%s\t%s\n", + p->wrap_str, + p->wrap_arg_kinds); + } + } +} +\end{code} + +That's all, folks. +\begin{code} +#endif /* DO_REDN_COUNTING */ +\end{code} diff --git a/ghc/runtime/main/SMRep.lc b/ghc/runtime/main/SMRep.lc new file mode 100644 index 0000000000..2609195521 --- /dev/null +++ b/ghc/runtime/main/SMRep.lc @@ -0,0 +1,204 @@ +% +% (c) The AQUA Project, Glasgow University, 1994 +% + +% Guilty party: BOS + +%************************************************************************ +%* * +\section[Rep.lc]{Global rep tables} +%* * +%************************************************************************ + +These are the single, global static instances of each rep table type. 
+ +\begin{code} +#define COMPILING_REP_LC + +#include "rtsdefs.h" +#include "../storage/SMinternal.h" + +EXTFUN(_PRIn_0); +EXTFUN(_PRIn_1); +EXTFUN(_PRIn_2); +EXTFUN(_PRIn_3); +EXTFUN(_PRIn_4); +EXTFUN(_PRIn_5); +EXTFUN(_PRIn_6); +EXTFUN(_PRIn_7); +EXTFUN(_PRIn_8); +EXTFUN(_PRIn_9); +EXTFUN(_PRIn_10); +EXTFUN(_PRIn_11); +EXTFUN(_PRIn_12); + +/* SPEC_x_RTBL(size,ptrs) */ + +SPEC_N_RTBL(1,0); +SPEC_N_RTBL(1,1); +SPEC_N_RTBL(2,0); +SPEC_N_RTBL(2,1); +SPEC_N_RTBL(2,2); +SPEC_N_RTBL(3,0); +SPEC_N_RTBL(3,1); +SPEC_N_RTBL(3,2); +SPEC_N_RTBL(3,3); +SPEC_N_RTBL(4,0); +SPEC_N_RTBL(4,4); +SPEC_N_RTBL(5,0); +SPEC_N_RTBL(5,5); +SPEC_N_RTBL(6,6); +SPEC_N_RTBL(7,7); +SPEC_N_RTBL(8,8); +SPEC_N_RTBL(9,9); +SPEC_N_RTBL(10,10); +SPEC_N_RTBL(11,11); +SPEC_N_RTBL(12,12); + +SPEC_S_RTBL(1,0); +SPEC_S_RTBL(1,1); +SPEC_S_RTBL(2,0); +SPEC_S_RTBL(2,1); +SPEC_S_RTBL(2,2); +SPEC_S_RTBL(3,0); +SPEC_S_RTBL(3,1); +SPEC_S_RTBL(3,2); +SPEC_S_RTBL(3,3); +SPEC_S_RTBL(4,0); +SPEC_S_RTBL(4,4); +SPEC_S_RTBL(5,0); +SPEC_S_RTBL(5,5); +SPEC_S_RTBL(6,6); +SPEC_S_RTBL(7,7); +SPEC_S_RTBL(8,8); +SPEC_S_RTBL(9,9); +SPEC_S_RTBL(10,10); +SPEC_S_RTBL(11,11); +SPEC_S_RTBL(12,12); + +SPEC_U_RTBL(1,0); +SPEC_U_RTBL(1,1); +SPEC_U_RTBL(2,0); +SPEC_U_RTBL(2,1); +SPEC_U_RTBL(2,2); +SPEC_U_RTBL(3,0); +SPEC_U_RTBL(3,1); +SPEC_U_RTBL(3,2); +SPEC_U_RTBL(3,3); +SPEC_U_RTBL(4,0); +SPEC_U_RTBL(4,4); +SPEC_U_RTBL(5,0); +SPEC_U_RTBL(5,5); +SPEC_U_RTBL(6,6); +SPEC_U_RTBL(7,7); +SPEC_U_RTBL(8,8); +SPEC_U_RTBL(9,9); +SPEC_U_RTBL(10,10); +SPEC_U_RTBL(11,11); +SPEC_U_RTBL(12,12); + +/* SELECT_RTBL(size,ptrs,select_word_i) */ + +SELECT_RTBL(2,1,0); +SELECT_RTBL(2,1,1); +SELECT_RTBL(2,1,2); +SELECT_RTBL(2,1,3); +SELECT_RTBL(2,1,4); +SELECT_RTBL(2,1,5); +SELECT_RTBL(2,1,6); +SELECT_RTBL(2,1,7); +SELECT_RTBL(2,1,8); +SELECT_RTBL(2,1,9); +SELECT_RTBL(2,1,10); +SELECT_RTBL(2,1,11); +SELECT_RTBL(2,1,12); + +GEN_N_RTBL(); +GEN_S_RTBL(); +GEN_U_RTBL(); +DYN_RTBL(); +TUPLE_RTBL(); +DATA_RTBL(); +MUTUPLE_RTBL(); +IMMUTUPLE_RTBL(); 
+STATIC_RTBL(); + +#ifndef PAR +MallocPtr_RTBL(); +#endif + +BH_RTBL(N); +BH_RTBL(U); + +IND_RTBL(); +PERM_IND_RTBL(); +CAF_RTBL(); +CONST_RTBL(); +CHARLIKE_RTBL(); +INTLIKE_RTBL(); + +CAF_EVAC_UPD_RTBL(); + +#ifdef GCgn +FORWARDREF_RTBL(_Evacuate_Old_Forward_Ref); +FORWARDREF_RTBL(_Evacuate_New_Forward_Ref); +FORWARDREF_RTBL(_Evacuate_OldRoot_Forward); +#endif +FORWARDREF_RTBL(_Evacuate_Forward_Ref); + +#ifdef _INFO_MARKING +DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextRoot,_Dummy_PRReturn_entry); +DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextCAF,_Dummy_PRReturn_entry); +# ifdef CONCURRENT +DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextSpark,_Dummy_PRReturn_entry); +# endif +# ifdef PAR +DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextGA,_Dummy_PRReturn_entry); +# else +DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextAStack,_Dummy_PRReturn_entry); +DUMMY_PRRETURN_RTBL(_PRMarking_MarkNextBStack,_Dummy_PRReturn_entry); +# endif +#endif + +#ifdef GCgn +OLDROOT_RTBL(); +#endif + +#ifdef CONCURRENT +TSO_RTBL(); +STKO_RTBL(); +BQ_RTBL(); +# ifndef PAR +STKO_STATIC_RTBL(); +# else +FETCHME_RTBL(); +FMBQ_RTBL(); +BF_RTBL(); +# endif +#endif + +#ifdef PAR +SPEC_RBH_RTBL(2,0); +SPEC_RBH_RTBL(2,1); +SPEC_RBH_RTBL(2,2); +SPEC_RBH_RTBL(3,0); +SPEC_RBH_RTBL(3,1); +SPEC_RBH_RTBL(3,2); +SPEC_RBH_RTBL(3,3); +SPEC_RBH_RTBL(4,0); +SPEC_RBH_RTBL(4,4); +SPEC_RBH_RTBL(5,0); +SPEC_RBH_RTBL(5,5); +SPEC_RBH_RTBL(6,6); +SPEC_RBH_RTBL(7,7); +SPEC_RBH_RTBL(8,8); +SPEC_RBH_RTBL(9,9); +SPEC_RBH_RTBL(10,10); +SPEC_RBH_RTBL(11,11); +SPEC_RBH_RTBL(12,12); + +GEN_RBH_RTBL(); +#endif + + +\end{code} diff --git a/ghc/runtime/main/Select.lc b/ghc/runtime/main/Select.lc new file mode 100644 index 0000000000..1f10c7ac85 --- /dev/null +++ b/ghc/runtime/main/Select.lc @@ -0,0 +1,123 @@ +% +% (c) The AQUA Project, Glasgow University, 1995 +% +%************************************************************************ +%* * +\section[Select.lc]{Select Available File Descriptors} +%* * 
+%************************************************************************ + +\begin{code} + +#ifdef CONCURRENT + +/* #define STK_CHK_DEBUG */ + +#define NULL_REG_MAP +#define NON_POSIX_SOURCE +/* Should there be a POSIX alternative based on poll()? */ +#include "stgdefs.h" + +# if defined(HAVE_SYS_TYPES_H) +# include <sys/types.h> +# endif + +# ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +# endif + +void +AwaitEvent(delta) +I_ delta; +{ + P_ tso, prev, next; + rtsBool ready; + fd_set rfd; + I_ us; + I_ min; + struct timeval tv; + + min = delta == 0 ? 0x7fffffff : 0; + + /* + * Collect all of the fd's that we're interested in, and capture + * the minimum waiting time for the delayed threads. + */ + FD_ZERO(&rfd); + for(tso = WaitingThreadsHd; tso != Nil_closure; tso = TSO_LINK(tso)) { + us = (I_) TSO_EVENT(tso); + if (us > 0) { + /* Looking at a delay event */ + if (us < min) + min = us; + } else { + /* Looking at a wait event */ + FD_SET((-us), &rfd); + } + } + + /* Check for any interesting events */ + + tv.tv_sec = min / 1000000; + tv.tv_usec = min % 1000000; + + while (select(FD_SETSIZE, &rfd, NULL, NULL, &tv) < 0) { + if (errno != EINTR) { + fflush(stdout); + fprintf(stderr, "AwaitEvent: select failed\n"); + EXIT(EXIT_FAILURE); + } + } + + if (delta == 0) + delta = min; + + prev = NULL; + for(tso = WaitingThreadsHd; tso != Nil_closure; tso = next) { + next = TSO_LINK(tso); + us = (I_) TSO_EVENT(tso); + if (us > 0) { + /* Looking at a delay event */ + us -= delta; + ready = (us <= 0); + if (!ready) + TSO_EVENT(tso) = (W_) us; + } else { + /* Looking at a wait event */ + ready = FD_ISSET((-us), &rfd); + } + if (ready) { + +#if defined(GRAN) + if (ThreadQueueTl == Nil_closure) + ThreadQueueHd = tso; + else + TSO_LINK(ThreadQueueTl) = tso; + ThreadQueueTl = tso; + TSO_LINK(tso) = Nil_closure; +#else + if (RunnableThreadsTl == Nil_closure) + RunnableThreadsHd = tso; + else + TSO_LINK(RunnableThreadsTl) = tso; + RunnableThreadsTl = tso; + TSO_LINK(tso) = 
Nil_closure; +#endif + } else { + if (prev == NULL) + WaitingThreadsHd = tso; + else + TSO_LINK(prev) = tso; + prev = tso; + } + } + if (prev == NULL) + WaitingThreadsHd = WaitingThreadsTl = Nil_closure; + else { + TSO_LINK(prev) = Nil_closure; + WaitingThreadsTl = prev; + } +} + +#endif /* CONCURRENT */ +\end{code} diff --git a/ghc/runtime/main/Signals.lc b/ghc/runtime/main/Signals.lc new file mode 100644 index 0000000000..3796f9965d --- /dev/null +++ b/ghc/runtime/main/Signals.lc @@ -0,0 +1,588 @@ +% +% (c) The AQUA Project, Glasgow University, 1995 +% +%************************************************************************ +%* * +\section[Signals.lc]{Signal Handlers} +%* * +%************************************************************************ + +There are two particular signals that we find interesting in the RTS: +segmentation faults (for cheap stack overflow checks) and virtual +timer alarms (for profiling and thread context switching). POSIX +compliance is supposed to make this kind of thing easy, but it +doesn't. Expect every new target platform to require gory hacks to +get this stuff to work. + +Then, there are the user-specified signal handlers to cope with. +Since they're pretty rudimentary, they shouldn't actually cause as +much pain. 
+ +\begin{code} + +#include "platform.h" + +#if defined(sunos4_TARGET_OS) + /* The sigaction in SunOS 4.1.X does not grok SA_SIGINFO */ +# define NON_POSIX_SOURCE +#endif + +#if defined(osf1_TARGET_OS) + /* The include files for OSF1 do not normally define SA_SIGINFO */ +# define _OSF_SOURCE 1 +#endif + +#if defined(linuxaout_TARGET_OS) || defined(linux_TARGET_OS) + /* I have no idea why this works (WDP 95/03) */ +# define _BSD_SOURCE 1 +#endif + +#include "rtsdefs.h" + +#if defined(HAVE_SYS_TYPES_H) +# include <sys/types.h> +#endif + +#if defined(HAVE_SIGNAL_H) +# include <signal.h> +#endif +#if irix_TARGET_OS +/* SIGVTALRM not avail w/ POSIX_SOURCE, but worse things happen without */ +/* SIGH: triple SIGH (WDP 95/07) */ +# define SIGVTALRM 28 +#endif + +#if defined(HAVE_SIGINFO_H) + /* DEC OSF1 seems to need this explicitly. Maybe others do as well? */ +# include <siginfo.h> +#endif + +\end{code} + +%************************************************************************ +%* * +\subsection{Stack-check by protected-memory-faulting} +%* * +%************************************************************************ + +If we are checking stack overflow by page faulting, then we need to be +able to install a @SIGSEGV@ handler, preferably one which can +determine where the fault occurred, so that we can satisfy ourselves +that it really was a stack overflow and not some random segmentation +fault. + +\begin{code} + +#if STACK_CHECK_BY_PAGE_FAULT + +extern P_ stks_space; /* Where the stacks live, from SMstacks.lc */ +extern I_ SM_word_stk_size; /* How big they are (ditto) */ + +\end{code} + +SunOS 4.x is too old to have @SA_SIGINFO@ as a flag to @sigaction@, so +we use the older @signal@ call instead. This means that we also have +to set up the handler to expect a different collection of arguments. +Fun, eh? 
+ +\begin{code} + +# if defined(sunos4_TARGET_OS) || defined(linuxaout_TARGET_OS) || defined(linux_TARGET_OS) + +static void +segv_handler(sig, code, scp, addr) + int sig; + int code; + struct sigcontext *scp; + caddr_t addr; +{ + extern void StackOverflow(STG_NO_ARGS) STG_NORETURN; + + if (addr >= (caddr_t) stks_space + && addr < (caddr_t) (stks_space + SM_word_stk_size)) + StackOverflow(); + + fflush(stdout); + fprintf(stderr, "Segmentation fault caught, address = %lx\n", (W_) addr); + abort(); +} + +int +install_segv_handler() +{ + return (int) signal(SIGSEGV, segv_handler) == -1; +} + +# else /* Not SunOS 4 */ + +# if defined(irix_TARGET_OS) + /* certainly BOGUS (WDP 94/05) -- copied from /usr/include/sys/siginfo.h */ +# define si_addr _data._fault._addr +# endif + +static void +segv_handler(sig, sip) + int sig; + siginfo_t *sip; +{ + fflush(stdout); + if (sip == NULL) { + fprintf(stderr, "Segmentation fault caught, address unknown\n"); + } else { + if (sip->si_addr >= (caddr_t) stks_space + && sip->si_addr < (caddr_t) (stks_space + SM_word_stk_size)) + StackOverflow(); + + fprintf(stderr, "Segmentation fault caught, address = %08lx\n", (W_) sip->si_addr); + } + abort(); +} + +int +install_segv_handler() +{ + struct sigaction action; + + action.sa_handler = segv_handler; + sigemptyset(&action.sa_mask); + action.sa_flags = SA_SIGINFO; + return sigaction(SIGSEGV, &action, NULL); +} + +# endif /* not SunOS 4 */ + +#endif /* STACK_CHECK_BY_PAGE_FAULT */ + +\end{code} + +%************************************************************************ +%* * +\subsection{Virtual-timer alarm (for profiling, etc.)} +%* * +%************************************************************************ + +The timer interrupt is somewhat simpler, and we could probably use +sigaction across the board, but since we have committed ourselves to +the non-POSIX signal under SunOS 4.1.X, we adopt the same approach +here. 
+ +\begin{code} +#if (defined(USE_COST_CENTRES) || defined(CONCURRENT)) && !defined(GRAN) + +# if defined(USE_COST_CENTRES) +extern I_ heap_profiling_req; +# endif + +# ifdef CONCURRENT + +# if defined(USE_COST_CENTRES) || defined(GUM) +I_ contextSwitchTicks; +I_ profilerTicks; +# endif + +# ifdef PAR +extern P_ CurrentTSO; +# endif +extern I_ contextSwitchTime; + +static void +vtalrm_handler(sig) + int sig; +{ +/* + For the parallel world, currentTSO is set if there is any work + on the current PE. In this case we DO want to context switch, + in case other PEs have sent us messages which must be processed. +*/ + +# if defined(USE_COST_CENTRES) || defined(GUM) + static I_ csTicks = 0, pTicks = 0; + + if (time_profiling) { + if (++pTicks % profilerTicks == 0) { +# if ! defined(USE_COST_CENTRES) + handle_tick_serial(); +# else + if (cc_profiling > 1 || heap_profiling_req != HEAP_NO_PROFILING) + handle_tick_serial(); + else + handle_tick_noserial(); +# endif + } + if (++csTicks % contextSwitchTicks != 0) + return; + } +# endif + + if (WaitingThreadsHd != Nil_closure) + AwaitEvent(contextSwitchTime); + +# ifdef PAR + if (PendingSparksTl[REQUIRED_POOL] == PendingSparksLim[REQUIRED_POOL] || + PendingSparksTl[ADVISORY_POOL] == PendingSparksLim[ADVISORY_POOL]) { + PruneSparks(); + if (PendingSparksTl[REQUIRED_POOL] == PendingSparksLim[REQUIRED_POOL]) + PendingSparksTl[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + + SparkLimit[REQUIRED_POOL] / 2; + if (PendingSparksTl[ADVISORY_POOL] == PendingSparksLim[ADVISORY_POOL]) + PendingSparksTl[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + + SparkLimit[ADVISORY_POOL] / 2; + } + + if (CurrentTSO != NULL || +# else + if (RunnableThreadsHd != Nil_closure || +# endif + PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] || + PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL]) { + /* ToDo: anything else for GRAN? 
WDP */ + context_switch = 1; + } +} + +# endif + +# if defined(sunos4_TARGET_OS) || defined(linuxaout_TARGET_OS) || defined(linux_TARGET_OS) + +int +install_vtalrm_handler() +{ + void (*old)(); + +# ifdef CONCURRENT + old = signal(SIGVTALRM, vtalrm_handler); +# else + if (cc_profiling > 1 || heap_profiling_req != HEAP_NO_PROFILING) + old = signal(SIGVTALRM, handle_tick_serial); + else + old = signal(SIGVTALRM, handle_tick_noserial); +# endif + return (int) old == -1; +} + +static int vtalrm_mask; + +void +blockVtAlrmSignal(STG_NO_ARGS) +{ + vtalrm_mask = sigblock(sigmask(SIGVTALRM)); +} + +void +unblockVtAlrmSignal(STG_NO_ARGS) +{ + (void) sigsetmask(vtalrm_mask); +} + +# else /* Not SunOS 4 */ + +int +install_vtalrm_handler(STG_NO_ARGS) +{ + struct sigaction action; + +# ifdef CONCURRENT + action.sa_handler = vtalrm_handler; +# else + if (cc_profiling > 1 || heap_profiling_req != HEAP_NO_PROFILING) + action.sa_handler = handle_tick_serial; + else + action.sa_handler = handle_tick_noserial; +# endif + + sigemptyset(&action.sa_mask); + action.sa_flags = 0; + + return sigaction(SIGVTALRM, &action, NULL); +} + +void +blockVtAlrmSignal(STG_NO_ARGS) +{ + sigset_t signals; + + sigemptyset(&signals); + sigaddset(&signals, SIGVTALRM); + + (void) sigprocmask(SIG_BLOCK, &signals, NULL); +} + +void +unblockVtAlrmSignal(STG_NO_ARGS) +{ + sigset_t signals; + + sigemptyset(&signals); + sigaddset(&signals, SIGVTALRM); + + (void) sigprocmask(SIG_UNBLOCK, &signals, NULL); +} + +# endif /* SunOS 4 */ + +#endif /* USE_COST_CENTRES || CONCURRENT (but not GRAN) */ + +\end{code} + +Signal handling support for user-specified signal handlers. Since we +need stable pointers to do this properly, we just refuse to try in the +parallel world. Sorry. 
+ +\begin{code} + +#ifdef PAR + +void +blockUserSignals() +{ + return; +} + +void +unblockUserSignals() +{ + return; +} + +I_ +# ifdef _POSIX_SOURCE +sig_install(sig, spi, mask) + sigset_t *mask; +# else + sig_install(sig, spi) +# endif + I_ sig; + I_ spi; +{ + fflush(stdout); + fprintf(stderr,"No signal handling support in a parallel implementation.\n"); + EXIT(EXIT_FAILURE); +} + +#else /* !PAR */ + +# include <setjmp.h> + +extern StgPtr deRefStablePointer PROTO((StgStablePtr)); +extern void freeStablePointer PROTO((I_)); +extern jmp_buf restart_main; + +static I_ *handlers = NULL; /* Dynamically grown array of signal handlers */ +static I_ nHandlers = 0; /* Size of handlers array */ + +static void +more_handlers(sig) + I_ sig; +{ + I_ i; + + if (sig < nHandlers) + return; + + if (handlers == NULL) + handlers = (I_ *) malloc((sig + 1) * sizeof(I_)); + else + handlers = (I_ *) realloc(handlers, (sig + 1) * sizeof(I_)); + + if (handlers == NULL) { + fflush(stdout); + fprintf(stderr, "VM exhausted\n"); + EXIT(EXIT_FAILURE); + } + for(i = nHandlers; i <= sig; i++) + /* Fill in the new slots with default actions */ + handlers[i] = STG_SIG_DFL; + + nHandlers = sig + 1; +} + +# ifdef _POSIX_SOURCE + +static void +generic_handler(sig) +{ + sigset_t signals; + + SAVE_Hp = SAVE_HpLim; /* Just to be safe */ + if (initStacks(&StorageMgrInfo) != 0) { + fflush(stdout); + fprintf(stderr, "initStacks failed!\n"); + EXIT(EXIT_FAILURE); + } + TopClosure = deRefStablePointer(handlers[sig]); + sigemptyset(&signals); + sigaddset(&signals, sig); + sigprocmask(SIG_UNBLOCK, &signals, NULL); + longjmp(restart_main, sig); +} + +static sigset_t userSignals; +static sigset_t savedSignals; + +void +initUserSignals() +{ + sigemptyset(&userSignals); +} + +void +blockUserSignals() +{ + sigprocmask(SIG_SETMASK, &userSignals, &savedSignals); +} + +void +unblockUserSignals() +{ + sigprocmask(SIG_SETMASK, &savedSignals, NULL); +} + + +I_ nocldstop = 0; + +I_ +sig_install(sig, spi, mask) + I_ sig; + 
I_ spi; + sigset_t *mask; +{ + sigset_t signals; + struct sigaction action; + I_ previous_spi; + + /* Block the signal until we figure out what to do */ + /* Count on this to fail if the signal number is invalid */ + if(sig < 0 || sigemptyset(&signals) || sigaddset(&signals, sig) || + sigprocmask(SIG_BLOCK, &signals, NULL)) + return STG_SIG_ERR; + + more_handlers(sig); + + previous_spi = handlers[sig]; + + switch(spi) { + case STG_SIG_IGN: + handlers[sig] = STG_SIG_IGN; + sigdelset(&userSignals, sig); + action.sa_handler = SIG_IGN; + break; + + case STG_SIG_DFL: + handlers[sig] = STG_SIG_DFL; + sigdelset(&userSignals, sig); + action.sa_handler = SIG_DFL; + break; + default: + handlers[sig] = spi; + sigaddset(&userSignals, sig); + action.sa_handler = generic_handler; + break; + } + + if (mask != NULL) + action.sa_mask = *mask; + else + sigemptyset(&action.sa_mask); + + action.sa_flags = sig == SIGCHLD && nocldstop ? SA_NOCLDSTOP : 0; + if (sigaction(sig, &action, NULL) || sigprocmask(SIG_UNBLOCK, &signals, NULL)) { + if (previous_spi) + freeStablePointer(handlers[sig]); + return STG_SIG_ERR; + } + + return previous_spi; +} + +# else /* !POSIX */ + +static void +generic_handler(sig) +{ + SAVE_Hp = SAVE_HpLim; /* Just to be safe */ + if (initStacks(&StorageMgrInfo) != 0) { + fflush(stdout); + fprintf(stderr, "initStacks failed!\n"); + EXIT(EXIT_FAILURE); + } + TopClosure = deRefStablePointer(handlers[sig]); + sigsetmask(0); + longjmp(restart_main, sig); +} + +static int userSignals; +static int savedSignals; + +void +initUserSignals() +{ + userSignals = 0; +} + +void +blockUserSignals() +{ + savedSignals = sigsetmask(userSignals); +} + +void +unblockUserSignals() +{ + sigsetmask(savedSignals); +} + +I_ +sig_install(sig, spi) + I_ sig; + I_ spi; +{ + I_ previous_spi; + int mask; + void (*handler)(); + + /* Block the signal until we figure out what to do */ + /* Count on this to fail if the signal number is invalid */ + if(sig < 0 || (mask = sigmask(sig)) == 0) + return 
STG_SIG_ERR; + + mask = sigblock(mask); + + more_handlers(sig); + + previous_spi = handlers[sig]; + + switch(spi) { + case STG_SIG_IGN: + handlers[sig] = STG_SIG_IGN; + userSignals &= ~sigmask(sig); + handler = SIG_IGN; + break; + + case STG_SIG_DFL: + handlers[sig] = STG_SIG_DFL; + userSignals &= ~sigmask(sig); + handler = SIG_DFL; + break; + default: + handlers[sig] = spi; + userSignals |= sigmask(sig); + handler = generic_handler; + break; + } + + if (signal(sig, handler) < 0) { + if (previous_spi) + freeStablePointer(handlers[sig]); + sigsetmask(mask); + return STG_SIG_ERR; + } + + sigsetmask(mask); + return previous_spi; +} + +# endif /* POSIX */ + +#endif /* PAR */ + +\end{code} diff --git a/ghc/runtime/main/StgOverflow.lc b/ghc/runtime/main/StgOverflow.lc new file mode 100644 index 0000000000..720f243f58 --- /dev/null +++ b/ghc/runtime/main/StgOverflow.lc @@ -0,0 +1,450 @@ +\section[stk-overflow]{Stack overflow routine} + +%************************************************************************ +%* * +\subsection[arity-error]{Arity error has nothing to do with stack overflow} +%* * +%************************************************************************ + +\begin{code} + +#include "rtsdefs.h" + +extern void PrintRednCountInfo(STG_NO_ARGS); +extern I_ showRednCountStats; + +#ifdef __DO_ARITY_CHKS__ +I_ ExpectedArity; + +void +ArityError(n) + I_ n; +{ + fflush(stdout); + fprintf(stderr, "Arity error: called with %ld args, should have been %ld\n", + ExpectedArity, n); + +#if defined(DO_REDN_COUNTING) + if (showRednCountStats) { + PrintRednCountInfo(); + } +#endif + + EXIT(EXIT_FAILURE); +} + +#endif /* __DO_ARITY_CHECKS__ */ +\end{code} + +%************************************************************************ +%* * +\subsection[stk-oflow-seq]{Boring sequential stack overflow} +%* * +%************************************************************************ + +\begin{code} +#ifndef CONCURRENT + +void +StackOverflow(STG_NO_ARGS) +{ + fflush(stdout); + 
StackOverflowHook(SM_word_stk_size * sizeof(W_)); /*msg*/ + +#if defined(DO_REDN_COUNTING) + if (showRednCountStats) { + PrintRednCountInfo(); + } +#endif + + EXIT(EXIT_FAILURE); +} +#endif +\end{code} + +%************************************************************************ +%* * +\subsection[stk-squeeze]{Code for squeezing out update frames} +%* * +%************************************************************************ + +Code for squeezing out vacuous update frames. Updatees of squeezed frames +are turned into indirections to the common black hole (or blocking queue). + +\begin{code} + +I_ squeeze_upd_frames = 1; /* now ON by default */ + +I_ +SqueezeUpdateFrames(bottom, top, frame) +P_ bottom; +P_ top; +P_ frame; +{ + I_ displacement = 0; + P_ next_frame = NULL; /* Temporally next */ + P_ prev_frame; /* Temporally previous */ + + /* + * If we have no update frames, there is nothing to do. + */ + + if (frame <= bottom) + return 0; + + if ((prev_frame = GRAB_SuB(frame)) <= bottom) { +#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE) + if (!noBlackHoles) + UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info); +#endif + return 0; + } + + /* + * Walk down the stack, reversing the SuB pointers so that we can walk back up + * as we squeeze from the bottom. Note that next_frame and prev_frame refer to + * next and previous as they were added to the stack, rather than the way we see + * them in this walk. (It makes the next loop less confusing.) + */ + + while (prev_frame > bottom) { + PUSH_SuB(frame, next_frame); + next_frame = frame; + frame = prev_frame; + prev_frame = GRAB_SuB(frame); + } + + /* + * Now, we're at the bottom. Frame points to the lowest update frame on the + * stack, and its saved SuB actually points to the frame above. We have to walk + * back up the stack, squeezing out empty update frames and turning the pointers + * back around on the way back up. + */ + + /* + * The bottom-most frame has not been altered, and we never want to eliminate it + * anyway. 
Just black hole the updatee and walk one step up + * before starting to squeeze. When you get to the topmost frame, + * remember that there are still some words above it that might + * have to be moved. + */ + +#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE) + if (!noBlackHoles) + UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info); +#endif + prev_frame = frame; + frame = next_frame; + + /* + * Loop through all of the middle frames (everything except the very + * bottom and the very top). + */ + while ((next_frame = GRAB_SuB(frame)) != NULL) { + P_ sp; + P_ frame_bottom = frame + BREL(STD_UF_SIZE); + + /* Check to see if the current frame is empty (both A and B) */ + if (prev_frame == frame_bottom + BREL(displacement) && + GRAB_SuA(next_frame) == GRAB_SuA(frame)) { + + /* Now squeeze out the current frame */ + P_ updatee_keep = GRAB_UPDATEE(prev_frame); + P_ updatee_bypass = GRAB_UPDATEE(frame); + + /* + fprintf(stderr, "squeezing frame at %lx, ret %lx\n", frame, + GRAB_RET(frame)); + */ + +#ifdef CONCURRENT + /* Check for a blocking queue on the node that's going away */ + if (INFO_PTR(updatee_bypass) == (W_) BQ_info) { + /* Sigh. It has one. Don't lose those threads! */ + if (INFO_PTR(updatee_keep) == (W_) BQ_info) { + /* Urgh. Two queues. Merge them. 
*/ + P_ tso = (P_) BQ_ENTRIES(updatee_keep); + + while (TSO_LINK(tso) != Nil_closure) + tso = TSO_LINK(tso); + + TSO_LINK(tso) = (P_) BQ_ENTRIES(updatee_bypass); + } else { + /* For simplicity, just swap the BQ for the BH */ + P_ temp = updatee_keep; + + updatee_keep = updatee_bypass; + updatee_bypass = temp; + + /* Record the swap in the kept frame (below) */ + PUSH_UPDATEE(prev_frame, updatee_keep); + } + } +#endif + + UPD_EXISTING(); /* ticky stuff (NB: nothing for spat-profiling) */ + UPD_IND(updatee_bypass, updatee_keep); + + sp = frame - BREL(1); /* Toss the current frame */ + displacement += STD_UF_SIZE; + + } else { +#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE) + if (!noBlackHoles) + UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info); +#endif + + /* No squeeze for this frame */ + sp = frame_bottom - BREL(1); /* Keep the current frame */ + + /* Fix the SuB in the current frame (should point to the frame below) */ + PUSH_SuB(frame, prev_frame); + } + + /* Now slide all words from sp up to the next frame */ + + if (displacement > 0) { + P_ next_frame_bottom = next_frame + BREL(STD_UF_SIZE); + + /* + fprintf(stderr, "sliding [%lx, %lx] by %d\n", sp, next_frame_bottom, + displacement); + */ + + while (sp <= next_frame_bottom) { + sp[BREL(displacement)] = *sp; + sp -= BREL(1); + } + } + prev_frame = frame + BREL(displacement); + frame = next_frame; + } + + /* + * Now handle the topmost frame. Patch SuB, black hole the updatee, + * and slide down. 
+ */ + + PUSH_SuB(frame, prev_frame); + +#if !defined(CONCURRENT) && defined(SM_DO_BH_UPDATE) + if (!noBlackHoles) + UPD_BH(GRAB_UPDATEE(frame), BH_UPD_info); +#endif + + if (displacement > 0) { + P_ sp = frame + BREL(STD_UF_SIZE) - BREL(1); + + /* + fprintf(stderr, "sliding [%lx, %lx] by %d\n", sp, top, displacement); + */ + + while (sp <= top) { + sp[BREL(displacement)] = *sp; + sp -= BREL(1); + } + } + return displacement; +} + +\end{code} + +%************************************************************************ +%* * +\subsection[stk-ouflow-par]{Rather exciting parallel stack overflow and underflow} +%* * +%************************************************************************ + +\begin{code} +#ifdef CONCURRENT +\end{code} + +StackOverflow: called inside a nice ``callwrapper'' when stack +overflow occurs. The state is already saved in the TSO, and the stack +is in a tidy saved state. + +\begin{code} +EXTDATA_RO(StkO_info); /* boring extern decl */ +EXTFUN(EnterNodeCode); /* For reentering node after potential GC */ + +#ifdef PAR +EXTDATA_RO(FetchMe_info); +#endif + +I_ +StackOverflow(args1, args2) +W_ args1; +W_ args2; +{ + I_ i; + P_ old_stko, new_stko; + W_ headroom = STACK_OVERFLOW_HEADROOM(args1, args2); + I_ cts_size; + +#ifdef PAR + W_ is_prim_return = STACK_OVERFLOW_PRIM_RETURN(args1, args2); +#endif + W_ reenter = STACK_OVERFLOW_REENTER(args1, args2); + W_ words_of_a = STACK_OVERFLOW_AWORDS(args1, args2); + W_ words_of_b = STACK_OVERFLOW_BWORDS(args1, args2); + W_ liveness = STACK_OVERFLOW_LIVENESS(args1, args2); + I_ really_reenter_node = 0; + + SET_TASK_ACTIVITY(ST_OVERHEAD); + + + /* + * fprintf(stderr,"StackOverflow:liveness=%lx,a=%lx,b=%lx\n", + * liveness,words_of_a,words_of_b); + */ + + old_stko = SAVE_StkO; + + /* + * fprintf(stderr, "SpA %lx SuA %lx SpB %lx SuB %lx\n", STKO_SpA(old_stko), + * STKO_SuA(old_stko), STKO_SpB(old_stko), STKO_SuB(old_stko)); + */ + + if (squeeze_upd_frames) { + i = SqueezeUpdateFrames(STKO_BSTK_BOT(old_stko), 
STKO_SpB(old_stko), + STKO_SuB(old_stko)); + STKO_SuB(old_stko) += BREL(i); + STKO_SpB(old_stko) += BREL(i); + if ((P_) STKO_SpA(old_stko) - AREL(headroom) > STKO_SpB(old_stko)) { + + /* + * fprintf(stderr, "SpA %lx SpB %lx headroom %d\n", STKO_SpA(old_stko), + * STKO_SpB(old_stko), headroom); + */ + + /* We saved enough space to continue on the old StkO */ + return 0; + } + } + SAVE_Liveness = liveness; + + /* Double the stack chunk size each time we grow the stack */ + cts_size = STKO_CLOSURE_CTS_SIZE(old_stko) * 2; + + if (SAVE_Hp + STKO_HS + cts_size > SAVE_HpLim) { + if (reenter) { + /* + * Even in the uniprocessor world, we may have to reenter node in case + * node is a selector shorted out by GC. + */ + assert(liveness & LIVENESS_R1); + TSO_PC2(CurrentTSO) = EnterNodeCode; + really_reenter_node = 1; + } + ReallyPerformThreadGC(STKO_HS + cts_size, rtsFalse); + old_stko = SAVE_StkO; + } + ALLOC_STK(STKO_HS, cts_size, 0); + new_stko = SAVE_Hp + 1; + SAVE_Hp += STKO_HS + cts_size; + SET_STKO_HDR(new_stko, StkO_info, CCC); + + /* Initialize the StkO, as in NewThread */ + STKO_SIZE(new_stko) = cts_size + STKO_VHS; + STKO_SpB(new_stko) = STKO_SuB(new_stko) = STKO_BSTK_BOT(new_stko) + BREL(1); + STKO_SpA(new_stko) = STKO_SuA(new_stko) = STKO_ASTK_BOT(new_stko) + AREL(1); + STKO_LINK(new_stko) = old_stko; + + STKO_RETURN(new_stko) = SAVE_Ret; + +#ifdef PAR + + /* + * When we fall off of the top stack segment, we will either be + * returning an algebraic data type, in which case R2 holds a + * valid info ptr, or we will be returning a primitive + * (e.g. int#), in which case R2 is garbage. If we need to perform + * GC to pull in the lower stack segment (this should only happen + * because of task migration), then we need to know the register + * liveness for the algebraic returns. We get the liveness out of + * the info table. 
Now, we could set up the primitive returns + * with a bogus infoptr, which has a NO_LIVENESS field in the info + * table, but that would involve a lot more overhead than the + * current approach. At present, we set up RetReg to point to + * *either* a polymorphic algebraic return point, or a primitive + * return point. + */ + + SAVE_Ret = is_prim_return ? (P_) PrimUnderflow : (P_) vtbl_Underflow; +#else + SAVE_Ret = (P_) vtbl_Underflow; +#endif + + STKO_SpA(old_stko) += AREL(words_of_a); + STKO_SpB(old_stko) += BREL(words_of_b); + +#ifdef DO_REDN_COUNTING + /* Record the stack depths in chunks below the new stack object */ + + STKO_ADEP(new_stko) = STKO_ADEP(old_stko) + + AREL((I_) STKO_ASTK_BOT(old_stko) - (I_) STKO_SpA(old_stko)); + STKO_BDEP(new_stko) = STKO_BDEP(old_stko) + + BREL((I_) STKO_BSTK_BOT(old_stko) - (I_) STKO_SpB(old_stko)); +#endif + + if (STKO_SpB(old_stko) < STKO_BSTK_BOT(old_stko)) { + + /* + * This _should_ only happen if PAP_entry fails a stack check and there is + * no update frame on the current stack. We can deal with this by storing a + * function's argument requirements in its info table, peering into the PAP + * (it had better be in R1) for the function pointer and taking only the + * necessary number of arguments, but this would be hard, so we haven't done + * it. + */ + fflush(stdout); + fprintf(stderr, "StackOverflow too deep. Probably a PAP with no update frame.\n"); + abort(); /* an 'abort' may be overkill WDP 95/04 */ + } + /* Move A stack words from old StkO to new StkO */ + for (i = 1; i <= words_of_a; i++) { + STKO_SpA(new_stko)[-AREL(i)] = STKO_SpA(old_stko)[-AREL(i)]; + } + STKO_SpA(new_stko) -= AREL(words_of_a); + + /* Move B stack words from old StkO to new StkO */ + for (i = 1; i <= words_of_b; i++) { + STKO_SpB(new_stko)[-BREL(i)] = STKO_SpB(old_stko)[-BREL(i)]; + } + STKO_SpB(new_stko) -= BREL(words_of_b); + + /* Now, handle movement of a single update frame */ + /* ToDo: Make this more efficient. 
(JSM) */ + if (STKO_SpB(old_stko) < STKO_SuB(old_stko)) { + /* Yikes! PAP_entry stole an update frame. Fix the world! */ + P_ frame = STKO_SuB(new_stko) - BREL(STD_UF_SIZE); + + /* + * fprintf(stderr, "Stolen update frame: (old %lx, new %lx) SuA %lx, SuB + * %lx, return %lx\n", old_stko, new_stko, GRAB_SuA(frame), GRAB_SuB(frame), + * GRAB_RET(frame)); + */ + + STKO_SuA(old_stko) = GRAB_SuA(frame); + STKO_SuB(old_stko) = GRAB_SuB(frame); + + SAVE_Ret = STKO_RETURN(new_stko); + STKO_RETURN(new_stko) = GRAB_RET(frame); + + PUSH_SuA(frame, STKO_SuA(new_stko)); + PUSH_SuB(frame, STKO_SuB(new_stko)); + PUSH_RET(frame, vtbl_Underflow); + + STKO_SuB(new_stko) = frame; + } + SAVE_StkO = new_stko; + return really_reenter_node; +} +\end{code} + +Underflow things are all done in the threaded world. The code is in +main/StgThreads.lhc. + +\begin{code} +#endif /* parallel */ +\end{code} diff --git a/ghc/runtime/main/StgStartup.lhc b/ghc/runtime/main/StgStartup.lhc new file mode 100644 index 0000000000..9728711982 --- /dev/null +++ b/ghc/runtime/main/StgStartup.lhc @@ -0,0 +1,662 @@ +%/**************************************************************** +%* * +%* Basic Continuations required by the STG Machine runtime * +%* * +%****************************************************************/ + + +First continuation called by the mini-interpreter is +evaluateTopClosure. It has to set up return and jump to the user's +@main@ closure. If @errorIO@ is called, we will be back here, doing +the same thing for the specified continuation. + +\begin{code} +#define MAIN_REG_MAP /* STG world */ +#include "rtsdefs.h" + +#if 0 +#ifdef PAR +#include "Statistics.h" +#endif +#endif + +/* ptr to the user's "main" closure (or "errorIO" arg closure), + to which we hope to be linked +*/ +extern P_ TopClosure; + +EXTFUN(stopThreadDirectReturn); +UNVECTBL(,vtbl_stopStgWorld,stopThreadDirectReturn) + +/* Well, we have to put the ArrayOfData and ArrayOfPtrs info tables + somewhere... 
+*/ + +/* Array of data -- mutable */ +STATICFUN(ArrayOfData_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered a primitive array (of data)---this shouldn't happen!\n"); + abort(); + FE_ +} + +DATA_ITBL(ArrayOfData_info,ArrayOfData_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"DATA-ARRAY","ARRAY"); +/* ToDo: could put a useful tag in there!!! */ + +/* Array of pointers -- mutable */ +STATICFUN(ArrayOfPtrs_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered a primitive array (of pointers)---this shouldn't happen!\n"); + abort(); + FE_ +} + +MUTUPLE_ITBL(ArrayOfPtrs_info,ArrayOfPtrs_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"PTR-ARRAY(mut)","ARRAY"); +/* ToDo: could put a useful tag in there!!! */ + +STATICFUN(FullSVar_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered a full SVar---this shouldn't happen!\n"); + abort(); + FE_ +} + +MUTUPLE_ITBL(FullSVar_info,FullSVar_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"FullSVar","ARRAY"); +/* ToDo: could put a useful tag in there!!! */ + +STATICFUN(EmptySVar_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered an empty SVar---this shouldn't happen!\n"); + abort(); + FE_ +} + +MUTUPLE_ITBL(EmptySVar_info,EmptySVar_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"EmptySVar","ARRAY"); +/* ToDo: could put a useful tag in there!!! 
*/ + +/* Array of pointers -- immutable */ +STATICFUN(ImMutArrayOfPtrs_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered a primitive array (immutable, pointers)---this shouldn't happen!\n"); + abort(); + FE_ +} + +IMMUTUPLE_ITBL(ImMutArrayOfPtrs_info,ImMutArrayOfPtrs_entry,UpdErr,0,INFO_OTHER_TAG,,,const,IF_,ARR_K,"PTR-ARRAY(immut)","ARRAY"); +/* ToDo: could put a useful tag in there!!! */ + +/* (end of Array whatnot) */ + +/* Question for Will: There seem to be a lot of these static things +now - worth putting them in a file by themselves?? [ADR] */ + + +#ifndef PAR + +/* Ditto for Malloc Pointer entry point and info tables. [ADR] + + BTW Will, I copied most of this blindly from above - what's with + this TAG stuff? And what kind of description/ type is wanted here? +*/ + +STATICFUN(MallocPtr_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Compiler bug: Entered a Malloc Pointer---this shouldn't happen!\n"); + abort(); + FE_ +} + +MallocPtr_ITBL(MallocPtr_info,MallocPtr_entry,UpdErr,0,INFO_OTHER_TAG,,,const,EF_,MallocPtr_K,"MALLOC PTR","MallocPtr"); + +/* End of MallocPtr stuff */ + +/* Ditto for the unused Stable Pointer info table. 
[ADR] +*/ + +extern void raiseError PROTO((StgStablePtr)); +extern StgStablePtr errorHandler; + +/* Unused Stable Pointer (ie unused slot in a stable pointer table) */ +STATICFUN(UnusedSP_entry) +{ + FB_ + (void) SAFESTGCALL1(I_,(void *, FILE *),fflush,stdout); + (void) SAFESTGCALL2(I_,(void *, FILE *, char *),fprintf,stderr, "Entered an unused Stable Pointer---this shouldn't happen!\n(This could be program error (using stable pointer after freeing) or compiler bug.)\n"); + + (void) STGCALL1(void,(void *, StgStablePtr), raiseError, errorHandler); + FE_ +} + +STATIC_ITBL(UnusedSP_static_info,UnusedSP_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,IF_,CON_K,"UNUSED STABLE PTR","USP"); + +SET_STATIC_HDR(UnusedSP_closure,UnusedSP_static_info,CC_SUBSUMED,,ED_RO_) +}; + +/* Entry point and Info table for Stable Pointer Table. */ + +STATICFUN(StablePointerTable_entry) +{ + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered the stable pointer table---this shouldn't happen!\n"); + abort(); + FE_ +} + +STATIC_ITBL(EmptyStablePointerTable_static_info,StablePointerTable_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,IF_,SPT_K,"SP_TABLE","SP_TABLE"); +/* ToDo: could put a useful tag in there!!! */ + +DYN_ITBL(StablePointerTable_info,StablePointerTable_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,IF_,SPT_K,"SP_TABLE","SP_TABLE"); +/* ToDo: could put a useful tag in there!!! */ + + +/* To ease initialisation of the heap, we start with an empty stable + pointer table. When we try to create the first stable pointer, the + overflow will trigger creation of a table of useful size. 
+*/ + +SET_STATIC_HDR(EmptySPTable_closure,EmptyStablePointerTable_static_info,CC_SUBSUMED,,ED_RO_) +, (W_) DYN_VHS + 0 + 1 + 0 /* size = DYN_VHS + n + 1 + n with n = 0 */ +, (W_) 0 /* number of ptrs */ +, (W_) 0 /* top of stack */ +}; + +/* End of SP stuff */ +#endif /* !PAR */ + + +/* the IoWorld token to start the whole thing off */ +/* Question: this is just an amusing hex code isn't it + -- or does it mean something? ADR */ +P_ realWorldZh_closure = (P_) 0xbadbadbaL; + +SET_STATIC_HDR(WorldStateToken_closure,SZh_static_info,CC_SUBSUMED/*harmless*/,,ED_RO_) +, (W_) 0xbadbadbaL +}; + +#ifndef CONCURRENT + +STGFUN(startStgWorld) +{ + FB_ + /* At this point we are in the threaded-code world. + + TopClosure points to a closure of type PrimIO (), which should be + performed (by applying it to the state of the world). + + The smInfo storage-management info block is assumed to be + up to date, and is used to load the STG registers. + */ + +#if defined (DO_SPAT_PROFILING) + SET_ACTIVITY(ACT_REDN); /* init: do this first, so we count the restore insns */ +#endif + + RestoreAllStgRegs(); /* inline! */ + + /* ------- STG registers are now valid! -------------------------*/ + + /* Put a suitable return address on the B stack */ + RetReg = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld); + + /* Put an IoWorld token on the A stack */ + SpA -= AREL(1); + *SpA = (P_) WorldStateToken_closure; + + Node = (P_) TopClosure; /* Point to the closure for main/errorIO-arg */ + ENT_VIA_NODE(); + InfoPtr=(D_)(INFO_PTR(Node)); + JMP_(ENTRY_CODE(InfoPtr)); + FE_ +} +#endif /* ! CONCURRENT */ + +\end{code} + +%************************************************************************ +%* * +\subsection[thread-return]{Polymorphic end-of-thread code} +%* * +%************************************************************************ + +\begin{code} + +/* + Here's the polymorphic return for the end of a thread. 
+ + NB: For direct returns to work properly, the name of the routine must be + the same as the name of the vector table with vtbl_ removed and DirectReturn + appended. This is all the mangler understands. +*/ + +const W_ +vtbl_stopThread[] = { + /* at least "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */ + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn, + (W_) stopThreadDirectReturn +}; + +STGFUN(stopThreadDirectReturn) +{ + FB_ + /* The final exit. + + The top-top-level closures (e.g., "main") are of type "IO ()". + When entered, they perform an IO action and return a () -- + essentially, TagReg is set to 1. Here, we don't need to do + anything with that. + + We just tidy up the register stuff (real regs in *_SAVE, then + *_SAVE -> smInfo locs). + */ + +#ifdef CONCURRENT + SET_TASK_ACTIVITY(ST_OVERHEAD); +#endif + + SaveAllStgRegs(); /* inline! */ + +#ifdef CONCURRENT + EndThread(); +#else + RESUME_(miniInterpretEnd); +#endif + FE_ +} + +\end{code} + +\begin{code} +I_ ErrorIO_call_count = 0; + +#ifdef CONCURRENT +EXTFUN(EnterNodeCode); + +STGFUN(ErrorIO_innards) + /* Assumes that "TopClosure" has been set already */ +{ + FB_ + if (ErrorIO_call_count >= 16 /* MAGIC CONSTANT */ ) { + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "too many nested calls to `error'\n"); + EXIT(EXIT_FAILURE); + } + ErrorIO_call_count++; /* NB: undo later if decide to let someone else handle it */ + + /* Unlock all global closures held by this thread! (ToDo) --JSM */ + + switch(TSO_TYPE(CurrentTSO)) { + case T_MAIN: + /* Re-initialize stack pointers (cf. 
NewThread) */ +#ifdef PAR + SpB = SuB = STKO_BSTK_BOT(StkOReg) + BREL(1); + SuA = STKO_ASTK_BOT(StkOReg) + AREL(1); +#else + SuA = stackInfo.botA + AREL(1); + SuB = stackInfo.botB + BREL(1); +#endif + break; + + case T_REQUIRED: + /* Re-initialize stack pointers (cf. NewThread) */ + SpB = SuB = STKO_BSTK_BOT(StkOReg) + BREL(1); + SuA = STKO_ASTK_BOT(StkOReg) + AREL(1); + break; + + case T_ADVISORY: + ErrorIO_call_count--; /* undo the damage, as someone else will deal with it */ + /* Let the main thread eventually handle it */ + JMP_(stopThreadDirectReturn); + + case T_FAIL: + EXIT(EXIT_FAILURE); + + default: + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr,"ErrorIO: %x unknown\n", TSO_TYPE(CurrentTSO)); + EXIT(EXIT_FAILURE); + } + + /* Finish stack setup as if for a top-level task and enter the error node */ + + SpA = SuA - AREL(1); + + *SpA = (P_) WorldStateToken_closure; + + STKO_LINK(StkOReg) = Nil_closure; + STKO_RETURN(StkOReg) = NULL; + +#ifdef DO_REDN_COUNTING + STKO_ADEP(StkOReg) = STKO_BDEP(StkOReg) = 0; +#endif + + /* Go! */ + Node = (P_) TopClosure; + RetReg = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld); + JMP_(EnterNodeCode); + + FE_ +} +\end{code} + +We cannot afford to call @error@ too many times +(e.g., \tr{error x where x = error x}), so we keep count. + +\begin{code} +#else /* !CONCURRENT */ + +StgFunPtr +ErrorIO_innards(STG_NO_ARGS) + /* Assumes that "TopClosure" has been set already */ +{ + FB_ + if (ErrorIO_call_count >= 16 /* MAGIC CONSTANT */ ) { + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "too many nested calls to `error'\n"); + EXIT(EXIT_FAILURE); + } + ErrorIO_call_count++; + + /* Copy the heap-related registers into smInfo. (Other registers get + saved in this process, but we aren't interested in them.) + + Get a new stack (which re-initialises the smInfo stack stuff), + and start the world again. 
+ */ + /* ToDo: chk this has been handled in parallel world */ + + SaveAllStgRegs(); /* inline! */ + + if ( initStacks( &StorageMgrInfo ) != 0) { + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "initStacks failed!\n"); + EXIT(EXIT_FAILURE); + } + + JMP_( startStgWorld ); + FE_ +} + +#endif /* !CONCURRENT */ +\end{code} + +\begin{code} +#ifdef PAR + +STATICFUN(RBH_Save_0_entry) +{ + FB_ + fprintf(stderr,"Oops, entered an RBH save\n"); + EXIT(EXIT_FAILURE); + FE_ +} + +STATICFUN(RBH_Save_1_entry) +{ + FB_ + fprintf(stderr,"Oops, entered an RBH save\n"); + EXIT(EXIT_FAILURE); + FE_ +} + +STATICFUN(RBH_Save_2_entry) +{ + FB_ + fprintf(stderr,"Oops, entered an RBH save\n"); + EXIT(EXIT_FAILURE); + FE_ +} + +SPEC_N_ITBL(RBH_Save_0_info,RBH_Save_0_entry,UpdErr,0,INFO_OTHER_TAG,2,0,,IF_,INTERNAL_KIND,"RBH-SAVE","RBH_Save_0"); +SPEC_N_ITBL(RBH_Save_1_info,RBH_Save_1_entry,UpdErr,0,INFO_OTHER_TAG,2,1,,IF_,INTERNAL_KIND,"RBH-SAVE","RBH_Save_1"); +SPEC_N_ITBL(RBH_Save_2_info,RBH_Save_2_entry,UpdErr,0,INFO_OTHER_TAG,2,2,,IF_,INTERNAL_KIND,"RBH-SAVE","RBH_Save_2"); + +#endif /* PAR */ +\end{code} + + +%/**************************************************************** +%* * +%* Other Bits and Pieces * +%* * +%****************************************************************/ + +\begin{code} +/* If we don't need the slow entry code for a closure, we put in a + pointer to this in the closure's slow entry code pointer instead. 
+ */ + +STGFUN(__std_entry_error__) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Called non-existent slow-entry code!!!\n"); + abort(); + JMP_(0); + FE_ +} + +/* entry code */ +STGFUN(STK_STUB_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered from a stubbed stack slot!\n"); + abort(); + JMP_(0); + FE_ +} + +/* info table */ +STATIC_ITBL(STK_STUB_static_info,STK_STUB_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,EF_,INTERNAL_KIND,"STK_STUB","STK_STUB"); + +/* closure */ +SET_STATIC_HDR(STK_STUB_closure,STK_STUB_static_info,CC_SUBSUMED,,EXTDATA_RO) + , (W_)0, (W_)0 +}; +\end{code} + +\begin{code} +#ifdef GRAN + +STGFUN(Event_Queue_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr, "Entered from an event queue!\n"); + abort(); + JMP_(0); + FE_ +} + +GEN_N_ITBL(Event_Queue_info,Event_Queue_entry,UpdErr,0,INFO_OTHER_TAG,5,2,const,EF_,INTERNAL_KIND,"EventQ","EventQ"); + +#endif /* GRAN */ +\end{code} + + + +%/**************************************************************** +%* * +%* Some GC info tables * +%* * +%****************************************************************/ + +These have to be in a .lhc file, so they will be reversed correctly. 
+ +\begin{code} +#include "../storage/SMinternal.h" + +#if defined(_INFO_COPYING) + +STGFUN(Caf_Evac_Upd_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr,"Entered Caf_Evac_Upd %lx: Should never occur!\n", (W_) Node); + abort(); + FE_ +} + +CAF_EVAC_UPD_ITBL(Caf_Evac_Upd_info,Caf_Evac_Upd_entry,const/*not static*/); + +#if defined(GCgn) + +STGFUN(Forward_Ref_New_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr,"Entered Forward_Ref_New %lx: Should never occur!\n", (W_) Node); + abort(); + FE_ +} +FORWARDREF_ITBL(Forward_Ref_New_info,Forward_Ref_New_entry,const/*not static*/,_Evacuate_Old_Forward_Ref); + +STGFUN(Forward_Ref_Old_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr,"Entered Forward_Ref_Old %lx: Should never occur!\n", (W_) Node); + abort(); + FE_ +} +FORWARDREF_ITBL(Forward_Ref_Old_info,Forward_Ref_Old_entry,const/*not static*/,_Evacuate_New_Forward_Ref); + +STGFUN(OldRoot_Forward_Ref_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr,"Entered OldRoot_Forward_Ref %lx: Should never occur!\n", (W_) Node); + abort(); + FE_ +} +FORWARDREF_ITBL(OldRoot_Forward_Ref_info,OldRoot_Forward_Ref_entry,const/*not static*/,_Evacuate_OldRoot_Forward); +#else /* ! GCgn */ + +STGFUN(Forward_Ref_entry) { + FB_ + /* Don't wrap the calls; we're done with STG land */ + fflush(stdout); + fprintf(stderr,"Entered Forward_Ref %lx: Should never occur!\n", (W_) Node); + abort(); + FE_ +} +FORWARDREF_ITBL(Forward_Ref_info,Forward_Ref_entry,const/*not static*/,_Evacuate_Forward_Ref); +#endif /* ! GCgn */ + +#endif /* _INFO_COPYING */ + +#if defined(GCgn) +OLDROOT_ITBL(OldRoot_info,Ind_Entry,const,EF_); +#endif /* GCgn */ +\end{code} + + +%/*************************************************************** +%* * +%* Cost Centre stuff ... 
* +%* * +%****************************************************************/ + +For cost centres we need prelude cost centres and register routine. + +N.B. ALL prelude cost centres should be declared here as none will + be declared when the prelude is compiled. + +ToDo: Explicit cost centres in prelude for Input and Output costs. + +\begin{code} +#if defined(USE_COST_CENTRES) + +STGFUN(startCcRegisteringWorld) +{ + FB_ + /* + * We used to push miniInterpretEnd on the register stack, but + * miniInterpretEnd must only be entered with the RESUME_ macro, + * whereas the other addresses on the register stack must only be + * entered with the JMP_ macro. Now, we push NULL and test for + * it explicitly at each pop. + */ + PUSH_REGISTER_STACK(NULL); + JMP_(_regMain); + FE_ +} + +CC_DECLARE(CC_CAFs, "CAFs_in_...", "PRELUDE", "PRELUDE", CC_IS_CAF,/*not static*/); +CC_DECLARE(CC_DICTs, "DICTs_in_...", "PRELUDE", "PRELUDE", CC_IS_DICT,/*not static*/); + +START_REGISTER_PRELUDE(_regPrelude); +REGISTER_CC(CC_CAFs); +REGISTER_CC(CC_DICTs); +END_REGISTER_CCS() +\end{code} + +We also need cost centre declarations and registering routines for other +built-in prelude-like modules. + +ToDo: What built-in prelude-like modules exist ? + +\begin{code} +START_REGISTER_PRELUDE(_regByteOps); /* used in Glasgow tests only? 
*/ +END_REGISTER_CCS() + +/* _regPrelude is above */ + +START_REGISTER_PRELUDE(_regPreludeArray); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludeCore); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludeDialogueIO); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludeGlaMisc); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludeGlaST); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludeIOError); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludePS); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludePrimIO); +END_REGISTER_CCS() + +START_REGISTER_PRELUDE(_regPreludeStdIO); +END_REGISTER_CCS() +#endif +\end{code} diff --git a/ghc/runtime/main/StgThreads.lhc b/ghc/runtime/main/StgThreads.lhc new file mode 100644 index 0000000000..b3f9f28d0c --- /dev/null +++ b/ghc/runtime/main/StgThreads.lhc @@ -0,0 +1,496 @@ +% +% (c) The AQUA Project, Glasgow University, 1994 +% +%************************************************************************ +%* * +\section[StgThreads.lhc]{Threaded Threads Support} +%* * +%************************************************************************ + +Some of the threads support is done in threaded code. How's that for ambiguous +overloading? + +\begin{code} + +#ifdef CONCURRENT + +#define MAIN_REG_MAP /* STG world */ +#include "rtsdefs.h" + +#if 0 +#ifdef PAR +#include "Statistics.h" +#endif +#endif + +\end{code} + +%************************************************************************ +%* * +\subsection[thread-objects]{Special objects for thread support} +%* * +%************************************************************************ + +TSO's are Thread State Objects, where the thread context is stored when the +thread is sleeping, and where we have slots for STG registers that don't +live in real machine registers. 
+ +\begin{code} + +TSO_ITBL(); + +STGFUN(TSO_entry) +{ + FB_ + fflush(stdout); + fprintf(stderr, "TSO Entry: panic"); + abort(); + FE_ +} + +\end{code} + +Stack objects are chunks of stack words allocated out of the heap and +linked together in a chain. + +\begin{code} + +STKO_ITBL(); + +STGFUN(StkO_entry) +{ + FB_ + fflush(stdout); + fprintf(stderr, "StkO Entry: panic"); + abort(); + FE_ + +} + +#ifndef PAR + +STKO_STATIC_ITBL(); + +STGFUN(StkO_static_entry) +{ + FB_ + fflush(stdout); + fprintf(stderr, "StkO_static Entry: panic"); + abort(); + FE_ + +} + +#endif + +\end{code} + +Blocking queues are essentially black holes with threads attached. These +are the threads to be awakened when the closure is updated. + +\begin{code} + +EXTFUN(EnterNodeCode); + +STGFUN(BQ_entry) +{ + FB_ + +#if defined(GRAN) + STGCALL0(void,(),GranSimBlock); /* Before overwriting TSO_LINK */ +#endif + + TSO_LINK(CurrentTSO) = (P_) BQ_ENTRIES(Node); + BQ_ENTRIES(Node) = (W_) CurrentTSO; + + LivenessReg = LIVENESS_R1; + SaveAllStgRegs(); + TSO_PC1(CurrentTSO) = EnterNodeCode; + + if (DO_QP_PROF) { + QP_Event1("GR", CurrentTSO); + } +#ifdef PAR + if(do_gr_profile) { + /* Note that CURRENT_TIME may perform an unsafe call */ + TIME now = CURRENT_TIME; + TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); + TSO_BLOCKCOUNT(CurrentTSO)++; + TSO_QUEUE(CurrentTSO) = Q_BLOCKED; + TSO_BLOCKEDAT(CurrentTSO) = now; + DumpGranEvent(GR_BLOCK, CurrentTSO); + } +#endif +#if defined(GRAN) + ReSchedule(NEW_THREAD); +#else + ReSchedule(0); +#endif + FE_ +} + +BQ_ITBL(); + +\end{code} + +Revertible black holes are needed in the parallel world, to handle +negative acknowledgements of messages containing updatable closures. +The idea is that when the original message is transmitted, the closure +is turned into a revertible black hole...an object which acts like a +black hole when local threads try to enter it, but which can be +reverted back to the original closure if necessary. 
+ +It's actually a lot like a blocking queue (BQ) entry, because +revertible black holes are initially set up with an empty blocking +queue. + +The combination of GrAnSim with revertible black holes has not been +checked, yet. -- HWL + +\begin{code} + +#ifdef PAR + +STGFUN(RBH_entry) +{ + FB_ + +#if defined(GRAN) + STGCALL0(void, (), GranSimBlock); /* Before overwriting TSO_LINK */ +#endif + + switch (INFO_TYPE(InfoPtr)) { + case INFO_SPEC_RBH_TYPE: + TSO_LINK(CurrentTSO) = (P_) SPEC_RBH_BQ(Node); + SPEC_RBH_BQ(Node) = (W_) CurrentTSO; + break; + case INFO_GEN_RBH_TYPE: + TSO_LINK(CurrentTSO) = (P_) GEN_RBH_BQ(Node); + GEN_RBH_BQ(Node) = (W_) CurrentTSO; + break; + default: + fflush(stdout); + fprintf(stderr, "Panic: non-{SPEC,GEN} RBH %#lx (IP %#lx)\n", Node, InfoPtr); + EXIT(EXIT_FAILURE); + } + + LivenessReg = LIVENESS_R1; + SaveAllStgRegs(); + TSO_PC1(CurrentTSO) = EnterNodeCode; + + if (DO_QP_PROF) { + QP_Event1("GR", CurrentTSO); + } + + if(do_gr_profile) { + /* Note that CURRENT_TIME may perform an unsafe call */ + TIME now = CURRENT_TIME; + TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); + TSO_BLOCKCOUNT(CurrentTSO)++; + TSO_QUEUE(CurrentTSO) = Q_BLOCKED; + TSO_BLOCKEDAT(CurrentTSO) = now; + DumpGranEvent(GR_BLOCK, CurrentTSO); + } + +#if defined(GRAN) + ReSchedule(NEW_THREAD); +#else + ReSchedule(0); +#endif + + FE_ +} + +#endif + +\end{code} + +%************************************************************************ +%* * +\subsection[thread-entrypoints]{Scheduler-Thread Interfaces} +%* * +%************************************************************************ + +The normal way of entering a thread is through resumeThread, which +short-circuits and indirections to the TSO and StkO, sets up STG registers, +and jumps to the saved PC. 
+ +\begin{code} + +STGFUN(resumeThread) +{ + FB_ + + while((P_) INFO_PTR(CurrentTSO) == Ind_info) { + CurrentTSO = (P_) IND_CLOSURE_PTR(CurrentTSO); + } + +#ifdef PAR + if (do_gr_profile) { + TSO_QUEUE(CurrentTSO) = Q_RUNNING; + /* Note that CURRENT_TIME may perform an unsafe call */ + TSO_BLOCKEDAT(CurrentTSO) = CURRENT_TIME; + } +#endif + + CurrentRegTable = TSO_INTERNAL_PTR(CurrentTSO); + + while((P_) INFO_PTR(SAVE_StkO) == Ind_info) { + SAVE_StkO = (P_) IND_CLOSURE_PTR(SAVE_StkO); + } + RestoreAllStgRegs(); + + SET_TASK_ACTIVITY(ST_REDUCING); + SET_ACTIVITY(ACT_REDN); /* back to normal reduction */ + RESTORE_CCC(TSO_CCC(CurrentTSO)); + JMP_(TSO_PC1(CurrentTSO)); + FE_ +} + +\end{code} + +Since we normally context switch during a heap check, it is possible +that we will return to a previously suspended thread without +sufficient heap for the thread to continue. However, we have cleverly +stashed away the heap requirements in @TSO_ARG1@ so that we can decide +whether or not to perform a garbage collection before resuming the +thread. The actual thread resumption address (either @EnterNodeCode@ +or elsewhere) is stashed in TSO_PC2. + +\begin{code} + +STGFUN(CheckHeapCode) +{ + FB_ + + ALLOC_HEAP(TSO_ARG1(CurrentTSO)); /* ticky profiling */ + SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */ + if ((Hp += TSO_ARG1(CurrentTSO)) > HpLim) { + ReallyPerformThreadGC(TSO_ARG1(CurrentTSO), rtsFalse); + JMP_(resumeThread); + } + SET_TASK_ACTIVITY(ST_REDUCING); + SET_ACTIVITY(ACT_REDN); /* back to normal reduction */ + RESUME_(TSO_PC2(CurrentTSO)); + FE_ +} + +\end{code} + +Often, a thread starts (or rather, resumes) by entering the closure +that Node points to. Here's a tiny code fragment to do just that. +The saved PC in the TSO can be set to @EnterNodeCode@ whenever we +want this to happen upon resumption of the thread. 
+ +\begin{code} + +STGFUN(EnterNodeCode) +{ + FB_ + ENT_VIA_NODE(); + InfoPtr=(D_)(INFO_PTR(Node)); + GRAN_EXEC(5,1,2,0,0); + JMP_(ENTRY_CODE(InfoPtr)); + FE_ +} + +\end{code} + +Then, there are the occasions when we just want to pick up where we left off. +We use RESUME_ here instead of JMP_, because when we return to a call site, +the alpha is going to try to load %gp from %ra rather than %pv, and JMP_ only +sets %pv. Resuming to the start of a function is currently okay, but an +extremely bad practice. As we add support for more architectures, we can expect +the difference between RESUME_ and JMP_ to become more acute. + +\begin{code} + +STGFUN(Continue) +{ + FB_ + + SET_TASK_ACTIVITY(ST_REDUCING); + SET_ACTIVITY(ACT_REDN); /* back to normal reduction */ + RESUME_(TSO_PC2(CurrentTSO)); + FE_ +} + +\end{code} + +%************************************************************************ +%* * +\subsection[stack-chunk-underflow-code]{Underflow code for stack chunks} +%* * +%************************************************************************ + +\begin{code} + +extern P_ AvailableStack; + +#ifndef PAR + +\end{code} + +On a uniprocessor, stack underflow causes us no great headaches. The +old value of RetReg is squirreled away at the base of the top stack +object (the one that's about to get blown away). We just yank it +outta there and perform the same kind of return that got us here in +the first place. + +This simplicity is due to the fact that we never have to fetch a stack +object on underflow. 
+ +\begin{code} + +#define DO_RETURN_TEMPLATE(label, cont) \ + STGFUN(label) \ + { \ + P_ temp; \ + FB_ \ + temp = STKO_LINK(StkOReg); \ + RetReg = STKO_RETURN(StkOReg); \ + StkOReg = temp; \ + RestoreStackStgRegs(); \ + JMP_(cont); \ + FE_ \ + } + +DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg))) +DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)]) +DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)]) +DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)]) +DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)]) +DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)]) + +DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)]) +DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)]) +DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)]) + +DO_RETURN_TEMPLATE(StackUnderflowEnterNode, EnterNodeCode) + +#else + +\end{code} + +In the parallel world, we may have to fetch the StkO from a remote +location before we can load up the stack registers and perform the +return. Our convention is that we load RetReg up with the exact +continuation address (after a vector table lookup, if necessary), +and tail-call the code to fetch the stack object. (Of course, if +the stack object is already local, we then just jump to the +continuation address.) + +\begin{code} + +STGFUN(CommonUnderflow) +{ + P_ temp; + + FB_ + temp = STKO_LINK(StkOReg); + StkOReg = temp; + /* ToDo: Fetch the remote stack object here! 
*/ + RestoreStackStgRegs(); + JMP_(RetReg); + FE_ +} + +#define DO_RETURN_TEMPLATE(label, cont) \ + STGFUN(label) \ + { \ + FB_ \ + RetReg = STKO_RETURN(StkOReg); \ + RetReg = (StgRetAddr)(cont); \ + LivenessReg = INFO_LIVENESS(InfoPtr); \ + JMP_(CommonUnderflow); \ + FE_ \ + } + +DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg))) +DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)]) +DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)]) +DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)]) +DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)]) +DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)]) +DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)]) +DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)]) +DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)]) + +STGFUN(PrimUnderflow) +{ + FB_ + RetReg = STKO_RETURN(StkOReg); + RetReg = (StgRetAddr)DIRECT(((P_)RetReg)); + LivenessReg = NO_LIVENESS; + JMP_(CommonUnderflow); + FE_ +} + +/* + * This one is similar, but isn't part of the return vector. It's only used + * when we fall off of a stack chunk and want to enter Node rather than + * returning through RetReg. (This occurs during UpdatePAP, when the updatee + * isn't on the current stack chunk.) It can't be done with the template, + * because R2 is dead, and R1 points to a PAP. Only R1 is live. 
+ */ + +STGFUN(StackUnderflowEnterNode) +{ + FB_ + RetReg = (StgRetAddr)(EnterNodeCode); + LivenessReg = LIVENESS_R1; + JMP_(CommonUnderflow); + FE_ +} + +#endif + +const W_ +vtbl_Underflow[] = { + /* "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */ + (W_) UnderflowVect0, + (W_) UnderflowVect1, + (W_) UnderflowVect2, + (W_) UnderflowVect3, + (W_) UnderflowVect4, + (W_) UnderflowVect5, + (W_) UnderflowVect6, + (W_) UnderflowVect7 +}; + +\end{code} + +\begin{code} + +IFN_(seqDirectReturn) { + void *cont; + + FB_ + RetReg = (StgRetAddr) SpB[BREL(0)]; + cont = (void *) SpB[BREL(1)]; + SpB += BREL(2); +/* GRAN_EXEC(1,1,2,0,0); /? ToDo: RE-CHECK (WDP) */ + JMP_(cont); + FE_ +} + +/* + NB: For direct returns to work properly, the name of the routine must be + the same as the name of the vector table with vtbl_ removed and DirectReturn + appended. This is all the mangler understands. + */ + +const W_ +vtbl_seq[] = { + (W_) seqDirectReturn, + (W_) seqDirectReturn, + (W_) seqDirectReturn, + (W_) seqDirectReturn, + (W_) seqDirectReturn, + (W_) seqDirectReturn, + (W_) seqDirectReturn, + (W_) seqDirectReturn +}; + +#endif /* CONCURRENT */ +\end{code} diff --git a/ghc/runtime/main/StgTrace.lc b/ghc/runtime/main/StgTrace.lc new file mode 100644 index 0000000000..0c4ab4ca33 --- /dev/null +++ b/ghc/runtime/main/StgTrace.lc @@ -0,0 +1,74 @@ +\begin{code} + +#include "rtsdefs.h" + +#if defined(DO_RUNTIME_TRACE_UPDATES) + +/********** Debugging Tracing of Updates ***********/ + +/* These will only be called if StgUpdate.h macro calls + compiled with -DDO_RUNTIME_TRACE_UPDATES + */ + +extern I_ traceUpdates; /* a Bool, essentially */ + +void +TRACE_UPDATE_Ind(updclosure,heapptr) +P_ updclosure,heapptr; +{ +#if defined(GCap) + if (traceUpdates) { + fprintf(stderr,"Upd Ind %s Gen: 0x%lx -> 0x%lx\n", + (updclosure) <= StorageMgrInfo.OldLim ? 
"Old" : "New", + (W_) updclosure, (W_) heapptr); + } +#else + if (traceUpdates) { + fprintf(stderr,"Upd Ind: 0x%lx -> 0x%lx\n", + (W_) updclosure, (W_) heapptr); + } +#endif +} + +void +TRACE_UPDATE_Inplace_NoPtrs(updclosure) +P_ updclosure; +{ +#if defined(GCap) + if (traceUpdates) { + fprintf(stderr,"Upd Inplace %s Gen: 0x%lx\n", + (updclosure) <= StorageMgrInfo.OldLim ? "Old" : "New", + (W_) updclosure); + } +#else + if (traceUpdates) { + fprintf(stderr,"Upd Inplace: 0x%lx\n", (W_) updclosure); + } +#endif +} + +void +TRACE_UPDATE_Inplace_Ptrs(updclosure, hp) +P_ updclosure; +P_ hp; +{ +#if defined(GCap) + if (traceUpdates) { + if ((updclosure) <= StorageMgrInfo.OldLim) { + fprintf(stderr,"Upd Redirect Old Gen (Ptrs): 0x%lx -> 0x%lx\n", + (W_) updclosure, + (W_) (hp + 1)); + } else { + fprintf(stderr,"Upd Inplace New Gen (Ptrs): 0x%lx\n", (W_) updclosure); + } + } +#else + if (traceUpdates) { + fprintf(stderr,"Update Inplace: 0x%lx\n", (W_) updclosure); + } +#endif +} + +#endif /* DO_RUNTIME_TRACE_UPDATES */ + +\end{code} diff --git a/ghc/runtime/main/StgUpdate.lhc b/ghc/runtime/main/StgUpdate.lhc new file mode 100644 index 0000000000..904f637124 --- /dev/null +++ b/ghc/runtime/main/StgUpdate.lhc @@ -0,0 +1,730 @@ +%************************************************************************ +%* * +\section[update-code]{Code required for update abstraction} +%* * +%************************************************************************ + +This code is required by the update interface which sits on top of the +storage manager interface (See \tr{SMupdate.lh}). + +Some of this stuff has been separated (correctly!) into StgThreads.lhc +for version 0.23. Could someone (Hans?) bring us up to date, please! +KH. + +\begin{itemize} +\item Indirection entry code and info table. +\item Black Hole entry code and info table. +\item Update frame code and return vectors. +\item PAP update code. +\item PAP entry code and info table. 
+\end{itemize} + +System-wide constants need to be included: +\begin{code} +#define MAIN_REG_MAP /* STG world */ + +#include "rtsdefs.h" +#include "SMupdate.h" +#if 0 +#ifdef PAR +# include "Statistics.h" +#endif +#endif + +EXTDATA(Nil_closure); + +#if defined(DO_REDN_COUNTING) +extern void PrintRednCountInfo(STG_NO_ARGS); +extern I_ showRednCountStats; +#endif +\end{code} + +%************************************************************************ +%* * +\subsection[indirection-code]{Indirection code} +%* * +%************************************************************************ + +The entry code for indirections and the indirection info-table. +\begin{code} +STGFUN(Ind_entry) +{ + FB_ + ENT_IND(Node); /* Ticky-ticky profiling info */ + SET_ACTIVITY(ACT_INDIRECT); /* SPAT profiling */ + + Node = (P_) IND_CLOSURE_PTR((P_) Node); + ENT_VIA_NODE(); + InfoPtr=(D_)(INFO_PTR(Node)); + JMP_(ENTRY_CODE(InfoPtr)); + FE_ +} + +IND_ITBL(Ind_info,Ind_entry,const,EF_); + +\end{code} + +We also need a special @CAF@ indirection info table which is used to +indirect @CAF@s to evaluated results in the heap. +\begin{code} +STGFUN(Caf_entry) /* same as Ind_entry */ +{ + FB_ + ENT_IND(Node); + SET_ACTIVITY(ACT_INDIRECT); /* SPAT profiling */ + + Node = (P_) IND_CLOSURE_PTR((P_) Node); + ENT_VIA_NODE(); + InfoPtr=(D_)(INFO_PTR(Node)); + JMP_(ENTRY_CODE(InfoPtr)); + FE_ +} + +CAF_ITBL(Caf_info,Caf_entry,const,EF_); +\end{code} + +%************************************************************************ +%* * +\subsection[black-hole-code]{Black Hole code} +%* * +%************************************************************************ + +The entry code for black holes abort indicating a cyclic data dependency. +It is used to overwrite closures currently being evaluated. + +In the concurrent world, black holes are synchronization points, and they +are turned into blocking queues when there are threads waiting for the +evaluation of the closure to finish. 
+ +\begin{code} +#ifdef CONCURRENT +EXTFUN(EnterNodeCode); +EXTFUN(StackUnderflowEnterNode); +EXTDATA_RO(BQ_info); +#else +extern StgStablePtr errorHandler; +extern void raiseError PROTO((StgStablePtr)); +#endif + +STGFUN(BH_UPD_entry) +{ +#ifndef CONCURRENT + FB_ + (void) STGCALL1(int,(void *, FILE *),fflush,stdout); + (void) STGCALL2(int,(),fprintf,stderr,"Entered a `black hole': the program has a cyclic data dependency.\n"); + +# if defined(USE_COST_CENTRES) + { + CostCentre cc = (CostCentre) CC_HDR(Node); + (void) STGCALL5(int,(),fprintf,stderr,"Cost Centre: %s Module: %s Group %s\n",cc->label, cc->module, cc->group); + } +# endif + +# if defined(DO_REDN_COUNTING) + if (showRednCountStats) { + (void) STGCALL0(void,(),PrintRednCountInfo); + } +# endif + + (void) STGCALL1(void,(void *, StgStablePtr), raiseError, errorHandler); + FE_ + +#else /* threads! */ + + FB_ + +# if 0 + if ( debug & 0x80 ) + (void) STGCALL4(int,(),fprintf,stderr,"GRAN_CHECK in BH_UPD_entry: Entered a `black hole' @ 0x%x (CurrentTSO @ 0x%x\n ",Node,CurrentTSO); +#endif + +# if defined(GRAN) + STGCALL0(void,(),GranSimBlock); /* Do this before losing its TSO_LINK */ +# endif + + TSO_LINK(CurrentTSO) = Nil_closure; + SET_INFO_PTR(Node, BQ_info); + BQ_ENTRIES(Node) = (W_) CurrentTSO; + +# if defined(GCap) || defined(GCgn) + /* If we modify a black hole in the old generation, + we have to make sure it goes on the mutables list */ + + if(Node <= StorageMgrInfo.OldLim) { + MUT_LINK(Node) = (W_) StorageMgrInfo.OldMutables; + StorageMgrInfo.OldMutables = Node; + } else + MUT_LINK(Node) = MUT_NOT_LINKED; +# endif + + LivenessReg = LIVENESS_R1; + SaveAllStgRegs(); + TSO_PC1(CurrentTSO) = EnterNodeCode; + + if (DO_QP_PROF) { + QP_Event1("GR", CurrentTSO); + } + +# ifdef PAR + if(do_gr_profile) { + TIME now = CURRENT_TIME; + TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); + TSO_BLOCKCOUNT(CurrentTSO)++; + TSO_QUEUE(CurrentTSO) = Q_BLOCKED; + TSO_BLOCKEDAT(CurrentTSO) = now; + 
DumpGranEvent(GR_BLOCK, CurrentTSO); + } +# endif + +# if defined(GRAN) + /* CurrentTSO = Nil_closure; */ + ReSchedule(NEW_THREAD); +# else + ReSchedule(0); +# endif + + FE_ +#endif /* threads */ +} + +/* made external so that debugger can get at it more effectively */ +STGFUN(BH_SINGLE_entry) +{ + FB_ + + (void) STGCALL1(int,(void *, FILE *),fflush,stdout); + (void) STGCALL2(int,(),fprintf,stderr,"Entered a single-entry `black hole' --\n"); + (void) STGCALL2(int,(),fprintf,stderr,"either the compiler made a mistake on single-entryness,\n"); + (void) STGCALL2(int,(),fprintf,stderr,"or the program has a cyclic data dependency.\n"); + +#if defined(USE_COST_CENTRES) + { + CostCentre cc = (CostCentre) CC_HDR(Node); + (void) STGCALL5(int,(),fprintf,stderr, "Cost Centre: %s Module: %s Group %s\n",cc->label, cc->module, cc->group); + } +#endif + +# if defined(DO_REDN_COUNTING) + if (showRednCountStats) { + (void) STGCALL0(void,(),PrintRednCountInfo); + } +# endif + +#ifndef CONCURRENT + (void) STGCALL1(void,(void *, StgStablePtr), raiseError, errorHandler); +#else + EXIT(EXIT_FAILURE); +#endif + + FE_ +} +\end{code} + +Updatable closures are overwritten with a black hole of a fixed size, +@MIN_UPD_SIZE@. + +\begin{code} +CAT_DECLARE(BH,BH_K,"BH","BH") /* just one, shared */ + +BH_ITBL(BH_UPD_info,BH_UPD_entry,U,const,EF_); +\end{code} + +Single-Entry closures, which are not updated, are also overwritten +with a black hole. They have size @MIN_NONUPD_SIZE@. + +\begin{code} +BH_ITBL(BH_SINGLE_info,BH_SINGLE_entry,N,const,EF_); +\end{code} + +%************************************************************************ +%* * +\subsection[static-update-code]{Static update code in update frames} +%* * +%************************************************************************ + +This code is pointed to from update frames. It has to cope with +any kind of algebraic return: vectored or unvectored. 
+ +See \tr{SMupdate.lh} for a description of the various update frames +and the macros defining their layout. + +On entry to this code: +\begin{itemize} +\item @R1@ points to a recently created heap object (return in heap) or +is dead (return in regs). +\item @R2@ points to the info table for the constructor. +\item When returning in regs, any of the return-regs (@R3@...) may be live, +but aren't used by this code. They must be preserved. +\item @SpB@ points to the topmost word of the update frame. +\end{itemize} + +NEW update mechanism (Jan '94): + +When returning to an update frame, we want to jump directly to the +update code for the constructor in hand. Because of the various +possible return conventions (all of which must be handled by the +generic update frame), we actually end up with a somewhat indirect +jump. + +\begin{code} + +STGFUN(StdUpdFrameDirectReturn) +{ + FB_ + JMP_(UPDATE_CODE(InfoPtr)); + FE_ +} + +/* + NB: For direct returns to work properly, the name of the routine must be + the same as the name of the vector table with vtbl_ removed and DirectReturn + appended. This is all the mangler understands. 
+*/ + +const +W_ +vtbl_StdUpdFrame[] = { + /* at least "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */ + (W_) StdUpdFrameDirectReturn/*0*/, + (W_) StdUpdFrameDirectReturn/*1*/, + (W_) StdUpdFrameDirectReturn/*2*/, + (W_) StdUpdFrameDirectReturn/*3*/, + (W_) StdUpdFrameDirectReturn/*4*/, + (W_) StdUpdFrameDirectReturn/*5*/, + (W_) StdUpdFrameDirectReturn/*6*/, + (W_) StdUpdFrameDirectReturn/*7*/ +}; + +\end{code} + +%************************************************************************ +%* * +\subsection[existing-con-update-code]{Update code for existing constructors} +%* * +%************************************************************************ + +Here is the standard update code for objects that are returned in the heap +(or those which are initially returned in registers, but have already been +allocated in the heap earlier in the update chain.) In either case, Node +points to the heap object. The update code grabs the address of the updatee +out of the partial update frame (the return address has already been popped), +makes the updatee an indirection to Node, and returns according to the convention +for the constructor. 
+ +\begin{code} +#define IND_UPD_TEMPLATE(label, retvector) \ + STGFUN(label) \ + { \ + FB_ \ + UPD_EXISTING(); /* Ticky-ticky profiling info */ \ + /* Update thing off stk with an indirection to Node */ \ + UPD_IND(GRAB_UPDATEE(SpB), Node); \ + /* Pop the standard update frame */ \ + POP_STD_UPD_FRAME() \ + \ + JMP_(retvector); \ + FE_ \ + } + +IND_UPD_TEMPLATE(IndUpdRetDir, DIRECT(((P_)RetReg))) +IND_UPD_TEMPLATE(IndUpdRetV0, ((P_)RetReg)[RVREL(0)]) +IND_UPD_TEMPLATE(IndUpdRetV1, ((P_)RetReg)[RVREL(1)]) +IND_UPD_TEMPLATE(IndUpdRetV2, ((P_)RetReg)[RVREL(2)]) +IND_UPD_TEMPLATE(IndUpdRetV3, ((P_)RetReg)[RVREL(3)]) +IND_UPD_TEMPLATE(IndUpdRetV4, ((P_)RetReg)[RVREL(4)]) +IND_UPD_TEMPLATE(IndUpdRetV5, ((P_)RetReg)[RVREL(5)]) +IND_UPD_TEMPLATE(IndUpdRetV6, ((P_)RetReg)[RVREL(6)]) +IND_UPD_TEMPLATE(IndUpdRetV7, ((P_)RetReg)[RVREL(7)]) + +\end{code} + +%************************************************************************ +%* * +\subsection[no-update-code]{Code for Erroneous Updates} +%* * +%************************************************************************ + +\begin{code} + +STGFUN(UpdErr) +{ + FB_ + + fflush(stdout); + fprintf(stderr, "Update error: not a constructor!\n"); + abort(); + + FE_ +} + +STGFUN(StdErrorCode) +{ + FB_ + + fflush(stdout); + fprintf(stderr, "Standard error: should never happen!\n"); + abort(); + + FE_ +} +\end{code} + +%************************************************************************ +%* * +\subsection[permanent-indirections]{Lexical Scoping Updates} +%* * +%************************************************************************ + +A function entered without any arguments is updated with an +indirection. For lexically scoped profiling we still need to set the +cost centre if we enter the PAP. As the indirection is removed by the +garbage collector this would not be possible. + +To solve this problem we introduce a permanent indirection which sets +the cost centre when entered. 
The heap profiler ignores the space +occupied by it as it would not reside in the heap during normal +execution. + +\begin{code} +#if defined(USE_COST_CENTRES) + +STGFUN(Perm_Ind_entry) +{ + FB_ + + /* Don't add INDs to granularity cost */ + + ENT_IND(Node); /* Ticky-ticky profiling info */ + + /* Enter PAP cost centre -- lexical scoping only */ + ENTER_CC_PAP_CL(Node); + + Node = (P_) IND_CLOSURE_PTR((P_) Node); + ENT_VIA_NODE(); /* Ticky-ticky profiling info */ + + InfoPtr=(D_)(INFO_PTR(Node)); +# if defined(GRAN) + GRAN_EXEC(1,1,2,0,0); +# endif + JMP_(ENTRY_CODE(InfoPtr)); + FE_ +} + +PERM_IND_ITBL(Perm_Ind_info,Perm_Ind_entry,const,EF_); + +#endif /* USE_COST_CENTRES */ +\end{code} + +%************************************************************************ +%* * +\subsection[partial-application-updates]{Partial applications} +%* * +%************************************************************************ + +See STG paper implementation section of Partial application updates. + +We jump here when the current function fails an argument satisfaction +check. There can be two reasons for this. In the usual case, there +is an update frame blocking our access to anything deeper on the +stack. We then update the updatee in the frame with a partial +application node and squeeze out the update frame. The other +possibility is that we are running threaded code, and we are sitting +on the bottom of a stack chunk. In this case, we still build the +partial application, but we have nothing in our hands to update, so we +underflow the stack (awakening the previous chunk) and enter the +partial application node just built. + +On entry to @UpdatePAP@, we assume the following: +\begin{itemize} +\item SuB points to topmost word of an update frame or to the bottom of a +stack chunk. +\item SpA and SpB point to the topmost words of their respective stacks. +\item Node points to the closure which needs more arguments than are there. 
+\end{itemize} + +\begin{code} + +STGFUN(UpdatePAP) +{ + /* + * Use STG registers for these locals which must survive the HEAP_CHK. + * Don't squash Node (R1), because it's an implicit argument. + */ + +#define NNonPtrWords (R2.i) +#define NPtrWords (R3.i) +#define NArgWords (R4.i) +#define PapSize (R5.i) +#if defined(USE_COST_CENTRES) +# define CC_pap ((CostCentre)(R7.p)) +#endif + + /* These other locals do not have to survive a HEAP_CHK */ + + P_ PapClosure; + P_ Updatee; + P_ p; + I_ i; + + FB_ + +#if defined(COUNT) + ++nPAPs; +#endif + + SET_ACTIVITY(ACT_UPDATE_PAP); /* SPAT profiling */ + + NPtrWords = AREL(SuA - SpA); + NNonPtrWords = BREL(SuB - SpB); + + ASSERT(NPtrWords >= 0); + ASSERT(NNonPtrWords >= 0); + + NArgWords = NPtrWords + NNonPtrWords + 1; /* +1 for Node */ + +#if defined(USE_COST_CENTRES) + /* set "CC_pap" to go in the updatee (see Sansom thesis, p 183) */ + + CC_pap /*really cc_enter*/ = (CostCentre) CC_HDR(Node); + if (IS_SUBSUMED_CC(CC_pap) /*really cc_enter*/) + CC_pap = CCC; +#endif + + if (NArgWords == 1) { + + /* + * No arguments, only Node. Skip building the PAP and + * just plan to update with an indirection. + */ + + PapClosure = Node; + + } else { + + /* Build the PAP. A generic PAP closure is laid out thus: + * code ptr, size, no of words of ptrs, Node, ptrs, non-ptrs + * (i.e. a DYN closure) + * ToDo: add stuff for special cases, to omit size and no. of ptrs + * (Still ToDo? 
(JSM)) + */ + + PapSize = NArgWords + DYN_HS; + + ALLOC_UPD_PAP(DYN_HS, NArgWords, 0, PapSize); + CC_ALLOC(CC_pap, PapSize, PAP_K); + + /* Allocate PapClosure -- Only Node (R1) is live */ + HEAP_CHK(LIVENESS_R1, PapSize, 0); + + SET_ACTIVITY(ACT_UPDATE_PAP); /* back to it (for SPAT profiling) */ + + PapClosure = Hp + 1 - PapSize; /* The new PapClosure */ + + SET_DYN_HDR(PapClosure, PAP_info, CC_pap, NArgWords + DYN_VHS, NPtrWords + 1); + + /* Now fill in the closure fields */ + + p = Hp; + for (i = NNonPtrWords - 1; i >= 0; i--) *p-- = (W_) SpB[BREL(i)]; + for (i = NPtrWords - 1; i >= 0; i--) *p-- = (W_) SpA[AREL(i)]; + *p = (W_) Node; + } + + /* + * Finished constructing PAP closure; now update the updatee. + * But wait! What if there is no updatee? Then we fall off the stack. + */ + +#ifdef CONCURRENT + if (SuB < STKO_BSTK_BOT(StkOReg)) { + Node = PapClosure; +# ifdef PAR + LivenessReg = LIVENESS_R1; +# endif + JMP_(StackUnderflowEnterNode); + } +#endif + + /* + * Now we have a standard update frame, so we update the updatee with + * either the new PAP or Node. + * + * Supposedly, it is not possible to get a constructor update frame, + * (Why not? (JSM)) + * (Because they have *never* been implemented. (WDP)) + */ + + Updatee = GRAB_UPDATEE(SuB); + UPD_IND(Updatee, PapClosure); /* Indirect Updatee to PapClosure */ + + if (NArgWords != 1) { + UPD_PAP_IN_NEW(); + + } else { + UPD_PAP_IN_PLACE(); + +#if defined(USE_COST_CENTRES) + /* + * Lexical scoping requires a *permanent* indirection, and we + * also have to set the cost centre for the indirection. + */ + INFO_PTR(Updatee) = (W_) Perm_Ind_info; + SET_CC_HDR(Updatee, CC_pap); + +#endif /* USE_COST_CENTRES */ + } + +#if defined(USE_COST_CENTRES) + /* + * Restore the Cost Centre too (if required); again see Sansom thesis p 183. + * Take the CC out of the update frame if a CAF/DICT. + */ + + CCC = (IS_CAF_OR_DICT_CC(CC_pap)) ? 
GRAB_COST_CENTRE(SuB) : CC_pap; + +#endif /* USE_COST_CENTRES */ + + /* Restore SuA, SuB, RetReg */ + RetReg = GRAB_RET(SuB); + SuA = GRAB_SuA(SuB); + SuB = GRAB_SuB(SuB); + + /* + * Squeeze out update frame from B stack. Note that despite our best + * efforts with [AB]REL and friends, the loop order depends on the B + * stack growing up. + */ + for (i = NNonPtrWords - 1; i >= 0; i--) + SpB[BREL(i+STD_UF_SIZE)] = SpB[BREL(i)]; + + SpB += BREL(STD_UF_SIZE); + + /* + * All done! Restart by re-entering Node + * Don't count this entry for ticky-ticky profiling. + */ + +#if defined(GRAN) + GRAN_EXEC(16,4,7,4,0); +#endif + InfoPtr=(D_)(INFO_PTR(Node)); + JMP_(ENTRY_CODE(InfoPtr)); + FE_ + +#undef NNonPtrWords +#undef NPtrWords +#undef NArgWords +#undef PapSize +#ifdef USE_COST_CENTRES +# undef CC_pap +#endif +} +\end{code} + +The entry code for a generic PAP. @Node@ points to the PAP closure. +Reload the stacks from the PAP, and enter the closure stored in the +PAP. PAPs are in HNF so no update frame is needed. + +\begin{code} +STGFUN(PAP_entry) +{ + /* Use STG registers for these locals which must survive the STK_CHK */ +#define NPtrWords (R2.i) +#define NNonPtrWords (R3.i) +#if defined(USE_COST_CENTRES) +# define CC_pap ((CostCentre)(R7.p)) +#endif + + /* These locals don't have to survive a HEAP_CHK */ + P_ Updatee; + P_ p; + I_ i; + I_ aWords, bWords; + + FB_ + + SET_ACTIVITY(ACT_UPDATE_PAP); /* SPAT profiling */ + + while (AREL(SuA - SpA) == 0 && BREL(SuB - SpB) == 0) { +#ifdef CONCURRENT + if (SuB < STKO_BSTK_BOT(StkOReg)) { +# ifdef PAR + LivenessReg = LIVENESS_R1; +# endif + JMP_(StackUnderflowEnterNode); + } +#endif + + /* We're sitting on top of an update frame, so let's do the business */ + + Updatee = GRAB_UPDATEE(SuB); + UPD_IND(Updatee, Node); + +#if defined(USE_COST_CENTRES) + /* + * Restore the Cost Centre too (if required); again see Sansom thesis p 183. + * Take the CC out of the update frame if a CAF/DICT. 
+ */ + + CC_pap = (CostCentre) CC_HDR(Node); + CCC = (IS_CAF_OR_DICT_CC(CC_pap)) ? GRAB_COST_CENTRE(SuB) : CC_pap; + +#endif /* USE_COST_CENTRES */ + + RetReg = GRAB_RET(SuB); + SuA = GRAB_SuA(SuB); + SuB = GRAB_SuB(SuB); + SpB += BREL(STD_UF_SIZE); + } + + NPtrWords = DYN_CLOSURE_NoPTRS(Node) - 1; /* The saved Node counts as one */ + NNonPtrWords = DYN_CLOSURE_NoNONPTRS(Node); + + /* Ticky-ticky profiling info */ + ENT_PAP(Node); + + /* Enter PAP cost centre -- lexical scoping only */ + ENTER_CC_PAP_CL(Node); + + /* + * Check for stack overflow. Ask to take all of the current frame with + * us to the new world. If there is no update frame on the current stack, + * bWords will exceed the size of the B stack, but StackOverflow will deal + * with it. + */ + + aWords = AREL(SuA - SpA); + bWords = BREL(SuB - SpB) + STD_UF_SIZE; + + STK_CHK(LIVENESS_R1, NPtrWords, NNonPtrWords, aWords, bWords, 0, 0); + + SpA -= AREL(NPtrWords); + SpB -= BREL(NNonPtrWords); + + /* Reload Node */ + p = Node + DYN_HS; /* Point to first pointer word */ + Node = (P_) *p++; + + /* Reload the stacks */ + + for (i=0; i<NPtrWords; i++) SpA[AREL(i)] = (P_) *p++; + for (i=0; i<NNonPtrWords; i++) SpB[BREL(i)] = *p++; + + /* Off we go! 
*/ + ENT_VIA_NODE(); + InfoPtr=(D_)(INFO_PTR(Node)); + JMP_(ENTRY_CODE(InfoPtr)); + FE_ + +#undef NPtrWords +#undef NNonPtrWords +#ifdef USE_COST_CENTRES +# undef CC_pap +#endif +} +\end{code} + +The info table for a generic PAP: +\begin{code} +DYN_ITBL(PAP_info,PAP_entry,UpdErr,0,INFO_OTHER_TAG,0,0,const,EF_,PAP_K,"PAP","->"); +\end{code} diff --git a/ghc/runtime/main/Threads.lc b/ghc/runtime/main/Threads.lc new file mode 100644 index 0000000000..a767ec940a --- /dev/null +++ b/ghc/runtime/main/Threads.lc @@ -0,0 +1,3749 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995 +% +%************************************************************************ +%* * +\section[Threads.lc]{Thread Control Routines} +%* * +%************************************************************************ + +%************************************************************************ +% +\subsection[thread-overview]{Overview of the Thread Management System} +% +%************************************************************************ + +%************************************************************************ +% +\subsection[thread-decls]{Thread Declarations} +% +%************************************************************************ + +% I haven't checked if GRAN can work with QP profiling. But as we use our +% own profiling (GR profiling) that should be irrelevant. -- HWL + +\begin{code} + +#if defined(CONCURRENT) + +# define NON_POSIX_SOURCE /* so says Solaris */ + +# include "rtsdefs.h" +# include <setjmp.h> + +#include "LLC.h" +#include "HLC.h" + +static void init_qp_profiling(STG_NO_ARGS); /* forward decl */ +\end{code} + +@AvailableStack@ is used to determine whether an existing stack can be +reused without new allocation, so reducing garbage collection, and +stack setup time. At present, it is only used for the first stack +chunk of a thread, the one that's got @StkOChunkSize@ words. 
+ +\begin{code} +P_ AvailableStack = Nil_closure; +P_ AvailableTSO = Nil_closure; +\end{code} + +Macros for dealing with the new and improved GA field for simulating +parallel execution. Based on @CONCURRENT@ package. The GA field now +contains a mask, where the n-th bit stands for the n-th processor, +where this data can be found. In case of multiple copies, several bits +are set. The total number of processors is bounded by @MAX_PROC@, +which should be <= the length of a word in bits. -- HWL + +\begin{code} +/* mattson thinks this is obsolete */ + +# if 0 && defined(GRAN) +extern FILE *main_statsfile; /* Might be of general interest HWL */ + +typedef unsigned long TIME; +typedef unsigned char PROC; +typedef unsigned char EVTTYPE; + + +# undef max +# define max(a,b) (a>b?a:b) + +static PROC +ga_to_proc(W_ ga) +{ PROC i; + + for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ; + + return (i); +} + +/* NB: This takes a *node* rather than just a ga as input */ +static PROC +where_is(P_ node) +{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */ + +static PROC +no_of_copies(P_ node) /* DaH lo'lu'Qo'; currently unused */ +{ PROC i, n; + + for (i=0, n=0; i<MAX_PROC; i++) + if (IS_LOCAL_TO(PROCS(node),i)) + n++;; + + return (n); +} + +# endif /* GRAN ; HWL */ +\end{code} + +%**************************************************************** +%* * +\subsection[thread-getthread]{The Thread Scheduler} +%* * +%**************************************************************** + +This is the heart of the thread scheduling code. 

\begin{code}
/* Debug bit-mask; individual bits (0x04, 0x10, 0x20, 0x40, 0x80, ...)
   enable specific GRAN_CHECK diagnostics throughout this file. */
# if defined(GRAN_CHECK) && defined(GRAN)
W_ debug = 0;
# endif

/* Event tracing switches: print events as they are handled
   (event_trace_all includes CONTINUETHREAD events too). */
W_ event_trace = 0;
W_ event_trace_all = 0;

/* The currently executing thread and its saved register table;
   both are NULLed while no thread is running (see ReSchedule). */
STGRegisterTable *CurrentRegTable = NULL;
P_ CurrentTSO = NULL;

# if defined(GRAN)						    /* HWL */

/* Simulated "current processor"; IdleProcs is a bit-mask of idle PEs,
   Idlers a count of them. */
unsigned CurrentProc = 0;
W_ IdleProcs = ~0L, Idlers = MAX_PROC;

# if defined(GRAN_CHECK) && defined(GRAN)     /* Just for testing */
# define FETCH_MASK_TSO  0x08000000      /* only bits 0, 1, 2 should be used */
				         /* normally */
# endif

/* Simulation policy knobs; set from the command line in main. */
I_ DoFairSchedule = 0;
I_ DoReScheduleOnFetch = 0;
I_ DoStealThreadsFirst = 0;
I_ SimplifiedFetch = 0;
I_ DoAlwaysCreateThreads = 0;
I_ DoGUMMFetching = 0;
I_ DoThreadMigration = 0;
I_ FetchStrategy = 4;
I_ PreferSparksOfLocalNodes = 0;

# if defined(GRAN_CHECK) && defined(GRAN)     /* Just for testing */
I_ NoForward = 0;
I_ PrintFetchMisses = 0, fetch_misses = 0;
# endif

# if defined(COUNT)
I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
   BQ_lens = 0;
# endif

/* Profiling output switches (.gr granularity profile, spark profile). */
I_ do_gr_binary = 0;
I_ do_gr_profile = 0;  /* Full .gr profile or only END events? */
I_ no_gr_profile = 0;  /* Don't create any .gr file at all? */
I_ do_sp_profile = 0;
I_ do_gr_migration = 0;

/* Per-processor scheduling state: runnable and waiting thread queues,
   and one spark pool per (processor, pool-class). */
P_ RunnableThreadsHd[MAX_PROC];
P_ RunnableThreadsTl[MAX_PROC];

P_ WaitThreadsHd[MAX_PROC];
P_ WaitThreadsTl[MAX_PROC];

sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];

W_ CurrentTime[MAX_PROC];       /* Per PE clock */

# if defined(GRAN_CHECK) && defined(GRAN)
P_ BlockedOnFetch[MAX_PROC];    /* HWL-CHECK */
# endif

I_ OutstandingFetches[MAX_PROC];

W_ SparksAvail = 0;     /* How many sparks are available */
W_ SurplusThreads = 0;  /* How many excess threads are there */

StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */

/* Communication Cost Variables -- set in main program */

W_ gran_latency = LATENCY, gran_additional_latency = ADDITIONAL_LATENCY,
   gran_fetchtime = FETCHTIME,
   gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime = GLOBALUNBLOCKTIME,
   gran_mpacktime = MSGPACKTIME, gran_munpacktime = MSGUNPACKTIME,
   gran_mtidytime = 0;

W_ gran_threadcreatetime = THREADCREATETIME,
   gran_threadqueuetime = THREADQUEUETIME,
   gran_threaddescheduletime = THREADDESCHEDULETIME,
   gran_threadscheduletime = THREADSCHEDULETIME,
   gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME;

/* Instruction Cost Variables -- set in main program */

W_ gran_arith_cost = ARITH_COST, gran_branch_cost = BRANCH_COST,
   gran_load_cost = LOAD_COST, gran_store_cost = STORE_COST,
   gran_float_cost = FLOAT_COST, gran_heapalloc_cost = 0;

W_ max_proc = MAX_PROC;

/* Granularity event types' names for output */
/* NOTE: must stay in sync with the event-type enumeration used by
   EVENT_TYPE(); print_event indexes this array directly. */

char *event_names[] =
  { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
    "MOVESPARK", "MOVETHREAD", "FINDWORK",
    "FETCHNODE", "FETCHREPLY"
  };

# if defined(GRAN)
/* Prototypes of GrAnSim debugging functions */
void DEBUG_PRINT_NODE   PROTO((P_));
void DEBUG_TREE         PROTO((P_));
void DEBUG_INFO_TABLE   PROTO((P_));
void DEBUG_CURR_THREADQ PROTO((I_));
void DEBUG_THREADQ      PROTO((P_, I_));
void DEBUG_TSO          PROTO((P_, I_));
void DEBUG_EVENT        PROTO((eventq, I_));
void DEBUG_SPARK        PROTO((sparkq, I_));
void DEBUG_SPARKQ       PROTO((sparkq, I_));
void DEBUG_CURR_SPARKQ  PROTO((I_));
void DEBUG_PROC         PROTO((I_, I_));
void DCT(STG_NO_ARGS);
void DCP(STG_NO_ARGS);
void DEQ(STG_NO_ARGS);
void DSQ(STG_NO_ARGS);

void HandleFetchRequest PROTO((P_, PROC, P_));
# endif /* GRAN ; HWL */

#if defined(GRAN_CHECK) && defined(GRAN)
/* Event-count statistics, only kept when (debug & 0x20). */
static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;

static I_ noOfEvents = 0;
static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
#endif

TIME SparkStealTime();

/* Fcts for manipulating event queues have been deleted -- HWL */
/*
---------------------------------- */ + +static void +print_spark(spark) + sparkq spark; +{ + + if (spark==NULL) + fprintf(stderr,"Spark: NIL\n"); + else + fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%x, Next 0x%x\n", + (W_) SPARK_NODE(spark), SPARK_NAME(spark), + ((SPARK_EXPORTED(spark))?"True":"False"), + SPARK_PREV(spark), SPARK_NEXT(spark) ); +} + +static print_sparkq(hd) +sparkq hd; +{ + sparkq x; + + fprintf(stderr,"Spark Queue with root at %x:\n",hd); + for (x=hd; x!=NULL; x=SPARK_NEXT(x)) { + print_spark(x); + } +} + +static print_event(event) +eventq event; +{ + + if (event==NULL) + fprintf(stderr,"Evt: NIL\n"); + else + fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n", + event_names[EVENT_TYPE(event)],EVENT_TYPE(event), + EVENT_PROC(event), EVENT_CREATOR(event), + EVENT_TIME(event), EVENT_TSO(event), EVENT_NODE(event) /*, + EVENT_SPARK(event), EVENT_NEXT(event)*/ ); + +} + +static print_eventq(hd) +eventq hd; +{ + eventq x; + + fprintf(stderr,"Event Queue with root at %x:\n",hd); + for (x=hd; x!=NULL; x=EVENT_NEXT(x)) { + print_event(x); + } +} + +/* ---------------------------------- */ + +#if 0 /* moved */ +static eventq getnextevent() +{ + static eventq entry = NULL; + + if(EventHd == NULL) + { + fprintf(stderr,"No next event\n"); + exit(EXIT_FAILURE); /* ToDo: abort()? EXIT??? */ + } + + if(entry != NULL) + free((char *)entry); + +#if defined(GRAN_CHECK) && defined(GRAN) + if (debug & 0x20) { /* count events */ + noOfEvents++; + event_counts[EVENT_TYPE(EventHd)]++; + } +#endif + + entry = EventHd; + EventHd = EVENT_NEXT(EventHd); + return(entry); +} + +/* ToDo: replace malloc/free with a free list */ + +static insert_event(newentry) +eventq newentry; +{ + EVTTYPE evttype = EVENT_TYPE(newentry); + eventq event, *prev; + + /* Search the queue and insert at the right point: + FINDWORK before everything, CONTINUETHREAD after everything. 
+ + This ensures that we find any available work after all threads have + executed the current cycle. This level of detail would normally be + irrelevant, but matters for ridiculously low latencies... + */ + + if(EventHd == NULL) + EventHd = newentry; + else + { + for (event = EventHd, prev=&EventHd; event != NULL; + prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) + { + if(evttype == FINDWORK ? (EVENT_TIME(event) >= EVENT_TIME(newentry)) : + evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) : + (EVENT_TIME(event) > EVENT_TIME(newentry) || + (EVENT_TIME(event) == EVENT_TIME(newentry) && + EVENT_TYPE(event) != FINDWORK ))) + { + *prev = newentry; + EVENT_NEXT(newentry) = event; + break; + } + } + if (event == NULL) + *prev = newentry; + } +} + +static newevent(proc,creator,time,evttype,tso,node,spark) +PROC proc, creator; +TIME time; +EVTTYPE evttype; +P_ tso, node; +sparkq spark; +{ + extern P_ xmalloc(); + eventq newentry = (eventq) xmalloc(sizeof(struct event)); + + EVENT_PROC(newentry) = proc; + EVENT_CREATOR(newentry) = creator; + EVENT_TIME(newentry) = time; + EVENT_TYPE(newentry) = evttype; + EVENT_TSO(newentry) = tso; + EVENT_NODE(newentry) = node; + EVENT_SPARK(newentry) = spark; + EVENT_NEXT(newentry) = NULL; + + insert_event(newentry); +} +#endif /* 0 moved */ + +# else /* !GRAN */ + +P_ RunnableThreadsHd = Nil_closure; +P_ RunnableThreadsTl = Nil_closure; + +P_ WaitingThreadsHd = Nil_closure; +P_ WaitingThreadsTl = Nil_closure; + +PP_ PendingSparksBase[SPARK_POOLS]; +PP_ PendingSparksLim[SPARK_POOLS]; + +PP_ PendingSparksHd[SPARK_POOLS]; +PP_ PendingSparksTl[SPARK_POOLS]; + +# endif /* GRAN ; HWL */ + +static jmp_buf scheduler_loop; + +I_ MaxThreads = DEFAULT_MAX_THREADS; +I_ required_thread_count = 0; +I_ advisory_thread_count = 0; + +EXTFUN(resumeThread); + +P_ NewThread PROTO((P_, W_)); + +I_ context_switch = 0; + +I_ contextSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */ + +#if !defined(GRAN) + +I_ threadId = 0; + +I_ 
MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS;
I_ SparkLimit[SPARK_POOLS];

extern I_ doSanityChks;
extern void checkAStack(STG_NO_ARGS);

/* Allocate the two spark pools (REQUIRED and ADVISORY), `size' slots
   each, and set their limits.  Returns rtsFalse on allocation failure.
   NOTE(review): if the second malloc fails, the first pool is not
   freed -- harmless in practice since the caller aborts startup, but
   worth confirming. */
rtsBool
initThreadPools(size)
I_ size;
{
    SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
    if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
	return rtsFalse;
    if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
	return rtsFalse;
    PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
    PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
    return rtsTrue;
}
#endif

#ifdef PAR
rtsBool sameThread;
#endif

/* Top-level scheduler entry point: initialise the per-PE queues and the
 * virtual timer, create the main thread for topClosure, then enter the
 * main scheduler loop.  ReSchedule longjmps back to the setjmp below to
 * pick the next thread; a negative longjmp value means the program has
 * finished and we return to the caller (main).
 */
void
ScheduleThreads(topClosure)
P_ topClosure;
{
    I_ i;
    P_ tso;

    /* Start the virtual (interval) timer used for time profiling and/or
       preemptive context switching; without a timer, context_switch
       stays permanently off. */
#if defined(USE_COST_CENTRES) || defined(GUM)
    if (time_profiling || contextSwitchTime > 0) {
	if (initialize_virtual_timer(tick_millisecs)) {
#else
    if (contextSwitchTime > 0) {
	if (initialize_virtual_timer(contextSwitchTime)) {
#endif
	    fflush(stdout);
	    fprintf(stderr, "Can't initialize virtual timer.\n");
	    EXIT(EXIT_FAILURE);
	}
    } else
	context_switch = 0 /* 1 HWL */;

#if defined(GRAN_CHECK) && defined(GRAN)                  /* HWL */
    if ( debug & 0x40 ) {
      fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
    }
#endif

#if defined(GRAN)                                                     /* KH */
    /* Reset every simulated PE: empty queues, no sparks, no fetches. */
    for (i=0; i<max_proc; i++)
      {
	RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
	WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
	PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] =
	  PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] =
	    NULL;

# if defined(GRAN_CHECK)
	if (debug & 0x04)
	  BlockedOnFetch[i] = 0; /*- StgFalse; -*/        /* HWL-CHECK */
# endif
	OutstandingFetches[i] = 0;
      }

    CurrentProc = MainProc;
#endif /* GRAN */

    if (DO_QP_PROF)
	init_qp_profiling();

    /*
     * We perform GC so that a signal handler can install a new TopClosure and start
     * a new main thread.
     */
#ifdef PAR
    if (IAmMainThread) {
#endif
    if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
	/* kludge to save the top closure as a root */
	CurrentTSO = topClosure;
	ReallyPerformThreadGC(0, rtsTrue);
	topClosure = CurrentTSO;
	if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
	    fflush(stdout);
	    fprintf(stderr, "Not enough heap for main thread\n");
	    EXIT(EXIT_FAILURE);
	}
    }
#ifndef GRAN
    RunnableThreadsHd = RunnableThreadsTl = tso;
#else
    /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
    ThreadQueueHd = ThreadQueueTl = tso;

# if defined(GRAN_CHECK)
    if ( debug & 0x40 ) {
      fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
    }
# endif
#endif

#ifdef PAR
    if (do_gr_profile) {
	DumpGranEvent(GR_START, tso);
	sameThread = rtsTrue;
    }
#endif

#if defined(GRAN)
    MAKE_BUSY(MainProc);  /* Everything except the main PE is idle */
#endif

    required_thread_count = 1;
    advisory_thread_count = 0;
#ifdef PAR
    }	/*if IAmMainThread ...*/
#endif

    /* ----------------------------------------------------------------- */
    /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule    */
    /* ----------------------------------------------------------------- */

    if(setjmp(scheduler_loop) < 0)
	return;

#if defined(GRAN) && defined(GRAN_CHECK)
    if ( debug & 0x80 ) {
      fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
      DEBUG_TSO(ThreadQueueHd,1);
      /* if (ThreadQueueHd == MainTSO) {
	fprintf(stderr,"D> Event Queue is now:\n");
	DEQ();
      } */
    }
#endif

#ifdef PAR
    if (PendingFetches != Nil_closure) {
	processFetches();
    }

#elif defined(GRAN)
    if (ThreadQueueHd == Nil_closure) {
	fprintf(stderr, "No runnable threads!\n");
	EXIT(EXIT_FAILURE);
    }
    if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
	QP_Event1("AG", ThreadQueueHd);
    }
#endif

#ifndef PAR
    /* Sequential/threaded build: block in AwaitEvent until some waiting
       thread becomes runnable; no threads anywhere at all is fatal. */
    while (RunnableThreadsHd == Nil_closure) {
	/* If we've no work */
	if (WaitingThreadsHd == Nil_closure) {
	    fflush(stdout);
	    fprintf(stderr, "No runnable threads!\n");
	    EXIT(EXIT_FAILURE);
	}
	AwaitEvent(0);
    }
#else
    if (RunnableThreadsHd == Nil_closure) {
	if (advisory_thread_count < MaxThreads &&
	  (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
	  PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {

	    /*
	     * If we're here (no runnable threads) and we have pending sparks,
	     * we must have a space problem.  Get enough space to turn one of
	     * those pending sparks into a thread...ReallyPerformGC doesn't
	     * return until the space is available, so it may force global GC.
	     * ToDo: Is this unnecessary here?  Duplicated in ReSchedule()? --JSM
	     */
	    ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
	    SAVE_Hp -= THREAD_SPACE_REQUIRED;
	} else {
	    /*
	     * We really have absolutely no work.  Send out a fish (there may be
	     * some out there already), and wait for something to arrive.  We
	     * clearly can't run any threads until a SCHEDULE or RESUME arrives,
	     * and so that's what we're hoping to see.  (Of course, we still have
	     * to respond to other types of messages.)
	     */
	    if (!fishing)
		sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
		  NEW_FISH_HUNGER);
	    processMessages();
	}
	ReSchedule(0);
    } else if (PacketsWaiting()) {	/* Look for incoming messages */
	processMessages();
    }
#endif /* PAR */

    if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
	QP_Event1("AG", RunnableThreadsHd);
    }

#ifdef PAR
    if (do_gr_profile && !sameThread)
	DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
#endif

#if !GRAN /* ROUND_ROBIN */
    /* Dequeue the head of the runnable queue and make it current. */
    CurrentTSO = RunnableThreadsHd;
    RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
    TSO_LINK(CurrentTSO) = Nil_closure;

    if (RunnableThreadsHd == Nil_closure)
	RunnableThreadsTl = Nil_closure;

#else /* GRAN */
    /* This used to be Round Robin. KH.
       I think we can ignore that, and move it down to ReSchedule instead.
    */
    CurrentTSO = ThreadQueueHd;
    /* TSO_LINK(CurrentTSO) = Nil_closure;  humbug */
#endif

    /* If we're not running a timer, just leave the flag on */
    if (contextSwitchTime > 0)
	context_switch = 0;

#if defined(GRAN_CHECK) && defined(GRAN)             /* Just for testing */
    if (CurrentTSO == Nil_closure) {
	fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
		CurrentProc,CurrentTime[CurrentProc]);
	exit(99);
    }

    if (debug & 0x04) {
      if (BlockedOnFetch[CurrentProc]) {
	fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
		CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
	exit(99);
      }
    }

    if ( (debug & 0x10) &&
	 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
	   fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
		   CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
	   exit(99);
    }
#endif

    /* Hand control to the thread's saved continuation via the
       mini-interpreter (optionally with A-stack sanity checking). */
# if defined(__STG_TAILJUMPS__)
    miniInterpret((StgFunPtr)resumeThread);
# else
    if (doSanityChks)
	miniInterpret_debug((StgFunPtr)resumeThread, checkAStack);
    else
	miniInterpret((StgFunPtr)resumeThread);
# endif /* __STG_TAILJUMPS__ */
}
\end{code}

% Some remarks on GrAnSim -- HWL

The ReSchedule fct is the heart of GrAnSim. Based on its par it issues a
CONTINUETRHEAD to carry on executing the current thread in due course or it
watches out for new work (e.g. called from EndThread).

Then it picks the next event (getnextevent) and handles it appropriately
(see switch construct). Note that a continue in the switch causes the next
event to be handled and a break causes a jmp to the scheduler_loop where
the TSO at the head of the current processor's runnable queue is executed.

ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
+ +\begin{code} +#if defined(GRAN) + +void +ReSchedule(what_next) +int what_next; /* Run the current thread again? */ +{ + sparkq spark, nextspark; + P_ tso; + P_ node; + eventq event; + +#if defined(GRAN_CHECK) && defined(GRAN) + if ( debug & 0x80 ) { + fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next); + DEBUG_TSO(ThreadQueueHd,1); + } +#endif + +#if defined(GRAN_CHECK) && defined(GRAN) + if ( (debug & 0x80) || (debug & 0x40 ) ) + if (what_next<FIND_THREAD || what_next>CHANGE_THREAD) + fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n", + what_next); +#endif + + /* Run the current thread again (if there is one) */ + if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure) + { + /* A bit of a hassle if the event queue is empty, but ... */ + CurrentTSO = ThreadQueueHd; + + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + CONTINUETHREAD,CurrentTSO,Nil_closure,NULL); + + /* This code does round-Robin, if preferred. */ + if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure) + { + if(do_gr_profile) + DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd); + ThreadQueueHd = TSO_LINK(CurrentTSO); + TSO_LINK(ThreadQueueTl) = CurrentTSO; + ThreadQueueTl = CurrentTSO; + TSO_LINK(CurrentTSO) = Nil_closure; + if (do_gr_profile) + DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); + CurrentTime[CurrentProc] += gran_threadcontextswitchtime; + } + } + /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */ + /* has been updated before that already. 
*/ + else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure) + { +#if defined(GRAN_CHECK) && defined(GRAN) + if(DoReScheduleOnFetch) + { + fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n"); + exit(99); + } +#endif + + if(do_gr_profile) + DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); + + CurrentTSO = ThreadQueueHd; + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + CONTINUETHREAD,Nil_closure,Nil_closure,NULL); + + CurrentTime[CurrentProc] += gran_threadcontextswitchtime; + } + + /* We go in here if the current thread is blocked on fetch => don'd CONT */ + else if(what_next==CHANGE_THREAD) + { + /* just fall into event handling loop for next event */ + } + + /* We go in here if we have no runnable threads or what_next==0 */ + else + { + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + FINDWORK,Nil_closure,Nil_closure,NULL); + CurrentTSO = Nil_closure; + } + + /* ----------------------------------------------------------------- */ + /* This part is the EVENT HANDLING LOOP */ + /* ----------------------------------------------------------------- */ + + do { + /* Choose the processor with the next event */ + event = getnextevent(); + CurrentProc = EVENT_PROC(event); + if(EVENT_TIME(event) > CurrentTime[CurrentProc]) + CurrentTime[CurrentProc] = EVENT_TIME(event); + + MAKE_BUSY(CurrentProc); + +#if defined(GRAN_CHECK) && defined(GRAN) + if (debug & 0x80) + fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n"); +#endif + + /* Deal with the idlers */ + HandleIdlePEs(); + +#if defined(GRAN_CHECK) && defined(GRAN) + if (event_trace && + (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD || + (debug & 0x80) )) + print_event(event); +#endif + + switch (EVENT_TYPE(event)) + { + /* Should just be continuing execution */ + case CONTINUETHREAD: +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) { + fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked 
proc %u @ %u\n", + CurrentProc,CurrentTime[CurrentProc]); + print_event(event); + continue; + } +#endif + if(ThreadQueueHd==Nil_closure) + { + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + FINDWORK,Nil_closure,Nil_closure,NULL); + continue; /* Catches superfluous CONTINUEs -- should be unnecessary */ + } + else + break; /* fall into scheduler loop */ + + case FETCHNODE: +#if defined(GRAN_CHECK) && defined(GRAN) + if (SimplifiedFetch) { + fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n"); + exit (99); + } +#endif + + CurrentTime[CurrentProc] += gran_munpacktime; + HandleFetchRequest(EVENT_NODE(event), + EVENT_CREATOR(event), + EVENT_TSO(event)); + continue; + + case FETCHREPLY: +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (SimplifiedFetch) { + fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n"); + exit (99); + } + + if (debug & 0x10) { + if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) { + TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO; + } else { + fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n", + CurrentTSO,CurrentTime[CurrentProc]); + exit(99); + } + } + + if (debug & 0x04) { + if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) { + fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n", + CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]); + exit(99); + } else { + BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/ + } + } +#endif + + /* Copy or move node to CurrentProc */ + if (FetchNode(EVENT_NODE(event), + EVENT_CREATOR(event), + EVENT_PROC(event)) ) { + /* Fetch has failed i.e. 
node has been grabbed by another PE */ + P_ node = EVENT_NODE(event), tso = EVENT_TSO(event); + PROC p = where_is(node); + TIME fetchtime; + +#if defined(GRAN_CHECK) && defined(GRAN) + if (PrintFetchMisses) { + fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n", + CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event)); + fetch_misses++; + } +#endif /* GRAN_CHECK */ + + CurrentTime[CurrentProc] += gran_mpacktime; + + /* Count fetch again !? */ + ++TSO_FETCHCOUNT(tso); + TSO_FETCHTIME(tso) += gran_fetchtime; + + fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) + + gran_latency; + + /* Chase the grabbed node */ + newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL); + +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (debug & 0x04) + BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/ + + if (debug & 0x10) + TSO_TYPE(tso) |= FETCH_MASK_TSO; +#endif + + CurrentTime[CurrentProc] += gran_mtidytime; + + continue; /* NB: no REPLy has been processed; tso still sleeping */ + } + + /* -- Qapla'! 
Fetch has been successful; node is here, now */ + ++TSO_FETCHCOUNT(EVENT_TSO(event)); + TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime; + + if (do_gr_profile) + DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event), + EVENT_NODE(event),EVENT_CREATOR(event)); + + --OutstandingFetches[CurrentProc]; +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (OutstandingFetches[CurrentProc] < 0) { + fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc); + exit (99); + } +#endif + + if (!DoReScheduleOnFetch) { + CurrentTSO = EVENT_TSO(event); /* awaken blocked thread */ + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + CONTINUETHREAD,Nil_closure,Nil_closure,NULL); + TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - + TSO_BLOCKEDAT(EVENT_TSO(event)); + if(do_gr_profile) + DumpGranEvent(GR_RESUME,EVENT_TSO(event)); + continue; + } else { + /* fall through to RESUMETHREAD */ + } + + case RESUMETHREAD: /* Move from the blocked queue to the tail of */ + /* the runnable queue ( i.e. Qu' SImqa'lu') */ + TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - + TSO_BLOCKEDAT(EVENT_TSO(event)); + StartThread(event,GR_RESUME); + continue; + + case STARTTHREAD: + StartThread(event,GR_START); + continue; + + case MOVETHREAD: +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (!DoThreadMigration) { + fprintf(stderr,"MOVETHREAD events should never occur without -bM\n"); + exit (99); + } +#endif + CurrentTime[CurrentProc] += gran_munpacktime; + StartThread(event,GR_STOLEN); + continue; /* to the next event */ + + case MOVESPARK: + CurrentTime[CurrentProc] += gran_munpacktime; + spark = EVENT_SPARK(event); + + ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg. + so the assignment above is needed. 
*/ + + if(do_sp_profile) + DumpSparkGranEvent(SP_ACQUIRED,spark); + + ++SparksAvail; /* Probably Temporarily */ + /* Drop into FINDWORK */ + + if (!DoReScheduleOnFetch && + (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */ + continue; /* to next event (i.e. leave */ + } /* spark in sparkq for now) */ + + case FINDWORK: + if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads) + && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0)) + { + W_ found = 0; + sparkq spark_of_non_local_node = NULL; + + /* Choose a spark from the local spark queue */ + spark = SparkQueueHd; + + while (spark != NULL && !found) + { + node = SPARK_NODE(spark); + if (!SHOULD_SPARK(node)) + { + if(do_sp_profile) + DumpSparkGranEvent(SP_PRUNED,spark); + + assert(spark != NULL); + + SparkQueueHd = SPARK_NEXT(spark); + if(SparkQueueHd == NULL) + SparkQueueTl = NULL; + + DisposeSpark(spark); + + spark = SparkQueueHd; + } + /* -- node should eventually be sparked */ + else if (PreferSparksOfLocalNodes && + !IS_LOCAL_TO(PROCS(node),CurrentProc)) + { + /* We have seen this spark before => no local sparks */ + if (spark==spark_of_non_local_node) { + found = 1; + break; + } + + /* Remember first non-local node */ + if (spark_of_non_local_node==NULL) + spark_of_non_local_node = spark; + + /* Special case: 1 elem sparkq with non-local spark */ + if (spark==SparkQueueTl) { + found = 1; + break; + } + + /* Put spark (non-local!) 
at the end of the sparkq */ + SPARK_NEXT(SparkQueueTl) = spark; + SparkQueueHd = SPARK_NEXT(spark); + SPARK_NEXT(spark) = NULL; + SparkQueueTl = spark; + + spark = SparkQueueHd; + } + else + { + found = 1; + } + } + + /* We've found a node; now, create thread (DaH Qu' yIchen) */ + if (found) + { + CurrentTime[CurrentProc] += gran_threadcreatetime; + + node = SPARK_NODE(spark); + if((tso = NewThread(node, T_REQUIRED))==NULL) + { + /* Some kind of backoff needed here in case there's too little heap */ + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1, + FINDWORK,Nil_closure,Nil_closure,NULL); + ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue); + spark = NULL; + continue; /* to the next event, eventually */ + } + + TSO_EXPORTED(tso) = SPARK_EXPORTED(spark); + TSO_LOCKED(tso) = !SPARK_GLOBAL(spark); + TSO_SPARKNAME(tso) = SPARK_NAME(spark); + + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + STARTTHREAD,tso,Nil_closure,NULL); + + assert(spark != NULL); + + SparkQueueHd = SPARK_NEXT(spark); + if(SparkQueueHd == NULL) + SparkQueueTl = NULL; + + DisposeSpark(spark); + } + else + /* Make the PE idle if nothing sparked and we have no threads. 
*/ + { + if(ThreadQueueHd == Nil_closure) +#if defined(GRAN_CHECK) && defined(GRAN) + { + MAKE_IDLE(CurrentProc); + if ( (debug & 0x40) || (debug & 0x80) ) { + fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc); + } + } +#else + MAKE_IDLE(CurrentProc); +#endif /* GRAN_CHECK */ + else + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + CONTINUETHREAD,Nil_closure,Nil_closure,NULL); + } + + continue; /* to the next event */ + } + else + { +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if ( (debug & 0x04) && + (!DoReScheduleOnFetch && ThreadQueueHd != Nil_closure) + ) { + fprintf(stderr,"Waning in FINDWORK handling:\n"); + fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch"); + } +#endif + if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0) + continue; /* to next event */ + else + break; /* run ThreadQueueHd */ + } + /* never reached */ + + default: + fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event)); + continue; + } + _longjmp(scheduler_loop, 1); + } while(1); + } +\end{code} + +Here follows the non-GRAN @ReSchedule@. +\begin{code} +#else /* !GRAN */ + +void +ReSchedule(again) +int again; /* Run the current thread again? */ +{ + P_ spark; + PP_ sparkp; + P_ tso; + +#ifdef PAR + /* + * In the parallel world, we do unfair scheduling for the moment. + * Ultimately, this should all be merged with the more sophicticated + * GrAnSim scheduling options. (Of course, some provision should be + * made for *required* threads to make sure that they don't starve, + * but for now we assume that no one is running concurrent Haskell on + * a multi-processor platform.) 
+ */ + + sameThread = again; + + if (again) { + if (RunnableThreadsHd == Nil_closure) + RunnableThreadsTl = CurrentTSO; + TSO_LINK(CurrentTSO) = RunnableThreadsHd; + RunnableThreadsHd = CurrentTSO; + } + +#else + + /* + * In the sequential world, we assume that the whole point of running + * the threaded build is for concurrent Haskell, so we provide round-robin + * scheduling. + */ + + if (again) { + if(RunnableThreadsHd == Nil_closure) { + RunnableThreadsHd = CurrentTSO; + } else { + TSO_LINK(RunnableThreadsTl) = CurrentTSO; + if (DO_QP_PROF > 1) { + QP_Event1("GA", CurrentTSO); + } + } + RunnableThreadsTl = CurrentTSO; + } +#endif + +#if 1 + /* + * Debugging code, which is useful enough (and cheap enough) to compile + * in all the time. This makes sure that we don't access saved registers, + * etc. in threads which are supposed to be sleeping. + */ + CurrentTSO = Nil_closure; + CurrentRegTable = NULL; +#endif + + /* First the required sparks */ + + for (sparkp = PendingSparksHd[REQUIRED_POOL]; + sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) { + spark = *sparkp; + if (SHOULD_SPARK(spark)) { + if ((tso = NewThread(spark, T_REQUIRED)) == NULL) + break; + if (RunnableThreadsHd == Nil_closure) { + RunnableThreadsHd = tso; +#ifdef PAR + if (do_gr_profile) { + DumpGranEvent(GR_START, tso); + sameThread = rtsTrue; + } +#endif + } else { + TSO_LINK(RunnableThreadsTl) = tso; +#ifdef PAR + if (do_gr_profile) + DumpGranEvent(GR_STARTQ, tso); +#endif + } + RunnableThreadsTl = tso; + } else { + if (DO_QP_PROF) + QP_Event0(threadId++, spark); +#ifdef PAR + if(do_sp_profile) + DumpSparkGranEvent(SP_PRUNED, threadId++); +#endif + } + } + PendingSparksHd[REQUIRED_POOL] = sparkp; + + /* Now, almost the same thing for advisory sparks */ + + for (sparkp = PendingSparksHd[ADVISORY_POOL]; + sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) { + spark = *sparkp; + if (SHOULD_SPARK(spark)) { + if ( +#ifdef PAR + /* In the parallel world, don't create advisory threads if we are + * 
about to rerun the same thread, or already have runnable threads, + * or the main thread has terminated */ + (RunnableThreadsHd != Nil_closure || + (required_thread_count == 0 && IAmMainThread)) || +#endif + advisory_thread_count == MaxThreads || + (tso = NewThread(spark, T_ADVISORY)) == NULL) + break; + advisory_thread_count++; + if (RunnableThreadsHd == Nil_closure) { + RunnableThreadsHd = tso; +#ifdef PAR + if (do_gr_profile) { + DumpGranEvent(GR_START, tso); + sameThread = rtsTrue; + } +#endif + } else { + TSO_LINK(RunnableThreadsTl) = tso; +#ifdef PAR + if (do_gr_profile) + DumpGranEvent(GR_STARTQ, tso); +#endif + } + RunnableThreadsTl = tso; + } else { + if (DO_QP_PROF) + QP_Event0(threadId++, spark); +#ifdef PAR + if(do_sp_profile) + DumpSparkGranEvent(SP_PRUNED, threadId++); +#endif + } + } + PendingSparksHd[ADVISORY_POOL] = sparkp; + +#ifndef PAR + longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1); +#else + longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1); +#endif +} + +#endif /* !GRAN */ + +\end{code} + +%**************************************************************************** +% +\subsection[thread-gransim-execution]{Starting, Idling and Migrating + Threads (GrAnSim only)} +% +%**************************************************************************** + +Thread start, idle and migration code for GrAnSim (i.e. simulating multiple +processors). 
+ +\begin{code} +#if defined(GRAN) + +StartThread(event,event_type) +eventq event; +enum gran_event_types event_type; +{ + if(ThreadQueueHd==Nil_closure) + { + CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event); + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime, + CONTINUETHREAD,Nil_closure,Nil_closure,NULL); + if(do_gr_profile) + DumpGranEvent(event_type,EVENT_TSO(event)); + } + else + { + TSO_LINK(ThreadQueueTl) = EVENT_TSO(event); + ThreadQueueTl = EVENT_TSO(event); + + if(DoThreadMigration) + ++SurplusThreads; + + if(do_gr_profile) + DumpGranEvent(event_type+1,EVENT_TSO(event)); + + } + CurrentTime[CurrentProc] += gran_threadqueuetime; +} +\end{code} + +Export work to idle PEs. + +\begin{code} +HandleIdlePEs() +{ + PROC proc; + + if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l)) + for(proc = 0; proc < max_proc; proc++) + if(IS_IDLE(proc)) + { + if(DoStealThreadsFirst && + (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) + { + if (SurplusThreads > 0l) /* Steal a thread */ + StealThread(proc); + + if(!IS_IDLE(proc)) + break; + } + + if(SparksAvail > 0l && + (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */ + StealSpark(proc); + + if (IS_IDLE(proc) && SurplusThreads > 0l && + (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */ + StealThread(proc); + } +} +\end{code} + +Steal a spark and schedule moving it to proc. We want to look at PEs in +clock order -- most retarded first. Currently sparks are only stolen from +the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should +be changed to first steal from the former then from the latter. 
+ +\begin{code} +StealSpark(proc) +PROC proc; +{ + PROC p; + sparkq spark, prev, next; + int stolen = 0; + TIME times[MAX_PROC], stealtime; + unsigned ntimes=0, i, j; + + /* times shall contain processors from which we may steal sparks */ + for(p=0; p < max_proc; ++p) + if(proc != p && + PendingSparksHd[p][ADVISORY_POOL] != NULL && + CurrentTime[p] <= CurrentTime[CurrentProc]) + times[ntimes++] = p; + + /* sort times */ + for(i=0; i < ntimes; ++i) + for(j=i+1; j < ntimes; ++j) + if(CurrentTime[times[i]] > CurrentTime[times[j]]) + { + unsigned temp = times[i]; + times[i] = times[j]; + times[j] = temp; + } + + for(i=0; i < ntimes && !stolen; ++i) + { + p = times[i]; + + for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL]; + spark != NULL && !stolen; + spark=next) + { + next = SPARK_NEXT(spark); + + if(SHOULD_SPARK(SPARK_NODE(spark))) + { + /* Don't Steal local sparks */ + if(!SPARK_GLOBAL(spark)) + { + prev=spark; + continue; + } + + SPARK_NEXT(spark) = NULL; + CurrentTime[p] += gran_mpacktime; + + stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc]) + + SparkStealTime(); + + newevent(proc,p /* CurrentProc */,stealtime, + MOVESPARK,Nil_closure,Nil_closure,spark); + + MAKE_BUSY(proc); + stolen = 1; + ++SPARK_GLOBAL(spark); + + if(do_sp_profile) + DumpSparkGranEvent(SP_EXPORTED,spark); + + CurrentTime[p] += gran_mtidytime; + + --SparksAvail; + } + else + { + if(do_sp_profile) + DumpSparkGranEvent(SP_PRUNED,spark); + DisposeSpark(spark); + } + + if(spark == PendingSparksHd[p][ADVISORY_POOL]) + PendingSparksHd[p][ADVISORY_POOL] = next; + + if(prev!=NULL) + SPARK_NEXT(prev) = next; + } + + if(PendingSparksHd[p][ADVISORY_POOL] == NULL) + PendingSparksTl[p][ADVISORY_POOL] = NULL; + } +} +\end{code} + +Steal a spark and schedule moving it to proc. 
+ +\begin{code} +StealThread(proc) +PROC proc; +{ + PROC p; + P_ thread, prev; + TIME times[MAX_PROC], stealtime; + unsigned ntimes=0, i, j; + + /* Hunt for a thread */ + + /* times shall contain processors from which we may steal threads */ + for(p=0; p < max_proc; ++p) + if(proc != p && RunnableThreadsHd[p] != Nil_closure && + CurrentTime[p] <= CurrentTime[CurrentProc]) + times[ntimes++] = p; + + /* sort times */ + for(i=0; i < ntimes; ++i) + for(j=i+1; j < ntimes; ++j) + if(CurrentTime[times[i]] > CurrentTime[times[j]]) + { + unsigned temp = times[i]; + times[i] = times[j]; + times[j] = temp; + } + + for(i=0; i < ntimes; ++i) + { + p = times[i]; + + /* Steal the first exportable thread in the runnable queue after the */ + /* first one */ + + if(RunnableThreadsHd[p] != Nil_closure) + { + for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]); + thread != Nil_closure && TSO_LOCKED(thread); + prev = thread, thread = TSO_LINK(thread)) + /* SKIP */; + + if(thread != Nil_closure) /* Take thread out of runnable queue */ + { + TSO_LINK(prev) = TSO_LINK(thread); + + TSO_LINK(thread) = Nil_closure; + + if(RunnableThreadsTl[p] == thread) + RunnableThreadsTl[p] = prev; + + /* Turn magic constants into params !? -- HWL */ + + CurrentTime[p] += 5l * gran_mpacktime; + + stealtime = (CurrentTime[p] > CurrentTime[proc]? 
CurrentTime[p]: CurrentTime[proc]) + + SparkStealTime() + 4l * gran_additional_latency + + 5l * gran_munpacktime; + + /* Move the thread */ + SET_PROCS(thread,PE_NUMBER(proc)); + + /* Move from one queue to another */ + newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL); + MAKE_BUSY(proc); + --SurplusThreads; + + if(do_gr_profile) + DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread)); + + CurrentTime[p] += 5l * gran_mtidytime; + + /* Found one */ + break; + } + } + } +} + +TIME SparkStealTime() +{ + double fishdelay, sparkdelay, latencydelay; + fishdelay = (double)max_proc/2; + sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers; + latencydelay = sparkdelay*((double)gran_latency); + +/* + fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n", + fishdelay,sparkdelay,latencydelay,Idlers); +*/ + return((TIME)latencydelay); +} +#endif /* GRAN ; HWL */ + +\end{code} + +%**************************************************************************** +% +\subsection[thread-execution]{Executing Threads} +% +%**************************************************************************** + +\begin{code} +EXTDATA_RO(StkO_info); +EXTDATA_RO(TSO_info); +EXTDATA_RO(WorldStateToken_closure); + +EXTFUN(EnterNodeCode); +UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);) + +#if defined(GRAN) + +/* Slow but relatively reliable method uses xmalloc */ +/* Eventually change that to heap allocated sparks. */ + +sparkq +NewSpark(node,name,local) +P_ node; +I_ name, local; +{ + extern P_ xmalloc(); + sparkq newspark = (sparkq) xmalloc(sizeof(struct spark)); + SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL; + SPARK_NODE(newspark) = node; + SPARK_NAME(newspark) = name; + SPARK_GLOBAL(newspark) = !local; + return(newspark); +} + +void +DisposeSpark(spark) +sparkq spark; +{ + if(spark!=NULL) + free(spark); + + --SparksAvail; + +/* Heap-allocated disposal. 
+ + FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs); + SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure; +*/ +} + +DisposeSparkQ(spark) +sparkq spark; +{ + if (spark==NULL) + return; + + DisposeSparkQ(SPARK_NEXT(spark)); + +#ifdef GRAN_CHECK + if (SparksAvail < 0) + fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark); +#endif + + free(spark); +} + +#endif + +I_ StkOChunkSize = DEFAULT_STKO_CHUNK_SIZE; + +/* Create a new TSO, with the specified closure to enter and thread type */ + +P_ +NewThread(topClosure, type) +P_ topClosure; +W_ type; +{ + P_ stko, tso; + + if (AvailableTSO != Nil_closure) { + tso = AvailableTSO; +#if defined(GRAN) + SET_PROCS(tso,ThisPE); /* Allocate it locally! */ +#endif + AvailableTSO = TSO_LINK(tso); + } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) { + return(NULL); + } else { + ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)), + BYTES_TO_STGWORDS(sizeof(StgDouble))); + tso = SAVE_Hp + 1; + SAVE_Hp += TSO_HS + TSO_CTS_SIZE; + SET_TSO_HDR(tso, TSO_info, CCC); + } + + TSO_LINK(tso) = Nil_closure; + TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN); + TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */ + TSO_ID(tso) = threadId++; + TSO_TYPE(tso) = type; + TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode; + TSO_ARG1(tso) = TSO_EVENT(tso) = 0; + TSO_SWITCH(tso) = NULL; + +#ifdef DO_REDN_COUNTING + TSO_AHWM(tso) = 0; + TSO_BHWM(tso) = 0; +#endif + +#if defined(GRAN) || defined(PAR) + TSO_SPARKNAME(tso) = 0; +#if defined(GRAN) + TSO_STARTEDAT(tso) = CurrentTime[CurrentProc]; +#else + TSO_STARTEDAT(tso) = CURRENT_TIME; +#endif + TSO_EXPORTED(tso) = 0; + TSO_BASICBLOCKS(tso) = 0; + TSO_ALLOCS(tso) = 0; + TSO_EXECTIME(tso) = 0; + TSO_FETCHTIME(tso) = 0; + TSO_FETCHCOUNT(tso) = 0; + TSO_BLOCKTIME(tso) = 0; + TSO_BLOCKCOUNT(tso) = 0; + TSO_BLOCKEDAT(tso) = 0; + TSO_GLOBALSPARKS(tso) = 0; + TSO_LOCALSPARKS(tso) = 0; +#endif + /* + * set pc, Node (R1), 
liveness + */ + CurrentRegTable = TSO_INTERNAL_PTR(tso); + SAVE_Liveness = LIVENESS_R1; + SAVE_R1.p = topClosure; + +# ifndef PAR + if (type == T_MAIN) { + stko = MainStkO; + } else { +# endif + if (AvailableStack != Nil_closure) { + stko = AvailableStack; +#if defined(GRAN) + SET_PROCS(stko,ThisPE); +#endif + AvailableStack = STKO_LINK(AvailableStack); + } else if (SAVE_Hp + STKO_HS + StkOChunkSize > SAVE_HpLim) { + return(NULL); + } else { + ALLOC_STK(STKO_HS,StkOChunkSize,0); + stko = SAVE_Hp + 1; + SAVE_Hp += STKO_HS + StkOChunkSize; + SET_STKO_HDR(stko, StkO_info, CCC); + } + STKO_SIZE(stko) = StkOChunkSize + STKO_VHS; + STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1); + STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1); + STKO_LINK(stko) = Nil_closure; + STKO_RETURN(stko) = NULL; +# ifndef PAR + } +# endif + +#ifdef DO_REDN_COUNTING + STKO_ADEP(stko) = STKO_BDEP(stko) = 0; +#endif + + if (type == T_MAIN) { + STKO_SpA(stko) -= AREL(1); + *STKO_SpA(stko) = (P_) WorldStateToken_closure; + } + + SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld); + SAVE_StkO = stko; + + if (DO_QP_PROF) { + QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso); + } + return tso; +} +\end{code} + +\begin{code} + +void +EndThread(STG_NO_ARGS) +{ +#ifdef PAR + TIME now = CURRENT_TIME; +#endif +#ifdef DO_REDN_COUNTING + extern FILE *tickyfile; + + if (tickyfile != NULL) { + fprintf(tickyfile, "Thread %d (%lx)\n\tA stack max. depth: %ld words\n", + TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO)); + fprintf(tickyfile, "\tB stack max. 
depth: %ld words\n", + TSO_BHWM(CurrentTSO)); + } +#endif + + if (DO_QP_PROF) { + QP_Event1("G*", CurrentTSO); + } + +#if defined(GRAN) + assert(CurrentTSO == ThreadQueueHd); + ThreadQueueHd = TSO_LINK(CurrentTSO); + + if(ThreadQueueHd == Nil_closure) + ThreadQueueTl = Nil_closure; + + else if (DoThreadMigration) + --SurplusThreads; + + if (do_gr_sim) + { + if(TSO_TYPE(CurrentTSO)==T_MAIN) + { + int i; + for(i=0; i < max_proc; ++i) { + StgBool is_first = StgTrue; + while(RunnableThreadsHd[i] != Nil_closure) + { + /* We schedule runnable threads before killing them to */ + /* make the job of bookkeeping the running, runnable, */ + /* blocked threads easier for scripts like gr2ps -- HWL */ + + if (do_gr_profile && !is_first) + DumpRawGranEvent(i,GR_SCHEDULE, + TSO_ID(RunnableThreadsHd[i])); + if (!no_gr_profile) + DumpGranInfo(i,RunnableThreadsHd[i],StgTrue); + RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]); + is_first = StgFalse; + } + } + + ThreadQueueHd = Nil_closure; + +#if defined(GRAN_CHECK) && defined(GRAN) + /* Print event stats */ + if (debug & 0x20) { + int i; + + fprintf(stderr,"Statistics of events (total=%d):\n", + noOfEvents); + for (i=0; i<=7; i++) { + fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n", + event_names[i],i,event_counts[i], + (float)(100*event_counts[i])/(float)(noOfEvents) ); + } + } +#endif + + } + + if (!no_gr_profile) + DumpGranInfo(CurrentProc,CurrentTSO, + TSO_TYPE(CurrentTSO) != T_ADVISORY); + + /* Note ThreadQueueHd is Nil when the main thread terminates */ + if(ThreadQueueHd != Nil_closure) + { + if (do_gr_profile && !no_gr_profile) + DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); + CurrentTime[CurrentProc] += gran_threadscheduletime; + } + + else if (do_gr_binary && TSO_TYPE(CurrentTSO)==T_MAIN && + !no_gr_profile) + grterminate(CurrentTime[CurrentProc]); + } +#endif /* GRAN */ + +#ifdef PAR + if (do_gr_profile) { + TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); + DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != 
T_ADVISORY); + } +#endif + + switch (TSO_TYPE(CurrentTSO)) { + case T_MAIN: + required_thread_count--; +#ifdef PAR + if (do_gr_binary) + grterminate(now); +#endif + +#if defined(GRAN_CHECK) && defined(GRAN) + if ( (debug & 0x80) || (debug & 0x40) ) + fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n"); + + /* I've stolen that from the end of ReSchedule (!GRAN). HWL */ + longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1); +#else + ReSchedule(0); +#endif /* GRAN */ + + case T_REQUIRED: + required_thread_count--; + break; + + case T_ADVISORY: + advisory_thread_count--; + break; + + case T_FAIL: + EXIT(EXIT_FAILURE); + + default: + fflush(stdout); + fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO)); + EXIT(EXIT_FAILURE); + } + + /* Reuse stack object space */ + ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure); + STKO_LINK(SAVE_StkO) = AvailableStack; + AvailableStack = SAVE_StkO; + /* Reuse TSO */ + TSO_LINK(CurrentTSO) = AvailableTSO; + AvailableTSO = CurrentTSO; + CurrentTSO = Nil_closure; + CurrentRegTable = NULL; + +#if defined(GRAN) + /* NB: Now ThreadQueueHd is either the next runnable thread on this */ + /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */ + /* issued by ReSchedule. */ + ReSchedule(SAME_THREAD); /* back for more! */ +#else + ReSchedule(0); /* back for more! */ +#endif +} +\end{code} + +%**************************************************************************** +% +\subsection[thread-blocking]{Local Blocking} +% +%**************************************************************************** + +\begin{code} + +#if defined(COUNT) +void CountnUPDs() { ++nUPDs; } +void CountnUPDs_old() { ++nUPDs_old; } +void CountnUPDs_new() { ++nUPDs_new; } + +void CountnPAPs() { ++nPAPs; } +#endif + +EXTDATA_RO(BQ_info); + +#ifndef GRAN +/* NB: non-GRAN version ToDo + * + * AwakenBlockingQueue awakens a list of TSOs and FBQs. 
+ */ + +P_ PendingFetches = Nil_closure; + +void +AwakenBlockingQueue(bqe) + P_ bqe; +{ + P_ last_tso = NULL; + +# ifdef PAR + P_ next; + TIME now = CURRENT_TIME; + +# endif + +# ifndef PAR + while (bqe != Nil_closure) { +# else + while (IS_MUTABLE(INFO_PTR(bqe))) { + switch (INFO_TYPE(INFO_PTR(bqe))) { + case INFO_TSO_TYPE: +# endif + if (DO_QP_PROF) { + QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO); + } +# ifdef PAR + if (do_gr_profile) { + DumpGranEvent(GR_RESUMEQ, bqe); + switch (TSO_QUEUE(bqe)) { + case Q_BLOCKED: + TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe); + break; + case Q_FETCHING: + TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe); + break; + default: + fflush(stdout); + fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n"); + EXIT(EXIT_FAILURE); + } + } +# endif + if (last_tso == NULL) { + if (RunnableThreadsHd == Nil_closure) { + RunnableThreadsHd = bqe; + } else { + TSO_LINK(RunnableThreadsTl) = bqe; + } + } + last_tso = bqe; + bqe = TSO_LINK(bqe); +# ifdef PAR + break; + case INFO_BF_TYPE: + next = BF_LINK(bqe); + BF_LINK(bqe) = PendingFetches; + PendingFetches = bqe; + bqe = next; + if (last_tso != NULL) + TSO_LINK(last_tso) = next; + break; + default: + fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n", + INFO_PTR(bqe), (W_) bqe); + EXIT(EXIT_FAILURE); + } + } +#else + } +# endif + if (last_tso != NULL) { + RunnableThreadsTl = last_tso; +# ifdef PAR + TSO_LINK(last_tso) = Nil_closure; +# endif + } +} +#endif /* !GRAN */ + +#ifdef GRAN + +/* NB: GRAN version only ToDo + * + * AwakenBlockingQueue returns True if we are on the oldmutables list, + * so that the update code knows what to do next. 
+ */ + +I_ +AwakenBlockingQueue(node) + P_ node; +{ + P_ tso = (P_) BQ_ENTRIES(node); + P_ prev; + + if(do_gr_sim) + { + W_ notifytime; + +# if defined(COUNT) + ++nUPDs; + if (tso != Nil_closure) + ++nUPDs_BQ; +# endif + + while(tso != Nil_closure) { + W_ proc; + assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node); + +# if defined(COUNT) + ++BQ_lens; +# endif + + /* Find where the tso lives */ + proc = where_is(tso); + + if(proc == CurrentProc) + notifytime = CurrentTime[CurrentProc] + gran_lunblocktime; + else + { + CurrentTime[CurrentProc] += gran_mpacktime; + notifytime = CurrentTime[CurrentProc] + gran_gunblocktime; + CurrentTime[CurrentProc] += gran_mtidytime; + } + + /* and create a resume message */ + newevent(proc, CurrentProc, notifytime, + RESUMETHREAD,tso,Nil_closure,NULL); + + prev = tso; + tso = TSO_LINK(tso); + TSO_LINK(prev) = Nil_closure; + } + } + else + { + if (ThreadQueueHd == Nil_closure) + ThreadQueueHd = tso; + else + TSO_LINK(ThreadQueueTl) = tso; + + while(TSO_LINK(tso) != Nil_closure) { + assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node); + if (DO_QP_PROF) { + QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO); + } + tso = TSO_LINK(tso); + } + + assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node); + if (DO_QP_PROF) { + QP_Event2(do_qp_prof > 1 ? 
"RA" : "RG", tso, CurrentTSO); + } + + ThreadQueueTl = tso; + } + + return MUT_LINK(node) != MUT_NOT_LINKED; +} + +#endif /* GRAN only */ + +EXTFUN(Continue); + +void +Yield(args) +W_ args; +{ + SAVE_Liveness = args >> 1; + TSO_PC1(CurrentTSO) = Continue; + if (DO_QP_PROF) { + QP_Event1("GR", CurrentTSO); + } +#ifdef PAR + if (do_gr_profile) { + /* Note that CURRENT_TIME may perform an unsafe call */ + TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO); + } +#endif + ReSchedule(args & 1); +} + +\end{code} + +%**************************************************************************** +% +\subsection[gr-fetch]{Fetching Nodes (GrAnSim only)} +% +%**************************************************************************** + +The following GrAnSim routines simulate the fetching of nodes from a remote +processor. We use a 1 word bitmask to indicate on which processor a node is +lying. Thus, moving or copying a node from one processor to another just +requires an appropriate change in this bitmask (using @SET_GA@). +Additionally, the clocks have to be updated. + +A special case arises when the node that is needed by processor A has been +moved from a processor B to a processor C between sending out a @FETCH@ +(from A) and its arrival at B. In that case the @FETCH@ has to be forwarded +to C. + +Currently, we only support GRIP-like single closure fetching. We plan to +incorporate GUM-like packet fetching in the near future. 
+ +\begin{code} +#if defined(GRAN) + +/* Fetch node "node" to processor "p" */ + +int +FetchNode(node,from,to) +P_ node; +PROC from, to; +{ + assert(to==CurrentProc); + if (!IS_LOCAL_TO(PROCS(node),from) && + !IS_LOCAL_TO(PROCS(node),to) ) + return 1; + + if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */ + PROCS(node) |= PE_NUMBER(to); /* Copy node */ + else + PROCS(node) = PE_NUMBER(to); /* Move node */ + + /* Now fetch the children */ + if(DoGUMMFetching) + { + fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n"); + } + + return 0; +} + +/* -------------------------------------------------- + Cost of sending a packet of size n = C + P*n + where C = packet construction constant, + P = cost of packing one word into a packet + [Should also account for multiple packets]. + -------------------------------------------------- */ + +void +HandleFetchRequest(node,p,tso) +P_ node, tso; +PROC p; +{ + if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */ + { /* start tso */ + newevent(p,CurrentProc, + CurrentTime[CurrentProc] /* +gran_latency */, + FETCHREPLY,tso,node,NULL); /* node needed ?? */ + CurrentTime[CurrentProc] += gran_mtidytime; + } + else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */ + { + /* Actual moving/copying of node is done on arrival; see FETCHREPLY */ + /* Send a reply to the originator */ + CurrentTime[CurrentProc] += gran_mpacktime; + + newevent(p,CurrentProc, + CurrentTime[CurrentProc]+gran_latency, + FETCHREPLY,tso,node,NULL); /* node needed ?? */ + + CurrentTime[CurrentProc] += gran_mtidytime; + } + else + { /* Qu'vatlh! node has been grabbed by another proc => forward */ + PROC p_new = where_is(node); + TIME fetchtime; + +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (NoForward) { + newevent(p,p_new, + max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency, + FETCHREPLY,tso,node,NULL); /* node needed ?? 
*/ + CurrentTime[CurrentProc] += gran_mtidytime; + return; + } +#endif + +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (debug & 0x2) /* 0x2 should be somehting like DBG_PRINT_FWD */ + fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n", + node,p_new,CurrentProc,p,CurrentTime[CurrentProc]); +#endif + /* Prepare FORWARD message to proc p_new */ + CurrentTime[CurrentProc] += gran_mpacktime; + + fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) + + gran_latency; + + newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL); + + CurrentTime[CurrentProc] += gran_mtidytime; + } +} +#endif +\end{code} + +%**************************************************************************** +% +\subsection[gr-simulation]{Granularity Simulation} +% +%**************************************************************************** + +\begin{code} +#if 0 /* moved to GranSim.lc */ +#if defined(GRAN) +I_ do_gr_sim = 0; +FILE *gr_file = NULL; +char gr_filename[32]; + +init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ + I_ i; + + if(do_gr_sim) + { + char *extension = do_gr_binary? 
"gb": "gr"; + sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0],extension); + + if ((gr_file = fopen(gr_filename,"w")) == NULL ) + { + fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename); + exit(EXIT_FAILURE); + } + +#if defined(GRAN_CHECK) && defined(GRAN) + if(DoReScheduleOnFetch) + setbuf(gr_file,NULL); +#endif + + fputs("Granularity Simulation for ",gr_file); + for(i=0; i < prog_argc; ++i) + { + fputs(prog_argv[i],gr_file); + fputc(' ',gr_file); + } + + if(rts_argc > 0) + { + fputs("+RTS ",gr_file); + + for(i=0; i < rts_argc; ++i) + { + fputs(rts_argv[i],gr_file); + fputc(' ',gr_file); + } + } + + fputs("\n\n--------------------\n\n",gr_file); + + fputs("General Parameters:\n\n",gr_file); + + fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n", + max_proc,DoFairSchedule?"Fair":"Unfair", + DoThreadMigration?"":"Don't ", + DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"", + DoReScheduleOnFetch?"":"Don't "); + + fprintf(gr_file, "%s, Fetch %s in Each Packet\n", + SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"), + DoGUMMFetching?"Many Closures":"Exactly One Closure"); + fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n", + FetchStrategy, + FetchStrategy==1?"only run runnable threads (don't create new ones": + FetchStrategy==2?"create threads only from local sparks": + FetchStrategy==3?"create threads from local or global sparks": + FetchStrategy==4?"create sparks and steal threads if necessary": + "unknown"); + + fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n", + gran_threadcreatetime,gran_threadqueuetime); + fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n", + gran_threaddescheduletime,gran_threadscheduletime); + fprintf(gr_file, "Thread Context-Switch Time %lu\n", + gran_threadcontextswitchtime); + fputs("\n\n--------------------\n\n",gr_file); + + fputs("Communication Metrics:\n\n",gr_file); + 
fprintf(gr_file, + "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n", + gran_latency, gran_additional_latency, gran_fetchtime, + gran_gunblocktime, gran_lunblocktime); + fprintf(gr_file, + "Message Creation %lu (+ %lu after send), Message Read %lu\n", + gran_mpacktime, gran_mtidytime, gran_munpacktime); + fputs("\n\n--------------------\n\n",gr_file); + + fputs("Instruction Metrics:\n\n",gr_file); + fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n", + gran_arith_cost, gran_branch_cost, + gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost); + fputs("\n\n++++++++++++++++++++\n\n",gr_file); + } + + if(do_gr_binary) + grputw(sizeof(TIME)); + + Idlers = max_proc; + return(0); +} + +void end_gr_simulation() { + if(do_gr_sim) + { + fprintf(stderr,"The simulation is finished. Look at %s for details.\n", + gr_filename); + fclose(gr_file); + } +} +#endif /*0*/ +\end{code} + +%**************************************************************************** +% +\subsection[qp-profile]{Quasi-Parallel Profiling} +% +%**************************************************************************** + +\begin{code} +#ifndef PAR + +I_ do_qp_prof; +FILE *qp_file; + +/* *Virtual* Time in milliseconds */ +long +qp_elapsed_time(STG_NO_ARGS) +{ + extern StgDouble usertime(); + + return ((long) (usertime() * 1e3)); +} + +static void +init_qp_profiling(STG_NO_ARGS) +{ + I_ i; + char qp_filename[32]; + + sprintf(qp_filename, "%0.24s.qp", prog_argv[0]); + if ((qp_file = fopen(qp_filename,"w")) == NULL ) { + fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", + qp_filename); + do_qp_prof = 0; + } else { + fputs(prog_argv[0], qp_file); + for(i = 1; prog_argv[i]; i++) { + fputc(' ', qp_file); + fputs(prog_argv[i], qp_file); + } + fprintf(qp_file, " +RTS -C%d -t%d\n", contextSwitchTime, MaxThreads); + fputs(time_str(), qp_file); + fputc('\n', qp_file); + } +} + +void +QP_Event0(tid, node) +I_ tid; +P_ node; 
+{ + fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node)); +} + +void +QP_Event1(event, tso) +char *event; +P_ tso; +{ + fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event, + TSO_ID(tso), TSO_NAME(tso)); +} + +void +QP_Event2(event, tso1, tso2) +char *event; +P_ tso1, tso2; +{ + fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event, + TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2)); +} + +#endif /* !PAR */ +\end{code} + +%**************************************************************************** +% +\subsection[entry-points]{Routines directly called from Haskell world} +% +%**************************************************************************** + +The @GranSim...@ rotuines in here are directly called via macros from the +threaded world. + +First some auxiliary routines. + +\begin{code} +#ifdef GRAN +/* Take the current thread off the thread queue and thereby activate the */ +/* next thread. It's assumed that the next ReSchedule after this uses */ +/* NEW_THREAD as param. */ +/* This fct is called from GranSimBlock and GranSimFetch */ + +void +ActivateNextThread () +{ +#if defined(GRAN_CHECK) && defined(GRAN) + if(ThreadQueueHd != CurrentTSO) { + fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n"); + exit(99); + } +#endif + + ThreadQueueHd = TSO_LINK(ThreadQueueHd); + if(ThreadQueueHd==Nil_closure) { + MAKE_IDLE(CurrentProc); + ThreadQueueTl = Nil_closure; + } else if (do_gr_profile) { + CurrentTime[CurrentProc] += gran_threadcontextswitchtime; + DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); + } +} +\end{code} + +Now the main stg-called routines: + +\begin{code} +/* ------------------------------------------------------------------------ */ +/* The following GranSim... fcts are stg-called from the threaded world. */ +/* ------------------------------------------------------------------------ */ + +/* Called from HEAP_CHK -- NB: node and liveness are junk here now. 
+ They are left temporarily to avoid complete recompilation. + KH +*/ +void +GranSimAllocate(n,node,liveness) +I_ n; +P_ node; +W_ liveness; +{ + TSO_ALLOCS(CurrentTSO) += n; + ++TSO_BASICBLOCKS(CurrentTSO); + + TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost; + CurrentTime[CurrentProc] += gran_heapalloc_cost; +} + +/* + Subtract the values added above, if a heap check fails and + so has to be redone. +*/ +void +GranSimUnallocate(n,node,liveness) +W_ n; +P_ node; +W_ liveness; +{ + TSO_ALLOCS(CurrentTSO) -= n; + --TSO_BASICBLOCKS(CurrentTSO); + + TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost; + CurrentTime[CurrentProc] -= gran_heapalloc_cost; +} + +void +GranSimExec(ariths,branches,loads,stores,floats) +W_ ariths,branches,loads,stores,floats; +{ + W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads + + gran_store_cost*stores + gran_float_cost*floats; + + TSO_EXECTIME(CurrentTSO) += cost; + CurrentTime[CurrentProc] += cost; +} + + +/* + Fetch the node if it isn't local + -- result indicates whether fetch has been done. + + This is GRIP-style single item fetching. +*/ + +I_ +GranSimFetch(node /* , liveness_mask */ ) +P_ node; +/* I_ liveness_mask; */ +{ + /* Note: once a node has been fetched, this test will be passed */ + if(!IS_LOCAL_TO(PROCS(node),CurrentProc) ) + { + /* I suppose we shouldn't do this for CAFs? -- KH */ + /* Should reschedule if the latency is high */ + /* We should add mpacktime to the remote PE for the reply, + but we don't know who owns the node + */ + /* if(DYNAMIC_POINTER(node)) */ /* For 0.22; gone in 0.23 !!! 
*/ + { + PROC p = where_is(node); + TIME fetchtime; + +#ifdef GRAN_CHECK + if ( ( debug & 0x40 ) && + p == CurrentProc ) + fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p); +#endif /* GRAN_CHECK */ + + CurrentTime[CurrentProc] += gran_mpacktime; + + ++TSO_FETCHCOUNT(CurrentTSO); + TSO_FETCHTIME(CurrentTSO) += gran_fetchtime; + + if (SimplifiedFetch) + { + FetchNode(node,CurrentProc); + CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+ + gran_munpacktime; + return(1); + } + + fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) + + gran_latency; + + newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL); + ++OutstandingFetches[CurrentProc]; + + /* About to block */ + TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p]; + + if (DoReScheduleOnFetch) + { + + /* Remove CurrentTSO from the queue + -- assumes head of queue == CurrentTSO */ + if(!DoFairSchedule) + { + if(do_gr_profile) + DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p); + + ActivateNextThread(); + +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (debug & 0x10) { + if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) { + fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n", + CurrentTSO,CurrentTime[CurrentProc]); + exit (99); + } else { + TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO; + } + + } +#endif + + TSO_LINK(CurrentTSO) = Nil_closure; + /* CurrentTSO = Nil_closure; */ + + /* ThreadQueueHd is now the next TSO to schedule or NULL */ + /* CurrentTSO is pointed to by the FETCHNODE event */ + } + else /* DoFairSchedule */ + { + /* Remove from the tail of the thread queue */ + fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n"); + exit(99); + } + } + else /* !DoReScheduleOnFetch */ + { + /* Note: CurrentProc is still busy as it's blocked on fetch */ + if(do_gr_profile) + DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p); + +#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + if (debug & 0x04) + 
BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/ + + if (debug & 0x10) { + if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) { + fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n", + CurrentTSO,CurrentTime[CurrentProc]); + exit (99); + } else { + TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO; + } + + CurrentTSO = Nil_closure; + } +#endif + } + + CurrentTime[CurrentProc] += gran_mtidytime; + + /* Rescheduling is necessary */ + NeedToReSchedule = StgTrue; + + return(1); + } + } + return(0); +} + +void +GranSimSpark(local,node) +W_ local; +P_ node; +{ + ++SparksAvail; + if(do_sp_profile) + DumpSparkGranEvent(SP_SPARK,node); + + /* Force the PE to take notice of the spark */ + if(DoAlwaysCreateThreads) + newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + FINDWORK,Nil_closure,Nil_closure,NULL); + + if(local) + ++TSO_LOCALSPARKS(CurrentTSO); + else + ++TSO_GLOBALSPARKS(CurrentTSO); +} + +void +GranSimSparkAt(spark,where,identifier) +sparkq spark; +P_ where; /* This should be a node; alternatively could be a GA */ +I_ identifier; +{ + PROC p = where_is(where); + TIME exporttime; + + if(do_sp_profile) + DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark)); + + CurrentTime[CurrentProc] += gran_mpacktime; + + exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]? 
+ CurrentTime[p]: CurrentTime[CurrentProc]) + + gran_latency; + + newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark); + + CurrentTime[CurrentProc] += gran_mtidytime; + + ++TSO_GLOBALSPARKS(CurrentTSO); +} + +void +GranSimBlock() +{ + if(do_gr_profile) + DumpGranEvent(GR_BLOCK,CurrentTSO); + + ++TSO_BLOCKCOUNT(CurrentTSO); + TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc]; + ActivateNextThread(); +} + +#endif /* GRAN */ + +\end{code} + +%**************************************************************************** +% +\subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects} +% +%**************************************************************************** + +Garbage collection code for the event queue. We walk the event queue +so that if the only reference to a TSO is in some event (e.g. RESUME), +the TSO is still preserved. + +\begin{code} +#ifdef GRAN + +extern smInfo StorageMgrInfo; + +I_ +SaveEventRoots(num_ptr_roots) +I_ num_ptr_roots; +{ + eventq event = EventHd; + while(event != NULL) + { + if(EVENT_TYPE(event) == RESUMETHREAD || + EVENT_TYPE(event) == MOVETHREAD || + EVENT_TYPE(event) == STARTTHREAD ) + StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event); + + else if(EVENT_TYPE(event) == MOVESPARK) + StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event)); + + else if (EVENT_TYPE(event) == FETCHNODE || + EVENT_TYPE(event) == FETCHREPLY ) + { + StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event); + StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event); + } + + event = EVENT_NEXT(event); + } + return(num_ptr_roots); +} + +I_ +SaveSparkRoots(num_ptr_roots) +I_ num_ptr_roots; +{ + sparkq spark, /* prev, */ disposeQ=NULL; + PROC proc; + I_ i, sparkroots=0, prunedSparks=0; + +#if defined(GRAN_CHECK) && defined(GRAN) + if ( debug & 0x40 ) + fprintf(stderr,"D> Saving spark roots for GC ...\n"); +#endif + + for(proc = 0; proc < max_proc; ++proc) { + for(i = 0; i < SPARK_POOLS; ++i) { + for(/* 
prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i]; + spark != NULL; + /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark)) + { + if(++sparkroots <= MAX_SPARKS) + { +#if defined(GRAN_CHECK) && defined(GRAN) + if ( debug & 0x40 ) + fprintf(main_statsfile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n", + num_ptr_roots,proc,i,SPARK_NODE(spark)); +#endif + StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark); + } + else + { + SPARK_NODE(spark) = Nil_closure; + if (prunedSparks==0) { + disposeQ = spark; + /* + *prev = NULL; + */ + } + prunedSparks++; + } + } /* forall spark ... */ + if (prunedSparks>0) { + fprintf(main_statsfile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n", + prunedSparks,MAX_SPARKS,proc); + if (disposeQ == PendingSparksHd[proc][i]) + PendingSparksHd[proc][i] = NULL; + else + SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL; + DisposeSparkQ(disposeQ); + prunedSparks = 0; + disposeQ = NULL; + } + } /* forall i ... */ + } /*forall proc .. */ + + return(num_ptr_roots); +} + +/* + GC roots must be restored in *reverse order*. + The recursion is a little ugly, but is better than + in-place pointer reversal. 
+*/ + +static I_ +RestoreEvtRoots(event,num_ptr_roots) +eventq event; +I_ num_ptr_roots; +{ + if(event != NULL) + { + num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots); + + if(EVENT_TYPE(event) == RESUMETHREAD || + EVENT_TYPE(event) == MOVETHREAD || + EVENT_TYPE(event) == STARTTHREAD ) + EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots]; + + else if(EVENT_TYPE(event) == MOVESPARK ) + SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots]; + + else if (EVENT_TYPE(event) == FETCHNODE || + EVENT_TYPE(event) == FETCHREPLY ) + { + EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots]; + EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots]; + } + } + + return(num_ptr_roots); +} + +I_ +RestoreEventRoots(num_ptr_roots) +I_ num_ptr_roots; +{ + return(RestoreEvtRoots(EventHd,num_ptr_roots)); +} + +static I_ +RestoreSpkRoots(spark,num_ptr_roots,sparkroots) +sparkq spark; +I_ num_ptr_roots, sparkroots; +{ + if(spark != NULL) + { + num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots); + if(sparkroots <= MAX_SPARKS) + { + P_ n = SPARK_NODE(spark); + SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots]; +#if defined(GRAN_CHECK) && defined(GRAN) + if ( debug & 0x40 ) + fprintf(main_statsfile,"Restoring Spark Root %d -- new: 0x%lx \n", + num_ptr_roots,SPARK_NODE(spark)); +#endif + } + else +#if defined(GRAN_CHECK) && defined(GRAN) + if ( debug & 0x40 ) + fprintf(main_statsfile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n", + num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS); +#endif + + } + + return(num_ptr_roots); +} + +I_ +RestoreSparkRoots(num_ptr_roots) +I_ num_ptr_roots; +{ + PROC proc; + I_ i; + + /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */ + /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */ + /* of the for loop. For i that is currently not necessary. C is really */ + /* impressive in datatype abstraction! 
-- HWL */ + + for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) { + for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) { + num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0); + } + } + return(num_ptr_roots); +} + +#endif /* GRAN */ + +\end{code} + +%**************************************************************************** +% +\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim} +% +%**************************************************************************** + +Event dumping routines. + +\begin{code} +#ifdef GRAN + +DumpGranEvent(name,tso) +enum gran_event_types name; +P_ tso; +{ + DumpRawGranEvent(CurrentProc,name,TSO_ID(tso)); +} + +DumpSparkGranEvent(name,id) +enum gran_event_types name; +W_ id; +{ + DumpRawGranEvent(CurrentProc,name,id); +} + +DumpGranEventAndNode(name,tso,node,proc) +enum gran_event_types name; +P_ tso, node; +PROC proc; +{ + PROC pe = CurrentProc; + W_ id = TSO_ID(tso); + + if(name > GR_EVENT_MAX) + name = GR_EVENT_MAX; + + if(do_gr_binary) + { + grputw(name); + grputw(pe); + grputw(CurrentTime[CurrentProc]); + grputw(id); + } + else + fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n", + pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc); +} + +DumpRawGranEvent(pe,name,id) +PROC pe; +enum gran_event_types name; +W_ id; +{ + if(name > GR_EVENT_MAX) + name = GR_EVENT_MAX; + + if(do_gr_binary) + { + grputw(name); + grputw(pe); + grputw(CurrentTime[CurrentProc]); + grputw(id); + } + else + fprintf(gr_file,"PE %2u [%lu]: %s %lx\n", + pe,CurrentTime[CurrentProc],gran_event_names[name],id); +} + +DumpGranInfo(pe,tso,mandatory_thread) +PROC pe; +P_ tso; +I_ mandatory_thread; +{ + if(do_gr_binary) + { + grputw(GR_END); + grputw(pe); + grputw(CurrentTime[CurrentProc]); + grputw(TSO_ID(tso)); + grputw(TSO_SPARKNAME(tso)); + grputw(TSO_STARTEDAT(tso)); + grputw(TSO_EXPORTED(tso)); + grputw(TSO_BASICBLOCKS(tso)); + grputw(TSO_ALLOCS(tso)); + 
grputw(TSO_EXECTIME(tso)); + grputw(TSO_BLOCKTIME(tso)); + grputw(TSO_BLOCKCOUNT(tso)); + grputw(TSO_FETCHTIME(tso)); + grputw(TSO_FETCHCOUNT(tso)); + grputw(TSO_LOCALSPARKS(tso)); + grputw(TSO_GLOBALSPARKS(tso)); + grputw(mandatory_thread); + } + else + { + /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */ + fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n" + ,pe + ,CurrentTime[CurrentProc] + ,TSO_ID(tso) + ,TSO_SPARKNAME(tso) + ,TSO_STARTEDAT(tso) + ,TSO_EXPORTED(tso)?'T':'F' + ,TSO_BASICBLOCKS(tso) + ,TSO_ALLOCS(tso) + ,TSO_EXECTIME(tso) + ,TSO_BLOCKTIME(tso) + ,TSO_BLOCKCOUNT(tso) + ,TSO_FETCHTIME(tso) + ,TSO_FETCHCOUNT(tso) + ,TSO_LOCALSPARKS(tso) + ,TSO_GLOBALSPARKS(tso) + ,mandatory_thread?'T':'F' + ); + } +} + +DumpTSO(tso) +P_ tso; +{ + fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n" + ,tso + ,TSO_NAME(tso) + ,TSO_ID(tso) + ,TSO_LINK(tso) + ,TSO_TYPE(tso)==T_MAIN?"MAIN": + TSO_TYPE(tso)==T_FAIL?"FAIL": + TSO_TYPE(tso)==T_REQUIRED?"REQUIRED": + TSO_TYPE(tso)==T_ADVISORY?"ADVISORY": + "???" + ); + + fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n" + ,TSO_PC1(tso) + ,TSO_PC2(tso) + ,TSO_ARG1(tso) + ,TSO_ARG2(tso) + ,TSO_SWITCH(tso) + ); + + fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n" + ,TSO_SPARKNAME(tso) + ,TSO_STARTEDAT(tso) + ,TSO_EXPORTED(tso)?'T':'F' + ,TSO_BASICBLOCKS(tso) + ,TSO_ALLOCS(tso) + ,TSO_EXECTIME(tso) + ,TSO_BLOCKTIME(tso) + ,TSO_BLOCKCOUNT(tso) + ,TSO_FETCHTIME(tso) + ,TSO_FETCHCOUNT(tso) + ,TSO_LOCALSPARKS(tso) + ,TSO_GLOBALSPARKS(tso) + ); +} + +/* + Output a terminate event and an 8-byte time. 
+*/ + +grterminate(v) +TIME v; +{ + DumpGranEvent(GR_TERMINATE,0); + + if(sizeof(TIME)==4) + { + putc('\0',gr_file); + putc('\0',gr_file); + putc('\0',gr_file); + putc('\0',gr_file); + } + else + { + putc(v >> 56l,gr_file); + putc((v >> 48l)&0xffl,gr_file); + putc((v >> 40l)&0xffl,gr_file); + putc((v >> 32l)&0xffl,gr_file); + } + putc((v >> 24l)&0xffl,gr_file); + putc((v >> 16l)&0xffl,gr_file); + putc((v >> 8l)&0xffl,gr_file); + putc(v&0xffl,gr_file); +} + +/* + Length-coded output: first 3 bits contain length coding + + 00x 1 byte + 01x 2 bytes + 10x 4 bytes + 110 8 bytes + 111 5 or 9 bytes +*/ + +grputw(v) +TIME v; +{ + if(v <= 0x3fl) + { + fputc(v & 0x3f,gr_file); + } + + else if (v <= 0x3fffl) + { + fputc((v >> 8l)|0x40l,gr_file); + fputc(v&0xffl,gr_file); + } + + else if (v <= 0x3fffffffl) + { + fputc((v >> 24l)|0x80l,gr_file); + fputc((v >> 16l)&0xffl,gr_file); + fputc((v >> 8l)&0xffl,gr_file); + fputc(v&0xffl,gr_file); + } + + else if (sizeof(TIME) == 4) + { + fputc(0x70,gr_file); + fputc((v >> 24l)&0xffl,gr_file); + fputc((v >> 16l)&0xffl,gr_file); + fputc((v >> 8l)&0xffl,gr_file); + fputc(v&0xffl,gr_file); + } + + else + { + if (v <= 0x3fffffffffffffl) + putc((v >> 56l)|0x60l,gr_file); + else + { + putc(0x70,gr_file); + putc((v >> 56l)&0xffl,gr_file); + } + + putc((v >> 48l)&0xffl,gr_file); + putc((v >> 40l)&0xffl,gr_file); + putc((v >> 32l)&0xffl,gr_file); + putc((v >> 24l)&0xffl,gr_file); + putc((v >> 16l)&0xffl,gr_file); + putc((v >> 8l)&0xffl,gr_file); + putc(v&0xffl,gr_file); + } +} +#endif /* GRAN */ + +\end{code} + +%**************************************************************************** +% +\subsection[GrAnSim-debug]{Debugging routines for GrAnSim} +% +%**************************************************************************** + +Debugging routines, mainly for GrAnSim. They should really be in a separate file. + +The first couple of routines are general ones (look also into +c-as-asm/StgDebug.lc). 
+
+\begin{code}
+
+#define NULL_REG_MAP /* Not threaded */
+#include "stgdefs.h"
+
+/*
+ * info_hdr_type: render the kind of an info pointer as a short static
+ * string, for debugging output only.  The sequential RTS can only
+ * distinguish tagged info pointers; the PAR/GRAN builds carry a full
+ * INFO_TYPE field and give a precise answer.
+ */
+char *
+info_hdr_type(info_ptr)
+W_ info_ptr;
+{
+#if ! defined(PAR) && !defined(GRAN)
+    switch (INFO_TAG(info_ptr)) {
+      case INFO_OTHER_TAG: return "OTHER_TAG";
+      default:             return "TAG<n>";
+    }
+#else /* PAR */
+    switch (INFO_TYPE(info_ptr)) {
+      case INFO_SPEC_U_TYPE:    return "SPECU";
+      case INFO_SPEC_N_TYPE:    return "SPECN";
+      case INFO_GEN_U_TYPE:     return "GENU";
+      case INFO_GEN_N_TYPE:     return "GENN";
+      case INFO_DYN_TYPE:       return "DYN";
+      case INFO_TUPLE_TYPE:     return "TUPLE";
+      case INFO_DATA_TYPE:      return "DATA";
+      case INFO_MUTUPLE_TYPE:   return "MUTUPLE";
+      case INFO_IMMUTUPLE_TYPE: return "IMMUTUPLE";
+      case INFO_STATIC_TYPE:    return "STATIC";
+      case INFO_CONST_TYPE:     return "CONST";
+      case INFO_CHARLIKE_TYPE:  return "CHAR";
+      case INFO_INTLIKE_TYPE:   return "INT";
+      case INFO_BH_TYPE:        return "BHOLE";
+      case INFO_IND_TYPE:       return "IND";
+      case INFO_CAF_TYPE:       return "CAF";
+      case INFO_FETCHME_TYPE:   return "FETCHME";
+      case INFO_BQ_TYPE:        return "BQ";
+      case INFO_TSO_TYPE:       return "TSO";
+      case INFO_STKO_TYPE:      return "STKO";
+      default:
+        fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
+        return "??";
+    }
+#endif /* PAR */
+}
+
+/*
+ * var_hdr_size: size (in words) of the variable header of a closure;
+ * 0 for most closure kinds.  An unknown info type is reported on
+ * stderr and treated as having no variable header.
+ */
+I_
+var_hdr_size(node)
+P_ node;
+{
+    switch(INFO_TYPE(INFO_PTR(node))) {
+      case INFO_SPEC_U_TYPE:
+      case INFO_SPEC_N_TYPE:    return 0;            /* by decree */
+      case INFO_GEN_U_TYPE:
+      case INFO_GEN_N_TYPE:     return GEN_VHS;
+      case INFO_DYN_TYPE:       return DYN_VHS;
+      case INFO_TUPLE_TYPE:     return TUPLE_VHS;
+      case INFO_DATA_TYPE:      return DATA_VHS;
+      case INFO_MUTUPLE_TYPE:
+      case INFO_IMMUTUPLE_TYPE: return MUTUPLE_VHS;  /* same layout */
+      case INFO_STATIC_TYPE:    return STATIC_VHS;
+      case INFO_CONST_TYPE:
+      case INFO_CHARLIKE_TYPE:
+      case INFO_INTLIKE_TYPE:
+      case INFO_BH_TYPE:
+      case INFO_IND_TYPE:
+      case INFO_CAF_TYPE:
+      case INFO_FETCHME_TYPE:
+      case INFO_BQ_TYPE:        return 0;
+      case INFO_TSO_TYPE:       return TSO_VHS;
+      case INFO_STKO_TYPE:      return STKO_VHS;
+      default:
+        fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
+                INFO_TYPE(INFO_PTR(node)));
+        return 0;
+    }
+}
+
+
+/*
+ * size_and_ptrs: report the payload size (in words) and the number of
+ * leading pointer words of a closure.  Closure kinds this code does
+ * not handle yield 0/0 rather than a guess.
+ */
+void
+size_and_ptrs(node,size,ptrs)
+P_ node;
+W_ *size, *ptrs;
+{
+    switch(INFO_TYPE(INFO_PTR(node))) {
+      case INFO_SPEC_U_TYPE:
+      case INFO_SPEC_N_TYPE:
+        /* New for 0.24; check that!  -- HWL */
+        *size = INFO_SIZE(INFO_PTR(node));
+        *ptrs = INFO_NoPTRS(INFO_PTR(node));
+        break;
+
+      case INFO_GEN_U_TYPE:
+      case INFO_GEN_N_TYPE:
+        *size = GEN_CLOSURE_SIZE(node);
+        *ptrs = GEN_CLOSURE_NoPTRS(node);
+        break;
+
+      case INFO_DYN_TYPE:
+        *size = DYN_CLOSURE_SIZE(node);
+        *ptrs = DYN_CLOSURE_NoPTRS(node);
+        break;
+
+      case INFO_TUPLE_TYPE:
+        *size = TUPLE_CLOSURE_SIZE(node);
+        *ptrs = TUPLE_CLOSURE_NoPTRS(node);
+        break;
+
+      case INFO_DATA_TYPE:
+        *size = DATA_CLOSURE_SIZE(node);
+        *ptrs = DATA_CLOSURE_NoPTRS(node);
+        break;
+
+      case INFO_IND_TYPE:
+        *size = IND_CLOSURE_SIZE(node);
+        *ptrs = IND_CLOSURE_NoPTRS(node);
+        break;
+
+/* ToDo: more (WDP) */
+
+      /* Don't know about the others */
+      default:
+        *size = *ptrs = 0;
+        break;
+    }
+}
+
+/*
+ * DEBUG_PRINT_NODE: dump a single closure (header info and payload
+ * words) on stderr.  The variable header words themselves are
+ * currently not printed.
+ */
+void
+DEBUG_PRINT_NODE(node)
+P_ node;
+{
+    W_ info_ptr = INFO_PTR(node);
+    I_ size = 0, ptrs = 0, i, vhs = 0;
+    char *info_type = info_hdr_type(info_ptr);
+
+    size_and_ptrs(node,&size,&ptrs);
+    vhs = var_hdr_size(node);
+
+    fprintf(stderr,"Node: 0x%lx", (W_) node);
+
+#if defined(PAR)
+    fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(USE_COST_CENTRES)
+    fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+    fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+    fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
+            info_ptr,info_type,size,ptrs);
+
+    /* For now, we ignore the variable header */
+    for(i=0; i < size; ++i) {
+        if(i == 0)
+            fprintf(stderr,"Data: ");
+        else if(i % 6 == 0)          /* six payload words per line */
+            fprintf(stderr,"\n ");
+
+        if(i < ptrs)
+            fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
+        else
+            fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
+    }
+    fprintf(stderr, "\n");
+}
+
+
+#define INFO_MASK 0x80000000
+
+/*
+ * DEBUG_TREE: print the closure graph reachable from node.  Cycles
+ * are broken by temporarily setting the top bit (INFO_MASK) of each
+ * visited closure's info pointer, which is undone on the way back
+ * out -- so this must not run while anything else is executing.
+ */
+void
+DEBUG_TREE(node)
+P_ node;
+{
+    W_ size = 0, ptrs = 0, i, vhs = 0;
+
+    /* Don't print cycles */
+    if((INFO_PTR(node) & INFO_MASK) != 0)
+        return;
+
+    size_and_ptrs(node,&size,&ptrs);
+    vhs = var_hdr_size(node);
+
+    DEBUG_PRINT_NODE(node);
+    fprintf(stderr, "\n");
+
+    /* Mark the node -- may be dangerous */
+    INFO_PTR(node) |= INFO_MASK;
+
+    for(i = 0; i < ptrs; ++i)
+        DEBUG_TREE((P_)node[i+vhs+_FHS]);
+
+    /* Unmark the node */
+    INFO_PTR(node) &= ~INFO_MASK;
+}
+
+
+/*
+ * DEBUG_INFO_TABLE: dump the info-table fields of a closure's info
+ * pointer on stderr (the fields present depend on the build flags).
+ */
+void
+DEBUG_INFO_TABLE(node)
+P_ node;
+{
+    W_ info_ptr = INFO_PTR(node);
+    char *ip_type = info_hdr_type(info_ptr);
+
+    fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+            ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+#if defined(PAR)
+    fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(USE_COST_CENTRES)
+    fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+    fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+            INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+    fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+            (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+    fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+            (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+    if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+        fprintf(stderr,"plus specialised code\n");
+    else
+        fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif
+}
+/* NOTE(review): no matching "#if defined(GRAN)" is visible in this
+   section; presumably this closes a conditional opened earlier in
+   the file -- confirm against the full source. */
+#endif /* GRAN */
+
+\end{code}
+
+The remaining debugging routines are more or less specific for GrAnSim.
+ +\begin{code} +#if defined(GRAN) && defined(GRAN_CHECK) +void +DEBUG_CURR_THREADQ(verbose) +I_ verbose; +{ + fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc); + DEBUG_THREADQ(ThreadQueueHd, verbose); +} + +void +DEBUG_THREADQ(closure, verbose) +P_ closure; +I_ verbose; +{ + P_ x; + + fprintf(stderr,"Thread Queue: "); + for (x=closure; x!=Nil_closure; x=TSO_LINK(x)) + if (verbose) + DEBUG_TSO(x,0); + else + fprintf(stderr," 0x%x",x); + + if (closure==Nil_closure) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +/* Check with Threads.lh */ +static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"}; + +void +DEBUG_TSO(closure,verbose) +P_ closure; +I_ verbose; +{ + + if (closure==Nil_closure) { + fprintf(stderr,"TSO at 0x%x is Nil_closure!\n"); + return; + } + + fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure); + + fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure)); + fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure)); + fprintf(stderr,"> Id: 0x%x",TSO_ID(closure)); +#if defined(GRAN_CHECK) && defined(GRAN) + if (debug & 0x10) + fprintf(stderr,"\tType: %s %s\n", + type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO], + (TSO_TYPE(closure) & FETCH_MASK_TSO) ? 
"SLEEPING" : ""); + else + fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]); +#else + fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]); +#endif + fprintf(stderr,"> PC1: 0x%x",TSO_PC1(closure)); + fprintf(stderr,"\tPC2: 0x%x\n",TSO_PC2(closure)); + fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure)); + fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure)); + fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure)); + + if (verbose) { + fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure)); + fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure)); + fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure)); + fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure)); + fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure)); + fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure)); + fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure)); + fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure)); + fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure)); + fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure)); + fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure)); + fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure)); + fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure)); + fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure)); + } +} + +void +DEBUG_EVENT(event, verbose) +eventq event; +I_ verbose; +{ + if (verbose) { + print_event(event); + }else{ + fprintf(stderr," 0x%x",event); + } +} + +void +DEBUG_EVENTQ(verbose) +I_ verbose; +{ + eventq x; + + fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd); + for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) { + DEBUG_EVENT(x,verbose); + } + if (EventHd==NULL) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +void +DEBUG_SPARK(spark, verbose) +sparkq spark; +I_ verbose; +{ + if (verbose) + print_spark(spark); + else + fprintf(stderr," 0x%x",spark); +} + +void +DEBUG_SPARKQ(spark,verbose) +sparkq spark; +I_ verbose; +{ + 
sparkq x; + + fprintf(stderr,"Sparkq (hd @0x%x):\n",spark); + for (x=spark; x!=NULL; x=SPARK_NEXT(x)) { + DEBUG_SPARK(x,verbose); + } + if (spark==NULL) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +void +DEBUG_CURR_SPARKQ(verbose) +I_ verbose; +{ + DEBUG_SPARKQ(SparkQueueHd,verbose); +} + +void +DEBUG_PROC(proc,verbose) +I_ proc; +I_ verbose; +{ + fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n", + proc,CurrentTime[proc],CurrentTime[proc], + (CurrentProc==proc)?"ACTIVE":"INACTIVE"); + DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2); + if ( (CurrentProc==proc) ) + DEBUG_TSO(CurrentTSO,1); + + if (EventHd!=NULL) + fprintf(stderr,"Next event (%s) is on proc %d\n", + event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd)); + + if (verbose & 0x1) { + fprintf(stderr,"\nREQUIRED sparks: "); + DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1); + fprintf(stderr,"\nADVISORY_sparks: "); + DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1); + } +} + +/* Debug CurrentTSO */ +void +DCT(){ + fprintf(stderr,"Current Proc: %d\n",CurrentProc); + DEBUG_TSO(CurrentTSO,1); +} + +/* Debug Current Processor */ +void +DCP(){ DEBUG_PROC(CurrentProc,2); } + +/* Shorthand for debugging event queue */ +void +DEQ() { DEBUG_EVENTQ(1); } + +/* Shorthand for debugging spark queue */ +void +DSQ() { DEBUG_CURR_SPARKQ(1); } + +/* Shorthand for printing a node */ +void +DN(P_ node) { DEBUG_PRINT_NODE(node); } + +#endif /* GRAN */ +\end{code} + + +%**************************************************************************** +% +\subsection[qp-profile]{Quasi-Parallel Profiling} +% +%**************************************************************************** + +\begin{code} +#ifndef GRAN +I_ do_qp_prof; +FILE *qp_file; + +/* *Virtual* Time in milliseconds */ +long +qp_elapsed_time() +{ + return ((long) (usertime() * 1e3)); +} + +static void +init_qp_profiling(STG_NO_ARGS) +{ + I_ i; + char qp_filename[32]; + + sprintf(qp_filename, "%0.24s.qp", prog_argv[0]); + if 
((qp_file = fopen(qp_filename,"w")) == NULL ) { + fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", + qp_filename); + do_qp_prof = 0; + } else { + fputs(prog_argv[0], qp_file); + for(i = 1; prog_argv[i]; i++) { + fputc(' ', qp_file); + fputs(prog_argv[i], qp_file); + } + fprintf(qp_file, "+RTS -C%ld -t%ld\n", contextSwitchTime, MaxThreads); + fputs(time_str(), qp_file); + fputc('\n', qp_file); + } +} + +void +QP_Event0(tid, node) +I_ tid; +P_ node; +{ + fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node)); +} + +void +QP_Event1(event, tso) +char *event; +P_ tso; +{ + fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event, + TSO_ID(tso), (W_) TSO_NAME(tso)); +} + +void +QP_Event2(event, tso1, tso2) +char *event; +P_ tso1, tso2; +{ + fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event, + TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2)); +} +#endif /* 0 */ +#endif /* GRAN */ + +#if defined(CONCURRENT) && !defined(GRAN) +/* romoluSnganpu' SamuS! 
*/ + +unsigned CurrentProc = 0; +W_ IdleProcs = ~0l, Idlers = 32; + +void +GranSimAllocate(n,node,liveness) +I_ n; +P_ node; +W_ liveness; +{ } + +void +GranSimUnallocate(n,node,liveness) +W_ n; +P_ node; +W_ liveness; +{ } + + +void +GranSimExec(ariths,branches,loads,stores,floats) +W_ ariths,branches,loads,stores,floats; +{ } + +I_ +GranSimFetch(node /* , liveness_mask */ ) +P_ node; +/* I_ liveness_mask; */ +{ } + +void +GranSimSpark(local,node) +W_ local; +P_ node; +{ } + +#if 0 +void +GranSimSparkAt(spark,where,identifier) +sparkq spark; +P_ where; /* This should be a node; alternatively could be a GA */ +I_ identifier; +{ } +#endif + +void +GranSimBlock() +{ } +#endif + +\end{code} + diff --git a/ghc/runtime/main/TopClosure.lc b/ghc/runtime/main/TopClosure.lc new file mode 100644 index 0000000000..e2d670c3db --- /dev/null +++ b/ghc/runtime/main/TopClosure.lc @@ -0,0 +1,8 @@ +/* this one will be linked in for Haskell 1.2 */ +\begin{code} +#include "rtsdefs.h" + +EXTDATA(Main_mainPrimIO_closure); + +P_ TopClosure = Main_mainPrimIO_closure; +\end{code} diff --git a/ghc/runtime/main/TopClosure13.lc b/ghc/runtime/main/TopClosure13.lc new file mode 100644 index 0000000000..07792c2198 --- /dev/null +++ b/ghc/runtime/main/TopClosure13.lc @@ -0,0 +1,8 @@ +/* this one will be linked in for Haskell 1.3 */ +\begin{code} +#include "rtsdefs.h" + +EXTDATA(Main_mainPrimIO13_closure); + +P_ TopClosure = Main_mainPrimIO13_closure; +\end{code} diff --git a/ghc/runtime/main/main.lc b/ghc/runtime/main/main.lc new file mode 100644 index 0000000000..98002a3bd9 --- /dev/null +++ b/ghc/runtime/main/main.lc @@ -0,0 +1,1355 @@ +%/**************************************************************** +%* * +%* This is where everything starts * +%* * +%****************************************************************/ + +\begin{code} +#if defined(USE_COST_CENTRES) || defined(GUM) || defined(CONCURRENT) +#define NON_POSIX_SOURCE /* time things on Solaris -- sigh */ +#endif + +#include 
"rtsdefs.h" +#include <setjmp.h> + +#if defined(STDC_HEADERS) || defined(HAVE_STRING_H) +# include <string.h> +/* An ANSI string.h and pre-ANSI memory.h might conflict. */ +# if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H) +# include <memory.h> +# endif /* not STDC_HEADERS and HAVE_MEMORY_H */ +# define index strchr +# define rindex strrchr +# define bcopy(s, d, n) memcpy ((d), (s), (n)) +# define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +# define bzero(s, n) memset ((s), 0, (n)) +#else /* not STDC_HEADERS and not HAVE_STRING_H */ +# include <strings.h> +/* memory.h and strings.h conflict on some systems. */ +#endif /* not STDC_HEADERS and not HAVE_STRING_H */ + +#if defined(USE_COST_CENTRES) || defined(GUM) +/* need some "time" things */ + +/* ToDo: This is a mess! Improve ? */ + +# ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +# endif + +# ifdef HAVE_SYS_TIMES_H +# include <sys/times.h> +# endif + +# ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +# endif +#endif /* USE_COST_CENTRES || GUM */ + +#ifndef PAR +STGRegisterTable MainRegTable; +#endif + +/* fwd decls */ +void setupRtsFlags PROTO((int *argc, char *argv[], I_ *rtsc, char *rtsv[])); +void shutdownHaskell(STG_NO_ARGS); + +EXTFUN(startStgWorld); +extern void PrintRednCountInfo(STG_NO_ARGS); +extern void checkAStack(STG_NO_ARGS); + +/* a real nasty Global Variable */ +/* moved to main/TopClosure(13)?.lc -- *one* of them will get linked in +P_ TopClosure = Main_mainPrimIO_closure; + */ + +/* structure to carry around info about the storage manager */ +smInfo StorageMgrInfo; + +FILE *main_statsfile = NULL; +#if defined(DO_REDN_COUNTING) +FILE *tickyfile = NULL; +#endif +#if defined(SM_DO_BH_UPDATE) +I_ noBlackHoles = 0; +#endif +I_ doSanityChks = 0; +I_ showRednCountStats = 0; +I_ traceUpdates = 0; +extern I_ squeeze_upd_frames; + +#ifdef PAR +extern I_ OkToGC, buckets, average_stats(); +extern rtsBool TraceSparks, OutputDisabled, DelaySparks, + DeferGlobalUpdates, ParallelStats; + +extern void 
RunParallelSystem PROTO((P_)); +extern void initParallelSystem(STG_NO_ARGS); +extern void SynchroniseSystem(STG_NO_ARGS); + +extern void SetTrace PROTO((W_ address, I_ level/*?*/)); +#endif + +#if defined(GRAN_CHECK) && defined(GRAN) +extern W_ debug; +extern W_ event_trace ; +extern W_ event_trace_all ; +#endif + +extern void *stgAllocForGMP PROTO((size_t)); +extern void *stgReallocForGMP PROTO ((void *, size_t, size_t)); +extern void stgDeallocForGMP PROTO ((void *, size_t)); + +#if defined (DO_SPAT_PROFILING) && sparc_TARGET_ARCH + /* NOTE: I, WDP, do not use this in my SPAT profiling */ +W_ KHHP, KHHPLIM, KHSPA, KHSPB; +#endif + +/* NeXTs can't just reach out and touch "end", to use in + distinguishing things in static vs dynamic (malloc'd) memory. +*/ +#if nextstep2_TARGET_OS || nextstep3_TARGET_OS /* ToDo: use END_BY_FUNNY_MEANS or something */ +void *get_end_result; +#endif + +I_ prog_argc; +char **prog_argv; +I_ rts_argc; +char *rts_argv[MAX_RTS_ARGS]; + +#ifndef PAR +jmp_buf restart_main; /* For restarting after a signal */ +#endif + +#if defined(PVM) +unsigned nPEs = 0, nIMUs = 0; +#endif + +#if defined(GUM) +int nPEs = 0; +#endif + +int /* return type of "main" is defined by the C standard */ +main(argc, argv) + int argc; + char *argv[]; +{ +\end{code} + +The very first thing we do is grab the start time...just in case we're +collecting timing statistics. + +\begin{code} + + start_time(); + +\end{code} + +The parallel system needs to be initialised and synchronised before +the program is run. This is done {\em before} heap allocation, so we +can grab all remaining heap without needing to consider the System +Manager's requirements. 
+ +\begin{code} +#ifdef PAR + /* + * Grab the number of PEs out of the argument vector, and eliminate it + * from further argument processing + */ + nPEs = atoi(argv[1]); + argv[1] = argv[0]; + argv++; + argc--; + +/* fprintf(stderr, "I'm alive, nPEs = %d \n", nPEs); */ + SynchroniseSystem(); +#endif + +#if defined(USE_COST_CENTRES) || defined(GUM) + /* setup string indicating time of run -- only used for profiling */ + (void) time_str(); +#endif + +#if nextstep2_TARGET_OS || nextstep3_TARGET_OS /* ToDo: use END_BY_FUNNY_MEANS or something */ + get_end_result = get_end(); +#endif + + /* + divide the command-line args between pgm and RTS; + figure out what statsfile to use (if any); + [if so, write the whole cmd-line into it] + + This is unlikely to work well in parallel! KH. + */ + setupRtsFlags(&argc, argv, &rts_argc, rts_argv); + prog_argc = argc; + prog_argv = argv; + +#if defined(PAR) + /* Initialise the parallel system -- before initHeap! */ + initParallelSystem(); +#endif /* PAR */ + +#if defined(LIFE_PROFILE) + if (life_profile_init(rts_argv, prog_argv) != 0) { + fflush(stdout); + fprintf(stderr, "life_profile_init failed!\n"); + EXIT(EXIT_FAILURE); + } +#endif + +#if defined(USE_COST_CENTRES) || defined(GUM) + if (init_cc_profiling(rts_argc, rts_argv, prog_argv) != 0) { + fflush(stdout); + fprintf(stderr, "init_cc_profiling failed!\n"); + EXIT(EXIT_FAILURE); + } +#endif + +#if defined(CONCURRENT) && defined(GRAN) + if (!no_gr_profile) + if (init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) != 0) { + fprintf(stderr, "init_gr_simulation failed!\n"); EXIT(EXIT_FAILURE); + } +#endif + +#ifdef PAR + if (do_gr_profile) + init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv); +#endif + + /* + initialize the storage manager + */ + if ( initSM(rts_argc, rts_argv, main_statsfile) != 0) { + fflush(stdout); + fprintf(stderr, "initSM failed!\n"); + EXIT(EXIT_FAILURE); + } + +#ifndef PAR + if ( initStacks( &StorageMgrInfo ) != 0) { + fflush(stdout); + 
fprintf(stderr, "initStacks failed!\n"); + EXIT(EXIT_FAILURE); + } +#endif + + if ( initHeap( &StorageMgrInfo ) != 0) { + fflush(stdout); + fprintf(stderr, "initHeap failed!\n"); EXIT(EXIT_FAILURE); + } + +#if defined(CONCURRENT) && !defined(GRAN) + if (!initThreadPools(MaxLocalSparks)) { + fflush(stdout); + fprintf(stderr, "initThreadPools failed!\n"); + EXIT(EXIT_FAILURE); + } +#endif + +#if defined(USE_COST_CENTRES) || defined(GUM) + /* call cost centre registering routine (after heap allocated) */ + cc_register(); +#endif + +/* Information needed by runtime trace analysers -- don't even ask what it does! */ + /* NOTE: I, WDP, do not use this in my SPAT profiling */ +#if defined (DO_SPAT_PROFILING) && sparc_TARGET_ARCH + KHHPLIM = (W_) StorageMgrInfo.hplim; + KHHP = (W_) StorageMgrInfo.hp; + KHSPA = (W_) SAVE_SpA, + KHSPB = (W_) SAVE_SpB; + +/* fprintf(stderr,"Hp = %lx, HpLim = %lx, SpA = %lx, SpB = %lx\n",KHHP,KHHPLIM,KHSPA,KHSPB); */ + +/* NOT ME: + __asm__("sethi %%hi(_KHHP),%%o0\n\tld [%%o0+%%lo(_KHHP)],%%g0" : : : "%%o0"); + __asm__("sethi %%hi(_KHHPLIM),%%o0\n\tld [%%o0+%%lo(_KHHPLIM)],%%g0" : : : "%%o0"); + __asm__("sethi %%hi(_KHSPA),%%o0\n\tld [%%o0+%%lo(_KHSPA)],%%g0" : : : "%%o0"); + __asm__("sethi %%hi(_KHSPB),%%o0\n\tld [%%o0+%%lo(_KHSPB)],%%g0" : : : "%%o0"); +*/ +#endif + +#if defined(DO_REDN_COUNTING) + max_SpA = MAIN_SpA; /* initial high-water marks */ + max_SpB = MAIN_SpB; +#endif + + /* Tell GNU multi-precision pkg about our custom alloc functions */ + mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP); + + /* Record initialization times */ + end_init(); + +#if defined(USE_COST_CENTRES) || defined(CONCURRENT) + /* + * Both the context-switcher and the cost-center profiler use + * a virtual timer. 
+ */ + if (install_vtalrm_handler()) { + fflush(stdout); + fprintf(stderr, "Can't install VTALRM handler.\n"); + EXIT(EXIT_FAILURE); + } +#if (defined(CONCURRENT) && defined(USE_COST_CENTRES)) || defined(GUM) + if (time_profiling) { + if (contextSwitchTime % (1000/TICK_FREQUENCY) == 0) + tick_millisecs = TICK_MILLISECS; + else + tick_millisecs = CS_MIN_MILLISECS; + + contextSwitchTicks = contextSwitchTime / tick_millisecs; + profilerTicks = TICK_MILLISECS / tick_millisecs; + } else + tick_millisecs = contextSwitchTime; +#endif + +#ifndef CONCURRENT + START_TIME_PROFILER; +#endif + +#endif /* USE_COST_CENTRES || CONCURRENT */ + +#ifndef PAR + setjmp(restart_main); + initUserSignals(); +#endif + +#ifdef CONCURRENT +# if defined(GRAN) /* HWL */ + /* RunnableThreadsHd etc. are init in ScheduleThreads */ + /* + * I'm not sure about this. Note that this code is for re-initializing + * things when a longjmp to restart_main occurs. --JSM + */ + +# else /* !GRAN */ + AvailableStack = AvailableTSO = Nil_closure; + RunnableThreadsHd = RunnableThreadsTl = Nil_closure; + WaitingThreadsHd = WaitingThreadsTl = Nil_closure; + PendingSparksHd[REQUIRED_POOL] = + PendingSparksTl[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL]; + PendingSparksHd[ADVISORY_POOL] = + PendingSparksTl[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL]; +# endif + + CurrentTSO = Nil_closure; + +# ifdef PAR + RunParallelSystem(TopClosure); +# else + STKO_LINK(MainStkO) = Nil_closure; + ScheduleThreads(TopClosure); +# endif /* PAR */ + +#else /* not threaded (sequential) */ + +# if defined(__STG_TAILJUMPS__) + miniInterpret((StgFunPtr)startStgWorld); +# else + if (doSanityChks) + miniInterpret_debug((StgFunPtr)startStgWorld, checkAStack); + else + miniInterpret((StgFunPtr)startStgWorld); +# endif /* not tail-jumping */ +#endif /* !CONCURRENT */ + + shutdownHaskell(); + return(EXIT_SUCCESS); /* don't use EXIT! 
:-) */ +} +\end{code} + +It should be possible to call @shutdownHaskell@ whenever you want to +shut a Haskell program down in an orderly way. + +Note that some of this code probably depends on the integrity of +various internal data structures so this should not be called in +response to detecting a catastrophic error. + +\begin{code} +void +shutdownHaskell(STG_NO_ARGS) +{ + STOP_TIME_PROFILER; + + if (exitSM(&StorageMgrInfo) != 0) { + fflush(stdout); + fprintf(stderr, "exitSM failed!\n"); + EXIT(EXIT_FAILURE); + } +#if defined(LIFE_PROFILE) + { + extern P_ hp_start; /* from the SM -- Hack! */ + life_profile_finish(StorageMgrInfo.hp - hp_start, prog_argv); + } +#endif + +#if defined(USE_COST_CENTRES) + heap_profile_finish(); +#endif +#if defined(USE_COST_CENTRES) || defined(GUM) + report_cc_profiling(1 /* final */ ); +#endif + +#if defined(DO_REDN_COUNTING) + if (showRednCountStats) { + PrintRednCountInfo(); + } +#endif + +#if defined(GRAN_CHECK) && defined(GRAN) + if (PrintFetchMisses) + fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses); + +# if defined(COUNT) + fprintf(stderr,"COUNT statistics:\n"); + fprintf(stderr," Total number of updates: %u\n",nUPDs); + fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n", + nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ); + fprintf(stderr," Number of PAPs: %u\n",nPAPs); +# endif + + if (!no_gr_profile) + end_gr_simulation(); +#endif + + fflush(stdout); + /* This fflush is important, because: if "main" just returns, + then we will end up in pre-supplied exit code that will close + streams and flush buffers. In particular we have seen: it + will close fd 0 (stdin), then flush fd 1 (stdout), then <who + cares>... + + But if you're playing with sockets, that "close fd 0" might + suggest to the daemon that all is over, only to be presented + with more stuff on "fd 1" at the flush. + + The fflush avoids this sad possibility. 
+ */ +} +\end{code} + +%/**************************************************************** +%* * +%* Getting default settings for RTS parameters * +%* * +%* +RTS indicates following arguments destined for RTS * +%* -RTS indicates following arguments destined for program * +%* * +%****************************************************************/ +\begin{code} + +char *flagtext[] = { +"", +"Usage: <prog> <args> [+RTS <rtsopts> | -RTS <args>] ... --RTS <args>", +"", +" +RTS Indicates run time system options follow", +" -RTS Indicates program arguments follow", +" --RTS Indicates that ALL subsequent arguments will be given to the", +" program (including any of these RTS flags)", +"", +"The following run time system options are available:", +"", +" -? -f Prints this message and exits; the program is not executed", +"", +" -K<size> Sets the stack size (default 64k) Egs: -K32k -K512k", +" -H<size> Sets the heap size (default 4M) -H512k -H16M", +" -s<file> Summary GC statistics (default file: <program>.stat)", +" -S<file> Detailed GC statistics (with -Sstderr going to stderr)", +"", +#if defined(GCap) +" -M<n>% Sets minimum size of alloc area as % of heap (default 3%)", +" -A<size> Fixes size of alloc area, overriding any minimum (-A gives 64k)", +" -G<size> Fixes size of major generation (default is dynamic threshold)", +" -F2s Forces program compiled for Appel gc to use 2s collection", +#else +# if defined(GCgn) +" -A<size> Specifies size of alloc area (default 64k)", +" -G<size> Fixes size of major generation (default is available heap)", +" -F2s Forces program compiled for Gen gc to use 2s collection", +# else +" -M<n>% Minimum % of heap which must be available (default 3%)", +" -A<size> Fixes size of heap area allocated between GCs (-A gives 64k)", +# endif +#endif +#if defined(FORCE_GC) +" -j<size> Forces major GC at every <size> bytes allocated", +#endif /* FORCE_GC */ +#if defined(GCdu) +" -u<percent> Fixes residency threshold at which mode switches (range 
0.0..0.95)", +#endif +"", +#if defined(SM_DO_BH_UPDATE) +" -N No black-holing (for use when a signal handler is present)", +#endif +" -Z Don't squeeze out update frames on stack overflow", +" -B Sound the bell at the start of each (major) garbage collection", +#if defined(USE_COST_CENTRES) || defined(GUM) +"", +" -p<sort> Produce cost centre time profile (output file <program>.prof)", +" sort: T = time (default), A = alloc, C = cost centre label", +" -P<sort> Produce serial time profile (output file <program>.time)", +" and a -p profile with detailed caf/enter/tick/alloc info", +#if defined(USE_COST_CENTRES) +"", +" -h<break-down> Heap residency profile (output file <program>.hp)", +" break-down: C = cost centre (default), M = module, G = group", +" D = closure description, Y = type description", +" T<ints>,<start> = time closure created", +" ints: no. of interval bands plotted (default 18)", +" start: seconds after which intervals start (default 0.0)", +" A subset of closures may be selected by the attached cost centre using:", +" -c{mod:lab,mod:lab...}, specific module:label cost centre(s)", +" -m{mod,mod...} all cost centres from the specified modules(s)", +" -g{grp,grp...} all cost centres from the specified group(s)", +" Selections can also be made by description, type, kind and age:", +" -d{des,des...} closures with specified closure descriptions", +" -y{typ,typ...} closures with specified type descriptions", +" -k{knd,knd...} closures of the specified kinds", +" -a<age> closures which survived <age> complete intervals", +" The selection logic used is summarised as follows:", +" ([-c] or [-m] or [-g]) and ([-d] or [-y] or [-k]) and [-a]", +" where an option is true if not specified", +#endif +"", +" -z<tbl><size> set hash table <size> for <tbl> (C, M, G, D or Y)", +"", +" -i<secs> Number of seconds in a profiling interval (default 1.0):", +" heap profile (-h) and/or serial time profile (-P) frequency", +#endif /* USE_COST_CENTRES */ +#if defined(LIFE_PROFILE) 
+"", +" -l<res> Produce liftime and update profile (output file <program>.life)", +" res: the age resolution in bytes allocated (default 10,000)", +#endif /* LIFE_PROFILE */ +"", +#if defined(DO_REDN_COUNTING) +" -r<file> Produce reduction profiling statistics (with -rstderr for stderr)", +"", +#endif +" -I Use debugging miniInterpret with stack and heap sanity-checking.", +" -T<level> Trace garbage collection execution (debugging)", +#ifdef CONCURRENT +"", +# ifdef PAR +" -N<n> Use <n> PVMish processors in parallel (default: 2)", +/* NB: the -N<n> is implemented by the driver!! */ +# endif +" -C<secs> Context-switch interval in seconds", +" (0 or no argument means switch as often as possible)", +" the default is .01 sec; resolution is .01 sec", +" -e<size> Size of spark pools (default 100)", +# ifdef PAR +" -q Enable activity profile (output files in ~/<program>*.gr)", +" -qb Enable binary activity profile (output file /tmp/<program>.gb)", +#else +" -q[v] Enable quasi-parallel profile (output file <program>.qp)", +# endif +" -t<num> Set maximum number of advisory threads per PE (default 32)", +" -o<num> Set stack chunk size (default 1024)", +# ifdef PAR +" -d Turn on PVM-ish debugging", +" -O Disable output for performance measurement", +# endif /* PAR */ +#endif /* CONCURRENT */ +"", +"Other RTS options may be available for programs compiled a different way.", +"The GHC User's Guide has full details.", +"", +0 +}; + +#define RTS 1 +#define PGM 0 + +#ifndef atof +extern double atof(); +/* no proto because some machines use const and some do not */ +#endif + +void +setupRtsFlags(argc, argv, rts_argc, rts_argv) +int *argc; +I_ *rts_argc; +char *argv[], *rts_argv[]; +{ + I_ error = 0; + I_ mode; + I_ arg, total_arg; + char *last_slash; + + /* Remove directory from argv[0] -- default files in current directory */ + + if ((last_slash = (char *) rindex(argv[0], '/')) != NULL) + strcpy(argv[0], last_slash+1); + + /* Split arguments (argv) into PGM (argv) and RTS 
(rts_argv) parts */ + /* argv[0] must be PGM argument -- leave in argv */ + + total_arg = *argc; + arg = 1; + + *argc = 1; + *rts_argc = 0; + + for (mode = PGM; arg < total_arg && strcmp("--RTS", argv[arg]) != 0; arg++) { + if (strcmp("+RTS", argv[arg]) == 0) { + mode = RTS; + } + else if (strcmp("-RTS", argv[arg]) == 0) { + mode = PGM; + } + else if (mode == RTS && *rts_argc < MAX_RTS_ARGS-1) { + rts_argv[(*rts_argc)++] = argv[arg]; + } + else if (mode == PGM) { + argv[(*argc)++] = argv[arg]; + } + else { + fflush(stdout); + fprintf(stderr, "setupRtsFlags: Too many RTS arguments (max %d)\n", + MAX_RTS_ARGS-1); + EXIT(EXIT_FAILURE); + } + } + if (arg < total_arg) { + /* arg must be --RTS; process remaining program arguments */ + while (++arg < total_arg) { + argv[(*argc)++] = argv[arg]; + } + } + argv[*argc] = (char *) 0; + rts_argv[*rts_argc] = (char *) 0; + + /* Process RTS (rts_argv) part: mainly to determine statsfile */ + + for (arg = 0; arg < *rts_argc; arg++) { + if (rts_argv[arg][0] == '-') { + switch(rts_argv[arg][1]) { + case '?': + case 'f': + error = 1; + break; + + case 'Z': /* Don't squeeze out update frames */ + squeeze_upd_frames = 0; + break; + +#if defined(SM_DO_BH_UPDATE) + case 'N': + noBlackHoles++; + break; +#endif + + case 'I': + doSanityChks++; +#if defined(__STG_TAILJUMPS__) + /* Blech -- too many errors if run in parallel -- KH */ + fprintf(stderr, "setupRtsFlags: Using Tail Jumps: Sanity checks not possible: %s\n", rts_argv[arg]); + error = 1; +#endif + break; + + case 'U': + traceUpdates++; +#if ! defined(DO_RUNTIME_TRACE_UPDATES) + fprintf(stderr, "setupRtsFlags: Update Tracing not compiled in: %s\n", rts_argv[arg]); + error = 1; +#endif + break; + + case 'r': /* Basic profiling stats */ + showRednCountStats++; +#if ! defined(DO_REDN_COUNTING) + fprintf(stderr, "setupRtsFlags: Reduction counting not compiled in: %s\n", rts_argv[arg]); + error = 1; + +#else /* ticky-ticky! 
*/ + if (strcmp(rts_argv[arg]+2, "stderr") == 0) /* use real stderr */ + tickyfile = stderr; + else if (rts_argv[arg][2] != '\0') /* ticky file specified */ + tickyfile = fopen(rts_argv[arg]+2,"w"); + else { + char stats_filename[STATS_FILENAME_MAXLEN]; /* default <program>.ticky */ + sprintf(stats_filename, TICKY_FILENAME_FMT, argv[0]); + tickyfile = fopen(stats_filename,"w"); + } + if (tickyfile == NULL) { + fprintf(stderr, "Can't open tickyfile %s (default %0.24s.ticky)\n", + rts_argv[arg]+2, argv[0]); + error = 1; + } else { + /* Write argv and rtsv into start of ticky file */ + I_ count; + for(count = 0; count < *argc; count++) + fprintf(tickyfile, "%s ", argv[count]); + fprintf(tickyfile, "+RTS "); + for(count = 0; count < *rts_argc; count++) + fprintf(tickyfile, "%s ", rts_argv[count]); + fprintf(tickyfile, "\n"); + } +#endif /* ticky-ticky! */ + break; + + case 's': /* Also used by GC -- open file here */ + case 'S': +#ifdef PAR + /* Opening all those files would almost certainly fail... 
*/ + ParallelStats = rtsTrue; + main_statsfile = stderr; /* temporary; ToDo: rm */ +#else + if (strcmp(rts_argv[arg]+2, "stderr") == 0) /* use real stderr */ + main_statsfile = stderr; + else if (rts_argv[arg][2] != '\0') /* stats file specified */ + main_statsfile = fopen(rts_argv[arg]+2,"w"); + else { + char stats_filename[STATS_FILENAME_MAXLEN]; /* default <program>.stat */ + sprintf(stats_filename, STAT_FILENAME_FMT, argv[0]); + main_statsfile = fopen(stats_filename,"w"); + } + if (main_statsfile == NULL) { + fprintf(stderr, "Can't open statsfile %s (default %0.24s.stat)\n", rts_argv[arg]+2, argv[0]); + error = 1; + } else { + /* Write argv and rtsv into start of stats file */ + I_ count; + for(count = 0; count < *argc; count++) + fprintf(main_statsfile, "%s ", argv[count]); + fprintf(main_statsfile, "+RTS "); + for(count = 0; count < *rts_argc; count++) + fprintf(main_statsfile, "%s ", rts_argv[count]); + fprintf(main_statsfile, "\n"); + } +#endif + break; + + case 'P': /* detailed cost centre profiling (time/alloc) */ + case 'p': /* cost centre profiling (time/alloc) */ + case 'i': /* serial profiling -- initial timer interval */ +#if ! (defined(USE_COST_CENTRES) || defined(GUM)) + fprintf(stderr, "setupRtsFlags: Not built for cost centre profiling: %s\n", rts_argv[arg]); + error = 1; +#endif /* ! (USE_COST_CENTRES || GUM) */ + break; + case 'h': /* serial heap profile */ + case 'z': /* size of index tables */ + case 'c': /* cost centre label select */ + case 'm': /* cost centre module select */ + case 'g': /* cost centre group select */ + case 'd': /* closure descr select */ + case 'y': /* closure type select */ + case 'k': /* closure kind select */ + case 'a': /* closure age select */ +#if ! defined(USE_COST_CENTRES) + fprintf(stderr, "setupRtsFlags: Not built for cost centre profiling: %s\n", rts_argv[arg]); + error = 1; +#endif /* ! 
USE_COST_CENTRES */ + break; + + case 'j': /* force GC option */ +#if defined(FORCE_GC) + force_GC++; + if (rts_argv[arg][2]) { + GCInterval = decode(rts_argv[arg]+2) / sizeof(W_); + } +#else /* ! FORCE_GC */ + fprintf(stderr, "setupRtsFlags: Not built for forcing GC: %s\n", rts_argv[arg]); + error = 1; +#endif /* ! FORCE_GC */ + break; + + case 'l': /* life profile option */ +#if defined(LIFE_PROFILE) + do_life_prof++; + if (rts_argv[arg][2]) { + LifeInterval = decode(rts_argv[arg]+2) / sizeof(W_); + } +#else /* ! LIFE_PROFILE */ + fprintf(stderr, "setupRtsFlags: Not built for lifetime profiling: %s\n", rts_argv[arg]); + error = 1; +#endif /* ! LIFE_PROFILE */ + break; + + /* Flags for the threaded RTS */ + +#ifdef CONCURRENT + case 'C': /* context switch interval */ + if (rts_argv[arg][2] != '\0') { + /* Convert to milliseconds */ + contextSwitchTime = (I_) ((atof(rts_argv[arg]+2) * 1000)); + contextSwitchTime = (contextSwitchTime / CS_MIN_MILLISECS) + * CS_MIN_MILLISECS; + if (contextSwitchTime < CS_MIN_MILLISECS) + contextSwitchTime = CS_MIN_MILLISECS; + } else + contextSwitchTime = 0; + break; +#if !defined(GRAN) + case 'e': + if (rts_argv[arg][2] != '\0') { + MaxLocalSparks = strtol(rts_argv[arg]+2, (char **) NULL, 10); + if (MaxLocalSparks <= 0) { + fprintf(stderr, "setupRtsFlags: bad value for -e\n"); + error = 1; + } + } else + MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS; + break; +#endif +#ifdef PAR + case 'q': /* activity profile option */ + if (rts_argv[arg][2] == 'b') + do_gr_binary++; + else + do_gr_profile++; + break; +#else + case 'q': /* quasi-parallel profile option */ + if (rts_argv[arg][2] == 'v') + do_qp_prof = 2; + else + do_qp_prof++; + break; +#endif + case 't': + if (rts_argv[arg][2] != '\0') { + MaxThreads = strtol(rts_argv[arg]+2, (char **) NULL, 10); + } else { + fprintf(stderr, "setupRtsFlags: missing size for -t\n"); + error = 1; + } + break; + + case 'o': + if (rts_argv[arg][2] != '\0') { + StkOChunkSize = decode(rts_argv[arg]+2); + if 
(StkOChunkSize < MIN_STKO_CHUNK_SIZE) + StkOChunkSize = MIN_STKO_CHUNK_SIZE; + } else { + fprintf(stderr, "setupRtsFlags: missing size for -o\n"); + error = 1; + } + break; + +# ifdef PAR + case 'O': + OutputDisabled = rtsTrue; + break; + +# else /* PAR */ + +# if !defined(GRAN) + case 'b': /* will fall through to disaster */ +# else + case 'b': + if (rts_argv[arg][2] != '\0') { + + /* Should we emulate hbcpp */ + if(strcmp((rts_argv[arg]+2),"roken")==0) { + ++DoAlwaysCreateThreads; + strcpy(rts_argv[arg]+2,"oring"); + } + + /* or a ridiculously idealised simulator */ + if(strcmp((rts_argv[arg]+2),"oring")==0) { + gran_latency = gran_fetchtime = gran_additional_latency = + gran_gunblocktime = gran_lunblocktime + = gran_threadcreatetime = gran_threadqueuetime + = gran_threadscheduletime = gran_threaddescheduletime + = gran_threadcontextswitchtime + = 0; + + gran_mpacktime = gran_munpacktime = 0; + + gran_arith_cost = gran_float_cost = gran_load_cost + = gran_store_cost = gran_branch_cost = 0; + + gran_heapalloc_cost = 1; + + /* ++DoFairSchedule; */ + ++DoStealThreadsFirst; + ++DoThreadMigration; + ++do_gr_profile; + } + + /* or a ridiculously idealised simulator */ + if(strcmp((rts_argv[arg]+2),"onzo")==0) { + gran_latency = gran_fetchtime = gran_additional_latency = + gran_gunblocktime = gran_lunblocktime + = gran_threadcreatetime = gran_threadqueuetime + = gran_threadscheduletime = gran_threaddescheduletime + = gran_threadcontextswitchtime + = 0; + + gran_mpacktime = gran_munpacktime = 0; + + /* Keep default values for these + gran_arith_cost = gran_float_cost = gran_load_cost + = gran_store_cost = gran_branch_cost = 0; + */ + + gran_heapalloc_cost = 1; + + /* ++DoFairSchedule; */ /* -b-R */ + /* ++DoStealThreadsFirst; */ /* -b-T */ + ++DoReScheduleOnFetch; /* -bZ */ + ++DoThreadMigration; /* -bM */ + ++do_gr_profile; /* -bP */ +# if defined(GRAN_CHECK) && defined(GRAN) + debug = 0x20; /* print event statistics */ +# endif + } + + /* Communication and task 
creation cost parameters */ + else switch(rts_argv[arg][2]) { + case 'l': + if (rts_argv[arg][3] != '\0') + { + gran_gunblocktime = gran_latency = decode(rts_argv[arg]+3); + gran_fetchtime = 2* gran_latency; + } + else + gran_latency = LATENCY; + break; + + case 'a': + if (rts_argv[arg][3] != '\0') + gran_additional_latency = decode(rts_argv[arg]+3); + else + gran_additional_latency = ADDITIONAL_LATENCY; + break; + + case 'm': + if (rts_argv[arg][3] != '\0') + gran_mpacktime = decode(rts_argv[arg]+3); + else + gran_mpacktime = MSGPACKTIME; + break; + + case 'x': + if (rts_argv[arg][3] != '\0') + gran_mtidytime = decode(rts_argv[arg]+3); + else + gran_mtidytime = 0; + break; + + case 'r': + if (rts_argv[arg][3] != '\0') + gran_munpacktime = decode(rts_argv[arg]+3); + else + gran_munpacktime = MSGUNPACKTIME; + break; + + case 'f': + if (rts_argv[arg][3] != '\0') + gran_fetchtime = decode(rts_argv[arg]+3); + else + gran_fetchtime = FETCHTIME; + break; + + case 'n': + if (rts_argv[arg][3] != '\0') + gran_gunblocktime = decode(rts_argv[arg]+3); + else + gran_gunblocktime = GLOBALUNBLOCKTIME; + break; + + case 'u': + if (rts_argv[arg][3] != '\0') + gran_lunblocktime = decode(rts_argv[arg]+3); + else + gran_lunblocktime = LOCALUNBLOCKTIME; + break; + + /* Thread-related metrics */ + case 't': + if (rts_argv[arg][3] != '\0') + gran_threadcreatetime = decode(rts_argv[arg]+3); + else + gran_threadcreatetime = THREADCREATETIME; + break; + + case 'q': + if (rts_argv[arg][3] != '\0') + gran_threadqueuetime = decode(rts_argv[arg]+3); + else + gran_threadqueuetime = THREADQUEUETIME; + break; + + case 'c': + if (rts_argv[arg][3] != '\0') + gran_threadscheduletime = decode(rts_argv[arg]+3); + else + gran_threadscheduletime = THREADSCHEDULETIME; + + gran_threadcontextswitchtime = gran_threadscheduletime + + gran_threaddescheduletime; + break; + + case 'd': + if (rts_argv[arg][3] != '\0') + gran_threaddescheduletime = decode(rts_argv[arg]+3); + else + gran_threaddescheduletime = 
THREADDESCHEDULETIME; + + gran_threadcontextswitchtime = gran_threadscheduletime + + gran_threaddescheduletime; + break; + + /* Instruction Cost Metrics */ + case 'A': + if (rts_argv[arg][3] != '\0') + gran_arith_cost = decode(rts_argv[arg]+3); + else + gran_arith_cost = ARITH_COST; + break; + + case 'F': + if (rts_argv[arg][3] != '\0') + gran_float_cost = decode(rts_argv[arg]+3); + else + gran_float_cost = FLOAT_COST; + break; + + case 'B': + if (rts_argv[arg][3] != '\0') + gran_branch_cost = decode(rts_argv[arg]+3); + else + gran_branch_cost = BRANCH_COST; + break; + + case 'L': + if (rts_argv[arg][3] != '\0') + gran_load_cost = decode(rts_argv[arg]+3); + else + gran_load_cost = LOAD_COST; + break; + + case 'S': + if (rts_argv[arg][3] != '\0') + gran_store_cost = decode(rts_argv[arg]+3); + else + gran_store_cost = STORE_COST; + break; + + case 'H': + if (rts_argv[arg][3] != '\0') + gran_heapalloc_cost = decode(rts_argv[arg]+3); + else + gran_heapalloc_cost = 0; + break; + + case 'y': + if (rts_argv[arg][3] != '\0') + FetchStrategy = decode(rts_argv[arg]+3); + else + FetchStrategy = 4; /* default: fetch everything */ + break; + + /* General Parameters */ + case 'p': + if (rts_argv[arg][3] != '\0') + { + max_proc = decode(rts_argv[arg]+3); + if(max_proc > MAX_PROC || max_proc < 1) + { + fprintf(stderr,"setupRtsFlags: no more than %u processors allowed\n", MAX_PROC); + error = 1; + } + } + else + max_proc = MAX_PROC; + break; + + case 'C': + ++DoAlwaysCreateThreads; + ++DoThreadMigration; + break; + + case 'G': + ++DoGUMMFetching; + break; + + case 'M': + ++DoThreadMigration; + break; + + case 'R': + ++DoFairSchedule; + break; + + case 'T': + ++DoStealThreadsFirst; + ++DoThreadMigration; + break; + + case 'Z': + ++DoReScheduleOnFetch; + break; + + case 'z': + ++SimplifiedFetch; + break; + + case 'N': + ++PreferSparksOfLocalNodes; + break; + + case 'b': + ++do_gr_binary; + break; + + case 'P': + ++do_gr_profile; + break; + + case 's': + ++do_sp_profile; + break; + + 
case '-': + switch(rts_argv[arg][3]) { + + case 'C': + DoAlwaysCreateThreads=0; + DoThreadMigration=0; + break; + + case 'G': + DoGUMMFetching=0; + break; + + case 'M': + DoThreadMigration=0; + break; + + case 'R': + DoFairSchedule=0; + break; + + case 'T': + DoStealThreadsFirst=0; + DoThreadMigration=0; + break; + + case 'Z': + DoReScheduleOnFetch=0; + break; + + case 'N': + PreferSparksOfLocalNodes=0; + break; + + case 'P': + do_gr_profile=0; + no_gr_profile=1; + break; + + case 's': + do_sp_profile=0; + break; + + case 'b': + do_gr_binary=0; + break; + + default: + badoption( rts_argv[arg] ); + break; + } + break; + +# if defined(GRAN_CHECK) && defined(GRAN) + case 'D': + switch(rts_argv[arg][3]) { + case 'e': /* event trace */ + fprintf(stderr,"Printing event trace.\n"); + ++event_trace; + break; + + case 'f': + fprintf(stderr,"Printing forwarding of FETCHNODES.\n"); + debug |= 0x2; /* print fwd messages */ + break; + + case 'z': + fprintf(stderr,"Check for blocked on fetch.\n"); + debug |= 0x4; /* debug non-reschedule-on-fetch */ + break; + + case 't': + fprintf(stderr,"Check for TSO asleep on fetch.\n"); + debug |= 0x10; /* debug TSO asleep for fetch */ + break; + + case 'E': + fprintf(stderr,"Printing event statistics.\n"); + debug |= 0x20; /* print event statistics */ + break; + + case 'F': + fprintf(stderr,"Prohibiting forward.\n"); + NoForward = 1; /* prohibit forwarding */ + break; + + case 'm': + fprintf(stderr,"Printing fetch misses.\n"); + PrintFetchMisses = 1; /* prohibit forwarding */ + break; + + case 'd': + fprintf(stderr,"Debug mode.\n"); + debug |= 0x40; + break; + + case 'D': + fprintf(stderr,"Severe debug mode.\n"); + debug |= 0x80; + break; + + case '\0': + debug = 1; + break; + + default: + badoption( rts_argv[arg] ); + break; + } + break; +# endif + default: + badoption( rts_argv[arg] ); + break; + } + } + do_gr_sim++; + contextSwitchTime = 0; + break; +# endif + case 'J': + case 'Q': + case 'D': + case 'R': + case 'L': + case 'O': + 
fprintf(stderr, "setupRtsFlags: Not built for parallel execution: %s\n", rts_argv[arg]); + error = 1; +# endif /* PAR */ +#else /* CONCURRENT */ + case 't': + fprintf(stderr, "setupRtsFlags: Not built for threaded execution: %s\n", rts_argv[arg]); + error = 1; + +#endif /* CONCURRENT */ + case 'H': /* SM options -- ignore */ + case 'A': + case 'G': + case 'F': + case 'K': + case 'M': + case 'B': + case 'T': +#ifdef GCdu + case 'u': /* set dual mode threshold */ +#endif + break; + + default: /* Unknown option ! */ + fprintf(stderr, "setupRtsFlags: Unknown RTS option: %s\n", rts_argv[arg]); + error = 1; + break; + } + } + else { + fflush(stdout); + fprintf(stderr, "setupRtsFlags: Unexpected RTS argument: %s\n", + rts_argv[arg]); + error = 1; + } + } + if (error == 1) { + char **p; + fflush(stdout); + for (p = flagtext; *p; p++) + fprintf(stderr, "%s\n", *p); + EXIT(EXIT_FAILURE); + } +} +\end{code} + +Sets up and returns a string indicating the date/time of the run. +Successive calls simply return the same string again. Initially +called by @main.lc@ to initialise the string at the start of the run. +Only used for profiling. + +\begin{code} +#if defined(USE_COST_CENTRES) || defined(CONCURRENT) +# include <time.h> + +char * +time_str(STG_NO_ARGS) +{ + static time_t now = 0; + static char nowstr[26]; + + if (now == 0) { + time(&now); + strcpy(nowstr, ctime(&now)); + strcpy(nowstr+16,nowstr+19); + nowstr[21] = '\0'; + } + return nowstr; +} +#endif /* profiling */ +\end{code} + +ToDo: Will this work under threads? 
+ +\begin{code} +StgStablePtr errorHandler = -1; + +StgInt getErrorHandler() +{ + return (StgInt) errorHandler; +} + +#ifndef PAR + +void raiseError( handler ) +StgStablePtr handler; +{ + if (handler == -1) { + shutdownHaskell(); + } else { + TopClosure = deRefStablePointer( handler ); + longjmp(restart_main,1); + } +} +\end{code} + +\begin{code} +StgInt +catchError( newErrorHandler ) +StgStablePtr newErrorHandler; +{ + StgStablePtr oldErrorHandler = errorHandler; + errorHandler = newErrorHandler; + return oldErrorHandler; +} + +#endif +\end{code} + +If we have installed an error handler, we might want to +indicate that we have successfully recovered from an error by +decrementing the counter. + +\begin{code} +void +decrementErrorCount() +{ + ErrorIO_call_count-=1; +} + +\end{code} |