1 files changed, 2103 insertions, 0 deletions
diff --git a/ghc/includes/StgMacros.lh b/ghc/includes/StgMacros.lh
new file mode 100644
index 0000000000..f74d18a0e0
--- /dev/null
+++ b/ghc/includes/StgMacros.lh
@@ -0,0 +1,2103 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1993-1994
+%
+\section[StgMacros]{C macros used in GHC-generated \tr{.hc} files}
+
+\begin{code}
+#ifndef STGMACROS_H
+#define STGMACROS_H
+\end{code}
+
+%************************************************************************
+%*									*
+\subsection[StgMacros-abbrev]{Abbreviatory(?) and general macros}
+%*									*
+%************************************************************************
+
+Mere abbreviations:
+\begin{code}
+/* for function declarations */
+#define STGFUN(f)  F_ f(STG_NO_ARGS)
+#define STATICFUN(f) static F_ f(STG_NO_ARGS)
+
+/* for functions/data that are really external to this module */
+#define EXTFUN(f)	extern F_ f(STG_NO_ARGS)
+#define EXTDATA(d)	extern W_ d[]
+#define EXTDATA_RO(d)	extern const W_ d[] /* read-only */
+
+/* for fwd decls to functions/data somewhere else in this module */
+/* (identical for the mo') */
+#define INTFUN(f)	static F_ f(STG_NO_ARGS)
+#define INTDATA(d)	extern W_ d[]
+#define INTDATA_RO(d)	extern const W_ d[] /* read-only */
+
+/* short forms of most of the above */
+
+#define FN_(f)		F_ f(STG_NO_ARGS)
+#define IFN_(f)		static F_ f(STG_NO_ARGS)
+#define EF_(f)		extern F_ f(STG_NO_ARGS)
+#define ED_(d)		extern W_ d[]
+#define ED_RO_(d)	extern const W_ d[] /* read-only */
+#define IF_(f)		static F_ f(STG_NO_ARGS)
+
+/* GCC is uncooperative about the next one: */
+/* But, the "extern" prevents initialisation... ADR */
+#if defined(__GNUC__)
+#define ID_(d)		extern W_ d[]
+#define ID_RO_(d)	extern const W_ d[] /* read-only */
+#else
+#define ID_(d)		static W_ d[]
+#define ID_RO_(d)	static const W_ d[] /* read-only */
+#endif /* not GCC */
+\end{code}
+
+General things; note: general-but-``machine-dependent'' macros are
+given in \tr{StgMachDeps.lh}.
+\begin{code}
+#define STG_MAX(a,b)	(((a)>=(b)) ? (a) : (b))
+
+/*
+Macros to combine two short words into a single
+word and split such a word back into two.
+
+Dependent on machine word size :-)
+*/
+
+#define	COMBINE_WORDS(word,short1,short2)		\
+	do {						\
+	    ((packed_shorts *)&(word))->wu.s1 = short1;	\
+	    ((packed_shorts *)&(word))->wu.s2 = short2;	\
+	} while(0)
+
+#define SPLIT_WORD(word,short1,short2)			\
+	do {						\
+	    short1 = ((packed_shorts *)&(word))->wu.s1;	\
+	    short2 = ((packed_shorts *)&(word))->wu.s2;	\
+	} while(0)
+
+\end{code}
+
+%************************************************************************
+%*									*
+\subsection[StgMacros-gen-stg]{General STGish macros}
+%*									*
+%************************************************************************
+
+Common sizes of vector-return tables.
+
+Claim: don't need fwd decls for return pts in \tr{VECTBL*}, because
+the AbsC flattener ensures that things come out sufficiently
+``backwards''.
+
+\begin{code}
+#ifdef __STG_REV_TBLS__
+#define UNVECTBL(staticp,label,a)   /* nothing */
+#else
+#define UNVECTBL(staticp,label,a) \
+EXTFUN(a); \
+staticp const W_ label[] = { \
+  (W_) a \
+};
+#endif
+\end{code}
+
+\begin{code}
+#if defined(USE_SPLIT_MARKERS)
+#define __STG_SPLIT_MARKER(n) FN_(CAT2(__stg_split_marker,n)){ }
+#else
+#define __STG_SPLIT_MARKER(n) /* nothing */
+#endif
+\end{code}
+
+%************************************************************************
+%*									*
+\subsection[StgMacros-exceptions]{Exception-checking macros}
+%*									*
+%************************************************************************
+
+Argument-satisfaction check, stack(s) overflow check, heap overflow
+check.
+
+The @SUBTRACT(upper, lower)@ macros return a positive result in words
+indicating the amount by which upper is above lower on the stack.
+
+\begin{code}
+#define SUBTRACT_A_STK( upper, lower ) AREL( (lower) - (upper) )
+#define SUBTRACT_B_STK( upper, lower ) BREL( (lower) - (upper) )
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-arg-satis]{Argument-satisfaction checks}
+%*									*
+%************************************************************************
+
+@ARGS_CHK(n)@ sees of there are @n@ words of args on the A/B stack.
+If not, it jumps to @UpdatePAP@.
+
+@ARGS_CHK@ args are pre-directionified.
+Notice that we do the comparisons in the form (x < a+n), for
+some constant n.  This generates more efficient code (with GCC at least)
+than (x-a < n).
+
+\begin{code}
+#define ARGS_CHK_A(n)						\
+	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
+	if (SuA /*SUBTRACT_A_STK( SpA, SuA )*/ < (SpA+(n))) {	\
+		JMP_( UpdatePAP );				\
+	}							\
+	SET_ACTIVITY(ACT_TAILCALL)
+
+#define ARGS_CHK_A_LOAD_NODE(n, closure_addr)			\
+	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
+	if (SuA /*SUBTRACT_A_STK( SpA, SuA )*/ < (SpA+(n))) {	\
+		Node = (P_) closure_addr;			\
+		JMP_( UpdatePAP );				\
+	}							\
+	SET_ACTIVITY(ACT_TAILCALL)
+
+
+#define ARGS_CHK_B(n)						\
+	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
+	if (SpB /*SUBTRACT_B_STK( SpB, SuB )*/ < (SuB-(n))) {	\
+		JMP_( UpdatePAP );				\
+	}							\
+	SET_ACTIVITY(ACT_TAILCALL)
+
+
+#define ARGS_CHK_B_LOAD_NODE(n, closure_addr)			\
+	SET_ACTIVITY(ACT_ARGS_CHK); /* SPAT counting */		\
+	if (SpB /*SUBTRACT_B_STK( SpB, SuB )*/ < (SuB-(n))) {	\
+		Node = (P_) closure_addr;			\
+		JMP_( UpdatePAP );				\
+	}							\
+	SET_ACTIVITY(ACT_TAILCALL)
+
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-stk-chks]{Stack-overflow check}
+%*									*
+%************************************************************************
+
+@STK_CHK(a,b)@ [misc args omitted...] checks that we can allocate @a@
+words of A stack and @b@ words of B stack.  If not, it calls
+@StackOverflow@ (which dies).
+
+(It will be different in the parallel case.)
+
+NB: args @a@ and @b@ are pre-direction-ified!
+\begin{code}
+extern I_ SqueezeUpdateFrames PROTO((P_, P_, P_));
+
+#if ! defined(CONCURRENT)
+
+extern void StackOverflow(STG_NO_ARGS) STG_NORETURN;
+
+#if STACK_CHECK_BY_PAGE_FAULT
+
+#define STACK_OVERFLOW(liveness,hda,hdb,spa,spb,rtype,reenter)	\
+    /* use memory protection instead; still need ticky-ness */
+
+#else
+
+#define STACK_OVERFLOW(liveness,hda,hdb,spa,spb,rtype,reenter)	\
+    ULTRASAFESTGCALL0(void,(void *),StackOverflow)
+
+#endif /* not using page-faulting */
+
+#else /* threaded */
+
+extern I_ StackOverflow PROTO((W_, W_));
+
+/*
+ * On a uniprocessor, we do *NOT* context switch on a stack overflow 
+ * (though we may GC).  Therefore, we never have to reenter node.
+ */
+
+#define STACK_OVERFLOW(liveness,hda,hdb,spa,spb,rtype,reenter) \
+    DO_STACKOVERFLOW((hda+hdb)<<2|((rtype)<<1)|(reenter),((spa)<<20)|((spb)<<8)|(liveness))
+
+#define STACK_OVERFLOW_HEADROOM(args,y)	    ((args) >> 2)
+#define STACK_OVERFLOW_PRIM_RETURN(args,y)  ((args) & 2)
+#define STACK_OVERFLOW_REENTER(args,y)	    ((args) & 1)
+
+#define STACK_OVERFLOW_AWORDS(x,args)	    (((args) >> 20) & 0x0fff)
+#define STACK_OVERFLOW_BWORDS(x,args)	    (((args) >> 8) & 0x0fff)
+#define STACK_OVERFLOW_LIVENESS(x,args)	    ((args) & 0xff)
+
+#endif	/* CONCURRENT */
+
+#define STK_CHK(liveness_mask,a_headroom,b_headroom,spa,spb,ret_type,reenter)\
+do {								\
+  DO_ASTK_HWM(); /* ticky-ticky profiling */			\
+  DO_BSTK_HWM();						\
+  /* SET_ACTIVITY(ACT_STK_CHK); /? SPAT counting -- no, using page faulting */	\
+  if (STKS_OVERFLOW_OP((a_headroom) + 1, (b_headroom) + 1)) {	\
+    STACK_OVERFLOW(liveness_mask,a_headroom,b_headroom,spa,spb,ret_type,reenter);\
+  }								\
+}while(0)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-arity-chks]{Arity checks (for debugging)}
+%*									*
+%************************************************************************
+
+This is a debugging feature.  Each call to fast-entry-point code sets
+@ExpectedArity@ to some value, and the callee then checks that the
+value is as expected.
+
+\begin{code}
+#if defined(__DO_ARITY_CHKS__)
+
+extern I_			 ExpectedArity;
+extern void ArityError PROTO((I_)) STG_NORETURN;
+
+#define SET_ARITY(n) do { ExpectedArity = (n); } while(0)
+#define CHK_ARITY(n)			\
+	do {				\
+	if (ExpectedArity != (n)) {	\
+	    ULTRASAFESTGCALL1(void,(void *, I_),ArityError,n);	\
+	}}while(0)
+
+#else /* ! __DO_ARITY_CHKS__: normal case */
+
+#define SET_ARITY(n) /* nothing */
+#define CHK_ARITY(n) /* nothing */
+
+#endif /* ! __DO_ARITY_CHKS__ */
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-heap-chks]{Heap-overflow checks}
+%*									*
+%************************************************************************
+
+Please see the general discussion/commentary about ``what really
+happens in a GC,'' in \tr{SMinterface.lh}.
+
+\begin{code}
+extern void PerformGC PROTO((W_));
+void RealPerformGC PROTO((W_ liveness, W_ reqsize, W_  always_reenter_node, rtsBool do_full_collection));
+void checkInCCallGC(STG_NO_ARGS);
+
+#ifndef PAR
+void StgPerformGarbageCollection(STG_NO_ARGS);
+#endif
+
+#ifndef CONCURRENT
+
+#define OR_MSG_PENDING	/* never */
+
+#define HEAP_OVERFLOW(liveness,n,reenter)	\
+    do {					\
+    SET_ACTIVITY(ACT_GC); /* SPAT profiling */	\
+    DO_GC((((W_)n)<<8)|(liveness));		\
+    SET_ACTIVITY(ACT_GC_STOP);			\
+    } while (0)
+
+#define REQSIZE_BITMASK	((1L << ((BITS_IN(W_) - 8 + 1))) - 1)
+#define HEAP_OVERFLOW_REQSIZE(args)	(((args) >> 8) & REQSIZE_BITMASK)
+#define HEAP_OVERFLOW_REENTER(args)	0
+#define HEAP_OVERFLOW_LIVENESS(args)	((args) & 0xff)
+
+#else /* CONCURRENT */
+
+extern void ReallyPerformThreadGC PROTO((W_, rtsBool));
+
+#define HEAP_OVERFLOW(liveness,n,reenter) 	\
+    do {					\
+    SET_ACTIVITY(ACT_GC); /* SPAT profiling */	\
+    DO_GC((((W_)(n))<<9)|((reenter)<<8)|(liveness)); \
+    SET_ACTIVITY(ACT_GC_STOP);			\
+    } while (0)
+
+#define REQSIZE_BITMASK	((1L << ((BITS_IN(W_) - 9 + 1))) - 1)
+#define HEAP_OVERFLOW_REQSIZE(args)	(((args) >> 9) & REQSIZE_BITMASK)
+#define HEAP_OVERFLOW_REENTER(args)	(((args) >> 8) & 0x1)
+#define HEAP_OVERFLOW_LIVENESS(args)	((args) & 0xff)
+
+#ifndef PAR
+
+#define OR_MSG_PENDING	/* never */
+
+#else 
+
+extern int PacketsWaiting;		/*Probes for incoming messages*/
+extern int heapChkCounter;		/*Not currently used! We check for messages when*/
+					/*a thread is resheduled PWT*/
+/* #define OR_MSG_PENDING	|| (--heapChkCounter == 0 && PacketsWaiting())*/
+#define OR_MSG_PENDING	/* never */
+
+#endif	/* PAR */
+#endif	/* CONCURRENT */
+
+#if 0 /* alpha_TARGET_ARCH */
+#define CACHE_LINE  4	/* words */
+#define LINES_AHEAD 3
+#define PRE_FETCH(n)					\
+do {							\
+ StgInt j;						\
+ j = ((STG_VOLATILE StgInt *) Hp)[LINES_AHEAD * CACHE_LINE];	\
+} while(0);
+#define EXTRA_HEAP_WORDS (CACHE_LINE * LINES_AHEAD)
+#else
+#define PRE_FETCH(reg)
+#define EXTRA_HEAP_WORDS 0
+#endif
+
+#if defined(GRAN)
+#define HEAP_CHK(liveness_mask,n,reenter)			\
+	do {							\
+	/* TICKY_PARANOIA(__FILE__, __LINE__); */		\
+	/* THREAD_CONTEXT_SWITCH(liveness_mask,reenter); */	        \
+	ALLOC_HEAP(n); /* ticky profiling */			\
+        GRAN_ALLOC_HEAP(n,liveness_mask); /* Granularity Simulation */ \
+	SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */		\
+	if (((Hp = Hp + (n)) > HpLim)) {			\
+	    /* Old:  STGCALL3_GC(PerformGC,liveness_mask,n,StgFalse); */\
+	    HEAP_OVERFLOW(liveness_mask,n,StgFalse); \
+	}							\
+	SET_ACTIVITY(ACT_REDN); /* back to normal reduction */	\
+	}while(0)
+
+#else
+
+#define HEAP_CHK(liveness_mask,n,reenter)		\
+do {							\
+  /* TICKY_PARANOIA(__FILE__, __LINE__); */		\
+  PRE_FETCH(n);						\
+  ALLOC_HEAP(n); /* ticky profiling */			\
+  SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */	\
+  if (((Hp = Hp + (n)) > HpLim) OR_INTERVAL_EXPIRED OR_CONTEXT_SWITCH OR_MSG_PENDING) {	\
+    HEAP_OVERFLOW(liveness_mask,n,reenter);		\
+  }							\
+} while(0)
+
+#endif  /* GRAN */
+
+#ifdef CONCURRENT
+
+#define HEAP_CHK_AND_RESTORE_N(liveness_mask,n,reenter)	\
+do {							\
+  /* TICKY_PARANOIA(__FILE__, __LINE__); */		\
+  PRE_FETCH(n);						\
+  ALLOC_HEAP(n); /* ticky profiling */			\
+  SET_ACTIVITY(ACT_HEAP_CHK); /* SPAT counting */	\
+  if (((Hp = Hp + (n)) > HpLim) OR_INTERVAL_EXPIRED OR_CONTEXT_SWITCH OR_MSG_PENDING) {	\
+    HEAP_OVERFLOW(liveness_mask,n,reenter);		\
+    n = TSO_ARG1(CurrentTSO);				\
+  }							\
+  SET_ACTIVITY(ACT_REDN); /* back to normal reduction */\
+} while(0)
+
+#else
+
+#define HEAP_CHK_AND_RESTORE_N(liveness_mask,n,reenter)	    \
+    HEAP_CHK(liveness_mask,n,reenter)
+
+#endif
+
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[StgMacros-prim-ops]{Primitive operations}
+%*									*
+%************************************************************************
+
+One thing to be {\em very careful about} with these macros that assign
+to results is that the assignment must come {\em last}.	 Some of the
+other arguments may be in terms of addressing modes that get clobbered
+by the assignment.  (Dirty imperative programming RULES!)
+
+The order here is roughly that in \tr{compiler/prelude/PrimOps.lhs}.
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-compare-primops]{Primitive comparison ops on basic types}
+%*									*
+%************************************************************************
+
+We cast the chars in case one of them is a literal (so C things work right
+even for 8-bit chars).
+\begin{code}
+#define gtCharZh(r,a,b)	r=(I_)((a)> (b))
+#define geCharZh(r,a,b)	r=(I_)((a)>=(b))
+#define eqCharZh(r,a,b)	r=(I_)((a)==(b))
+#define neCharZh(r,a,b)	r=(I_)((a)!=(b))
+#define ltCharZh(r,a,b)	r=(I_)((a)< (b))
+#define leCharZh(r,a,b)	r=(I_)((a)<=(b))
+
+#define gtIntZh(r,a,b)	r=(I_)((a) >(b))
+#define geIntZh(r,a,b)	r=(I_)((a)>=(b))
+#define eqIntZh(r,a,b)	r=(I_)((a)==(b))
+#define neIntZh(r,a,b)	r=(I_)((a)!=(b))
+#define ltIntZh(r,a,b)	r=(I_)((a) <(b))
+#define leIntZh(r,a,b)	r=(I_)((a)<=(b))
+
+#define gtWordZh(r,a,b)	r=(I_)((a) >(b))
+#define geWordZh(r,a,b)	r=(I_)((a)>=(b))
+#define eqWordZh(r,a,b)	r=(I_)((a)==(b))
+#define neWordZh(r,a,b)	r=(I_)((a)!=(b))
+#define ltWordZh(r,a,b)	r=(I_)((a) <(b))
+#define leWordZh(r,a,b)	r=(I_)((a)<=(b))
+
+#define gtAddrZh(r,a,b)	r=(I_)((a) >(b))
+#define geAddrZh(r,a,b)	r=(I_)((a)>=(b))
+#define eqAddrZh(r,a,b)	r=(I_)((a)==(b))
+#define neAddrZh(r,a,b)	r=(I_)((a)!=(b))
+#define ltAddrZh(r,a,b)	r=(I_)((a) <(b))
+#define leAddrZh(r,a,b)	r=(I_)((a)<=(b))
+
+#define gtFloatZh(r,a,b)  r=(I_)((a)> (b))
+#define geFloatZh(r,a,b)  r=(I_)((a)>=(b))
+#define eqFloatZh(r,a,b)  r=(I_)((a)==(b))
+#define neFloatZh(r,a,b)  r=(I_)((a)!=(b))
+#define ltFloatZh(r,a,b)  r=(I_)((a)< (b))
+#define leFloatZh(r,a,b)  r=(I_)((a)<=(b))
+
+#define gtDoubleZh(r,a,b) r=(I_)((a)> (b))
+#define geDoubleZh(r,a,b) r=(I_)((a)>=(b))
+#define eqDoubleZh(r,a,b) r=(I_)((a)==(b))
+#define neDoubleZh(r,a,b) r=(I_)((a)!=(b))
+#define ltDoubleZh(r,a,b) r=(I_)((a)< (b))
+#define leDoubleZh(r,a,b) r=(I_)((a)<=(b))
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-char-primops]{Primitive @Char#@ ops (and @LitString#@ish things, too)}
+%*									*
+%************************************************************************
+
+We cast the chars in case one of them is a literal (so C things work right
+even for 8-bit chars).
+\begin{code}
+#define ordZh(r,a)	r=(I_)((W_) (a))
+#define chrZh(r,a)	r=(StgChar)((W_)(a))
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-int-primops]{Primitive @Int#@ ops}
+%*									*
+%************************************************************************
+
+\begin{code}
+I_ stg_div PROTO((I_ a, I_ b));
+
+#define plusIntZh(r,a,b)	r=(a)+(b)
+#define minusIntZh(r,a,b)	r=(a)-(b)
+#define timesIntZh(r,a,b)	r=(a)*(b)
+#define quotIntZh(r,a,b)	r=(a)/(b)
+#define divIntZh(r,a,b)		r=ULTRASAFESTGCALL2(I_,(void *, I_, I_),stg_div,(a),(b))
+#define remIntZh(r,a,b)		r=(a)%(b)
+#define negateIntZh(r,a)	r=-(a)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-word-primops]{Primitive @Word#@ ops}
+%*									*
+%************************************************************************
+
+\begin{code}
+#define andZh(r,a,b)	r=(a)&(b)
+#define orZh(r,a,b)	r=(a)|(b)
+#define notZh(r,a)	r=~(a)
+
+#define shiftLZh(r,a,b)	  r=(a)<<(b)
+#define shiftRAZh(r,a,b)  r=(a)>>(b)
+#define shiftRLZh(r,a,b)  r=(a)>>(b)
+#define iShiftLZh(r,a,b)  r=(a)<<(b)
+#define iShiftRAZh(r,a,b) r=(a)>>(b)
+#define iShiftRLZh(r,a,b) r=(a)>>(b)
+
+#define int2WordZh(r,a) r=(W_)(a)
+#define word2IntZh(r,a) r=(I_)(a)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-addr-primops]{Primitive @Addr#@ ops}
+%*									*
+%************************************************************************
+
+\begin{code}
+#define int2AddrZh(r,a) r=(A_)(a)
+#define addr2IntZh(r,a) r=(I_)(a)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-float-primops]{Primitive @Float#@ ops}
+%*									*
+%************************************************************************
+
+\begin{code}
+#define plusFloatZh(r,a,b)	r=(a)+(b)
+#define minusFloatZh(r,a,b)	r=(a)-(b)
+#define timesFloatZh(r,a,b)	r=(a)*(b)
+#define divideFloatZh(r,a,b)	r=(a)/(b)
+#define negateFloatZh(r,a)	r=-(a)
+
+#define int2FloatZh(r,a)	r=(StgFloat)(a)
+#define float2IntZh(r,a)	r=(I_)(a)
+
+#define expFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),exp,a)
+#define logFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),log,a)
+#define sqrtFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),sqrt,a)
+#define sinFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),sin,a)
+#define cosFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),cos,a)
+#define tanFloatZh(r,a)		r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),tan,a)
+#define asinFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),asin,a)
+#define acosFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),acos,a)
+#define atanFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),atan,a)
+#define sinhFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),sinh,a)
+#define coshFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),cosh,a)
+#define tanhFloatZh(r,a)	r=(StgFloat) SAFESTGCALL1(StgDouble,(void *, StgDouble),tanh,a)
+#define powerFloatZh(r,a,b)	r=(StgFloat) SAFESTGCALL2(StgDouble,(void *, StgDouble,StgDouble),pow,a,b)
+
+/* encoding/decoding given w/ Integer stuff */
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-double-primops]{Primitive @Double#@ ops}
+%*									*
+%************************************************************************
+
+\begin{code}
+#define plusDoubleZh(r,a,b)	r=(a)+(b)
+#define minusDoubleZh(r,a,b)	r=(a)-(b)
+#define timesDoubleZh(r,a,b)	r=(a)*(b)
+#define divideDoubleZh(r,a,b)	r=(a)/(b)
+#define negateDoubleZh(r,a)	r=-(a)
+
+#define int2DoubleZh(r,a)	r=(StgDouble)(a)
+#define double2IntZh(r,a)	r=(I_)(a)
+
+#define float2DoubleZh(r,a)	r=(StgDouble)(a)
+#define double2FloatZh(r,a)	r=(StgFloat)(a)
+
+#define expDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),exp,a)
+#define logDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),log,a)
+#define sqrtDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),sqrt,a)
+#define sinDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),sin,a)
+#define cosDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),cos,a)
+#define tanDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),tan,a)
+#define asinDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),asin,a)
+#define acosDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),acos,a)
+#define atanDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),atan,a)
+#define sinhDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),sinh,a)
+#define coshDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),cosh,a)
+#define tanhDoubleZh(r,a)	r=(StgDouble) SAFESTGCALL1(StgDouble,(void *, StgDouble),tanh,a)
+#define powerDoubleZh(r,a,b)	r=(StgDouble) SAFESTGCALL2(StgDouble,(void *, StgDouble,StgDouble),pow,a,b)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-integer-primops]{Primitive @Integer@-related ops (GMP stuff)}
+%*									*
+%************************************************************************
+
+Dirty macros we use for the real business.
+
+INVARIANT: When one of these macros is called, the only live data is
+tidily on the STG stacks or in the STG registers (the code generator
+ensures this).	If there are any pointer-arguments, they will be in
+the first \tr{Ret*} registers (e.g., \tr{da} arg of \tr{gmpTake1Return1}).
+
+OK, here are the real macros:
+\begin{code}
+#define gmpTake1Return1(size_chk_macro, liveness, mpz_op, ar,sr,dr, aa,sa,da)	\
+{ MP_INT arg;									\
+  MP_INT result;								\
+  I_ space = size_chk_macro(sa);						\
+										\
+  /* Check that there will be enough heap & make Hp visible to GMP allocator */	\
+  GMP_HEAP_LOOKAHEAD(liveness,space);						\
+										\
+  /* Now we can initialise (post possible GC) */				\
+  arg.alloc	= (aa);								\
+  arg.size	= (sa);								\
+  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da));			\
+										\
+  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result);			\
+										\
+  /* Perform the operation */							\
+  SAFESTGCALL2(void,(void *, MP_INT *, MP_INT *),mpz_op,&result,&arg);		\
+										\
+  GMP_HEAP_HANDBACK();		/* restore Hp */				\
+  (ar) = result.alloc;								\
+  (sr) = result.size;								\
+  (dr) = (B_) (result.d - DATA_HS);						\
+  /* pt to *beginning* of object (GMP has been monkeying around in the middle) */ \
+}
+
+
+#define gmpTake2Return1(size_chk_macro, liveness, mpz_op, ar,sr,dr, a1,s1,d1, a2,s2,d2)\
+{ MP_INT arg1;									\
+  MP_INT arg2;									\
+  MP_INT result;								\
+  I_ space = size_chk_macro(s1,s2);						\
+										\
+  /* Check that there will be enough heap & make Hp visible to GMP allocator */	\
+  GMP_HEAP_LOOKAHEAD(liveness,space);						\
+										\
+  /* Now we can initialise (post possible GC) */				\
+  arg1.alloc	= (a1);								\
+  arg1.size	= (s1);								\
+  arg1.d	= (unsigned long int *) (BYTE_ARR_CTS(d1));			\
+  arg2.alloc	= (a2);								\
+  arg2.size	= (s2);								\
+  arg2.d	= (unsigned long int *) (BYTE_ARR_CTS(d2));			\
+										\
+  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result);			\
+										\
+  /* Perform the operation */							\
+  SAFESTGCALL3(void,(void *, MP_INT *, MP_INT *, MP_INT *),mpz_op,&result,&arg1,&arg2); \
+										\
+  GMP_HEAP_HANDBACK();		/* restore Hp */				\
+  (ar) = result.alloc;								\
+  (sr) = result.size;								\
+  (dr) = (B_) (result.d - DATA_HS);						\
+  /* pt to *beginning* of object (GMP has been monkeying around in the middle) */ \
+}
+
+#define gmpTake2Return2(size_chk_macro, liveness, mpz_op, ar1,sr1,dr1, ar2,sr2,dr2, a1,s1,d1, a2,s2,d2) \
+{ MP_INT arg1;									\
+  MP_INT arg2;									\
+  MP_INT result1;								\
+  MP_INT result2;								\
+  I_ space = size_chk_macro(s1,s2);						\
+										\
+  /* Check that there will be enough heap & make Hp visible to GMP allocator */	\
+  GMP_HEAP_LOOKAHEAD(liveness,space);						\
+										\
+  /* Now we can initialise (post possible GC) */				\
+  arg1.alloc	= (a1);								\
+  arg1.size	= (s1);								\
+  arg1.d	= (unsigned long int *) (BYTE_ARR_CTS(d1));			\
+  arg2.alloc	= (a2);								\
+  arg2.size	= (s2);								\
+  arg2.d	= (unsigned long int *) (BYTE_ARR_CTS(d2));			\
+										\
+  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result1);			\
+  SAFESTGCALL1(void,(void *, MP_INT *),mpz_init,&result2);			\
+										\
+  /* Perform the operation */							\
+  SAFESTGCALL4(void,(void *, MP_INT *, MP_INT *, MP_INT *, MP_INT *),mpz_op,&result1,&result2,&arg1,&arg2); \
+										\
+  GMP_HEAP_HANDBACK();		/* restore Hp */				\
+  (ar1) = result1.alloc;							\
+  (sr1) = result1.size;								\
+  (dr1) = (B_) (result1.d - DATA_HS);						\
+  (ar2) = result2.alloc;							\
+  (sr2) = result2.size;								\
+  (dr2) = (B_) (result2.d - DATA_HS);						\
+}
+\end{code}
+
+Some handy size-munging macros: sometimes gratuitously {\em conservative}.
+The \tr{+16} is to allow for the initial allocation of \tr{MP_INT} results.
+The \tr{__abs} stuff is because negative-ness of GMP things is encoded
+in their ``size''...
+\begin{code}
+#define __abs(a)		(( (a) >= 0 ) ? (a) : (-(a)))
+#define GMP_SIZE_ONE()		(2 + DATA_HS + 16)
+#define GMP_SAME_SIZE(a)	(__abs(a) + DATA_HS + 16)
+#define GMP_MAX_SIZE(a,b)	((__abs(a) > __abs(b) ? __abs(a) : __abs(b)) + 1 + DATA_HS + 16)
+				/* NB: the +1 is for the carry (or whatever) */
+#define GMP_2MAX_SIZE(a,b)	(2 * GMP_MAX_SIZE(a,b))
+#define GMP_ADD_SIZES(a,b)	(__abs(a) + __abs(b) + 1 + DATA_HS + 16)
+				/* the +1 may just be paranoia */
+\end{code}
+
+For the Integer/GMP stuff, we have macros that {\em look ahead} for
+some space, but don't actually grab it.
+
+If there are live pointers at the time of the lookahead, the caller
+must make sure they are in \tr{Ret1}, \tr{Ret2}, ..., so they can be
+handled normally.  We achieve this by having the code generator {\em
+always} pass args to may-invoke-GC primitives in registers, using the
+normal pointers-first policy.  This means that, if we do go to garbage
+collection, everything is already in the Right Place.
+
+Saving and restoring Hp register so the MP allocator can see them. If we are
+performing liftime profiling need to save and restore HpLim as well so that
+it can be bumped if allocation occurs.
+
+The second argument to @GMP_HEAP_LOOKAHEAD@ must be an lvalue so that
+it can be restored from @TSO_ARG1@ after a failed @HEAP_CHK@ in
+threaded land.
+
+\begin{code}
+#define GMP_HEAP_LOOKAHEAD(liveness,n)			\
+	do {						\
+	HEAP_CHK_AND_RESTORE_N(liveness,n,0);		\
+	Hp = Hp - (n);					\
+	UN_ALLOC_HEAP(n);	/* Undo ticky-ticky */	\
+	SAVE_Hp = Hp;		/* Hand over the hp */	\
+	DEBUG_SetGMPAllocBudget(n)			\
+	OptSaveHpLimRegister()				\
+	}while(0)
+
+#define GMP_HEAP_HANDBACK()				\
+	Hp = SAVE_Hp;					\
+	DEBUG_ResetGMPAllocBudget()			\
+	OptRestoreHpLimRegister()
+\end{code}
+
+\begin{code}
+void *stgAllocForGMP   PROTO((size_t size_in_bytes));
+void *stgReallocForGMP PROTO((void *ptr, size_t old_size, size_t new_size));
+void stgDeallocForGMP  PROTO((void *ptr, size_t size));
+
+#ifdef ALLOC_DEBUG
+extern StgInt DEBUG_GMPAllocBudget;
+#define DEBUG_SetGMPAllocBudget(n)  DEBUG_GMPAllocBudget = (n);
+#define DEBUG_ResetGMPAllocBudget() DEBUG_GMPAllocBudget = 0;
+#else
+#define DEBUG_SetGMPAllocBudget(n)  /*nothing*/
+#define DEBUG_ResetGMPAllocBudget() /*nothing*/
+#endif
+\end{code}
+
+\begin{code}
+#if defined (LIFE_PROFILE)
+
+#define OptSaveHpLimRegister() \
+	SAVE_HpLim = HpLim
+#define OptRestoreHpLimRegister() \
+	HpLim = SAVE_HpLim
+
+#else  /* ! LIFE_PROFILE */
+
+#define OptSaveHpLimRegister()	    /* nothing */
+#define OptRestoreHpLimRegister()   /* nothing */
+
+#endif /* ! LIFE_PROFILE */
+\end{code}
+
+The real business (defining Integer primops):
+\begin{code}
+#define negateIntegerZh(ar,sr,dr, liveness, aa,sa,da) \
+	gmpTake1Return1(GMP_SAME_SIZE, liveness, mpz_neg, ar,sr,dr, aa,sa,da)
+
+#define plusIntegerZh(ar,sr,dr, liveness, a1,s1,d1, a2,s2,d2) \
+	gmpTake2Return1(GMP_MAX_SIZE, liveness, mpz_add, ar,sr,dr, a1,s1,d1, a2,s2,d2)
+#define minusIntegerZh(ar,sr,dr, liveness, a1,s1,d1, a2,s2,d2) \
+	gmpTake2Return1(GMP_MAX_SIZE, liveness, mpz_sub, ar,sr,dr, a1,s1,d1, a2,s2,d2)
+#define timesIntegerZh(ar,sr,dr, liveness, a1,s1,d1, a2,s2,d2) \
+	gmpTake2Return1(GMP_ADD_SIZES, liveness, mpz_mul, ar,sr,dr, a1,s1,d1, a2,s2,d2)
+
+/* div, mod, quot, rem are defined w/ quotRem & divMod */
+
+#define quotRemIntegerZh(ar1,sr1,dr1, ar2,sr2,dr2, liveness, a1,s1,d1, a2,s2,d2) \
+	gmpTake2Return2(GMP_2MAX_SIZE, liveness, mpz_divmod, ar1,sr1,dr1, ar2,sr2,dr2, a1,s1,d1, a2,s2,d2)
+#define divModIntegerZh(ar1,sr1,dr1, ar2,sr2,dr2, liveness,  a1,s1,d1, a2,s2,d2) \
+	gmpTake2Return2(GMP_2MAX_SIZE, liveness, mpz_mdivmod, ar1,sr1,dr1, ar2,sr2,dr2, a1,s1,d1, a2,s2,d2)
+\end{code}
+
+Comparison ops (@<@, @>=@, etc.) are defined in terms of the cmp
+fellow (returns -ve, 0, or +ve).
+\begin{code}
+#define cmpIntegerZh(r, hp, a1,s1,d1, a2,s2,d2) /* calls mpz_cmp */		\
+{ MP_INT arg1;									\
+  MP_INT arg2;									\
+  /* Does not allocate memory */						\
+										\
+  arg1.alloc	= (a1);								\
+  arg1.size	= (s1);								\
+  arg1.d	= (unsigned long int *) (BYTE_ARR_CTS(d1));			\
+  arg2.alloc	= (a2);								\
+  arg2.size	= (s2);								\
+  arg2.d	= (unsigned long int *) (BYTE_ARR_CTS(d2));			\
+										\
+  (r) = SAFESTGCALL2(I_,(void *, MP_INT *, MP_INT *),mpz_cmp,&arg1,&arg2);	\
+}
+\end{code}
+
+Coercions:
+
+\begin{code}
+#define integer2IntZh(r, hp, aa,sa,da)						\
+{ MP_INT arg;									\
+  /* Does not allocate memory */						\
+										\
+  arg.alloc	= (aa);								\
+  arg.size	= (sa);								\
+  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da));			\
+										\
+  (r) = SAFESTGCALL1(I_,(void *, MP_INT *),mpz_get_si,&arg);			\
+}
+
+/* Since we're forced to know a little bit about MP_INT layout to do this with
+   pre-allocated heap, we just inline the whole of mpz_init_set_si here.
+	** DIRE WARNING.  if mpz_init_set_si changes, so does this! ***
+*/
+
+#define int2IntegerZh(ar,sr,dr, hp, i)						\
+{ StgInt val; /* to snaffle arg to avoid aliasing */				\
+										\
+  val = (i);  /* snaffle... */							\
+										\
+  SET_DATA_HDR((hp),ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);		\
+										\
+  if	  ((val) < 0) { (sr) = -1; (hp)[DATA_HS] = -(val); }			\
+  else if ((val) > 0) { (sr) =	1; (hp)[DATA_HS] =  (val); }			\
+  else /* val==0 */   { (sr) =	0; }						\
+  (ar) = 1;									\
+  (dr) = (B_)(hp);		/* dr is an StgByteArray */			\
+}
+
+#define word2IntegerZh(ar,sr,dr, hp, i)						\
+{ StgWord val; /* to snaffle arg to avoid aliasing */				\
+										\
+  val = (i);  /* snaffle... */							\
+										\
+  SET_DATA_HDR((hp),ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);		\
+										\
+  if ((val) != 0)     { (sr) =	1; (hp)[DATA_HS] =  (val); }			\
+  else /* val==0 */   { (sr) =	0; }						\
+  (ar) = 1;									\
+  (dr) = (B_)(hp);		/* dr is an StgByteArray */			\
+}
+
+\end{code}
+
+Then there are a few oddments to make life easier:
+\begin{code}
+/*
+   DIRE WARNING.
+   The "str" argument must be a literal C string.
+
+	addr2Integer( ..., "foo")   OK!
+
+	x = "foo";
+	addr2Integer( ..., x)	    NO! NO!
+*/
+
+#define addr2IntegerZh(ar,sr,dr, liveness, str)					\
+{ MP_INT result;								\
+  /* taking the number of bytes/8 as the number of words of lookahead		\
+     is plenty conservative */							\
+  I_ space = GMP_SAME_SIZE(sizeof(str) / 8 + 1);				\
+										\
+  GMP_HEAP_LOOKAHEAD(liveness, space);						\
+										\
+  /* Perform the operation */							\
+  if (SAFESTGCALL3(I_,(void *, MP_INT *, char *, int), mpz_init_set_str,&result,(str),/*base*/10)) \
+      abort();									\
+										\
+  GMP_HEAP_HANDBACK();		/* restore Hp */				\
+  (ar) = result.alloc;								\
+  (sr) = result.size;								\
+  (dr) = (B_) (result.d - DATA_HS);						\
+  /* pt to *beginning* of object (GMP has been monkeying around in the middle) */ \
+}
+\end{code}
+
+Encoding and decoding float-ish things is pretty Integer-ish.  We use
+these pretty magical support functions, essentially stolen from Lennart:
+\begin{code}
+StgFloat  __encodeFloat	 PROTO((MP_INT *, I_));
+void  __decodeFloat  PROTO((MP_INT * /*result1*/,
+				I_ * /*result2*/,
+				StgFloat));
+
+StgDouble __encodeDouble PROTO((MP_INT *, I_));
+void  __decodeDouble PROTO((MP_INT * /*result1*/,
+				I_ * /*result2*/,
+				StgDouble));
+\end{code}
+
+Some floating-point format info, made with the \tr{enquire} program
+(version~4.3) [comes with gcc].
+\begin{code}
+/* this should be done by CPU architecture, insofar as possible [WDP] */
+
+#if sparc_TARGET_ARCH	\
+ || alpha_TARGET_ARCH	\
+ || hppa1_1_TARGET_ARCH	\
+ || i386_TARGET_ARCH	\
+ || i486_TARGET_ARCH	\
+ || m68k_TARGET_ARCH	\
+ || mipsel_TARGET_ARCH	\
+ || mipseb_TARGET_ARCH	\
+ || rs6000_TARGET_ARCH
+
+/* yes, it is IEEE floating point */
+#include "ieee-flpt.h"
+
+#if alpha_dec_osf1_TARGET	\
+ || i386_TARGET_ARCH		\
+ || i486_TARGET_ARCH		\
+ || mipsel_TARGET_ARCH
+
+#undef BIGENDIAN /* little-endian weirdos... */
+#else
+#define BIGENDIAN 1
+#endif
+
+#else /* unknown floating-point format */
+
+******* ERROR *********** Any ideas about floating-point format?
+
+#endif /* unknown floating-point */
+\end{code}
+
+\begin{code}
+#if alpha_dec_osf1_TARGET
+#define encodeFloatZh(r, hp, aa,sa,da, expon)	encodeDoubleZh(r, hp, aa,sa,da, expon)
+#else
+#define encodeFloatZh(r, hp, aa,sa,da, expon)	\
+{ MP_INT arg;					\
+  /* Does not allocate memory */		\
+						\
+  arg.alloc	= aa;				\
+  arg.size	= sa;				\
+  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da)); \
+						\
+  r = SAFESTGCALL2(StgFloat,(void *, MP_INT *, I_), __encodeFloat,&arg,(expon));	\
+}
+#endif /* ! alpha */
+
+#define encodeDoubleZh(r, hp, aa,sa,da, expon)	\
+{ MP_INT arg;					\
+  /* Does not allocate memory */		\
+						\
+  arg.alloc	= aa;				\
+  arg.size	= sa;				\
+  arg.d		= (unsigned long int *) (BYTE_ARR_CTS(da)); \
+						\
+  r = SAFESTGCALL2(StgDouble,(void *, MP_INT *, I_), __encodeDouble,&arg,(expon));\
+}
+
+#if alpha_dec_osf1_TARGET
+#define decodeFloatZh(exponr, ar,sr,dr, hp, f)	decodeDoubleZh(exponr, ar,sr,dr, hp, f)
+#else
+#define decodeFloatZh(exponr, ar,sr,dr, hp, f)				\
+{ MP_INT mantissa;							\
+  I_ exponent;								\
+  StgFloat arg = (f);							\
+									\
+  /* Be prepared to tell Lennart-coded __decodeFloat	*/		\
+  /* where mantissa.d can be put (it does not care about the rest) */	\
+  SET_DATA_HDR(hp,ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);	\
+  mantissa.d = (hp) + DATA_HS;						\
+									\
+  /* Perform the operation */						\
+  SAFESTGCALL3(void,(void *, MP_INT *, I_ *, StgFloat),__decodeFloat,&mantissa,&exponent,arg);		\
+  exponr= exponent;							\
+  ar	= mantissa.alloc;						\
+  sr	= mantissa.size;						\
+  dr	= (B_)(hp);							\
+}
+#endif /* !alpha */
+
+#define decodeDoubleZh(exponr, ar,sr,dr, hp, f)				\
+{ MP_INT mantissa;							\
+  I_ exponent;								\
+  StgDouble arg = (f);							\
+									\
+  /* Be prepared to tell Lennart-coded __decodeDouble	*/		\
+  /* where mantissa.d can be put (it does not care about the rest) */	\
+  SET_DATA_HDR(hp,ArrayOfData_info,CCC,DATA_VHS+MIN_MP_INT_SIZE,0);	\
+  mantissa.d = (hp) + DATA_HS;						\
+									\
+  /* Perform the operation */						\
+  SAFESTGCALL3(void,(void *, MP_INT *, I_ *, StgDouble),__decodeDouble,&mantissa,&exponent,arg);		\
+  exponr= exponent;							\
+  ar	= mantissa.alloc;						\
+  sr	= mantissa.size;						\
+  dr	= (B_)(hp);							\
+}
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-mv-floats]{Moving floats and doubles around (e.g., to/from stacks)}
+%*									*
+%************************************************************************
+
+With GCC, we use magic non-standard inlining; for other compilers, we
+just use functions (see also \tr{runtime/prims/PrimArith.lc}).
+
+(The @OMIT_...@ is only used in compiling some of the RTS, none of
+which uses these anyway.)
+
+\begin{code}
+#if alpha_TARGET_ARCH	\
+ || i386_TARGET_ARCH	\
+ || i486_TARGET_ARCH	\
+ || m68k_TARGET_ARCH
+
+#define ASSIGN_FLT(dst, src) *(StgFloat *)(dst) = (src);
+#define PK_FLT(src) (*(StgFloat *)(src))
+
+#define ASSIGN_DBL(dst, src) *(StgDouble *)(dst) = (src);
+#define PK_DBL(src) (*(StgDouble *)(src))
+
+#else	/* not m68k || alpha || i[34]86 */
+
+/* Special handling for machines with troublesome alignment constraints */
+
+#define FLOAT_ALIGNMENT_TROUBLES    TRUE
+
+#if ! defined(__GNUC__) || ! defined(__STG_GCC_REGS__)
+
+extern void	    ASSIGN_DBL PROTO((W_ [], StgDouble));
+extern StgDouble    PK_DBL     PROTO((W_ []));
+extern void	    ASSIGN_FLT PROTO((W_ [], StgFloat));
+extern StgFloat	    PK_FLT     PROTO((W_ []));
+
+#else /* yes, its __GNUC__ && we really want them */
+
+#if sparc_TARGET_ARCH
+
+#define ASSIGN_FLT(dst, src) *(StgFloat *)(dst) = (src);
+#define PK_FLT(src) (*(StgFloat *)(src))
+
+#define ASSIGN_DBL(dst,src) \
+      __asm__("st %2,%0\n\tst %R2,%1" : "=m" (((P_)(dst))[0]), \
+	"=m" (((P_)(dst))[1]) : "f" (src));
+
+#define PK_DBL(src) \
+    ( { register double d; \
+      __asm__("ld %1,%0\n\tld %2,%R0" : "=f" (d) : \
+	"m" (((P_)(src))[0]), "m" (((P_)(src))[1])); d; \
+    } )
+
+#else /* ! sparc */
+
+extern STG_INLINE
+void
+ASSIGN_DBL(W_ p_dest[], StgDouble src)
+{
+    double_thing y;
+    y.d = src;
+    p_dest[0] = y.du.dhi;
+    p_dest[1] = y.du.dlo;
+}
+
+/* GCC also works with this version, but it generates
+   the same code as the previous one, and is not ANSI
+
+#define ASSIGN_DBL( p_dest, src ) \
+	*p_dest = ((double_thing) src).du.dhi; \
+	*(p_dest+1) = ((double_thing) src).du.dlo \
+*/
+
+extern STG_INLINE
+StgDouble
+PK_DBL(W_ p_src[])
+{
+    double_thing y;
+    y.du.dhi = p_src[0];
+    y.du.dlo = p_src[1];
+    return(y.d);
+}
+
+extern STG_INLINE
+void
+ASSIGN_FLT(W_ p_dest[], StgFloat src)
+{
+    float_thing y;
+    y.f = src;
+    *p_dest = y.fu;
+}
+
+extern STG_INLINE
+StgFloat
+PK_FLT(W_ p_src[])
+{
+    float_thing y;
+    y.fu = *p_src;
+    return(y.f);
+}
+
+#endif /* ! sparc */
+
+#endif /* __GNUC__ */
+
+#endif /* not __m68k__ */
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-array-primops]{Primitive arrays}
+%*									*
+%************************************************************************
+
+We regularly use this macro to fish the ``contents'' part
+out of a DATA or TUPLE closure, which is what is used for
+non-ptr and ptr arrays (respectively).
+
+BYTE_ARR_CTS returns a @C_ *@!
+
+We {\em ASSUME} we can use the same macro for both!!
+\begin{code}
+
+#ifdef DEBUG
+#define BYTE_ARR_CTS(a)					\
+ ({ ASSERT(INFO_PTR(a) == (W_) ArrayOfData_info);	\
+    ((C_ *) (((StgPtr) (a))+DATA_HS)); })
+#define PTRS_ARR_CTS(a)					\
+ ({ ASSERT((INFO_PTR(a) == (W_) ArrayOfPtrs_info)	\
+	|| (INFO_PTR(a) == (W_) ImMutArrayOfPtrs_info));\
+    ((a)+MUTUPLE_HS);} )
+#else
+#define BYTE_ARR_CTS(a)		((char *) (((StgPtr) (a))+DATA_HS))
+#define PTRS_ARR_CTS(a)		((a)+MUTUPLE_HS)
+#endif
+
+/* sigh */
+extern I_ genSymZh(STG_NO_ARGS);
+extern I_ resetGenSymZh(STG_NO_ARGS);
+extern I_ incSeqWorldZh(STG_NO_ARGS);
+
+/* sigh again: without these some (notably "float") willnae work */
+extern I_ long2bytes__	 PROTO((long,	unsigned char *));
+extern I_ int2bytes__	 PROTO((int,	unsigned char *));
+extern I_ short2bytes__	 PROTO((short,	unsigned char *));
+extern I_ float2bytes__	 PROTO((float,	unsigned char *));
+extern I_ double2bytes__ PROTO((double, unsigned char *));
+
+/* these may not be necessary; and they create warnings (WDP) */
+extern I_ bytes2long__	 PROTO((P_, I_ *));
+extern I_ bytes2int__	 PROTO((P_, I_ *));
+extern I_ bytes2short__	 PROTO((P_, I_ *));
+extern I_ bytes2float__	 PROTO((P_, StgFloat *));
+extern I_ bytes2double__ PROTO((P_, StgDouble *));
+
+extern I_ byteArrayHasNUL__ PROTO((const char *, I_));
+\end{code}
+
+OK, the easy ops first: (all except \tr{newArr*}:
+
+VERY IMPORTANT!	 The read/write/index primitive ops
+on @ByteArray#@s index the array using a {\em BYTE} offset, even
+if the thing begin gotten out is a multi-byte @Int#@, @Float#@ etc.
+This is because you might be trying to take apart a C struct, where
+the offset from the start of the struct isn't a multiple of the
+size of the thing you're getting.  Hence the @(char *)@ casts.
+
+In the case of messing with @StgAddrs@ (@A_@), which are really \tr{void *},
+we cast to @P_@, because you can't index off an uncast \tr{void *}.
+
+In the case of @Array#@ (which contain pointers), the offset is in units
+of one ptr (not bytes).
+
+\begin{code}
+#define sameMutableArrayZh(r,a,b)	r=(I_)((a)==(b))
+#define sameMutableByteArrayZh(r,a,b)	r=(I_)((B_)(a)==(B_)(b))
+
+#define readArrayZh(r,a,i)	 r=((PP_) PTRS_ARR_CTS(a))[(i)]
+
+#define readCharArrayZh(r,a,i)	 indexCharOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define readIntArrayZh(r,a,i)	 indexIntOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define readAddrArrayZh(r,a,i)	 indexAddrOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define readFloatArrayZh(r,a,i)	 indexFloatOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define readDoubleArrayZh(r,a,i) indexDoubleOffAddrZh(r,BYTE_ARR_CTS(a),i)
+
+/* result ("r") arg ignored in write macros! */
+#define writeArrayZh(a,i,v)	((PP_) PTRS_ARR_CTS(a))[(i)]=(v)
+
+#define writeCharArrayZh(a,i,v)	  ((C_ *)(BYTE_ARR_CTS(a)))[i] = (v)
+#define writeIntArrayZh(a,i,v)	  ((I_ *)(BYTE_ARR_CTS(a)))[i] = (v)
+#define writeAddrArrayZh(a,i,v)	  ((PP_)(BYTE_ARR_CTS(a)))[i] = (v)
+#define writeFloatArrayZh(a,i,v)  \
+	ASSIGN_FLT((P_) (((StgFloat *)(BYTE_ARR_CTS(a))) + i),v)
+#define writeDoubleArrayZh(a,i,v) \
+	ASSIGN_DBL((P_) (((StgDouble *)(BYTE_ARR_CTS(a))) + i),v)
+
+#define indexArrayZh(r,a,i)	  r=((PP_) PTRS_ARR_CTS(a))[(i)]
+
+#define indexCharArrayZh(r,a,i)	  indexCharOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define indexIntArrayZh(r,a,i)	  indexIntOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define indexAddrArrayZh(r,a,i)	  indexAddrOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define indexFloatArrayZh(r,a,i)  indexFloatOffAddrZh(r,BYTE_ARR_CTS(a),i)
+#define indexDoubleArrayZh(r,a,i) indexDoubleOffAddrZh(r,BYTE_ARR_CTS(a),i)
+
+#define indexCharOffAddrZh(r,a,i)   r= ((C_ *)(a))[i]
+#define indexIntOffAddrZh(r,a,i)    r= ((I_ *)(a))[i]
+#define indexAddrOffAddrZh(r,a,i)   r= ((PP_)(a))[i]
+#define indexFloatOffAddrZh(r,a,i)  r= PK_FLT((P_) (((StgFloat *)(a)) + i))
+#define indexDoubleOffAddrZh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i))
+
+/* Freezing arrays-of-ptrs requires changing an info table, for the
+   benefit of the generational collector.  It needs to scavenge mutable
+   objects, even if they are in old space.  When they become immutable,
+   they can be removed from this scavenge list.	 */
+#define unsafeFreezeArrayZh(r,a)				\
+	do {						\
+	P_ result;					\
+	result=(P_) (a);				\
+	FREEZE_MUT_HDR(result,ImMutArrayOfPtrs_info);	\
+	r = result;					\
+	}while(0)
+
+#define unsafeFreezeByteArrayZh(r,a)	r=(B_)(a)
+\end{code}
+
+Now the \tr{newArr*} ops:
+
+\begin{code}
+/*
+--------------------
+Will: ToDo: we need to find suitable places to put this comment, and the
+"in-general" one which follows.
+
+************ Nota Bene.	 The "n" in this macro is guaranteed to
+be a register, *not* (say) Node[1].  That means that it is guaranteed
+to survive GC, provided only that the register is kept unaltered.
+This is important, because "n" is used after the HEAP_CHK.
+
+In general, *all* parameters to these primitive-op macros are always
+registers.  (Will: For exactly *which* primitive-op macros is this guaranteed?
+Exactly those which can trigger GC?)
+------------------------
+
+NOTE: the above may now be OLD (WDP 94/02/10)
+*/
+\end{code}
+
+For char arrays, the size is in {\em BYTES}.
+
+\begin{code}
+#define newCharArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(C_))
+#define newIntArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(I_))
+#define newAddrArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(P_))
+#define newFloatArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(StgFloat))
+#define newDoubleArrayZh(r,liveness,n)	newByteArray(r,liveness,(n) * sizeof(StgDouble))
+
+#define newByteArray(r,liveness,n)				\
+{								\
+  P_ result;							\
+  I_ size;							\
+								\
+  HEAP_CHK(liveness,DATA_HS+BYTES_TO_STGWORDS(n),0);		\
+  size = BYTES_TO_STGWORDS(n);					\
+  ALLOC_PRIM(DATA_HS,size,0,DATA_HS+size) /* ticky ticky */;	\
+  CC_ALLOC(CCC,DATA_HS+size,ARR_K);				\
+								\
+  result = Hp-(DATA_HS+size)+1;					\
+  SET_DATA_HDR(result,ArrayOfData_info,CCC,DATA_VHS+size,0);	\
+  r = (B_) result;						\
+}
+\end{code}
+
+Arrays of pointers need to be initialised; uses \tr{TUPLES}!
+The initialisation value is guaranteed to be in a register,
+and will be indicated by the liveness mask, so it's ok to do
+a \tr{HEAP_CHK}, which may trigger GC.
+
+\begin{code}
+/* The new array initialization routine for the NCG */
+void newArrZh_init PROTO((P_ result, I_ n, P_ init));
+
+#define newArrayZh(r,liveness,n,init)			\
+{							\
+  P_ p;							\
+  P_ result;						\
+							\
+  HEAP_CHK(liveness, MUTUPLE_HS+(n),0);			\
+  ALLOC_PRIM(MUTUPLE_HS,(n),0,MUTUPLE_HS+(n)) /* ticky ticky */; \
+  CC_ALLOC(CCC,MUTUPLE_HS+(n),ARR_K); /* cc prof */	\
+							\
+  result = Hp + 1 - (MUTUPLE_HS+(n));			\
+  SET_MUTUPLE_HDR(result,ArrayOfPtrs_info,CCC,MUTUPLE_VHS+(n),0) \
+  for (p = result+MUTUPLE_HS; p < (result+MUTUPLE_HS+(n)); p++) { \
+	*p = (W_) (init);				\
+  }							\
+  SET_ACTIVITY(ACT_REDN); /* back to normal reduction */\
+							\
+  r = result;						\
+}
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-SynchVar-primops]{Synchronizing Variables PrimOps}
+%*									*
+%************************************************************************
+
+\begin{code}
+ED_(Nil_closure);
+
+#define newSynchVarZh(r, hp) 				\
+{							\
+  ALLOC_PRIM(MUTUPLE_HS,3,0,MUTUPLE_HS+3) /* ticky ticky */; \
+  CC_ALLOC(CCC,MUTUPLE_HS+3,ARR_K); /* cc prof */	\
+  SET_SVAR_HDR(hp,EmptySVar_info,CCC);			\
+  SVAR_HEAD(hp) = SVAR_TAIL(hp) = SVAR_VALUE(hp) = Nil_closure;	\
+  r = hp;						\
+}
+\end{code}
+
+\begin{code}
+#ifdef CONCURRENT
+
+extern void Yield PROTO((W_));
+
+#define takeMVarZh(r, liveness, node)    		\
+{							\
+  while (INFO_PTR(node) != (W_) FullSVar_info) {	\
+    if (SVAR_HEAD(node) == Nil_closure) 	    	\
+      SVAR_HEAD(node) = CurrentTSO; 	    	    	\
+    else    	    	    	    	    	    	\
+      TSO_LINK(SVAR_TAIL(node)) = CurrentTSO;	    	\
+    TSO_LINK(CurrentTSO) = (P_) Nil_closure;	    	\
+    SVAR_TAIL(node) = CurrentTSO;   	    	    	\
+    DO_YIELD(liveness << 1);				\
+  } 	    	    	    	    	    	    	\
+  SET_INFO_PTR(node, EmptySVar_info);			\
+  r = SVAR_VALUE(node);	    	    	    	    	\
+  SVAR_VALUE(node) = Nil_closure;   	    	    	\
+}
+
+#else
+
+#define takeMVarZh(r, liveness, node)			\
+{							\
+  if (INFO_PTR(node) != (W_) FullSVar_info) {		\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "takeMVar#: MVar is empty.\n");    	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  SET_INFO_PTR(node, EmptySVar_info);			\
+  r = SVAR_VALUE(node);	    	    	    	    	\
+  SVAR_VALUE(node) = Nil_closure;   	    	    	\
+}
+
+#endif
+\end{code}
+
+\begin{code}
+#ifdef CONCURRENT
+
+#ifdef GRAN
+
+/* Only difference to the !GRAN def: RunnableThreadsHd has been replaced by */
+/* ThreadQueueHd i.e. the tso is added at the end of the thread queue on */
+/* the CurrentProc. This means we have an implicit context switch after */
+/* putMVar even if unfair scheduling is used in GranSim (default)!  -- HWL */
+
+#define putMVarZh(node, value)	    			\
+{							\
+  P_ tso;   	    	    	    	    	    	\
+  if (INFO_PTR(node) == (W_) FullSVar_info) {		\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "putMVar#: MVar already full.\n");	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  SET_INFO_PTR(node, FullSVar_info);			\
+  SVAR_VALUE(node) = value; 	    	    	    	\
+  tso = SVAR_HEAD(node);    	    	    	    	\
+  if (tso != (P_) Nil_closure) {    	    	    	\
+    if (DO_QP_PROF) 	    	    	    	    	\
+      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
+    if (ThreadQueueHd == Nil_closure)   	    	\
+      ThreadQueueHd = tso;    	    	    	\
+    else    	    	    	    	    	    	\
+      TSO_LINK(ThreadQueueTl) = tso;  	    	\
+    ThreadQueueTl = tso;	    	    	    	\
+    SVAR_HEAD(node) = TSO_LINK(tso);  	    	    	\
+    TSO_LINK(tso) = (P_) Nil_closure; 	    	    	\
+    if(SVAR_HEAD(node) == (P_) Nil_closure)   	    	\
+      SVAR_TAIL(node) = (P_) Nil_closure;	    	\
+  } 	    	    	    	    	    	    	\
+}
+
+#else /* !GRAN */
+
+#define putMVarZh(node, value)	    			\
+{							\
+  P_ tso;   	    	    	    	    	    	\
+  if (INFO_PTR(node) == (W_) FullSVar_info) {		\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "putMVar#: MVar already full.\n");	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  SET_INFO_PTR(node, FullSVar_info);			\
+  SVAR_VALUE(node) = value; 	    	    	    	\
+  tso = SVAR_HEAD(node);    	    	    	    	\
+  if (tso != (P_) Nil_closure) {    	    	    	\
+    if (DO_QP_PROF) 	    	    	    	    	\
+      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
+    if (RunnableThreadsHd == Nil_closure)   	    	\
+      RunnableThreadsHd = tso;    	    	    	\
+    else    	    	    	    	    	    	\
+      TSO_LINK(RunnableThreadsTl) = tso;  	    	\
+    RunnableThreadsTl = tso;	    	    	    	\
+    SVAR_HEAD(node) = TSO_LINK(tso);  	    	    	\
+    TSO_LINK(tso) = (P_) Nil_closure; 	    	    	\
+    if(SVAR_HEAD(node) == (P_) Nil_closure)   	    	\
+      SVAR_TAIL(node) = (P_) Nil_closure;	    	\
+  } 	    	    	    	    	    	    	\
+}
+
+#endif  /* GRAN */
+
+#else
+
+#define putMVarZh(node, value)    			\
+{							\
+  P_ tso;   	    	    	    	    	    	\
+  if (INFO_PTR(node) == (W_) FullSVar_info) {		\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "putMVar#: MVar already full.\n");	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  SET_INFO_PTR(node, FullSVar_info);			\
+  SVAR_VALUE(node) = value; 	    	    	    	\
+}
+
+#endif
+\end{code}
+
+\begin{code}
+#ifdef CONCURRENT
+
+#define readIVarZh(r, liveness, node)			\
+{							\
+  if (INFO_PTR(node) != (W_) ImMutArrayOfPtrs_info) {	\
+    if (SVAR_HEAD(node) == Nil_closure) 	    	\
+      SVAR_HEAD(node) = CurrentTSO; 	    	    	\
+    else    	    	    	    	    	    	\
+      TSO_LINK(SVAR_TAIL(node)) = CurrentTSO;	    	\
+    TSO_LINK(CurrentTSO) = (P_) Nil_closure;	    	\
+    SVAR_TAIL(node) = CurrentTSO;   	    	    	\
+    DO_YIELD(liveness << 1);				\
+  } 	    	    	    	    	    	    	\
+  r = SVAR_VALUE(node);	    	    	    	    	\
+}
+
+#else
+
+#define readIVarZh(r, liveness, node)			\
+{							\
+  if (INFO_PTR(node) != (W_) ImMutArrayOfPtrs_info) {	\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "readIVar#: IVar is empty.\n");    	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  r = SVAR_VALUE(node);	    	    	    	    	\
+}
+
+#endif
+\end{code}
+
+\begin{code}
+#ifdef CONCURRENT
+
+#ifdef GRAN
+
+/* Only difference to the !GRAN def: RunnableThreadsHd has been replaced by */
+/* ThreadQueueHd i.e. the tso is added at the end of the thread queue on */
+/* the CurrentProc. This means we have an implicit context switch after */
+/* writeIVar even if unfair scheduling is used in GranSim (default)!  -- HWL */
+
+#define writeIVarZh(node, value)    			\
+{							\
+  P_ tso;   	    	    	    	    	    	\
+  if (INFO_PTR(node) == (W_) ImMutArrayOfPtrs_info) {	\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "writeIVar#: IVar already full.\n");\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  tso = SVAR_HEAD(node);    	    	    	    	\
+  if (tso != (P_) Nil_closure) {    	    	    	\
+    if (ThreadQueueHd == Nil_closure)   	    	\
+      ThreadQueueHd = tso;    	    	    	\
+    else    	    	    	    	    	    	\
+      TSO_LINK(ThreadQueueTl) = tso;  	    	\
+    while(TSO_LINK(tso) != Nil_closure) {   	    	\
+      if (DO_QP_PROF)	    	    	    	    	\
+        STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
+      tso = TSO_LINK(tso);  	    	    	    	\
+    }	    	    	    	    	    	    	\
+    if (DO_QP_PROF) 	    	    	    	    	\
+      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
+    ThreadQueueTl = tso;	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  /* Don't use freeze, since it's conditional on GC */	\
+  SET_INFO_PTR(node, ImMutArrayOfPtrs_info);		\
+  MUTUPLE_CLOSURE_SIZE(node) = (MUTUPLE_VHS+1);    	\
+  SVAR_VALUE(node) = value; 	    	    	    	\
+}
+
+#else /* !GRAN */
+
+#define writeIVarZh(node, value)    			\
+{							\
+  P_ tso;   	    	    	    	    	    	\
+  if (INFO_PTR(node) == (W_) ImMutArrayOfPtrs_info) {	\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "writeIVar#: IVar already full.\n");\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  tso = SVAR_HEAD(node);    	    	    	    	\
+  if (tso != (P_) Nil_closure) {    	    	    	\
+    if (RunnableThreadsHd == Nil_closure)   	    	\
+      RunnableThreadsHd = tso;    	    	    	\
+    else    	    	    	    	    	    	\
+      TSO_LINK(RunnableThreadsTl) = tso;  	    	\
+    while(TSO_LINK(tso) != Nil_closure) {   	    	\
+      if (DO_QP_PROF)	    	    	    	    	\
+        STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
+      tso = TSO_LINK(tso);  	    	    	    	\
+    }	    	    	    	    	    	    	\
+    if (DO_QP_PROF) 	    	    	    	    	\
+      STGCALL3(void,(void *, char *, P_, P_),QP_Event2,do_qp_prof > 1 ? "RA" : "RG",tso,CurrentTSO);	\
+    RunnableThreadsTl = tso;	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  /* Don't use freeze, since it's conditional on GC */	\
+  SET_INFO_PTR(node, ImMutArrayOfPtrs_info);		\
+  MUTUPLE_CLOSURE_SIZE(node) = (MUTUPLE_VHS+1);    	\
+  SVAR_VALUE(node) = value; 	    	    	    	\
+}
+
+#endif  /* GRAN */
+
+#else
+
+#define writeIVarZh(node, value)    			\
+{							\
+  P_ tso;   	    	    	    	    	    	\
+  if (INFO_PTR(node) == (W_) ImMutArrayOfPtrs_info) {	\
+    /* Don't wrap the calls; we're done with STG land */\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "writeIVar#: IVar already full.\n");\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  } 	    	    	    	    	    	    	\
+  /* Don't use freeze, since it's conditional on GC */	\
+  SET_INFO_PTR(node, ImMutArrayOfPtrs_info);		\
+  MUTUPLE_CLOSURE_SIZE(node) = (MUTUPLE_VHS+1);    	\
+  SVAR_VALUE(node) = value; 	    	    	    	\
+}
+
+#endif
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-Wait-primops]{Delay/Wait PrimOps}
+%*									*
+%************************************************************************
+
+\begin{code}
+#ifdef CONCURRENT
+
+/* ToDo: for GRAN */
+
+#define delayZh(liveness, us)				\
+  {							\
+    if (WaitingThreadsTl == Nil_closure)		\
+      WaitingThreadsHd = CurrentTSO;			\
+    else						\
+      TSO_LINK(WaitingThreadsTl) = CurrentTSO;		\
+    WaitingThreadsTl = CurrentTSO;			\
+    TSO_LINK(CurrentTSO) = Nil_closure;			\
+    TSO_EVENT(CurrentTSO) = (W_) ((us) < 1 ? 1 : (us)); \
+    DO_YIELD(liveness << 1);				\
+  }
+
+#else
+
+#define delayZh(liveness, us)				\
+  {							\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "delay#: unthreaded build.\n");    	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  }
+
+#endif
+
+#ifdef CONCURRENT
+
+/* ToDo: something for GRAN */
+
+#define waitZh(liveness, fd)				\
+  {							\
+    if (WaitingThreadsTl == Nil_closure)		\
+      WaitingThreadsHd = CurrentTSO;			\
+    else						\
+      TSO_LINK(WaitingThreadsTl) = CurrentTSO;		\
+    WaitingThreadsTl = CurrentTSO;			\
+    TSO_LINK(CurrentTSO) = Nil_closure;			\
+    TSO_EVENT(CurrentTSO) = (W_) (-(fd));		\
+    DO_YIELD(liveness << 1);				\
+  }
+
+#else
+
+#define waitZh(liveness, fd)				\
+  {							\
+    fflush(stdout); 	    	    	    	    	\
+    fprintf(stderr, "wait#: unthreaded build.\n");    	\
+    EXIT(EXIT_FAILURE);	    	    	    	    	\
+  }
+
+#endif
+
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-IO-primops]{Primitive I/O, error-handling primops}
+%*									*
+%************************************************************************
+
+\begin{code}
+extern P_ TopClosure;
+EXTFUN(ErrorIO_innards);
+EXTFUN(__std_entry_error__);
+
+#define errorIOZh(a)		\
+    do { TopClosure=(a);	\
+	 (void) SAFESTGCALL1(I_,(void *, FILE *),fflush,stdout); \
+	 (void) SAFESTGCALL1(I_,(void *, FILE *),fflush,stderr); \
+	 JMP_(ErrorIO_innards); \
+    } while(0)
+
+#if !defined(CALLER_SAVES_SYSTEM)
+/* can use the macros */
+#define stg_getc(stream)	getc((FILE *) (stream))
+#define stg_putc(c,stream)	putc((c),((FILE *) (stream)))
+#else
+/* must not use the macros (they contain embedded calls to _filbuf/whatnot) */
+#define stg_getc(stream)	SAFESTGCALL1(I_,(void *, FILE *),fgetc,(FILE *) (stream))
+#define stg_putc(c,stream)	SAFESTGCALL2(I_,(void *, char, FILE *),fputc,(c),((FILE *) (stream)))
+#endif
+
+int initialize_virtual_timer(int us);
+int install_segv_handler(STG_NO_ARGS);
+int install_vtalrm_handler(STG_NO_ARGS);
+void initUserSignals(STG_NO_ARGS);
+void blockUserSignals(STG_NO_ARGS);
+void unblockUserSignals(STG_NO_ARGS);
+IF_RTS(void blockVtAlrmSignal(STG_NO_ARGS);)
+IF_RTS(void unblockVtAlrmSignal(STG_NO_ARGS);)
+
+#ifdef _POSIX_SOURCE
+extern I_ sig_install PROTO((I_, I_, sigset_t *));
+#define stg_sig_ignore(s,m)	SAFESTGCALL3(I_,(void *, I_, I_),sig_install,s,STG_SIG_IGN,(sigset_t *)m)
+#define stg_sig_default(s,m)	SAFESTGCALL3(I_,(void *, I_, I_),sig_install,s,STG_SIG_DFL,(sigset_t *)m)
+#define stg_sig_catch(s,sp,m)	SAFESTGCALL3(I_,(void *, I_, I_),sig_install,s,sp,(sigset_t *)m)
+#else
+extern I_ sig_install PROTO((I_, I_));
+#define stg_sig_ignore(s,m)	SAFESTGCALL2(I_,(void *, I_, I_),sig_install,s,STG_SIG_IGN)
+#define stg_sig_default(s,m)	SAFESTGCALL2(I_,(void *, I_, I_),sig_install,s,STG_SIG_DFL)
+#define stg_sig_catch(s,sp,m)	SAFESTGCALL2(I_,(void *, I_, I_),sig_install,s,sp)
+#endif
+
+#define STG_SIG_DFL	(-1)
+#define STG_SIG_IGN	(-2)
+#define STG_SIG_ERR	(-3)
+
+StgInt getErrorHandler(STG_NO_ARGS);
+#ifndef PAR
+void raiseError PROTO((StgStablePtr handler));
+StgInt catchError PROTO((StgStablePtr newErrorHandler));
+#endif
+void decrementErrorCount(STG_NO_ARGS);
+
+#define stg_catchError(sp)      SAFESTGCALL1(I_,(void *, StgStablePtr),catchError,sp)
+#define stg_decrementErrorCount() SAFESTGCALL0(void,(void *),decrementErrorCount)
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-stable-ptr]{Primitive ops for manipulating stable pointers}
+%*									*
+%************************************************************************
+
+
+The type of these should be:
+
+\begin{verbatim}
+makeStablePointer#  :: a -> State# _RealWorld -> StateAndStablePtr# _RealWorld a
+deRefStablePointer# :: StablePtr# a -> State# _RealWorld -> StateAndPtr _RealWorld a
+\end{verbatim}
+
+Since world-tokens are no longer explicitly passed around, the
+implementations have a few less arguments/results.
+
+The simpler one is @deRefStablePointer#@ (which is only a primop
+because it is more polymorphic than is allowed of a ccall).
+
+\begin{code}
+#ifdef PAR
+
+#define deRefStablePtrZh(ri,sp)					    \
+do {								    \
+    fflush(stdout);						    \
+    fprintf(stderr, "deRefStablePtr#: no stable pointer support.\n");\
+    EXIT(EXIT_FAILURE);						    \
+} while(0)
+
+#else /* !PAR */
+
+extern StgPtr _deRefStablePointer PROTO((StgInt, StgPtr));
+
+#define deRefStablePtrZh(ri,sp) \
+   ri = SAFESTGCALL2(I_,(void *, I_, P_),_deRefStablePointer,sp,StorageMgrInfo.StablePointerTable);
+
+\end{code}
+
+Declarations for other stable pointer operations.
+
+\begin{code}
+void	freeStablePointer	PROTO((I_ stablePtr));
+
+void	enterStablePtr		PROTO((StgStablePtr, StgFunPtr));
+void	performIO		PROTO((StgStablePtr));
+I_	enterInt		PROTO((StgStablePtr));
+I_	enterFloat		PROTO((StgStablePtr));
+P_	deRefStablePointer	PROTO((StgStablePtr));
+IF_RTS(I_ catchSoftHeapOverflow	PROTO((StgStablePtr, I_));)
+IF_RTS(I_ getSoftHeapOverflowHandler(STG_NO_ARGS);)
+IF_RTS(extern StgStablePtr softHeapOverflowHandler;)
+IF_RTS(void shutdownHaskell(STG_NO_ARGS);)
+IF_RTS(extern I_ noBlackHoles;)
+IF_RTS(extern I_ SM_word_stk_size;)
+
+EXTFUN(stopPerformIODirectReturn);
+EXTFUN(startPerformIO);
+EXTFUN(stopEnterIntDirectReturn);
+EXTFUN(startEnterInt);
+EXTFUN(stopEnterFloatDirectReturn);
+EXTFUN(startEnterFloat);
+
+void enterStablePtr PROTO((StgStablePtr stableIndex, StgFunPtr startCode));
+
+#endif /* !PAR */
+
+IF_RTS(extern I_ ErrorIO_call_count;)
+\end{code}
+
+Somewhat harder is @makeStablePointer#@ --- it is usually simple but
+if we're unlucky, it will have to allocate a new table and copy the
+old bit over.  Since we might, very occasionally, have to call the
+garbage collector, this has to be a macro... sigh!
+
+NB @newSP@ is required because it is entirely possible that
+@stablePtr@ and @unstablePtr@ are aliases and so we can't do the
+assignment to @stablePtr@ until we've finished with @unstablePtr@.
+
+Another obscure piece of coding is the recalculation of the size of
+the table.  We do this just in case Jim's threads decide they want to
+context switch---in which case any stack-allocated variables may get
+trashed.  (If only there was a special heap check which didn't
+consider context switching...)
+
+\begin{code}
+#ifndef PAR
+
+/* Calculate SP Table size from number of pointers */
+#define SPTSizeFromNoPtrs( newNP ) (DYN_VHS + 1 + 2 * (newNP))
+
+/* Calculate number of pointers in new table from number in old table:
+   any strictly increasing expression will do here */
+#define CalcNewNoSPtrs( i ) ((i)*2 + 100)
+
+extern void enlargeSPTable PROTO((P_, P_));
+
+#define makeStablePtrZh(stablePtr,liveness,unstablePtr)		    \
+do {								    \
+  EXTDATA_RO(StablePointerTable_info);				    \
+  EXTDATA(UnusedSP);						    \
+  StgStablePtr newSP;						    \
+								    \
+  if (SPT_EMPTY(StorageMgrInfo.StablePointerTable)) { /* free stack is empty */ \
+    { /* Variables used before the heap check */		    \
+      I_ OldNoPtrs = SPT_NoPTRS( StorageMgrInfo.StablePointerTable ); \
+      I_ NewNoPtrs = CalcNewNoSPtrs( OldNoPtrs );		    \
+      I_ NewSize = SPTSizeFromNoPtrs( NewNoPtrs );		    \
+      HEAP_CHK(liveness, _FHS+NewSize, 0);			    \
+    }								    \
+    { /* Variables used after the heap check - same values */	    \
+      I_ OldNoPtrs = SPT_NoPTRS( StorageMgrInfo.StablePointerTable ); \
+      I_ NewNoPtrs = CalcNewNoSPtrs( OldNoPtrs );		    \
+      I_ NewSize = SPTSizeFromNoPtrs( NewNoPtrs );		    \
+      P_ SPTable = Hp + 1 - (_FHS + NewSize);			    \
+								    \
+      CC_ALLOC(CCC, _FHS+NewSize, SPT_K); /* cc prof */		    \
+      SET_DYN_HDR(SPTable,StablePointerTable_info,CCC,NewSize,NewNoPtrs);\
+      SAFESTGCALL2(void, (void *, P_, P_), enlargeSPTable, SPTable, StorageMgrInfo.StablePointerTable);	     \
+      StorageMgrInfo.StablePointerTable = SPTable;		    \
+    }								    \
+  }								    \
+								    \
+  newSP = SPT_POP(StorageMgrInfo.StablePointerTable);		    \
+  SPT_SPTR(StorageMgrInfo.StablePointerTable, newSP) = unstablePtr; \
+  stablePtr = newSP;						    \
+} while (0)
+
+#else
+
+#define makeStablePtrZh(stablePtr,liveness,unstablePtr)		    \
+do {								    \
+    fflush(stdout);						    \
+    fprintf(stderr, "makeStablePtr#: no stable pointer support.\n");\
+    EXIT(EXIT_FAILURE);						    \
+} while(0)
+
+#endif /* !PAR */
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-unsafePointerEquality]{Primitive `op' for breaking referential transparency}
+%*									*
+%************************************************************************
+
+The type of this is @reallyUnsafePtrEquality :: a -> a -> Int#@ so we
+can expect three parameters: the two arguments and a "register" to put
+the result into.
+
+Message to Will: This primop breaks referential transparency so badly
+you might want to leave it out.	 On the other hand, if you hide it
+away in an appropriate monad, it's perfectly safe. [ADR]
+
+Note that this primop is non-deterministic: different results can be
+obtained depending on just what the garbage collector (and code
+optimiser??) has done.	However, we can guarantee that if two objects
+are pointer-equal, they have the same denotation --- the converse most
+certainly doesn't hold.
+
+ToDo ADR: The degree of non-determinism could be greatly reduced by
+following indirections.
+
+\begin{code}
+#define reallyUnsafePtrEqualityZh(r,a,b) r=((StgPtr)(a) == (StgPtr)(b))
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-parallel-primop]{Primitive `op' for sparking (etc)}
+%*									*
+%************************************************************************
+
+Assuming local sparking in some form, we can now inline the spark request.
+
+We build a doubly-linked list in the heap, so that we can handle FIFO
+or LIFO scheduling as we please.
+
+Anything with tag >= 0 is in WHNF, so we discard it.
+
+\begin{code}
+#ifdef CONCURRENT
+
+ED_(Nil_closure);
+ED_(True_closure);
+
+#if defined(GRAN)
+#define parZh(r,hp,node,rest)				\
+	PARZh(r,hp,node,rest,0,0)
+
+#define parAtZh(r,hp,node,where,identifier,rest)	\
+	parATZh(r,hp,node,where,identifier,rest,1)
+
+#define parAtForNowZh(r,hp,node,where,identifier,rest)	\
+	parATZh(r,hp,node,where,identifier,rest,0)
+
+#define parATZh(r,hp,node,where,identifier,rest,local)	\
+{							\
+  sparkq result;						\
+  if (SHOULD_SPARK(node)) {				\
+    result = NewSpark((P_)node,identifier,local);	\
+    SAFESTGCALL3(void,(W_),GranSimSparkAt,result,where,identifier);	\
+  } else if (do_qp_prof) {				\
+    I_ tid = threadId++;				\
+    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
+  }							\
+  r = (rest);						\
+}
+
+#define parLocalZh(r,hp,node,identifier,rest)		\
+	PARZh(r,hp,node,rest,identifier,1)
+
+#define parGlobalZh(r,hp,node,identifier,rest)		\
+	PARZh(r,hp,node,rest,identifier,0)
+
+#define PARZh(r,hp,node,rest,identifier,local)		\
+{							\
+  sparkq result;						\
+  if (SHOULD_SPARK(node)) {				\
+    result = NewSpark((P_)node,identifier,local);	\
+    ADD_TO_SPARK_QUEUE(result);				\
+    SAFESTGCALL2(void,(W_),GranSimSpark,local,(P_)node);	\
+    /* context_switch = 1;  not needed any more -- HWL */					\
+  } else if (do_qp_prof) {				\
+    I_ tid = threadId++;				\
+    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
+  }							\
+  r = (rest);						\
+}
+
+#else  /* !GRAN */
+
+extern I_ required_thread_count;
+
+#ifdef PAR
+#define COUNT_SPARK     TSO_GLOBALSPARKS(CurrentTSO)++
+#else
+#define COUNT_SPARK
+#endif
+
+/* 
+   Note that we must bump the required thread count NOW, rather
+   than when the thread is actually created.  
+ */
+
+#define forkZh(r,liveness,node)				\
+{							\
+  while (PendingSparksTl[REQUIRED_POOL] == PendingSparksLim[REQUIRED_POOL]) \
+    DO_YIELD((liveness << 1) | 1);			\
+  COUNT_SPARK;						\
+  if (SHOULD_SPARK(node)) {				\
+    *PendingSparksTl[REQUIRED_POOL]++ = (P_)(node);	\
+  } else if (DO_QP_PROF) {				\
+    I_ tid = threadId++;				\
+    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
+  }							\
+  required_thread_count++;				\
+  context_switch = 1;					\
+  r = 1; /* Should not be necessary */			\
+}
+
+#define parZh(r,node)					\
+{							\
+  COUNT_SPARK;						\
+  if (SHOULD_SPARK(node) &&				\
+   PendingSparksTl[ADVISORY_POOL] < PendingSparksLim[ADVISORY_POOL]) {	\
+    *PendingSparksTl[ADVISORY_POOL]++ = (P_)(node);	\
+  } else if (DO_QP_PROF) {				\
+    I_ tid = threadId++;				\
+    SAFESTGCALL2(void,(I_, P_),QP_Event0,tid,node);	\
+  }							\
+  r = 1; /* Should not be necessary */			\
+}
+
+\end{code}
+
+The following seq# code should only be used in unoptimized code.
+Be warned: it's a potential bug-farm.
+
+First we push two words on the B stack: the current value of RetReg 
+(which may or may not be live), and a continuation snatched largely out
+of thin air (it's a point within this code block).  Then we set RetReg
+to the special polymorphic return code for seq, load up Node with the
+closure to be evaluated, and we're off.  When the eval returns to the
+polymorphic seq return point, the two words are popped off the B stack,
+RetReg is restored, and we jump to the continuation, completing the
+primop and going on our merry way.
+
+\begin{code}
+
+ED_RO_(vtbl_seq);
+
+#define seqZh(r,liveness,node)		    \
+  ({					    \
+    __label__ cont;			    \
+    STK_CHK(liveness,0,2,0,0,0,0);	    \
+    SpB -= BREL(2);			    \
+    SpB[BREL(0)] = (W_) RetReg;		    \
+    SpB[BREL(1)] = (W_) &&cont;		    \
+    RetReg = (StgRetAddr) vtbl_seq;	    \
+    Node = node;			    \
+    ENT_VIA_NODE();			    \
+    InfoPtr = (D_)(INFO_PTR(Node));	    \
+    JMP_(ENTRY_CODE(InfoPtr));		    \
+    cont:				    \
+    r = 1; /* Should be unnecessary */	    \
+  })
+
+#endif  /* GRAN */ 
+#endif	/* CONCURRENT */
+\end{code}
+
+%************************************************************************
+%*									*
+\subsubsection[StgMacros-malloc-ptrs]{Malloc Pointers}
+%*									*
+%************************************************************************
+
+This macro is used to construct a MallocPtr on the heap after a ccall.
+Since MallocPtr's are like arrays in many ways, this is heavily based
+on the stuff for arrays above.
+
+What this does is plug the pointer (which will be in a local
+variable), into a fresh heap object and then sets a result (which will
+be a register) to point to the fresh heap object.
+
+Question: what's this "SET_ACTIVITY" stuff - should I be doing this
+too?  (It's if you want to use the SPAT profiling tools to
+characterize program behavior by ``activity'' -- tail-calling,
+heap-checking, etc. -- see RednCounts.lh.  It is quite specialized.
+WDP 95/1)
+
+\begin{code}
+#ifndef PAR
+
+StgInt eqMallocPtr PROTO((StgMallocPtr p1, StgMallocPtr p2));
+void FreeMallocPtr PROTO((StgMallocPtr mp));
+
+#define constructMallocPtr(liveness, r, mptr)		\
+do {							\
+  P_ result;						\
+							\
+  HEAP_CHK(liveness, _FHS + MallocPtr_SIZE,0);		\
+  CC_ALLOC(CCC,_FHS + MallocPtr_SIZE,MallocPtr_K); /* cc prof */   \
+								   \
+  result = Hp + 1 - (_FHS + MallocPtr_SIZE);			   \
+  SET_MallocPtr_HDR(result,MallocPtr_info,CCC,_FHS + MallocPtr_SIZE,0); \
+  MallocPtr_CLOSURE_DATA(result) = mptr;                	   \
+  MallocPtr_CLOSURE_LINK(result) = StorageMgrInfo.MallocPtrList;   \
+  StorageMgrInfo.MallocPtrList = result;			   \
+							\
+/*							\
+  printf("DEBUG: MallocPtr(0x%x) = <0x%x, 0x%x, 0x%x, 0x%x>\n",	\
+      result,						\
+      result[0],result[1],				\
+      result[2],result[3]);				\
+*/							\
+  CHECK_MallocPtr_CLOSURE( result );			\
+  VALIDATE_MallocPtrList( StorageMgrInfo.MallocPtrList ); \
+							\
+  (r) = (P_) result;					\
+} while (0)
+
+#else
+#define constructMallocPtr(liveness, r, mptr)			    \
+do {								    \
+    fflush(stdout);						    \
+    fprintf(stderr, "constructMallocPtr: no malloc pointer support.\n");\
+    EXIT(EXIT_FAILURE);						    \
+} while(0)
+
+#endif /* !PAR */
+\end{code}
+
+
+End-of-file's multi-slurp protection:
+\begin{code}
+#endif /* ! STGMACROS_H */
+\end{code}