summary | refs | log | tree | commit | diff
path: root/rts/Updates.cmm
diff options
context:
space:
mode:
author Simon Marlow <marlowsd@gmail.com> 2012-10-03 09:30:56 +0100
committer Simon Marlow <marlowsd@gmail.com> 2012-10-08 09:04:40 +0100
commit a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
tree b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /rts/Updates.cmm
parent aed37acd4d157791381800d5de960a2461bcbef3 (diff)
download haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls.  For example:

foo ( gcptr a, bits32 b )
{
  if (b > 0) {
     // we can make tail calls passing arguments:
     jump stg_ap_0_fast(a);
  }

  return (x,y);
}

More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.

The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However there are a couple of differences: it is now obligatory to
give a list of live GlobalRegs on every jump, e.g.

  jump %ENTRY_CODE(Sp(0)) [R1];

Again, more details in Note [Syntax of .cmm files].

I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.

Some other changes in this batch:

 - The PrimOp calling convention is gone, primops now use the ordinary
   NativeNodeCall convention.  This means that primops and "foreign
   import prim" code must be written in high-level cmm, but they can
   now take more than 10 arguments.

 - CmmSink now does constant-folding (should fix #7219)

 - .cmm files now go through the cmmPipeline, and as a result we
   generate better code in many cases.  All the object files generated
   for the RTS .cmm files are now smaller.  Performance should be
   better too, but I haven't measured it yet.

 - RET_DYN frames are removed from the RTS, lots of code goes away

 - we now have some more canned GC points to cover unboxed-tuples with
   2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'rts/Updates.cmm')
-rw-r--r-- rts/Updates.cmm 99
1 files changed, 43 insertions, 56 deletions
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index 44fbc0e194..2bc21ec332 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -16,85 +16,72 @@
#include "Updates.h"
-#if defined(PROFILING)
-#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, P_ unused3
-#else
-#define UPD_FRAME_PARAMS P_ unused1
-#endif
-
-/* The update fragment has been tuned so as to generate good
- code with gcc, which accounts for some of the strangeness in the
- way it is written.
-
- In particular, the JMP_(ret) bit is passed down and pinned on the
- end of each branch (there end up being two major branches in the
- code), since we don't mind duplicating this jump.
-*/
-
-/* on entry to the update code
- (1) R1 points to the closure being returned
- (2) Sp points to the update frame
-*/
-
-INFO_TABLE_RET( stg_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS)
+/*
+ * The update code is PERFORMANCE CRITICAL, if you make any changes
+ * here make sure you eyeball the assembly and check that the fast
+ * path (update in generation 0) is optimal.
+ *
+ * The return(ret) bit is passed down and pinned on the end of each
+ * branch (there end up being two major branches in the code), since
+ * we don't mind duplicating this jump.
+ */
+INFO_TABLE_RET ( stg_upd_frame, UPDATE_FRAME,
+ UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee) )
+ return (P_ ret) /* the closure being returned */
{
- W_ updatee;
-
- updatee = StgUpdateFrame_updatee(Sp);
-
- /* remove the update frame from the stack */
- Sp = Sp + SIZEOF_StgUpdateFrame;
-
/* ToDo: it might be a PAP, so we should check... */
TICK_UPD_CON_IN_NEW(sizeW_fromITBL(%GET_STD_INFO(updatee)));
-
- updateWithIndirection(updatee,
- R1,
- jump %ENTRY_CODE(Sp(0)) [R1]);
-}
+ updateWithIndirection(updatee, ret, return (ret));
+}
-INFO_TABLE_RET( stg_marked_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS)
+/*
+ * An update frame where the updatee has been replaced by a BLACKHOLE
+ * closure by threadPaused. We may have threads to wake up, and we
+ * also have to check whether the blackhole has been updated by
+ * another thread in the meantime.
+ */
+INFO_TABLE_RET ( stg_marked_upd_frame, UPDATE_FRAME,
+ UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee) )
+ return (P_ ret) /* the closure being returned */
{
- W_ updatee, v, i, tso, link;
+ W_ v, i, tso, link;
// we know the closure is a BLACKHOLE
- updatee = StgUpdateFrame_updatee(Sp);
v = StgInd_indirectee(updatee);
- // remove the update frame from the stack
- Sp = Sp + SIZEOF_StgUpdateFrame;
-
if (GETTAG(v) != 0) {
// updated by someone else: discard our value and use the
// other one to increase sharing, but check the blocking
// queues to see if any threads were waiting on this BLACKHOLE.
- R1 = v;
- foreign "C" checkBlockingQueues(MyCapability() "ptr",
- CurrentTSO "ptr") [R1];
- jump %ENTRY_CODE(Sp(0)) [R1];
+ ccall checkBlockingQueues(MyCapability() "ptr", CurrentTSO "ptr");
+ return (v);
}
// common case: it is still our BLACKHOLE
if (v == CurrentTSO) {
- updateWithIndirection(updatee,
- R1,
- jump %ENTRY_CODE(Sp(0)) [R1]);
+ updateWithIndirection(updatee, ret, return (ret));
}
// The other cases are all handled by the generic code
- foreign "C" updateThunk (MyCapability() "ptr", CurrentTSO "ptr",
- updatee "ptr", R1 "ptr") [R1];
+ ccall updateThunk (MyCapability() "ptr", CurrentTSO "ptr",
+ updatee "ptr", ret "ptr");
- jump %ENTRY_CODE(Sp(0)) [R1];
+ return (ret);
}
-// Special update frame code for CAFs and eager-blackholed thunks: it
-// knows how to update blackholes, but is distinct from
-// stg_marked_upd_frame so that lazy blackholing won't treat it as the
-// high watermark.
-INFO_TABLE_RET (stg_bh_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS)
+/*
+ * Special update frame code for CAFs and eager-blackholed thunks: it
+ * knows how to update blackholes, but is distinct from
+ * stg_marked_upd_frame so that lazy blackholing won't treat it as the
+ * high watermark.
+ */
+INFO_TABLE_RET ( stg_bh_upd_frame, UPDATE_FRAME,
+ UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee) )
+ return (P_ ret) /* the closure being returned */
{
- jump RET_LBL(stg_marked_upd_frame) [R1];
+ // This all compiles away to a single jump instruction (sigh)
+ jump RET_LBL(stg_marked_upd_frame)
+ ( UPDATE_FRAME_FIELDS(,,info_ptr,updatee) )
+ (ret);
}
-