path: root/compiler/codeGen/StgCmmLayout.hs
author     Simon Marlow <marlowsd@gmail.com>  2012-10-03 09:30:56 +0100
committer  Simon Marlow <marlowsd@gmail.com>  2012-10-08 09:04:40 +0100
commit     a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
tree       b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /compiler/codeGen/StgCmmLayout.hs
parent     aed37acd4d157791381800d5de960a2461bcbef3 (diff)
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm code
with argument-passing and function calls. For example:

  foo ( gcptr a, bits32 b )
  {
    if (b > 0) {
       // we can make tail calls passing arguments:
       jump stg_ap_0_fast(a);
    }

    return (x,y);
  }

More details on the new cmm syntax are in Note [Syntax of .cmm files] in
CmmParse.y.

The old syntax is still more-or-less supported for those occasional code
fragments that really need to explicitly manipulate the stack. However
there are a couple of differences: it is now obligatory to give a list of
live GlobalRegs on every jump, e.g.

  jump %ENTRY_CODE(Sp(0)) [R1];

Again, more details in Note [Syntax of .cmm files].

I have rewritten most of the .cmm files in the RTS into the new syntax,
except for AutoApply.cmm which is generated by the genapply program: this
file could be generated in the new syntax instead and would probably be
better off for it, but I ran out of enthusiasm.

Some other changes in this batch:

 - The PrimOp calling convention is gone; primops now use the ordinary
   NativeNodeCall convention. This means that primops and "foreign import
   prim" code must be written in high-level cmm, but they can now take
   more than 10 arguments.

 - CmmSink now does constant-folding (should fix #7219).

 - .cmm files now go through the cmmPipeline, and as a result we generate
   better code in many cases. All the object files generated for the RTS
   .cmm files are now smaller. Performance should be better too, but I
   haven't measured it yet.

 - RET_DYN frames are removed from the RTS; lots of code goes away.

 - We now have some more canned GC points to cover unboxed tuples with
   2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'compiler/codeGen/StgCmmLayout.hs')
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs  44
1 file changed, 9 insertions(+), 35 deletions(-)
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 75d8d1c38f..4742332107 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -111,7 +111,7 @@ emitCall convs fun args
--
emitCallWithExtraStack
:: (Convention, Convention) -> CmmExpr -> [CmmExpr]
- -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ReturnKind
+ -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
= do { dflags <- getDynFlags
; adjustHpBackwards
@@ -124,7 +124,7 @@ emitCallWithExtraStack (callConv, retConv) fun args extra_stack
AssignTo res_regs _ -> do
k <- newLabelC
let area = Young k
- (off, copyin) = copyInOflow dflags retConv area res_regs
+ (off, copyin) = copyInOflow dflags retConv area res_regs []
copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
extra_stack
emit (copyout <*> mkLabel k <*> copyin)
@@ -222,7 +222,7 @@ direct_call caller call_conv lbl arity args
emitCallWithExtraStack (call_conv, NativeReturn)
target
(nonVArgs fast_args)
- (mkStkOffsets dflags (stack_args dflags))
+ (nonVArgs (stack_args dflags))
where
target = CmmLit (CmmLabel lbl)
(fast_args, rest_args) = splitAt real_arity args
@@ -326,32 +326,7 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-------------------------------------------------------------------------
--- Fix the byte-offsets of a bunch of things to push on the stack
-
--- This is used for pushing slow-call continuations.
--- See Note [over-saturated calls].
-
-mkStkOffsets
- :: DynFlags
- -> [(ArgRep, Maybe CmmExpr)] -- things to make offsets for
- -> ( ByteOff -- OUTPUTS: Topmost allocated word
- , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
-mkStkOffsets dflags things
- = loop 0 [] (reverse things)
- where
- loop offset offs [] = (offset,offs)
- loop offset offs ((_,Nothing):things) = loop offset offs things
- -- ignore Void arguments
- loop offset offs ((rep,Just thing):things)
- = loop thing_off ((thing, thing_off):offs) things
- where
- thing_off = offset + argRepSizeW dflags rep * wORD_SIZE dflags
- -- offset of thing is offset+size, because we're
- -- growing the stack *downwards* as the offsets increase.
-
-
--------------------------------------------------------------------------
--- Classifying arguments: ArgRep
+-- Classifying arguments: ArgRep
-------------------------------------------------------------------------
-- ArgRep is not exported (even abstractly)
@@ -472,7 +447,7 @@ mkArgDescr _nm args
let arg_bits = argBits dflags arg_reps
arg_reps = filter isNonV (map idArgRep args)
-- Getting rid of voids eases matching of standard patterns
- case stdPattern dflags arg_reps of
+ case stdPattern arg_reps of
Just spec_id -> return (ArgSpec spec_id)
Nothing -> return (ArgGen arg_bits)
@@ -483,10 +458,9 @@ argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
++ argBits dflags args
----------------------
-stdPattern :: DynFlags -> [ArgRep] -> Maybe StgHalfWord
-stdPattern dflags reps
- = fmap (toStgHalfWord dflags)
- $ case reps of
+stdPattern :: [ArgRep] -> Maybe Int
+stdPattern reps
+ = case reps of
[] -> Just ARG_NONE -- just void args, probably
[N] -> Just ARG_N
[P] -> Just ARG_P
@@ -545,7 +519,7 @@ emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
; let args' = if node_points then (node : arg_regs) else arg_regs
conv = if nodeMustPointToIt dflags lf_info then NativeNodeCall
else NativeDirectCall
- (offset, _) = mkCallEntry dflags conv args'
+ (offset, _) = mkCallEntry dflags conv args' []
; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
}
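
The removed mkStkOffsets helper assigned byte offsets to the arguments pushed
on the stack for slow-call continuations; with this patch that layout work is
left to the Cmm pipeline, and callers simply pass the extra stack arguments as
a plain [CmmExpr]. The following stand-alone Haskell sketch (not GHC code;
argument representations are simplified to names with sizes in words, and the
word size is passed explicitly) illustrates the offset scheme the old helper
used, where offsets grow because the stack grows downwards:

  -- Sketch of the removed mkStkOffsets: assign byte offsets to stack
  -- arguments.  Each argument is a name plus Just its size in words,
  -- or Nothing for a void argument, which is skipped.
  mkOffsets :: Int                      -- word size in bytes (e.g. 8)
            -> [(String, Maybe Int)]    -- arguments, in push order
            -> (Int, [(String, Int)])   -- (topmost allocated byte, offsets)
  mkOffsets wordSize things = go 0 [] (reverse things)
    where
      go off acc []                      = (off, acc)
      go off acc ((_,  Nothing) : rest)  = go off acc rest   -- ignore voids
      go off acc ((nm, Just sz) : rest)  = go off' ((nm, off') : acc) rest
        where
          -- the offset of a thing is offset + size, because the stack
          -- grows downwards as the offsets increase
          off' = off + sz * wordSize

  -- For example (assuming an 8-byte word):
  --   mkOffsets 8 [("a", Just 1), ("v", Nothing), ("b", Just 2)]
  --   == (24, [("a", 24), ("b", 16)])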