diff options
Diffstat (limited to 'compiler/GHC/CmmToAsm/X86/CodeGen.hs')
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/CodeGen.hs | 480 |
1 files changed, 239 insertions, 241 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index 17e246366b..bf282fcac4 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -332,7 +332,6 @@ stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed -- ^ Instructions, and bid of new block if successive -- statements are placed in a different basic block. stmtToInstrs bid stmt = do - dflags <- getDynFlags is32Bit <- is32BitPlatform platform <- getPlatform case stmt of @@ -345,7 +344,7 @@ stmtToInstrs bid stmt = do CmmUnwind regs -> do let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable - to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr) + to_unwind_entry (reg, expr) = M.singleton reg (fmap (toUnwindExpr platform) expr) case foldMap to_unwind_entry regs of tbl | M.null tbl -> return nilOL | otherwise -> do @@ -356,14 +355,14 @@ stmtToInstrs bid stmt = do | isFloatType ty -> assignReg_FltCode format reg src | is32Bit && isWord64 ty -> assignReg_I64Code reg src | otherwise -> assignReg_IntCode format reg src - where ty = cmmRegType dflags reg + where ty = cmmRegType platform reg format = cmmTypeFormat ty CmmStore addr src | isFloatType ty -> assignMem_FltCode format addr src | is32Bit && isWord64 ty -> assignMem_I64Code addr src | otherwise -> assignMem_IntCode format addr src - where ty = cmmExprType dflags src + where ty = cmmExprType platform src format = cmmTypeFormat ty CmmBranch id -> return $ genBranch id @@ -487,10 +486,10 @@ jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) -- Expand CmmRegOff. ToDo: should we do it this way around, or convert -- CmmExprs into CmmRegOff? -mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr -mangleIndexTree dflags reg off +mangleIndexTree :: Platform -> CmmReg -> Int -> CmmExpr +mangleIndexTree platform reg off = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType dflags reg) + where width = typeWidth (cmmRegType platform reg) -- | The dual to getAnyReg: compute an expression into a register, but -- we don't mind which one it is. @@ -637,13 +636,13 @@ iselExpr64 expr -------------------------------------------------------------------------------- getRegister :: CmmExpr -> NatM Register -getRegister e = do dflags <- getDynFlags +getRegister e = do platform <- getPlatform is32Bit <- is32BitPlatform - getRegister' dflags is32Bit e + getRegister' platform is32Bit e -getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register +getRegister' :: Platform -> Bool -> CmmExpr -> NatM Register -getRegister' dflags is32Bit (CmmReg reg) +getRegister' platform is32Bit (CmmReg reg) = case reg of CmmGlobal PicBaseReg | is32Bit -> @@ -655,7 +654,7 @@ getRegister' dflags is32Bit (CmmReg reg) _ -> do let - fmt = cmmTypeFormat (cmmRegType dflags reg) + fmt = cmmTypeFormat (cmmRegType platform reg) format = fmt -- platform <- ncgPlatform <$> getConfig @@ -664,11 +663,11 @@ getRegister' dflags is32Bit (CmmReg reg) nilOL) -getRegister' dflags is32Bit (CmmRegOff r n) - = getRegister' dflags is32Bit $ mangleIndexTree dflags r n +getRegister' platform is32Bit (CmmRegOff r n) + = getRegister' platform is32Bit $ mangleIndexTree platform r n -getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e]) - = addAlignmentCheck align <$> getRegister' dflags is32Bit e +getRegister' platform is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e]) + = addAlignmentCheck align <$> getRegister' platform is32Bit e -- for 32-bit architectures, support some 64 -> 32 bit conversions: -- TO_W_(x), TO_W_(x >> 32) @@ -764,7 +763,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), return $ Any II64 (\dst -> unitOL $ LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) -getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps +getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps case mop of MO_F_Neg w -> sse2NegCode w x @@ -892,7 +891,7 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps conversionNop :: Format -> CmmExpr -> NatM Register conversionNop new_format expr - = do e_code <- getRegister' dflags is32Bit expr + = do e_code <- getRegister' platform is32Bit expr return (swizzleRegisterRep e_code new_format) @@ -1165,8 +1164,8 @@ getRegister' _ is32Bit (CmmLit (CmmInt 0 width)) -- optimisation for loading small literals on x86_64: take advantage -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit -- instruction forms are shorter. -getRegister' dflags is32Bit (CmmLit lit) - | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit) +getRegister' platform is32Bit (CmmLit lit) + | not is32Bit, isWord64 (cmmLitType platform lit), not (isBigLit lit) = let imm = litToImm lit code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst)) @@ -1181,8 +1180,8 @@ getRegister' dflags is32Bit (CmmLit lit) -- note2: all labels are small, because we're assuming the -- small memory model (see gcc docs, -mcmodel=small). -getRegister' dflags _ (CmmLit lit) - = do let format = cmmTypeFormat (cmmLitType dflags lit) +getRegister' platform _ (CmmLit lit) + = do let format = cmmTypeFormat (cmmLitType platform lit) imm = litToImm lit code dst = unitOL (MOV format (OpImm imm) (OpReg dst)) return (Any format code) @@ -1260,8 +1259,8 @@ getAmode e = do is32Bit <- is32BitPlatform getAmode' is32Bit e getAmode' :: Bool -> CmmExpr -> NatM Amode -getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags - getAmode $ mangleIndexTree dflags r n +getAmode' _ (CmmRegOff r n) = do platform <- getPlatform + getAmode $ mangleIndexTree platform r n getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), CmmLit displacement]) @@ -1361,7 +1360,7 @@ x86_complex_amode base index shift offset getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) getNonClobberedOperand (CmmLit lit) = do - if isSuitableFloatingPointLit lit + if isSuitableFloatingPointLit lit then do let CmmFloat _ w = lit Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit @@ -1369,8 +1368,8 @@ getNonClobberedOperand (CmmLit lit) = do else do is32Bit <- is32BitPlatform - dflags <- getDynFlags - if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit)) + platform <- getPlatform + if is32BitLit is32Bit lit && not (isFloatType (cmmLitType platform lit)) then return (OpImm (litToImm lit), nilOL) else getNonClobberedOperand_generic (CmmLit lit) @@ -1428,8 +1427,8 @@ getOperand (CmmLit lit) = do else do is32Bit <- is32BitPlatform - dflags <- getDynFlags - if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit)) + platform <- getPlatform + if is32BitLit is32Bit lit && not (isFloatType (cmmLitType platform lit)) then return (OpImm (litToImm lit), nilOL) else getOperand_generic (CmmLit lit) @@ -1622,34 +1621,34 @@ condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do -- anything vs operand condIntCode' is32Bit cond x y | isOperand is32Bit y = do - dflags <- getDynFlags + platform <- getPlatform (x_reg, x_code) <- getNonClobberedReg x (y_op, y_code) <- getOperand y let code = x_code `appOL` y_code `snocOL` - CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg) + CMP (cmmTypeFormat (cmmExprType platform x)) y_op (OpReg x_reg) return (CondCode False cond code) -- operand vs. anything: invert the comparison so that we can use a -- single comparison instruction. | isOperand is32Bit x , Just revcond <- maybeFlipCond cond = do - dflags <- getDynFlags + platform <- getPlatform (y_reg, y_code) <- getNonClobberedReg y (x_op, x_code) <- getOperand x let code = y_code `appOL` x_code `snocOL` - CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg) + CMP (cmmTypeFormat (cmmExprType platform x)) x_op (OpReg y_reg) return (CondCode False revcond code) -- anything vs anything condIntCode' _ cond x y = do - dflags <- getDynFlags + platform <- getPlatform (y_reg, y_code) <- getNonClobberedReg y (x_op, x_code) <- getRegOrMem x let code = y_code `appOL` x_code `snocOL` - CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op + CMP (cmmTypeFormat (cmmExprType platform x)) (OpReg y_reg) x_op return (CondCode False cond code) @@ -1666,13 +1665,13 @@ condFltCode cond x y -- an operand, but the right must be a reg. We can probably do better -- than this general case... condFltCode_sse2 = do - dflags <- getDynFlags + platform <- getPlatform (x_reg, x_code) <- getNonClobberedReg x (y_op, y_code) <- getOperand y let code = x_code `appOL` y_code `snocOL` - CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg) + CMP (floatFormat $ cmmExprWidth platform x) y_op (OpReg x_reg) -- NB(1): we need to use the unsigned comparison operators on the -- result of this comparison. return (CondCode True (condToUnsigned cond) code) @@ -2529,7 +2528,7 @@ genCCall' _ is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = d where format = intFormat width -genCCall' dflags is32Bit target dest_regs args bid = do +genCCall' _ is32Bit target dest_regs args bid = do platform <- ncgPlatform <$> getConfig case (target, dest_regs) of -- void return type prim op @@ -2639,8 +2638,8 @@ genCCall' dflags is32Bit target dest_regs args bid = do _ -> panic "genCCall: Wrong number of arguments/results for imul2" _ -> if is32Bit - then genCCall32' dflags target dest_regs args - else genCCall64' dflags target dest_regs args + then genCCall32' target dest_regs args + else genCCall64' target dest_regs args where divOp1 platform signed width results [arg_x, arg_y] = divOp platform signed width results Nothing arg_x arg_y @@ -2719,22 +2718,82 @@ genCCall' dflags is32Bit target dest_regs args bid = do -- and get the results from %al, %dl. This is not optimal, but a few -- register moves are probably not a huge deal when doing division. -genCCall32' :: DynFlags - -> ForeignTarget -- function to call +genCCall32' :: ForeignTarget -- function to call -> [CmmFormal] -- where to put the result -> [CmmActual] -- arguments (of mixed type) -> NatM InstrBlock -genCCall32' dflags target dest_regs args = do - let - prom_args = map (maybePromoteCArg dflags W32) args +genCCall32' target dest_regs args = do + config <- getConfig + let platform = ncgPlatform config + prom_args = map (maybePromoteCArg platform W32) args + + -- If the size is smaller than the word, we widen things (see maybePromoteCArg) + arg_size_bytes :: CmmType -> Int + arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth platform)) + + roundTo a x | x `mod` a == 0 = x + | otherwise = x + a - (x `mod` a) + push_arg :: CmmActual {-current argument-} + -> NatM InstrBlock -- code + + push_arg arg -- we don't need the hints on x86 + | isWord64 arg_ty = do + ChildCode64 code r_lo <- iselExpr64 arg + delta <- getDeltaNat + setDeltaNat (delta - 8) + let r_hi = getHiVRegFromLo r_lo + return ( code `appOL` + toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), + PUSH II32 (OpReg r_lo), DELTA (delta - 8), + DELTA (delta-8)] + ) + + | isFloatType arg_ty = do + (reg, code) <- getSomeReg arg + delta <- getDeltaNat + setDeltaNat (delta-size) + return (code `appOL` + toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), + DELTA (delta-size), + let addr = AddrBaseIndex (EABaseReg esp) + EAIndexNone + (ImmInt 0) + format = floatFormat (typeWidth arg_ty) + in + + -- assume SSE2 + MOV format (OpReg reg) (OpAddr addr) + + ] + ) + + | otherwise = do + -- Arguments can be smaller than 32-bit, but we still use @PUSH + -- II32@ - the usual calling conventions expect integers to be + -- 4-byte aligned. + ASSERT((typeWidth arg_ty) <= W32) return () + (operand, code) <- getOperand arg + delta <- getDeltaNat + setDeltaNat (delta-size) + return (code `snocOL` + PUSH II32 operand `snocOL` + DELTA (delta-size)) + + where + arg_ty = cmmExprType platform arg + size = arg_size_bytes arg_ty -- Byte size + + let -- Align stack to 16n for calls, assuming a starting stack -- alignment of 16n - word_size on procedure entry. Which we -- maintiain. See Note [rts/StgCRun.c : Stack Alignment on X86] - sizes = map (arg_size_bytes . cmmExprType dflags) (reverse args) - raw_arg_size = sum sizes + wORD_SIZE dflags + sizes = map (arg_size_bytes . cmmExprType platform) (reverse args) + raw_arg_size = sum sizes + platformWordSizeInBytes platform arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size - tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags + tot_arg_size = raw_arg_size + arg_pad_size - platformWordSizeInBytes platform + + delta0 <- getDeltaNat setDeltaNat (delta0 - arg_pad_size) @@ -2751,7 +2810,7 @@ genCCall32' dflags target dest_regs args = do where fn_imm = ImmCLbl lbl ForeignTarget expr conv -> do { (dyn_r, dyn_c) <- getSomeReg expr - ; ASSERT( isWord32 (cmmExprType dflags expr) ) + ; ASSERT( isWord32 (cmmExprType platform expr) ) return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } PrimTarget _ -> panic $ "genCCall: Can't handle PrimTarget call type here, error " @@ -2783,8 +2842,6 @@ genCCall32' dflags target dest_regs args = do ) setDeltaNat delta0 - platform <- getPlatform - let -- assign the results, if necessary assign_code [] = nilOL @@ -2815,198 +2872,24 @@ genCCall32' dflags target dest_regs args = do w = typeWidth ty b = widthInBytes w r_dest_hi = getHiVRegFromLo r_dest - r_dest = getRegisterReg platform (CmmLocal dest) + r_dest = getRegisterReg platform (CmmLocal dest) assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many) return (push_code `appOL` call `appOL` assign_code dest_regs) - where - -- If the size is smaller than the word, we widen things (see maybePromoteCArg) - arg_size_bytes :: CmmType -> Int - arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth dflags)) - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - push_arg :: CmmActual {-current argument-} - -> NatM InstrBlock -- code - - push_arg arg -- we don't need the hints on x86 - | isWord64 arg_ty = do - ChildCode64 code r_lo <- iselExpr64 arg - delta <- getDeltaNat - setDeltaNat (delta - 8) - let r_hi = getHiVRegFromLo r_lo - return ( code `appOL` - toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), - PUSH II32 (OpReg r_lo), DELTA (delta - 8), - DELTA (delta-8)] - ) - - | isFloatType arg_ty = do - (reg, code) <- getSomeReg arg - delta <- getDeltaNat - setDeltaNat (delta-size) - return (code `appOL` - toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), - DELTA (delta-size), - let addr = AddrBaseIndex (EABaseReg esp) - EAIndexNone - (ImmInt 0) - format = floatFormat (typeWidth arg_ty) - in - - -- assume SSE2 - MOV format (OpReg reg) (OpAddr addr) - - ] - ) - - | otherwise = do - -- Arguments can be smaller than 32-bit, but we still use @PUSH - -- II32@ - the usual calling conventions expect integers to be - -- 4-byte aligned. - ASSERT((typeWidth arg_ty) <= W32) return () - (operand, code) <- getOperand arg - delta <- getDeltaNat - setDeltaNat (delta-size) - return (code `snocOL` - PUSH II32 operand `snocOL` - DELTA (delta-size)) - - where - arg_ty = cmmExprType dflags arg - size = arg_size_bytes arg_ty -- Byte size - -genCCall64' :: DynFlags - -> ForeignTarget -- function to call +genCCall64' :: ForeignTarget -- function to call -> [CmmFormal] -- where to put the result -> [CmmActual] -- arguments (of mixed type) -> NatM InstrBlock -genCCall64' dflags target dest_regs args = do +genCCall64' target dest_regs args = do config <- getConfig let platform = ncgPlatform config -- load up the register arguments - let prom_args = map (maybePromoteCArg dflags W32) args - - (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code) - <- - if platformOS platform == OSMinGW32 - then load_args_win prom_args [] [] (allArgRegs platform) nilOL - else do - (stack_args, aregs, fregs, load_args_code, assign_args_code) - <- load_args prom_args (allIntArgRegs platform) - (allFPArgRegs platform) - nilOL nilOL - let used_regs rs as = reverse (drop (length rs) (reverse as)) - fregs_used = used_regs fregs (allFPArgRegs platform) - aregs_used = used_regs aregs (allIntArgRegs platform) - return (stack_args, aregs_used, fregs_used, load_args_code - , assign_args_code) - - let - arg_regs_used = int_regs_used ++ fp_regs_used - arg_regs = [eax] ++ arg_regs_used - -- for annotating the call instruction with - sse_regs = length fp_regs_used - arg_stack_slots = if platformOS platform == OSMinGW32 - then length stack_args + length (allArgRegs platform) - else length stack_args - tot_arg_size = arg_size * arg_stack_slots - - - -- Align stack to 16n for calls, assuming a starting stack - -- alignment of 16n - word_size on procedure entry. Which we - -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86] - let word_size = platformWordSizeInBytes platform - (real_size, adjust_rsp) <- - if (tot_arg_size + word_size) `rem` 16 == 0 - then return (tot_arg_size, nilOL) - else do -- we need to adjust... - delta <- getDeltaNat - setDeltaNat (delta - word_size) - return (tot_arg_size + word_size, toOL [ - SUB II64 (OpImm (ImmInt word_size)) (OpReg rsp), - DELTA (delta - word_size) ]) - - -- push the stack args, right to left - push_code <- push_args (reverse stack_args) nilOL - -- On Win64, we also have to leave stack space for the arguments - -- that we are passing in registers - lss_code <- if platformOS platform == OSMinGW32 - then leaveStackSpace (length (allArgRegs platform)) - else return nilOL - delta <- getDeltaNat - - -- deal with static vs dynamic call targets - (callinsns,_cconv) <- - case target of - ForeignTarget (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) arg_regs), conv) - where fn_imm = ImmCLbl lbl - ForeignTarget expr conv - -> do (dyn_r, dyn_c) <- getSomeReg expr - return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) - PrimTarget _ - -> panic $ "genCCall: Can't handle PrimTarget call type here, error " - ++ "probably because too many return values." - - let - -- The x86_64 ABI requires us to set %al to the number of SSE2 - -- registers that contain arguments, if the called routine - -- is a varargs function. We don't know whether it's a - -- varargs function or not, so we have to assume it is. - -- - -- It's not safe to omit this assignment, even if the number - -- of SSE2 regs in use is zero. If %al is larger than 8 - -- on entry to a varargs function, seg faults ensue. - assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax)) - - let call = callinsns `appOL` - toOL ( - -- Deallocate parameters after call for ccall; - -- stdcall has callee do it, but is not supported on - -- x86_64 target (see #3336) - (if real_size==0 then [] else - [ADD (intFormat (ncgWordWidth config)) (OpImm (ImmInt real_size)) (OpReg esp)]) - ++ - [DELTA (delta + real_size)] - ) - setDeltaNat (delta + real_size) - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [dest] = - case typeWidth rep of - W32 | isFloatType rep -> unitOL (MOV (floatFormat W32) - (OpReg xmm0) - (OpReg r_dest)) - W64 | isFloatType rep -> unitOL (MOV (floatFormat W64) - (OpReg xmm0) - (OpReg r_dest)) - _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest)) - where - rep = localRegType dest - r_dest = getRegisterReg platform (CmmLocal dest) - assign_code _many = panic "genCCall.assign_code many" + let prom_args = map (maybePromoteCArg platform W32) args - return (adjust_rsp `appOL` - push_code `appOL` - load_args_code `appOL` - assign_args_code `appOL` - lss_code `appOL` - assign_eax sse_regs `appOL` - call `appOL` - assign_code dest_regs) - - where arg_size = 8 -- always, at the mo - - - load_args :: [CmmExpr] + let load_args :: [CmmExpr] -> [Reg] -- int regs avail for args -> [Reg] -- FP regs avail for args -> InstrBlock -- code computing args @@ -3064,7 +2947,7 @@ genCCall64' dflags target dest_regs args = do acode' = acode `snocOL` reg2reg arg_fmt tmp r return (code',acode') - arg_rep = cmmExprType dflags arg + arg_rep = cmmExprType platform arg arg_fmt = cmmTypeFormat arg_rep load_args_win :: [CmmExpr] @@ -3095,7 +2978,9 @@ genCCall64' dflags target dest_regs args = do load_args_win rest (ireg : usedInt) usedFP regs (code `appOL` arg_code ireg) where - arg_rep = cmmExprType dflags arg + arg_rep = cmmExprType platform arg + + arg_size = 8 -- always, at the mo push_args [] code = return code push_args (arg:rest) code @@ -3104,9 +2989,9 @@ genCCall64' dflags target dest_regs args = do delta <- getDeltaNat setDeltaNat (delta-arg_size) let code' = code `appOL` arg_code `appOL` toOL [ - SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp), + SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp), DELTA (delta-arg_size), - MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel (targetPlatform dflags) 0))] + MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel platform 0))] push_args rest code' | otherwise = do @@ -3122,22 +3007,135 @@ genCCall64' dflags target dest_regs args = do DELTA (delta-arg_size)] push_args rest code' where - arg_rep = cmmExprType dflags arg + arg_rep = cmmExprType platform arg width = typeWidth arg_rep leaveStackSpace n = do delta <- getDeltaNat setDeltaNat (delta - n * arg_size) return $ toOL [ - SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp), + SUB II64 (OpImm (ImmInt (n * platformWordSizeInBytes platform))) (OpReg rsp), DELTA (delta - n * arg_size)] -maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr -maybePromoteCArg dflags wto arg + (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code) + <- + if platformOS platform == OSMinGW32 + then load_args_win prom_args [] [] (allArgRegs platform) nilOL + else do + (stack_args, aregs, fregs, load_args_code, assign_args_code) + <- load_args prom_args (allIntArgRegs platform) + (allFPArgRegs platform) + nilOL nilOL + let used_regs rs as = reverse (drop (length rs) (reverse as)) + fregs_used = used_regs fregs (allFPArgRegs platform) + aregs_used = used_regs aregs (allIntArgRegs platform) + return (stack_args, aregs_used, fregs_used, load_args_code + , assign_args_code) + + let + arg_regs_used = int_regs_used ++ fp_regs_used + arg_regs = [eax] ++ arg_regs_used + -- for annotating the call instruction with + sse_regs = length fp_regs_used + arg_stack_slots = if platformOS platform == OSMinGW32 + then length stack_args + length (allArgRegs platform) + else length stack_args + tot_arg_size = arg_size * arg_stack_slots + + + -- Align stack to 16n for calls, assuming a starting stack + -- alignment of 16n - word_size on procedure entry. Which we + -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86] + let word_size = platformWordSizeInBytes (ncgPlatform config) + (real_size, adjust_rsp) <- + if (tot_arg_size + word_size) `rem` 16 == 0 + then return (tot_arg_size, nilOL) + else do -- we need to adjust... + delta <- getDeltaNat + setDeltaNat (delta - word_size) + return (tot_arg_size + word_size, toOL [ + SUB II64 (OpImm (ImmInt word_size)) (OpReg rsp), + DELTA (delta - word_size) ]) + + -- push the stack args, right to left + push_code <- push_args (reverse stack_args) nilOL + -- On Win64, we also have to leave stack space for the arguments + -- that we are passing in registers + lss_code <- if platformOS platform == OSMinGW32 + then leaveStackSpace (length (allArgRegs platform)) + else return nilOL + delta <- getDeltaNat + + -- deal with static vs dynamic call targets + (callinsns,_cconv) <- + case target of + ForeignTarget (CmmLit (CmmLabel lbl)) conv + -> -- ToDo: stdcall arg sizes + return (unitOL (CALL (Left fn_imm) arg_regs), conv) + where fn_imm = ImmCLbl lbl + ForeignTarget expr conv + -> do (dyn_r, dyn_c) <- getSomeReg expr + return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) + PrimTarget _ + -> panic $ "genCCall: Can't handle PrimTarget call type here, error " + ++ "probably because too many return values." + + let + -- The x86_64 ABI requires us to set %al to the number of SSE2 + -- registers that contain arguments, if the called routine + -- is a varargs function. We don't know whether it's a + -- varargs function or not, so we have to assume it is. + -- + -- It's not safe to omit this assignment, even if the number + -- of SSE2 regs in use is zero. If %al is larger than 8 + -- on entry to a varargs function, seg faults ensue. + assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax)) + + let call = callinsns `appOL` + toOL ( + -- Deallocate parameters after call for ccall; + -- stdcall has callee do it, but is not supported on + -- x86_64 target (see #3336) + (if real_size==0 then [] else + [ADD (intFormat (ncgWordWidth config)) (OpImm (ImmInt real_size)) (OpReg esp)]) + ++ + [DELTA (delta + real_size)] + ) + setDeltaNat (delta + real_size) + + let + -- assign the results, if necessary + assign_code [] = nilOL + assign_code [dest] = + case typeWidth rep of + W32 | isFloatType rep -> unitOL (MOV (floatFormat W32) + (OpReg xmm0) + (OpReg r_dest)) + W64 | isFloatType rep -> unitOL (MOV (floatFormat W64) + (OpReg xmm0) + (OpReg r_dest)) + _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest)) + where + rep = localRegType dest + r_dest = getRegisterReg platform (CmmLocal dest) + assign_code _many = panic "genCCall.assign_code many" + + return (adjust_rsp `appOL` + push_code `appOL` + load_args_code `appOL` + assign_args_code `appOL` + lss_code `appOL` + assign_eax sse_regs `appOL` + call `appOL` + assign_code dest_regs) + + +maybePromoteCArg :: Platform -> Width -> CmmExpr -> CmmExpr +maybePromoteCArg platform wto arg | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg] | otherwise = arg where - wfrom = cmmExprWidth dflags arg + wfrom = cmmExprWidth platform arg outOfLineCmmOp :: BlockId -> CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock @@ -3257,7 +3255,7 @@ genSwitch expr targets = do let platform = ncgPlatform config if ncgPIC config then do - (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset) + (reg,e_code) <- getNonClobberedReg (cmmOffset platform expr offset) -- getNonClobberedReg because it needs to survive across t_code lbl <- getNewLabelNat let is32bit = target32Bit platform @@ -3298,7 +3296,7 @@ genSwitch expr targets = do JMP_TBL (OpReg tableReg) ids rosection lbl ] else do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) + (reg,e_code) <- getSomeReg (cmmOffset platform expr offset) lbl <- getNewLabelNat let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (platformWordSizeInBytes platform)) (ImmCLbl lbl)) code = e_code `appOL` toOL [ |