Diffstat (limited to 'compiler/GHC/CmmToAsm/AArch64/CodeGen.hs')
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/CodeGen.hs | 1358
1 file changed, 1358 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
new file mode 100644
index 0000000000..b0984070fc
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -0,0 +1,1358 @@
+{-# language GADTs #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NumericUnderscores #-}
+module GHC.CmmToAsm.AArch64.CodeGen (
+      cmmTopCodeGen
+    , generateJumpTableForInstr
+)
+
+where
+
+-- NCG stuff:
+import GHC.Prelude hiding (EQ)
+
+import GHC.Platform.Regs
+import GHC.CmmToAsm.AArch64.Instr
+import GHC.CmmToAsm.AArch64.Regs
+import GHC.CmmToAsm.AArch64.Cond
+
+import GHC.CmmToAsm.CPrim
+import GHC.Cmm.DebugBlock
+import GHC.CmmToAsm.Monad
+   ( NatM, getNewRegNat
+   , getPicBaseMaybeNat, getPlatform, getConfig
+   , getDebugBlock, getFileId
+   )
+-- import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.PIC
+import GHC.CmmToAsm.Format
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Types
+import GHC.Platform.Reg
+import GHC.Platform
+
+-- Our intermediate code:
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Types.Tickish ( GenTickish(..) )
+import GHC.Types.SrcLoc  ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
+
+-- The rest:
+import GHC.Data.OrdList
+import GHC.Utils.Outputable
+
+import Control.Monad    ( mapAndUnzipM, when, foldM )
+import Data.Word
+import Data.Maybe
+import GHC.Float
+
+import GHC.Types.Basic
+import GHC.Types.ForeignCall
+import GHC.Data.FastString
+import GHC.Utils.Misc
+import GHC.Utils.Panic
+
+-- Note [General layout of an NCG]
+-- @cmmTopCodeGen@ will be our main entry point to code gen. Here we'll get
+-- @RawCmmDecl@; see GHC.Cmm
+--
+--   RawCmmDecl = GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
+--
+--   GenCmmDecl d h g = CmmProc h CLabel [GlobalReg] g
+--                    | CmmData Section d
+--
+-- As a result we want to transform this to a list of @NatCmmDecl@, which is
+-- defined in @GHC.CmmToAsm.Instr@ as
+--
+--   type NatCmmDecl statics instr
+--        = GenCmmDecl statics (LabelMap RawCmmStatics) (ListGraph instr)
+--
+-- Thus we'll turn
+--   GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
+-- into
+--   [GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) (ListGraph Instr)]
+--
+-- where @CmmGraph@ is
+--
+--   type CmmGraph = GenCmmGraph CmmNode
+--   data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C }
+--   type CmmBlock = Block CmmNode C C
+--
+-- and @ListGraph Instr@ is
+--
+--   newtype ListGraph i = ListGraph [GenBasicBlock i]
+--   data GenBasicBlock i = BasicBlock BlockId [i]
+
+cmmTopCodeGen
+    :: RawCmmDecl
+    -> NatM [NatCmmDecl RawCmmStatics Instr]
+
+-- Thus we'll have to deal with either CmmProc ...
+cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
+  -- do
+  --   traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n"
+  --         ++ showSDocUnsafe (ppr cmm)
+
+  let blocks = toBlockListEntryFirst graph
+  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+  picBaseMb <- getPicBaseMaybeNat
+
+  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
+      tops = proc : concat statics
+
+  case picBaseMb of
+      Just _picBase -> panic "AArch64.cmmTopCodeGen: picBase not implemented"
+      Nothing -> return tops
+
+-- ... or CmmData.
+cmmTopCodeGen _cmm@(CmmData sec dat) = do
+  -- do
+  --   traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n"
+  --         ++ showSDocUnsafe (ppr cmm)
+  return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen
+    :: Block CmmNode C C
+    -> NatM ( [NatBasicBlock Instr]
+            , [NatCmmDecl RawCmmStatics Instr])
+
+basicBlockCodeGen block = do
+  config <- getConfig
+  -- do
+  --   traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
+  --         ++ showSDocUnsafe (ppr block)
+  let (_, nodes, tail) = blockSplit block
+      id = entryLabel block
+      stmts = blockToList nodes
+
+      header_comment_instr = unitOL $ MULTILINE_COMMENT (
+          text "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
+          $+$ pdoc (ncgPlatform config) block
+          )
+  -- Generate location directive
+  dbg <- getDebugBlock (entryLabel block)
+  loc_instrs <- case dblSourceTick =<< dbg of
+    Just (SourceNote span name)
+      -> do fileId <- getFileId (srcSpanFile span)
+            let line = srcSpanStartLine span; col = srcSpanStartCol span
+            return $ unitOL $ LOCATION fileId line col name
+    _ -> return nilOL
+  (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
+  (!tail_instrs,_) <- stmtToInstrs mid_bid tail
+  let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs
+  -- TODO: The x86 backend runs @verifyBasicBlock@ here and inserts
+  --      unwinding info. See Ticket 19913
+  -- code generation may introduce new basic block boundaries, which
+  -- are indicated by the NEWBLOCK instruction. We must split up the
+  -- instruction stream into basic blocks again. Also, we extract
+  -- LDATAs here too.
+  let
+        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+          = ([], BasicBlock id instrs : blocks, statics)
+        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+          = (instrs, blocks, CmmData sec dat:statics)
+        mkBlocks instr (instrs,blocks,statics)
+          = (instr:instrs, blocks, statics)
+  return (BasicBlock id top : other_blocks, statics)
+
+
+-- -----------------------------------------------------------------------------
+-- | Utilities
+ann :: SDoc -> Instr -> Instr
+ann doc instr {- | debugIsOn -} = ANN doc instr
+-- ann _ instr = instr
+{-# INLINE ann #-}
+
+-- Using pprExpr would hide the AST; @ANN@ will end up in the assembly with
+-- -dppr-debug. The idea is that we can trivially see how a cmm expression
+-- ended up producing the assembly we see. By having the verbatim AST printed
+-- we can simply check the patterns that were matched to arrive at the
+-- assembly we generated.
+--
+-- pprExpr will hide a lot of noise of the underlying data structure and print
+-- the expression into something that can be easily read by a human. However
+-- going back to the exact CmmExpr representation can be laborious and adds
+-- indirections to find the matches that lead to the assembly.
+--
+-- An improvement could be to have
+--
+--    (pprExpr genericPlatform e) <> parens (text . show e)
+--
+-- to have the best of both worlds.
+--
+-- Note: debugIsOn is too restrictive, it only works for debug compilers.
+-- However, we do not only want to inspect this for debug compilers. Ideally
+-- we'd have a check for -dppr-debug here already, such that we don't even
+-- generate the ANN expressions. However, as they are lazy, they shouldn't be
+-- forced until we actually force them, and without -dppr-debug they should
+-- never end up being forced.
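As a reading aid, the NEWBLOCK splitting in basicBlockCodeGen above behaves like the following list-based sketch (illustrative only; the real code folds over an OrdList and also extracts LDATA statics):

    splitAtNewBlocks :: BlockId -> [Instr] -> [NatBasicBlock Instr]
    splitAtNewBlocks entry = go entry []
      where
        go bid acc []                = [BasicBlock bid (reverse acc)]
        go bid acc (NEWBLOCK b : is) = BasicBlock bid (reverse acc) : go b [] is
        go bid acc (i : is)          = go bid (i : acc) is
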
+annExpr :: CmmExpr -> Instr -> Instr
+annExpr e instr {- | debugIsOn -} = ANN (text . show $ e) instr
+-- annExpr e instr {- | debugIsOn -} = ANN (pprExpr genericPlatform e) instr
+-- annExpr _ instr = instr
+{-# INLINE annExpr #-}
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+-- TODO jump tables would be a lot faster, but we'll use bare bones for now.
+-- this is usually done by sticking the jump table ids into an instruction
+-- and then having the @generateJumpTableForInstr@ callback produce the jump
+-- table as a static.
+--
+-- See Ticket 19912
+--
+-- data SwitchTargets =
+--    SwitchTargets
+--        Bool                       -- Signed values
+--        (Integer, Integer)         -- Range
+--        (Maybe Label)              -- Default value
+--        (M.Map Integer Label)      -- The branches
+--
+-- Non Jumptable plan:
+-- xE <- expr
+--
+genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock
+genSwitch expr targets = do -- pprPanic "genSwitch" (ppr expr)
+  (reg, format, code) <- getSomeReg expr
+  let w = formatToWidth format
+  let mkbranch acc (key, bid) = do
+        (keyReg, _format, code) <- getSomeReg (CmmLit (CmmInt key w))
+        return $ code `appOL`
+                 toOL [ CMP (OpReg w reg) (OpReg w keyReg)
+                      , BCOND EQ (TBlock bid)
+                      ] `appOL` acc
+      def_code = case switchTargetsDefault targets of
+        Just bid -> unitOL (B (TBlock bid))
+        Nothing  -> nilOL
+
+  switch_code <- foldM mkbranch nilOL (switchTargetsCases targets)
+  return $ code `appOL` switch_code `appOL` def_code
+
+-- We don't do jump tables for now, see Ticket 19912
+generateJumpTableForInstr :: NCGConfig -> Instr
+  -> Maybe (NatCmmDecl RawCmmStatics Instr)
+generateJumpTableForInstr _ _ = Nothing
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- See Note [Keeping track of the current block] for why
+-- we pass the BlockId.
+stmtsToInstrs :: BlockId -- ^ Basic block these statements will start to be placed in.
+              -> [CmmNode O O] -- ^ Cmm Statements
+              -> NatM (InstrBlock, BlockId) -- ^ Resulting instructions
+stmtsToInstrs bid stmts =
+  go bid stmts nilOL
+  where
+    go bid  []       instrs = return (instrs,bid)
+    go bid (s:stmts) instrs = do
+      (instrs',bid') <- stmtToInstrs bid s
+      -- If the statement introduced a new block, we use that one
+      let !newBid = fromMaybe bid bid'
+      go newBid stmts (instrs `appOL` instrs')
+
+-- | `bid` refers to the current block and is used to update the CFG
+--   if new blocks are inserted in the control flow.
+-- See Note [Keeping track of the current block] for more details.
+stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
+             -> CmmNode e x
+             -> NatM (InstrBlock, Maybe BlockId)
+             -- ^ Instructions, and bid of new block if successive
+             -- statements are placed in a different basic block.
+stmtToInstrs bid stmt = do
+  -- traceM $ "-- -------------------------- stmtToInstrs -------------------------- --\n"
+  --       ++ showSDocUnsafe (ppr stmt)
+  platform <- getPlatform
+  case stmt of
+    CmmUnsafeForeignCall target result_regs args
+       -> genCCall target result_regs args bid
+
+    _ -> (,Nothing) <$> case stmt of
+      CmmComment s   -> return (unitOL (COMMENT (ftext s)))
+      CmmTick {}     -> return nilOL
+
+      CmmAssign reg src
+        | isFloatType ty         -> assignReg_FltCode format reg src
+        | otherwise              -> assignReg_IntCode format reg src
+          where ty = cmmRegType platform reg
+                format = cmmTypeFormat ty
+
+      CmmStore addr src
+        | isFloatType ty         -> assignMem_FltCode format addr src
+        | otherwise              -> assignMem_IntCode format addr src
+          where ty = cmmExprType platform src
+                format = cmmTypeFormat ty
+
+      CmmBranch id          -> genBranch id
+
+      --We try to arrange blocks such that the likely branch is the fallthrough
+      --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
+      CmmCondBranch arg true false _prediction ->
+          genCondBranch bid true false arg
+
+      CmmSwitch arg ids -> genSwitch arg ids
+
+      CmmCall { cml_target = arg } -> genJump arg
+
+      CmmUnwind _regs -> return nilOL
+
+      _ -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+--   They are really trees of insns to facilitate fast appending, where a
+--   left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+        = OrdList Instr
+
+-- | Registers passed up the tree.  If the stix code forces the register
+--      to live in a pre-decided machine register, it comes out as @Fixed@;
+--      otherwise, it comes out as @Any@, and the parent can decide which
+--      register to put it in.
+--
+data Register
+        = Fixed Format Reg InstrBlock
+        | Any   Format (Reg -> InstrBlock)
+
+-- | Sometimes we need to change the Format of a register. Primarily during
+-- conversion.
+swizzleRegisterRep :: Format -> Register -> Register
+swizzleRegisterRep format (Fixed _ reg code) = Fixed format reg code
+swizzleRegisterRep format (Any _ codefn)     = Any   format codefn
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: Platform -> CmmReg -> Reg
+
+getRegisterReg _ (CmmLocal (LocalReg u pk))
+  = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
+
+getRegisterReg platform (CmmGlobal mid)
+  = case globalRegMaybe platform mid of
+        Just reg -> RegReal reg
+        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+        -- By this stage, the only MagicIds remaining should be the
+        -- ones which map to a real machine register on this
+        -- platform.  Hence if it's not mapped to a register, something
+        -- went wrong earlier in the pipeline.
+-- | Convert a BlockId to some CmmStatic data
+-- TODO: Add JumpTable Logic, see Ticket 19912
+-- jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
+-- jumpTableEntry config Nothing   = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
+-- jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
+--     where blockLabel = blockLbl blockid
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+--   we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+        tmp <- getNewRegNat rep
+        return (tmp, rep, code tmp)
+    Fixed rep reg code ->
+        return (reg, rep, code)
+
+-- TODO OPT: we might be able to give getRegister
+--           a hint, what kind of register we want.
+getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg, Format, InstrBlock)
+getFloatReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code | isFloatFormat rep -> do
+      tmp <- getNewRegNat rep
+      return (tmp, rep, code tmp)
+    Any II32 code -> do
+      tmp <- getNewRegNat FF32
+      return (tmp, FF32, code tmp)
+    Any II64 code -> do
+      tmp <- getNewRegNat FF64
+      return (tmp, FF64, code tmp)
+    Any _w _code -> do
+      config <- getConfig
+      pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr)
+    -- can't do much for fixed.
+    Fixed rep reg code ->
+      return (reg, rep, code)
+
+-- TODO: bounds. We can't put just any immediate
+-- value in; they are constrained.
+-- See Ticket 19911
+litToImm' :: CmmLit -> NatM (Operand, InstrBlock)
+litToImm' lit = return (OpImm (litToImm lit), nilOL)
+
+
+getRegister :: CmmExpr -> NatM Register
+getRegister e = do
+  config <- getConfig
+  getRegister' config (ncgPlatform config) e
+
+-- Note [Handling PIC on AArch64]
+-- AArch64 does not have a special PIC register, the general approach is to
+-- simply go through the GOT, and there is assembly support for this:
+--
+--   // Load the address of 'sym' from the GOT using ADRP and LDR (used for
+--   // position-independent code on AArch64):
+--   adrp x0, #:got:sym
+--   ldr x0, [x0, #:got_lo12:sym]
+--
+-- See also: https://developer.arm.com/documentation/dui0774/i/armclang-integrated-assembler-directives/assembly-expressions
+--
+-- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the
+-- @cmmMakePicReference@.  This is in turn called from @cmmMakeDynamicReference@
+-- also in @GHC.CmmToAsm.PIC@ from where it is also exported.  There are two
+-- callsites for this. One is in this module to produce the @target@ in
+-- @genCCall@, the other is in @GHC.CmmToAsm@ in @cmmExprNative@.
+--
+-- Conceptually we do not want any special PicBaseReg to be used on AArch64. If
+-- we want to distinguish between symbol loading, we need to address this through
+-- the way we load it, not through a register.
+--
+
+getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
+-- OPTIMIZATION WARNING: CmmExpr rewrites
+-- 1. Rewrite: Reg + (-n) => Reg - n
+-- TODO: this expression shouldn't even be generated to begin with.
+getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
+  = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
+
+getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
+  = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
+
+
+-- Generic case.
+getRegister' config plat expr
+  = case expr of
+    CmmReg (CmmGlobal PicBaseReg)
+      -> pprPanic "getRegisterReg-memory" (ppr $ PicBaseReg)
+    CmmLit lit
+      -> case lit of
+
+        -- TODO handle CmmInt 0 specially, use wzr or xzr.
+
+        CmmInt i W8 -> do
+          return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowS W8 i))))))
+        CmmInt i W16 -> do
+          return (Any (intFormat W16) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger (narrowS W16 i))))))
+
+        -- We need to be careful to not shorten this for negative literals.
+        -- Those need the upper bits set.
+        -- We'd either have to explicitly sign-extend, or figure out
+        -- something smarter. Lowered to `MOV dst XZR`.
+        CmmInt i w | isNbitEncodeable 16 i, i >= 0 -> do
+          return (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger i)))))
+        CmmInt i w | isNbitEncodeable 32 i, i >= 0 -> do
+          let half0 = fromIntegral (fromIntegral i :: Word16)
+              half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
+          return (Any (intFormat w) (\dst -> toOL [ annExpr expr
+                                                  $ MOV (OpReg W32 dst) (OpImm (ImmInt half0))
+                                                  , MOVK (OpReg W32 dst) (OpImmShift (ImmInt half1) SLSL 16)
+                                                  ]))
+        -- fallback for W32
+        CmmInt i W32 -> do
+          let half0 = fromIntegral (fromIntegral i :: Word16)
+              half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
+          return (Any (intFormat W32) (\dst -> toOL [ annExpr expr
+                                                    $ MOV (OpReg W32 dst) (OpImm (ImmInt half0))
+                                                    , MOVK (OpReg W32 dst) (OpImmShift (ImmInt half1) SLSL 16)
+                                                    ]))
+        -- anything else
+        CmmInt i W64 -> do
+          let half0 = fromIntegral (fromIntegral i :: Word16)
+              half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
+              half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
+              half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
+          return (Any (intFormat W64) (\dst -> toOL [ annExpr expr
+                                                    $ MOV (OpReg W64 dst) (OpImm (ImmInt half0))
+                                                    , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half1) SLSL 16)
+                                                    , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half2) SLSL 32)
+                                                    , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half3) SLSL 48)
+                                                    ]))
+        CmmInt _i rep -> do
+          (op, imm_code) <- litToImm' lit
+          return (Any (intFormat rep) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg rep dst) op)))
+
+        -- floatToBytes (fromRational f)
+        CmmFloat 0 w   -> do
+          (op, imm_code) <- litToImm' lit
+          return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op)))
+
+        CmmFloat _f W8  -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
+        CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
+        CmmFloat f W32 -> do
+          let word = castFloatToWord32 (fromRational f) :: Word32
+              half0 = fromIntegral (fromIntegral word :: Word16)
+              half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16)
+          tmp <- getNewRegNat (intFormat W32)
+          return (Any (floatFormat W32) (\dst -> toOL [ annExpr expr
+                                                      $ MOV (OpReg W32 tmp) (OpImm (ImmInt half0))
+                                                      , MOVK (OpReg W32 tmp) (OpImmShift (ImmInt half1) SLSL 16)
+                                                      , MOV (OpReg W32 dst) (OpReg W32 tmp)
+                                                      ]))
+        CmmFloat f W64 -> do
+          let word = castDoubleToWord64 (fromRational f) :: Word64
+              half0 = fromIntegral (fromIntegral word :: Word16)
+              half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16)
+              half2 = fromIntegral (fromIntegral (word `shiftR` 32) :: Word16)
+              half3 = fromIntegral (fromIntegral (word `shiftR` 48) :: Word16)
+          tmp <- getNewRegNat (intFormat W64)
+          return (Any (floatFormat W64) (\dst -> toOL [ annExpr expr
+                                                      $ MOV (OpReg W64 tmp) (OpImm (ImmInt half0))
+                                                      , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half1) SLSL 16)
+                                                      , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half2) SLSL 32)
+                                                      , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half3) SLSL 48)
+                                                      , MOV (OpReg W64 dst) (OpReg W64 tmp)
+                                                      ]))
+        CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
+        CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
+        CmmLabel _lbl -> do
+          (op, imm_code) <- litToImm' lit
+          let rep = cmmLitType plat lit
+              format = cmmTypeFormat rep
+          return (Any format (\dst ->
+            imm_code `snocOL` (annExpr expr $ LDR format (OpReg (formatToWidth format) dst) op)))
+
+        CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
+          (op, imm_code) <- litToImm' lit
+          let rep = cmmLitType plat lit
+              format = cmmTypeFormat rep
+              -- width = typeWidth rep
+          return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
+
+        CmmLabelOff lbl off -> do
+          (op, imm_code) <- litToImm' (CmmLabel lbl)
+          let rep = cmmLitType plat lit
+              format = cmmTypeFormat rep
+              width = typeWidth rep
+          (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+          return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
+
+        CmmLabelDiffOff _ _ _ _ -> pprPanic "getRegister' (CmmLit:CmmLabelDiffOff): " (pdoc plat expr)
+        CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmBlock): " (pdoc plat expr)
+        CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmHighStackMark): " (pdoc plat expr)
+    CmmLoad mem rep -> do
+      Amode addr addr_code <- getAmode plat mem
+      let format = cmmTypeFormat rep
+      return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr)))
+    CmmStackSlot _ _
+      -> pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
+    CmmReg reg
+      -> return (Fixed (cmmTypeFormat (cmmRegType plat reg))
+                       (getRegisterReg plat reg)
+                       nilOL)
+    CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do
+      getRegister' config plat $
+            CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+          where width = typeWidth (cmmRegType plat reg)
+
+    CmmRegOff reg off -> do
+      (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+      (reg, _format, code) <- getSomeReg $ CmmReg reg
+      return $ Any (intFormat width) (\dst -> off_code `appOL` code `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r))
+          where width = typeWidth (cmmRegType plat reg)
+
+
+
+    -- for MachOps, see GHC.Cmm.MachOp
+    -- For CmmMachOp, see GHC.Cmm.Expr
+    CmmMachOp op [e] -> do
+      (reg, _format, code) <- getSomeReg e
+      case op of
+        MO_Not w -> return $ Any (intFormat w) (\dst -> code `snocOL` MVN (OpReg w dst) (OpReg w reg))
+
+        MO_S_Neg w -> return $ Any (intFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
+        MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
+
+        MO_SF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg))  -- (Signed ConVerT Float)
+        MO_FS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed)
+
+        -- TODO this is very hacky
+        -- Note, UBFM and SBFM expect source and target register to be of the
+        -- same size, so we'll use @max from to@
+        -- UBFM will set the high bits to 0. SBFM will copy the sign (sign extend).
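Concretely (registers illustrative): a MO_UU_Conv W8 W64 in the cases below comes out as

    ubfm x0, x1, #0, #7    ; zero-extend the low byte of x1 into x0

and MO_SS_Conv W8 W64 as sbfm x0, x1, #0, #7, i.e. immr = 0 and imms = widthInBits (min from to) - 1, which is exactly the immediate that @toImm (min from to)@ supplies.
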
+        MO_UU_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` UBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to)))
+        MO_SS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` SBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to)))
+        MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` FCVT (OpReg to dst) (OpReg from reg))
+
+        -- Conversions
+        MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e
+
+        _ -> pprPanic "getRegister' (monadic CmmMachOp):" (pdoc plat expr)
+      where toImm W8 =  (OpImm (ImmInt 7))
+            toImm W16 = (OpImm (ImmInt 15))
+            toImm W32 = (OpImm (ImmInt 31))
+            toImm W64 = (OpImm (ImmInt 63))
+            toImm W128 = (OpImm (ImmInt 127))
+            toImm W256 = (OpImm (ImmInt 255))
+            toImm W512 = (OpImm (ImmInt 511))
+    -- Dyadic machops:
+    --
+    -- The general idea is:
+    -- compute x<i> <- x
+    -- compute x<j> <- y
+    -- OP x<r>, x<i>, x<j>
+    --
+    -- TODO: for now we'll only implement the 64bit versions. And rely on the
+    --      fallthrough to alert us if things go wrong!
+    -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring
+    -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg
+    CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
+    CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
+    -- 1. Compute Reg +/- n directly.
+    --    For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
+    CmmMachOp (MO_Add w) [(CmmReg reg), CmmLit (CmmInt n _)]
+      | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
+      where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+            r' = getRegisterReg plat reg
+    CmmMachOp (MO_Sub w) [(CmmReg reg), CmmLit (CmmInt n _)]
+      | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
+      where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+            r' = getRegisterReg plat reg
+
+    -- 2. Shifts. x << n, x >> n.
+    CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+    CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+    CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+    CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+    -- 3. Logic &&, ||
+    CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+      return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+            r' = getRegisterReg plat reg
+
+    CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+      return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+            r' = getRegisterReg plat reg
+
+    -- Generic case.
+    CmmMachOp op [x, y] -> do
+      -- alright, so we have an operation and two expressions. We want to
+      -- compute both operands into registers (making sure float operands
+      -- end up in float registers) and then apply the operation.
+      let genOp w op = do
+            (reg_x, format_x, code_x) <- getSomeReg x
+            (reg_y, format_y, code_y) <- getSomeReg y
+            when ((isFloatFormat format_x && isIntFormat format_y) || (isIntFormat format_x && isFloatFormat format_y)) $ pprPanic "getRegister:genOp" (text "formats don't match:" <+> text (show format_x) <+> text "/=" <+> text (show format_y))
+            return $ Any format_x (\dst -> code_x `appOL` code_y `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+
+          withTempIntReg w op = OpReg w <$> getNewRegNat (intFormat w) >>= op
+          -- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op
+
+          intOp w op = do
+            -- compute x<m> <- x
+            -- compute x<o> <- y
+            -- <OP> x<n>, x<m>, x<o>
+            (reg_x, _format_x, code_x) <- getSomeReg x
+            (reg_y, _format_y, code_y) <- getSomeReg y
+            return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+          floatOp w op = do
+            (reg_fx, _format_x, code_fx) <- getFloatReg x
+            (reg_fy, _format_y, code_fy) <- getFloatReg y
+            return $ Any (floatFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy))
+          -- need a special one for conditionals, as they return ints
+          floatCond w op = do
+            (reg_fx, _format_x, code_fx) <- getFloatReg x
+            (reg_fy, _format_y, code_fy) <- getFloatReg y
+            return $ Any (intFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy))
+
+      case op of
+        -- Integer operations
+        -- Add/Sub should only be integer operations.
+        -- But our Cmm parser doesn't care about types
+        -- and thus we end up with <float> + <float> => MO_Add <float> <float>
+        MO_Add w -> genOp w (\d x y -> unitOL $ annExpr expr (ADD d x y))
+        MO_Sub w -> genOp w (\d x y -> unitOL $ annExpr expr (SUB d x y))
+        --    31  30  29  28
+        --  .---+---+---+---+-- - -
+        --  | N | Z | C | V |
+        --  '---+---+---+---+-- - -
+        --  Negative
+        --  Zero
+        --  Carry
+        --  oVerflow
+        --
+        --  .------+-------------------------------------+-----------------+----------.
+        --  | Code | Meaning                             | Flags           | Encoding |
+        --  |------+-------------------------------------+-----------------+----------|
+        --  |  EQ  | Equal                               | Z = 1           | 0000     |
+        --  |  NE  | Not Equal                           | Z = 0           | 0001     |
+        --  |  HI  | Unsigned Higher                     | C = 1 && Z = 0  | 1000     |
+        --  |  HS  | Unsigned Higher or Same             | C = 1           | 0010     |
+        --  |  LS  | Unsigned Lower or Same              | C = 0 || Z = 1  | 1001     |
+        --  |  LO  | Unsigned Lower                      | C = 0           | 0011     |
+        --  |  GT  | Signed Greater Than                 | Z = 0 && N = V  | 1100     |
+        --  |  GE  | Signed Greater Than or Equal        | N = V           | 1010     |
+        --  |  LE  | Signed Less Than or Equal           | Z = 1 || N /= V | 1101     |
+        --  |  LT  | Signed Less Than                    | N /= V          | 1011     |
+        --  |  CS  | Carry Set (Unsigned Overflow)       | C = 1           | 0010     |
+        --  |  CC  | Carry Clear (No Unsigned Overflow)  | C = 0           | 0011     |
+        --  |  VS  | Signed Overflow                     | V = 1           | 0110     |
+        --  |  VC  | No Signed Overflow                  | V = 0           | 0111     |
+        --  |  MI  | Minus, Negative                     | N = 1           | 0100     |
+        --  |  PL  | Plus, Positive or Zero (!)          | N = 0           | 0101     |
+        --  |  AL  | Always                              | Any             | 1110     |
+        --  |  NV  | Never                               | Any             | 1111     |
+        --- '-------------------------------------------------------------------------'
+
+        MO_Eq w -> intOp w (\d x y -> toOL [ CMP x y, CSET d EQ ])
+        MO_Ne w -> intOp w (\d x y -> toOL [ CMP x y, CSET d NE ])
+        MO_Mul w -> intOp w (\d x y -> unitOL $ MUL d x y)
+
+        -- Signed multiply/divide
+        MO_S_MulMayOflo w -> intOp w (\d x y -> toOL [ MUL d x y, CSET d VS ])
+        MO_S_Quot w -> intOp w (\d x y -> unitOL $ SDIV d x y)
+
+        -- No native rem instruction. So we'll compute the following
+        -- Rd  <- Rx / Ry             | 2 <- 7 / 3      -- SDIV Rd Rx Ry
+        -- Rd' <- Rx - Rd * Ry        | 1 <- 7 - 2 * 3  -- MSUB Rd' Rd Ry Rx
+        --        |     '---|----------------|---'   |
+        --        |         '----------------|-------'
+        --        '--------------------------'
+        -- Note the swap in Rx and Ry.
+        MO_S_Rem w -> withTempIntReg w $ \t ->
+          intOp w (\d x y -> toOL [ SDIV t x y, MSUB d t y x ])
+
+        -- Unsigned multiply/divide
+        MO_U_MulMayOflo _w -> unsupportedP plat expr
+        MO_U_Quot w -> intOp w (\d x y -> unitOL $ UDIV d x y)
+        MO_U_Rem w  -> withTempIntReg w $ \t ->
+          intOp w (\d x y -> toOL [ UDIV t x y, MSUB d t y x ])
+
+        -- Signed comparisons -- see above for the CSET discussion
+        MO_S_Ge w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SGE ])
+        MO_S_Le w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SLE ])
+        MO_S_Gt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SGT ])
+        MO_S_Lt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SLT ])
+
+        -- Unsigned comparisons
+        MO_U_Ge w -> intOp w (\d x y -> toOL [ CMP x y, CSET d UGE ])
+        MO_U_Le w -> intOp w (\d x y -> toOL [ CMP x y, CSET d ULE ])
+        MO_U_Gt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d UGT ])
+        MO_U_Lt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d ULT ])
+
+        -- Floating point arithmetic
+        MO_F_Add w   -> floatOp w (\d x y -> unitOL $ ADD d x y)
+        MO_F_Sub w   -> floatOp w (\d x y -> unitOL $ SUB d x y)
+        MO_F_Mul w   -> floatOp w (\d x y -> unitOL $ MUL d x y)
+        MO_F_Quot w  -> floatOp w (\d x y -> unitOL $ SDIV d x y)
+
+        -- Floating point comparison
+        MO_F_Eq w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d EQ ])
+        MO_F_Ne w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d NE ])
+
+        -- careful with the floating point operations.
+        -- SLE is effectively LE or unordered (NaN)
+        -- SLT is the same. OLE and OLT will not return true for NaN.
+        -- This is a bit counter-intuitive. Don't let yourself be fooled by
+        -- the S/U prefix for floats, it's only meaningful for integers.
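A concrete illustration (registers illustrative; flag values per the table above): comparing against a NaN sets the flags to the unordered pattern NZCV = 0011, for which the signed conditions still fire but the ordered ones used below do not:

    fcmp d0, d1        ; d1 = NaN  =>  NZCV = 0011
    cset x0, le        ; SLE: 1 on unordered (LE: Z = 1 || N /= V)
    cset x0, ls        ; OLE: 0 on unordered (LS: C = 0 || Z = 1)

so CSET with the O-variants yields 0 for any comparison involving NaN, which is the IEEE result Cmm expects for MO_F_Ge/Le/Gt/Lt.
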
+        MO_F_Ge w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGE ])
+        MO_F_Le w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLE ]) -- x <= y <=> y >= x
+        MO_F_Gt w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGT ])
+        MO_F_Lt w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y > x
+
+        -- Bitwise operations
+        MO_And   w -> intOp w (\d x y -> unitOL $ AND d x y)
+        MO_Or    w -> intOp w (\d x y -> unitOL $ ORR d x y)
+        MO_Xor   w -> intOp w (\d x y -> unitOL $ EOR d x y)
+        -- MO_Not   W64 ->
+        MO_Shl   w -> intOp w (\d x y -> unitOL $ LSL d x y)
+        MO_U_Shr w -> intOp w (\d x y -> unitOL $ LSR d x y)
+        MO_S_Shr w -> intOp w (\d x y -> unitOL $ ASR d x y)
+
+        -- TODO
+
+        op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ (pprMachOp op) <+> text "in" <+> (pdoc plat expr)
+    CmmMachOp _op _xs
+      -> pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr)
+
+  where
+    unsupportedP :: OutputableP env a => env -> a -> b
+    unsupportedP platform op = pprPanic "Unsupported op:" (pdoc platform op)
+
+    isNbitEncodeable :: Int -> Integer -> Bool
+    isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
+    -- This needs to check if n can be encoded as a bitmask immediate:
+    --
+    -- See https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly
+    --
+    isBitMaskImmediate :: Integer -> Bool
+    isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000
+                                    ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000
+                                    ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000
+                                    ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000
+                                    ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000
+                                    ,0b0011_1111, 0b0111_1110, 0b1111_1100
+                                    ,0b0111_1111, 0b1111_1110
+                                    ,0b1111_1111]
+
+
+-- -----------------------------------------------------------------------------
+--  The 'Amode' type: Memory addressing modes passed up the tree.
+data Amode = Amode AddrMode InstrBlock
+
+getAmode :: Platform -> CmmExpr -> NatM Amode
+-- TODO: Specialize stuff we can destructure here.
+
+-- OPTIMIZATION WARNING: Addressing modes.
+-- Addressing options:
+-- LDUR/STUR: imm9: -256 - 255
+getAmode platform (CmmRegOff reg off) | -256 <= off, off <= 255
+  = return $ Amode (AddrRegImm reg' off') nilOL
+    where reg' = getRegisterReg platform reg
+          off' = ImmInt off
+-- LDR/STR: imm12: if reg is 32bit: 0 -- 16380 in multiples of 4
+getAmode platform (CmmRegOff reg off)
+  | typeWidth (cmmRegType platform reg) == W32, 0 <= off, off <= 16380, off `mod` 4 == 0
+  = return $ Amode (AddrRegImm reg' off') nilOL
+    where reg' = getRegisterReg platform reg
+          off' = ImmInt off
+-- LDR/STR: imm12: if reg is 64bit: 0 -- 32760 in multiples of 8
+getAmode platform (CmmRegOff reg off)
+  | typeWidth (cmmRegType platform reg) == W64, 0 <= off, off <= 32760, off `mod` 8 == 0
+  = return $ Amode (AddrRegImm reg' off') nilOL
+    where reg' = getRegisterReg platform reg
+          off' = ImmInt off
+
+-- For Stores we often see something like this:
+-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
+-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n]`
+-- for `n` in range.
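For instance (registers illustrative), a store through CmmMachOp (MO_Add W64) [p, CmmLit (CmmInt 16 W64)] folds the offset into the addressing mode:

    str x1, [x0, #16]    ; one store, no separate ADD to form the address
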
+getAmode _platform (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
+  | -256 <= off, off <= 255
+  = do (reg, _format, code) <- getSomeReg expr
+       return $ Amode (AddrRegImm reg (ImmInteger off)) code
+
+getAmode _platform (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
+  | -256 <= -off, -off <= 255
+  = do (reg, _format, code) <- getSomeReg expr
+       return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+
+-- Generic case
+getAmode _platform expr
+  = do (reg, _format, code) <- getSomeReg expr
+       return $ Amode (AddrReg reg) code
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business.  Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers.  If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side.  This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
+
+assignMem_IntCode rep addrE srcE
+  = do
+    (src_reg, _format, code) <- getSomeReg srcE
+    platform <- getPlatform
+    Amode addr addr_code <- getAmode platform addrE
+    return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
+            `consOL` (code
+            `appOL` addr_code
+            `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr))
+
+assignReg_IntCode _ reg src
+  = do
+    platform <- getPlatform
+    let dst = getRegisterReg platform reg
+    r <- getRegister src
+    return $ case r of
+      Any _ code              -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
+      Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg))
+
+-- Let's treat Floating point stuff
+-- as integer code for now. Opaque.
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+-- -----------------------------------------------------------------------------
+-- Jumps
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+genJump expr@(CmmLit (CmmLabel lbl))
+  = return $ unitOL (annExpr expr (J (TLabel lbl)))
+
+genJump expr = do
+    (target, _format, code) <- getSomeReg expr
+    return (code `appOL` unitOL (annExpr expr (J (TReg target))))
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+-- -----------------------------------------------------------------------------
+-- Conditional branches
+genCondJump
+    :: BlockId
+    -> CmmExpr
+    -> NatM InstrBlock
+genCondJump bid expr = do
+    case expr of
+      -- Optimized == 0 case.
+      CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
+        (reg_x, _format_x, code_x) <- getSomeReg x
+        return $ code_x `snocOL` (annExpr expr (CBZ (OpReg w reg_x) (TBlock bid)))
+
+      -- Optimized /= 0 case.
+      CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
+        (reg_x, _format_x, code_x) <- getSomeReg x
+        return $ code_x `snocOL` (annExpr expr (CBNZ (OpReg w reg_x) (TBlock bid)))
+
+      -- Generic case.
+      CmmMachOp mop [x, y] -> do
+
+        let bcond w cmp = do
+              -- compute both sides.
+              (reg_x, _format_x, code_x) <- getSomeReg x
+              (reg_y, _format_y, code_y) <- getSomeReg y
+              return $ code_x `appOL` code_y `snocOL` CMP (OpReg w reg_x) (OpReg w reg_y) `snocOL` (annExpr expr (BCOND cmp (TBlock bid)))
+            fbcond w cmp = do
+              -- ensure we get float regs
+              (reg_fx, _format_fx, code_fx) <- getFloatReg x
+              (reg_fy, _format_fy, code_fy) <- getFloatReg y
+              return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` (annExpr expr (BCOND cmp (TBlock bid)))
+
+        case mop of
+          MO_F_Eq w -> fbcond w EQ
+          MO_F_Ne w -> fbcond w NE
+
+          MO_F_Gt w -> fbcond w OGT
+          MO_F_Ge w -> fbcond w OGE
+          MO_F_Lt w -> fbcond w OLT
+          MO_F_Le w -> fbcond w OLE
+
+          MO_Eq w   -> bcond w EQ
+          MO_Ne w   -> bcond w NE
+
+          MO_S_Gt w -> bcond w SGT
+          MO_S_Ge w -> bcond w SGE
+          MO_S_Lt w -> bcond w SLT
+          MO_S_Le w -> bcond w SLE
+          MO_U_Gt w -> bcond w UGT
+          MO_U_Ge w -> bcond w UGE
+          MO_U_Lt w -> bcond w ULT
+          MO_U_Le w -> bcond w ULE
+          _ -> pprPanic "AArch64.genCondJump:case mop: " (text $ show expr)
+      _ -> pprPanic "AArch64.genCondJump: " (text $ show expr)
+
+
+genCondBranch
+    :: BlockId      -- the source of the jump
+    -> BlockId      -- the true branch target
+    -> BlockId      -- the false branch target
+    -> CmmExpr      -- the condition on which to branch
+    -> NatM InstrBlock -- Instructions
+
+genCondBranch _ true false expr = do
+  b1 <- genCondJump true expr
+  b2 <- genBranch false
+  return (b1 `appOL` b2)
+
+-- -----------------------------------------------------------------------------
+--  Generating C calls
+
+-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations.  Apart from that, the code is easy.
+--
+-- As per *convention*:
+-- x0-x7:   (volatile) argument registers
+-- x8:      (volatile) indirect result register / Linux syscall no
+-- x9-x15:  (volatile) caller saved regs
+-- x16,x17: (volatile) intra-procedure-call registers
+-- x18:     (volatile) platform register. don't use for portability
+-- x19-x28: (non-volatile) callee save regs
+-- x29:     (non-volatile) frame pointer
+-- x30:     link register
+-- x31:     stack pointer / zero reg
+--
+-- Thus, this is what a C function will expect. Find the arguments in x0-x7,
+-- anything above that on the stack.  We'll ignore C functions with more than
+-- 8 arguments for now.  Sorry.
+--
+-- We need to make sure we preserve x9-x15, don't want to touch x16, x17.
+
+-- Note [PLT vs GOT relocations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- When linking objects together, we may need to lookup foreign references. That
+-- is symbolic references to functions or values in other objects. When
+-- compiling the object, we can not know where those elements will end up in
+-- memory (relative to the current location). Thus the use of symbols. There
+-- are two types of items we are interested in: code segments we want to jump to
+-- and continue execution there (functions, ...), and data items we want to look
+-- up (strings, numbers, ...). For functions we can use the fact that we can use
+-- an intermediate jump without visibility to the program's execution.
+-- If we want to jump to a function that is simply too far away to reach for
+-- the B/BL instruction, we can create a small piece of code that loads the
+-- full target address and jumps to that on demand. Say f wants to call g,
+-- however g is out of range for a direct jump, we can create a function h in
+-- range for f, that will load the address of g, and jump there. The area
+-- where we construct h is called the Procedure Linking Table (PLT), we have
+-- essentially replaced f -> g with f -> h -> g. This is fine for function
+-- calls. However if we want to lookup values, this trick doesn't work, so we
+-- need something else. We will instead reserve a slot in memory, and have a
+-- symbol pointing to that slot. Now what we essentially do is, we reference
+-- that slot, and expect that slot to hold the final resting address of the
+-- data we are interested in. Thus what that symbol really points to is the
+-- location of the final data. The block of memory where we hold all those
+-- slots is the Global Offset Table (GOT). Instead of x <- $foo, we now do
+-- y <- $fooPtr, and x <- [$y].
+--
+-- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only
+-- have 19bits (+/- 1MB). Symbol lookups are also within +/- 1MB, thus for most
+-- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within
+-- 4GB of the PC, and load that. For anything outside of that range, we'd have
+-- to go through the GOT.
+--
+--   adrp x0, <symbol>
+--   add x0, :lo:<symbol>
+--
+-- will compute the address of <symbol> into x0 if <symbol> is within 4GB of
+-- the PC.
+--
+-- If we want to get the slot in the global offset table (GOT), we can do this:
+--
+--   adrp x0, #:got:<symbol>
+--   ldr x0, [x0, #:got_lo12:<symbol>]
+--
+-- this will compute the address anywhere in the addressable 64bit space into
+-- x0, by loading the address from the GOT slot.
+--
+-- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which
+-- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>]
+-- instead of the add instruction.
+--
+-- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do
+-- not need to go through the GOT, unless we want to address the full address
+-- range within 64bit.
+
+genCCall
+    :: ForeignTarget      -- function to call
+    -> [CmmFormal]        -- where to put the result
+    -> [CmmActual]        -- arguments (of mixed type)
+    -> BlockId            -- The block we are in
+    -> NatM (InstrBlock, Maybe BlockId)
+-- TODO: Specialize where we can.
+-- Generic impl
+genCCall target dest_regs arg_regs bid = do
+  -- we want to pass arg_regs into allArgRegs
+  -- pprTraceM "genCCall target" (ppr target)
+  -- pprTraceM "genCCall formal" (ppr dest_regs)
+  -- pprTraceM "genCCall actual" (ppr arg_regs)
+
+  case target of
+    -- The target :: ForeignTarget call can either
+    -- be a foreign procedure with an address expr
+    -- and a calling convention.
+    ForeignTarget expr _cconv -> do
+      (call_target, call_target_code) <- case expr of
+        -- if this is a label, let's just jump directly to it. This will
+        -- produce the correct CALL relocation for BL...
+        (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
+        -- ... if it's not a label--well--let's compute the expression into a
+        -- register and jump to that. See Note [PLT vs GOT relocations]
+        _ -> do (reg, _format, reg_code) <- getSomeReg expr
+                pure (TReg reg, reg_code)
+      -- compute the code and register logic for all arg_regs.
+      -- this will give us the format information to match on.
+      arg_regs' <- mapM getSomeReg arg_regs
+
+      -- Now this is stupid. Our Cmm expressions don't carry the proper sizes,
+      -- so while in Cmm we might get W64 incorrectly for an int that is W32 in
+      -- STG; this then breaks packing of stack arguments, if we need to pack
+      -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
+      -- in Cmm proper. Option two, which we choose here, is to use extended
+      -- Hint information to contain the size information and use that when
+      -- packing arguments, spilled onto the stack.
+      let (_res_hints, arg_hints) = foreignTargetHints target
+          arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
+
+      platform <- getPlatform
+      let packStack = platformOS platform == OSDarwin
+
+      (stackSpace', passRegs, passArgumentsCode) <- passArguments packStack allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
+
+      -- if we pack the stack, we may need to adjust to a multiple of 8 bytes.
+      -- if we don't pack the stack, it will always be a multiple of 8.
+      let stackSpace = if stackSpace' `mod` 8 /= 0
+                       then 8 * (stackSpace' `div` 8 + 1)
+                       else stackSpace'
+
+      (returnRegs, readResultsCode) <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
+
+      let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
+                                 , DELTA (-16) ]
+          moveStackDown i | odd i = moveStackDown (i + 1)
+          moveStackDown i = toOL [ PUSH_STACK_FRAME
+                                 , SUB (OpReg W64 (regSingle 31)) (OpReg W64 (regSingle 31)) (OpImm (ImmInt (8 * i)))
+                                 , DELTA (-8 * i - 16) ]
+          moveStackUp 0 = toOL [ POP_STACK_FRAME
+                               , DELTA 0 ]
+          moveStackUp i | odd i = moveStackUp (i + 1)
+          moveStackUp i = toOL [ ADD (OpReg W64 (regSingle 31)) (OpReg W64 (regSingle 31)) (OpImm (ImmInt (8 * i)))
+                               , POP_STACK_FRAME
+                               , DELTA 0 ]
+
+      let code =    call_target_code          -- compute the label (possibly into a register)
+            `appOL` moveStackDown (stackSpace `div` 8)
+            `appOL` passArgumentsCode         -- put the arguments into x0, ...
+            `appOL` (unitOL $ BL call_target passRegs returnRegs) -- branch and link.
+            `appOL` readResultsCode           -- parse the results into registers
+            `appOL` moveStackUp (stackSpace `div` 8)
+      return (code, Nothing)
+
+    -- or a possibly side-effecting machine operation
+    -- mop :: CallishMachOp (see GHC.Cmm.MachOp)
+    PrimTarget mop -> do
+      -- We'll need config to construct foreign targets
+      case mop of
+        -- 64 bit float ops
+        MO_F64_Pwr   -> mkCCall "pow"
+
+        MO_F64_Sin   -> mkCCall "sin"
+        MO_F64_Cos   -> mkCCall "cos"
+        MO_F64_Tan   -> mkCCall "tan"
+
+        MO_F64_Sinh  -> mkCCall "sinh"
+        MO_F64_Cosh  -> mkCCall "cosh"
+        MO_F64_Tanh  -> mkCCall "tanh"
+
+        MO_F64_Asin  -> mkCCall "asin"
+        MO_F64_Acos  -> mkCCall "acos"
+        MO_F64_Atan  -> mkCCall "atan"
+
+        MO_F64_Asinh -> mkCCall "asinh"
+        MO_F64_Acosh -> mkCCall "acosh"
+        MO_F64_Atanh -> mkCCall "atanh"
+
+        MO_F64_Log   -> mkCCall "log"
+        MO_F64_Log1P -> mkCCall "log1p"
+        MO_F64_Exp   -> mkCCall "exp"
+        MO_F64_ExpM1 -> mkCCall "expm1"
+        MO_F64_Fabs  -> mkCCall "fabs"
+        MO_F64_Sqrt  -> mkCCall "sqrt"
+
+        -- 32 bit float ops
+        MO_F32_Pwr   -> mkCCall "powf"
+
+        MO_F32_Sin   -> mkCCall "sinf"
+        MO_F32_Cos   -> mkCCall "cosf"
+        MO_F32_Tan   -> mkCCall "tanf"
+        MO_F32_Sinh  -> mkCCall "sinhf"
+        MO_F32_Cosh  -> mkCCall "coshf"
+        MO_F32_Tanh  -> mkCCall "tanhf"
+        MO_F32_Asin  -> mkCCall "asinf"
+        MO_F32_Acos  -> mkCCall "acosf"
+        MO_F32_Atan  -> mkCCall "atanf"
+        MO_F32_Asinh -> mkCCall "asinhf"
+        MO_F32_Acosh -> mkCCall "acoshf"
+        MO_F32_Atanh -> mkCCall "atanhf"
+        MO_F32_Log   -> mkCCall "logf"
+        MO_F32_Log1P -> mkCCall "log1pf"
+        MO_F32_Exp   -> mkCCall "expf"
+        MO_F32_ExpM1 -> mkCCall "expm1f"
+        MO_F32_Fabs  -> mkCCall "fabsf"
+        MO_F32_Sqrt  -> mkCCall "sqrtf"
+
+        -- Conversion
+        MO_UF_Conv w -> mkCCall (word2FloatLabel w)
+
+        -- Arithmetic
+        -- These are not supported on X86, so I doubt they are used much.
+        MO_S_Mul2     _w -> unsupported mop
+        MO_S_QuotRem  _w -> unsupported mop
+        MO_U_QuotRem  _w -> unsupported mop
+        MO_U_QuotRem2 _w -> unsupported mop
+        MO_Add2       _w -> unsupported mop
+        MO_AddWordC   _w -> unsupported mop
+        MO_SubWordC   _w -> unsupported mop
+        MO_AddIntC    _w -> unsupported mop
+        MO_SubIntC    _w -> unsupported mop
+        MO_U_Mul2     _w -> unsupported mop
+
+        -- Memory Ordering
+        -- TODO DMBSY is probably *way* too much!
+        MO_ReadBarrier      -> return (unitOL DMBSY, Nothing)
+        MO_WriteBarrier     -> return (unitOL DMBSY, Nothing)
+        MO_Touch            -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers)
+        -- Prefetch
+        MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint.
+
+        -- Memory copy/set/move/cmp, with alignment for optimization
+
+        -- TODO Optimize and use e.g. quad registers to move memory around instead
+        -- of offloading this to memcpy. For small memcpys we can utilize
+        -- the 128bit quad registers in NEON to move blocks of bytes around.
+        -- Might also make sense for small memsets? Use xzr? What's the function
+        -- call overhead?
+        MO_Memcpy  _align   -> mkCCall "memcpy"
+        MO_Memset  _align   -> mkCCall "memset"
+        MO_Memmove _align   -> mkCCall "memmove"
+        MO_Memcmp  _align   -> mkCCall "memcmp"
+
+        MO_SuspendThread    -> mkCCall "suspendThread"
+        MO_ResumeThread     -> mkCCall "resumeThread"
+
+        MO_PopCnt w         -> mkCCall (popCntLabel w)
+        MO_Pdep w           -> mkCCall (pdepLabel w)
+        MO_Pext w           -> mkCCall (pextLabel w)
+        MO_Clz w            -> mkCCall (clzLabel w)
+        MO_Ctz w            -> mkCCall (ctzLabel w)
+        MO_BSwap w          -> mkCCall (bSwapLabel w)
+        MO_BRev w           -> mkCCall (bRevLabel w)
+
+        -- -- Atomic read-modify-write.
+        MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
+        MO_AtomicRead w     -> mkCCall (atomicReadLabel w)
+        MO_AtomicWrite w    -> mkCCall (atomicWriteLabel w)
+        MO_Cmpxchg w        -> mkCCall (cmpxchgLabel w)
+        -- -- Should be an AtomicRMW variant eventually.
+        -- -- Sequential consistent.
+        -- TODO: this should be implemented properly!
+        MO_Xchg w           -> mkCCall (xchgLabel w)
+
+  where
+    unsupported :: Show a => a -> b
+    unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
+                          ++ " not supported here")
+    mkCCall :: FastString -> NatM (InstrBlock, Maybe BlockId)
+    mkCCall name = do
+      config <- getConfig
+      target <- cmmMakeDynamicReference config CallReference $
+          mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction
+      let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
+      genCCall (ForeignTarget target cconv) dest_regs arg_regs bid
+
+    -- TODO: Optimize using paired stores and loads (STP, LDP). It is
+    -- automatically done by the allocator for us. However it's not optimal,
+    -- as we'd rather want to have control over
+    -- all spill/load registers, so we can optimize with instructions like
+    --   STP xA, xB, [sp, #-16]!
+    -- and
+    --   LDP xA, xB, sp, #16
+    --
+    passArguments :: Bool -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
+    passArguments _packStack _ _ [] stackSpace accumRegs accumCode = return (stackSpace, accumRegs, accumCode)
+    -- passArguments _ _ [] accumCode stackSpace | isEven stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * stackSpace))
+    -- passArguments _ _ [] accumCode stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * (stackSpace + 1)))
+    -- passArguments [] fpRegs (arg0:arg1:args) stack accumCode = do
+    --   -- allocate this on the stack
+    --   (r0, format0, code_r0) <- getSomeReg arg0
+    --   (r1, format1, code_r1) <- getSomeReg arg1
+    --   let w0 = formatToWidth format0
+    --       w1 = formatToWidth format1
+    --       stackCode = unitOL $ STP (OpReg w0 r0) (OpReg w1 R1), (OpAddr (AddrRegImm x31 (ImmInt (stackSpace * 8)))
+    --   passArguments gpRegs (fpReg:fpRegs) args (stackCode `appOL` accumCode)
+
+    -- float promotion.
+    -- According to
+    --  ISO/IEC 9899:2018
+    --  Information technology — Programming languages — C
+    --
+    -- e.g.
+    --  http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
+    --  http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf
+    --
+    -- GHC would need to know the prototype.
+    --
+    -- > If the expression that denotes the called function has a type that does not include a
+    -- > prototype, the integer promotions are performed on each argument, and arguments that
+    -- > have type float are promoted to double.
+    --
+    -- As we have no way to get prototypes for C yet, we'll *not* promote this,
+    -- which is in line with the x86_64 backend :(
+    --
+    -- See the encode_values.cmm test.
+    --
+    -- We would essentially need to insert an FCVT (OpReg W64 fpReg) (OpReg W32 fpReg)
+    -- if w == W32.  But *only* if we don't have a prototype m(
+    --
+    -- For AArch64 specifics see: https://developer.arm.com/docs/ihi0055/latest/procedure-call-standard-for-the-arm-64-bit-architecture
+    --
+    -- Still have GP regs, and we want to pass a GP argument.
+    passArguments pack (gpReg:gpRegs) fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do
+      let w = formatToWidth format
+      passArguments pack gpRegs fpRegs args stackSpace (gpReg:accumRegs) (accumCode `appOL` code_r `snocOL` (ann (text "Pass gp argument: " <> ppr r) $ MOV (OpReg w gpReg) (OpReg w r)))
+
+    -- Still have FP regs, and we want to pass an FP argument.
+    passArguments pack gpRegs (fpReg:fpRegs) ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do
+      let w = formatToWidth format
+      passArguments pack gpRegs fpRegs args stackSpace (fpReg:accumRegs) (accumCode `appOL` code_r `snocOL` (ann (text "Pass fp argument: " <> ppr r) $ MOV (OpReg w fpReg) (OpReg w r)))
+
+    -- No more regs left to pass. Must pass on stack.
+    passArguments pack [] [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode = do
+      let w = formatToWidth format
+          bytes = widthInBits w `div` 8
+          space = if pack then bytes else 8
+          stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace))))
+      passArguments pack [] [] args (stackSpace+space) accumRegs (stackCode `appOL` accumCode)
+
+    -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
+    passArguments pack [] fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do
+      let w = formatToWidth format
+          bytes = widthInBits w `div` 8
+          space = if pack then bytes else 8
+          stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace))))
+      passArguments pack [] fpRegs args (stackSpace+space) accumRegs (stackCode `appOL` accumCode)
+
+    -- Still have gpRegs left, but want to pass an FP argument. Must be passed on the stack then.
+    passArguments pack gpRegs [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do
+      let w = formatToWidth format
+          bytes = widthInBits w `div` 8
+          space = if pack then bytes else 8
+          stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace))))
+      passArguments pack gpRegs [] args (stackSpace+space) accumRegs (stackCode `appOL` accumCode)
+
+    passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
+
+    readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg] -> InstrBlock -> NatM ([Reg], InstrBlock)
+    readResults _ _ [] accumRegs accumCode = return (accumRegs, accumCode)
+    readResults [] _ _ _ _ = do
+      platform <- getPlatform
+      pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
+    readResults _ [] _ _ _ = do
+      platform <- getPlatform
+      pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
+    readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do
+      -- gp/fp reg -> dst
+      platform <- getPlatform
+      let rep = cmmRegType platform (CmmLocal dst)
+          format = cmmTypeFormat rep
+          w = cmmRegWidth platform (CmmLocal dst)
+          r_dst = getRegisterReg platform (CmmLocal dst)
+      if isFloatFormat format
+        then readResults (gpReg:gpRegs) fpRegs dsts (fpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg))
+        else readResults gpRegs (fpReg:fpRegs) dsts (gpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg))
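As a worked example of the literal lowering in getRegister' above (registers illustrative): CmmInt 0x1122334455667788 W64 is split into four 16-bit halves, least significant first, and emitted as

    mov  x0, #0x7788
    movk x0, #0x5566, lsl #16
    movk x0, #0x3344, lsl #32
    movk x0, #0x1122, lsl #48

The CmmFloat W32/W64 cases use the same scheme, building the bit pattern in a temporary integer register and finishing with a MOV into the floating-point destination.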