summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMoritz Angermann <moritz.angermann@gmail.com>2023-05-01 02:51:38 +0000
committerSven Tennie <sven.tennie@wire.com>2023-05-01 16:43:15 +0000
commit332e19ea052096c4a20bfb7759beba0ad8c01b2d (patch)
tree954e47929c7a0a666e0b047b492fe2f630ca774c
parent8c2b36836376bfd69bd22662250fa59b368a5354 (diff)
downloadhaskell-332e19ea052096c4a20bfb7759beba0ad8c01b2d.tar.gz
Add RV64 backend
-rw-r--r--compiler/CodeGen.Platform.h96
-rw-r--r--compiler/GHC/Cmm/CLabel.hs2
-rw-r--r--compiler/GHC/CmmToAsm.hs3
-rw-r--r--compiler/GHC/CmmToAsm/PIC.hs9
-rw-r--r--compiler/GHC/CmmToAsm/RV64.hs59
-rw-r--r--compiler/GHC/CmmToAsm/RV64/CodeGen.hs1712
-rw-r--r--compiler/GHC/CmmToAsm/RV64/Cond.hs68
-rw-r--r--compiler/GHC/CmmToAsm/RV64/Instr.hs894
-rw-r--r--compiler/GHC/CmmToAsm/RV64/Ppr.hs610
-rw-r--r--compiler/GHC/CmmToAsm/RV64/RegInfo.hs31
-rw-r--r--compiler/GHC/CmmToAsm/RV64/Regs.hs153
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear.hs3
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs10
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs65
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Target.hs12
-rw-r--r--compiler/GHC/Driver/Backend.hs1
-rw-r--r--compiler/ghc.cabal.in8
17 files changed, 3727 insertions, 9 deletions
diff --git a/compiler/CodeGen.Platform.h b/compiler/CodeGen.Platform.h
index fb006c9f1a..9a5aeb7aba 100644
--- a/compiler/CodeGen.Platform.h
+++ b/compiler/CodeGen.Platform.h
@@ -1086,6 +1086,102 @@ freeReg REG_D6 = False
freeReg _ = True
+#elif defined(MACHREGS_riscv64)
+
+-- zero reg
+freeReg 0 = False
+-- link register
+freeReg 1 = False
+-- stack pointer
+freeReg 2 = False
+-- global pointer
+freeReg 3 = False
+-- thread pointer
+freeReg 4 = False
+-- frame pointer
+freeReg 8 = False
+
+# if defined(REG_Base)
+freeReg REG_Base = False
+# endif
+# if defined(REG_Sp)
+freeReg REG_Sp = False
+# endif
+# if defined(REG_SpLim)
+freeReg REG_SpLim = False
+# endif
+# if defined(REG_Hp)
+freeReg REG_Hp = False
+# endif
+# if defined(REG_HpLim)
+freeReg REG_HpLim = False
+# endif
+
+# if defined(REG_R1)
+freeReg REG_R1 = False
+# endif
+# if defined(REG_R2)
+freeReg REG_R2 = False
+# endif
+# if defined(REG_R3)
+freeReg REG_R3 = False
+# endif
+# if defined(REG_R4)
+freeReg REG_R4 = False
+# endif
+# if defined(REG_R5)
+freeReg REG_R5 = False
+# endif
+# if defined(REG_R6)
+freeReg REG_R6 = False
+# endif
+# if defined(REG_R7)
+freeReg REG_R7 = False
+# endif
+# if defined(REG_R8)
+freeReg REG_R8 = False
+# endif
+
+# if defined(REG_F1)
+freeReg REG_F1 = False
+# endif
+# if defined(REG_F2)
+freeReg REG_F2 = False
+# endif
+# if defined(REG_F3)
+freeReg REG_F3 = False
+# endif
+# if defined(REG_F4)
+freeReg REG_F4 = False
+# endif
+# if defined(REG_F5)
+freeReg REG_F5 = False
+# endif
+# if defined(REG_F6)
+freeReg REG_F6 = False
+# endif
+
+# if defined(REG_D1)
+freeReg REG_D1 = False
+# endif
+# if defined(REG_D2)
+freeReg REG_D2 = False
+# endif
+# if defined(REG_D3)
+freeReg REG_D3 = False
+# endif
+# if defined(REG_D4)
+freeReg REG_D4 = False
+# endif
+# if defined(REG_D5)
+freeReg REG_D5 = False
+# endif
+# if defined(REG_D6)
+freeReg REG_D6 = False
+# endif
+
+freeReg _ = True
+
#else
freeReg = panic "freeReg not defined for this platform"
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index e1e69a6296..907dc11d4b 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -1722,6 +1722,8 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl =
| platformArch platform == ArchAArch64
= ppLbl
+ | platformArch platform == ArchRISCV64
+ = ppLbl
| platformArch platform == ArchX86_64
= case dllInfo of
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index 96f5f6d78b..12171e9623 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -82,6 +82,7 @@ import qualified GHC.CmmToAsm.X86 as X86
import qualified GHC.CmmToAsm.PPC as PPC
import qualified GHC.CmmToAsm.AArch64 as AArch64
import qualified GHC.CmmToAsm.Wasm as Wasm32
+import qualified GHC.CmmToAsm.RV64 as RV64
import GHC.CmmToAsm.Reg.Liveness
import qualified GHC.CmmToAsm.Reg.Linear as Linear
@@ -166,7 +167,7 @@ nativeCodeGen logger ts config modLoc h us cmms
ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
- ArchRISCV64 -> panic "nativeCodeGen: No NCG for RISCV64"
+ ArchRISCV64 -> nCG' (RV64.ncgRV64 config)
ArchLoongArch64->panic "nativeCodeGen: No NCG for LoongArch64"
ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
diff --git a/compiler/GHC/CmmToAsm/PIC.hs b/compiler/GHC/CmmToAsm/PIC.hs
index 908c526d74..785cb46d53 100644
--- a/compiler/GHC/CmmToAsm/PIC.hs
+++ b/compiler/GHC/CmmToAsm/PIC.hs
@@ -132,6 +132,11 @@ cmmMakeDynamicReference config referenceKind lbl
addImport symbolPtr
return $ cmmMakePicReference config symbolPtr
+ AccessViaSymbolPtr | ArchRISCV64 <- platformArch platform -> do
+ let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
+ addImport symbolPtr
+ return $ cmmMakePicReference config symbolPtr
+
AccessViaSymbolPtr -> do
let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
addImport symbolPtr
@@ -164,6 +169,10 @@ cmmMakePicReference config lbl
| ArchAArch64 <- platformArch platform
= CmmLit $ CmmLabel lbl
+ | ArchRISCV64 <- platformArch platform
+ = CmmLit $ CmmLabel lbl
+
+
| OSAIX <- platformOS platform
= CmmMachOp (MO_Add W32)
[ CmmReg (CmmGlobal $ GlobalRegUse PicBaseReg (bWord platform))
diff --git a/compiler/GHC/CmmToAsm/RV64.hs b/compiler/GHC/CmmToAsm/RV64.hs
new file mode 100644
index 0000000000..377cac7c71
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64.hs
@@ -0,0 +1,59 @@
+-- | Native code generator for RiscV64 architectures
+module GHC.CmmToAsm.RV64
+ ( ncgRV64 )
+where
+
+import GHC.Prelude
+
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Monad
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Types
+import GHC.Utils.Outputable (ftext)
+
+import qualified GHC.CmmToAsm.RV64.Instr as RV64
+import qualified GHC.CmmToAsm.RV64.Ppr as RV64
+import qualified GHC.CmmToAsm.RV64.CodeGen as RV64
+import qualified GHC.CmmToAsm.RV64.Regs as RV64
+import qualified GHC.CmmToAsm.RV64.RegInfo as RV64
+
+ncgRV64 :: NCGConfig -> NcgImpl RawCmmStatics RV64.Instr RV64.JumpDest
+ncgRV64 config
+ = NcgImpl {
+ ncgConfig = config
+ ,cmmTopCodeGen = RV64.cmmTopCodeGen
+ ,generateJumpTableForInstr = RV64.generateJumpTableForInstr config
+ ,getJumpDestBlockId = RV64.getJumpDestBlockId
+ ,canShortcut = RV64.canShortcut
+ ,shortcutStatics = RV64.shortcutStatics
+ ,shortcutJump = RV64.shortcutJump
+ ,pprNatCmmDeclS = RV64.pprNatCmmDecl config
+ ,pprNatCmmDeclH = RV64.pprNatCmmDecl config
+ ,maxSpillSlots = RV64.maxSpillSlots config
+ ,allocatableRegs = RV64.allocatableRegs platform
+ ,ncgAllocMoreStack = RV64.allocMoreStack platform
+ ,ncgMakeFarBranches = const id
+ ,extractUnwindPoints = const []
+ ,invertCondBranches = \_ _ -> id
+ }
+ where
+ platform = ncgPlatform config
+
+ -- | Instruction instance for RV64
+instance Instruction RV64.Instr where
+ regUsageOfInstr = RV64.regUsageOfInstr
+ patchRegsOfInstr = RV64.patchRegsOfInstr
+ isJumpishInstr = RV64.isJumpishInstr
+ jumpDestsOfInstr = RV64.jumpDestsOfInstr
+ patchJumpInstr = RV64.patchJumpInstr
+ mkSpillInstr = RV64.mkSpillInstr
+ mkLoadInstr = RV64.mkLoadInstr
+ takeDeltaInstr = RV64.takeDeltaInstr
+ isMetaInstr = RV64.isMetaInstr
+ mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
+ takeRegRegMoveInstr = RV64.takeRegRegMoveInstr
+ mkJumpInstr = RV64.mkJumpInstr
+ mkStackAllocInstr = RV64.mkStackAllocInstr
+ mkStackDeallocInstr = RV64.mkStackDeallocInstr
+ mkComment = pure . RV64.COMMENT . ftext
+ pprInstr = RV64.pprInstr
diff --git a/compiler/GHC/CmmToAsm/RV64/CodeGen.hs b/compiler/GHC/CmmToAsm/RV64/CodeGen.hs
new file mode 100644
index 0000000000..7b377f5aa6
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64/CodeGen.hs
@@ -0,0 +1,1712 @@
+{-# language GADTs #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NumericUnderscores #-}
+module GHC.CmmToAsm.RV64.CodeGen (
+ cmmTopCodeGen
+ , generateJumpTableForInstr
+)
+
+where
+
+-- NCG stuff:
+import GHC.Prelude hiding (EQ)
+
+import Data.Word
+
+import GHC.Platform.Regs
+import GHC.CmmToAsm.RV64.Instr
+import GHC.CmmToAsm.RV64.Regs
+import GHC.CmmToAsm.RV64.Cond
+
+import GHC.CmmToAsm.CPrim
+import GHC.Cmm.DebugBlock
+import GHC.CmmToAsm.Monad
+ ( NatM, getNewRegNat
+ , getPicBaseMaybeNat, getPlatform, getConfig
+ , getDebugBlock, getFileId
+ )
+-- import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.PIC
+import GHC.CmmToAsm.Format
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Types
+import GHC.Platform.Reg
+import GHC.Platform
+
+-- Our intermediate code:
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Types.Tickish ( GenTickish(..) )
+import GHC.Types.SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
+
+-- The rest:
+import GHC.Data.OrdList
+import GHC.Utils.Outputable
+
+import Control.Monad ( mapAndUnzipM, foldM )
+import Data.Maybe
+import GHC.Float
+
+import GHC.Types.Basic
+import GHC.Types.ForeignCall
+import GHC.Data.FastString
+import GHC.Utils.Misc
+import GHC.Utils.Panic
+import GHC.Utils.Constants (debugIsOn)
+
+-- Note [General layout of an NCG]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- @cmmTopCodeGen@ will be our main entry point to code gen. Here we'll get
+-- @RawCmmDecl@; see GHC.Cmm
+--
+-- RawCmmDecl = GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
+--
+-- GenCmmDecl d h g = CmmProc h CLabel [GlobalReg] g
+-- | CmmData Section d
+--
+-- As a result we want to transform this to a list of @NatCmmDecl@, which is
+-- defined @GHC.CmmToAsm.Instr@ as
+--
+-- type NatCmmDecl statics instr
+-- = GenCmmDecl statics (LabelMap RawCmmStatics) (ListGraph instr)
+--
+-- Thus well' turn
+-- GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph
+-- into
+-- [GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) (ListGraph Instr)]
+--
+-- where @CmmGraph@ is
+--
+-- type CmmGraph = GenCmmGraph CmmNode
+-- data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C }
+-- type CmmBlock = Block CmmNode C C
+--
+-- and @ListGraph Instr@ is
+--
+-- newtype ListGraph i = ListGraph [GenBasicBlock i]
+-- data GenBasicBlock i = BasicBlock BlockId [i]
+
+cmmTopCodeGen
+ :: RawCmmDecl
+ -> NatM [NatCmmDecl RawCmmStatics Instr]
+
+-- Thus we'll have to deal with either CmmProc ...
+cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
+ -- do
+ -- traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n"
+ -- ++ showSDocUnsafe (ppr cmm)
+
+ let blocks = toBlockListEntryFirst graph
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+
+ let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
+ tops = proc : concat statics
+
+ case picBaseMb of
+ Just _picBase -> panic "RV64.cmmTopCodeGen: picBase not implemented"
+ Nothing -> return tops
+
+-- ... or CmmData.
+cmmTopCodeGen _cmm@(CmmData sec dat) = do
+ -- do
+ -- traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n"
+ -- ++ showSDocUnsafe (ppr cmm)
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen
+ :: Block CmmNode C C
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmDecl RawCmmStatics Instr])
+
+basicBlockCodeGen block = do
+ config <- getConfig
+ -- do
+ -- traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
+ -- ++ showSDocUnsafe (ppr block)
+ let (_, nodes, tail) = blockSplit block
+ id = entryLabel block
+ stmts = blockToList nodes
+
+ header_comment_instr | debugIsOn = unitOL $ MULTILINE_COMMENT (
+ text "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
+ $+$ withPprStyle defaultDumpStyle (pdoc (ncgPlatform config) block)
+ )
+ | otherwise = nilOL
+ -- Generate location directive
+ dbg <- getDebugBlock (entryLabel block)
+ loc_instrs <- case dblSourceTick =<< dbg of
+ Just (SourceNote span name)
+ -> do fileId <- getFileId (srcSpanFile span)
+ let line = srcSpanStartLine span; col = srcSpanStartCol span
+ return $ unitOL $ LOCATION fileId line col name
+ _ -> return nilOL
+ (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
+ (!tail_instrs,_) <- stmtToInstrs mid_bid tail
+ let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs
+ -- TODO: Then x86 backend run @verifyBasicBlock@ here and inserts
+ -- unwinding info. See Ticket 19913
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ return (BasicBlock id top : other_blocks, statics)
+
+
+-- -----------------------------------------------------------------------------
+-- | Utilities
+ann :: SDoc -> Instr -> Instr
+ann doc instr {- debugIsOn -} = ANN doc instr
+-- ann _ instr = instr
+{-# INLINE ann #-}
+
+-- Using pprExpr will hide the AST, @ANN@ will end up in the assembly with
+-- -dppr-debug. The idea is that we can trivially see how a cmm expression
+-- ended up producing the assembly we see. By having the verbatim AST printed
+-- we can simply check the patterns that were matched to arrive at the assembly
+-- we generated.
+--
+-- pprExpr will hide a lot of noise of the underlying data structure and print
+-- the expression into something that can be easily read by a human. However
+-- going back to the exact CmmExpr representation can be laborious and adds
+-- indirections to find the matches that lead to the assembly.
+--
+-- An improvement oculd be to have
+--
+-- (pprExpr genericPlatform e) <> parens (text. show e)
+--
+-- to have the best of both worlds.
+--
+-- Note: debugIsOn is too restrictive, it only works for debug compilers.
+-- However, we do not only want to inspect this for debug compilers. Ideally
+-- we'd have a check for -dppr-debug here already, such that we don't even
+-- generate the ANN expressions. However, as they are lazy, they shouldn't be
+-- forced until we actually force them, and without -dppr-debug they should
+-- never end up being forced.
+annExpr :: CmmExpr -> Instr -> Instr
+annExpr e instr {- debugIsOn -} = ANN (text . show $ e) instr
+-- annExpr e instr {- debugIsOn -} = ANN (pprExpr genericPlatform e) instr
+-- annExpr _ instr = instr
+{-# INLINE annExpr #-}
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+-- TODO jump tables would be a lot faster, but we'll use bare bones for now.
+-- this is usually done by sticking the jump table ids into an instruction
+-- and then have the @generateJumpTableForInstr@ callback produce the jump
+-- table as a static.
+--
+-- See Ticket 19912
+--
+-- data SwitchTargets =
+-- SwitchTargets
+-- Bool -- Signed values
+-- (Integer, Integer) -- Range
+-- (Maybe Label) -- Default value
+-- (M.Map Integer Label) -- The branches
+--
+-- Non Jumptable plan:
+-- xE <- expr
+--
+genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock
+genSwitch expr targets = do -- pprPanic "genSwitch" (ppr expr)
+ (reg, format, code) <- getSomeReg expr
+ let w = formatToWidth format
+ let mkbranch acc (key, bid) = do
+ (keyReg, _format, code) <- getSomeReg (CmmLit (CmmInt key w))
+ return $ code `appOL`
+ toOL [ BCOND EQ (OpReg w reg) (OpReg w keyReg) (TBlock bid)
+ ] `appOL` acc
+ def_code = case switchTargetsDefault targets of
+ Just bid -> unitOL (B (TBlock bid))
+ Nothing -> nilOL
+
+ switch_code <- foldM mkbranch nilOL (switchTargetsCases targets)
+ return $ code `appOL` switch_code `appOL` def_code
+
+-- We don't do jump tables for now, see Ticket 19912
+generateJumpTableForInstr :: NCGConfig -> Instr
+ -> Maybe (NatCmmDecl RawCmmStatics Instr)
+generateJumpTableForInstr _ _ = Nothing
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- See Note [Keeping track of the current block] for why
+-- we pass the BlockId.
+stmtsToInstrs :: BlockId -- ^ Basic block these statement will start to be placed in.
+ -> [CmmNode O O] -- ^ Cmm Statement
+ -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction
+stmtsToInstrs bid stmts =
+ go bid stmts nilOL
+ where
+ go bid [] instrs = return (instrs,bid)
+ go bid (s:stmts) instrs = do
+ (instrs',bid') <- stmtToInstrs bid s
+ -- If the statement introduced a new block, we use that one
+ let !newBid = fromMaybe bid bid'
+ go newBid stmts (instrs `appOL` instrs')
+
+-- | `bid` refers to the current block and is used to update the CFG
+-- if new blocks are inserted in the control flow.
+-- See Note [Keeping track of the current block] for more details.
+stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
+ -> CmmNode e x
+ -> NatM (InstrBlock, Maybe BlockId)
+ -- ^ Instructions, and bid of new block if successive
+ -- statements are placed in a different basic block.
+stmtToInstrs bid stmt = do
+ -- traceM $ "-- -------------------------- stmtToInstrs -------------------------- --\n"
+ -- ++ showSDocUnsafe (ppr stmt)
+ platform <- getPlatform
+ case stmt of
+ CmmUnsafeForeignCall target result_regs args
+ -> genCCall target result_regs args bid
+
+ _ -> (,Nothing) <$> case stmt of
+ CmmComment s -> return (unitOL (COMMENT (ftext s)))
+ CmmTick {} -> return nilOL
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode format reg src
+ | otherwise -> assignReg_IntCode format reg src
+ where ty = cmmRegType platform reg
+ format = cmmTypeFormat ty
+
+ CmmStore addr src _alignment
+ | isFloatType ty -> assignMem_FltCode format addr src
+ | otherwise -> assignMem_IntCode format addr src
+ where ty = cmmExprType platform src
+ format = cmmTypeFormat ty
+
+ CmmBranch id -> genBranch id
+
+ --We try to arrange blocks such that the likely branch is the fallthrough
+ --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
+ CmmCondBranch arg true false _prediction ->
+ genCondBranch bid true false arg
+
+ CmmSwitch arg ids -> genSwitch arg ids
+
+ CmmCall { cml_target = arg } -> genJump arg
+
+ CmmUnwind _regs -> return nilOL
+
+ _ -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt)
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
+
+-- | Register's passed up the tree. If the stix code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Format Reg InstrBlock
+ | Any Format (Reg -> InstrBlock)
+
+-- | Sometimes we need to change the Format of a register. Primarily during
+-- conversion.
+swizzleRegisterRep :: Format -> Register -> Register
+swizzleRegisterRep format (Fixed _ reg code) = Fixed format reg code
+swizzleRegisterRep format (Any _ codefn) = Any format codefn
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: Platform -> CmmReg -> Reg
+
+getRegisterReg _ (CmmLocal (LocalReg u pk))
+ = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
+
+getRegisterReg platform (CmmGlobal mid)
+ = case globalRegMaybe platform mid of
+ Just reg -> RegReal reg
+ Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence if it's not mapped to a registers something
+ -- went wrong earlier in the pipeline.
+-- | Convert a BlockId to some CmmStatic data
+-- TODO: Add JumpTable Logic, see Ticket 19912
+-- jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
+-- jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
+-- jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
+-- where blockLabel = blockLbl blockid
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, rep, code tmp)
+ Fixed rep reg code ->
+ return (reg, rep, code)
+
+-- TODO OPT: we might be able give getRegister
+-- a hint, what kind of register we want.
+getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg, Format, InstrBlock)
+getFloatReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code | isFloatFormat rep -> do
+ tmp <- getNewRegNat rep
+ return (tmp, rep, code tmp)
+ Any II32 code -> do
+ tmp <- getNewRegNat FF32
+ return (tmp, FF32, code tmp)
+ Any II64 code -> do
+ tmp <- getNewRegNat FF64
+ return (tmp, FF64, code tmp)
+ Any _w _code -> do
+ config <- getConfig
+ pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr)
+ -- can't do much for fixed.
+ Fixed rep reg code ->
+ return (reg, rep, code)
+
+-- TODO: TODO, bounds. We can't put any immediate
+-- value in. They are constrained.
+-- See Ticket 19911
+litToImm' :: CmmLit -> NatM (Operand, InstrBlock)
+litToImm' lit = return (OpImm (litToImm lit), nilOL)
+
+getRegister :: CmmExpr -> NatM Register
+getRegister e = do
+ config <- getConfig
+ getRegister' config (ncgPlatform config) e
+
+-- | The register width to be used for an operation on the given width
+-- operand.
+opRegWidth :: Width -> Width
+opRegWidth W64 = W64 -- x
+opRegWidth W32 = W32 -- w
+opRegWidth W16 = W32 -- w
+opRegWidth W8 = W32 -- w
+opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
+
+-- Note [Signed arithmetic on AArch64]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Handling signed arithmetic on sub-word-size values on AArch64 is a bit
+-- tricky as Cmm's type system does not capture signedness. While 32-bit values
+-- are fairly easy to handle due to AArch64's 32-bit instruction variants
+-- (denoted by use of %wN registers), 16- and 8-bit values require quite some
+-- care.
+--
+-- We handle 16-and 8-bit values by using the 32-bit operations and
+-- sign-/zero-extending operands and truncate results as necessary. For
+-- simplicity we maintain the invariant that a register containing a
+-- sub-word-size value always contains the zero-extended form of that value
+-- in between operations.
+--
+-- For instance, consider the program,
+--
+-- test(bits64 buffer)
+-- bits8 a = bits8[buffer];
+-- bits8 b = %mul(a, 42);
+-- bits8 c = %not(b);
+-- bits8 d = %shrl(c, 4::bits8);
+-- return (d);
+-- }
+--
+-- This program begins by loading `a` from memory, for which we use a
+-- zero-extended byte-size load. We next sign-extend `a` to 32-bits, and use a
+-- 32-bit multiplication to compute `b`, and truncate the result back down to
+-- 8-bits.
+--
+-- Next we compute `c`: The `%not` requires no extension of its operands, but
+-- we must still truncate the result back down to 8-bits. Finally the `%shrl`
+-- requires no extension and no truncate since we can assume that
+-- `c` is zero-extended.
+--
+-- TODO:
+-- Don't use Width in Operands
+-- Instructions should rather carry a RegWidth
+--
+-- Note [Handling PIC on RV64]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- RV64 does not have a special PIC register, the general approach is to
+-- simply go through the GOT, and there is assembly support for this:
+--
+-- rv64 assembly has a `la` (load address) pseudo-instruction, that allows
+-- loading a label, ... into a register. The instruction is desugared into
+--
+-- 1: lui rd1, %pcrel_hi(label)
+-- addi rd1, %pcrel_lo(1b)
+--
+-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dModifiers.html,
+-- PIC can be enabled/disabled through
+--
+-- .option pic
+--
+-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dDirectives.html#RISC_002dV_002dDirectives
+--
+-- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the
+-- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@
+-- also in @Cmm.CmmToAsm.PIC@ from where it is also exported. There are two
+-- callsites for this. One is in this module to produce the @target@ in @genCCall@
+-- the other is in @GHC.CmmToAsm@ in @cmmExprNative@.
+--
+-- Conceptually we do not want any special PicBaseReg to be used on RV64. If
+-- we want to distinguish between symbol loading, we need to address this through
+-- the way we load it, not through a register.
+--
+
+getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
+-- OPTIMIZATION WARNING: CmmExpr rewrites
+-- 1. Rewrite: Reg + (-n) => Reg - n
+-- TODO: this expression shouldn't even be generated to begin with.
+getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
+ = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
+
+getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
+ = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
+
+
+-- Generic case.
+getRegister' config plat expr
+ = case expr of
+ CmmReg (CmmGlobal PicBaseReg)
+ -> pprPanic "getRegisterReg-memory" (ppr $ PicBaseReg)
+ CmmLit lit
+ -> case lit of
+
+ -- TODO handle CmmInt 0 specially, use wzr or xzr.
+
+ CmmInt i W8 | i >= 0 -> do
+ return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowU W8 i))))))
+ CmmInt i W16 | i >= 0 -> do
+ return (Any (intFormat W16) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger (narrowU W16 i))))))
+
+ CmmInt i W8 -> do
+ return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowU W8 i))))))
+ CmmInt i W16 -> do
+ return (Any (intFormat W16) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger (narrowU W16 i))))))
+
+ -- We need to be careful to not shorten this for negative literals.
+ -- Those need the upper bits set. We'd either have to explicitly sign
+ -- or figure out something smarter. Lowered to
+ -- `MOV dst XZR`
+ CmmInt i w -> do
+ return (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) (OpImm (ImmInteger i)))))
+
+ CmmInt _i rep -> do
+ (op, imm_code) <- litToImm' lit
+ return (Any (intFormat rep) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg rep dst) op)))
+
+ -- floatToBytes (fromRational f)
+ CmmFloat 0 w -> do
+ (op, imm_code) <- litToImm' lit
+ return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op)))
+
+ CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
+ CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
+ CmmFloat f W32 -> do
+ let word = castFloatToWord32 (fromRational f) :: Word32
+ half0 = fromIntegral (fromIntegral word :: Word16)
+ half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16)
+ tmp <- getNewRegNat (intFormat W32)
+ return (Any (floatFormat W32) (\dst -> toOL [ annExpr expr
+ $ MOV (OpReg W32 tmp) (OpImm (ImmInt half0))
+ , MOVK (OpReg W32 tmp) (OpImmShift (ImmInt half1) SLSL 16)
+ , MOV (OpReg W32 dst) (OpReg W32 tmp)
+ ]))
+ CmmFloat f W64 -> do
+ let word = castDoubleToWord64 (fromRational f) :: Word64
+ half0 = fromIntegral (fromIntegral word :: Word16)
+ half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16)
+ half2 = fromIntegral (fromIntegral (word `shiftR` 32) :: Word16)
+ half3 = fromIntegral (fromIntegral (word `shiftR` 48) :: Word16)
+ tmp <- getNewRegNat (intFormat W64)
+ return (Any (floatFormat W64) (\dst -> toOL [ annExpr expr
+ $ MOV (OpReg W64 tmp) (OpImm (ImmInt half0))
+ , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half1) SLSL 16)
+ , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half2) SLSL 32)
+ , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half3) SLSL 48)
+ , MOV (OpReg W64 dst) (OpReg W64 tmp)
+ ]))
+ CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
+ CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
+ CmmLabel _lbl -> do
+ (op, imm_code) <- litToImm' lit
+ let rep = cmmLitType plat lit
+ format = cmmTypeFormat rep
+ return (Any format (\dst -> imm_code `snocOL` (annExpr expr $ LDR format (OpReg (formatToWidth format) dst) op)))
+
+ CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
+ (op, imm_code) <- litToImm' lit
+ let rep = cmmLitType plat lit
+ format = cmmTypeFormat rep
+ -- width = typeWidth rep
+ return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
+
+ CmmLabelOff lbl off -> do
+ (op, imm_code) <- litToImm' (CmmLabel lbl)
+ let rep = cmmLitType plat lit
+ format = cmmTypeFormat rep
+ width = typeWidth rep
+ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+ return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
+
+ CmmLabelDiffOff _ _ _ _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
+ CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
+ CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
+ CmmLoad mem rep _ -> do
+ Amode addr addr_code <- getAmode plat (typeWidth rep) mem
+ let format = cmmTypeFormat rep
+ return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr)))
+ CmmStackSlot _ _
+ -> pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
+ CmmReg reg
+ -> return (Fixed (cmmTypeFormat (cmmRegType plat reg))
+ (getRegisterReg plat reg)
+ nilOL)
+ CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do
+ getRegister' config plat $
+ CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType plat reg)
+
+ CmmRegOff reg off -> do
+ (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
+ (reg, _format, code) <- getSomeReg $ CmmReg reg
+ return $ Any (intFormat width) (\dst -> off_code `appOL` code `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r))
+ where width = typeWidth (cmmRegType plat reg)
+
+
+
+ -- for MachOps, see GHC.Cmm.MachOp
+ -- For CmmMachOp, see GHC.Cmm.Expr
+ CmmMachOp op [e] -> do
+ (reg, _format, code) <- getSomeReg e
+ case op of
+ MO_Not w -> return $ Any (intFormat w) $ \dst ->
+ let w' = opRegWidth w
+ in code `snocOL`
+ MVN (OpReg w' dst) (OpReg w' reg) `appOL`
+ truncateReg w' w dst -- See Note [Signed arithmetic on AArch64]
+
+ MO_S_Neg w -> negate code w reg
+ MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
+
+ MO_SF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float)
+ MO_FS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed)
+
+ -- TODO this is very slow. We effectively use store + load (byte, half, word, double)
+ -- for this in memory.
+ MO_UU_Conv from to -> return $ Any (intFormat to) (\dst ->
+ code `appOL` toOL [ SUB sp sp (OpImm (ImmInt 8))
+ , STR (intFormat from) (OpReg from reg) (OpAddr (AddrRegImm sp_reg (ImmInt 0)))
+ , LDR (intFormat to) (OpReg to dst) (OpAddr (AddrRegImm sp_reg (ImmInt 0)))
+ , ADD sp sp (OpImm (ImmInt 8))
+ ])
+ -- MO_UU_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` UBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to)))
+ MO_SS_Conv from to -> ss_conv from to reg code
+ MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` FCVT (OpReg to dst) (OpReg from reg))
+
+ -- Conversions
+ MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e
+
+ _ -> pprPanic "getRegister' (monadic CmmMachOp):" (pdoc plat expr)
+ where
+ toImm W8 = (OpImm (ImmInt 7))
+ toImm W16 = (OpImm (ImmInt 15))
+ toImm W32 = (OpImm (ImmInt 31))
+ toImm W64 = (OpImm (ImmInt 63))
+ toImm W128 = (OpImm (ImmInt 127))
+ toImm W256 = (OpImm (ImmInt 255))
+ toImm W512 = (OpImm (ImmInt 511))
+
+ -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
+ -- See Note [Signed arithmetic on AArch64].
+ negate code w reg = do
+ let w' = opRegWidth w
+ (reg', code_sx) <- signExtendReg w w' reg
+ return $ Any (intFormat w) $ \dst ->
+ code `appOL`
+ code_sx `snocOL`
+ NEG (OpReg w' dst) (OpReg w' reg') `appOL`
+ truncateReg w' w dst
+
+ ss_conv from to reg code =
+ let w' = opRegWidth (max from to)
+ in return $ Any (intFormat to) $ \dst ->
+ code `snocOL`
+ SBFM (OpReg w' dst) (OpReg w' reg) (OpImm (ImmInt 0)) (toImm (min from to)) `appOL`
+ -- At this point an 8- or 16-bit value would be sign-extended
+ -- to 32-bits. Truncate back down the final width.
+ truncateReg w' to dst
+
+ -- Dyadic machops:
+ --
+ -- The general idea is:
+ -- compute x<i> <- x
+ -- compute x<j> <- y
+ -- OP x<r>, x<i>, x<j>
+ --
+ -- TODO: for now we'll only implement the 64bit versions. And rely on the
+ -- fallthrough to alert us if things go wrong!
+ -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring
+ -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg
+ CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
+ CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
+ -- 1. Compute Reg +/- n directly.
+ -- For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
+ CmmMachOp (MO_Add w) [(CmmReg reg), CmmLit (CmmInt n _)]
+ | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
+ where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+ r' = getRegisterReg plat reg
+ CmmMachOp (MO_Sub w) [(CmmReg reg), CmmLit (CmmInt n _)]
+ | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
+ where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+ r' = getRegisterReg plat reg
+
+ CmmMachOp (MO_U_Quot w) [x, y] | w == W8 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTB (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (UXTB (OpReg w reg_y) (OpReg w reg_y)) `snocOL` (UDIV (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
+ CmmMachOp (MO_U_Quot w) [x, y] | w == W16 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTH (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (UXTH (OpReg w reg_y) (OpReg w reg_y)) `snocOL` (UDIV (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
+
+ -- 2. Shifts. x << n, x >> n.
+ CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+ CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+ CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W8, 0 <= n, n < 8 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (8-n)))))
+ CmmMachOp (MO_S_Shr w) [x, y] | w == W8 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (SXTB (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
+
+ CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W16, 0 <= n, n < 16 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (16-n)))))
+ CmmMachOp (MO_S_Shr w) [x, y] | w == W16 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (SXTH (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
+
+ CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (ASR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+ CmmMachOp (MO_S_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (ASR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+
+ CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W8, 0 <= n, n < 8 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (UBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (8-n)))))
+ CmmMachOp (MO_U_Shr w) [x, y] | w == W8 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTB (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
+
+ CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W16, 0 <= n, n < 16 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (UBFX (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)) (OpImm (ImmInteger (16-n)))))
+ CmmMachOp (MO_U_Shr w) [x, y] | w == W16 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `snocOL` annExpr expr (UXTH (OpReg w reg_x) (OpReg w reg_x)) `snocOL` (ASR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)))
+
+ CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+ CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+ -- 3. Logic &&, ||
+ CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+ r' = getRegisterReg plat reg
+
+ CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+ return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+ r' = getRegisterReg plat reg
+
+ -- Generic case.
+ CmmMachOp op [x, y] -> do
+ -- alright, so we have an operation, and two expressions. And we want to essentially do
+ -- ensure we get float regs (TODO(Ben): What?)
+ let withTempIntReg w op = OpReg w <$> getNewRegNat (intFormat w) >>= op
+ -- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op
+
+ -- A "plain" operation.
+ bitOp w op = do
+ -- compute x<m> <- x
+ -- compute x<o> <- y
+ -- <OP> x<n>, x<m>, x<o>
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible"
+ return $ Any (intFormat w) (\dst ->
+ code_x `appOL`
+ code_y `appOL`
+ op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+
+ -- A (potentially signed) integer operation.
+ -- In the case of 8- and 16-bit signed arithmetic we must first
+ -- sign-extend both arguments to 32-bits.
+ -- See Note [Signed arithmetic on AArch64].
+ intOp is_signed w op = do
+ -- compute x<m> <- x
+ -- compute x<o> <- y
+ -- <OP> x<n>, x<m>, x<o>
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ massertPpr (isIntFormat format_x && isIntFormat format_y) $ text "intOp: non-int"
+ -- This is the width of the registers on which the operation
+ -- should be performed.
+ let w' = opRegWidth w
+ signExt r
+ | not is_signed = return (r, nilOL)
+ | otherwise = signExtendReg w w' r
+ (reg_x_sx, code_x_sx) <- signExt reg_x
+ (reg_y_sx, code_y_sx) <- signExt reg_y
+ return $ Any (intFormat w) $ \dst ->
+ code_x `appOL`
+ code_y `appOL`
+ -- sign-extend both operands
+ code_x_sx `appOL`
+ code_y_sx `appOL`
+ op (OpReg w' dst) (OpReg w' reg_x_sx) (OpReg w' reg_y_sx) `appOL`
+ truncateReg w' w dst -- truncate back to the operand's original width
+
+ floatOp w op = do
+ (reg_fx, format_x, code_fx) <- getFloatReg x
+ (reg_fy, format_y, code_fy) <- getFloatReg y
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatOp: non-float"
+ return $ Any (floatFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy))
+
+ -- need a special one for conditionals, as they return ints
+ floatCond w op = do
+ (reg_fx, format_x, code_fx) <- getFloatReg x
+ (reg_fy, format_y, code_fy) <- getFloatReg y
+ massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatCond: non-float"
+ return $ Any (intFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy))
+
+ case op of
+ -- Integer operations
+ -- Add/Sub should only be Integer Options.
+ MO_Add w -> intOp False w (\d x y -> unitOL $ annExpr expr (ADD d x y))
+ -- TODO: Handle sub-word case
+ MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y))
+
+ -- Note [CSET]
+ -- ~~~~~~~~~~~
+ -- Setting conditional flags: the architecture internally knows the
+ -- following flag bits. And based on thsoe comparisons as in the
+ -- table below.
+ --
+ -- 31 30 29 28
+ -- .---+---+---+---+-- - -
+ -- | N | Z | C | V |
+ -- '---+---+---+---+-- - -
+ -- Negative
+ -- Zero
+ -- Carry
+ -- oVerflow
+ --
+ -- .------+-------------------------------------+-----------------+----------.
+ -- | Code | Meaning | Flags | Encoding |
+ -- |------+-------------------------------------+-----------------+----------|
+ -- | EQ | Equal | Z = 1 | 0000 |
+ -- | NE | Not Equal | Z = 0 | 0001 |
+ -- | HI | Unsigned Higher | C = 1 && Z = 0 | 1000 |
+ -- | HS | Unsigned Higher or Same | C = 1 | 0010 |
+ -- | LS | Unsigned Lower or Same | C = 0 || Z = 1 | 1001 |
+ -- | LO | Unsigned Lower | C = 0 | 0011 |
+ -- | GT | Signed Greater Than | Z = 0 && N = V | 1100 |
+ -- | GE | Signed Greater Than or Equal | N = V | 1010 |
+ -- | LE | Signed Less Than or Equal | Z = 1 || N /= V | 1101 |
+ -- | LT | Signed Less Than | N /= V | 1011 |
+ -- | CS | Carry Set (Unsigned Overflow) | C = 1 | 0010 |
+ -- | CC | Carry Clear (No Unsigned Overflow) | C = 0 | 0011 |
+ -- | VS | Signed Overflow | V = 1 | 0110 |
+ -- | VC | No Signed Overflow | V = 0 | 0111 |
+ -- | MI | Minus, Negative | N = 1 | 0100 |
+ -- | PL | Plus, Positive or Zero (!) | N = 0 | 0101 |
+ -- | AL | Always | Any | 1110 |
+ -- | NV | Never | Any | 1111 |
+ --- '-------------------------------------------------------------------------'
+
+ -- N.B. We needn't sign-extend sub-word size (in)equality comparisons
+ -- since we don't care about ordering.
+ MO_Eq w -> bitOp w (\d x y -> toOL [ CSET d x y EQ ])
+ MO_Ne w -> bitOp w (\d x y -> toOL [ CSET d x y NE ])
+
+ -- Signed multiply/divide
+ MO_Mul w -> intOp True w (\d x y -> unitOL $ MUL d x y)
+ MO_S_MulMayOflo w -> do_mul_may_oflo w x y
+ MO_S_Quot w -> intOp True w (\d x y -> unitOL $ SDIV d x y)
+
+ -- No native rem instruction. So we'll compute the following
+ -- Rd <- Rx / Ry | 2 <- 7 / 3 -- SDIV Rd Rx Ry
+ -- Rd' <- Rx - Rd * Ry | 1 <- 7 - 2 * 3 -- MSUB Rd' Rd Ry Rx
+ -- | '---|----------------|---' |
+ -- | '----------------|-------'
+ -- '--------------------------'
+ -- Note the swap in Rx and Ry.
+ MO_S_Rem w -> withTempIntReg w $ \t ->
+ intOp True w (\d x y -> toOL [ SDIV t x y, MSUB d t y x ])
+
+ -- Unsigned multiply/divide
+ MO_U_Quot w -> intOp False w (\d x y -> unitOL $ UDIV d x y)
+ MO_U_Rem w -> withTempIntReg w $ \t ->
+ intOp False w (\d x y -> toOL [ UDIV t x y, MSUB d t y x ])
+
+ -- Signed comparisons -- see Note [CSET]
+ MO_S_Ge w -> intOp True w (\d x y -> toOL [ CSET d x y SGE ])
+ MO_S_Le w -> intOp True w (\d x y -> toOL [ CSET d x y SLE ])
+ MO_S_Gt w -> intOp True w (\d x y -> toOL [ CSET d x y SGT ])
+ MO_S_Lt w -> intOp True w (\d x y -> toOL [ CSET d x y SLT ])
+
+ -- Unsigned comparisons
+ MO_U_Ge w -> intOp False w (\d x y -> toOL [ CSET d x y UGE ])
+ MO_U_Le w -> intOp False w (\d x y -> toOL [ CSET d x y ULE ])
+ MO_U_Gt w -> intOp False w (\d x y -> toOL [ CSET d x y UGT ])
+ MO_U_Lt w -> intOp False w (\d x y -> toOL [ CSET d x y ULT ])
+
+ -- Floating point arithmetic
+ MO_F_Add w -> floatOp w (\d x y -> unitOL $ ADD d x y)
+ MO_F_Sub w -> floatOp w (\d x y -> unitOL $ SUB d x y)
+ MO_F_Mul w -> floatOp w (\d x y -> unitOL $ MUL d x y)
+ MO_F_Quot w -> floatOp w (\d x y -> unitOL $ SDIV d x y)
+
+ -- Floating point comparison
+ MO_F_Eq w -> floatCond w (\d x y -> toOL [ CSET d x y EQ ])
+ MO_F_Ne w -> floatCond w (\d x y -> toOL [ CSET d x y NE ])
+
+ -- careful with the floating point operations.
+ -- SLE is effectively LE or unordered (NaN)
+ -- SLT is the same. ULE, and ULT will not return true for NaN.
+ -- This is a bit counter-intuitive. Don't let yourself be fooled by
+ -- the S/U prefix for floats, it's only meaningful for integers.
+ MO_F_Ge w -> floatCond w (\d x y -> toOL [ CSET d x y OGE ])
+ MO_F_Le w -> floatCond w (\d x y -> toOL [ CSET d x y OLE ]) -- x <= y <=> y > x
+ MO_F_Gt w -> floatCond w (\d x y -> toOL [ CSET d x y OGT ])
+ MO_F_Lt w -> floatCond w (\d x y -> toOL [ CSET d x y OLT ]) -- x < y <=> y >= x
+
+ -- Bitwise operations
+ MO_And w -> bitOp w (\d x y -> unitOL $ AND d x y)
+ MO_Or w -> bitOp w (\d x y -> unitOL $ ORR d x y)
+ MO_Xor w -> bitOp w (\d x y -> unitOL $ EOR d x y)
+ MO_Shl w -> intOp False w (\d x y -> unitOL $ LSL d x y)
+ MO_U_Shr w -> intOp False w (\d x y -> unitOL $ LSR d x y)
+ MO_S_Shr w -> intOp True w (\d x y -> unitOL $ ASR d x y)
+
+ -- TODO
+
+ op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ (pprMachOp op) <+> text "in" <+> (pdoc plat expr)
+ CmmMachOp _op _xs
+ -> pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr)
+
+ where
+ isNbitEncodeable :: Int -> Integer -> Bool
+ isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
+ -- FIXME: These are wrong, they are for AArch64, not RISCV! I'm not even sure we need them for RISCV
+ isBitMaskImmediate :: Integer -> Bool
+ isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000
+ ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000
+ ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000
+ ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000
+ ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000
+ ,0b0011_1111, 0b0111_1110, 0b1111_1100
+ ,0b0111_1111, 0b1111_1110
+ ,0b1111_1111]
+
+ -- N.B. MUL does not set the overflow flag.
+ do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ do_mul_may_oflo w@W64 x y = do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ lo <- getNewRegNat II64
+ hi <- getNewRegNat II64
+ return $ Any (intFormat w) (\dst ->
+ code_x `appOL`
+ code_y `snocOL`
+ MUL (OpReg w lo) (OpReg w reg_x) (OpReg w reg_y) `snocOL`
+ SMULH (OpReg w hi) (OpReg w reg_x) (OpReg w reg_y) `snocOL`
+ CSET (OpReg w dst) (OpReg w hi) (OpRegShift w lo SASR 63) NE)
+ do_mul_may_oflo w x y = do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ let tmp_w = case w of
+ W32 -> W64
+ W16 -> W32
+ W8 -> W32
+ _ -> panic "do_mul_may_oflo: impossible"
+ -- This will hold the product
+ tmp <- getNewRegNat (intFormat tmp_w)
+ let ext_mode = case w of
+ W32 -> ESXTW
+ W16 -> ESXTH
+ W8 -> ESXTB
+ _ -> panic "do_mul_may_oflo: impossible"
+ mul = case w of
+ W32 -> SMULL
+ W16 -> MUL
+ W8 -> MUL
+ _ -> panic "do_mul_may_oflo: impossible"
+ return $ Any (intFormat w) (\dst ->
+ code_x `appOL`
+ code_y `snocOL`
+ mul (OpReg tmp_w tmp) (OpReg w reg_x) (OpReg w reg_y) `snocOL`
+ CSET (OpReg w dst) (OpReg tmp_w tmp) (OpRegExt tmp_w tmp ext_mode 0) NE)
+
+-- | Instructions to sign-extend the value in the given register from width @w@
+-- up to width @w'@.
+signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr)
+signExtendReg w w' r =
+ case w of
+ W64 -> noop
+ W32
+ | w' == W32 -> noop
+ | otherwise -> extend SXTH
+ W16 -> extend SXTH
+ W8 -> extend SXTB
+ _ -> panic "intOp"
+ where
+ noop = return (r, nilOL)
+ extend instr = do
+ r' <- getNewRegNat II64
+ return (r', unitOL $ instr (OpReg w' r') (OpReg w' r))
+
+-- | Instructions to truncate the value in the given register from width @w@
+-- down to width @w'@.
+truncateReg :: Width -> Width -> Reg -> OrdList Instr
+truncateReg w w' r =
+ case w of
+ W64 -> nilOL
+ W32
+ | w' == W32 -> nilOL
+ _ -> unitOL $ UBFM (OpReg w r)
+ (OpReg w r)
+ (OpImm (ImmInt 0))
+ (OpImm $ ImmInt $ widthInBits w' - 1)
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+data Amode = Amode AddrMode InstrBlock
+
+getAmode :: Platform
+ -> Width -- ^ width of loaded value
+ -> CmmExpr
+ -> NatM Amode
+-- TODO: Specialize stuff we can destructure here.
+
+-- OPTIMIZATION WARNING: Addressing modes.
+-- Addressing options:
+-- LDUR/STUR: imm9: -256 - 255
+getAmode platform _ (CmmRegOff reg off) | -256 <= off, off <= 255
+ = return $ Amode (AddrRegImm reg' off') nilOL
+ where reg' = getRegisterReg platform reg
+ off' = ImmInt off
+-- LDR/STR: imm12: if reg is 32bit: 0 -- 16380 in multiples of 4
+getAmode platform W32 (CmmRegOff reg off)
+ | 0 <= off, off <= 16380, off `mod` 4 == 0
+ = return $ Amode (AddrRegImm reg' off') nilOL
+ where reg' = getRegisterReg platform reg
+ off' = ImmInt off
+-- LDR/STR: imm12: if reg is 64bit: 0 -- 32760 in multiples of 8
+getAmode platform W64 (CmmRegOff reg off)
+ | 0 <= off, off <= 32760, off `mod` 8 == 0
+ = return $ Amode (AddrRegImm reg' off') nilOL
+ where reg' = getRegisterReg platform reg
+ off' = ImmInt off
+
+-- For Stores we often see something like this:
+-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
+-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n ]
+-- for `n` in range.
+getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
+ | -256 <= off, off <= 255
+ = do (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrRegImm reg (ImmInteger off)) code
+
+getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
+ | -256 <= -off, -off <= 255
+ = do (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+
+-- Generic case
+getAmode _platform _ expr
+ = do (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrReg reg) code
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_IntCode rep addrE srcE
+ = do
+ (src_reg, _format, code) <- getSomeReg srcE
+ platform <- getPlatform
+ let w = formatToWidth rep
+ Amode addr addr_code <- getAmode platform w addrE
+ return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
+ `consOL` (code
+ `appOL` addr_code
+ `snocOL` STR rep (OpReg w src_reg) (OpAddr addr))
+
+assignReg_IntCode _ reg src
+ = do
+ platform <- getPlatform
+ let dst = getRegisterReg platform reg
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
+ Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg))
+
+-- Let's treat Floating point stuff
+-- as integer code for now. Opaque.
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+-- -----------------------------------------------------------------------------
+-- Jumps
+-- AArch64 has 26bits for targets, whereas RiscV only has 20.
+-- Thus we need to distinguish between far (outside of the)
+-- current compilation unit. And regular branches.
+-- RiscV has ±2MB of displacement, whereas AArch64 has ±128MB.
+-- Thus for most branches we can get away with encoding it
+-- directly in the instruction rather than always loading the
+-- address into a register and then using that to jump.
+-- Under the assumption that our linked build product is less than
+-- ~2*128MB of TEXT, and there are no jump that span the whole
+-- TEXT segment.
+-- Something where riscv's compressed instruction might come in
+-- handy.
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+genJump expr = do
+ (target, _format, code) <- getSomeReg expr
+ return (code `appOL` unitOL (annExpr expr (J (TReg target))))
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+-- -----------------------------------------------------------------------------
+-- Conditional branches
+genCondJump
+ :: BlockId
+ -> CmmExpr
+ -> NatM InstrBlock
+genCondJump bid expr = do
+ case expr of
+ -- Optimized == 0 case.
+ CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ code_x `snocOL` (annExpr expr (CBZ (OpReg w reg_x) (TBlock bid)))
+
+ -- Optimized /= 0 case.
+ CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ code_x `snocOL` (annExpr expr (CBNZ (OpReg w reg_x) (TBlock bid)))
+
+ -- Generic case.
+ CmmMachOp mop [x, y] -> do
+
+ let ubcond w cmp = do
+ -- compute both sides.
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ let x' = OpReg w reg_x
+ y' = OpReg w reg_y
+ return $ case w of
+ W8 -> code_x `appOL` code_y `appOL` toOL [ UXTB x' x', UXTB y' y', (annExpr expr (BCOND cmp x' y' (TBlock bid))) ]
+ W16 -> code_x `appOL` code_y `appOL` toOL [ UXTH x' x', UXTH y' y', (annExpr expr (BCOND cmp x' y' (TBlock bid))) ]
+ _ -> code_x `appOL` code_y `appOL` toOL [ (annExpr expr (BCOND cmp x' y' (TBlock bid))) ]
+
+ sbcond w cmp = do
+ -- compute both sides.
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ (reg_y, _format_y, code_y) <- getSomeReg y
+ let x' = OpReg w reg_x
+ y' = OpReg w reg_y
+ return $ case w of
+ W8 -> code_x `appOL` code_y `appOL` toOL [ SXTB x' x', SXTB y' y', (annExpr expr (BCOND cmp x' y' (TBlock bid))) ]
+ W16 -> code_x `appOL` code_y `appOL` toOL [ SXTH x' x', SXTH y' y', (annExpr expr (BCOND cmp x' y' (TBlock bid))) ]
+ _ -> code_x `appOL` code_y `appOL` toOL [ (annExpr expr (BCOND cmp x' y' (TBlock bid))) ]
+
+ fbcond w cmp = do
+ -- ensure we get float regs
+ (reg_fx, _format_fx, code_fx) <- getFloatReg x
+ (reg_fy, _format_fy, code_fy) <- getFloatReg y
+ return $ code_fx `appOL` code_fy `snocOL` (annExpr expr (BCOND cmp (OpReg w reg_fx) (OpReg w reg_fy) (TBlock bid)))
+
+ case mop of
+ MO_F_Eq w -> fbcond w EQ
+ MO_F_Ne w -> fbcond w NE
+
+ MO_F_Gt w -> fbcond w OGT
+ MO_F_Ge w -> fbcond w OGE
+ MO_F_Lt w -> fbcond w OLT
+ MO_F_Le w -> fbcond w OLE
+
+ MO_Eq w -> sbcond w EQ
+ MO_Ne w -> sbcond w NE
+
+ MO_S_Gt w -> sbcond w SGT
+ MO_S_Ge w -> sbcond w SGE
+ MO_S_Lt w -> sbcond w SLT
+ MO_S_Le w -> sbcond w SLE
+ MO_U_Gt w -> ubcond w UGT
+ MO_U_Ge w -> ubcond w UGE
+ MO_U_Lt w -> ubcond w ULT
+ MO_U_Le w -> ubcond w ULE
+ _ -> pprPanic "RV64.genCondJump:case mop: " (text $ show expr)
+ _ -> pprPanic "RV64.genCondJump: " (text $ show expr)
+
+
+genCondBranch
+ :: BlockId -- the source of the jump
+ -> BlockId -- the true branch target
+ -> BlockId -- the false branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock -- Instructions
+
+genCondBranch _ true false expr = do
+ b1 <- genCondJump true expr
+ b2 <- genBranch false
+ return (b1 `appOL` b2)
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- As per *convention*:
+-- x0-x7: (volatile) argument registers
+-- x8: (volatile) indirect result register / Linux syscall no
+-- x9-x15: (volatile) caller saved regs
+-- x16,x17: (volatile) intra-procedure-call registers
+-- x18: (volatile) platform register. don't use for portability
+-- x19-x28: (non-volatile) callee save regs
+-- x29: (non-volatile) frame pointer
+-- x30: link register
+-- x31: stack pointer / zero reg
+--
+-- Thus, this is what a c function will expect. Find the arguments in x0-x7,
+-- anything above that on the stack. We'll ignore c functions with more than
+-- 8 arguments for now. Sorry.
+--
+-- We need to make sure we preserve x9-x15, don't want to touch x16, x17.
+
+-- Note [PLT vs GOT relocations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- When linking objects together, we may need to lookup foreign references. That
+-- is symbolic references to functions or values in other objects. When
+-- compiling the object, we can not know where those elements will end up in
+-- memory (relative to the current location). Thus the use of symbols. There
+-- are two types of items we are interested, code segments we want to jump to
+-- and continue execution there (functions, ...), and data items we want to look
+-- up (strings, numbers, ...). For functions we can use the fact that we can use
+-- an intermediate jump without visibility to the programs execution. If we
+-- want to jump to a function that is simply too far away to reach for the B/BL
+-- instruction, we can create a small piece of code that loads the full target
+-- address and jumps to that on demand. Say f wants to call g, however g is out
+-- of range for a direct jump, we can create a function h in range for f, that
+-- will load the address of g, and jump there. The area where we construct h
+-- is called the Procedure Linking Table (PLT), we have essentially replaced
+-- f -> g with f -> h -> g. This is fine for function calls. However if we
+-- want to lookup values, this trick doesn't work, so we need something else.
+-- We will instead reserve a slot in memory, and have a symbol pointing to that
+-- slot. Now what we essentially do is, we reference that slot, and expect that
+-- slot to hold the final resting address of the data we are interested in.
+-- Thus what that symbol really points to is the location of the final data.
+-- The block of memory where we hold all those slots is the Global Offset Table
+-- (GOT). Instead of x <- $foo, we now do y <- $fooPtr, and x <- [$y].
+--
+-- FIXME: Update for RISCV, the below is still AArch64.
+-- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only
+-- have 19bits (+/- 1MB). Symbol lookups are also within +/- 1MB, thus for most
+-- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within
+-- 4GB of the PC, and load that. For anything outside of that range, we'd have
+-- to go through the GOT.
+--
+-- adrp x0, <symbol>
+-- add x0, :lo:<symbol>
+--
+-- will compute the address of <symbol> int x0 if <symbol> is within 4GB of the
+-- PC.
+--
+-- If we want to get the slot in the global offset table (GOT), we can do this:
+--
+-- adrp x0, #:got:<symbol>
+-- ldr x0, [x0, #:got_lo12:<symbol>]
+--
+-- this will compute the address anywhere in the addressable 64bit space into
+-- x0, by loading the address from the GOT slot.
+--
+-- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which
+-- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>]
+-- instead of the add instruction.
+--
+-- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do
+-- not need to go through the GOT, unless we want to address the full address
+-- range within 64bit.
+
+genCCall
+ :: ForeignTarget -- function to call
+ -> [CmmFormal] -- where to put the result
+ -> [CmmActual] -- arguments (of mixed type)
+ -> BlockId -- The block we are in
+ -> NatM (InstrBlock, Maybe BlockId)
+-- TODO: Specialize where we can.
+-- Generic impl
+genCCall target dest_regs arg_regs bid = do
+ -- we want to pass arg_regs into allArgRegs
+ -- pprTraceM "genCCall target" (ppr target)
+ -- pprTraceM "genCCall formal" (ppr dest_regs)
+ -- pprTraceM "genCCall actual" (ppr arg_regs)
+
+ case target of
+ -- The target :: ForeignTarget call can either
+ -- be a foreign procedure with an address expr
+ -- and a calling convention.
+ ForeignTarget expr _cconv -> do
+ (call_target, call_target_code) <- case expr of
+ -- if this is a label, let's just directly to it. This will produce the
+ -- correct CALL relocation for BL...
+ -- While this works on aarch64, for _most_ labels, it will fall short
+ -- where label branching only works for shoter distances (e.g. riscv)
+ -- (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
+ -- ... if it's not a label--well--let's compute the expression into a
+ -- register and jump to that. See Note [PLT vs GOT relocations]
+ _ -> do (reg, _format, reg_code) <- getSomeReg expr
+ pure (TReg reg, reg_code)
+ -- compute the code and register logic for all arg_regs.
+ -- this will give us the format information to match on.
+ arg_regs' <- mapM getSomeReg arg_regs
+
+ -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes
+ -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in
+ -- STG; this thenn breaks packing of stack arguments, if we need to pack
+ -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
+ -- in Cmm proper. Option two, which we choose here is to use extended Hint
+ -- information to contain the size information and use that when packing
+ -- arguments, spilled onto the stack.
+ let (_res_hints, arg_hints) = foreignTargetHints target
+ arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
+
+ platform <- getPlatform
+ let packStack = platformOS platform == OSDarwin
+
+ (stackSpace', passRegs, passArgumentsCode) <- passArguments packStack allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
+
+ -- if we pack the stack, we may need to adjust to multiple of 8byte.
+ -- if we don't pack the stack, it will always be multiple of 8.
+ let stackSpace = if stackSpace' `mod` 8 /= 0
+ then 8 * (stackSpace' `div` 8 + 1)
+ else stackSpace'
+
+ (returnRegs, readResultsCode) <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
+
+ let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
+ , DELTA (-16) ]
+ moveStackDown i | odd i = moveStackDown (i + 1)
+ moveStackDown i = toOL [ PUSH_STACK_FRAME
+ , SUB (OpReg W64 (regSingle 2)) (OpReg W64 (regSingle 2)) (OpImm (ImmInt (8 * i)))
+ , DELTA (-8 * i - 16) ]
+ moveStackUp 0 = toOL [ POP_STACK_FRAME
+ , DELTA 0 ]
+ moveStackUp i | odd i = moveStackUp (i + 1)
+ moveStackUp i = toOL [ ADD (OpReg W64 (regSingle 2)) (OpReg W64 (regSingle 2)) (OpImm (ImmInt (8 * i)))
+ , POP_STACK_FRAME
+ , DELTA 0 ]
+
+ let code = call_target_code -- compute the label (possibly into a register)
+ `appOL` moveStackDown (stackSpace `div` 8)
+ `appOL` passArgumentsCode -- put the arguments into x0, ...
+ `appOL` (unitOL $ BL call_target passRegs returnRegs) -- branch and link.
+ `appOL` readResultsCode -- parse the results into registers
+ `appOL` moveStackUp (stackSpace `div` 8)
+ return (code, Nothing)
+
+ PrimTarget MO_F32_Fabs
+ | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
+ unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+ PrimTarget MO_F64_Fabs
+ | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
+ unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+
+ -- or a possibly side-effecting machine operation
+ -- mop :: CallishMachOp (see GHC.Cmm.MachOp)
+ PrimTarget mop -> do
+ -- We'll need config to construct forien targets
+ case mop of
+ -- 64 bit float ops
+ MO_F64_Pwr -> mkCCall "pow"
+
+ MO_F64_Sin -> mkCCall "sin"
+ MO_F64_Cos -> mkCCall "cos"
+ MO_F64_Tan -> mkCCall "tan"
+
+ MO_F64_Sinh -> mkCCall "sinh"
+ MO_F64_Cosh -> mkCCall "cosh"
+ MO_F64_Tanh -> mkCCall "tanh"
+
+ MO_F64_Asin -> mkCCall "asin"
+ MO_F64_Acos -> mkCCall "acos"
+ MO_F64_Atan -> mkCCall "atan"
+
+ MO_F64_Asinh -> mkCCall "asinh"
+ MO_F64_Acosh -> mkCCall "acosh"
+ MO_F64_Atanh -> mkCCall "atanh"
+
+ MO_F64_Log -> mkCCall "log"
+ MO_F64_Log1P -> mkCCall "log1p"
+ MO_F64_Exp -> mkCCall "exp"
+ MO_F64_ExpM1 -> mkCCall "expm1"
+ MO_F64_Fabs -> mkCCall "fabs"
+ MO_F64_Sqrt -> mkCCall "sqrt"
+
+ -- 32 bit float ops
+ MO_F32_Pwr -> mkCCall "powf"
+
+ MO_F32_Sin -> mkCCall "sinf"
+ MO_F32_Cos -> mkCCall "cosf"
+ MO_F32_Tan -> mkCCall "tanf"
+ MO_F32_Sinh -> mkCCall "sinhf"
+ MO_F32_Cosh -> mkCCall "coshf"
+ MO_F32_Tanh -> mkCCall "tanhf"
+ MO_F32_Asin -> mkCCall "asinf"
+ MO_F32_Acos -> mkCCall "acosf"
+ MO_F32_Atan -> mkCCall "atanf"
+ MO_F32_Asinh -> mkCCall "asinhf"
+ MO_F32_Acosh -> mkCCall "acoshf"
+ MO_F32_Atanh -> mkCCall "atanhf"
+ MO_F32_Log -> mkCCall "logf"
+ MO_F32_Log1P -> mkCCall "log1pf"
+ MO_F32_Exp -> mkCCall "expf"
+ MO_F32_ExpM1 -> mkCCall "expm1f"
+ MO_F32_Fabs -> mkCCall "fabsf"
+ MO_F32_Sqrt -> mkCCall "sqrtf"
+
+ -- 64-bit primops
+ MO_I64_ToI -> mkCCall "hs_int64ToInt"
+ MO_I64_FromI -> mkCCall "hs_intToInt64"
+ MO_W64_ToW -> mkCCall "hs_word64ToWord"
+ MO_W64_FromW -> mkCCall "hs_wordToWord64"
+ MO_x64_Neg -> mkCCall "hs_neg64"
+ MO_x64_Add -> mkCCall "hs_add64"
+ MO_x64_Sub -> mkCCall "hs_sub64"
+ MO_x64_Mul -> mkCCall "hs_mul64"
+ MO_I64_Quot -> mkCCall "hs_quotInt64"
+ MO_I64_Rem -> mkCCall "hs_remInt64"
+ MO_W64_Quot -> mkCCall "hs_quotWord64"
+ MO_W64_Rem -> mkCCall "hs_remWord64"
+ MO_x64_And -> mkCCall "hs_and64"
+ MO_x64_Or -> mkCCall "hs_or64"
+ MO_x64_Xor -> mkCCall "hs_xor64"
+ MO_x64_Not -> mkCCall "hs_not64"
+ MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64"
+ MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64"
+ MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64"
+ MO_x64_Eq -> mkCCall "hs_eq64"
+ MO_x64_Ne -> mkCCall "hs_ne64"
+ MO_I64_Ge -> mkCCall "hs_geInt64"
+ MO_I64_Gt -> mkCCall "hs_gtInt64"
+ MO_I64_Le -> mkCCall "hs_leInt64"
+ MO_I64_Lt -> mkCCall "hs_ltInt64"
+ MO_W64_Ge -> mkCCall "hs_geWord64"
+ MO_W64_Gt -> mkCCall "hs_gtWord64"
+ MO_W64_Le -> mkCCall "hs_leWord64"
+ MO_W64_Lt -> mkCCall "hs_ltWord64"
+
+ -- Conversion
+ MO_UF_Conv w -> mkCCall (word2FloatLabel w)
+
+ -- Arithmatic
+ -- These are not supported on X86, so I doubt they are used much.
+ MO_S_Mul2 _w -> unsupported mop
+ MO_S_QuotRem _w -> unsupported mop
+ MO_U_QuotRem _w -> unsupported mop
+ MO_U_QuotRem2 _w -> unsupported mop
+ MO_Add2 _w -> unsupported mop
+ MO_AddWordC _w -> unsupported mop
+ MO_SubWordC _w -> unsupported mop
+ MO_AddIntC _w -> unsupported mop
+ MO_SubIntC _w -> unsupported mop
+ MO_U_Mul2 _w -> unsupported mop
+
+ -- Memory Ordering
+ -- TODO DMBSY is probably *way* too much!
+ MO_ReadBarrier -> return (unitOL DMBSY, Nothing)
+ MO_WriteBarrier -> return (unitOL DMBSY, Nothing)
+ MO_Touch -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers)
+ -- Prefetch
+ MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint.
+
+ -- Memory copy/set/move/cmp, with alignment for optimization
+
+ -- TODO Optimize and use e.g. quad registers to move memory around instead
+ -- of offloading this to memcpy. For small memcpys we can utilize
+ -- the 128bit quad registers in NEON to move block of bytes around.
+ -- Might also make sense of small memsets? Use xzr? What's the function
+ -- call overhead?
+ MO_Memcpy _align -> mkCCall "memcpy"
+ MO_Memset _align -> mkCCall "memset"
+ MO_Memmove _align -> mkCCall "memmove"
+ MO_Memcmp _align -> mkCCall "memcmp"
+
+ MO_SuspendThread -> mkCCall "suspendThread"
+ MO_ResumeThread -> mkCCall "resumeThread"
+
+ MO_PopCnt w -> mkCCall (popCntLabel w)
+ MO_Pdep w -> mkCCall (pdepLabel w)
+ MO_Pext w -> mkCCall (pextLabel w)
+ MO_Clz w -> mkCCall (clzLabel w)
+ MO_Ctz w -> mkCCall (ctzLabel w)
+ MO_BSwap w -> mkCCall (bSwapLabel w)
+ MO_BRev w -> mkCCall (bRevLabel w)
+
+ -- -- Atomic read-modify-write.
+ MO_AtomicRead w ord
+ | [p_reg] <- arg_regs
+ , [dst_reg] <- dest_regs -> do
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
+ platform <- getPlatform
+ let instr = case ord of
+ MemOrderRelaxed -> LDR
+ _ -> panic "no proper atomic write support" -- LDAR
+ dst = getRegisterReg platform (CmmLocal dst_reg)
+ code =
+ code_p `snocOL`
+ instr (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)
+ return (code, Nothing)
+ | otherwise -> panic "mal-formed AtomicRead"
+ MO_AtomicWrite w ord
+ | [p_reg, val_reg] <- arg_regs -> do
+ (p, _fmt_p, code_p) <- getSomeReg p_reg
+ (val, fmt_val, code_val) <- getSomeReg val_reg
+ let instr = case ord of
+ MemOrderRelaxed -> STR
+ _ -> panic "no proper atomic write support" -- STLR
+ code =
+ code_p `appOL`
+ code_val `snocOL`
+ instr fmt_val (OpReg w val) (OpAddr $ AddrReg p)
+ return (code, Nothing)
+ | otherwise -> panic "mal-formed AtomicWrite"
+ MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
+ MO_Cmpxchg w -> mkCCall (cmpxchgLabel w)
+ -- -- Should be an AtomicRMW variant eventually.
+ -- -- Sequential consistent.
+ -- TODO: this should be implemented properly!
+ MO_Xchg w -> mkCCall (xchgLabel w)
+
+ where
+ unsupported :: Show a => a -> b
+ unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
+ ++ " not supported here")
+ mkCCall :: FastString -> NatM (InstrBlock, Maybe BlockId)
+ mkCCall name = do
+ config <- getConfig
+ target <- cmmMakeDynamicReference config CallReference $
+ mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction
+ let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
+ genCCall (ForeignTarget target cconv) dest_regs arg_regs bid
+
+ -- TODO: Optimize using paired stores and loads (STP, LDP). It is
+ -- automatically done by the allocator for us. However it's not optimal,
+ -- as we'd rather want to have control over
+ -- all spill/load registers, so we can optimize with instructions like
+ -- STP xA, xB, [sp, #-16]!
+ -- and
+ -- LDP xA, xB, sp, #16
+ --
+ passArguments :: Bool -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
+ passArguments _packStack _ _ [] stackSpace accumRegs accumCode = return (stackSpace, accumRegs, accumCode)
+ -- passArguments _ _ [] accumCode stackSpace | isEven stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * stackSpace))
+ -- passArguments _ _ [] accumCode stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * (stackSpace + 1)))
+ -- passArguments [] fpRegs (arg0:arg1:args) stack accumCode = do
+ -- -- allocate this on the stack
+ -- (r0, format0, code_r0) <- getSomeReg arg0
+ -- (r1, format1, code_r1) <- getSomeReg arg1
+ -- let w0 = formatToWidth format0
+ -- w1 = formatToWidth format1
+ -- stackCode = unitOL $ STP (OpReg w0 r0) (OpReg w1 R1), (OpAddr (AddrRegImm x31 (ImmInt (stackSpace * 8)))
+ -- passArguments gpRegs (fpReg:fpRegs) args (stackCode `appOL` accumCode)
+
+ -- float promotion.
+ -- According to
+ -- ISO/IEC 9899:2018
+ -- Information technology — Programming languages — C
+ --
+ -- e.g.
+ -- http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
+ -- http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf
+ --
+ -- GHC would need to know the prototype.
+ --
+ -- > If the expression that denotes the called function has a type that does not include a
+ -- > prototype, the integer promotions are performed on each argument, and arguments that
+ -- > have type float are promoted to double.
+ --
+ -- As we have no way to get prototypes for C yet, we'll *not* promote this
+ -- which is in line with the x86_64 backend :(
+ --
+ -- See the encode_values.cmm test.
+ --
+ -- We would essentially need to insert an FCVT (OpReg W64 fpReg) (OpReg W32 fpReg)
+ -- if w == W32. But *only* if we don't have a prototype m(
+ --
+ -- For AArch64 specificies see: https://developer.arm.com/docs/ihi0055/latest/procedure-call-standard-for-the-arm-64-bit-architecture
+ --
+ -- Still have GP regs, and we want to pass an GP argument.
+
+
+ passArguments pack (gpReg:gpRegs) fpRegs ((r, format, hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do
+ platform <- getPlatform
+ let w = formatToWidth format
+ mov
+ -- Specifically, Darwin/AArch64's ABI requires that the caller
+ -- sign-extend arguments which are smaller than 32-bits.
+ | w < W32
+ , platformCConvNeedsExtension platform
+ , SignedHint <- hint
+ = case w of
+ W8 -> SXTB (OpReg W64 gpReg) (OpReg w r)
+ W16 -> SXTH (OpReg W64 gpReg) (OpReg w r)
+ _ -> panic "impossible"
+ | otherwise
+ = MOV (OpReg w gpReg) (OpReg w r)
+ accumCode' = accumCode `appOL`
+ code_r `snocOL`
+ ann (text "Pass gp argument: " <> ppr r) mov
+ passArguments pack gpRegs fpRegs args stackSpace (gpReg:accumRegs) accumCode'
+
+ -- Still have FP regs, and we want to pass an FP argument.
+ passArguments pack gpRegs (fpReg:fpRegs) ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do
+ let w = formatToWidth format
+ mov = MOV (OpReg w fpReg) (OpReg w r)
+ accumCode' = accumCode `appOL`
+ code_r `snocOL`
+ ann (text "Pass fp argument: " <> ppr r) mov
+ passArguments pack gpRegs fpRegs args stackSpace (fpReg:accumRegs) accumCode'
+
+ -- No mor regs left to pass. Must pass on stack.
+ passArguments pack [] [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode = do
+ let w = formatToWidth format
+ bytes = widthInBits w `div` 8
+ space = if pack then bytes else 8
+ stackSpace' | pack && stackSpace `mod` space /= 0 = stackSpace + space - (stackSpace `mod` space)
+ | otherwise = stackSpace
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpace')))
+ stackCode = code_r `snocOL`
+ ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
+ passArguments pack [] [] args (stackSpace'+space) accumRegs (stackCode `appOL` accumCode)
+
+ -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
+ passArguments pack [] fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do
+ let w = formatToWidth format
+ bytes = widthInBits w `div` 8
+ space = if pack then bytes else 8
+ stackSpace' | pack && stackSpace `mod` space /= 0 = stackSpace + space - (stackSpace `mod` space)
+ | otherwise = stackSpace
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpace')))
+ stackCode = code_r `snocOL`
+ ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
+ passArguments pack [] fpRegs args (stackSpace'+space) accumRegs (stackCode `appOL` accumCode)
+
+ -- Still have gpRegs left, but want to pass a FP argument. Must be passed on the stack then.
+ passArguments pack gpRegs [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do
+ let w = formatToWidth format
+ bytes = widthInBits w `div` 8
+ space = if pack then bytes else 8
+ stackSpace' | pack && stackSpace `mod` space /= 0 = stackSpace + space - (stackSpace `mod` space)
+ | otherwise = stackSpace
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpace')))
+ stackCode = code_r `snocOL`
+ ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
+ passArguments pack gpRegs [] args (stackSpace'+space) accumRegs (stackCode `appOL` accumCode)
+
+ passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
+
+ readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg]-> InstrBlock -> NatM ([Reg], InstrBlock)
+ readResults _ _ [] accumRegs accumCode = return (accumRegs, accumCode)
+ readResults [] _ _ _ _ = do
+ platform <- getPlatform
+ pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
+ readResults _ [] _ _ _ = do
+ platform <- getPlatform
+ pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
+ readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do
+ -- gp/fp reg -> dst
+ platform <- getPlatform
+ let rep = cmmRegType platform (CmmLocal dst)
+ format = cmmTypeFormat rep
+ w = cmmRegWidth platform (CmmLocal dst)
+ r_dst = getRegisterReg platform (CmmLocal dst)
+ if isFloatFormat format
+ then readResults (gpReg:gpRegs) fpRegs dsts (fpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg))
+ else readResults gpRegs (fpReg:fpRegs) dsts (gpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg))
+
+ unaryFloatOp w op arg_reg dest_reg = do
+ platform <- getPlatform
+ (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
+ let dst = getRegisterReg platform (CmmLocal dest_reg)
+ let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
+ return (code, Nothing)
diff --git a/compiler/GHC/CmmToAsm/RV64/Cond.hs b/compiler/GHC/CmmToAsm/RV64/Cond.hs
new file mode 100644
index 0000000000..645d8c9a60
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64/Cond.hs
@@ -0,0 +1,68 @@
+module GHC.CmmToAsm.RV64.Cond where
+
+import GHC.Prelude
+
+-- FIXME: These conditions originate from the Aarch64 backend. I'm not even sure
+-- we use all of them there. For RISCV we need to synthesize some of them, as
+-- RISCV has a much more reduced (ha!) set of condtionals.
+
+-- TODO: This appears to go a bit overboard? Maybe we should stick with what LLVM
+-- settled on for fcmp?
+-- false: always yields false, regardless of operands.
+-- oeq: yields true if both operands are not a QNAN and op1 is equal to op2.
+-- ogt: yields true if both operands are not a QNAN and op1 is greater than op2.
+-- oge: yields true if both operands are not a QNAN and op1 is greater than or equal to op2.
+-- olt: yields true if both operands are not a QNAN and op1 is less than op2.
+-- ole: yields true if both operands are not a QNAN and op1 is less than or equal to op2.
+-- one: yields true if both operands are not a QNAN and op1 is not equal to op2.
+-- ord: yields true if both operands are not a QNAN.
+-- ueq: yields true if either operand is a QNAN or op1 is equal to op2.
+-- ugt: yields true if either operand is a QNAN or op1 is greater than op2.
+-- uge: yields true if either operand is a QNAN or op1 is greater than or equal to op2.
+-- ult: yields true if either operand is a QNAN or op1 is less than op2.
+-- ule: yields true if either operand is a QNAN or op1 is less than or equal to op2.
+-- une: yields true if either operand is a QNAN or op1 is not equal to op2.
+-- uno: yields true if either operand is a QNAN.
+-- true: always yields true, regardless of operands.
+--
+-- LLVMs icmp knows about:
+-- eq: yields true if the operands are equal, false otherwise. No sign interpretation is necessary or performed.
+-- ne: yields true if the operands are unequal, false otherwise. No sign interpretation is necessary or performed.
+-- ugt: interprets the operands as unsigned values and yields true if op1 is greater than op2.
+-- uge: interprets the operands as unsigned values and yields true if op1 is greater than or equal to op2.
+-- ult: interprets the operands as unsigned values and yields true if op1 is less than op2.
+-- ule: interprets the operands as unsigned values and yields true if op1 is less than or equal to op2.
+-- sgt: interprets the operands as signed values and yields true if op1 is greater than op2.
+-- sge: interprets the operands as signed values and yields true if op1 is greater than or equal to op2.
+-- slt: interprets the operands as signed values and yields true if op1 is less than op2.
+-- sle: interprets the operands as signed values and yields true if op1 is less than or equal to op2.
+
+data Cond
+ = ALWAYS -- b.al
+ | EQ -- b.eq
+ | NE -- b.ne
+ -- signed
+ | SLT -- b.lt
+ | SLE -- b.le
+ | SGE -- b.ge
+ | SGT -- b.gt
+ -- unsigned
+ | ULT -- b.lo
+ | ULE -- b.ls
+ | UGE -- b.hs
+ | UGT -- b.hi
+ -- ordered
+ | OLT -- b.mi
+ | OLE -- b.ls
+ | OGE -- b.ge
+ | OGT -- b.gt
+ -- unordered
+ | UOLT -- b.lt
+ | UOLE -- b.le
+ | UOGE -- b.pl
+ | UOGT -- b.hi
+ -- others
+ | NEVER -- b.nv
+ | VS -- oVerflow set
+ | VC -- oVerflow clear
+ deriving Eq
diff --git a/compiler/GHC/CmmToAsm/RV64/Instr.hs b/compiler/GHC/CmmToAsm/RV64/Instr.hs
new file mode 100644
index 0000000000..9e2b402400
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64/Instr.hs
@@ -0,0 +1,894 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module GHC.CmmToAsm.RV64.Instr
+
+where
+
+import GHC.Prelude
+
+import GHC.CmmToAsm.RV64.Cond
+import GHC.CmmToAsm.RV64.Regs
+
+import GHC.CmmToAsm.Instr (RegUsage(..))
+import GHC.CmmToAsm.Format
+import GHC.CmmToAsm.Types
+import GHC.CmmToAsm.Utils
+import GHC.CmmToAsm.Config
+import GHC.Platform.Reg
+
+import GHC.Platform.Regs
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm
+import GHC.Cmm.CLabel
+import GHC.Utils.Outputable
+import GHC.Platform
+import GHC.Types.Unique.Supply
+
+import GHC.Utils.Panic
+
+import Data.Maybe (fromMaybe)
+
+import GHC.Stack
+
+-- | TODO: verify this!
+stackFrameHeaderSize :: Platform -> Int
+stackFrameHeaderSize _ = 64
+
+-- | All registers are 8 byte wide.
+spillSlotSize :: Int
+spillSlotSize = 8
+
+-- | The number of bytes that the stack pointer should be aligned
+-- to.
+stackAlign :: Int
+stackAlign = 16
+
+-- | The number of spill slots available without allocating more.
+maxSpillSlots :: NCGConfig -> Int
+maxSpillSlots config
+-- = 0 -- set to zero, to see when allocMoreStack has to fire.
+ = let platform = ncgPlatform config
+ in ((ncgSpillPreallocSize config - stackFrameHeaderSize platform)
+ `div` spillSlotSize) - 1
+
+-- | Convert a spill slot number to a *byte* offset, with no sign.
+spillSlotToOffset :: NCGConfig -> Int -> Int
+spillSlotToOffset config slot
+ = stackFrameHeaderSize (ncgPlatform config) + spillSlotSize * slot
+
+-- | Get the registers that are being used by this instruction.
+-- regUsage doesn't need to do any trickery for jumps and such.
+-- Just state precisely the regs read and written by that insn.
+-- The consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+--
+-- RegUsage = RU [<read regs>] [<write regs>]
+
+instance Outputable RegUsage where
+ ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')'
+
+regUsageOfInstr :: Platform -> Instr -> RegUsage
+regUsageOfInstr platform instr = case instr of
+ ANN _ i -> regUsageOfInstr platform i
+ COMMENT{} -> usage ([], [])
+ MULTILINE_COMMENT{} -> usage ([], [])
+ PUSH_STACK_FRAME -> usage ([], [])
+ POP_STACK_FRAME -> usage ([], [])
+ DELTA{} -> usage ([], [])
+
+ -- 1. Arithmetic Instructions ------------------------------------------------
+ ADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ -- CMN l r -> usage (regOp l ++ regOp r, [])
+ -- CMP l r -> usage (regOp l ++ regOp r, [])
+ MSUB dst src1 src2 src3 -> usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
+ MUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ NEG dst src -> usage (regOp src, regOp dst)
+ SMULH dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SMULL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SDIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ SUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ UDIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+
+ -- 2. Bit Manipulation Instructions ------------------------------------------
+ SBFM dst src _ _ -> usage (regOp src, regOp dst)
+ UBFM dst src _ _ -> usage (regOp src, regOp dst)
+ SBFX dst src _ _ -> usage (regOp src, regOp dst)
+ UBFX dst src _ _ -> usage (regOp src, regOp dst)
+ SXTB dst src -> usage (regOp src, regOp dst)
+ UXTB dst src -> usage (regOp src, regOp dst)
+ SXTH dst src -> usage (regOp src, regOp dst)
+ UXTH dst src -> usage (regOp src, regOp dst)
+ -- 3. Logical and Move Instructions ------------------------------------------
+ AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ BIC dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ BICS dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ EON dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ EOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ LSL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ LSR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ MOV dst src -> usage (regOp src, regOp dst)
+ MOVK dst src -> usage (regOp src, regOp dst)
+ MVN dst src -> usage (regOp src, regOp dst)
+ ORR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ ROR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ TST src1 src2 -> usage (regOp src1 ++ regOp src2, [])
+ -- 4. Branch Instructions ----------------------------------------------------
+ J t -> usage (regTarget t, [])
+ B t -> usage (regTarget t, [])
+ BCOND _ l r t -> usage (regTarget t ++ regOp l ++ regOp r, [])
+ BL t ps _rs -> usage (regTarget t ++ ps, callerSavedRegisters)
+
+ -- 5. Atomic Instructions ----------------------------------------------------
+ -- 6. Conditional Instructions -----------------------------------------------
+ CSET dst l r _ -> usage (regOp l ++ regOp r, regOp dst)
+ CBZ src _ -> usage (regOp src, [])
+ CBNZ src _ -> usage (regOp src, [])
+ -- 7. Load and Store Instructions --------------------------------------------
+ STR _ src dst -> usage (regOp src ++ regOp dst, [])
+ -- STLR _ src dst L -> usage (regOp src ++ regOp dst, [])
+ LDR _ dst src -> usage (regOp src, regOp dst)
+ -- LDAR _ dst src -> usage (regOp src, regOp dst)
+ -- TODO is this right? see STR, which I'm only partial about being right?
+ -- STP _ src1 src2 dst -> usage (regOp src1 ++ regOp src2 ++ regOp dst, [])
+ -- LDP _ dst1 dst2 src -> usage (regOp src, regOp dst1 ++ regOp dst2)
+
+ -- 8. Synchronization Instructions -------------------------------------------
+ DMBSY -> usage ([], [])
+
+ -- 9. Floating Point Instructions --------------------------------------------
+ FCVT dst src -> usage (regOp src, regOp dst)
+ SCVTF dst src -> usage (regOp src, regOp dst)
+ FCVTZS dst src -> usage (regOp src, regOp dst)
+ FABS dst src -> usage (regOp src, regOp dst)
+
+ _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
+
+ where
+ -- filtering the usage is necessary, otherwise the register
+ -- allocator will try to allocate pre-defined fixed stg
+ -- registers as well, as they show up.
+ usage (src, dst) = RU (filter (interesting platform) src)
+ (filter (interesting platform) dst)
+
+ regAddr :: AddrMode -> [Reg]
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+ regAddr (AddrReg r1) = [r1]
+ regOp :: Operand -> [Reg]
+ regOp (OpReg _ r1) = [r1]
+ regOp (OpRegExt _ r1 _ _) = [r1]
+ regOp (OpRegShift _ r1 _ _) = [r1]
+ regOp (OpAddr a) = regAddr a
+ regOp (OpImm _) = []
+ regOp (OpImmShift _ _ _) = []
+ regTarget :: Target -> [Reg]
+ regTarget (TBlock _) = []
+ regTarget (TLabel _) = []
+ regTarget (TReg r1) = [r1]
+
+ -- Is this register interesting for the register allocator?
+ interesting :: Platform -> Reg -> Bool
+ interesting _ (RegVirtual _) = True
+ interesting _ (RegReal (RealRegSingle (-1))) = False
+ interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
+
+-- Save caller save registers
+-- This is x0-x18
+--
+-- For SIMD/FP Registers:
+-- Registers v8-v15 must be preserved by a callee across subroutine calls;
+-- the remaining registers (v0-v7, v16-v31) do not need to be preserved (or
+-- should be preserved by the caller). Additionally, only the bottom 64 bits
+-- of each value stored in v8-v15 need to be preserved [7]; it is the
+-- responsibility of the caller to preserve larger values.
+--
+-- .---------------------------------------------------------------------------------------------------------------------------------------------------------------.
+-- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
+-- | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
+-- |== General Purpose registers ==================================================================================================================================|
+-- | ZR | RA | SP | GP | TP | <- tmp r. -> | FP | <- | <---- argument passing -------------> | -- callee saved ------------------------------> | <--- tmp regs --> |
+-- | -- | -- | -- | -- | -- | <- free r. > | -- | BR | <---- free registers ---------------> | SP | HP | R1 | R2 | R3 | R4 | R5 | R6 | R7 | SL | <-- free regs --> |
+-- |== SIMD/FP Registers ==========================================================================================================================================|
+-- | <--- temporary registers -----------> | <------ | <---- argument passing -------------> | -- callee saved ------------------------------> | <--- tmp regs --> |
+-- | <---------- free registers ---------> | F1 | F2 | <---- free registers ---------------> | F3 | F4 | F5 | F6 | D1 | D2 | D3 | D4 | D5 | D6 | -- | -- | -- | -- |
+-- '---------------------------------------------------------------------------------------------------------------------------------------------------------------'
+-- ZR: Zero, RA: Return Address, SP: Stack Pointer, GP: Global Pointer, TP: Thread Pointer, FP: Frame Pointer
+-- BR: Base, SL: SpLim
+callerSavedRegisters :: [Reg]
+callerSavedRegisters
+ = map regSingle [5..7]
+ ++ map regSingle [10..17]
+ ++ map regSingle [28..31]
+ ++ map regSingle [32..39]
+ ++ map regSingle [42..49]
+
+-- | Apply a given mapping to all the register references in this
+-- instruction.
+patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+patchRegsOfInstr instr env = case instr of
+ -- 0. Meta Instructions
+ ANN d i -> ANN d (patchRegsOfInstr i env)
+ COMMENT{} -> instr
+ MULTILINE_COMMENT{} -> instr
+ PUSH_STACK_FRAME -> instr
+ POP_STACK_FRAME -> instr
+ DELTA{} -> instr
+ -- 1. Arithmetic Instructions ----------------------------------------------
+ ADD o1 o2 o3 -> ADD (patchOp o1) (patchOp o2) (patchOp o3)
+ -- CMN o1 o2 -> CMN (patchOp o1) (patchOp o2)
+ -- CMP o1 o2 -> CMP (patchOp o1) (patchOp o2)
+ MSUB o1 o2 o3 o4 -> MSUB (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ MUL o1 o2 o3 -> MUL (patchOp o1) (patchOp o2) (patchOp o3)
+ NEG o1 o2 -> NEG (patchOp o1) (patchOp o2)
+ SMULH o1 o2 o3 -> SMULH (patchOp o1) (patchOp o2) (patchOp o3)
+ SMULL o1 o2 o3 -> SMULL (patchOp o1) (patchOp o2) (patchOp o3)
+ SDIV o1 o2 o3 -> SDIV (patchOp o1) (patchOp o2) (patchOp o3)
+ SUB o1 o2 o3 -> SUB (patchOp o1) (patchOp o2) (patchOp o3)
+ UDIV o1 o2 o3 -> UDIV (patchOp o1) (patchOp o2) (patchOp o3)
+
+ -- 2. Bit Manipulation Instructions ----------------------------------------
+ SBFM o1 o2 o3 o4 -> SBFM (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ UBFM o1 o2 o3 o4 -> UBFM (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ SBFX o1 o2 o3 o4 -> SBFX (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ UBFX o1 o2 o3 o4 -> UBFX (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ SXTB o1 o2 -> SXTB (patchOp o1) (patchOp o2)
+ UXTB o1 o2 -> UXTB (patchOp o1) (patchOp o2)
+ SXTH o1 o2 -> SXTH (patchOp o1) (patchOp o2)
+ UXTH o1 o2 -> UXTH (patchOp o1) (patchOp o2)
+
+ -- 3. Logical and Move Instructions ----------------------------------------
+ AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
+ -- ANDS o1 o2 o3 -> ANDS (patchOp o1) (patchOp o2) (patchOp o3)
+ ASR o1 o2 o3 -> ASR (patchOp o1) (patchOp o2) (patchOp o3)
+ BIC o1 o2 o3 -> BIC (patchOp o1) (patchOp o2) (patchOp o3)
+ BICS o1 o2 o3 -> BICS (patchOp o1) (patchOp o2) (patchOp o3)
+ EON o1 o2 o3 -> EON (patchOp o1) (patchOp o2) (patchOp o3)
+ EOR o1 o2 o3 -> EOR (patchOp o1) (patchOp o2) (patchOp o3)
+ LSL o1 o2 o3 -> LSL (patchOp o1) (patchOp o2) (patchOp o3)
+ LSR o1 o2 o3 -> LSR (patchOp o1) (patchOp o2) (patchOp o3)
+ MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
+ MOVK o1 o2 -> MOVK (patchOp o1) (patchOp o2)
+ MVN o1 o2 -> MVN (patchOp o1) (patchOp o2)
+ ORR o1 o2 o3 -> ORR (patchOp o1) (patchOp o2) (patchOp o3)
+ ROR o1 o2 o3 -> ROR (patchOp o1) (patchOp o2) (patchOp o3)
+ TST o1 o2 -> TST (patchOp o1) (patchOp o2)
+
+ -- 4. Branch Instructions --------------------------------------------------
+ J t -> J (patchTarget t)
+ B t -> B (patchTarget t)
+ BL t rs ts -> BL (patchTarget t) rs ts
+ BCOND c o1 o2 t -> BCOND c (patchOp o1) (patchOp o2) (patchTarget t)
+
+ -- 5. Atomic Instructions --------------------------------------------------
+ -- 6. Conditional Instructions ---------------------------------------------
+ CSET o l r c -> CSET (patchOp o) (patchOp l) (patchOp r) c
+ CBZ o l -> CBZ (patchOp o) l
+ CBNZ o l -> CBNZ (patchOp o) l
+ -- 7. Load and Store Instructions ------------------------------------------
+ STR f o1 o2 -> STR f (patchOp o1) (patchOp o2)
+ -- STLR f o1 o2 -> STLR f (patchOp o1) (patchOp o2)
+ LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2)
+ -- LDAR f o1 o2 -> LDAR f (patchOp o1) (patchOp o2)
+ -- STP f o1 o2 o3 -> STP f (patchOp o1) (patchOp o2) (patchOp o3)
+ -- LDP f o1 o2 o3 -> LDP f (patchOp o1) (patchOp o2) (patchOp o3)
+
+ -- 8. Synchronization Instructions -----------------------------------------
+ DMBSY -> DMBSY
+
+ -- 9. Floating Point Instructions ------------------------------------------
+ FCVT o1 o2 -> FCVT (patchOp o1) (patchOp o2)
+ SCVTF o1 o2 -> SCVTF (patchOp o1) (patchOp o2)
+ FCVTZS o1 o2 -> FCVTZS (patchOp o1) (patchOp o2)
+ FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
+ _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+ where
+ patchOp :: Operand -> Operand
+ patchOp (OpReg w r) = OpReg w (env r)
+ patchOp (OpRegExt w r x s) = OpRegExt w (env r) x s
+ patchOp (OpRegShift w r m s) = OpRegShift w (env r) m s
+ patchOp (OpAddr a) = OpAddr (patchAddr a)
+ patchOp op = op
+ patchTarget :: Target -> Target
+ patchTarget (TReg r) = TReg (env r)
+ patchTarget t = t
+ patchAddr :: AddrMode -> AddrMode
+ patchAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ patchAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+ patchAddr (AddrReg r) = AddrReg (env r)
+--------------------------------------------------------------------------------
+-- | Checks whether this instruction is a jump/branch instruction.
+-- One that can change the flow of control in a way that the
+-- register allocator needs to worry about.
+isJumpishInstr :: Instr -> Bool
+isJumpishInstr instr = case instr of
+ ANN _ i -> isJumpishInstr i
+ CBZ{} -> True
+ CBNZ{} -> True
+ J{} -> True
+ B{} -> True
+ BL{} -> True
+ BCOND{} -> True
+ _ -> False
+
+-- | Checks whether this instruction is a jump/branch instruction.
+-- One that can change the flow of control in a way that the
+-- register allocator needs to worry about.
+jumpDestsOfInstr :: Instr -> [BlockId]
+jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i
+jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr (J t) = [id | TBlock id <- [t]]
+jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
+jumpDestsOfInstr (BL t _ _) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr (BCOND _ _ _ t) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr _ = []
+
+-- | Change the destination of this jump instruction.
+-- Used in the linear allocator when adding fixup blocks for join
+-- points.
+patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+patchJumpInstr instr patchF
+ = case instr of
+ ANN d i -> ANN d (patchJumpInstr i patchF)
+ CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid))
+ CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid))
+ J (TBlock bid) -> J (TBlock (patchF bid))
+ B (TBlock bid) -> B (TBlock (patchF bid))
+ BL (TBlock bid) ps rs -> BL (TBlock (patchF bid)) ps rs
+ BCOND c o1 o2 (TBlock bid) -> BCOND c o1 o2 (TBlock (patchF bid))
+ _ -> panic $ "patchJumpInstr: " ++ instrCon instr
+
+-- -----------------------------------------------------------------------------
+-- Note [Spills and Reloads]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading
+-- registers. AArch64s maximum displacement for SP relative spills and reloads
+-- is essentially [-256,255], or [0, 0xFFF]*8 = [0, 32760] for 64bits.
+--
+-- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't address any location in a
+-- single instruction. The idea is to use the Inter Procedure 0 (ip0) register
+-- to perform the computations for larger offsets.
+--
+-- Using sp to compute the offset will violate assumptions about the stack pointer
+-- pointing to the top of the stack during signal handling. As we can't force
+-- every signal to use its own stack, we have to ensure that the stack pointer
+-- always points to the top of the stack, and we can't use it for computation.
+--
+-- | An instruction to spill a register into a spill slot.
+mkSpillInstr
+ :: HasCallStack
+ => NCGConfig
+ -> Reg -- register to spill
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> [Instr]
+
+mkSpillInstr config reg delta slot =
+ case (spillSlotToOffset config slot) - delta of
+ imm | -256 <= imm && imm <= 255 -> [ mkStrSp imm ]
+ imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff -> [ mkStrSp imm ]
+ imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0 -> [ mkIp0SpillAddr (imm .&~. 0xfff)
+ , mkStrIp0 (imm .&. 0xfff)
+ ]
+ imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm)
+ where
+ a .&~. b = a .&. (complement b)
+
+ fmt = case reg of
+ RegReal (RealRegSingle n) | n < 32 -> II64
+ _ -> FF64
+ mkIp0SpillAddr imm = ANN (text "Spill: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm))
+ mkStrSp imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 2) (ImmInt imm)))
+ mkStrIp0 imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm)))
+
+ off = spillSlotToOffset config slot
+
+mkLoadInstr
+ :: NCGConfig
+ -> Reg -- register to load
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> [Instr]
+
+mkLoadInstr config reg delta slot =
+ case (spillSlotToOffset config slot) - delta of
+ imm | -256 <= imm && imm <= 255 -> [ mkLdrSp imm ]
+ imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff -> [ mkLdrSp imm ]
+ imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0 -> [ mkIp0SpillAddr (imm .&~. 0xfff)
+ , mkLdrIp0 (imm .&. 0xfff)
+ ]
+ imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm)
+ where
+ a .&~. b = a .&. (complement b)
+
+ fmt = case reg of
+ RegReal (RealRegSingle n) | n < 32 -> II64
+ _ -> FF64
+
+ mkIp0SpillAddr imm = ANN (text "Reload: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm))
+ mkLdrSp imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 2) (ImmInt imm)))
+ mkLdrIp0 imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm)))
+
+ off = spillSlotToOffset config slot
+
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+takeDeltaInstr :: Instr -> Maybe Int
+takeDeltaInstr (ANN _ i) = takeDeltaInstr i
+takeDeltaInstr (DELTA i) = Just i
+takeDeltaInstr _ = Nothing
+
+-- Not real instructions. Just meta data
+isMetaInstr :: Instr -> Bool
+isMetaInstr instr
+ = case instr of
+ ANN _ i -> isMetaInstr i
+ COMMENT{} -> True
+ MULTILINE_COMMENT{} -> True
+ LOCATION{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ PUSH_STACK_FRAME -> True
+ POP_STACK_FRAME -> True
+ _ -> False
+
+-- | Copy the value in a register to another one.
+-- Must work for all register classes.
+mkRegRegMoveInstr :: Reg -> Reg -> Instr
+mkRegRegMoveInstr src dst = ANN (text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst) $ MOV (OpReg W64 dst) (OpReg W64 src)
+
+-- | Take the source and destination from this reg -> reg move instruction
+-- or Nothing if it's not one
+takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+--takeRegRegMoveInstr (MOV (OpReg fmt dst) (OpReg fmt' src)) | fmt == fmt' = Just (src, dst)
+takeRegRegMoveInstr _ = Nothing
+
+-- | Make an unconditional jump instruction.
+mkJumpInstr :: BlockId -> [Instr]
+mkJumpInstr id = [B (TBlock id)]
+
+mkStackAllocInstr :: Platform -> Int -> [Instr]
+mkStackAllocInstr platform n
+ | n == 0 = []
+ | n > 0 && n < 4096 = [ ANN (text "Alloc More Stack") $ SUB sp sp (OpImm (ImmInt n)) ]
+ | n > 0 = ANN (text "Alloc More Stack") (SUB sp sp (OpImm (ImmInt 4095))) : mkStackAllocInstr platform (n - 4095)
+mkStackAllocInstr _platform n = pprPanic "mkStackAllocInstr" (int n)
+
+mkStackDeallocInstr :: Platform -> Int -> [Instr]
+mkStackDeallocInstr platform n
+ | n == 0 = []
+ | n > 0 && n < 4096 = [ ANN (text "Dealloc More Stack") $ ADD sp sp (OpImm (ImmInt n)) ]
+ | n > 0 = ANN (text "Dealloc More Stack") (ADD sp sp (OpImm (ImmInt 4095))) : mkStackDeallocInstr platform (n - 4095)
+mkStackDeallocInstr _platform n = pprPanic "mkStackDeallocInstr" (int n)
+
+--
+-- See Note [extra spill slots] in X86/Instr.hs
+--
+allocMoreStack
+ :: Platform
+ -> Int
+ -> NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr
+ -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr, [(BlockId,BlockId)])
+
+allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
+allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
+ let entries = entryBlocks proc
+
+ uniqs <- getUniquesM
+
+ let
+ delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
+ where x = slots * spillSlotSize -- sp delta
+
+ alloc = mkStackAllocInstr platform delta
+ dealloc = mkStackDeallocInstr platform delta
+
+ retargetList = (zip entries (map mkBlockId uniqs))
+
+ new_blockmap :: LabelMap BlockId
+ new_blockmap = mapFromList retargetList
+
+ insert_stack_insn (BasicBlock id insns)
+ | Just new_blockid <- mapLookup id new_blockmap
+ = [ BasicBlock id $ alloc ++ [ B (TBlock new_blockid) ]
+ , BasicBlock new_blockid block' ]
+ | otherwise
+ = [ BasicBlock id block' ]
+ where
+ block' = foldr insert_dealloc [] insns
+
+ insert_dealloc insn r = case insn of
+ J _ -> dealloc ++ (insn : r)
+ ANN _ (J _) -> dealloc ++ (insn : r)
+ _other | jumpDestsOfInstr insn /= []
+ -> patchJumpInstr insn retarget : r
+ _other -> insn : r
+
+ where retarget b = fromMaybe b (mapLookup b new_blockmap)
+
+ new_code = concatMap insert_stack_insn code
+ -- in
+ return (CmmProc info lbl live (ListGraph new_code), retargetList)
+-- -----------------------------------------------------------------------------
+-- Machine's assembly language
+
+-- We have a few common "instructions" (nearly all the pseudo-ops) but
+-- mostly all of 'Instr' is machine-specific.
+
+-- Some additional (potential future) instructions are commented out. They are
+-- not needed yet for the backend but could be used in the future.
+
+-- RV64 reference card: https://cs61c.org/sp23/pdfs/resources/reference-card.pdf
+-- RV64 pseudo instructions: https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#-a-listing-of-standard-risc-v-pseudoinstructions
+-- We will target: RV64G(C). That is G = I+A+F+S+D
+-- I: Integer Multiplication and Division
+-- A: Atomic Instructions
+-- F: Single Precision
+-- D: Double Precision
+-- C: Compressed (though we won't use that).
+
+-- This most notably leaves out B. (Bit Manipulation) instructions.
+
+data Instr
+ -- comment pseudo-op
+ = COMMENT SDoc
+ | MULTILINE_COMMENT SDoc
+
+ -- Annotated instruction. Should print <instr> # <doc>
+ | ANN SDoc Instr
+
+ -- location pseudo-op (file, line, col, name)
+ | LOCATION Int Int Int String
+
+ -- some static data spat out during code
+ -- generation. Will be extracted before
+ -- pretty-printing.
+ | LDATA Section RawCmmStatics
+
+ -- start a new basic block. Useful during
+ -- codegen, removed later. Preceding
+ -- instruction should be a jump, as per the
+ -- invariants for a BasicBlock (see Cmm).
+ | NEWBLOCK BlockId
+
+ -- specify current stack offset for
+ -- benefit of subsequent passes
+ | DELTA Int
+
+ -- 0. Pseudo Instructions --------------------------------------------------
+ | SXTB Operand Operand
+ | UXTB Operand Operand
+ | SXTH Operand Operand
+ | UXTH Operand Operand
+ -- | SXTW Operand Operand
+ -- | SXTX Operand Operand
+ | PUSH_STACK_FRAME
+ | POP_STACK_FRAME
+
+ -- == Base Instructions (I) ================================================
+ -- 1. Arithmetic Instructions ----------------------------------------------
+ -- all of these instructions can also take an immediate, in which case they
+ -- hafe a suffix I (except for U suffix, where it's IU then. E.g. SLTIU).
+ | ADD Operand Operand Operand -- rd = rs1 + rs2
+ | SUB Operand Operand Operand -- rd = rs1 - rs2
+
+ | AND Operand Operand Operand -- rd = rs1 & rs2
+ | OR Operand Operand Operand -- rd = rs1 | rs2
+ -- | XOR Operand Operand Operand -- rd = rs1 ^ rs2
+ | LSL {- SLL -} Operand Operand Operand -- rd = rs1 << rs2 (zero ext)
+ | LSR {- SRL -} Operand Operand Operand -- rd = rs1 >> rs2 (zero ext)
+ -- | ASL {- SLA -} Operand Operand Operand -- rd = rs1 << rs2 (sign ext)
+ | ASR {- SRA -} Operand Operand Operand -- rd = rs1 >> rs2 (sign ext)
+ -- | SLT Operand Operand Operand -- rd = rs1 < rs2 ? 1 : 0 (signed)
+ -- | SLTU Operand Operand Operand -- rd = rs1 < rs2 ? 1 : 0 (unsigned)
+
+ -- 2. Memory Load/Store Instructions ---------------------------------------
+ -- Unlike arm, we don't have register shorthands for size.
+ -- We do hover have {L,S}{B,H,W,D}[U] instructions for Load/Store, Byte, Half, Word, Double, (Unsigned).
+ -- Reusing the arm logic with the _format_ specifier will hopefully work.
+ | STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr
+ | LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr
+
+ -- 3. Control Flow ---------------------------------------------------------
+ -- B{EQ,GE,GEU,LT,LTU}, these are effectively BCOND from AArch64;
+ -- however, AArch64 desugars them into CMP + BCOND. So these are a bit more
+ -- powerful.
+ -- JAL / JARL are effectively the BL instruction from AArch64.
+
+
+ -- | CMN Operand Operand -- rd + op2
+ -- | CMP Operand Operand -- rd - op2
+
+ | MSUB Operand Operand Operand Operand -- rd = ra - rn × rm
+ | MUL Operand Operand Operand -- rd = rn × rm
+
+
+ -- Pseudo/synthesized:
+ -- NEG = SUB x, 0, y
+ -- NOT = XOR -1, x
+ | NEG Operand Operand -- rd = -op2
+
+ | SDIV Operand Operand Operand -- rd = rn ÷ rm
+
+ | SMULH Operand Operand Operand
+ | SMULL Operand Operand Operand
+
+ | UDIV Operand Operand Operand -- rd = rn ÷ rm
+
+ -- 2. Bit Manipulation Instructions ----------------------------------------
+ | SBFM Operand Operand Operand Operand -- rd = rn[i,j]
+ -- SXTB = SBFM <Wd>, <Wn>, #0, #7
+ -- SXTH = SBFM <Wd>, <Wn>, #0, #15
+ -- SXTW = SBFM <Wd>, <Wn>, #0, #31
+ | UBFM Operand Operand Operand Operand -- rd = rn[i,j]
+ -- UXTB = UBFM <Wd>, <Wn>, #0, #7
+ -- UXTH = UBFM <Wd>, <Wn>, #0, #15
+ -- Signed/Unsigned bitfield extract
+ | SBFX Operand Operand Operand Operand -- rd = rn[i,j]
+ | UBFX Operand Operand Operand Operand -- rd = rn[i,j]
+
+ -- 3. Logical and Move Instructions ----------------------------------------
+ -- | AND Operand Operand Operand -- rd = rn & op2
+ -- | ANDS Operand Operand Operand -- rd = rn & op2
+ -- | ASR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits
+ | BIC Operand Operand Operand -- rd = rn & ~op2
+ | BICS Operand Operand Operand -- rd = rn & ~op2
+ | EON Operand Operand Operand -- rd = rn ⊕ ~op2
+ | EOR Operand Operand Operand -- rd = rn ⊕ op2
+ -- | LSL Operand Operand Operand -- rd = rn ≪ rm or rd = rn ≪ #i, i is 6 bits
+ -- | LSR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits
+ | MOV Operand Operand -- rd = rn or rd = #i
+ | MOVK Operand Operand
+ -- | MOVN Operand Operand
+ -- | MOVZ Operand Operand
+ | MVN Operand Operand -- rd = ~rn
+ | ORN Operand Operand Operand -- rd = rn | ~op2
+ | ORR Operand Operand Operand -- rd = rn | op2
+ | ROR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits
+ | TST Operand Operand -- rn & op2
+ -- Load and stores.
+ -- TODO STR/LDR might want to change to STP/LDP with XZR for the second register.
+ -- | STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr
+ -- | STLR Format Operand Operand -- stlr Xn, address-mode // Xn -> *addr
+ -- | LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr
+ -- | LDAR Format Operand Operand -- ldar Xn, address-mode // Xn <- *addr
+ -- | STP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn -> *addr, Xm -> *(addr + 8)
+ -- | LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8)
+
+ -- Conditional instructions
+ -- This is a synthetic operation.
+ | CSET Operand Operand Operand Cond -- if(o2 cond o3) op <- 1 else op <- 0
+
+ | CBZ Operand Target -- if op == 0, then branch.
+ | CBNZ Operand Target -- if op /= 0, then branch.
+ -- Branching.
+ | J Target -- like B, but only generated from genJump. Used to distinguish genJumps from others.
+ | B Target -- unconditional branching b/br. (To a blockid, label or register)
+ | BL Target [Reg] [Reg] -- branch and link (e.g. set x30 to next pc, and branch)
+ | BCOND Cond Operand Operand Target -- branch with condition. b.<cond>
+
+ -- 8. Synchronization Instructions -----------------------------------------
+ | DMBSY
+ -- 9. Floating Point Instructions
+ -- Float ConVerT
+ | FCVT Operand Operand
+ -- Signed ConVerT Float
+ | SCVTF Operand Operand
+ -- Float ConVerT to Zero Signed
+ | FCVTZS Operand Operand
+ -- Float ABSolute value
+ | FABS Operand Operand
+
+instrCon :: Instr -> String
+instrCon i =
+ case i of
+ COMMENT{} -> "COMMENT"
+ MULTILINE_COMMENT{} -> "COMMENT"
+ ANN{} -> "ANN"
+ LOCATION{} -> "LOCATION"
+ LDATA{} -> "LDATA"
+ NEWBLOCK{} -> "NEWBLOCK"
+ DELTA{} -> "DELTA"
+ SXTB{} -> "SXTB"
+ UXTB{} -> "UXTB"
+ SXTH{} -> "SXTH"
+ UXTH{} -> "UXTH"
+ PUSH_STACK_FRAME{} -> "PUSH_STACK_FRAME"
+ POP_STACK_FRAME{} -> "POP_STACK_FRAME"
+ ADD{} -> "ADD"
+ -- CMN{} -> "CMN"
+ -- CMP{} -> "CMP"
+ MSUB{} -> "MSUB"
+ MUL{} -> "MUL"
+ NEG{} -> "NEG"
+ SDIV{} -> "SDIV"
+ SMULH{} -> "SMULH"
+ SMULL{} -> "SMULL"
+ SUB{} -> "SUB"
+ UDIV{} -> "UDIV"
+ SBFM{} -> "SBFM"
+ UBFM{} -> "UBFM"
+ SBFX{} -> "SBFX"
+ UBFX{} -> "UBFX"
+ AND{} -> "AND"
+ -- ANDS{} -> "ANDS"
+ ASR{} -> "ASR"
+ BIC{} -> "BIC"
+ BICS{} -> "BICS"
+ EON{} -> "EON"
+ EOR{} -> "EOR"
+ LSL{} -> "LSL"
+ LSR{} -> "LSR"
+ MOV{} -> "MOV"
+ MOVK{} -> "MOVK"
+ MVN{} -> "MVN"
+ ORN{} -> "ORN"
+ ORR{} -> "ORR"
+ ROR{} -> "ROR"
+ TST{} -> "TST"
+ STR{} -> "STR"
+ -- STLR{} -> "STLR"
+ LDR{} -> "LDR"
+ -- LDAR{} -> "LDAR"
+ -- STP{} -> "STP"
+ -- LDP{} -> "LDP"
+ CSET{} -> "CSET"
+ CBZ{} -> "CBZ"
+ CBNZ{} -> "CBNZ"
+ J{} -> "J"
+ B{} -> "B"
+ BL{} -> "BL"
+ BCOND{} -> "BCOND"
+ DMBSY{} -> "DMBSY"
+ FCVT{} -> "FCVT"
+ SCVTF{} -> "SCVTF"
+ FCVTZS{} -> "FCVTZS"
+ FABS{} -> "FABS"
+
+data Target
+ = TBlock BlockId
+ | TLabel CLabel
+ | TReg Reg
+
+
+-- Extension
+-- {Unsigned|Signed}XT{Byte|Half|Word|Doube}
+data ExtMode
+ = EUXTB | EUXTH | EUXTW | EUXTX
+ | ESXTB | ESXTH | ESXTW | ESXTX
+ deriving (Eq, Show)
+
+data ShiftMode
+ = SLSL | SLSR | SASR | SROR
+ deriving (Eq, Show)
+
+
+-- We can also add ExtShift to Extension.
+-- However at most 3bits.
+type ExtShift = Int
+-- at most 6bits
+type RegShift = Int
+
+data Operand
+ = OpReg Width Reg -- register
+ | OpRegExt Width Reg ExtMode ExtShift -- rm, <ext>[, <shift left>]
+ | OpRegShift Width Reg ShiftMode RegShift -- rm, <shift>, <0-64>
+ | OpImm Imm -- immediate value
+ | OpImmShift Imm ShiftMode RegShift
+ | OpAddr AddrMode -- memory reference
+ deriving (Eq, Show)
+
+-- Smart constructors
+opReg :: Width -> Reg -> Operand
+opReg = OpReg
+
+ra_reg, sp_reg :: Reg
+ra_reg = RegReal (RealRegSingle 1)
+sp_reg = RegReal (RealRegSingle 2)
+
+xzr, wzr, sp, ip0 :: Operand
+xzr = OpReg W64 (RegReal (RealRegSingle 0))
+wzr = OpReg W32 (RegReal (RealRegSingle 0))
+ra = OpReg W64 (RegReal (RealRegSingle 1))
+sp = OpReg W64 (RegReal (RealRegSingle 2))
+gp = OpReg W64 (RegReal (RealRegSingle 3))
+tp = OpReg W64 (RegReal (RealRegSingle 4))
+fp = OpReg W64 (RegReal (RealRegSingle 8))
+
+ip0 = OpReg W64 (RegReal (RealRegSingle 16))
+
+_x :: Int -> Operand
+_x i = OpReg W64 (RegReal (RealRegSingle i))
+x0, x1, x2, x3, x4, x5, x6, x7 :: Operand
+x8, x9, x10, x11, x12, x13, x14, x15 :: Operand
+x16, x17, x18, x19, x20, x21, x22, x23 :: Operand
+x24, x25, x26, x27, x28, x29, x30, x31 :: Operand
+x0 = OpReg W64 (RegReal (RealRegSingle 0))
+x1 = OpReg W64 (RegReal (RealRegSingle 1))
+x2 = OpReg W64 (RegReal (RealRegSingle 2))
+x3 = OpReg W64 (RegReal (RealRegSingle 3))
+x4 = OpReg W64 (RegReal (RealRegSingle 4))
+x5 = OpReg W64 (RegReal (RealRegSingle 5))
+x6 = OpReg W64 (RegReal (RealRegSingle 6))
+x7 = OpReg W64 (RegReal (RealRegSingle 7))
+x8 = OpReg W64 (RegReal (RealRegSingle 8))
+x9 = OpReg W64 (RegReal (RealRegSingle 9))
+x10 = OpReg W64 (RegReal (RealRegSingle 10))
+x11 = OpReg W64 (RegReal (RealRegSingle 11))
+x12 = OpReg W64 (RegReal (RealRegSingle 12))
+x13 = OpReg W64 (RegReal (RealRegSingle 13))
+x14 = OpReg W64 (RegReal (RealRegSingle 14))
+x15 = OpReg W64 (RegReal (RealRegSingle 15))
+x16 = OpReg W64 (RegReal (RealRegSingle 16))
+x17 = OpReg W64 (RegReal (RealRegSingle 17))
+x18 = OpReg W64 (RegReal (RealRegSingle 18))
+x19 = OpReg W64 (RegReal (RealRegSingle 19))
+x20 = OpReg W64 (RegReal (RealRegSingle 20))
+x21 = OpReg W64 (RegReal (RealRegSingle 21))
+x22 = OpReg W64 (RegReal (RealRegSingle 22))
+x23 = OpReg W64 (RegReal (RealRegSingle 23))
+x24 = OpReg W64 (RegReal (RealRegSingle 24))
+x25 = OpReg W64 (RegReal (RealRegSingle 25))
+x26 = OpReg W64 (RegReal (RealRegSingle 26))
+x27 = OpReg W64 (RegReal (RealRegSingle 27))
+x28 = OpReg W64 (RegReal (RealRegSingle 28))
+x29 = OpReg W64 (RegReal (RealRegSingle 29))
+x30 = OpReg W64 (RegReal (RealRegSingle 30))
+x31 = OpReg W64 (RegReal (RealRegSingle 31))
+
+_d :: Int -> Operand
+_d = OpReg W64 . RegReal . RealRegSingle
+d0, d1, d2, d3, d4, d5, d6, d7 :: Operand
+d8, d9, d10, d11, d12, d13, d14, d15 :: Operand
+d16, d17, d18, d19, d20, d21, d22, d23 :: Operand
+d24, d25, d26, d27, d28, d29, d30, d31 :: Operand
+d0 = OpReg W64 (RegReal (RealRegSingle 32))
+d1 = OpReg W64 (RegReal (RealRegSingle 33))
+d2 = OpReg W64 (RegReal (RealRegSingle 34))
+d3 = OpReg W64 (RegReal (RealRegSingle 35))
+d4 = OpReg W64 (RegReal (RealRegSingle 36))
+d5 = OpReg W64 (RegReal (RealRegSingle 37))
+d6 = OpReg W64 (RegReal (RealRegSingle 38))
+d7 = OpReg W64 (RegReal (RealRegSingle 39))
+d8 = OpReg W64 (RegReal (RealRegSingle 40))
+d9 = OpReg W64 (RegReal (RealRegSingle 41))
+d10 = OpReg W64 (RegReal (RealRegSingle 42))
+d11 = OpReg W64 (RegReal (RealRegSingle 43))
+d12 = OpReg W64 (RegReal (RealRegSingle 44))
+d13 = OpReg W64 (RegReal (RealRegSingle 45))
+d14 = OpReg W64 (RegReal (RealRegSingle 46))
+d15 = OpReg W64 (RegReal (RealRegSingle 47))
+d16 = OpReg W64 (RegReal (RealRegSingle 48))
+d17 = OpReg W64 (RegReal (RealRegSingle 49))
+d18 = OpReg W64 (RegReal (RealRegSingle 50))
+d19 = OpReg W64 (RegReal (RealRegSingle 51))
+d20 = OpReg W64 (RegReal (RealRegSingle 52))
+d21 = OpReg W64 (RegReal (RealRegSingle 53))
+d22 = OpReg W64 (RegReal (RealRegSingle 54))
+d23 = OpReg W64 (RegReal (RealRegSingle 55))
+d24 = OpReg W64 (RegReal (RealRegSingle 56))
+d25 = OpReg W64 (RegReal (RealRegSingle 57))
+d26 = OpReg W64 (RegReal (RealRegSingle 58))
+d27 = OpReg W64 (RegReal (RealRegSingle 59))
+d28 = OpReg W64 (RegReal (RealRegSingle 60))
+d29 = OpReg W64 (RegReal (RealRegSingle 61))
+d30 = OpReg W64 (RegReal (RealRegSingle 62))
+d31 = OpReg W64 (RegReal (RealRegSingle 63))
+
+opRegUExt :: Width -> Reg -> Operand
+opRegUExt W64 r = OpRegExt W64 r EUXTX 0
+opRegUExt W32 r = OpRegExt W32 r EUXTW 0
+opRegUExt W16 r = OpRegExt W16 r EUXTH 0
+opRegUExt W8 r = OpRegExt W8 r EUXTB 0
+opRegUExt w _r = pprPanic "opRegUExt" (ppr w)
+
+opRegSExt :: Width -> Reg -> Operand
+opRegSExt W64 r = OpRegExt W64 r ESXTX 0
+opRegSExt W32 r = OpRegExt W32 r ESXTW 0
+opRegSExt W16 r = OpRegExt W16 r ESXTH 0
+opRegSExt W8 r = OpRegExt W8 r ESXTB 0
+opRegSExt w _r = pprPanic "opRegSExt" (ppr w)
diff --git a/compiler/GHC/CmmToAsm/RV64/Ppr.hs b/compiler/GHC/CmmToAsm/RV64/Ppr.hs
new file mode 100644
index 0000000000..b53477e2b5
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64/Ppr.hs
@@ -0,0 +1,610 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE CPP #-}
+
+module GHC.CmmToAsm.RV64.Ppr (pprNatCmmDecl, pprInstr) where
+
+import GHC.Prelude hiding (EQ)
+
+import GHC.CmmToAsm.RV64.Instr
+import GHC.CmmToAsm.RV64.Regs
+import GHC.CmmToAsm.RV64.Cond
+import GHC.CmmToAsm.Ppr
+import GHC.CmmToAsm.Format
+import GHC.Platform.Reg
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Types
+import GHC.CmmToAsm.Utils
+
+import GHC.Cmm hiding (topInfoTable)
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import GHC.Types.Basic (Alignment, mkAlignment, alignmentBytes)
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+
+import GHC.Types.Unique ( pprUniqueAlways, getUnique )
+import GHC.Platform
+import GHC.Utils.Outputable
+
+import GHC.Utils.Panic
+
+pprProcAlignment :: IsDoc doc => NCGConfig -> doc
+pprProcAlignment config = maybe empty (pprAlign platform . mkAlignment) (ncgProcAlignment config)
+ where
+ platform = ncgPlatform config
+
+pprNatCmmDecl :: IsDoc doc => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc
+pprNatCmmDecl config (CmmData section dats) =
+ pprSectionAlign config section $$ pprDatas config dats
+
+pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
+ let platform = ncgPlatform config in
+ pprProcAlignment config $$
+ case topInfoTable proc of
+ Nothing ->
+ -- special case for code without info table:
+ pprSectionAlign config (Section Text lbl) $$
+ -- do not
+ -- pprProcAlignment config $$
+ pprLabel platform lbl $$ -- blocks guaranteed not null, so label needed
+ vcat (map (pprBasicBlock config top_info) blocks) $$
+ (if ncgDwarfEnabled config
+ then line (pprAsmLabel platform (mkAsmTempEndLabel lbl) <> char ':') else empty) $$
+ pprSizeDecl platform lbl
+
+ Just (CmmStaticsRaw info_lbl _) ->
+ pprSectionAlign config (Section Text info_lbl) $$
+ -- pprProcAlignment config $$
+ (if platformHasSubsectionsViaSymbols platform
+ then line (pprAsmLabel platform (mkDeadStripPreventer info_lbl) <> char ':')
+ else empty) $$
+ vcat (map (pprBasicBlock config top_info) blocks) $$
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+ (if platformHasSubsectionsViaSymbols platform
+ then -- See Note [Subsections Via Symbols]
+ line
+ $ text "\t.long "
+ <+> pprAsmLabel platform info_lbl
+ <+> char '-'
+ <+> pprAsmLabel platform (mkDeadStripPreventer info_lbl)
+ else empty) $$
+ pprSizeDecl platform info_lbl
+{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> SDoc #-}
+{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> HDoc #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
+
+pprLabel :: IsDoc doc => Platform -> CLabel -> doc
+pprLabel platform lbl =
+ pprGloblDecl platform lbl
+ $$ pprTypeDecl platform lbl
+ $$ line (pprAsmLabel platform lbl <> char ':')
+
+pprAlign :: IsDoc doc => Platform -> Alignment -> doc
+pprAlign _platform alignment
+ = line $ text "\t.balign " <> int (alignmentBytes alignment)
+
+-- | Print appropriate alignment for the given section type.
+pprAlignForSection :: IsDoc doc => Platform -> SectionType -> doc
+pprAlignForSection _platform _seg
+ -- .balign is stable, whereas .align is platform dependent.
+ = line (text "\t.balign 8") -- always 8
+
+-- | Print section header and appropriate alignment for that section.
+--
+-- This one will emit the header:
+--
+-- .section .text
+-- .balign 8
+--
+pprSectionAlign :: IsDoc doc => NCGConfig -> Section -> doc
+pprSectionAlign _config (Section (OtherSection _) _) =
+ panic "AArch64.Ppr.pprSectionAlign: unknown section"
+pprSectionAlign config sec@(Section seg _) =
+ line (pprSectionHeader config sec)
+ $$ pprAlignForSection (ncgPlatform config) seg
+
+-- | Output the ELF .size directive.
+pprSizeDecl :: IsDoc doc => Platform -> CLabel -> doc
+pprSizeDecl platform lbl
+ = if osElfTarget (platformOS platform)
+ then line (text "\t.size" <+> pprAsmLabel platform lbl <> text ", .-" <> pprAsmLabel platform lbl)
+ else empty
+
+pprBasicBlock :: IsDoc doc => NCGConfig -> LabelMap RawCmmStatics -> NatBasicBlock Instr
+ -> doc
+pprBasicBlock config info_env (BasicBlock blockid instrs)
+ = maybe_infotable $
+ pprLabel platform asmLbl $$
+ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs)) $$
+ (if ncgDwarfEnabled config
+ then line (pprAsmLabel platform (mkAsmTempEndLabel asmLbl) <> char ':')
+ else empty
+ )
+ where
+ -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
+ optInstrs = filter f instrs
+ where f (MOV o1 o2) | o1 == o2 = False
+ f _ = True
+
+ asmLbl = blockLbl blockid
+ platform = ncgPlatform config
+ maybe_infotable c = case mapLookup blockid info_env of
+ Nothing -> c
+ Just (CmmStaticsRaw info_lbl info) ->
+ -- pprAlignForSection platform Text $$
+ infoTableLoc $$
+ vcat (map (pprData config) info) $$
+ pprLabel platform info_lbl $$
+ c $$
+ (if ncgDwarfEnabled config
+ then line (pprAsmLabel platform (mkAsmTempEndLabel info_lbl) <> char ':')
+ else empty)
+ -- Make sure the info table has the right .loc for the block
+ -- coming right after it. See Note [Info Offset]
+ infoTableLoc = case instrs of
+ (l@LOCATION{} : _) -> pprInstr platform l
+ _other -> empty
+
+pprDatas :: IsDoc doc => NCGConfig -> RawCmmStatics -> doc
+-- See Note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel".
+pprDatas config (CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
+ | lbl == mkIndStaticInfoLabel
+ , let labelInd (CmmLabelOff l _) = Just l
+ labelInd (CmmLabel l) = Just l
+ labelInd _ = Nothing
+ , Just ind' <- labelInd ind
+ , alias `mayRedirectTo` ind'
+ = pprGloblDecl (ncgPlatform config) alias
+ $$ line (text ".equiv" <+> pprAsmLabel (ncgPlatform config) alias <> comma <> pprAsmLabel (ncgPlatform config) ind')
+
+pprDatas config (CmmStaticsRaw lbl dats)
+ = vcat (pprLabel platform lbl : map (pprData config) dats)
+ where
+ platform = ncgPlatform config
+
+pprData :: IsDoc doc => NCGConfig -> CmmStatic -> doc
+pprData _config (CmmString str) = line (pprString str)
+pprData _config (CmmFileEmbed path _) = line (pprFileEmbed path)
+
+pprData config (CmmUninitialised bytes)
+ = line $ let platform = ncgPlatform config
+ in if platformOS platform == OSDarwin
+ then text ".space " <> int bytes
+ else text ".skip " <> int bytes
+
+pprData config (CmmStaticLit lit) = pprDataItem config lit
+
+pprGloblDecl :: IsDoc doc => Platform -> CLabel -> doc
+pprGloblDecl platform lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = line (text "\t.globl " <> pprAsmLabel platform lbl)
+
+-- Note [Always use objects for info tables]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- See discussion in X86.Ppr for why this is necessary. Essentially we need to
+-- ensure that we never pass function symbols when we might want to lookup the
+-- info table. If we did, we could end up with procedure linking tables
+-- (PLT)s, and thus the lookup wouldn't point to the function, but into the
+-- jump table.
+--
+-- Fun fact: The LLVMMangler exists to patch this issue su on the LLVM side as
+-- well.
+pprLabelType' :: IsLine doc => Platform -> CLabel -> doc
+pprLabelType' platform lbl =
+ if isCFunctionLabel lbl || functionOkInfoTable then
+ text "@function"
+ else
+ text "@object"
+ where
+ functionOkInfoTable = platformTablesNextToCode platform &&
+ isInfoTableLabel lbl && not (isCmmInfoTableLabel lbl) && not (isConInfoTableLabel lbl)
+
+-- this is called pprTypeAndSizeDecl in PPC.Ppr
+pprTypeDecl :: IsDoc doc => Platform -> CLabel -> doc
+pprTypeDecl platform lbl
+ = if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
+ then line (text ".type " <> pprAsmLabel platform lbl <> text ", " <> pprLabelType' platform lbl)
+ else empty
+
+pprDataItem :: IsDoc doc => NCGConfig -> CmmLit -> doc
+pprDataItem config lit
+ = lines_ (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit)
+ where
+ platform = ncgPlatform config
+
+ imm = litToImm lit
+
+ ppr_item II8 _ = [text "\t.byte\t" <> pprImm platform imm]
+ ppr_item II16 _ = [text "\t.short\t" <> pprImm platform imm]
+ ppr_item II32 _ = [text "\t.long\t" <> pprImm platform imm]
+ ppr_item II64 _ = [text "\t.quad\t" <> pprImm platform imm]
+
+ ppr_item FF32 (CmmFloat r _)
+ = let bs = floatToBytes (fromRational r)
+ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs
+
+ ppr_item FF64 (CmmFloat r _)
+ = let bs = doubleToBytes (fromRational r)
+ in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs
+
+ ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit)
+
+pprImm :: IsLine doc => Platform -> Imm -> doc
+pprImm _ (ImmInt i) = int i
+pprImm _ (ImmInteger i) = integer i
+pprImm p (ImmCLbl l) = pprAsmLabel p l
+pprImm p (ImmIndex l i) = pprAsmLabel p l <> char '+' <> int i
+pprImm _ (ImmLit s) = ftext s
+
+-- TODO: See pprIm below for why this is a bad idea!
+pprImm _ (ImmFloat f)
+ | f == 0 = text "wzr"
+ | otherwise = float (fromRational f)
+pprImm _ (ImmDouble d)
+ | d == 0 = text "xzr"
+ | otherwise = double (fromRational d)
+
+pprImm p (ImmConstantSum a b) = pprImm p a <> char '+' <> pprImm p b
+pprImm p (ImmConstantDiff a b) = pprImm p a <> char '-'
+ <> lparen <> pprImm p b <> rparen
+
+
+-- aarch64 GNU as uses // for comments.
+asmComment :: SDoc -> SDoc
+asmComment c = whenPprDebug $ text "#" <+> c
+
+asmDoubleslashComment :: SDoc -> SDoc
+asmDoubleslashComment c = whenPprDebug $ text "//" <+> c
+
+asmMultilineComment :: SDoc -> SDoc
+asmMultilineComment c = whenPprDebug $ text "/*" $+$ c $+$ text "*/"
+
+pprIm :: IsLine doc => Platform -> Imm -> doc
+pprIm platform im = case im of
+ ImmInt i -> int i
+ ImmInteger i -> integer i
+
+ -- FIXME: This is AArch64 commentry, not necesarily correct for RISCV!
+ -- TODO: This will only work for
+ -- The floating point value must be expressible as ±n ÷ 16 × 2^r,
+ -- where n and r are integers such that 16 ≤ n ≤ 31 and -3 ≤ r ≤ 4.
+ -- and 0 needs to be encoded as wzr/xzr.
+ --
+ -- Except for 0, we might want to either split it up into enough
+ -- ADD operations into an Integer register and then just bit copy it into
+ -- the double register? See the toBytes + fromRational above for data items.
+ -- This is something the x86 backend does.
+ --
+ -- We could also just turn them into statics :-/ Which is what the
+ -- PowerPC backend does.
+ ImmFloat f | f == 0 -> text "wzr"
+ ImmFloat f -> char '#' <> float (fromRational f)
+ ImmDouble d | d == 0 -> text "xzr"
+ ImmDouble d -> char '#' <> double (fromRational d)
+ -- =<lbl> pseudo instruction!
+ ImmCLbl l -> char '=' <> pprAsmLabel platform l
+ ImmIndex l o -> text "[=" <> pprAsmLabel platform l <> comma <+> char '#' <> int o <> char ']'
+ _ -> panic "AArch64.pprIm"
+
+negOp :: Operand -> Operand
+negOp (OpImm (ImmInt i)) = OpImm (ImmInt (negate i))
+negOp (OpImm (ImmInteger i)) = OpImm (ImmInteger (negate i))
+negOp op = pprPanic "RV64.negOp" (text $ show op)
+
+pprExt :: IsLine doc => ExtMode -> doc
+pprExt EUXTB = text "uxtb"
+pprExt EUXTH = text "uxth"
+pprExt EUXTW = text "uxtw"
+pprExt EUXTX = text "uxtx"
+pprExt ESXTB = text "sxtb"
+pprExt ESXTH = text "sxth"
+pprExt ESXTW = text "sxtw"
+pprExt ESXTX = text "sxtx"
+
+pprShift :: IsLine doc => ShiftMode -> doc
+pprShift SLSL = text "lsl"
+pprShift SLSR = text "lsr"
+pprShift SASR = text "asr"
+pprShift SROR = text "ror"
+
+pprOp :: IsLine doc => Platform -> Operand -> doc
+pprOp plat op = case op of
+ OpReg w r -> pprReg w r
+ OpRegExt w r x 0 -> pprReg w r <> comma <+> pprExt x
+ OpRegExt w r x i -> pprReg w r <> comma <+> pprExt x <> comma <+> char '#' <> int i
+ OpRegShift w r s i -> pprReg w r <> comma <+> pprShift s <+> char '#' <> int i
+ OpImm im -> pprIm plat im
+ OpImmShift im s i -> pprIm plat im <> comma <+> pprShift s <+> char '#' <> int i
+ -- TODO: Address computation always use registers as 64bit -- is this correct?
+ OpAddr (AddrRegReg r1 r2) -> pprPanic "No Reg-Reg addressing mode in Riscv" (text $ show op) -- char '[' <+> pprReg W64 r1 <> comma <+> pprReg W64 r2 <+> char ']'
+ OpAddr (AddrRegImm r1 im) -> pprImm plat im <> char '(' <> pprReg W64 r1 <> char ')'
+ OpAddr (AddrReg r1) -> text "0(" <+> pprReg W64 r1 <+> char ')'
+
+pprReg :: forall doc. IsLine doc => Width -> Reg -> doc
+pprReg w r = case r of
+ RegReal (RealRegSingle i) -> ppr_reg_no w i
+ -- virtual regs should not show up, but this is helpful for debugging.
+ RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
+ _ -> pprPanic "AArch64.pprReg" (text $ show r)
+
+ where
+ ppr_reg_no :: Width -> Int -> doc
+ ppr_reg_no _ 0 = text "zero"
+ ppr_reg_no _ 2 = text "sp"
+
+ ppr_reg_no w i
+ | i < 0, w == W32 = text "wzr"
+ | i < 0, w == W64 = text "xzr"
+ | i < 0 = pprPanic "Invalid Zero Reg" (ppr w <+> int i)
+ -- General Purpose Registers
+ | i <= 31 = text "x" <> int i
+ -- Floating Point Registers
+ | i <= 63 = text "f" <> int (i-32)
+ -- no support for 'q'uad in GHC's NCG yet.
+ | otherwise = text "very naughty powerpc register"
+
+isFloatOp :: Operand -> Bool
+isFloatOp (OpReg _ (RegReal (RealRegSingle i))) | i > 31 = True
+isFloatOp (OpReg _ (RegVirtual (VirtualRegF _))) = True
+isFloatOp (OpReg _ (RegVirtual (VirtualRegD _))) = True
+isFloatOp _ = False
+
+isImmOp :: Operand -> Bool
+isImmOp (OpImm _) = True
+isImmOp _ = False
+
+isLabel :: Target -> Bool
+isLabel (TBlock _) = True
+isLabel (TLabel _) = True
+isLabel _ = False
+
+getLabel :: IsLine doc => Platform -> Target -> doc
+getLabel platform (TBlock bid) = pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
+getLabel platform (TLabel lbl) = pprAsmLabel platform lbl
+getLabel _platform _other = panic "Cannot turn this into a label"
+
+pprInstr :: IsDoc doc => Platform -> Instr -> doc
+pprInstr platform instr = case instr of
+ -- Meta Instructions ---------------------------------------------------------
+ -- see Note [dualLine and dualDoc] in GHC.Utils.Outputable
+ COMMENT s -> dualDoc (asmComment s) empty
+ MULTILINE_COMMENT s -> dualDoc (asmMultilineComment s) empty
+ ANN d i -> dualDoc (pprInstr platform i <+> asmDoubleslashComment d) (pprInstr platform i)
+
+ LOCATION file line' col _name
+ -> line (text "\t.loc" <+> int file <+> int line' <+> int col)
+ DELTA d -> dualDoc (asmComment $ text "\tdelta = " <> int d) empty
+ -- see Note [dualLine and dualDoc] in GHC.Utils.Outputable
+ NEWBLOCK _ -> panic "PprInstr: NEWBLOCK"
+ LDATA _ _ -> panic "pprInstr: LDATA"
+
+ -- Pseudo Instructions -------------------------------------------------------
+
+ PUSH_STACK_FRAME -> lines_ [ text "\taddi sp, sp, -16"
+ , text "\tsd x1, 8(sp)" -- store RA
+ , text "\tsd x8, 0(sp)" -- store FP/s0
+ , text "\taddi x8, sp, 16"]
+
+ POP_STACK_FRAME -> lines_ [ text "\tld x8, 0(sp)" -- restore FP/s0
+ , text "\tld x1, 8(sp)" -- restore RA
+ , text "\taddi sp, sp, 16" ]
+ -- ===========================================================================
+ -- AArch64 Instruction Set
+ -- 1. Arithmetic Instructions ------------------------------------------------
+ ADD o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfadd") o1 o2 o3
+ | otherwise -> op3 (text "\tadd") o1 o2 o3
+ -- CMN o1 o2 -> op2 (text "\tcmn") o1 o2
+ -- CMP o1 o2
+ -- | isFloatOp o1 && isFloatOp o2 -> op2 (text "\tfcmp") o1 o2
+ -- | otherwise -> op2 (text "\tcmp") o1 o2
+ MSUB o1 o2 o3 o4 -> op4 (text "\tmsub") o1 o2 o3 o4
+ MUL o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfmul") o1 o2 o3
+ | otherwise -> op3 (text "\tmul") o1 o2 o3
+ SMULH o1 o2 o3 -> op3 (text "\tsmulh") o1 o2 o3
+ SMULL o1 o2 o3 -> op3 (text "\tsmull") o1 o2 o3
+ NEG o1 o2
+ | isFloatOp o1 && isFloatOp o2 -> op2 (text "\tfneg") o1 o2
+ | otherwise -> op2 (text "\tneg") o1 o2
+ SDIV o1 o2 o3 | isFloatOp o1 && isFloatOp o2 && isFloatOp o3
+ -> op3 (text "\tfdiv") o1 o2 o3
+ SDIV o1 o2 o3 -> op3 (text "\tsdiv") o1 o2 o3
+
+ SUB o1 o2 o3
+ | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfsub") o1 o2 o3
+ | isImmOp o3 -> op3 (text "\taddi") o1 o2 (negOp o3)
+ | otherwise -> op3 (text "\tsub") o1 o2 o3
+ UDIV o1 o2 o3 -> op3 (text "\tudiv") o1 o2 o3
+
+ -- 2. Bit Manipulation Instructions ------------------------------------------
+ SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
+ UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
+ -- signed and unsigned bitfield extract
+ SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
+ UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4
+ SXTB o1 o2 -> op2 (text "\tsxtb") o1 o2
+ UXTB o1 o2 -> op2 (text "\tuxtb") o1 o2
+ SXTH o1 o2 -> op2 (text "\tsxth") o1 o2
+ UXTH o1 o2 -> op2 (text "\tuxth") o1 o2
+
+ -- 3. Logical and Move Instructions ------------------------------------------
+ AND o1 o2 o3 -> op3 (text "\tand") o1 o2 o3
+ -- ANDS o1 o2 o3 -> op3 (text "\tands") o1 o2 o3
+ ASR o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
+ BIC o1 o2 o3 -> op3 (text "\tbic") o1 o2 o3
+ BICS o1 o2 o3 -> op3 (text "\tbics") o1 o2 o3
+ EON o1 o2 o3 -> op3 (text "\teon") o1 o2 o3
+ EOR o1 o2 o3 -> op3 (text "\teor") o1 o2 o3
+ LSL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
+ LSR o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
+ MOV o1 o2
+ | isFloatOp o1 || isFloatOp o2 -> op2 (text "\tfmov") o1 o2
+ | isImmOp o2
+ , (OpImm (ImmInteger i)) <- o2
+ , (-1 `shiftL` 11) <= i
+ , i <= (1 `shiftL` 11 - 1) -> lines_ [ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2 ]
+ | isImmOp o2
+ , (OpImm (ImmInteger i)) <- o2
+ , (-1 `shiftL` 31) <= i
+ , i <= (1 `shiftL` 31 -1) -> lines_ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")"
+ , text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")" ]
+ | otherwise -> op3 (text "\taddi") o1 o2 (OpImm (ImmInt 0))
+ MOVK o1 o2 -> op2 (text "\tmovk") o1 o2
+ MVN o1 o2 -> op2 (text "\tmvn") o1 o2
+ ORN o1 o2 o3 -> op3 (text "\torn") o1 o2 o3
+ ORR o1 o2 o3 -> op3 (text "\torr") o1 o2 o3
+ ROR o1 o2 o3 -> op3 (text "\tror") o1 o2 o3
+ TST o1 o2 -> op2 (text "\ttst") o1 o2
+
+ -- 4. Branch Instructions ----------------------------------------------------
+ J t -> pprInstr platform (B t)
+ B l | isLabel l -> line $ text "\tjal" <+> text "x0" <> comma <+> getLabel platform l
+ B (TReg r) -> line $ text "\tjalr" <+> text "x0" <> comma <+> pprReg W64 r <> comma <+> text "0"
+
+ BL l _ _ | isLabel l-> line $ text "\tjal" <+> text "x1" <> comma <+> getLabel platform l
+ BL (TReg r) _ _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg W64 r <> comma <+> text "0"
+
+ BCOND c l r t | isLabel t -> case c of
+ EQ -> line $ text "\tbeq" <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ NE -> line $ text "\tbne" <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ SLT -> line $ text "\tblt" <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ SLE -> line $ text "\tbge" <+> pprOp platform r <> comma <+> pprOp platform l <> comma <+> getLabel platform t
+ SGE -> line $ text "\tbge" <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ SGT -> line $ text "\tblt" <+> pprOp platform r <> comma <+> pprOp platform l <> comma <+> getLabel platform t
+ ULT -> line $ text "\tbltu" <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ ULE -> line $ text "\tbgeu" <+> pprOp platform r <> comma <+> pprOp platform l <> comma <+> getLabel platform t
+ UGE -> line $ text "\tbgeu" <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t
+ UGT -> line $ text "\tbltu" <+> pprOp platform r <> comma <+> pprOp platform l <> comma <+> getLabel platform t
+
+ BCOND _ _ _ (TReg _) -> panic "RV64.ppr: No conditional branching to registers!"
+
+ -- 5. Atomic Instructions ----------------------------------------------------
+ -- 6. Conditional Instructions -----------------------------------------------
+ CSET o l r c -> case c of
+ EQ -> lines_ [ subFor l r
+ , text "\tseqz" <+> pprOp platform o <> comma <+> pprOp platform o]
+ NE -> lines_ [ subFor l r
+ , text "\tsnez" <+> pprOp platform o <> comma <+> pprOp platform o]
+ SLT -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r ]
+ SLE -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l
+ , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ]
+ SGE -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
+ , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ]
+ SGT -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l ]
+ ULT -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r ]
+ ULE -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l
+ , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ]
+ UGE -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
+ , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ]
+ UGT -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l ]
+ _ -> panic "RV64.ppr: unhandled CSET conditional"
+ where
+ subFor l r | (OpImm _) <- r = text "\taddi" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform (negOp r)
+ | (OpImm _) <- l = panic "RV64.ppr: Cannot SUB IMM _"
+ | otherwise = text "\tsub" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r
+ sltFor l r | (OpImm _) <- r = text "\tslti"
+ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLT IMM _"
+ | otherwise = text "\tslt"
+ sltuFor l r| (OpImm _) <- r = text "\tsltui"
+ | (OpImm _) <- l = panic "PV64.ppr: Cannot SLTU IMM _"
+ | otherwise = text "\tsltu"
+
+ CBZ o (TBlock bid) -> line $ text "\tbeq x0, " <+> pprOp platform o <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
+ CBZ o (TLabel lbl) -> line $ text "\tbeq x0, " <+> pprOp platform o <> comma <+> pprAsmLabel platform lbl
+ CBZ _ (TReg _) -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
+
+ CBNZ o (TBlock bid) -> line $ text "\tbne x0, " <+> pprOp platform o <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
+ CBNZ o (TLabel lbl) -> line $ text "\tbne x0, " <+> pprOp platform o <> comma <+> pprAsmLabel platform lbl
+ CBNZ _ (TReg _) -> panic "AArch64.ppr: No conditional (cbnz) branching to registers!"
+
+ -- 7. Load and Store Instructions --------------------------------------------
+ -- NOTE: GHC may do whacky things where it only load the lower part of an
+ -- address. Not observing the correct size when loading will lead
+ -- inevitably to crashes.
+ STR II8 o1 o2 -> op2 (text "\tsb") o1 o2
+ STR II16 o1 o2 -> op2 (text "\tsh") o1 o2
+ STR II32 o1 o2 -> op2 (text "\tsw") o1 o2
+ STR II64 o1 o2 -> op2 (text "\tsd") o1 o2
+
+ LDR _f o1 (OpImm (ImmIndex lbl off)) ->
+ lines_ [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl
+ , text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> (int off)
+ ]
+
+ LDR _f o1 (OpImm (ImmCLbl lbl)) ->
+ -- fixing this is _really_ annoyin we need to generate code like:
+ -- 1: auipc x16, %pcrel_hi(<lbl>)
+ -- addi x16, x16, %pcrel_lo(1b)
+ -- I really dislike this (refer back to label 1 syntax from the assembler.)
+ --
+ -- So we'll go with pseudo ops. la and li it is.
+ -- op_adrp o1 (text "%pcrel_hi(" <> pprAsmLabel platform lbl <> text ")") $$
+ -- op_add o1 (text "%pcrel_lo(" <> pprAsmLabel platform lbl <> text ")")
+ line $ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl
+
+ LDR _f o1@(OpReg W8 (RegReal (RealRegSingle i))) o2 | i < 32 ->
+ op2 (text "\tldrb") o1 o2
+ LDR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 ->
+ op2 (text "\tldrh") o1 o2
+
+ LDR II8 o1 o2 -> op2 (text "\tlb") o1 o2
+ LDR II16 o1 o2 -> op2 (text "\tlh") o1 o2
+ LDR II32 o1 o2 -> op2 (text "\tlw") o1 o2
+ LDR II64 o1 o2 -> op2 (text "\tld") o1 o2
+ -- LDAR _f o1 o2 -> op2 (text "\tldar") o1 o2
+
+ -- STP _f o1 o2 o3 -> op3 (text "\tstp") o1 o2 o3
+ -- LDP _f o1 o2 o3 -> op3 (text "\tldp") o1 o2 o3
+
+ -- 8. Synchronization Instructions -------------------------------------------
+ DMBSY -> line $ text "\tdmb sy"
+ -- 9. Floating Point Instructions --------------------------------------------
+ FCVT o1 o2 -> op2 (text "\tfcvt") o1 o2
+ SCVTF o1 o2 -> op2 (text "\tscvtf") o1 o2
+ FCVTZS o1 o2 -> op2 (text "\tfcvtzs") o1 o2
+ FABS o1 o2 -> op2 (text "\tfabs") o1 o2
+ where op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2
+ op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
+ op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
+ -- op_ldr o1 rest = line $ text "\tld" <+> pprOp platform o1 <> comma <+> rest <+> text "(" <> pprOp platform o1 <> text ")"
+ -- op_adrp o1 rest = line $ text "\tauipc" <+> pprOp platform o1 <> comma <+> rest
+ -- op_add o1 rest = line $ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> rest
+
+pprBcond :: IsLine doc => Cond -> doc
+pprBcond c = text "b." <> pprCond c
+
+pprCond :: IsLine doc => Cond -> doc
+pprCond c = case c of
+ ALWAYS -> text "al" -- Always
+ EQ -> text "eq" -- Equal
+ NE -> text "ne" -- Not Equal
+
+ SLT -> text "lt" -- Signed less than ; Less than, or unordered
+ SLE -> text "le" -- Signed less than or equal ; Less than or equal, or unordered
+ SGE -> text "ge" -- Signed greater than or equal ; Greater than or equal
+ SGT -> text "gt" -- Signed greater than ; Greater than
+
+ ULT -> text "lo" -- Carry clear/ unsigned lower ; less than
+ ULE -> text "ls" -- Unsigned lower or same ; Less than or equal
+ UGE -> text "hs" -- Carry set/unsigned higher or same ; Greater than or equal, or unordered
+ UGT -> text "hi" -- Unsigned higher ; Greater than, or unordered
+
+ NEVER -> text "nv" -- Never
+ VS -> text "vs" -- Overflow ; Unordered (at least one NaN operand)
+ VC -> text "vc" -- No overflow ; Not unordered
+
+ -- Ordered variants. Respecting NaN.
+ OLT -> text "mi"
+ OLE -> text "ls"
+ OGE -> text "ge"
+ OGT -> text "gt"
+
+ -- Unordered
+ UOLT -> text "lt"
+ UOLE -> text "le"
+ UOGE -> text "pl"
+ UOGT -> text "hi"
diff --git a/compiler/GHC/CmmToAsm/RV64/RegInfo.hs b/compiler/GHC/CmmToAsm/RV64/RegInfo.hs
new file mode 100644
index 0000000000..0266444453
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64/RegInfo.hs
@@ -0,0 +1,31 @@
+module GHC.CmmToAsm.RV64.RegInfo where
+
+import GHC.Prelude
+
+import GHC.CmmToAsm.RV64.Instr
+import GHC.Cmm.BlockId
+import GHC.Cmm
+
+import GHC.Utils.Outputable
+
+data JumpDest = DestBlockId BlockId
+
+-- Debug Instance
+instance Outputable JumpDest where
+ ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
+
+-- TODO: documen what this does. See Ticket 19914
+getJumpDestBlockId :: JumpDest -> Maybe BlockId
+getJumpDestBlockId (DestBlockId bid) = Just bid
+
+-- TODO: document what this does. See Ticket 19914
+canShortcut :: Instr -> Maybe JumpDest
+canShortcut _ = Nothing
+
+-- TODO: document what this does. See Ticket 19914
+shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics
+shortcutStatics _ other_static = other_static
+
+-- TODO: document what this does. See Ticket 19914
+shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
+shortcutJump _ other = other \ No newline at end of file
diff --git a/compiler/GHC/CmmToAsm/RV64/Regs.hs b/compiler/GHC/CmmToAsm/RV64/Regs.hs
new file mode 100644
index 0000000000..3b2013e5e9
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/RV64/Regs.hs
@@ -0,0 +1,153 @@
+module GHC.CmmToAsm.RV64.Regs where
+
+import GHC.Prelude
+import GHC.Data.FastString
+
+import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
+import GHC.CmmToAsm.Format
+
+import GHC.Cmm
+import GHC.Cmm.CLabel ( CLabel )
+import GHC.Types.Unique
+
+import GHC.Platform.Regs
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+import GHC.Platform
+
+allMachRegNos :: [RegNo]
+allMachRegNos = [0..31] ++ [32..63]
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs for which we are prepared to allow the
+-- register allocator to attempt to map VRegs to.
+allocatableRegs :: Platform -> [RealReg]
+allocatableRegs platform
+ = let isFree i = freeReg platform i
+ in map RealRegSingle $ filter isFree allMachRegNos
+
+-- argRegs is the set of regs which are read for an n-argument call to C.
+allGpArgRegs :: [Reg]
+allGpArgRegs = map regSingle [10..17]
+allFpArgRegs :: [Reg]
+allFpArgRegs = map regSingle [42..49]
+
+-- STG:
+-- 19: Base
+-- 20: Sp
+-- 21: Hp
+-- 22-27: R1-R6
+-- 28: SpLim
+
+-- This is the STG Sp reg.
+-- sp :: Reg
+-- sp = regSingle 20
+
+-- addressing modes ------------------------------------------------------------
+
+data AddrMode
+ = AddrRegReg Reg Reg
+ | AddrRegImm Reg Imm
+ | AddrReg Reg
+ deriving (Eq, Show)
+
+-- -----------------------------------------------------------------------------
+-- Immediates
+
+data Imm
+ = ImmInt Int
+ | ImmInteger Integer -- Sigh.
+ | ImmCLbl CLabel -- AbstractC Label (with baggage)
+ | ImmLit FastString
+ | ImmIndex CLabel Int
+ | ImmFloat Rational
+ | ImmDouble Rational
+ | ImmConstantSum Imm Imm
+ | ImmConstantDiff Imm Imm
+ deriving (Eq, Show)
+
+strImmLit :: FastString -> Imm
+strImmLit s = ImmLit s
+
+
+litToImm :: CmmLit -> Imm
+litToImm (CmmInt i w) = ImmInteger (narrowS w i)
+ -- narrow to the width: a CmmInt might be out of
+ -- range, but we assume that ImmInteger only contains
+ -- in-range values. A signed value should be fine here.
+litToImm (CmmFloat f W32) = ImmFloat f
+litToImm (CmmFloat f W64) = ImmDouble f
+litToImm (CmmLabel l) = ImmCLbl l
+litToImm (CmmLabelOff l off) = ImmIndex l off
+litToImm (CmmLabelDiffOff l1 l2 off _)
+ = ImmConstantSum
+ (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
+ (ImmInt off)
+litToImm _ = panic "RV64.Regs.litToImm: no match"
+
+
+-- == To satisfy GHC.CmmToAsm.Reg.Target =======================================
+
+-- squeese functions for the graph allocator -----------------------------------
+-- | regSqueeze_class reg
+-- Calculate the maximum number of register colors that could be
+-- denied to a node of this class due to having this reg
+-- as a neighbour.
+--
+{-# INLINE virtualRegSqueeze #-}
+virtualRegSqueeze :: RegClass -> VirtualReg -> Int
+virtualRegSqueeze cls vr
+ = case cls of
+ RcInteger
+ -> case vr of
+ VirtualRegI{} -> 1
+ VirtualRegHi{} -> 1
+ _other -> 0
+
+ RcDouble
+ -> case vr of
+ VirtualRegD{} -> 1
+ VirtualRegF{} -> 0
+ _other -> 0
+
+ _other -> 0
+
+{-# INLINE realRegSqueeze #-}
+realRegSqueeze :: RegClass -> RealReg -> Int
+realRegSqueeze cls rr
+ = case cls of
+ RcInteger
+ -> case rr of
+ RealRegSingle regNo
+ | regNo < 32 -> 1 -- first fp reg is 32
+ | otherwise -> 0
+
+ RcDouble
+ -> case rr of
+ RealRegSingle regNo
+ | regNo < 32 -> 0
+ | otherwise -> 1
+
+ _other -> 0
+
+mkVirtualReg :: Unique -> Format -> VirtualReg
+mkVirtualReg u format
+ | not (isFloatFormat format) = VirtualRegI u
+ | otherwise
+ = case format of
+ FF32 -> VirtualRegD u
+ FF64 -> VirtualRegD u
+ _ -> panic "RV64.mkVirtualReg"
+
+{-# INLINE classOfRealReg #-}
+classOfRealReg :: RealReg -> RegClass
+classOfRealReg (RealRegSingle i)
+ | i < 32 = RcInteger
+ | otherwise = RcDouble
+
+regDotColor :: RealReg -> SDoc
+regDotColor reg
+ = case classOfRealReg reg of
+ RcInteger -> text "blue"
+ RcFloat -> text "red"
+ RcDouble -> text "green"
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs
index 3064bf0d91..829037dd41 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs
@@ -114,6 +114,7 @@ import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64
+import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as RV64
import GHC.CmmToAsm.Reg.Target
import GHC.CmmToAsm.Reg.Liveness
import GHC.CmmToAsm.Reg.Utils
@@ -223,7 +224,7 @@ linearRegAlloc config entry_ids block_live sccs
ArchAlpha -> panic "linearRegAlloc ArchAlpha"
ArchMipseb -> panic "linearRegAlloc ArchMipseb"
ArchMipsel -> panic "linearRegAlloc ArchMipsel"
- ArchRISCV64 -> panic "linearRegAlloc ArchRISCV64"
+ ArchRISCV64 -> go $ (frInitFreeRegs platform :: RV64.FreeRegs)
ArchLoongArch64-> panic "linearRegAlloc ArchLoongArch64"
ArchJavaScript -> panic "linearRegAlloc ArchJavaScript"
ArchWasm32 -> panic "linearRegAlloc ArchWasm32"
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
index 519ea55fca..f3424240d2 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
@@ -29,10 +29,12 @@ import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64
+import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64
import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr
import qualified GHC.CmmToAsm.X86.Instr as X86.Instr
import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr
+import qualified GHC.CmmToAsm.RV64.Instr as RV64.Instr
class Show freeRegs => FR freeRegs where
frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs
@@ -64,6 +66,12 @@ instance FR AArch64.FreeRegs where
frInitFreeRegs = AArch64.initFreeRegs
frReleaseReg = \_ -> AArch64.releaseReg
+instance FR RV64.FreeRegs where
+ frAllocateReg = \_ -> RV64.allocateReg
+ frGetFreeRegs = \_ -> RV64.getFreeRegs
+ frInitFreeRegs = RV64.initFreeRegs
+ frReleaseReg = \_ -> RV64.releaseReg
+
maxSpillSlots :: NCGConfig -> Int
maxSpillSlots config = case platformArch (ncgPlatform config) of
ArchX86 -> X86.Instr.maxSpillSlots config
@@ -76,7 +84,7 @@ maxSpillSlots config = case platformArch (ncgPlatform config) of
ArchAlpha -> panic "maxSpillSlots ArchAlpha"
ArchMipseb -> panic "maxSpillSlots ArchMipseb"
ArchMipsel -> panic "maxSpillSlots ArchMipsel"
- ArchRISCV64 -> panic "maxSpillSlots ArchRISCV64"
+ ArchRISCV64 -> RV64.Instr.maxSpillSlots config
ArchLoongArch64->panic "maxSpillSlots ArchLoongArch64"
ArchJavaScript-> panic "maxSpillSlots ArchJavaScript"
ArchWasm32 -> panic "maxSpillSlots ArchWasm32"
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
new file mode 100644
index 0000000000..47f03ab8fd
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
@@ -0,0 +1,65 @@
+module GHC.CmmToAsm.Reg.Linear.RV64 where
+
+import GHC.Prelude
+
+import GHC.CmmToAsm.RV64.Regs
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import GHC.Utils.Outputable
+import GHC.Utils.Panic
+import GHC.Platform
+
+import Data.Word
+
+import GHC.Stack
+
+data FreeRegs = FreeRegs !Word32 !Word32
+
+instance Show FreeRegs where
+ show (FreeRegs g f) = "FreeRegs: " ++ showBits g ++ "; " ++ showBits f
+
+instance Outputable FreeRegs where
+ ppr (FreeRegs g f) = text " " <+> foldr (\i x -> pad_int i <+> x) (text "") [0..31]
+ $$ text "GPR" <+> foldr (\i x -> show_bit g i <+> x) (text "") [0..31]
+ $$ text "FPR" <+> foldr (\i x -> show_bit f i <+> x) (text "") [0..31]
+ where pad_int i | i < 10 = char ' ' <> int i
+ pad_int i = int i
+ -- remember bit = 1 means it's available.
+ show_bit bits bit | testBit bits bit = text " "
+ show_bit _ _ = text " x"
+
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0 0
+
+showBits :: Word32 -> String
+showBits w = map (\i -> if testBit w i then '1' else '0') [0..31]
+
+-- FR instance implementation (See Linear.FreeRegs)
+allocateReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs
+allocateReg (RealRegSingle r) (FreeRegs g f)
+ | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32))
+ | r < 32 && testBit g r = FreeRegs (clearBit g r) f
+ | r > 31 = panic $ "Linear.RV64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f
+ | otherwise = pprPanic "Linear.RV64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g)
+
+-- For LLVM Interop, see https://github.com/llvm/llvm-project/blob/6ab900f8746e7d8e24afafb5886a40801f6799f4/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L13638-L13685
+getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
+getFreeRegs cls (FreeRegs g f)
+ | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted.
+ | RcDouble <- cls = go 32 f [0..31]
+ | RcInteger <- cls = go 0 g ([5..7] ++ [10..17] ++ [28..31])
+ where
+ go _ _ [] = []
+ go off x (i:is) | testBit x i = RealRegSingle (off + i) : (go off x $! is)
+ | otherwise = go off x $! is
+
+initFreeRegs :: Platform -> FreeRegs
+initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
+
+releaseReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs
+releaseReg (RealRegSingle r) (FreeRegs g f)
+ | r > 31 && testBit f (r - 32) = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32))
+ | r < 32 && testBit g r = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg x" <> int r)
+ | r > 31 = FreeRegs g (setBit f (r - 32))
+ | otherwise = FreeRegs (setBit g r) f \ No newline at end of file
diff --git a/compiler/GHC/CmmToAsm/Reg/Target.hs b/compiler/GHC/CmmToAsm/Reg/Target.hs
index a94010987a..0b5bb080ef 100644
--- a/compiler/GHC/CmmToAsm/Reg/Target.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Target.hs
@@ -34,7 +34,7 @@ import qualified GHC.CmmToAsm.X86.Regs as X86
import qualified GHC.CmmToAsm.X86.RegInfo as X86
import qualified GHC.CmmToAsm.PPC.Regs as PPC
import qualified GHC.CmmToAsm.AArch64.Regs as AArch64
-
+import qualified GHC.CmmToAsm.RV64.Regs as RV64
targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int
targetVirtualRegSqueeze platform
@@ -49,7 +49,7 @@ targetVirtualRegSqueeze platform
ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel"
- ArchRISCV64 -> panic "targetVirtualRegSqueeze ArchRISCV64"
+ ArchRISCV64 -> RV64.virtualRegSqueeze
ArchLoongArch64->panic "targetVirtualRegSqueeze ArchLoongArch64"
ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript"
ArchWasm32 -> panic "targetVirtualRegSqueeze ArchWasm32"
@@ -69,7 +69,7 @@ targetRealRegSqueeze platform
ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel"
- ArchRISCV64 -> panic "targetRealRegSqueeze ArchRISCV64"
+ ArchRISCV64 -> RV64.realRegSqueeze
ArchLoongArch64->panic "targetRealRegSqueeze ArchLoongArch64"
ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript"
ArchWasm32 -> panic "targetRealRegSqueeze ArchWasm32"
@@ -88,7 +88,7 @@ targetClassOfRealReg platform
ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
ArchMipsel -> panic "targetClassOfRealReg ArchMipsel"
- ArchRISCV64 -> panic "targetClassOfRealReg ArchRISCV64"
+ ArchRISCV64 -> RV64.classOfRealReg
ArchLoongArch64->panic "targetClassOfRealReg ArchLoongArch64"
ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript"
ArchWasm32 -> panic "targetClassOfRealReg ArchWasm32"
@@ -107,7 +107,7 @@ targetMkVirtualReg platform
ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
ArchMipsel -> panic "targetMkVirtualReg ArchMipsel"
- ArchRISCV64 -> panic "targetMkVirtualReg ArchRISCV64"
+ ArchRISCV64 -> RV64.mkVirtualReg
ArchLoongArch64->panic "targetMkVirtualReg ArchLoongArch64"
ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript"
ArchWasm32 -> panic "targetMkVirtualReg ArchWasm32"
@@ -126,7 +126,7 @@ targetRegDotColor platform
ArchAlpha -> panic "targetRegDotColor ArchAlpha"
ArchMipseb -> panic "targetRegDotColor ArchMipseb"
ArchMipsel -> panic "targetRegDotColor ArchMipsel"
- ArchRISCV64 -> panic "targetRegDotColor ArchRISCV64"
+ ArchRISCV64 -> RV64.regDotColor
ArchLoongArch64->panic "targetRegDotColor ArchLoongArch64"
ArchJavaScript-> panic "targetRegDotColor ArchJavaScript"
ArchWasm32 -> panic "targetRegDotColor ArchWasm32"
diff --git a/compiler/GHC/Driver/Backend.hs b/compiler/GHC/Driver/Backend.hs
index e59f0a51f7..5a76f27fb9 100644
--- a/compiler/GHC/Driver/Backend.hs
+++ b/compiler/GHC/Driver/Backend.hs
@@ -217,6 +217,7 @@ platformNcgSupported platform = if
ArchPPC_64 {} -> True
ArchAArch64 -> True
ArchWasm32 -> True
+ ArchRISCV64 -> True
_ -> False
-- | Is the platform supported by the JS backend?
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 4194dd7b05..169cc2bf9c 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -254,6 +254,7 @@ Library
GHC.CmmToAsm.Reg.Linear.FreeRegs
GHC.CmmToAsm.Reg.Linear.JoinToTargets
GHC.CmmToAsm.Reg.Linear.PPC
+ GHC.CmmToAsm.Reg.Linear.RV64
GHC.CmmToAsm.Reg.Linear.StackMap
GHC.CmmToAsm.Reg.Linear.State
GHC.CmmToAsm.Reg.Linear.Stats
@@ -262,6 +263,13 @@ Library
GHC.CmmToAsm.Reg.Liveness
GHC.CmmToAsm.Reg.Target
GHC.CmmToAsm.Reg.Utils
+ GHC.CmmToAsm.RV64
+ GHC.CmmToAsm.RV64.CodeGen
+ GHC.CmmToAsm.RV64.Cond
+ GHC.CmmToAsm.RV64.Instr
+ GHC.CmmToAsm.RV64.Ppr
+ GHC.CmmToAsm.RV64.RegInfo
+ GHC.CmmToAsm.RV64.Regs
GHC.CmmToAsm.Types
GHC.CmmToAsm.Utils
GHC.CmmToAsm.X86