diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-02-17 16:21:11 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-02-18 20:18:12 -0500 |
commit | 1500f0898e85316c7c97a2f759d83278a072ab0e (patch) | |
tree | 7246f4905a279679b1c5106ba6989d6e0e637f6b /compiler/GHC/Llvm | |
parent | 192caf58ca1fc42806166872260d30bdb34dbace (diff) | |
download | haskell-1500f0898e85316c7c97a2f759d83278a072ab0e.tar.gz |
Modules: Llvm (#13009)
Diffstat (limited to 'compiler/GHC/Llvm')
-rw-r--r-- | compiler/GHC/Llvm/MetaData.hs | 95 | ||||
-rw-r--r-- | compiler/GHC/Llvm/Ppr.hs | 499 | ||||
-rw-r--r-- | compiler/GHC/Llvm/Syntax.hs | 352 | ||||
-rw-r--r-- | compiler/GHC/Llvm/Types.hs | 888 |
4 files changed, 1834 insertions, 0 deletions
diff --git a/compiler/GHC/Llvm/MetaData.hs b/compiler/GHC/Llvm/MetaData.hs new file mode 100644 index 0000000000..3e319c7036 --- /dev/null +++ b/compiler/GHC/Llvm/MetaData.hs @@ -0,0 +1,95 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Llvm.MetaData where + +import GhcPrelude + +import GHC.Llvm.Types +import Outputable + +-- The LLVM Metadata System. +-- +-- The LLVM metadata feature is poorly documented but roughly follows the +-- following design: +-- * Metadata can be constructed in a few different ways (See below). +-- * After which it can either be attached to LLVM statements to pass along +-- extra information to the optimizer and code generator OR specifically named +-- metadata has an affect on the whole module (i.e., linking behaviour). +-- +-- +-- # Constructing metadata +-- Metadata comes largely in three forms: +-- +-- * Metadata expressions -- these are the raw metadata values that encode +-- information. They consist of metadata strings, metadata nodes, regular +-- LLVM values (both literals and references to global variables) and +-- metadata expressions (i.e., recursive data type). Some examples: +-- !{ !"hello", !0, i32 0 } +-- !{ !1, !{ i32 0 } } +-- +-- * Metadata nodes -- global metadata variables that attach a metadata +-- expression to a number. For example: +-- !0 = !{ [<metadata expressions>] !} +-- +-- * Named metadata -- global metadata variables that attach a metadata nodes +-- to a name. Used ONLY to communicated module level information to LLVM +-- through a meaningful name. For example: +-- !llvm.module.linkage = !{ !0, !1 } +-- +-- +-- # Using Metadata +-- Using metadata depends on the form it is in: +-- +-- * Attach to instructions -- metadata can be attached to LLVM instructions +-- using a specific reference as follows: +-- %l = load i32* @glob, !nontemporal !10 +-- %m = load i32* @glob, !nontemporal !{ i32 0, !{ i32 0 } } +-- Only metadata nodes or expressions can be attached, named metadata cannot. +-- Refer to LLVM documentation for which instructions take metadata and its +-- meaning. +-- +-- * As arguments -- llvm functions can take metadata as arguments, for +-- example: +-- call void @llvm.dbg.value(metadata !{ i32 0 }, i64 0, metadata !1) +-- As with instructions, only metadata nodes or expressions can be attached. +-- +-- * As a named metadata -- Here the metadata is simply declared in global +-- scope using a specific name to communicate module level information to LLVM. +-- For example: +-- !llvm.module.linkage = !{ !0, !1 } +-- + +-- | A reference to an un-named metadata node. +newtype MetaId = MetaId Int + deriving (Eq, Ord, Enum) + +instance Outputable MetaId where + ppr (MetaId n) = char '!' <> int n + +-- | LLVM metadata expressions +data MetaExpr = MetaStr !LMString + | MetaNode !MetaId + | MetaVar !LlvmVar + | MetaStruct [MetaExpr] + deriving (Eq) + +instance Outputable MetaExpr where + ppr (MetaVar (LMLitVar (LMNullLit _))) = text "null" + ppr (MetaStr s ) = char '!' <> doubleQuotes (ftext s) + ppr (MetaNode n ) = ppr n + ppr (MetaVar v ) = ppr v + ppr (MetaStruct es) = char '!' <> braces (ppCommaJoin es) + +-- | Associates some metadata with a specific label for attaching to an +-- instruction. +data MetaAnnot = MetaAnnot LMString MetaExpr + deriving (Eq) + +-- | Metadata declarations. Metadata can only be declared in global scope. +data MetaDecl + -- | Named metadata. Only used for communicating module information to + -- LLVM. ('!name = !{ [!<n>] }' form). + = MetaNamed !LMString [MetaId] + -- | Metadata node declaration. + -- ('!0 = metadata !{ <metadata expression> }' form). + | MetaUnnamed !MetaId !MetaExpr diff --git a/compiler/GHC/Llvm/Ppr.hs b/compiler/GHC/Llvm/Ppr.hs new file mode 100644 index 0000000000..0e8d279a50 --- /dev/null +++ b/compiler/GHC/Llvm/Ppr.hs @@ -0,0 +1,499 @@ +{-# LANGUAGE CPP #-} + +-------------------------------------------------------------------------------- +-- | Pretty print LLVM IR Code. +-- + +module GHC.Llvm.Ppr ( + + -- * Top level LLVM objects. + ppLlvmModule, + ppLlvmComments, + ppLlvmComment, + ppLlvmGlobals, + ppLlvmGlobal, + ppLlvmAliases, + ppLlvmAlias, + ppLlvmMetas, + ppLlvmMeta, + ppLlvmFunctionDecls, + ppLlvmFunctionDecl, + ppLlvmFunctions, + ppLlvmFunction, + + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Llvm.Syntax +import GHC.Llvm.MetaData +import GHC.Llvm.Types + +import Data.List ( intersperse ) +import Outputable +import Unique +import FastString ( sLit ) + +-------------------------------------------------------------------------------- +-- * Top Level Print functions +-------------------------------------------------------------------------------- + +-- | Print out a whole LLVM module. +ppLlvmModule :: LlvmModule -> SDoc +ppLlvmModule (LlvmModule comments aliases meta globals decls funcs) + = ppLlvmComments comments $+$ newLine + $+$ ppLlvmAliases aliases $+$ newLine + $+$ ppLlvmMetas meta $+$ newLine + $+$ ppLlvmGlobals globals $+$ newLine + $+$ ppLlvmFunctionDecls decls $+$ newLine + $+$ ppLlvmFunctions funcs + +-- | Print out a multi-line comment, can be inside a function or on its own +ppLlvmComments :: [LMString] -> SDoc +ppLlvmComments comments = vcat $ map ppLlvmComment comments + +-- | Print out a comment, can be inside a function or on its own +ppLlvmComment :: LMString -> SDoc +ppLlvmComment com = semi <+> ftext com + + +-- | Print out a list of global mutable variable definitions +ppLlvmGlobals :: [LMGlobal] -> SDoc +ppLlvmGlobals ls = vcat $ map ppLlvmGlobal ls + +-- | Print out a global mutable variable definition +ppLlvmGlobal :: LMGlobal -> SDoc +ppLlvmGlobal (LMGlobal var@(LMGlobalVar _ _ link x a c) dat) = + let sect = case x of + Just x' -> text ", section" <+> doubleQuotes (ftext x') + Nothing -> empty + + align = case a of + Just a' -> text ", align" <+> int a' + Nothing -> empty + + rhs = case dat of + Just stat -> pprSpecialStatic stat + Nothing -> ppr (pLower $ getVarType var) + + -- Position of linkage is different for aliases. + const = case c of + Global -> "global" + Constant -> "constant" + Alias -> "alias" + + in ppAssignment var $ ppr link <+> text const <+> rhs <> sect <> align + $+$ newLine + +ppLlvmGlobal (LMGlobal var val) = sdocWithDynFlags $ \dflags -> + error $ "Non Global var ppr as global! " + ++ showSDoc dflags (ppr var) ++ " " ++ showSDoc dflags (ppr val) + + +-- | Print out a list of LLVM type aliases. +ppLlvmAliases :: [LlvmAlias] -> SDoc +ppLlvmAliases tys = vcat $ map ppLlvmAlias tys + +-- | Print out an LLVM type alias. +ppLlvmAlias :: LlvmAlias -> SDoc +ppLlvmAlias (name, ty) + = char '%' <> ftext name <+> equals <+> text "type" <+> ppr ty + + +-- | Print out a list of LLVM metadata. +ppLlvmMetas :: [MetaDecl] -> SDoc +ppLlvmMetas metas = vcat $ map ppLlvmMeta metas + +-- | Print out an LLVM metadata definition. +ppLlvmMeta :: MetaDecl -> SDoc +ppLlvmMeta (MetaUnnamed n m) + = ppr n <+> equals <+> ppr m + +ppLlvmMeta (MetaNamed n m) + = exclamation <> ftext n <+> equals <+> exclamation <> braces nodes + where + nodes = hcat $ intersperse comma $ map ppr m + + +-- | Print out a list of function definitions. +ppLlvmFunctions :: LlvmFunctions -> SDoc +ppLlvmFunctions funcs = vcat $ map ppLlvmFunction funcs + +-- | Print out a function definition. +ppLlvmFunction :: LlvmFunction -> SDoc +ppLlvmFunction fun = + let attrDoc = ppSpaceJoin (funcAttrs fun) + secDoc = case funcSect fun of + Just s' -> text "section" <+> (doubleQuotes $ ftext s') + Nothing -> empty + prefixDoc = case funcPrefix fun of + Just v -> text "prefix" <+> ppr v + Nothing -> empty + in text "define" <+> ppLlvmFunctionHeader (funcDecl fun) (funcArgs fun) + <+> attrDoc <+> secDoc <+> prefixDoc + $+$ lbrace + $+$ ppLlvmBlocks (funcBody fun) + $+$ rbrace + $+$ newLine + $+$ newLine + +-- | Print out a function definition header. +ppLlvmFunctionHeader :: LlvmFunctionDecl -> [LMString] -> SDoc +ppLlvmFunctionHeader (LlvmFunctionDecl n l c r varg p a) args + = let varg' = case varg of + VarArgs | null p -> sLit "..." + | otherwise -> sLit ", ..." + _otherwise -> sLit "" + align = case a of + Just a' -> text " align " <> ppr a' + Nothing -> empty + args' = map (\((ty,p),n) -> ppr ty <+> ppSpaceJoin p <+> char '%' + <> ftext n) + (zip p args) + in ppr l <+> ppr c <+> ppr r <+> char '@' <> ftext n <> lparen <> + (hsep $ punctuate comma args') <> ptext varg' <> rparen <> align + +-- | Print out a list of function declaration. +ppLlvmFunctionDecls :: LlvmFunctionDecls -> SDoc +ppLlvmFunctionDecls decs = vcat $ map ppLlvmFunctionDecl decs + +-- | Print out a function declaration. +-- Declarations define the function type but don't define the actual body of +-- the function. +ppLlvmFunctionDecl :: LlvmFunctionDecl -> SDoc +ppLlvmFunctionDecl (LlvmFunctionDecl n l c r varg p a) + = let varg' = case varg of + VarArgs | null p -> sLit "..." + | otherwise -> sLit ", ..." + _otherwise -> sLit "" + align = case a of + Just a' -> text " align" <+> ppr a' + Nothing -> empty + args = hcat $ intersperse (comma <> space) $ + map (\(t,a) -> ppr t <+> ppSpaceJoin a) p + in text "declare" <+> ppr l <+> ppr c <+> ppr r <+> char '@' <> + ftext n <> lparen <> args <> ptext varg' <> rparen <> align $+$ newLine + + +-- | Print out a list of LLVM blocks. +ppLlvmBlocks :: LlvmBlocks -> SDoc +ppLlvmBlocks blocks = vcat $ map ppLlvmBlock blocks + +-- | Print out an LLVM block. +-- It must be part of a function definition. +ppLlvmBlock :: LlvmBlock -> SDoc +ppLlvmBlock (LlvmBlock blockId stmts) = + let isLabel (MkLabel _) = True + isLabel _ = False + (block, rest) = break isLabel stmts + ppRest = case rest of + MkLabel id:xs -> ppLlvmBlock (LlvmBlock id xs) + _ -> empty + in ppLlvmBlockLabel blockId + $+$ (vcat $ map ppLlvmStatement block) + $+$ newLine + $+$ ppRest + +-- | Print out an LLVM block label. +ppLlvmBlockLabel :: LlvmBlockId -> SDoc +ppLlvmBlockLabel id = pprUniqueAlways id <> colon + + +-- | Print out an LLVM statement. +ppLlvmStatement :: LlvmStatement -> SDoc +ppLlvmStatement stmt = + let ind = (text " " <>) + in case stmt of + Assignment dst expr -> ind $ ppAssignment dst (ppLlvmExpression expr) + Fence st ord -> ind $ ppFence st ord + Branch target -> ind $ ppBranch target + BranchIf cond ifT ifF -> ind $ ppBranchIf cond ifT ifF + Comment comments -> ind $ ppLlvmComments comments + MkLabel label -> ppLlvmBlockLabel label + Store value ptr -> ind $ ppStore value ptr + Switch scrut def tgs -> ind $ ppSwitch scrut def tgs + Return result -> ind $ ppReturn result + Expr expr -> ind $ ppLlvmExpression expr + Unreachable -> ind $ text "unreachable" + Nop -> empty + MetaStmt meta s -> ppMetaStatement meta s + + +-- | Print out an LLVM expression. +ppLlvmExpression :: LlvmExpression -> SDoc +ppLlvmExpression expr + = case expr of + Alloca tp amount -> ppAlloca tp amount + LlvmOp op left right -> ppMachOp op left right + Call tp fp args attrs -> ppCall tp fp (map MetaVar args) attrs + CallM tp fp args attrs -> ppCall tp fp args attrs + Cast op from to -> ppCast op from to + Compare op left right -> ppCmpOp op left right + Extract vec idx -> ppExtract vec idx + ExtractV struct idx -> ppExtractV struct idx + Insert vec elt idx -> ppInsert vec elt idx + GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes + Load ptr -> ppLoad ptr + ALoad ord st ptr -> ppALoad ord st ptr + Malloc tp amount -> ppMalloc tp amount + AtomicRMW aop tgt src ordering -> ppAtomicRMW aop tgt src ordering + CmpXChg addr old new s_ord f_ord -> ppCmpXChg addr old new s_ord f_ord + Phi tp predecessors -> ppPhi tp predecessors + Asm asm c ty v se sk -> ppAsm asm c ty v se sk + MExpr meta expr -> ppMetaExpr meta expr + + +-------------------------------------------------------------------------------- +-- * Individual print functions +-------------------------------------------------------------------------------- + +-- | Should always be a function pointer. So a global var of function type +-- (since globals are always pointers) or a local var of pointer function type. +ppCall :: LlvmCallType -> LlvmVar -> [MetaExpr] -> [LlvmFuncAttr] -> SDoc +ppCall ct fptr args attrs = case fptr of + -- + -- if local var function pointer, unwrap + LMLocalVar _ (LMPointer (LMFunction d)) -> ppCall' d + + -- should be function type otherwise + LMGlobalVar _ (LMFunction d) _ _ _ _ -> ppCall' d + + -- not pointer or function, so error + _other -> error $ "ppCall called with non LMFunction type!\nMust be " + ++ " called with either global var of function type or " + ++ "local var of pointer function type." + + where + ppCall' (LlvmFunctionDecl _ _ cc ret argTy params _) = + let tc = if ct == TailCall then text "tail " else empty + ppValues = hsep $ punctuate comma $ map ppCallMetaExpr args + ppArgTy = (ppCommaJoin $ map fst params) <> + (case argTy of + VarArgs -> text ", ..." + FixedArgs -> empty) + fnty = space <> lparen <> ppArgTy <> rparen + attrDoc = ppSpaceJoin attrs + in tc <> text "call" <+> ppr cc <+> ppr ret + <> fnty <+> ppName fptr <> lparen <+> ppValues + <+> rparen <+> attrDoc + + -- Metadata needs to be marked as having the `metadata` type when used + -- in a call argument + ppCallMetaExpr (MetaVar v) = ppr v + ppCallMetaExpr v = text "metadata" <+> ppr v + +ppMachOp :: LlvmMachOp -> LlvmVar -> LlvmVar -> SDoc +ppMachOp op left right = + (ppr op) <+> (ppr (getVarType left)) <+> ppName left + <> comma <+> ppName right + + +ppCmpOp :: LlvmCmpOp -> LlvmVar -> LlvmVar -> SDoc +ppCmpOp op left right = + let cmpOp + | isInt (getVarType left) && isInt (getVarType right) = text "icmp" + | isFloat (getVarType left) && isFloat (getVarType right) = text "fcmp" + | otherwise = text "icmp" -- Just continue as its much easier to debug + {- + | otherwise = error ("can't compare different types, left = " + ++ (show $ getVarType left) ++ ", right = " + ++ (show $ getVarType right)) + -} + in cmpOp <+> ppr op <+> ppr (getVarType left) + <+> ppName left <> comma <+> ppName right + + +ppAssignment :: LlvmVar -> SDoc -> SDoc +ppAssignment var expr = ppName var <+> equals <+> expr + +ppFence :: Bool -> LlvmSyncOrdering -> SDoc +ppFence st ord = + let singleThread = case st of True -> text "singlethread" + False -> empty + in text "fence" <+> singleThread <+> ppSyncOrdering ord + +ppSyncOrdering :: LlvmSyncOrdering -> SDoc +ppSyncOrdering SyncUnord = text "unordered" +ppSyncOrdering SyncMonotonic = text "monotonic" +ppSyncOrdering SyncAcquire = text "acquire" +ppSyncOrdering SyncRelease = text "release" +ppSyncOrdering SyncAcqRel = text "acq_rel" +ppSyncOrdering SyncSeqCst = text "seq_cst" + +ppAtomicOp :: LlvmAtomicOp -> SDoc +ppAtomicOp LAO_Xchg = text "xchg" +ppAtomicOp LAO_Add = text "add" +ppAtomicOp LAO_Sub = text "sub" +ppAtomicOp LAO_And = text "and" +ppAtomicOp LAO_Nand = text "nand" +ppAtomicOp LAO_Or = text "or" +ppAtomicOp LAO_Xor = text "xor" +ppAtomicOp LAO_Max = text "max" +ppAtomicOp LAO_Min = text "min" +ppAtomicOp LAO_Umax = text "umax" +ppAtomicOp LAO_Umin = text "umin" + +ppAtomicRMW :: LlvmAtomicOp -> LlvmVar -> LlvmVar -> LlvmSyncOrdering -> SDoc +ppAtomicRMW aop tgt src ordering = + text "atomicrmw" <+> ppAtomicOp aop <+> ppr tgt <> comma + <+> ppr src <+> ppSyncOrdering ordering + +ppCmpXChg :: LlvmVar -> LlvmVar -> LlvmVar + -> LlvmSyncOrdering -> LlvmSyncOrdering -> SDoc +ppCmpXChg addr old new s_ord f_ord = + text "cmpxchg" <+> ppr addr <> comma <+> ppr old <> comma <+> ppr new + <+> ppSyncOrdering s_ord <+> ppSyncOrdering f_ord + +-- XXX: On x86, vector types need to be 16-byte aligned for aligned access, but +-- we have no way of guaranteeing that this is true with GHC (we would need to +-- modify the layout of the stack and closures, change the storage manager, +-- etc.). So, we blindly tell LLVM that *any* vector store or load could be +-- unaligned. In the future we may be able to guarantee that certain vector +-- access patterns are aligned, in which case we will need a more granular way +-- of specifying alignment. + +ppLoad :: LlvmVar -> SDoc +ppLoad var = text "load" <+> ppr derefType <> comma <+> ppr var <> align + where + derefType = pLower $ getVarType var + align | isVector . pLower . getVarType $ var = text ", align 1" + | otherwise = empty + +ppALoad :: LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc +ppALoad ord st var = sdocWithDynFlags $ \dflags -> + let alignment = (llvmWidthInBits dflags $ getVarType var) `quot` 8 + align = text ", align" <+> ppr alignment + sThreaded | st = text " singlethread" + | otherwise = empty + derefType = pLower $ getVarType var + in text "load atomic" <+> ppr derefType <> comma <+> ppr var <> sThreaded + <+> ppSyncOrdering ord <> align + +ppStore :: LlvmVar -> LlvmVar -> SDoc +ppStore val dst + | isVecPtrVar dst = text "store" <+> ppr val <> comma <+> ppr dst <> + comma <+> text "align 1" + | otherwise = text "store" <+> ppr val <> comma <+> ppr dst + where + isVecPtrVar :: LlvmVar -> Bool + isVecPtrVar = isVector . pLower . getVarType + + +ppCast :: LlvmCastOp -> LlvmVar -> LlvmType -> SDoc +ppCast op from to + = ppr op + <+> ppr (getVarType from) <+> ppName from + <+> text "to" + <+> ppr to + + +ppMalloc :: LlvmType -> Int -> SDoc +ppMalloc tp amount = + let amount' = LMLitVar $ LMIntLit (toInteger amount) i32 + in text "malloc" <+> ppr tp <> comma <+> ppr amount' + + +ppAlloca :: LlvmType -> Int -> SDoc +ppAlloca tp amount = + let amount' = LMLitVar $ LMIntLit (toInteger amount) i32 + in text "alloca" <+> ppr tp <> comma <+> ppr amount' + + +ppGetElementPtr :: Bool -> LlvmVar -> [LlvmVar] -> SDoc +ppGetElementPtr inb ptr idx = + let indexes = comma <+> ppCommaJoin idx + inbound = if inb then text "inbounds" else empty + derefType = pLower $ getVarType ptr + in text "getelementptr" <+> inbound <+> ppr derefType <> comma <+> ppr ptr + <> indexes + + +ppReturn :: Maybe LlvmVar -> SDoc +ppReturn (Just var) = text "ret" <+> ppr var +ppReturn Nothing = text "ret" <+> ppr LMVoid + + +ppBranch :: LlvmVar -> SDoc +ppBranch var = text "br" <+> ppr var + + +ppBranchIf :: LlvmVar -> LlvmVar -> LlvmVar -> SDoc +ppBranchIf cond trueT falseT + = text "br" <+> ppr cond <> comma <+> ppr trueT <> comma <+> ppr falseT + + +ppPhi :: LlvmType -> [(LlvmVar,LlvmVar)] -> SDoc +ppPhi tp preds = + let ppPreds (val, label) = brackets $ ppName val <> comma <+> ppName label + in text "phi" <+> ppr tp <+> hsep (punctuate comma $ map ppPreds preds) + + +ppSwitch :: LlvmVar -> LlvmVar -> [(LlvmVar,LlvmVar)] -> SDoc +ppSwitch scrut dflt targets = + let ppTarget (val, lab) = ppr val <> comma <+> ppr lab + ppTargets xs = brackets $ vcat (map ppTarget xs) + in text "switch" <+> ppr scrut <> comma <+> ppr dflt + <+> ppTargets targets + + +ppAsm :: LMString -> LMString -> LlvmType -> [LlvmVar] -> Bool -> Bool -> SDoc +ppAsm asm constraints rty vars sideeffect alignstack = + let asm' = doubleQuotes $ ftext asm + cons = doubleQuotes $ ftext constraints + rty' = ppr rty + vars' = lparen <+> ppCommaJoin vars <+> rparen + side = if sideeffect then text "sideeffect" else empty + align = if alignstack then text "alignstack" else empty + in text "call" <+> rty' <+> text "asm" <+> side <+> align <+> asm' <> comma + <+> cons <> vars' + +ppExtract :: LlvmVar -> LlvmVar -> SDoc +ppExtract vec idx = + text "extractelement" + <+> ppr (getVarType vec) <+> ppName vec <> comma + <+> ppr idx + +ppExtractV :: LlvmVar -> Int -> SDoc +ppExtractV struct idx = + text "extractvalue" + <+> ppr (getVarType struct) <+> ppName struct <> comma + <+> ppr idx + +ppInsert :: LlvmVar -> LlvmVar -> LlvmVar -> SDoc +ppInsert vec elt idx = + text "insertelement" + <+> ppr (getVarType vec) <+> ppName vec <> comma + <+> ppr (getVarType elt) <+> ppName elt <> comma + <+> ppr idx + + +ppMetaStatement :: [MetaAnnot] -> LlvmStatement -> SDoc +ppMetaStatement meta stmt = ppLlvmStatement stmt <> ppMetaAnnots meta + +ppMetaExpr :: [MetaAnnot] -> LlvmExpression -> SDoc +ppMetaExpr meta expr = ppLlvmExpression expr <> ppMetaAnnots meta + +ppMetaAnnots :: [MetaAnnot] -> SDoc +ppMetaAnnots meta = hcat $ map ppMeta meta + where + ppMeta (MetaAnnot name e) + = comma <+> exclamation <> ftext name <+> + case e of + MetaNode n -> ppr n + MetaStruct ms -> exclamation <> braces (ppCommaJoin ms) + other -> exclamation <> braces (ppr other) -- possible? + + +-------------------------------------------------------------------------------- +-- * Misc functions +-------------------------------------------------------------------------------- + +-- | Blank line. +newLine :: SDoc +newLine = empty + +-- | Exclamation point. +exclamation :: SDoc +exclamation = char '!' diff --git a/compiler/GHC/Llvm/Syntax.hs b/compiler/GHC/Llvm/Syntax.hs new file mode 100644 index 0000000000..d048215a0b --- /dev/null +++ b/compiler/GHC/Llvm/Syntax.hs @@ -0,0 +1,352 @@ +-------------------------------------------------------------------------------- +-- | The LLVM abstract syntax. +-- + +module GHC.Llvm.Syntax where + +import GhcPrelude + +import GHC.Llvm.MetaData +import GHC.Llvm.Types + +import Unique + +-- | Block labels +type LlvmBlockId = Unique + +-- | A block of LLVM code. +data LlvmBlock = LlvmBlock { + -- | The code label for this block + blockLabel :: LlvmBlockId, + + -- | A list of LlvmStatement's representing the code for this block. + -- This list must end with a control flow statement. + blockStmts :: [LlvmStatement] + } + +type LlvmBlocks = [LlvmBlock] + +-- | An LLVM Module. This is a top level container in LLVM. +data LlvmModule = LlvmModule { + -- | Comments to include at the start of the module. + modComments :: [LMString], + + -- | LLVM Alias type definitions. + modAliases :: [LlvmAlias], + + -- | LLVM meta data. + modMeta :: [MetaDecl], + + -- | Global variables to include in the module. + modGlobals :: [LMGlobal], + + -- | LLVM Functions used in this module but defined in other modules. + modFwdDecls :: LlvmFunctionDecls, + + -- | LLVM Functions defined in this module. + modFuncs :: LlvmFunctions + } + +-- | An LLVM Function +data LlvmFunction = LlvmFunction { + -- | The signature of this declared function. + funcDecl :: LlvmFunctionDecl, + + -- | The functions arguments + funcArgs :: [LMString], + + -- | The function attributes. + funcAttrs :: [LlvmFuncAttr], + + -- | The section to put the function into, + funcSect :: LMSection, + + -- | Prefix data + funcPrefix :: Maybe LlvmStatic, + + -- | The body of the functions. + funcBody :: LlvmBlocks + } + +type LlvmFunctions = [LlvmFunction] + +type SingleThreaded = Bool + +-- | LLVM ordering types for synchronization purposes. (Introduced in LLVM +-- 3.0). Please see the LLVM documentation for a better description. +data LlvmSyncOrdering + -- | Some partial order of operations exists. + = SyncUnord + -- | A single total order for operations at a single address exists. + | SyncMonotonic + -- | Acquire synchronization operation. + | SyncAcquire + -- | Release synchronization operation. + | SyncRelease + -- | Acquire + Release synchronization operation. + | SyncAcqRel + -- | Full sequential Consistency operation. + | SyncSeqCst + deriving (Show, Eq) + +-- | LLVM atomic operations. Please see the @atomicrmw@ instruction in +-- the LLVM documentation for a complete description. +data LlvmAtomicOp + = LAO_Xchg + | LAO_Add + | LAO_Sub + | LAO_And + | LAO_Nand + | LAO_Or + | LAO_Xor + | LAO_Max + | LAO_Min + | LAO_Umax + | LAO_Umin + deriving (Show, Eq) + +-- | Llvm Statements +data LlvmStatement + {- | + Assign an expression to a variable: + * dest: Variable to assign to + * source: Source expression + -} + = Assignment LlvmVar LlvmExpression + + {- | + Memory fence operation + -} + | Fence Bool LlvmSyncOrdering + + {- | + Always branch to the target label + -} + | Branch LlvmVar + + {- | + Branch to label targetTrue if cond is true otherwise to label targetFalse + * cond: condition that will be tested, must be of type i1 + * targetTrue: label to branch to if cond is true + * targetFalse: label to branch to if cond is false + -} + | BranchIf LlvmVar LlvmVar LlvmVar + + {- | + Comment + Plain comment. + -} + | Comment [LMString] + + {- | + Set a label on this position. + * name: Identifier of this label, unique for this module + -} + | MkLabel LlvmBlockId + + {- | + Store variable value in pointer ptr. If value is of type t then ptr must + be of type t*. + * value: Variable/Constant to store. + * ptr: Location to store the value in + -} + | Store LlvmVar LlvmVar + + {- | + Multiway branch + * scrutinee: Variable or constant which must be of integer type that is + determines which arm is chosen. + * def: The default label if there is no match in target. + * target: A list of (value,label) where the value is an integer + constant and label the corresponding label to jump to if the + scrutinee matches the value. + -} + | Switch LlvmVar LlvmVar [(LlvmVar, LlvmVar)] + + {- | + Return a result. + * result: The variable or constant to return + -} + | Return (Maybe LlvmVar) + + {- | + An instruction for the optimizer that the code following is not reachable + -} + | Unreachable + + {- | + Raise an expression to a statement (if don't want result or want to use + Llvm unnamed values. + -} + | Expr LlvmExpression + + {- | + A nop LLVM statement. Useful as its often more efficient to use this + then to wrap LLvmStatement in a Just or []. + -} + | Nop + + {- | + A LLVM statement with metadata attached to it. + -} + | MetaStmt [MetaAnnot] LlvmStatement + + deriving (Eq) + + +-- | Llvm Expressions +data LlvmExpression + {- | + Allocate amount * sizeof(tp) bytes on the stack + * tp: LlvmType to reserve room for + * amount: The nr of tp's which must be allocated + -} + = Alloca LlvmType Int + + {- | + Perform the machine operator op on the operands left and right + * op: operator + * left: left operand + * right: right operand + -} + | LlvmOp LlvmMachOp LlvmVar LlvmVar + + {- | + Perform a compare operation on the operands left and right + * op: operator + * left: left operand + * right: right operand + -} + | Compare LlvmCmpOp LlvmVar LlvmVar + + {- | + Extract a scalar element from a vector + * val: The vector + * idx: The index of the scalar within the vector + -} + | Extract LlvmVar LlvmVar + + {- | + Extract a scalar element from a structure + * val: The structure + * idx: The index of the scalar within the structure + Corresponds to "extractvalue" instruction. + -} + | ExtractV LlvmVar Int + + {- | + Insert a scalar element into a vector + * val: The source vector + * elt: The scalar to insert + * index: The index at which to insert the scalar + -} + | Insert LlvmVar LlvmVar LlvmVar + + {- | + Allocate amount * sizeof(tp) bytes on the heap + * tp: LlvmType to reserve room for + * amount: The nr of tp's which must be allocated + -} + | Malloc LlvmType Int + + {- | + Load the value at location ptr + -} + | Load LlvmVar + + {- | + Atomic load of the value at location ptr + -} + | ALoad LlvmSyncOrdering SingleThreaded LlvmVar + + {- | + Navigate in a structure, selecting elements + * inbound: Is the pointer inbounds? (computed pointer doesn't overflow) + * ptr: Location of the structure + * indexes: A list of indexes to select the correct value. + -} + | GetElemPtr Bool LlvmVar [LlvmVar] + + {- | + Cast the variable from to the to type. This is an abstraction of three + cast operators in Llvm, inttoptr, ptrtoint and bitcast. + * cast: Cast type + * from: Variable to cast + * to: type to cast to + -} + | Cast LlvmCastOp LlvmVar LlvmType + + {- | + Atomic read-modify-write operation + * op: Atomic operation + * addr: Address to modify + * operand: Operand to operation + * ordering: Ordering requirement + -} + | AtomicRMW LlvmAtomicOp LlvmVar LlvmVar LlvmSyncOrdering + + {- | + Compare-and-exchange operation + * addr: Address to modify + * old: Expected value + * new: New value + * suc_ord: Ordering required in success case + * fail_ord: Ordering required in failure case, can be no stronger than + suc_ord + + Result is an @i1@, true if store was successful. + -} + | CmpXChg LlvmVar LlvmVar LlvmVar LlvmSyncOrdering LlvmSyncOrdering + + {- | + Call a function. The result is the value of the expression. + * tailJumps: CallType to signal if the function should be tail called + * fnptrval: An LLVM value containing a pointer to a function to be + invoked. Can be indirect. Should be LMFunction type. + * args: Concrete arguments for the parameters + * attrs: A list of function attributes for the call. Only NoReturn, + NoUnwind, ReadOnly and ReadNone are valid here. + -} + | Call LlvmCallType LlvmVar [LlvmVar] [LlvmFuncAttr] + + {- | + Call a function as above but potentially taking metadata as arguments. + * tailJumps: CallType to signal if the function should be tail called + * fnptrval: An LLVM value containing a pointer to a function to be + invoked. Can be indirect. Should be LMFunction type. + * args: Arguments that may include metadata. + * attrs: A list of function attributes for the call. Only NoReturn, + NoUnwind, ReadOnly and ReadNone are valid here. + -} + | CallM LlvmCallType LlvmVar [MetaExpr] [LlvmFuncAttr] + + {- | + Merge variables from different basic blocks which are predecessors of this + basic block in a new variable of type tp. + * tp: type of the merged variable, must match the types of the + predecessor variables. + * predecessors: A list of variables and the basic block that they originate + from. + -} + | Phi LlvmType [(LlvmVar,LlvmVar)] + + {- | + Inline assembly expression. Syntax is very similar to the style used by GCC. + * assembly: Actual inline assembly code. + * constraints: Operand constraints. + * return ty: Return type of function. + * vars: Any variables involved in the assembly code. + * sideeffect: Does the expression have side effects not visible from the + constraints list. + * alignstack: Should the stack be conservatively aligned before this + expression is executed. + -} + | Asm LMString LMString LlvmType [LlvmVar] Bool Bool + + {- | + A LLVM expression with metadata attached to it. + -} + | MExpr [MetaAnnot] LlvmExpression + + deriving (Eq) + diff --git a/compiler/GHC/Llvm/Types.hs b/compiler/GHC/Llvm/Types.hs new file mode 100644 index 0000000000..f4fa9a9a56 --- /dev/null +++ b/compiler/GHC/Llvm/Types.hs @@ -0,0 +1,888 @@ +{-# LANGUAGE CPP, GeneralizedNewtypeDeriving #-} + +-------------------------------------------------------------------------------- +-- | The LLVM Type System. +-- + +module GHC.Llvm.Types where + +#include "HsVersions.h" + +import GhcPrelude + +import Data.Char +import Data.Int +import Numeric + +import DynFlags +import FastString +import Outputable +import Unique + +-- from NCG +import PprBase + +import GHC.Float + +-- ----------------------------------------------------------------------------- +-- * LLVM Basic Types and Variables +-- + +-- | A global mutable variable. Maybe defined or external +data LMGlobal = LMGlobal { + getGlobalVar :: LlvmVar, -- ^ Returns the variable of the 'LMGlobal' + getGlobalValue :: Maybe LlvmStatic -- ^ Return the value of the 'LMGlobal' + } + +-- | A String in LLVM +type LMString = FastString + +-- | A type alias +type LlvmAlias = (LMString, LlvmType) + +-- | Llvm Types +data LlvmType + = LMInt Int -- ^ An integer with a given width in bits. + | LMFloat -- ^ 32 bit floating point + | LMDouble -- ^ 64 bit floating point + | LMFloat80 -- ^ 80 bit (x86 only) floating point + | LMFloat128 -- ^ 128 bit floating point + | LMPointer LlvmType -- ^ A pointer to a 'LlvmType' + | LMArray Int LlvmType -- ^ An array of 'LlvmType' + | LMVector Int LlvmType -- ^ A vector of 'LlvmType' + | LMLabel -- ^ A 'LlvmVar' can represent a label (address) + | LMVoid -- ^ Void type + | LMStruct [LlvmType] -- ^ Packed structure type + | LMStructU [LlvmType] -- ^ Unpacked structure type + | LMAlias LlvmAlias -- ^ A type alias + | LMMetadata -- ^ LLVM Metadata + + -- | Function type, used to create pointers to functions + | LMFunction LlvmFunctionDecl + deriving (Eq) + +instance Outputable LlvmType where + ppr (LMInt size ) = char 'i' <> ppr size + ppr (LMFloat ) = text "float" + ppr (LMDouble ) = text "double" + ppr (LMFloat80 ) = text "x86_fp80" + ppr (LMFloat128 ) = text "fp128" + ppr (LMPointer x ) = ppr x <> char '*' + ppr (LMArray nr tp ) = char '[' <> ppr nr <> text " x " <> ppr tp <> char ']' + ppr (LMVector nr tp ) = char '<' <> ppr nr <> text " x " <> ppr tp <> char '>' + ppr (LMLabel ) = text "label" + ppr (LMVoid ) = text "void" + ppr (LMStruct tys ) = text "<{" <> ppCommaJoin tys <> text "}>" + ppr (LMStructU tys ) = text "{" <> ppCommaJoin tys <> text "}" + ppr (LMMetadata ) = text "metadata" + + ppr (LMFunction (LlvmFunctionDecl _ _ _ r varg p _)) + = ppr r <+> lparen <> ppParams varg p <> rparen + + ppr (LMAlias (s,_)) = char '%' <> ftext s + +ppParams :: LlvmParameterListType -> [LlvmParameter] -> SDoc +ppParams varg p + = let varg' = case varg of + VarArgs | null args -> sLit "..." + | otherwise -> sLit ", ..." + _otherwise -> sLit "" + -- by default we don't print param attributes + args = map fst p + in ppCommaJoin args <> ptext varg' + +-- | An LLVM section definition. If Nothing then let LLVM decide the section +type LMSection = Maybe LMString +type LMAlign = Maybe Int + +data LMConst = Global -- ^ Mutable global variable + | Constant -- ^ Constant global variable + | Alias -- ^ Alias of another variable + deriving (Eq) + +-- | LLVM Variables +data LlvmVar + -- | Variables with a global scope. + = LMGlobalVar LMString LlvmType LlvmLinkageType LMSection LMAlign LMConst + -- | Variables local to a function or parameters. + | LMLocalVar Unique LlvmType + -- | Named local variables. Sometimes we need to be able to explicitly name + -- variables (e.g for function arguments). + | LMNLocalVar LMString LlvmType + -- | A constant variable + | LMLitVar LlvmLit + deriving (Eq) + +instance Outputable LlvmVar where + ppr (LMLitVar x) = ppr x + ppr (x ) = ppr (getVarType x) <+> ppName x + + +-- | Llvm Literal Data. +-- +-- These can be used inline in expressions. +data LlvmLit + -- | Refers to an integer constant (i64 42). + = LMIntLit Integer LlvmType + -- | Floating point literal + | LMFloatLit Double LlvmType + -- | Literal NULL, only applicable to pointer types + | LMNullLit LlvmType + -- | Vector literal + | LMVectorLit [LlvmLit] + -- | Undefined value, random bit pattern. Useful for optimisations. + | LMUndefLit LlvmType + deriving (Eq) + +instance Outputable LlvmLit where + ppr l@(LMVectorLit {}) = ppLit l + ppr l = ppr (getLitType l) <+> ppLit l + + +-- | Llvm Static Data. +-- +-- These represent the possible global level variables and constants. +data LlvmStatic + = LMComment LMString -- ^ A comment in a static section + | LMStaticLit LlvmLit -- ^ A static variant of a literal value + | LMUninitType LlvmType -- ^ For uninitialised data + | LMStaticStr LMString LlvmType -- ^ Defines a static 'LMString' + | LMStaticArray [LlvmStatic] LlvmType -- ^ A static array + | LMStaticStruc [LlvmStatic] LlvmType -- ^ A static structure type + | LMStaticPointer LlvmVar -- ^ A pointer to other data + + -- static expressions, could split out but leave + -- for moment for ease of use. Not many of them. + + | LMTrunc LlvmStatic LlvmType -- ^ Truncate + | LMBitc LlvmStatic LlvmType -- ^ Pointer to Pointer conversion + | LMPtoI LlvmStatic LlvmType -- ^ Pointer to Integer conversion + | LMAdd LlvmStatic LlvmStatic -- ^ Constant addition operation + | LMSub LlvmStatic LlvmStatic -- ^ Constant subtraction operation + +instance Outputable LlvmStatic where + ppr (LMComment s) = text "; " <> ftext s + ppr (LMStaticLit l ) = ppr l + ppr (LMUninitType t) = ppr t <> text " undef" + ppr (LMStaticStr s t) = ppr t <> text " c\"" <> ftext s <> text "\\00\"" + ppr (LMStaticArray d t) = ppr t <> text " [" <> ppCommaJoin d <> char ']' + ppr (LMStaticStruc d t) = ppr t <> text "<{" <> ppCommaJoin d <> text "}>" + ppr (LMStaticPointer v) = ppr v + ppr (LMTrunc v t) + = ppr t <> text " trunc (" <> ppr v <> text " to " <> ppr t <> char ')' + ppr (LMBitc v t) + = ppr t <> text " bitcast (" <> ppr v <> text " to " <> ppr t <> char ')' + ppr (LMPtoI v t) + = ppr t <> text " ptrtoint (" <> ppr v <> text " to " <> ppr t <> char ')' + + ppr (LMAdd s1 s2) + = pprStaticArith s1 s2 (sLit "add") (sLit "fadd") "LMAdd" + ppr (LMSub s1 s2) + = pprStaticArith s1 s2 (sLit "sub") (sLit "fsub") "LMSub" + + +pprSpecialStatic :: LlvmStatic -> SDoc +pprSpecialStatic (LMBitc v t) = + ppr (pLower t) <> text ", bitcast (" <> ppr v <> text " to " <> ppr t + <> char ')' +pprSpecialStatic v@(LMStaticPointer x) = ppr (pLower $ getVarType x) <> comma <+> ppr v +pprSpecialStatic stat = ppr stat + + +pprStaticArith :: LlvmStatic -> LlvmStatic -> PtrString -> PtrString + -> String -> SDoc +pprStaticArith s1 s2 int_op float_op op_name = + let ty1 = getStatType s1 + op = if isFloat ty1 then float_op else int_op + in if ty1 == getStatType s2 + then ppr ty1 <+> ptext op <+> lparen <> ppr s1 <> comma <> ppr s2 <> rparen + else sdocWithDynFlags $ \dflags -> + error $ op_name ++ " with different types! s1: " + ++ showSDoc dflags (ppr s1) ++ ", s2: " ++ showSDoc dflags (ppr s2) + +-- ----------------------------------------------------------------------------- +-- ** Operations on LLVM Basic Types and Variables +-- + +-- | Return the variable name or value of the 'LlvmVar' +-- in Llvm IR textual representation (e.g. @\@x@, @%y@ or @42@). +ppName :: LlvmVar -> SDoc +ppName v@(LMGlobalVar {}) = char '@' <> ppPlainName v +ppName v@(LMLocalVar {}) = char '%' <> ppPlainName v +ppName v@(LMNLocalVar {}) = char '%' <> ppPlainName v +ppName v@(LMLitVar {}) = ppPlainName v + +-- | Return the variable name or value of the 'LlvmVar' +-- in a plain textual representation (e.g. @x@, @y@ or @42@). +ppPlainName :: LlvmVar -> SDoc +ppPlainName (LMGlobalVar x _ _ _ _ _) = ftext x +ppPlainName (LMLocalVar x LMLabel ) = text (show x) +ppPlainName (LMLocalVar x _ ) = text ('l' : show x) +ppPlainName (LMNLocalVar x _ ) = ftext x +ppPlainName (LMLitVar x ) = ppLit x + +-- | Print a literal value. No type. +ppLit :: LlvmLit -> SDoc +ppLit (LMIntLit i (LMInt 32)) = ppr (fromInteger i :: Int32) +ppLit (LMIntLit i (LMInt 64)) = ppr (fromInteger i :: Int64) +ppLit (LMIntLit i _ ) = ppr ((fromInteger i)::Int) +ppLit (LMFloatLit r LMFloat ) = ppFloat $ narrowFp r +ppLit (LMFloatLit r LMDouble) = ppDouble r +ppLit f@(LMFloatLit _ _) = sdocWithDynFlags (\dflags -> + error $ "Can't print this float literal!" ++ showSDoc dflags (ppr f)) +ppLit (LMVectorLit ls ) = char '<' <+> ppCommaJoin ls <+> char '>' +ppLit (LMNullLit _ ) = text "null" +-- #11487 was an issue where we passed undef for some arguments +-- that were actually live. By chance the registers holding those +-- arguments usually happened to have the right values anyways, but +-- that was not guaranteed. To find such bugs reliably, we set the +-- flag below when validating, which replaces undef literals (at +-- common types) with values that are likely to cause a crash or test +-- failure. +ppLit (LMUndefLit t ) = sdocWithDynFlags f + where f dflags + | gopt Opt_LlvmFillUndefWithGarbage dflags, + Just lit <- garbageLit t = ppLit lit + | otherwise = text "undef" + +garbageLit :: LlvmType -> Maybe LlvmLit +garbageLit t@(LMInt w) = Just (LMIntLit (0xbbbbbbbbbbbbbbb0 `mod` (2^w)) t) + -- Use a value that looks like an untagged pointer, so we are more + -- likely to try to enter it +garbageLit t + | isFloat t = Just (LMFloatLit 12345678.9 t) +garbageLit t@(LMPointer _) = Just (LMNullLit t) + -- Using null isn't totally ideal, since some functions may check for null. + -- But producing another value is inconvenient since it needs a cast, + -- and the knowledge for how to format casts is in PpLlvm. +garbageLit _ = Nothing + -- More cases could be added, but this should do for now. + +-- | Return the 'LlvmType' of the 'LlvmVar' +getVarType :: LlvmVar -> LlvmType +getVarType (LMGlobalVar _ y _ _ _ _) = y +getVarType (LMLocalVar _ y ) = y +getVarType (LMNLocalVar _ y ) = y +getVarType (LMLitVar l ) = getLitType l + +-- | Return the 'LlvmType' of a 'LlvmLit' +getLitType :: LlvmLit -> LlvmType +getLitType (LMIntLit _ t) = t +getLitType (LMFloatLit _ t) = t +getLitType (LMVectorLit []) = panic "getLitType" +getLitType (LMVectorLit ls) = LMVector (length ls) (getLitType (head ls)) +getLitType (LMNullLit t) = t +getLitType (LMUndefLit t) = t + +-- | Return the 'LlvmType' of the 'LlvmStatic' +getStatType :: LlvmStatic -> LlvmType +getStatType (LMStaticLit l ) = getLitType l +getStatType (LMUninitType t) = t +getStatType (LMStaticStr _ t) = t +getStatType (LMStaticArray _ t) = t +getStatType (LMStaticStruc _ t) = t +getStatType (LMStaticPointer v) = getVarType v +getStatType (LMTrunc _ t) = t +getStatType (LMBitc _ t) = t +getStatType (LMPtoI _ t) = t +getStatType (LMAdd t _) = getStatType t +getStatType (LMSub t _) = getStatType t +getStatType (LMComment _) = error "Can't call getStatType on LMComment!" + +-- | Return the 'LlvmLinkageType' for a 'LlvmVar' +getLink :: LlvmVar -> LlvmLinkageType +getLink (LMGlobalVar _ _ l _ _ _) = l +getLink _ = Internal + +-- | Add a pointer indirection to the supplied type. 'LMLabel' and 'LMVoid' +-- cannot be lifted. +pLift :: LlvmType -> LlvmType +pLift LMLabel = error "Labels are unliftable" +pLift LMVoid = error "Voids are unliftable" +pLift LMMetadata = error "Metadatas are unliftable" +pLift x = LMPointer x + +-- | Lift a variable to 'LMPointer' type. +pVarLift :: LlvmVar -> LlvmVar +pVarLift (LMGlobalVar s t l x a c) = LMGlobalVar s (pLift t) l x a c +pVarLift (LMLocalVar s t ) = LMLocalVar s (pLift t) +pVarLift (LMNLocalVar s t ) = LMNLocalVar s (pLift t) +pVarLift (LMLitVar _ ) = error $ "Can't lower a literal type!" + +-- | Remove the pointer indirection of the supplied type. Only 'LMPointer' +-- constructors can be lowered. +pLower :: LlvmType -> LlvmType +pLower (LMPointer x) = x +pLower x = pprPanic "llvmGen(pLower)" + $ ppr x <+> text " is a unlowerable type, need a pointer" + +-- | Lower a variable of 'LMPointer' type. +pVarLower :: LlvmVar -> LlvmVar +pVarLower (LMGlobalVar s t l x a c) = LMGlobalVar s (pLower t) l x a c +pVarLower (LMLocalVar s t ) = LMLocalVar s (pLower t) +pVarLower (LMNLocalVar s t ) = LMNLocalVar s (pLower t) +pVarLower (LMLitVar _ ) = error $ "Can't lower a literal type!" + +-- | Test if the given 'LlvmType' is an integer +isInt :: LlvmType -> Bool +isInt (LMInt _) = True +isInt _ = False + +-- | Test if the given 'LlvmType' is a floating point type +isFloat :: LlvmType -> Bool +isFloat LMFloat = True +isFloat LMDouble = True +isFloat LMFloat80 = True +isFloat LMFloat128 = True +isFloat _ = False + +-- | Test if the given 'LlvmType' is an 'LMPointer' construct +isPointer :: LlvmType -> Bool +isPointer (LMPointer _) = True +isPointer _ = False + +-- | Test if the given 'LlvmType' is an 'LMVector' construct +isVector :: LlvmType -> Bool +isVector (LMVector {}) = True +isVector _ = False + +-- | Test if a 'LlvmVar' is global. +isGlobal :: LlvmVar -> Bool +isGlobal (LMGlobalVar _ _ _ _ _ _) = True +isGlobal _ = False + +-- | Width in bits of an 'LlvmType', returns 0 if not applicable +llvmWidthInBits :: DynFlags -> LlvmType -> Int +llvmWidthInBits _ (LMInt n) = n +llvmWidthInBits _ (LMFloat) = 32 +llvmWidthInBits _ (LMDouble) = 64 +llvmWidthInBits _ (LMFloat80) = 80 +llvmWidthInBits _ (LMFloat128) = 128 +-- Could return either a pointer width here or the width of what +-- it points to. We will go with the former for now. +-- PMW: At least judging by the way LLVM outputs constants, pointers +-- should use the former, but arrays the latter. +llvmWidthInBits dflags (LMPointer _) = llvmWidthInBits dflags (llvmWord dflags) +llvmWidthInBits dflags (LMArray n t) = n * llvmWidthInBits dflags t +llvmWidthInBits dflags (LMVector n ty) = n * llvmWidthInBits dflags ty +llvmWidthInBits _ LMLabel = 0 +llvmWidthInBits _ LMVoid = 0 +llvmWidthInBits dflags (LMStruct tys) = sum $ map (llvmWidthInBits dflags) tys +llvmWidthInBits _ (LMStructU _) = + -- It's not trivial to calculate the bit width of the unpacked structs, + -- since they will be aligned depending on the specified datalayout ( + -- http://llvm.org/docs/LangRef.html#data-layout ). One way we could support + -- this could be to make the GHC.CmmToLlvm.Ppr.moduleLayout be a data type + -- that exposes the alignment information. However, currently the only place + -- we use unpacked structs is LLVM intrinsics that return them (e.g., + -- llvm.sadd.with.overflow.*), so we don't actually need to compute their + -- bit width. + panic "llvmWidthInBits: not implemented for LMStructU" +llvmWidthInBits _ (LMFunction _) = 0 +llvmWidthInBits dflags (LMAlias (_,t)) = llvmWidthInBits dflags t +llvmWidthInBits _ LMMetadata = panic "llvmWidthInBits: Meta-data has no runtime representation!" + + +-- ----------------------------------------------------------------------------- +-- ** Shortcut for Common Types +-- + +i128, i64, i32, i16, i8, i1, i8Ptr :: LlvmType +i128 = LMInt 128 +i64 = LMInt 64 +i32 = LMInt 32 +i16 = LMInt 16 +i8 = LMInt 8 +i1 = LMInt 1 +i8Ptr = pLift i8 + +-- | The target architectures word size +llvmWord, llvmWordPtr :: DynFlags -> LlvmType +llvmWord dflags = LMInt (wORD_SIZE dflags * 8) +llvmWordPtr dflags = pLift (llvmWord dflags) + +-- ----------------------------------------------------------------------------- +-- * LLVM Function Types +-- + +-- | An LLVM Function +data LlvmFunctionDecl = LlvmFunctionDecl { + -- | Unique identifier of the function + decName :: LMString, + -- | LinkageType of the function + funcLinkage :: LlvmLinkageType, + -- | The calling convention of the function + funcCc :: LlvmCallConvention, + -- | Type of the returned value + decReturnType :: LlvmType, + -- | Indicates if this function uses varargs + decVarargs :: LlvmParameterListType, + -- | Parameter types and attributes + decParams :: [LlvmParameter], + -- | Function align value, must be power of 2 + funcAlign :: LMAlign + } + deriving (Eq) + +instance Outputable LlvmFunctionDecl where + ppr (LlvmFunctionDecl n l c r varg p a) + = let align = case a of + Just a' -> text " align " <> ppr a' + Nothing -> empty + in ppr l <+> ppr c <+> ppr r <+> char '@' <> ftext n <> + lparen <> ppParams varg p <> rparen <> align + +type LlvmFunctionDecls = [LlvmFunctionDecl] + +type LlvmParameter = (LlvmType, [LlvmParamAttr]) + +-- | LLVM Parameter Attributes. +-- +-- Parameter attributes are used to communicate additional information about +-- the result or parameters of a function +data LlvmParamAttr + -- | This indicates to the code generator that the parameter or return value + -- should be zero-extended to a 32-bit value by the caller (for a parameter) + -- or the callee (for a return value). + = ZeroExt + -- | This indicates to the code generator that the parameter or return value + -- should be sign-extended to a 32-bit value by the caller (for a parameter) + -- or the callee (for a return value). + | SignExt + -- | This indicates that this parameter or return value should be treated in + -- a special target-dependent fashion during while emitting code for a + -- function call or return (usually, by putting it in a register as opposed + -- to memory). + | InReg + -- | This indicates that the pointer parameter should really be passed by + -- value to the function. + | ByVal + -- | This indicates that the pointer parameter specifies the address of a + -- structure that is the return value of the function in the source program. + | SRet + -- | This indicates that the pointer does not alias any global or any other + -- parameter. + | NoAlias + -- | This indicates that the callee does not make any copies of the pointer + -- that outlive the callee itself + | NoCapture + -- | This indicates that the pointer parameter can be excised using the + -- trampoline intrinsics. + | Nest + deriving (Eq) + +instance Outputable LlvmParamAttr where + ppr ZeroExt = text "zeroext" + ppr SignExt = text "signext" + ppr InReg = text "inreg" + ppr ByVal = text "byval" + ppr SRet = text "sret" + ppr NoAlias = text "noalias" + ppr NoCapture = text "nocapture" + ppr Nest = text "nest" + +-- | Llvm Function Attributes. +-- +-- Function attributes are set to communicate additional information about a +-- function. Function attributes are considered to be part of the function, +-- not of the function type, so functions with different parameter attributes +-- can have the same function type. Functions can have multiple attributes. +-- +-- Descriptions taken from <http://llvm.org/docs/LangRef.html#fnattrs> +data LlvmFuncAttr + -- | This attribute indicates that the inliner should attempt to inline this + -- function into callers whenever possible, ignoring any active inlining + -- size threshold for this caller. + = AlwaysInline + -- | This attribute indicates that the source code contained a hint that + -- inlining this function is desirable (such as the \"inline\" keyword in + -- C/C++). It is just a hint; it imposes no requirements on the inliner. + | InlineHint + -- | This attribute indicates that the inliner should never inline this + -- function in any situation. This attribute may not be used together + -- with the alwaysinline attribute. + | NoInline + -- | This attribute suggests that optimization passes and code generator + -- passes make choices that keep the code size of this function low, and + -- otherwise do optimizations specifically to reduce code size. + | OptSize + -- | This function attribute indicates that the function never returns + -- normally. This produces undefined behavior at runtime if the function + -- ever does dynamically return. + | NoReturn + -- | This function attribute indicates that the function never returns with + -- an unwind or exceptional control flow. If the function does unwind, its + -- runtime behavior is undefined. + | NoUnwind + -- | This attribute indicates that the function computes its result (or + -- decides to unwind an exception) based strictly on its arguments, without + -- dereferencing any pointer arguments or otherwise accessing any mutable + -- state (e.g. memory, control registers, etc) visible to caller functions. + -- It does not write through any pointer arguments (including byval + -- arguments) and never changes any state visible to callers. This means + -- that it cannot unwind exceptions by calling the C++ exception throwing + -- methods, but could use the unwind instruction. + | ReadNone + -- | This attribute indicates that the function does not write through any + -- pointer arguments (including byval arguments) or otherwise modify any + -- state (e.g. memory, control registers, etc) visible to caller functions. + -- It may dereference pointer arguments and read state that may be set in + -- the caller. A readonly function always returns the same value (or unwinds + -- an exception identically) when called with the same set of arguments and + -- global state. It cannot unwind an exception by calling the C++ exception + -- throwing methods, but may use the unwind instruction. + | ReadOnly + -- | This attribute indicates that the function should emit a stack smashing + -- protector. It is in the form of a \"canary\"—a random value placed on the + -- stack before the local variables that's checked upon return from the + -- function to see if it has been overwritten. A heuristic is used to + -- determine if a function needs stack protectors or not. + -- + -- If a function that has an ssp attribute is inlined into a function that + -- doesn't have an ssp attribute, then the resulting function will have an + -- ssp attribute. + | Ssp + -- | This attribute indicates that the function should always emit a stack + -- smashing protector. This overrides the ssp function attribute. + -- + -- If a function that has an sspreq attribute is inlined into a function + -- that doesn't have an sspreq attribute or which has an ssp attribute, + -- then the resulting function will have an sspreq attribute. + | SspReq + -- | This attribute indicates that the code generator should not use a red + -- zone, even if the target-specific ABI normally permits it. + | NoRedZone + -- | This attributes disables implicit floating point instructions. + | NoImplicitFloat + -- | This attribute disables prologue / epilogue emission for the function. + -- This can have very system-specific consequences. + | Naked + deriving (Eq) + +instance Outputable LlvmFuncAttr where + ppr AlwaysInline = text "alwaysinline" + ppr InlineHint = text "inlinehint" + ppr NoInline = text "noinline" + ppr OptSize = text "optsize" + ppr NoReturn = text "noreturn" + ppr NoUnwind = text "nounwind" + ppr ReadNone = text "readnone" + ppr ReadOnly = text "readonly" + ppr Ssp = text "ssp" + ppr SspReq = text "ssqreq" + ppr NoRedZone = text "noredzone" + ppr NoImplicitFloat = text "noimplicitfloat" + ppr Naked = text "naked" + + +-- | Different types to call a function. +data LlvmCallType + -- | Normal call, allocate a new stack frame. + = StdCall + -- | Tail call, perform the call in the current stack frame. + | TailCall + deriving (Eq,Show) + +-- | Different calling conventions a function can use. +data LlvmCallConvention + -- | The C calling convention. + -- This calling convention (the default if no other calling convention is + -- specified) matches the target C calling conventions. This calling + -- convention supports varargs function calls and tolerates some mismatch in + -- the declared prototype and implemented declaration of the function (as + -- does normal C). + = CC_Ccc + -- | This calling convention attempts to make calls as fast as possible + -- (e.g. by passing things in registers). This calling convention allows + -- the target to use whatever tricks it wants to produce fast code for the + -- target, without having to conform to an externally specified ABI + -- (Application Binary Interface). Implementations of this convention should + -- allow arbitrary tail call optimization to be supported. This calling + -- convention does not support varargs and requires the prototype of al + -- callees to exactly match the prototype of the function definition. + | CC_Fastcc + -- | This calling convention attempts to make code in the caller as efficient + -- as possible under the assumption that the call is not commonly executed. + -- As such, these calls often preserve all registers so that the call does + -- not break any live ranges in the caller side. This calling convention + -- does not support varargs and requires the prototype of all callees to + -- exactly match the prototype of the function definition. + | CC_Coldcc + -- | The GHC-specific 'registerised' calling convention. + | CC_Ghc + -- | Any calling convention may be specified by number, allowing + -- target-specific calling conventions to be used. Target specific calling + -- conventions start at 64. + | CC_Ncc Int + -- | X86 Specific 'StdCall' convention. LLVM includes a specific alias for it + -- rather than just using CC_Ncc. + | CC_X86_Stdcc + deriving (Eq) + +instance Outputable LlvmCallConvention where + ppr CC_Ccc = text "ccc" + ppr CC_Fastcc = text "fastcc" + ppr CC_Coldcc = text "coldcc" + ppr CC_Ghc = text "ghccc" + ppr (CC_Ncc i) = text "cc " <> ppr i + ppr CC_X86_Stdcc = text "x86_stdcallcc" + + +-- | Functions can have a fixed amount of parameters, or a variable amount. +data LlvmParameterListType + -- Fixed amount of arguments. + = FixedArgs + -- Variable amount of arguments. + | VarArgs + deriving (Eq,Show) + + +-- | Linkage type of a symbol. +-- +-- The description of the constructors is copied from the Llvm Assembly Language +-- Reference Manual <http://www.llvm.org/docs/LangRef.html#linkage>, because +-- they correspond to the Llvm linkage types. +data LlvmLinkageType + -- | Global values with internal linkage are only directly accessible by + -- objects in the current module. In particular, linking code into a module + -- with an internal global value may cause the internal to be renamed as + -- necessary to avoid collisions. Because the symbol is internal to the + -- module, all references can be updated. This corresponds to the notion + -- of the @static@ keyword in C. + = Internal + -- | Globals with @linkonce@ linkage are merged with other globals of the + -- same name when linkage occurs. This is typically used to implement + -- inline functions, templates, or other code which must be generated + -- in each translation unit that uses it. Unreferenced linkonce globals are + -- allowed to be discarded. + | LinkOnce + -- | @weak@ linkage is exactly the same as linkonce linkage, except that + -- unreferenced weak globals may not be discarded. This is used for globals + -- that may be emitted in multiple translation units, but that are not + -- guaranteed to be emitted into every translation unit that uses them. One + -- example of this are common globals in C, such as @int X;@ at global + -- scope. + | Weak + -- | @appending@ linkage may only be applied to global variables of pointer + -- to array type. When two global variables with appending linkage are + -- linked together, the two global arrays are appended together. This is + -- the Llvm, typesafe, equivalent of having the system linker append + -- together @sections@ with identical names when .o files are linked. + | Appending + -- | The semantics of this linkage follow the ELF model: the symbol is weak + -- until linked, if not linked, the symbol becomes null instead of being an + -- undefined reference. + | ExternWeak + -- | The symbol participates in linkage and can be used to resolve external + -- symbol references. + | ExternallyVisible + -- | Alias for 'ExternallyVisible' but with explicit textual form in LLVM + -- assembly. + | External + -- | Symbol is private to the module and should not appear in the symbol table + | Private + deriving (Eq) + +instance Outputable LlvmLinkageType where + ppr Internal = text "internal" + ppr LinkOnce = text "linkonce" + ppr Weak = text "weak" + ppr Appending = text "appending" + ppr ExternWeak = text "extern_weak" + -- ExternallyVisible does not have a textual representation, it is + -- the linkage type a function resolves to if no other is specified + -- in Llvm. + ppr ExternallyVisible = empty + ppr External = text "external" + ppr Private = text "private" + +-- ----------------------------------------------------------------------------- +-- * LLVM Operations +-- + +-- | Llvm binary operators machine operations. +data LlvmMachOp + = LM_MO_Add -- ^ add two integer, floating point or vector values. + | LM_MO_Sub -- ^ subtract two ... + | LM_MO_Mul -- ^ multiply .. + | LM_MO_UDiv -- ^ unsigned integer or vector division. + | LM_MO_SDiv -- ^ signed integer .. + | LM_MO_URem -- ^ unsigned integer or vector remainder (mod) + | LM_MO_SRem -- ^ signed ... + + | LM_MO_FAdd -- ^ add two floating point or vector values. + | LM_MO_FSub -- ^ subtract two ... + | LM_MO_FMul -- ^ multiply ... + | LM_MO_FDiv -- ^ divide ... + | LM_MO_FRem -- ^ remainder ... + + -- | Left shift + | LM_MO_Shl + -- | Logical shift right + -- Shift right, filling with zero + | LM_MO_LShr + -- | Arithmetic shift right + -- The most significant bits of the result will be equal to the sign bit of + -- the left operand. + | LM_MO_AShr + + | LM_MO_And -- ^ AND bitwise logical operation. + | LM_MO_Or -- ^ OR bitwise logical operation. + | LM_MO_Xor -- ^ XOR bitwise logical operation. + deriving (Eq) + +instance Outputable LlvmMachOp where + ppr LM_MO_Add = text "add" + ppr LM_MO_Sub = text "sub" + ppr LM_MO_Mul = text "mul" + ppr LM_MO_UDiv = text "udiv" + ppr LM_MO_SDiv = text "sdiv" + ppr LM_MO_URem = text "urem" + ppr LM_MO_SRem = text "srem" + ppr LM_MO_FAdd = text "fadd" + ppr LM_MO_FSub = text "fsub" + ppr LM_MO_FMul = text "fmul" + ppr LM_MO_FDiv = text "fdiv" + ppr LM_MO_FRem = text "frem" + ppr LM_MO_Shl = text "shl" + ppr LM_MO_LShr = text "lshr" + ppr LM_MO_AShr = text "ashr" + ppr LM_MO_And = text "and" + ppr LM_MO_Or = text "or" + ppr LM_MO_Xor = text "xor" + + +-- | Llvm compare operations. +data LlvmCmpOp + = LM_CMP_Eq -- ^ Equal (Signed and Unsigned) + | LM_CMP_Ne -- ^ Not equal (Signed and Unsigned) + | LM_CMP_Ugt -- ^ Unsigned greater than + | LM_CMP_Uge -- ^ Unsigned greater than or equal + | LM_CMP_Ult -- ^ Unsigned less than + | LM_CMP_Ule -- ^ Unsigned less than or equal + | LM_CMP_Sgt -- ^ Signed greater than + | LM_CMP_Sge -- ^ Signed greater than or equal + | LM_CMP_Slt -- ^ Signed less than + | LM_CMP_Sle -- ^ Signed less than or equal + + -- Float comparisons. GHC uses a mix of ordered and unordered float + -- comparisons. + | LM_CMP_Feq -- ^ Float equal + | LM_CMP_Fne -- ^ Float not equal + | LM_CMP_Fgt -- ^ Float greater than + | LM_CMP_Fge -- ^ Float greater than or equal + | LM_CMP_Flt -- ^ Float less than + | LM_CMP_Fle -- ^ Float less than or equal + deriving (Eq) + +instance Outputable LlvmCmpOp where + ppr LM_CMP_Eq = text "eq" + ppr LM_CMP_Ne = text "ne" + ppr LM_CMP_Ugt = text "ugt" + ppr LM_CMP_Uge = text "uge" + ppr LM_CMP_Ult = text "ult" + ppr LM_CMP_Ule = text "ule" + ppr LM_CMP_Sgt = text "sgt" + ppr LM_CMP_Sge = text "sge" + ppr LM_CMP_Slt = text "slt" + ppr LM_CMP_Sle = text "sle" + ppr LM_CMP_Feq = text "oeq" + ppr LM_CMP_Fne = text "une" + ppr LM_CMP_Fgt = text "ogt" + ppr LM_CMP_Fge = text "oge" + ppr LM_CMP_Flt = text "olt" + ppr LM_CMP_Fle = text "ole" + + +-- | Llvm cast operations. +data LlvmCastOp + = LM_Trunc -- ^ Integer truncate + | LM_Zext -- ^ Integer extend (zero fill) + | LM_Sext -- ^ Integer extend (sign fill) + | LM_Fptrunc -- ^ Float truncate + | LM_Fpext -- ^ Float extend + | LM_Fptoui -- ^ Float to unsigned Integer + | LM_Fptosi -- ^ Float to signed Integer + | LM_Uitofp -- ^ Unsigned Integer to Float + | LM_Sitofp -- ^ Signed Int to Float + | LM_Ptrtoint -- ^ Pointer to Integer + | LM_Inttoptr -- ^ Integer to Pointer + | LM_Bitcast -- ^ Cast between types where no bit manipulation is needed + deriving (Eq) + +instance Outputable LlvmCastOp where + ppr LM_Trunc = text "trunc" + ppr LM_Zext = text "zext" + ppr LM_Sext = text "sext" + ppr LM_Fptrunc = text "fptrunc" + ppr LM_Fpext = text "fpext" + ppr LM_Fptoui = text "fptoui" + ppr LM_Fptosi = text "fptosi" + ppr LM_Uitofp = text "uitofp" + ppr LM_Sitofp = text "sitofp" + ppr LM_Ptrtoint = text "ptrtoint" + ppr LM_Inttoptr = text "inttoptr" + ppr LM_Bitcast = text "bitcast" + + +-- ----------------------------------------------------------------------------- +-- * Floating point conversion +-- + +-- | Convert a Haskell Double to an LLVM hex encoded floating point form. In +-- Llvm float literals can be printed in a big-endian hexadecimal format, +-- regardless of underlying architecture. +-- +-- See Note [LLVM Float Types]. +ppDouble :: Double -> SDoc +ppDouble d + = let bs = doubleToBytes d + hex d' = case showHex d' "" of + [] -> error "dToStr: too few hex digits for float" + [x] -> ['0',x] + [x,y] -> [x,y] + _ -> error "dToStr: too many hex digits for float" + + in sdocWithDynFlags (\dflags -> + let fixEndian = if wORDS_BIGENDIAN dflags then id else reverse + str = map toUpper $ concat $ fixEndian $ map hex bs + in text "0x" <> text str) + +-- Note [LLVM Float Types] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- We use 'ppDouble' for both printing Float and Double floating point types. This is +-- as LLVM expects all floating point constants (single & double) to be in IEEE +-- 754 Double precision format. However, for single precision numbers (Float) +-- they should be *representable* in IEEE 754 Single precision format. So the +-- easiest way to do this is to narrow and widen again. +-- (i.e., Double -> Float -> Double). We must be careful doing this that GHC +-- doesn't optimize that away. + +-- Note [narrowFp & widenFp] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~ +-- NOTE: we use float2Double & co directly as GHC likes to optimize away +-- successive calls of 'realToFrac', defeating the narrowing. (Bug #7600). +-- 'realToFrac' has inconsistent behaviour with optimisation as well that can +-- also cause issues, these methods don't. + +narrowFp :: Double -> Float +{-# NOINLINE narrowFp #-} +narrowFp = double2Float + +widenFp :: Float -> Double +{-# NOINLINE widenFp #-} +widenFp = float2Double + +ppFloat :: Float -> SDoc +ppFloat = ppDouble . widenFp + + +-------------------------------------------------------------------------------- +-- * Misc functions +-------------------------------------------------------------------------------- + +ppCommaJoin :: (Outputable a) => [a] -> SDoc +ppCommaJoin strs = hsep $ punctuate comma (map ppr strs) + +ppSpaceJoin :: (Outputable a) => [a] -> SDoc +ppSpaceJoin strs = hsep (map ppr strs) |