diff options
author | David Terei <davidterei@gmail.com> | 2010-06-15 09:47:14 +0000 |
---|---|---|
committer | David Terei <davidterei@gmail.com> | 2010-06-15 09:47:14 +0000 |
commit | 49a8e5c021009430d373d6224b29004c7d18c408 (patch) | |
tree | 5e49c02cc6ad756d92ef71d4ab16338b278352a6 | |
parent | 0c41772cba7ec3f558cd2619716c7db771eae935 (diff) | |
download | haskell-49a8e5c021009430d373d6224b29004c7d18c408.tar.gz |
Add new LLVM code generator to GHC. (Version 2)
This was done as part of an honours thesis at UNSW, the paper describing the
work and results can be found at:
http://www.cse.unsw.edu.au/~pls/thesis/davidt-thesis.pdf
A Homepage for the backend can be found at:
http://hackage.haskell.org/trac/ghc/wiki/Commentary/Compiler/Backends/LLVM
Quick summary of performance is that for the 'nofib' benchmark suite, runtimes
are within 5% slower than the NCG and generally better than the C code
generator. For some code though, such as the DPH projects benchmark, the LLVM
code generator outperforms the NCG and C code generator by about a 25%
reduction in run times.
33 files changed, 3402 insertions, 226 deletions
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index c48269ecf5..5c026227bf 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -181,6 +181,7 @@ data ClosureTypeInfo data CmmReturnInfo = CmmMayReturn | CmmNeverReturns + deriving ( Eq ) -- TODO: These types may need refinement data ProfilingInfo = ProfilingInfo CmmLit CmmLit -- closure_type, closure_desc diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index e01d8f5cba..69320a2f66 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -6,7 +6,7 @@ -- ----------------------------------------------------------------------------- -module CmmUtils( +module CmmUtils( CmmStmts, noStmts, oneStmt, mkStmts, plusStmts, stmtList, isNopStmt, diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index 901dd96502..ce689c42f7 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -122,9 +122,10 @@ emitForeignCall' safety results target args vols _srt ret | not (playSafe safety) = do temp_args <- load_args_into_temps args let (caller_save, caller_load) = callerSaveVolatileRegs vols + let caller_load' = if ret == CmmNeverReturns then [] else caller_load stmtsC caller_save stmtC (CmmCall target results temp_args CmmUnsafe ret) - stmtsC caller_load + stmtsC caller_load' | otherwise = do -- Both 'id' and 'new_base' are GCKindNonPtr because they're diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index f8b41a087a..d22fee1e75 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -26,6 +26,7 @@ module CgUtils ( tagToClosure, callerSaveVolatileRegs, get_GlobalReg_addr, + activeStgRegs, fixStgRegisters, cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, cmmUGtWord, @@ -423,33 +424,6 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) : next | otherwise = next --- ----------------------------------------------------------------------------- --- Global registers - --- We map STG registers onto appropriate CmmExprs. Either they map --- to real machine registers or stored as offsets from BaseReg. Given --- a GlobalReg, get_GlobalReg_addr always produces the --- register table address for it. --- (See also get_GlobalReg_reg_or_addr in MachRegs) - -get_GlobalReg_addr :: GlobalReg -> CmmExpr -get_GlobalReg_addr BaseReg = regTableOffset 0 -get_GlobalReg_addr mid = get_Regtable_addr_from_offset - (globalRegType mid) (baseRegOffset mid) - --- Calculate a literal representing an offset into the register table. --- Used when we don't have an actual BaseReg to offset from. -regTableOffset n = - CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) - -get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr -get_Regtable_addr_from_offset rep offset = -#ifdef REG_Base - CmmRegOff (CmmGlobal BaseReg) offset -#else - regTableOffset offset -#endif - -- | Returns @True@ if this global register is stored in a caller-saves -- machine register. @@ -1010,3 +984,181 @@ clHasCafRefs (ClosureInfo {closureSRT = srt}) = case srt of NoC_SRT -> NoCafRefs _ -> MayHaveCafRefs clHasCafRefs (ConInfo {}) = NoCafRefs + +-- ----------------------------------------------------------------------------- +-- +-- STG/Cmm GlobalReg +-- +-- ----------------------------------------------------------------------------- + +-- | Here is where the STG register map is defined for each target arch. +-- The order matters (for the llvm backend anyway)! We must make sure to +-- maintain the order here with the order used in the LLVM calling conventions. +-- Note that also, this isn't all registers, just the ones that are currently +-- possbily mapped to real registers. +activeStgRegs :: [GlobalReg] +activeStgRegs = [ +#ifdef REG_Base + BaseReg +#endif +#ifdef REG_Sp + ,Sp +#endif +#ifdef REG_Hp + ,Hp +#endif +#ifdef REG_R1 + ,VanillaReg 1 VGcPtr +#endif +#ifdef REG_R2 + ,VanillaReg 2 VGcPtr +#endif +#ifdef REG_R3 + ,VanillaReg 3 VGcPtr +#endif +#ifdef REG_R4 + ,VanillaReg 4 VGcPtr +#endif +#ifdef REG_R5 + ,VanillaReg 5 VGcPtr +#endif +#ifdef REG_R6 + ,VanillaReg 6 VGcPtr +#endif +#ifdef REG_R7 + ,VanillaReg 7 VGcPtr +#endif +#ifdef REG_R8 + ,VanillaReg 8 VGcPtr +#endif +#ifdef REG_SpLim + ,SpLim +#endif +#ifdef REG_F1 + ,FloatReg 1 +#endif +#ifdef REG_F2 + ,FloatReg 2 +#endif +#ifdef REG_F3 + ,FloatReg 3 +#endif +#ifdef REG_F4 + ,FloatReg 4 +#endif +#ifdef REG_D1 + ,DoubleReg 1 +#endif +#ifdef REG_D2 + ,DoubleReg 2 +#endif + ] + +-- | We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_addr always produces the +-- register table address for it. +get_GlobalReg_addr :: GlobalReg -> CmmExpr +get_GlobalReg_addr BaseReg = regTableOffset 0 +get_GlobalReg_addr mid = get_Regtable_addr_from_offset + (globalRegType mid) (baseRegOffset mid) + +-- Calculate a literal representing an offset into the register table. +-- Used when we don't have an actual BaseReg to offset from. +regTableOffset n = + CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) + +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr +get_Regtable_addr_from_offset rep offset = +#ifdef REG_Base + CmmRegOff (CmmGlobal BaseReg) offset +#else + regTableOffset offset +#endif + +-- | Fixup global registers so that they assign to locations within the +-- RegTable if they aren't pinned for the current target. +fixStgRegisters :: RawCmmTop -> RawCmmTop +fixStgRegisters top@(CmmData _ _) = top + +fixStgRegisters (CmmProc info lbl params (ListGraph blocks)) = + let blocks' = map fixStgRegBlock blocks + in CmmProc info lbl params $ ListGraph blocks' + +fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock +fixStgRegBlock (BasicBlock id stmts) = + let stmts' = map fixStgRegStmt stmts + in BasicBlock id stmts' + +fixStgRegStmt :: CmmStmt -> CmmStmt +fixStgRegStmt stmt + = case stmt of + CmmAssign (CmmGlobal reg) src -> + let src' = fixStgRegExpr src + baseAddr = get_GlobalReg_addr reg + in case reg `elem` activeStgRegs of + True -> CmmAssign (CmmGlobal reg) src' + False -> CmmStore baseAddr src' + + CmmAssign reg src -> + let src' = fixStgRegExpr src + in CmmAssign reg src' + + CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src) + + CmmCall target regs args srt returns -> + let target' = case target of + CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv + other -> other + args' = map (\(CmmHinted arg hint) -> + (CmmHinted (fixStgRegExpr arg) hint)) args + in CmmCall target' regs args' srt returns + + CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest + + CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids + + CmmJump addr regs -> CmmJump (fixStgRegExpr addr) regs + + -- CmmNop, CmmComment, CmmBranch, CmmReturn + _other -> stmt + + +fixStgRegExpr :: CmmExpr -> CmmExpr +fixStgRegExpr expr + = case expr of + CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty + + CmmMachOp mop args -> CmmMachOp mop args' + where args' = map fixStgRegExpr args + + CmmReg (CmmGlobal reg) -> + -- Replace register leaves with appropriate StixTrees for + -- the given target. MagicIds which map to a reg on this + -- arch are left unchanged. For the rest, BaseReg is taken + -- to mean the address of the reg table in MainCapability, + -- and for all others we generate an indirection to its + -- location in the register table. + case reg `elem` activeStgRegs of + True -> expr + False -> + let baseAddr = get_GlobalReg_addr reg + in case reg of + BaseReg -> fixStgRegExpr baseAddr + _other -> fixStgRegExpr + (CmmLoad baseAddr (globalRegType reg)) + + CmmRegOff (CmmGlobal reg) offset -> + -- RegOf leaves are just a shorthand form. If the reg maps + -- to a real reg, we keep the shorthand, otherwise, we just + -- expand it and defer to the above code. + case reg `elem` activeStgRegs of + True -> expr + False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [ + CmmReg (CmmGlobal reg), + CmmLit (CmmInt (fromIntegral offset) + wordWidth)]) + + -- CmmLit, CmmReg (CmmLocal), CmmStackSlot + _other -> expr + diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 448d27bead..479e56d22e 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -130,7 +130,9 @@ Library ghci hsSyn iface + llvmGen main + nativeGen parser prelude profiling @@ -153,6 +155,16 @@ Library Id IdInfo Literal + Llvm + Llvm.AbsSyn + Llvm.PpLlvm + Llvm.Types + LlvmCodeGen + LlvmCodeGen.Base + LlvmCodeGen.CodeGen + LlvmCodeGen.Data + LlvmCodeGen.Ppr + LlvmCodeGen.Regs MkId Module Name @@ -198,6 +210,7 @@ Library MkZipCfg MkZipCfgCmm OptimizationFuel + PprBase PprC PprCmm PprCmmZ @@ -447,10 +460,9 @@ Library VectUtils Vectorise + -- We only need to expose more modules as some of the ncg code is used + -- by the LLVM backend so its always included if flag(ncg) - hs-source-dirs: - nativeGen - Exposed-Modules: AsmCodeGen TargetReg @@ -459,7 +471,6 @@ Library Size Reg RegClass - PprBase PIC Platform Alpha.Regs diff --git a/compiler/ghc.mk b/compiler/ghc.mk index 1c61494b18..9cbacf4bd4 100644 --- a/compiler/ghc.mk +++ b/compiler/ghc.mk @@ -43,6 +43,12 @@ compiler/stage2/package-data.mk : $(compiler_CONFIG_HS) compiler/stage3/package-data.mk : $(compiler_CONFIG_HS) endif +ifeq "$(GhcEnableTablesNextToCode)" "NO" +GhcWithLlvmCodeGen = YES +else +GhcWithLlvmCodeGen = NO +endif + $(compiler_CONFIG_HS) : mk/config.mk mk/project.mk "$(RM)" $(RM_OPTS) $@ @echo "Creating $@ ... " @@ -67,6 +73,8 @@ $(compiler_CONFIG_HS) : mk/config.mk mk/project.mk @echo "cGhcWithInterpreter = \"$(GhcWithInterpreter)\"" >> $@ @echo "cGhcWithNativeCodeGen :: String" >> $@ @echo "cGhcWithNativeCodeGen = \"$(GhcWithNativeCodeGen)\"" >> $@ + @echo "cGhcWithLlvmCodeGen :: String" >> $@ + @echo "cGhcWithLlvmCodeGen = \"$(GhcWithLlvmCodeGen)\"" >> $@ @echo "cGhcWithSMP :: String" >> $@ @echo "cGhcWithSMP = \"$(GhcWithSMP)\"" >> $@ @echo "cGhcRTSWays :: String" >> $@ @@ -313,7 +321,7 @@ ifeq "$(GhcEnableTablesNextToCode) $(GhcUnregisterised)" "YES NO" # or not? # XXX This should logically be a CPP option, but there doesn't seem to # be a flag for that -compiler_stage2_CONFIGURE_OPTS += --ghc-option=-DGHCI_TABLES_NEXT_TO_CODE +compiler_CONFIGURE_OPTS += --ghc-option=-DGHCI_TABLES_NEXT_TO_CODE endif # Should the debugger commands be enabled? @@ -338,6 +346,8 @@ ifeq "$(HOSTPLATFORM)" "ia64-unknown-linux" # needed for generating proper relocation in large binaries: trac #856 compiler_CONFIGURE_OPTS += --ld-option=-Wl,--relax endif +else +compiler_CONFIGURE_OPTS += --ghc-option=-DNO_REGS endif # We need to turn on profiling either if we have been asked to diff --git a/compiler/llvmGen/Llvm.hs b/compiler/llvmGen/Llvm.hs new file mode 100644 index 0000000000..7a322bd86f --- /dev/null +++ b/compiler/llvmGen/Llvm.hs @@ -0,0 +1,52 @@ +-- ---------------------------------------------------------------------------- +-- | This module supplies bindings to generate Llvm IR from Haskell +-- (<http://www.llvm.org/docs/LangRef.html>). +-- +-- Note: this module is developed in a demand driven way. It is no complete +-- LLVM binding library in Haskell, but enough to generate code for GHC. +-- +-- This code is derived from code taken from the Essential Haskell Compiler +-- (EHC) project (<http://www.cs.uu.nl/wiki/Ehc/WebHome>). +-- + +module Llvm ( + + -- * Modules, Functions and Blocks + LlvmModule(..), + + LlvmFunction(..), LlvmFunctionDecl(..), + LlvmFunctions, LlvmFunctionDecls, + LlvmStatement(..), LlvmExpression(..), + LlvmBlocks, LlvmBlock(..), LlvmBlockId, + + -- * Call Handling + LlvmCallConvention(..), LlvmCallType(..), LlvmParameterListType(..), + LlvmLinkageType(..), LlvmFuncAttr(..), + + -- * Operations and Comparisons + LlvmCmpOp(..), LlvmMachOp(..), LlvmCastOp(..), + + -- * Variables and Type System + LlvmVar(..), LlvmStatic(..), LlvmLit(..), LlvmType(..), + LMGlobal, LMString, LMConstant, + + -- ** Some basic types + i64, i32, i16, i8, i1, llvmWord, llvmWordPtr, + + -- ** Operations on the type system. + isGlobal, getLitType, getLit, getName, getPlainName, getVarType, + getStatType, getGlobalVar, getGlobalType, pVarLower, pLift, pLower, + isInt, isFloat, isPointer, llvmWidthInBits, + + -- * Pretty Printing + ppLlvmModule, ppLlvmComments, ppLlvmComment, ppLlvmConstants, + ppLlvmConstant, ppLlvmGlobals, ppLlvmGlobal, ppLlvmFunctionDecls, + ppLlvmFunctionDecl, ppLlvmFunctions, ppLlvmFunction, ppLlvmType, + ppLlvmTypes, llvmSDoc + + ) where + +import Llvm.AbsSyn +import Llvm.PpLlvm +import Llvm.Types + diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs new file mode 100644 index 0000000000..1b8527b31f --- /dev/null +++ b/compiler/llvmGen/Llvm/AbsSyn.hs @@ -0,0 +1,209 @@ +-------------------------------------------------------------------------------- +-- | The LLVM abstract syntax. +-- + +module Llvm.AbsSyn where + +import Llvm.Types + +import Unique + +-- | Block labels +type LlvmBlockId = Unique + +-- | A block of LLVM code. +data LlvmBlock = LlvmBlock { + -- | The code label for this block + blockLabel :: LlvmBlockId, + + -- | A list of LlvmStatement's representing the code for this block. + -- This list must end with a control flow statement. + blockStmts :: [LlvmStatement] + } + +type LlvmBlocks = [LlvmBlock] + +-- | An LLVM Module. This is a top level contianer in LLVM. +data LlvmModule = LlvmModule { + -- | Comments to include at the start of the module. + modComments :: [LMString], + + -- | Constants to include in the module. + modConstants :: [LMConstant], + + -- | Global variables to include in the module. + modGlobals :: [LMGlobal], + + -- | LLVM Functions used in this module but defined in other modules. + modFwdDecls :: LlvmFunctionDecls, + + -- | LLVM Functions defined in this module. + modFuncs :: LlvmFunctions + } + +-- | An LLVM Function +data LlvmFunction = LlvmFunction { + -- | The signature of this declared function. + funcDecl :: LlvmFunctionDecl, + + -- | The function attributes. + funcAttrs :: [LlvmFuncAttr], + + -- | The body of the functions. + funcBody :: LlvmBlocks + } + +type LlvmFunctions = [LlvmFunction] + + +-- | Llvm Statements +data LlvmStatement + {- | + Assign an expression to an variable: + * dest: Variable to assign to + * source: Source expression + -} + = Assignment LlvmVar LlvmExpression + + {- | + Always branch to the target label + -} + | Branch LlvmVar + + {- | + Branch to label targetTrue if cond is true otherwise to label targetFalse + * cond: condition that will be tested, must be of type i1 + * targetTrue: label to branch to if cond is true + * targetFalse: label to branch to if cond is false + -} + | BranchIf LlvmVar LlvmVar LlvmVar + + {- | + Comment + Plain comment. + -} + | Comment [LMString] + + {- | + Set a label on this position. + * name: Identifier of this label, unique for this module + -} + | MkLabel LlvmBlockId + + {- | + Store variable value in pointer ptr. If value is of type t then ptr must + be of type t*. + * value: Variable/Constant to store. + * ptr: Location to store the value in + -} + | Store LlvmVar LlvmVar + + {- | + Mutliway branch + * scrutinee: Variable or constant which must be of integer type that is + determines which arm is chosen. + * def: The default label if there is no match in target. + * target: A list of (value,label) where the value is an integer + constant and label the corresponding label to jump to if the + scrutinee matches the value. + -} + | Switch LlvmVar LlvmVar [(LlvmVar, LlvmVar)] + + {- | + Return a result. + * result: The variable or constant to return + -} + | Return (Maybe LlvmVar) + + {- | + An instruction for the optimizer that the code following is not reachable + -} + | Unreachable + + {- | + Raise an expression to a statement (if don't want result or want to use + Llvm unamed values. + -} + | Expr LlvmExpression + + deriving (Show, Eq) + + +-- | Llvm Expressions +data LlvmExpression + {- | + Allocate amount * sizeof(tp) bytes on the stack + * tp: LlvmType to reserve room for + * amount: The nr of tp's which must be allocated + -} + = Alloca LlvmType Int + + {- | + Perform the machine operator op on the operands left and right + * op: operator + * left: left operand + * right: right operand + -} + | LlvmOp LlvmMachOp LlvmVar LlvmVar + + {- | + Perform a compare operation on the operands left and right + * op: operator + * left: left operand + * right: right operand + -} + | Compare LlvmCmpOp LlvmVar LlvmVar + + {- | + Allocate amount * sizeof(tp) bytes on the heap + * tp: LlvmType to reserve room for + * amount: The nr of tp's which must be allocated + -} + | Malloc LlvmType Int + + {- | + Load the value at location ptr + -} + | Load LlvmVar + + {- | + Navigate in an structure, selecting elements + * ptr: Location of the structure + * indexes: A list of indexes to select the correct value. For example + the first element of the third element of the structure ptr + is selected with [3,1] (zero indexed) + -} + | GetElemPtr LlvmVar [Int] + + {- | + Cast the variable from to the to type. This is an abstraction of three + cast operators in Llvm, inttoptr, prttoint and bitcast. + * cast: Cast type + * from: Variable to cast + * to: type to cast to + -} + | Cast LlvmCastOp LlvmVar LlvmType + + {- | + Call a function. The result is the value of the expression. + * tailJumps: CallType to signal if the function should be tail called + * fnptrval: An LLVM value containing a pointer to a function to be + invoked. Can be indirect. Should be LMFunction type. + * args: Concrete arguments for the parameters + * attrs: A list of function attributes for the call. Only NoReturn, + NoUnwind, ReadOnly and ReadNone are valid here. + -} + | Call LlvmCallType LlvmVar [LlvmVar] [LlvmFuncAttr] + + {- | + Merge variables from different basic blocks which are predecessors of this + basic block in a new variable of type tp. + * tp: type of the merged variable, must match the types of the + precessors variables. + * precessors: A list of variables and the basic block that they originate + from. + -} + | Phi LlvmType [(LlvmVar,LlvmVar)] + + deriving (Show, Eq) + diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs new file mode 100644 index 0000000000..8d36511a47 --- /dev/null +++ b/compiler/llvmGen/Llvm/PpLlvm.hs @@ -0,0 +1,325 @@ +-------------------------------------------------------------------------------- +-- | Pretty print LLVM IR Code. +-- + +module Llvm.PpLlvm ( + + -- * Top level LLVM objects. + ppLlvmModule, + ppLlvmComments, + ppLlvmComment, + ppLlvmConstants, + ppLlvmConstant, + ppLlvmGlobals, + ppLlvmGlobal, + ppLlvmType, + ppLlvmTypes, + ppLlvmFunctionDecls, + ppLlvmFunctionDecl, + ppLlvmFunctions, + ppLlvmFunction, + llvmSDoc + + ) where + +#include "HsVersions.h" + +import Llvm.AbsSyn +import Llvm.Types + +import Data.List ( intersperse ) +import Pretty +import qualified Outputable as Outp +import Unique + +-------------------------------------------------------------------------------- +-- * Top Level Print functions +-------------------------------------------------------------------------------- + +-- | Print out a whole LLVM module. +ppLlvmModule :: LlvmModule -> Doc +ppLlvmModule (LlvmModule comments constants globals decls funcs) + = ppLlvmComments comments + $+$ empty + $+$ ppLlvmConstants constants + $+$ ppLlvmGlobals globals + $+$ empty + $+$ ppLlvmFunctionDecls decls + $+$ empty + $+$ ppLlvmFunctions funcs + +-- | Print out a multi-line comment, can be inside a function or on its own +ppLlvmComments :: [LMString] -> Doc +ppLlvmComments comments = vcat $ map ppLlvmComment comments + +-- | Print out a comment, can be inside a function or on its own +ppLlvmComment :: LMString -> Doc +ppLlvmComment com = semi <+> (ftext com) + + +-- | Print out a list of global mutable variable definitions +ppLlvmGlobals :: [LMGlobal] -> Doc +ppLlvmGlobals ls = vcat $ map ppLlvmGlobal ls + +-- | Print out a global mutable variable definition +ppLlvmGlobal :: LMGlobal -> Doc +ppLlvmGlobal (var@(LMGlobalVar _ _ link), Nothing) = + ppAssignment var $ text (show link) <+> text "global" <+> + (text $ show (pLower $ getVarType var)) + +ppLlvmGlobal (var@(LMGlobalVar _ _ link), (Just stat)) = + ppAssignment var $ text (show link) <+> text "global" <+> text (show stat) + +ppLlvmGlobal oth = error $ "Non Global var ppr as global! " ++ show oth + + +-- | Print out a list global constant variable +ppLlvmConstants :: [LMConstant] -> Doc +ppLlvmConstants cons = vcat $ map ppLlvmConstant cons + +-- | Print out a global constant variable +ppLlvmConstant :: LMConstant -> Doc +ppLlvmConstant (dst@(LMGlobalVar _ _ link), src) = + ppAssignment dst $ text (show link) <+> text "constant" <+> text (show src) + +ppLlvmConstant c = error $ "Non global var as constant! " ++ show c + + +-- | Print out a list of LLVM type aliases. +ppLlvmTypes :: [LlvmType] -> Doc +ppLlvmTypes tys = vcat $ map ppLlvmType tys + +-- | Print out an LLVM type alias. +ppLlvmType :: LlvmType -> Doc + +ppLlvmType al@(LMAlias _ t) + = (text $ show al) <+> equals <+> (text "type") <+> (text $ show t) + +ppLlvmType (LMFunction t) + = ppLlvmFunctionDecl t + +ppLlvmType _ = empty + + +-- | Print out a list of function definitions. +ppLlvmFunctions :: LlvmFunctions -> Doc +ppLlvmFunctions funcs = vcat $ map ppLlvmFunction funcs + +-- | Print out a function definition. +ppLlvmFunction :: LlvmFunction -> Doc +ppLlvmFunction (LlvmFunction dec attrs body) = + let attrDoc = ppSpaceJoin attrs + in (text "define") <+> (ppLlvmFuncDecSig dec) + <+> attrDoc + $+$ lbrace + $+$ ppLlvmBlocks body + $+$ rbrace + + +-- | Print out a list of function declaration. +ppLlvmFunctionDecls :: LlvmFunctionDecls -> Doc +ppLlvmFunctionDecls decs = vcat $ map ppLlvmFunctionDecl decs + +-- | Print out a function declaration. +-- Declarations define the function type but don't define the actual body of +-- the function. +ppLlvmFunctionDecl :: LlvmFunctionDecl -> Doc +ppLlvmFunctionDecl dec = (text "declare") <+> ppLlvmFuncDecSig dec + +-- | Print out a functions type signature. +-- This differs from [ppLlvmFunctionDecl] in that it is used for both function +-- declarations and defined functions to print out the type. +ppLlvmFuncDecSig :: LlvmFunctionDecl -> Doc +ppLlvmFuncDecSig (LlvmFunctionDecl name link cc retTy argTy params) + = let linkTxt = show link + linkDoc | linkTxt == "" = empty + | otherwise = (text linkTxt) <> space + ppParams = either ppCommaJoin ppCommaJoin params <> + (case argTy of + VarArgs -> (text ", ...") + FixedArgs -> empty) + in linkDoc <> (text $ show cc) <+> (text $ show retTy) + <+> atsym <> (ftext name) <> lparen <+> ppParams <+> rparen + + +-- | Print out a list of LLVM blocks. +ppLlvmBlocks :: LlvmBlocks -> Doc +ppLlvmBlocks blocks = vcat $ map ppLlvmBlock blocks + +-- | Print out an LLVM block. +-- It must be part of a function definition. +ppLlvmBlock :: LlvmBlock -> Doc +ppLlvmBlock (LlvmBlock blockId stmts) + = ppLlvmStatement (MkLabel blockId) + $+$ nest 4 (vcat $ map ppLlvmStatement stmts) + + +-- | Print out an LLVM statement. +ppLlvmStatement :: LlvmStatement -> Doc +ppLlvmStatement stmt + = case stmt of + Assignment dst expr -> ppAssignment dst (ppLlvmExpression expr) + Branch target -> ppBranch target + BranchIf cond ifT ifF -> ppBranchIf cond ifT ifF + Comment comments -> ppLlvmComments comments + MkLabel label -> (llvmSDoc $ pprUnique label) <> colon + Store value ptr -> ppStore value ptr + Switch scrut def tgs -> ppSwitch scrut def tgs + Return result -> ppReturn result + Expr expr -> ppLlvmExpression expr + Unreachable -> text "unreachable" + + +-- | Print out an LLVM expression. +ppLlvmExpression :: LlvmExpression -> Doc +ppLlvmExpression expr + = case expr of + Alloca tp amount -> ppAlloca tp amount + LlvmOp op left right -> ppMachOp op left right + Call tp fp args attrs -> ppCall tp fp args attrs + Cast op from to -> ppCast op from to + Compare op left right -> ppCmpOp op left right + GetElemPtr ptr indexes -> ppGetElementPtr ptr indexes + Load ptr -> ppLoad ptr + Malloc tp amount -> ppMalloc tp amount + Phi tp precessors -> ppPhi tp precessors + + +-------------------------------------------------------------------------------- +-- * Individual print functions +-------------------------------------------------------------------------------- + +-- | Should always be a function pointer. So a global var of function type +-- (since globals are always pointers) or a local var of pointer function type. +ppCall :: LlvmCallType -> LlvmVar -> [LlvmVar] -> [LlvmFuncAttr] -> Doc +ppCall ct fptr vals attrs = case fptr of + -- + -- if local var function pointer, unwrap + LMLocalVar _ (LMPointer (LMFunction d)) -> ppCall' d + + -- should be function type otherwise + LMGlobalVar _ (LMFunction d) _ -> ppCall' d + + -- not pointer or function, so error + _other -> error $ "ppCall called with non LMFunction type!\nMust be " + ++ " called with either global var of function type or " + ++ "local var of pointer function type." + + where + ppCall' (LlvmFunctionDecl _ _ cc ret argTy params) = + let tc = if ct == TailCall then text "tail " else empty + ppValues = ppCommaJoin vals + ppArgTy = either ppCommaJoin (\x -> ppCommaJoin $ map getVarType x) params <> + (case argTy of + VarArgs -> (text ", ...") + FixedArgs -> empty) + fnty = space <> lparen <> ppArgTy <> rparen <> (text "*") + attrDoc = ppSpaceJoin attrs + in tc <> (text "call") <+> (text $ show cc) <+> (text $ show ret) + <> fnty <+> (text $ getName fptr) <> lparen <+> ppValues + <+> rparen <+> attrDoc + + +ppMachOp :: LlvmMachOp -> LlvmVar -> LlvmVar -> Doc +ppMachOp op left right = + (text $ show op) <+> (text $ show (getVarType left)) <+> (text $ getName left) + <> comma <+> (text $ getName right) + + +ppCmpOp :: LlvmCmpOp -> LlvmVar -> LlvmVar -> Doc +ppCmpOp op left right = + let cmpOp + | isInt (getVarType left) && isInt (getVarType right) = text "icmp" + | isFloat (getVarType left) && isFloat (getVarType right) = text "fcmp" + | otherwise = error ("can't compare different types, left = " + ++ (show $ getVarType left) ++ ", right = " + ++ (show $ getVarType right)) + in cmpOp <+> (text $ show op) <+> (text $ show (getVarType left)) + <+> (text $ getName left) <> comma <+> (text $ getName right) + + +ppAssignment :: LlvmVar -> Doc -> Doc +ppAssignment var expr = (text $ getName var) <+> equals <+> expr + + +ppLoad :: LlvmVar -> Doc +ppLoad var = (text "load") <+> (text $ show var) + + +ppStore :: LlvmVar -> LlvmVar -> Doc +ppStore val dst = + (text "store") <+> (text $ show val) <> comma <+> (text $ show dst) + + +ppCast :: LlvmCastOp -> LlvmVar -> LlvmType -> Doc +ppCast op from to = + let castOp = text $ show op + in castOp <+> (text $ show from) <+> (text "to") <+> (text $ show to) + + +ppMalloc :: LlvmType -> Int -> Doc +ppMalloc tp amount = + let amount' = LMLitVar $ LMIntLit (toInteger amount) i32 + in (text "malloc") <+> (text $ show tp) <> comma <+> (text $ show amount') + + +ppAlloca :: LlvmType -> Int -> Doc +ppAlloca tp amount = + let amount' = LMLitVar $ LMIntLit (toInteger amount) i32 + in (text "alloca") <+> (text $ show tp) <> comma <+> (text $ show amount') + + +ppGetElementPtr :: LlvmVar -> [Int] -> Doc +ppGetElementPtr ptr idx = + let indexes = hcat $ map ((comma <+> (text $ show i32) <+>) . text . show) idx + in (text "getelementptr") <+> (text $ show ptr) <> indexes + + +ppReturn :: Maybe LlvmVar -> Doc +ppReturn (Just var) = (text "ret") <+> (text $ show var) +ppReturn Nothing = (text "ret") <+> (text $ show LMVoid) + + +ppBranch :: LlvmVar -> Doc +ppBranch var = (text "br") <+> (text $ show var) + + +ppBranchIf :: LlvmVar -> LlvmVar -> LlvmVar -> Doc +ppBranchIf cond trueT falseT + = (text "br") <+> (text $ show cond) <> comma <+> (text $ show trueT) <> comma + <+> (text $ show falseT) + + +ppPhi :: LlvmType -> [(LlvmVar,LlvmVar)] -> Doc +ppPhi tp preds = + let ppPreds (val, label) = brackets $ (text $ getName val) <> comma + <+> (text $ getName label) + in (text "phi") <+> (text $ show tp) + <+> (hcat $ intersperse comma (map ppPreds preds)) + + +ppSwitch :: LlvmVar -> LlvmVar -> [(LlvmVar,LlvmVar)] -> Doc +ppSwitch scrut dflt targets = + let ppTarget (val, lab) = (text $ show val) <> comma <+> (text $ show lab) + ppTargets xs = brackets $ vcat (map ppTarget xs) + in (text "switch") <+> (text $ show scrut) <> comma <+> (text $ show dflt) + <+> (ppTargets targets) + + +-------------------------------------------------------------------------------- +-- * Misc functions +-------------------------------------------------------------------------------- +atsym :: Doc +atsym = text "@" + +ppCommaJoin :: (Show a) => [a] -> Doc +ppCommaJoin strs = hcat $ intersperse comma (map (text . show) strs) + +ppSpaceJoin :: (Show a) => [a] -> Doc +ppSpaceJoin strs = hcat $ intersperse space (map (text . show) strs) + +-- | Convert SDoc to Doc +llvmSDoc :: Outp.SDoc -> Doc +llvmSDoc d + = Outp.withPprStyleDoc (Outp.mkCodeStyle Outp.CStyle) d + diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs new file mode 100644 index 0000000000..a4080c4d5c --- /dev/null +++ b/compiler/llvmGen/Llvm/Types.hs @@ -0,0 +1,719 @@ +-------------------------------------------------------------------------------- +-- | The LLVM Type System. +-- + +module Llvm.Types where + +#include "HsVersions.h" +#include "ghcconfig.h" + +import Data.Char +import Numeric + +import Constants +import FastString +import Unique + +-- from NCG +import PprBase + +-- ----------------------------------------------------------------------------- +-- * LLVM Basic Types and Variables +-- + +-- | A global mutable variable. Maybe defined or external +type LMGlobal = (LlvmVar, Maybe LlvmStatic) +-- | A global constant variable +type LMConstant = (LlvmVar, LlvmStatic) +-- | A String in LLVM +type LMString = FastString + + +-- | Llvm Types. +data LlvmType + = LMInt Int -- ^ An integer with a given width in bits. + | LMFloat -- ^ 32 bit floating point + | LMDouble -- ^ 64 bit floating point + | LMFloat80 -- ^ 80 bit (x86 only) floating point + | LMFloat128 -- ^ 128 bit floating point + | LMPointer LlvmType -- ^ A pointer to a 'LlvmType' + | LMArray Int LlvmType -- ^ An array of 'LlvmType' + | LMLabel -- ^ A 'LlvmVar' can represent a label (address) + | LMVoid -- ^ Void type + | LMStruct [LlvmType] -- ^ Structure type + | LMAlias LMString LlvmType -- ^ A type alias + + -- | Function type, used to create pointers to functions + | LMFunction LlvmFunctionDecl + deriving (Eq) + +instance Show LlvmType where + show (LMInt size ) = "i" ++ show size + show (LMFloat ) = "float" + show (LMDouble ) = "double" + show (LMFloat80 ) = "x86_fp80" + show (LMFloat128 ) = "fp128" + show (LMPointer x ) = show x ++ "*" + show (LMArray nr tp ) = "[" ++ show nr ++ " x " ++ show tp ++ "]" + show (LMLabel ) = "label" + show (LMVoid ) = "void" + show (LMStruct tys ) = "{" ++ (commaCat tys) ++ "}" + + show (LMFunction (LlvmFunctionDecl _ _ _ r VarArgs p)) + = (show r) ++ " (" ++ (either commaCat commaCat p) ++ ", ...)" + show (LMFunction (LlvmFunctionDecl _ _ _ r FixedArgs p)) + = (show r) ++ " (" ++ (either commaCat commaCat p) ++ ")" + + show (LMAlias s _ ) = "%" ++ unpackFS s + + +-- | Llvm Variables +data LlvmVar + -- | Variables with a global scope. + = LMGlobalVar LMString LlvmType LlvmLinkageType + -- | Variables local to a function or parameters. + | LMLocalVar Unique LlvmType + -- | Named local variables. Sometimes we need to be able to explicitly name + -- variables (e.g for function arguments). + | LMNLocalVar LMString LlvmType + -- | A constant variable + | LMLitVar LlvmLit + deriving (Eq) + +instance Show LlvmVar where + show (LMLitVar x) = show x + show (x ) = show (getVarType x) ++ " " ++ getName x + + +-- | Llvm Literal Data. +-- +-- These can be used inline in expressions. +data LlvmLit + -- | Refers to an integer constant (i64 42). + = LMIntLit Integer LlvmType + -- | Floating point literal + | LMFloatLit Rational LlvmType + deriving (Eq) + +instance Show LlvmLit where + show l = show (getLitType l) ++ " " ++ getLit l + + +-- | Llvm Static Data. +-- +-- These represent the possible global level variables and constants. +data LlvmStatic + = LMComment LMString -- ^ A comment in a static section + | LMStaticLit LlvmLit -- ^ A static variant of a literal value + | LMUninitType LlvmType -- ^ For uninitialised data + | LMStaticStr LMString LlvmType -- ^ Defines a static 'LMString' + | LMStaticArray [LlvmStatic] LlvmType -- ^ A static array + | LMStaticStruc [LlvmStatic] LlvmType -- ^ A static structure type + | LMStaticPointer LlvmVar -- ^ A pointer to other data + + -- static expressions, could split out but leave + -- for moment for ease of use. Not many of them. + + | LMPtoI LlvmStatic LlvmType -- ^ Pointer to Integer conversion + | LMAdd LlvmStatic LlvmStatic -- ^ Constant addition operation + | LMSub LlvmStatic LlvmStatic -- ^ Constant subtraction operation + deriving (Eq) + +instance Show LlvmStatic where + show (LMComment s) = "; " ++ unpackFS s + show (LMStaticLit l ) = show l + show (LMUninitType t) = show t ++ " undef" + show (LMStaticStr s t) = show t ++ " c\"" ++ unpackFS s ++ "\\00\"" + + show (LMStaticArray d t) + = let struc = case d of + [] -> "[]" + ts -> "[" ++ + (show (head ts) ++ concat (map (\x -> "," ++ show x) + (tail ts))) + ++ "]" + in show t ++ " " ++ struc + + show (LMStaticStruc d t) + = let struc = case d of + [] -> "{}" + ts -> "{" ++ + (show (head ts) ++ concat (map (\x -> "," ++ show x) + (tail ts))) + ++ "}" + in show t ++ " " ++ struc + + show (LMStaticPointer v) = show v + + show (LMPtoI v t) + = show t ++ " ptrtoint (" ++ show v ++ " to " ++ show t ++ ")" + + show (LMAdd s1 s2) + = let ty1 = getStatType s1 + in if ty1 == getStatType s2 + then show ty1 ++ " add (" ++ show s1 ++ "," ++ show s2 ++ ")" + else error $ "LMAdd with different types! s1: " + ++ show s1 ++ ", s2: " ++ show s2 + show (LMSub s1 s2) + = let ty1 = getStatType s1 + in if ty1 == getStatType s2 + then show ty1 ++ " sub (" ++ show s1 ++ "," ++ show s2 ++ ")" + else error $ "LMSub with different types! s1: " + ++ show s1 ++ ", s2: " ++ show s2 + + +-- | Concatenate an array together, separated by commas +commaCat :: Show a => [a] -> String +commaCat [] = "" +commaCat x = show (head x) ++ (concat $ map (\y -> "," ++ show y) (tail x)) + +-- ----------------------------------------------------------------------------- +-- ** Operations on LLVM Basic Types and Variables +-- + +-- | Return the variable name or value of the 'LlvmVar' +-- in Llvm IR textual representation (e.g. @\@x@, @%y@ or @42@). +getName :: LlvmVar -> String +getName v@(LMGlobalVar _ _ _ ) = "@" ++ getPlainName v +getName v@(LMLocalVar _ _ ) = "%" ++ getPlainName v +getName v@(LMNLocalVar _ _ ) = "%" ++ getPlainName v +getName v@(LMLitVar _ ) = getPlainName v + +-- | Return the variable name or value of the 'LlvmVar' +-- in a plain textual representation (e.g. @x@, @y@ or @42@). +getPlainName :: LlvmVar -> String +getPlainName (LMGlobalVar x _ _) = unpackFS x +getPlainName (LMLocalVar x _ ) = show x +getPlainName (LMNLocalVar x _ ) = unpackFS x +getPlainName (LMLitVar x ) = getLit x + +-- | Print a literal value. No type. +getLit :: LlvmLit -> String +getLit (LMIntLit i _) = show ((fromInteger i)::Int) +-- In Llvm float literals can be printed in a big-endian hexadecimal format, +-- regardless of underlying architecture. +getLit (LMFloatLit r LMFloat) = fToStr $ fromRational r +getLit (LMFloatLit r LMDouble) = dToStr $ fromRational r +getLit l = error $ "getLit: Usupported LlvmLit type! " ++ show (getLitType l) + +-- | Return the 'LlvmType' of the 'LlvmVar' +getVarType :: LlvmVar -> LlvmType +getVarType (LMGlobalVar _ y _) = y +getVarType (LMLocalVar _ y ) = y +getVarType (LMNLocalVar _ y ) = y +getVarType (LMLitVar l ) = getLitType l + +-- | Return the 'LlvmType' of a 'LlvmLit' +getLitType :: LlvmLit -> LlvmType +getLitType (LMIntLit _ t) = t +getLitType (LMFloatLit _ t) = t + +-- | Return the 'LlvmType' of the 'LlvmStatic' +getStatType :: LlvmStatic -> LlvmType +getStatType (LMStaticLit l ) = getLitType l +getStatType (LMUninitType t) = t +getStatType (LMStaticStr _ t) = t +getStatType (LMStaticArray _ t) = t +getStatType (LMStaticStruc _ t) = t +getStatType (LMStaticPointer v) = getVarType v +getStatType (LMPtoI _ t) = t +getStatType (LMAdd t _) = getStatType t +getStatType (LMSub t _) = getStatType t +getStatType (LMComment _) = error "Can't call getStatType on LMComment!" + +-- | Return the 'LlvmType' of the 'LMGlobal' +getGlobalType :: LMGlobal -> LlvmType +getGlobalType (v, _) = getVarType v + +-- | Return the 'LlvmVar' part of a 'LMGlobal' +getGlobalVar :: LMGlobal -> LlvmVar +getGlobalVar (v, _) = v + +-- | Return the 'LlvmLinkageType' for a 'LlvmVar' +getLink :: LlvmVar -> LlvmLinkageType +getLink (LMGlobalVar _ _ l) = l +getLink _ = ExternallyVisible + +-- | Add a pointer indirection to the supplied type. 'LMLabel' and 'LMVoid' +-- cannot be lifted. +pLift :: LlvmType -> LlvmType +pLift (LMLabel) = error "Labels are unliftable" +pLift (LMVoid) = error "Voids are unliftable" +pLift x = LMPointer x + +-- | Remove the pointer indirection of the supplied type. Only 'LMPointer' +-- constructors can be lowered. +pLower :: LlvmType -> LlvmType +pLower (LMPointer x) = x +pLower x = error $ show x ++ " is a unlowerable type, need a pointer" + +-- | Lower a variable of 'LMPointer' type. +pVarLower :: LlvmVar -> LlvmVar +pVarLower (LMGlobalVar s t l) = LMGlobalVar s (pLower t) l +pVarLower (LMLocalVar s t ) = LMLocalVar s (pLower t) +pVarLower (LMNLocalVar s t ) = LMNLocalVar s (pLower t) +pVarLower (LMLitVar _ ) = error $ "Can't lower a literal type!" + +-- | Test if the given 'LlvmType' is an integer +isInt :: LlvmType -> Bool +isInt (LMInt _) = True +isInt _ = False + +-- | Test if the given 'LlvmType' is a floating point type +isFloat :: LlvmType -> Bool +isFloat LMFloat = True +isFloat LMDouble = True +isFloat LMFloat80 = True +isFloat LMFloat128 = True +isFloat _ = False + +-- | Test if the given 'LlvmType' is an 'LMPointer' construct +isPointer :: LlvmType -> Bool +isPointer (LMPointer _) = True +isPointer _ = False + +-- | Test if a 'LlvmVar' is global. +isGlobal :: LlvmVar -> Bool +isGlobal (LMGlobalVar _ _ _) = True +isGlobal _ = False + +-- | Width in bits of an 'LlvmType', returns 0 if not applicable +llvmWidthInBits :: LlvmType -> Int +llvmWidthInBits (LMInt n) = n +llvmWidthInBits (LMFloat) = 32 +llvmWidthInBits (LMDouble) = 64 +llvmWidthInBits (LMFloat80) = 80 +llvmWidthInBits (LMFloat128) = 128 +-- Could return either a pointer width here or the width of what +-- it points to. We will go with the former for now. +llvmWidthInBits (LMPointer _) = llvmWidthInBits llvmWord +llvmWidthInBits (LMArray _ _) = llvmWidthInBits llvmWord +llvmWidthInBits LMLabel = 0 +llvmWidthInBits LMVoid = 0 +llvmWidthInBits (LMStruct tys) = sum $ map llvmWidthInBits tys +llvmWidthInBits (LMFunction _) = 0 +llvmWidthInBits (LMAlias _ t) = llvmWidthInBits t + + +-- ----------------------------------------------------------------------------- +-- ** Shortcut for Common Types +-- + +i128, i64, i32, i16, i8, i1 :: LlvmType +i128 = LMInt 128 +i64 = LMInt 64 +i32 = LMInt 32 +i16 = LMInt 16 +i8 = LMInt 8 +i1 = LMInt 1 + +-- | The target architectures word size +llvmWord :: LlvmType +llvmWord = LMInt (wORD_SIZE * 8) + +-- | The target architectures pointer size +llvmWordPtr :: LlvmType +llvmWordPtr = pLift llvmWord + + +-- ----------------------------------------------------------------------------- +-- * LLVM Function Types +-- + +-- | An LLVM Function +data LlvmFunctionDecl = LlvmFunctionDecl { + -- | Unique identifier of the function. + decName :: LMString, + -- | LinkageType of the function. + funcLinkage :: LlvmLinkageType, + -- | The calling convention of the function. + funcCc :: LlvmCallConvention, + -- | Type of the returned value + decReturnType :: LlvmType, + -- | Indicates if this function uses varargs + decVarargs :: LlvmParameterListType, + -- | Signature of the parameters, can be just types or full vars + -- if parameter names are required. + decParams :: Either [LlvmType] [LlvmVar] + } + +instance Show LlvmFunctionDecl where + show (LlvmFunctionDecl n l c r VarArgs p) + = (show l) ++ " " ++ (show c) ++ " " ++ (show r) + ++ " @" ++ unpackFS n ++ "(" ++ (either commaCat commaCat p) ++ ", ...)" + show (LlvmFunctionDecl n l c r FixedArgs p) + = (show l) ++ " " ++ (show c) ++ " " ++ (show r) + ++ " @" ++ unpackFS n ++ "(" ++ (either commaCat commaCat p) ++ ")" + +instance Eq LlvmFunctionDecl where + (LlvmFunctionDecl n1 l1 c1 r1 v1 p1) == (LlvmFunctionDecl n2 l2 c2 r2 v2 p2) + = (n1 == n2) && (l1 == l2) && (c1 == c2) && (r1 == r2) + && (v1 == v2) && (p1 == p2) + +type LlvmFunctionDecls = [LlvmFunctionDecl] + + +-- | Llvm Function Attributes. +-- +-- Function attributes are set to communicate additional information about a +-- function. Function attributes are considered to be part of the function, +-- not of the function type, so functions with different parameter attributes +-- can have the same function type. Functions can have multiple attributes. +-- +-- Descriptions taken from <http://llvm.org/docs/LangRef.html#fnattrs> +data LlvmFuncAttr + -- | This attribute indicates that the inliner should attempt to inline this + -- function into callers whenever possible, ignoring any active inlining + -- size threshold for this caller. + = AlwaysInline + -- | This attribute indicates that the source code contained a hint that + -- inlining this function is desirable (such as the \"inline\" keyword in + -- C/C++). It is just a hint; it imposes no requirements on the inliner. + | InlineHint + -- | This attribute indicates that the inliner should never inline this + -- function in any situation. This attribute may not be used together + -- with the alwaysinline attribute. + | NoInline + -- | This attribute suggests that optimization passes and code generator + -- passes make choices that keep the code size of this function low, and + -- otherwise do optimizations specifically to reduce code size. + | OptSize + -- | This function attribute indicates that the function never returns + -- normally. This produces undefined behavior at runtime if the function + -- ever does dynamically return. + | NoReturn + -- | This function attribute indicates that the function never returns with + -- an unwind or exceptional control flow. If the function does unwind, its + -- runtime behavior is undefined. + | NoUnwind + -- | This attribute indicates that the function computes its result (or + -- decides to unwind an exception) based strictly on its arguments, without + -- dereferencing any pointer arguments or otherwise accessing any mutable + -- state (e.g. memory, control registers, etc) visible to caller functions. + -- It does not write through any pointer arguments (including byval + -- arguments) and never changes any state visible to callers. This means + -- that it cannot unwind exceptions by calling the C++ exception throwing + -- methods, but could use the unwind instruction. + | ReadNone + -- | This attribute indicates that the function does not write through any + -- pointer arguments (including byval arguments) or otherwise modify any + -- state (e.g. memory, control registers, etc) visible to caller functions. + -- It may dereference pointer arguments and read state that may be set in + -- the caller. A readonly function always returns the same value (or unwinds + -- an exception identically) when called with the same set of arguments and + -- global state. It cannot unwind an exception by calling the C++ exception + -- throwing methods, but may use the unwind instruction. + | ReadOnly + -- | This attribute indicates that the function should emit a stack smashing + -- protector. It is in the form of a \"canary\"—a random value placed on the + -- stack before the local variables that's checked upon return from the + -- function to see if it has been overwritten. A heuristic is used to + -- determine if a function needs stack protectors or not. + -- + -- If a function that has an ssp attribute is inlined into a function that + -- doesn't have an ssp attribute, then the resulting function will have an + -- ssp attribute. + | Ssp + -- | This attribute indicates that the function should always emit a stack + -- smashing protector. This overrides the ssp function attribute. + -- + -- If a function that has an sspreq attribute is inlined into a function + -- that doesn't have an sspreq attribute or which has an ssp attribute, + -- then the resulting function will have an sspreq attribute. + | SspReq + -- | This attribute indicates that the code generator should not use a red + -- zone, even if the target-specific ABI normally permits it. + | NoRedZone + -- | This attributes disables implicit floating point instructions. + | NoImplicitFloat + -- | This attribute disables prologue / epilogue emission for the function. + -- This can have very system-specific consequences. + | Naked + deriving (Eq) + +instance Show LlvmFuncAttr where + show AlwaysInline = "alwaysinline" + show InlineHint = "inlinehint" + show NoInline = "noinline" + show OptSize = "optsize" + show NoReturn = "noreturn" + show NoUnwind = "nounwind" + show ReadNone = "readnon" + show ReadOnly = "readonly" + show Ssp = "ssp" + show SspReq = "ssqreq" + show NoRedZone = "noredzone" + show NoImplicitFloat = "noimplicitfloat" + show Naked = "naked" + + +-- | Different types to call a function. +data LlvmCallType + -- | Normal call, allocate a new stack frame. + = StdCall + -- | Tail call, perform the call in the current stack frame. + | TailCall + deriving (Eq,Show) + +-- | Different calling conventions a function can use. +data LlvmCallConvention + -- | The C calling convention. + -- This calling convention (the default if no other calling convention is + -- specified) matches the target C calling conventions. This calling + -- convention supports varargs function calls and tolerates some mismatch in + -- the declared prototype and implemented declaration of the function (as + -- does normal C). + = CC_Ccc + -- | This calling convention attempts to make calls as fast as possible + -- (e.g. by passing things in registers). This calling convention allows + -- the target to use whatever tricks it wants to produce fast code for the + -- target, without having to conform to an externally specified ABI + -- (Application Binary Interface). Implementations of this convention should + -- allow arbitrary tail call optimization to be supported. This calling + -- convention does not support varargs and requires the prototype of al + -- callees to exactly match the prototype of the function definition. + | CC_Fastcc + -- | This calling convention attempts to make code in the caller as efficient + -- as possible under the assumption that the call is not commonly executed. + -- As such, these calls often preserve all registers so that the call does + -- not break any live ranges in the caller side. This calling convention + -- does not support varargs and requires the prototype of all callees to + -- exactly match the prototype of the function definition. + | CC_Coldcc + -- | Any calling convention may be specified by number, allowing + -- target-specific calling conventions to be used. Target specific calling + -- conventions start at 64. + | CC_Ncc Int + -- | X86 Specific 'StdCall' convention. LLVM includes a specific alias for it + -- rather than just using CC_Ncc. + | CC_X86_Stdcc + deriving (Eq) + +instance Show LlvmCallConvention where + show CC_Ccc = "ccc" + show CC_Fastcc = "fastcc" + show CC_Coldcc = "coldcc" + show (CC_Ncc i) = "cc " ++ (show i) + show CC_X86_Stdcc = "x86_stdcallcc" + + +-- | Functions can have a fixed amount of parameters, or a variable amount. +data LlvmParameterListType + -- Fixed amount of arguments. + = FixedArgs + -- Variable amount of arguments. + | VarArgs + deriving (Eq,Show) + + +-- | Linkage type of a symbol. +-- +-- The description of the constructors is copied from the Llvm Assembly Language +-- Reference Manual <http://www.llvm.org/docs/LangRef.html#linkage>, because +-- they correspond to the Llvm linkage types. +data LlvmLinkageType + -- | Global values with internal linkage are only directly accessible by + -- objects in the current module. In particular, linking code into a module + -- with an internal global value may cause the internal to be renamed as + -- necessary to avoid collisions. Because the symbol is internal to the + -- module, all references can be updated. This corresponds to the notion + -- of the @static@ keyword in C. + = Internal + -- | Globals with @linkonce@ linkage are merged with other globals of the + -- same name when linkage occurs. This is typically used to implement + -- inline functions, templates, or other code which must be generated + -- in each translation unit that uses it. Unreferenced linkonce globals are + -- allowed to be discarded. + | LinkOnce + -- | @weak@ linkage is exactly the same as linkonce linkage, except that + -- unreferenced weak globals may not be discarded. This is used for globals + -- that may be emitted in multiple translation units, but that are not + -- guaranteed to be emitted into every translation unit that uses them. One + -- example of this are common globals in C, such as @int X;@ at global + -- scope. + | Weak + -- | @appending@ linkage may only be applied to global variables of pointer + -- to array type. When two global variables with appending linkage are + -- linked together, the two global arrays are appended together. This is + -- the Llvm, typesafe, equivalent of having the system linker append + -- together @sections@ with identical names when .o files are linked. + | Appending + -- | The semantics of this linkage follow the ELF model: the symbol is weak + -- until linked, if not linked, the symbol becomes null instead of being an + -- undefined reference. + | ExternWeak + -- | The symbol participates in linkage and can be used to resolve external + -- symbol references. + | ExternallyVisible + -- | Alias for 'ExternallyVisible' but with explicit textual form in LLVM + -- assembly. + | External + deriving (Eq) + +instance Show LlvmLinkageType where + show Internal = "internal" + show LinkOnce = "linkonce" + show Weak = "weak" + show Appending = "appending" + show ExternWeak = "extern_weak" + -- ExternallyVisible does not have a textual representation, it is + -- the linkage type a function resolves to if no other is specified + -- in Llvm. + show ExternallyVisible = "" + show External = "external" + + +-- ----------------------------------------------------------------------------- +-- * LLVM Operations +-- + +-- | Llvm binary operators machine operations. +data LlvmMachOp + = LM_MO_Add -- ^ add two integer, floating point or vector values. + | LM_MO_Sub -- ^ subtract two ... + | LM_MO_Mul -- ^ multiply .. + | LM_MO_UDiv -- ^ unsigned integer or vector division. + | LM_MO_SDiv -- ^ signed integer .. + | LM_MO_FDiv -- ^ floating point .. + | LM_MO_URem -- ^ unsigned integer or vector remainder (mod) + | LM_MO_SRem -- ^ signed ... + | LM_MO_FRem -- ^ floating point ... + + -- | Left shift + | LM_MO_Shl + -- | Logical shift right + -- Shift right, filling with zero + | LM_MO_LShr + -- | Arithmetic shift right + -- The most significant bits of the result will be equal to the sign bit of + -- the left operand. + | LM_MO_AShr + + | LM_MO_And -- ^ AND bitwise logical operation. + | LM_MO_Or -- ^ OR bitwise logical operation. + | LM_MO_Xor -- ^ XOR bitwise logical operation. + deriving (Eq) + +instance Show LlvmMachOp where + show LM_MO_Add = "add" + show LM_MO_Sub = "sub" + show LM_MO_Mul = "mul" + show LM_MO_UDiv = "udiv" + show LM_MO_SDiv = "sdiv" + show LM_MO_FDiv = "fdiv" + show LM_MO_URem = "urem" + show LM_MO_SRem = "srem" + show LM_MO_FRem = "frem" + show LM_MO_Shl = "shl" + show LM_MO_LShr = "lshr" + show LM_MO_AShr = "ashr" + show LM_MO_And = "and" + show LM_MO_Or = "or" + show LM_MO_Xor = "xor" + + +-- | Llvm compare operations. +data LlvmCmpOp + = LM_CMP_Eq -- ^ Equal (Signed and Unsigned) + | LM_CMP_Ne -- ^ Not equal (Signed and Unsigned) + | LM_CMP_Ugt -- ^ Unsigned greater than + | LM_CMP_Uge -- ^ Unsigned greater than or equal + | LM_CMP_Ult -- ^ Unsigned less than + | LM_CMP_Ule -- ^ Unsigned less than or equal + | LM_CMP_Sgt -- ^ Signed greater than + | LM_CMP_Sge -- ^ Signed greater than or equal + | LM_CMP_Slt -- ^ Signed less than + | LM_CMP_Sle -- ^ Signed less than or equal + + -- Float comparisons. GHC uses a mix of ordered and unordered float + -- comparisons. + | LM_CMP_Feq -- ^ Float equal + | LM_CMP_Fne -- ^ Float not equal + | LM_CMP_Fgt -- ^ Float greater than + | LM_CMP_Fge -- ^ Float greater than or equal + | LM_CMP_Flt -- ^ Float less than + | LM_CMP_Fle -- ^ Float less than or equal + deriving (Eq) + +instance Show LlvmCmpOp where + show LM_CMP_Eq = "eq" + show LM_CMP_Ne = "ne" + show LM_CMP_Ugt = "ugt" + show LM_CMP_Uge = "uge" + show LM_CMP_Ult = "ult" + show LM_CMP_Ule = "ule" + show LM_CMP_Sgt = "sgt" + show LM_CMP_Sge = "sge" + show LM_CMP_Slt = "slt" + show LM_CMP_Sle = "sle" + show LM_CMP_Feq = "oeq" + show LM_CMP_Fne = "une" + show LM_CMP_Fgt = "ogt" + show LM_CMP_Fge = "oge" + show LM_CMP_Flt = "olt" + show LM_CMP_Fle = "ole" + + +-- | Llvm cast operations. +data LlvmCastOp + = LM_Trunc -- ^ Integer truncate + | LM_Zext -- ^ Integer extend (zero fill) + | LM_Sext -- ^ Integer extend (sign fill) + | LM_Fptrunc -- ^ Float truncate + | LM_Fpext -- ^ Float extend + | LM_Fptoui -- ^ Float to unsigned Integer + | LM_Fptosi -- ^ Float to signed Integer + | LM_Uitofp -- ^ Unsigned Integer to Float + | LM_Sitofp -- ^ Signed Int to Float + | LM_Ptrtoint -- ^ Pointer to Integer + | LM_Inttoptr -- ^ Integer to Pointer + | LM_Bitcast -- ^ Cast between types where no bit manipulation is needed + deriving (Eq) + +instance Show LlvmCastOp where + show LM_Trunc = "trunc" + show LM_Zext = "zext" + show LM_Sext = "sext" + show LM_Fptrunc = "fptrunc" + show LM_Fpext = "fpext" + show LM_Fptoui = "fptoui" + show LM_Fptosi = "fptosi" + show LM_Uitofp = "uitofp" + show LM_Sitofp = "sitofp" + show LM_Ptrtoint = "ptrtoint" + show LM_Inttoptr = "inttoptr" + show LM_Bitcast = "bitcast" + + +-- ----------------------------------------------------------------------------- +-- * Floating point conversion +-- + +-- | Convert a Haskell Float to an LLVM hex encoded floating point form +fToStr :: Float -> String +fToStr f = dToStr $ realToFrac f + +-- | Convert a Haskell Double to an LLVM hex encoded floating point form +dToStr :: Double -> String +dToStr d = + let bs = doubleToBytes d + hex d' = case showHex d' "" of + [] -> error "dToStr: too few hex digits for float" + [x] -> ['0',x] + [x,y] -> [x,y] + _ -> error "dToStr: too many hex digits for float" + + str' = concat . fixEndian . (map hex) $ bs + str = map toUpper str' + in "0x" ++ str + +-- | Reverse or leave byte data alone to fix endianness on this +-- target. LLVM generally wants things in Big-Endian form +-- regardless of target architecture. +fixEndian :: [a] -> [a] +#ifdef WORDS_BIGENDIAN +fixEndian = id +#else +fixEndian = reverse +#endif + diff --git a/compiler/llvmGen/LlvmCodeGen.hs b/compiler/llvmGen/LlvmCodeGen.hs new file mode 100644 index 0000000000..e0485e703c --- /dev/null +++ b/compiler/llvmGen/LlvmCodeGen.hs @@ -0,0 +1,166 @@ +-- ----------------------------------------------------------------------------- +-- | This is the top-level module in the LLVM code generator. +-- + +module LlvmCodeGen ( llvmCodeGen ) where + +#include "HsVersions.h" + +import LlvmCodeGen.Base +import LlvmCodeGen.CodeGen +import LlvmCodeGen.Data +import LlvmCodeGen.Ppr + +import Cmm +import CgUtils ( fixStgRegisters ) +import PprCmm + +import BufWrite +import DynFlags +import ErrUtils +import Outputable +import qualified Pretty as Prt +import UniqSupply + +import System.IO + +-- ----------------------------------------------------------------------------- +-- | Top-level of the llvm codegen +-- +llvmCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmm] -> IO () +llvmCodeGen dflags h us cmms + = do + let cmm = concat $ map extractRawCmm cmms + + bufh <- newBufHandle h + + Prt.bufLeftRender bufh $ pprLlvmHeader + + env <- cmmDataLlvmGens dflags bufh cmm + cmmProcLlvmGens dflags bufh us env cmm + + bFlush bufh + + return () + + where extractRawCmm (Cmm tops) = tops + + +-- ----------------------------------------------------------------------------- +-- | Do llvm code generation on all these cmms data sections. +-- +cmmDataLlvmGens + :: DynFlags + -> BufHandle + -> [RawCmmTop] + -> IO ( LlvmEnv ) + +cmmDataLlvmGens _ _ [] + = return ( initLlvmEnv ) + +cmmDataLlvmGens dflags h cmm = + let exData (CmmData s d) = [(s,d)] + exData _ = [] + + exProclbl (CmmProc _ l _ _) = [(strCLabel_llvm l)] + exProclbl _ = [] + + cdata = concat $ map exData cmm + -- put the functions into the enviornment + cproc = concat $ map exProclbl cmm + env = foldl (\e l -> funInsert l llvmFunTy e) initLlvmEnv cproc + in cmmDataLlvmGens' dflags h env cdata [] + +cmmDataLlvmGens' + :: DynFlags + -> BufHandle + -> LlvmEnv + -> [(Section, [CmmStatic])] + -> [LlvmUnresData] + -> IO ( LlvmEnv ) + +cmmDataLlvmGens' dflags h env [] lmdata + = do + let (env', lmdata') = resolveLlvmDatas dflags env lmdata [] + let lmdoc = Prt.vcat $ map (pprLlvmData dflags) lmdata' + + dumpIfSet_dyn dflags Opt_D_dump_llvm "LLVM Code" $ docToSDoc lmdoc + + Prt.bufLeftRender h lmdoc + return env' + +cmmDataLlvmGens' dflags h env (cmm:cmms) lmdata + = do + let lmdata'@(l, ty, _) = genLlvmData dflags cmm + let env' = funInsert (strCLabel_llvm l) ty env + cmmDataLlvmGens' dflags h env' cmms (lmdata ++ [lmdata']) + + +-- ----------------------------------------------------------------------------- +-- | Do llvm code generation on all these cmms procs. +-- +cmmProcLlvmGens + :: DynFlags + -> BufHandle + -> UniqSupply + -> LlvmEnv + -> [RawCmmTop] + -> IO () + +cmmProcLlvmGens _ _ _ _ [] + = return () + +cmmProcLlvmGens dflags h us env (cmm : cmms) + = do + (us', env', llvm) <- cmmLlvmGen dflags us (clearVars env) cmm + + Prt.bufLeftRender h $ Prt.vcat $ map (pprLlvmCmmTop dflags) llvm + + cmmProcLlvmGens dflags h us' env' cmms + + +-- | Complete llvm code generation phase for a single top-level chunk of Cmm. +cmmLlvmGen + :: DynFlags + -> UniqSupply + -> LlvmEnv + -> RawCmmTop -- ^ the cmm to generate code for + -> IO ( UniqSupply, + LlvmEnv, + [LlvmCmmTop] ) -- llvm code + +cmmLlvmGen dflags us env cmm + = do + -- rewrite assignments to global regs + let fixed_cmm = fixStgRegisters cmm + + dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" + (pprCmm $ Cmm [fixed_cmm]) + + -- generate llvm code from cmm + let ((env', llvmBC), usGen) = initUs us $ genLlvmCode dflags env fixed_cmm + + dumpIfSet_dyn dflags Opt_D_dump_llvm "LLVM Code" + (vcat $ map (docToSDoc . pprLlvmCmmTop dflags) llvmBC) + + return (usGen, env', llvmBC) + + +-- ----------------------------------------------------------------------------- +-- | Instruction selection +-- +genLlvmCode + :: DynFlags + -> LlvmEnv + -> RawCmmTop + -> UniqSM (LlvmEnv, [LlvmCmmTop]) + +genLlvmCode _ env (CmmData _ _) + = return (env, []) + +genLlvmCode _ env (CmmProc _ _ _ (ListGraph [])) + = return (env, []) + +genLlvmCode _ env cp@(CmmProc _ _ _ _) + = genLlvmProc env cp + diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs new file mode 100644 index 0000000000..36ffa18d63 --- /dev/null +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -0,0 +1,164 @@ +-- ---------------------------------------------------------------------------- +-- | Base LLVM Code Generation module +-- +-- Contains functions useful through out the code generator. +-- + +module LlvmCodeGen.Base ( + + LlvmCmmTop, LlvmBasicBlock, + LlvmUnresData, LlvmData, UnresLabel, UnresStatic, + + LlvmEnv, initLlvmEnv, clearVars, varLookup, varInsert, + funLookup, funInsert, + + cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy, + llvmFunSig, llvmStdFunAttrs, llvmPtrBits, llvmGhcCC, + + strCLabel_llvm, + genCmmLabelRef, genStringLabelRef + + ) where + +#include "HsVersions.h" + +import Llvm +import LlvmCodeGen.Regs + +import CgUtils ( activeStgRegs ) +import CLabel +import Cmm + +import FastString +import qualified Outputable as Outp +import Unique +import UniqFM + +-- ---------------------------------------------------------------------------- +-- * Some Data Types +-- + +type LlvmCmmTop = GenCmmTop LlvmData [CmmStatic] (ListGraph LlvmStatement) +type LlvmBasicBlock = GenBasicBlock LlvmStatement + +-- | Unresolved code. +-- Of the form: (data label, data type, unresovled data) +type LlvmUnresData = (CLabel, LlvmType, [UnresStatic]) + +-- | Top level LLVM Data (globals and type aliases) +type LlvmData = ([LMGlobal], [LlvmType]) + +-- | An unresolved Label. +-- +-- Labels are unresolved when we haven't yet determined if they are defined in +-- the module we are currently compiling, or an external one. +type UnresLabel = CmmLit +type UnresStatic = Either UnresLabel LlvmStatic + +-- ---------------------------------------------------------------------------- +-- * Type translations +-- + +-- | Translate a basic CmmType to an LlvmType. +cmmToLlvmType :: CmmType -> LlvmType +cmmToLlvmType ty | isFloatType ty = widthToLlvmFloat $ typeWidth ty + | otherwise = widthToLlvmInt $ typeWidth ty + +-- | Translate a Cmm Float Width to a LlvmType. +widthToLlvmFloat :: Width -> LlvmType +widthToLlvmFloat W32 = LMFloat +widthToLlvmFloat W64 = LMDouble +widthToLlvmFloat W80 = LMFloat80 +widthToLlvmFloat W128 = LMFloat128 +widthToLlvmFloat w = panic $ "widthToLlvmFloat: Bad float size: " ++ show w + +-- | Translate a Cmm Bit Width to a LlvmType. +widthToLlvmInt :: Width -> LlvmType +widthToLlvmInt w = LMInt $ widthInBits w + +-- | GHC Call Convention for LLVM +llvmGhcCC :: LlvmCallConvention +llvmGhcCC = CC_Ncc 10 + +-- | Llvm Function type for Cmm function +llvmFunTy :: LlvmType +llvmFunTy + = LMFunction $ + LlvmFunctionDecl (fsLit "a") ExternallyVisible llvmGhcCC LMVoid FixedArgs + (Left $ map getVarType llvmFunArgs) + +-- | Llvm Function signature +llvmFunSig :: CLabel -> LlvmLinkageType -> LlvmFunctionDecl +llvmFunSig lbl link + = let n = strCLabel_llvm lbl + in LlvmFunctionDecl n link llvmGhcCC LMVoid FixedArgs + (Right llvmFunArgs) + +-- | A Function's arguments +llvmFunArgs :: [LlvmVar] +llvmFunArgs = map lmGlobalRegArg activeStgRegs + +-- | Llvm standard fun attributes +llvmStdFunAttrs :: [LlvmFuncAttr] +llvmStdFunAttrs = [NoUnwind] + +-- | Pointer width +llvmPtrBits :: Int +llvmPtrBits = widthInBits $ typeWidth gcWord + + +-- ---------------------------------------------------------------------------- +-- * Environment Handling +-- + +type LlvmEnvMap = UniqFM LlvmType +-- two maps, one for functions and one for local vars. +type LlvmEnv = (LlvmEnvMap, LlvmEnvMap) + +-- | Get initial Llvm environment. +initLlvmEnv :: LlvmEnv +initLlvmEnv = (emptyUFM, emptyUFM) + +-- | Clear variables from the environment. +clearVars :: LlvmEnv -> LlvmEnv +clearVars (e1, _) = (e1, emptyUFM) + +-- | Insert functions into the environment. +varInsert, funInsert :: Uniquable key => key -> LlvmType -> LlvmEnv -> LlvmEnv +varInsert s t (e1, e2) = (e1, addToUFM e2 s t) +funInsert s t (e1, e2) = (addToUFM e1 s t, e2) + +-- | Lookup functions in the environment. +varLookup, funLookup :: Uniquable key => key -> LlvmEnv -> Maybe LlvmType +varLookup s (_, e2) = lookupUFM e2 s +funLookup s (e1, _) = lookupUFM e1 s + + +-- ---------------------------------------------------------------------------- +-- * Label handling +-- + +-- | Pretty print a 'CLabel'. +strCLabel_llvm :: CLabel -> LMString +strCLabel_llvm l = (fsLit . show . llvmSDoc . pprCLabel) l + +-- | Create an external definition for a 'CLabel' defined in another module. +genCmmLabelRef :: CLabel -> LMGlobal +genCmmLabelRef cl = + let mcl = strCLabel_llvm cl + in (LMGlobalVar mcl (LMPointer (LMArray 0 llvmWord)) External, Nothing) + +-- | As above ('genCmmLabelRef') but taking a 'LMString', not 'CLabel'. +genStringLabelRef :: LMString -> LMGlobal +genStringLabelRef cl = + (LMGlobalVar cl (LMPointer (LMArray 0 llvmWord)) External, Nothing) + + +-- ---------------------------------------------------------------------------- +-- * Misc +-- + +-- | Error function +panic :: String -> a +panic s = Outp.panic $ "LlvmCodeGen.Base." ++ s + diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs new file mode 100644 index 0000000000..fb29f7acec --- /dev/null +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -0,0 +1,958 @@ +-- ---------------------------------------------------------------------------- +-- | Handle conversion of CmmProc to LLVM code. +-- + +module LlvmCodeGen.CodeGen ( genLlvmProc ) where + +#include "HsVersions.h" + +import Llvm +import LlvmCodeGen.Base +import LlvmCodeGen.Regs + +import BlockId +import CgUtils ( activeStgRegs ) +import CLabel +import Cmm +import qualified PprCmm +import OrdList + +import BasicTypes +import FastString +import ForeignCall +import Outputable hiding ( panic, pprPanic ) +import qualified Outputable +import UniqSupply +import Unique +import Util + +type LlvmStatements = OrdList LlvmStatement + +-- ----------------------------------------------------------------------------- +-- | Top-level of the llvm proc codegen +-- +genLlvmProc :: LlvmEnv -> RawCmmTop -> UniqSM (LlvmEnv, [LlvmCmmTop]) +genLlvmProc env (CmmData _ _) + = return (env, []) + +genLlvmProc env (CmmProc _ _ _ (ListGraph [])) + = return (env, []) + +genLlvmProc env (CmmProc info lbl params (ListGraph blocks)) + = do + (env', lmblocks, lmdata) <- basicBlocksCodeGen env blocks ([], []) + + let proc = CmmProc info lbl params (ListGraph lmblocks) + let tops = lmdata ++ [proc] + + return (env', tops) + + +-- ----------------------------------------------------------------------------- +-- * Block code generation +-- + +-- | Generate code for a list of blocks that make up a complete procedure. +basicBlocksCodeGen :: LlvmEnv + -> [CmmBasicBlock] + -> ( [LlvmBasicBlock] , [LlvmCmmTop] ) + -> UniqSM (LlvmEnv, [LlvmBasicBlock] , [LlvmCmmTop] ) +basicBlocksCodeGen env ([]) (blocks, tops) + = do let (blocks', allocs) = mapAndUnzip dominateAllocs blocks + let allocs' = concat allocs + let ((BasicBlock id fstmts):rblocks) = blocks' + let fblocks = (BasicBlock id (funPrologue ++ allocs' ++ fstmts)):rblocks + return (env, fblocks, tops) + +basicBlocksCodeGen env (block:blocks) (lblocks', ltops') + = do (env', lb, lt) <- basicBlockCodeGen env block + let lblocks = lblocks' ++ lb + let ltops = ltops' ++ lt + basicBlocksCodeGen env' blocks (lblocks, ltops) + + +-- | Generate code for one block +basicBlockCodeGen :: LlvmEnv + -> CmmBasicBlock + -> UniqSM ( LlvmEnv, [LlvmBasicBlock], [LlvmCmmTop] ) +basicBlockCodeGen env (BasicBlock id stmts) + = do (env', instrs, top) <- stmtsToInstrs env stmts (nilOL, []) + return (env', [BasicBlock id (fromOL instrs)], top) + + +-- | Allocations need to be extracted so they can be moved to the entry +-- of a function to make sure they dominate all posible paths in the CFG. +dominateAllocs :: LlvmBasicBlock -> (LlvmBasicBlock, [LlvmStatement]) +dominateAllocs (BasicBlock id stmts) + = (BasicBlock id allstmts, allallocs) + where + (allstmts, allallocs) = foldl split ([],[]) stmts + split (stmts', allocs) s@(Assignment _ (Alloca _ _)) + = (stmts', allocs ++ [s]) + split (stmts', allocs) other + = (stmts' ++ [other], allocs) + + +-- ----------------------------------------------------------------------------- +-- * CmmStmt code generation +-- + +-- A statement conversion return data. +-- * LlvmEnv: The new enviornment +-- * LlvmStatements: The compiled llvm statements. +-- * LlvmCmmTop: Any global data needed. +type StmtData = (LlvmEnv, LlvmStatements, [LlvmCmmTop]) + + +-- | Convert a list of CmmStmt's to LlvmStatement's +stmtsToInstrs :: LlvmEnv -> [CmmStmt] -> (LlvmStatements, [LlvmCmmTop]) + -> UniqSM StmtData +stmtsToInstrs env [] (llvm, top) + = return (env, llvm, top) + +stmtsToInstrs env (stmt : stmts) (llvm, top) + = do (env', instrs, tops) <- stmtToInstrs env stmt + stmtsToInstrs env' stmts (llvm `appOL` instrs, top ++ tops) + + +-- | Convert a CmmStmt to a list of LlvmStatement's +stmtToInstrs :: LlvmEnv -> CmmStmt + -> UniqSM StmtData +stmtToInstrs env stmt = case stmt of + + CmmNop -> return (env, nilOL, []) + CmmComment _ -> return (env, nilOL, []) -- nuke comments +-- CmmComment s -> return (env, unitOL $ Comment (lines $ unpackFS s), +-- []) + + CmmAssign reg src -> genAssign env reg src + CmmStore addr src -> genStore env addr src + + CmmBranch id -> genBranch env id + CmmCondBranch arg id -> genCondBranch env arg id + CmmSwitch arg ids -> genSwitch env arg ids + + -- Foreign Call + CmmCall target res args _ ret + -> genCall env target res args ret + + -- Tail call + CmmJump arg _ -> genJump env arg + + -- CPS, only tail calls, no return's + -- Actually, there are a few return statements that occur because of hand + -- written cmm code. + CmmReturn _ + -> return (env, unitOL $ Return Nothing, []) + + +-- | Foreign Calls +genCall :: LlvmEnv -> CmmCallTarget -> HintedCmmFormals -> HintedCmmActuals + -> CmmReturnInfo -> UniqSM StmtData + +-- Write barrier needs to be handled specially as it is implemented as an llvm +-- intrinsic function. +genCall env (CmmPrim MO_WriteBarrier) _ _ _ = do + let fname = fsLit "llvm.memory.barrier" + let funSig = + LlvmFunctionDecl + fname + ExternallyVisible + CC_Ccc + LMVoid + FixedArgs + (Left [i1, i1, i1, i1, i1]) + let fty = LMFunction funSig + + let fv = LMGlobalVar fname fty (funcLinkage funSig) + let tops = case funLookup fname env of + Just _ -> [] + Nothing -> [CmmData Data [([],[fty])]] + + let args = [lmTrue, lmTrue, lmTrue, lmTrue, lmTrue] + let s1 = Expr $ Call StdCall fv args llvmStdFunAttrs + let env' = funInsert fname fty env + + return (env', unitOL s1, tops) + + where + lmTrue :: LlvmVar + lmTrue = LMLitVar $ LMIntLit (-1) i1 + +-- Handle all other foreign calls and prim ops. +genCall env target res args ret = do + + -- paramater types + let arg_type (CmmHinted _ AddrHint) = pLift i8 + -- cast pointers to i8*. Llvm equivalent of void* + arg_type (CmmHinted expr _ ) = cmmToLlvmType $ cmmExprType expr + + -- ret type + let ret_type ([]) = LMVoid + ret_type ([CmmHinted _ AddrHint]) = pLift i8 + ret_type ([CmmHinted reg _]) = cmmToLlvmType $ localRegType reg + ret_type t = panic $ "genCall: Too many return values! Can only handle" + ++ " 0 or 1, given " ++ show (length t) ++ "." + + -- extract cmm call convention + let cconv = case target of + CmmCallee _ conv -> conv + CmmPrim _ -> PrimCallConv + + -- translate to llvm call convention + let lmconv = case cconv of +#if i386_TARGET_ARCH || x86_64_TARGET_ARCH + StdCallConv -> CC_X86_Stdcc +#else + StdCallConv -> CC_Ccc +#endif + CCallConv -> CC_Ccc + PrimCallConv -> CC_Ccc + CmmCallConv -> panic "CmmCallConv not supported here!" + + {- + Some of the possibilities here are a worry with the use of a custom + calling convention for passing STG args. In practice the more + dangerous combinations (e.g StdCall + llvmGhcCC) don't occur. + + The native code generator only handles StdCall and CCallConv. + -} + + -- call attributes + let fnAttrs | ret == CmmNeverReturns = NoReturn : llvmStdFunAttrs + | otherwise = llvmStdFunAttrs + + -- fun type + let ccTy = StdCall -- tail calls should be done through CmmJump + let retTy = ret_type res + let argTy = Left $ map arg_type args + let funTy name = LMFunction $ + LlvmFunctionDecl name ExternallyVisible lmconv retTy FixedArgs argTy + + -- get paramter values + (env1, argVars, stmts1, top1) <- arg_vars env args ([], nilOL, []) + + -- get the return register + let ret_reg ([CmmHinted reg hint]) = (reg, hint) + ret_reg t = panic $ "genCall: Bad number of registers! Can only handle" + ++ " 1, given " ++ show (length t) ++ "." + + -- deal with call types + let getFunPtr :: CmmCallTarget -> UniqSM ExprData + getFunPtr targ = case targ of + CmmCallee (CmmLit (CmmLabel lbl)) _ -> do + let name = strCLabel_llvm lbl + case funLookup name env1 of + Just ty'@(LMFunction sig) -> do + -- Function in module in right form + let fun = LMGlobalVar name ty' (funcLinkage sig) + return (env1, fun, nilOL, []) + + Just _ -> do + -- label in module but not function pointer, convert + let fty@(LMFunction sig) = funTy name + let fun = LMGlobalVar name fty (funcLinkage sig) + (v1, s1) <- doExpr (pLift fty) + $ Cast LM_Bitcast fun (pLift fty) + return (env1, v1, unitOL s1, []) + + Nothing -> do + -- label not in module, create external reference + let fty@(LMFunction sig) = funTy name + let fun = LMGlobalVar name fty (funcLinkage sig) + let top = CmmData Data [([],[fty])] + let env' = funInsert name fty env1 + return (env', fun, nilOL, [top]) + + CmmCallee expr _ -> do + (env', v1, stmts, top) <- exprToVar env1 expr + let fty = funTy $ fsLit "dynamic" + let cast = case getVarType v1 of + ty | isPointer ty -> LM_Bitcast + ty | isInt ty -> LM_Inttoptr + + ty -> panic $ "genCall: Expr is of bad type for function" + ++ " call! (" ++ show (ty) ++ ")" + + (v2,s1) <- doExpr (pLift fty) $ Cast cast v1 (pLift fty) + return (env', v2, stmts `snocOL` s1, top) + + CmmPrim mop -> do + let name = cmmPrimOpFunctions mop + let lbl = mkForeignLabel name Nothing + ForeignLabelInExternalPackage IsFunction + getFunPtr $ CmmCallee (CmmLit (CmmLabel lbl)) CCallConv + + (env2, fptr, stmts2, top2) <- getFunPtr target + + let retStmt | ccTy == TailCall = unitOL $ Return Nothing + | ret == CmmNeverReturns = unitOL $ Unreachable + | otherwise = nilOL + + -- make the actual call + case retTy of + LMVoid -> do + let s1 = Expr $ Call ccTy fptr argVars fnAttrs + let allStmts = stmts1 `appOL` stmts2 `snocOL` s1 `appOL` retStmt + return (env2, allStmts, top1 ++ top2) + + _ -> do + let (creg, _) = ret_reg res + let (env3, vreg, stmts3, top3) = getCmmReg env2 (CmmLocal creg) + let allStmts = stmts1 `appOL` stmts2 `appOL` stmts3 + (v1, s1) <- doExpr retTy $ Call ccTy fptr argVars fnAttrs + if retTy == pLower (getVarType vreg) + then do + let s2 = Store v1 vreg + return (env3, allStmts `snocOL` s1 `snocOL` s2 + `appOL` retStmt, top1 ++ top2 ++ top3) + else do + let ty = pLower $ getVarType vreg + let op = case ty of + vt | isPointer vt -> LM_Bitcast + | isInt vt -> LM_Ptrtoint + | otherwise -> + panic $ "genCall: CmmReg bad match for" + ++ " returned type!" + + (v2, s2) <- doExpr ty $ Cast op v1 ty + let s3 = Store v2 vreg + return (env3, allStmts `snocOL` s1 `snocOL` s2 `snocOL` s3 + `appOL` retStmt, top1 ++ top2 ++ top3) + + +-- | Conversion of call arguments. +arg_vars :: LlvmEnv + -> HintedCmmActuals + -> ([LlvmVar], LlvmStatements, [LlvmCmmTop]) + -> UniqSM (LlvmEnv, [LlvmVar], LlvmStatements, [LlvmCmmTop]) + +arg_vars env [] (vars, stmts, tops) + = return (env, vars, stmts, tops) + +arg_vars env (CmmHinted e AddrHint:rest) (vars, stmts, tops) + = do (env', v1, stmts', top') <- exprToVar env e + let op = case getVarType v1 of + ty | isPointer ty -> LM_Bitcast + ty | isInt ty -> LM_Inttoptr + + a -> panic $ "genCall: Can't cast llvmType to i8*! (" + ++ show a ++ ")" + + (v2, s1) <- doExpr (pLift i8) $ Cast op v1 (pLift i8) + arg_vars env' rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1, tops ++ top') + +arg_vars env (CmmHinted e _:rest) (vars, stmts, tops) + = do (env', v1, stmts', top') <- exprToVar env e + arg_vars env' rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top') + +-- | Decide what C function to use to implement a CallishMachOp +cmmPrimOpFunctions :: CallishMachOp -> FastString +cmmPrimOpFunctions mop + = case mop of + MO_F32_Exp -> fsLit "expf" + MO_F32_Log -> fsLit "logf" + MO_F32_Sqrt -> fsLit "sqrtf" + MO_F32_Pwr -> fsLit "powf" + + MO_F32_Sin -> fsLit "sinf" + MO_F32_Cos -> fsLit "cosf" + MO_F32_Tan -> fsLit "tanf" + + MO_F32_Asin -> fsLit "asinf" + MO_F32_Acos -> fsLit "acosf" + MO_F32_Atan -> fsLit "atanf" + + MO_F32_Sinh -> fsLit "sinhf" + MO_F32_Cosh -> fsLit "coshf" + MO_F32_Tanh -> fsLit "tanhf" + + MO_F64_Exp -> fsLit "exp" + MO_F64_Log -> fsLit "log" + MO_F64_Sqrt -> fsLit "sqrt" + MO_F64_Pwr -> fsLit "pow" + + MO_F64_Sin -> fsLit "sin" + MO_F64_Cos -> fsLit "cos" + MO_F64_Tan -> fsLit "tan" + + MO_F64_Asin -> fsLit "asin" + MO_F64_Acos -> fsLit "acos" + MO_F64_Atan -> fsLit "atan" + + MO_F64_Sinh -> fsLit "sinh" + MO_F64_Cosh -> fsLit "cosh" + MO_F64_Tanh -> fsLit "tanh" + + a -> panic $ "cmmPrimOpFunctions: Unknown callish op! (" ++ show a ++ ")" + + +-- | Tail function calls +genJump :: LlvmEnv -> CmmExpr -> UniqSM StmtData + +-- Call to known function +genJump env (CmmLit (CmmLabel lbl)) = do + (env', vf, stmts, top) <- getHsFunc env lbl + (stgRegs, stgStmts) <- funEpilogue + let s1 = Expr $ Call TailCall vf stgRegs llvmStdFunAttrs + let s2 = Return Nothing + return (env', stmts `appOL` stgStmts `snocOL` s1 `snocOL` s2, top) + + +-- Call to unknown function / address +genJump env expr = do + let fty = llvmFunTy + (env', vf, stmts, top) <- exprToVar env expr + + let cast = case getVarType vf of + ty | isPointer ty -> LM_Bitcast + ty | isInt ty -> LM_Inttoptr + + ty -> panic $ "genJump: Expr is of bad type for function call! (" + ++ show (ty) ++ ")" + + (v1, s1) <- doExpr (pLift fty) $ Cast cast vf (pLift fty) + (stgRegs, stgStmts) <- funEpilogue + let s2 = Expr $ Call TailCall v1 stgRegs llvmStdFunAttrs + let s3 = Return Nothing + return (env', stmts `snocOL` s1 `appOL` stgStmts `snocOL` s2 `snocOL` s3, + top) + + +-- | CmmAssign operation +-- +-- We use stack allocated variables for CmmReg. The optimiser will replace +-- these with registers when possible. +genAssign :: LlvmEnv -> CmmReg -> CmmExpr -> UniqSM StmtData +genAssign env reg val = do + let (env1, vreg, stmts1, top1) = getCmmReg env reg + (env2, vval, stmts2, top2) <- exprToVar env1 val + let s1 = Store vval vreg + return (env2, stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2) + + +-- | CmmStore operation +genStore :: LlvmEnv -> CmmExpr -> CmmExpr -> UniqSM StmtData +genStore env addr val = do + (env1, vaddr, stmts1, top1) <- exprToVar env addr + (env2, vval, stmts2, top2) <- exprToVar env1 val + if getVarType vaddr == llvmWord + then do + let vty = pLift $ getVarType vval + (vptr, s1) <- doExpr vty $ Cast LM_Inttoptr vaddr vty + let s2 = Store vval vptr + return (env2, stmts1 `appOL` stmts2 `snocOL` s1 `snocOL` s2, + top1 ++ top2) + + else + panic $ "genStore: ptr not of word size! (" ++ show vaddr ++ ")" + + +-- | Unconditional branch +genBranch :: LlvmEnv -> BlockId -> UniqSM StmtData +genBranch env id = + let label = blockIdToLlvm id + in return (env, unitOL $ Branch label, []) + + +-- | Conditional branch +genCondBranch :: LlvmEnv -> CmmExpr -> BlockId -> UniqSM StmtData +genCondBranch env cond idT = do + idF <- getUniqueUs + let labelT = blockIdToLlvm idT + let labelF = LMLocalVar idF LMLabel + (env', vc, stmts, top) <- exprToVarOpt env i1Option cond + if getVarType vc == i1 + then do + let s1 = BranchIf vc labelT labelF + let s2 = MkLabel idF + return $ (env', stmts `snocOL` s1 `snocOL` s2, top) + else + panic $ "genCondBranch: Cond expr not bool! (" ++ show vc ++ ")" + + +-- | Switch branch +-- +-- N.B. we remove Nothing's from the list of branches, as they are 'undefined'. +-- However, they may be defined one day, so we better document this behaviour. +genSwitch :: LlvmEnv -> CmmExpr -> [Maybe BlockId] -> UniqSM StmtData +genSwitch env cond maybe_ids = do + (env', vc, stmts, top) <- exprToVar env cond + let ty = getVarType vc + + let pairs = [ (ix, id) | (ix,Just id) <- zip ([0..]::[Integer]) maybe_ids ] + let labels = map (\(ix, b) -> (mkIntLit ix ty, blockIdToLlvm b)) pairs + -- out of range is undefied, so lets just branch to first label + let (_, defLbl) = head labels + + let s1 = Switch vc defLbl labels + return $ (env', stmts `snocOL` s1, top) + + +-- ----------------------------------------------------------------------------- +-- * CmmExpr code generation +-- + +-- | An expression conversion return data: +-- * LlvmEnv: The new enviornment +-- * LlvmVar: The var holding the result of the expression +-- * LlvmStatements: Any statements needed to evaluate the expression +-- * LlvmCmmTop: Any global data needed for this expression +type ExprData = (LlvmEnv, LlvmVar, LlvmStatements, [LlvmCmmTop]) + +-- | Values which can be passed to 'exprToVar' to configure its +-- behaviour in certain circumstances. +data EOption = EOption { + -- | The expected LlvmType for the returned variable. + -- + -- Currently just used for determining if a comparison should return + -- a boolean (i1) or a int (i32/i64). + eoExpectedType :: Maybe LlvmType + } + +i1Option :: EOption +i1Option = EOption (Just i1) + +wordOption :: EOption +wordOption = EOption (Just llvmWord) + + +-- | Convert a CmmExpr to a list of LlvmStatements with the result of the +-- expression being stored in the returned LlvmVar. +exprToVar :: LlvmEnv -> CmmExpr -> UniqSM ExprData +exprToVar env = exprToVarOpt env wordOption + +exprToVarOpt :: LlvmEnv -> EOption -> CmmExpr -> UniqSM ExprData +exprToVarOpt env opt e = case e of + + CmmLit lit + -> genLit env lit + + CmmLoad e' ty + -> genCmmLoad env e' ty + + -- Cmmreg in expression is the value, so must load. If you want actual + -- reg pointer, call getCmmReg directly. + CmmReg r -> do + let (env', vreg, stmts, top) = getCmmReg env r + (v1, s1) <- doExpr (pLower $ getVarType vreg) $ Load vreg + return (env', v1, stmts `snocOL` s1 , top) + + CmmMachOp op exprs + -> genMachOp env opt op exprs + + CmmRegOff r i + -> exprToVar env $ expandCmmReg (r, i) + + CmmStackSlot _ _ + -> panic "exprToVar: CmmStackSlot not supported!" + + +-- | Handle CmmMachOp expressions +genMachOp :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData + +-- Unary Machop +genMachOp env _ op [x] = case op of + + MO_Not w -> + let all1 = mkIntLit (-1::Int) (widthToLlvmInt w) + in negate (widthToLlvmInt w) all1 LM_MO_Xor + + MO_S_Neg w -> + let all0 = mkIntLit (0::Int) (widthToLlvmInt w) + in negate (widthToLlvmInt w) all0 LM_MO_Sub + + MO_F_Neg w -> + let all0 = LMLitVar $ LMFloatLit 0 (widthToLlvmFloat w) + in negate (widthToLlvmFloat w) all0 LM_MO_Sub + + MO_SF_Conv _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp + MO_FS_Conv _ w -> fiConv (widthToLlvmInt w) LM_Fptosi + + MO_SS_Conv from to + -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Sext + + MO_UU_Conv from to + -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext + + MO_FF_Conv from to + -> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext + + a -> panic $ "genMachOp: unmatched unary CmmMachOp! (" ++ show a ++ ")" + + where + negate ty v2 negOp = do + (env', vx, stmts, top) <- exprToVar env x + (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx + return (env', v1, stmts `snocOL` s1, top) + + fiConv ty convOp = do + (env', vx, stmts, top) <- exprToVar env x + (v1, s1) <- doExpr ty $ Cast convOp vx ty + return (env', v1, stmts `snocOL` s1, top) + + sameConv from ty reduce expand = do + x'@(env', vx, stmts, top) <- exprToVar env x + let sameConv' op = do + (v1, s1) <- doExpr ty $ Cast op vx ty + return (env', v1, stmts `snocOL` s1, top) + let toWidth = llvmWidthInBits ty + -- LLVM doesn't like trying to convert to same width, so + -- need to check for that as we do get cmm code doing it. + case widthInBits from of + w | w < toWidth -> sameConv' expand + w | w > toWidth -> sameConv' reduce + _w -> return x' + + +-- Binary MachOp +genMachOp env opt op [x, y] = case op of + + MO_Eq _ -> genBinComp opt LM_CMP_Eq + MO_Ne _ -> genBinComp opt LM_CMP_Ne + + MO_S_Gt _ -> genBinComp opt LM_CMP_Sgt + MO_S_Ge _ -> genBinComp opt LM_CMP_Sge + MO_S_Lt _ -> genBinComp opt LM_CMP_Slt + MO_S_Le _ -> genBinComp opt LM_CMP_Sle + + MO_U_Gt _ -> genBinComp opt LM_CMP_Ugt + MO_U_Ge _ -> genBinComp opt LM_CMP_Uge + MO_U_Lt _ -> genBinComp opt LM_CMP_Ult + MO_U_Le _ -> genBinComp opt LM_CMP_Ule + + MO_Add _ -> genBinMach LM_MO_Add + MO_Sub _ -> genBinMach LM_MO_Sub + MO_Mul _ -> genBinMach LM_MO_Mul + + MO_U_MulMayOflo _ -> panic "genMachOp: MO_U_MulMayOflo unsupported!" + + MO_S_MulMayOflo w -> isSMulOK w x y + + MO_S_Quot _ -> genBinMach LM_MO_SDiv + MO_S_Rem _ -> genBinMach LM_MO_SRem + + MO_U_Quot _ -> genBinMach LM_MO_UDiv + MO_U_Rem _ -> genBinMach LM_MO_URem + + MO_F_Eq _ -> genBinComp opt LM_CMP_Feq + MO_F_Ne _ -> genBinComp opt LM_CMP_Fne + MO_F_Gt _ -> genBinComp opt LM_CMP_Fgt + MO_F_Ge _ -> genBinComp opt LM_CMP_Fge + MO_F_Lt _ -> genBinComp opt LM_CMP_Flt + MO_F_Le _ -> genBinComp opt LM_CMP_Fle + + MO_F_Add _ -> genBinMach LM_MO_Add + MO_F_Sub _ -> genBinMach LM_MO_Sub + MO_F_Mul _ -> genBinMach LM_MO_Mul + MO_F_Quot _ -> genBinMach LM_MO_FDiv + + MO_And _ -> genBinMach LM_MO_And + MO_Or _ -> genBinMach LM_MO_Or + MO_Xor _ -> genBinMach LM_MO_Xor + MO_Shl _ -> genBinMach LM_MO_Shl + MO_U_Shr _ -> genBinMach LM_MO_LShr + MO_S_Shr _ -> genBinMach LM_MO_AShr + + a -> panic $ "genMachOp: unmatched binary CmmMachOp! (" ++ show a ++ ")" + + where + binLlvmOp ty binOp = do + (env1, vx, stmts1, top1) <- exprToVar env x + (env2, vy, stmts2, top2) <- exprToVar env1 y + if getVarType vx == getVarType vy + then do + (v1, s1) <- doExpr (ty vx) $ binOp vx vy + return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, + top1 ++ top2) + + else do + -- XXX: Error. Continue anyway so we can debug the generated + -- ll file. + let cmmToStr = (lines . show . llvmSDoc . PprCmm.pprExpr) + let dx = Comment $ map fsLit $ cmmToStr x + let dy = Comment $ map fsLit $ cmmToStr y + (v1, s1) <- doExpr (ty vx) $ binOp vx vy + let allStmts = stmts1 `appOL` stmts2 `snocOL` dx + `snocOL` dy `snocOL` s1 + return (env2, v1, allStmts, top1 ++ top2) + + -- let o = case binOp vx vy of + -- Compare op _ _ -> show op + -- LlvmOp op _ _ -> show op + -- _ -> "unknown" + -- panic $ "genMachOp: comparison between different types (" + -- ++ o ++ " "++ show vx ++ ", " ++ show vy ++ ")" + -- ++ "\ne1: " ++ (show.llvmSDoc.PprCmm.pprExpr $ x) + -- ++ "\ne2: " ++ (show.llvmSDoc.PprCmm.pprExpr $ y) + + -- | Need to use EOption here as Cmm expects word size results from + -- comparisons while llvm return i1. Need to extend to llvmWord type + -- if expected + genBinComp opt cmp = do + ed@(env', v1, stmts, top) <- binLlvmOp (\_ -> i1) $ Compare cmp + + if getVarType v1 == i1 + then + case eoExpectedType opt of + Nothing -> + return ed + + Just t | t == i1 -> + return ed + + | isInt t -> do + (v2, s1) <- doExpr t $ Cast LM_Zext v1 t + return (env', v2, stmts `snocOL` s1, top) + + | otherwise -> + panic $ "genBinComp: Can't case i1 compare" + ++ "res to non int type " ++ show (t) + else + panic $ "genBinComp: Compare returned type other then i1! " + ++ (show $ getVarType v1) + + genBinMach op = binLlvmOp getVarType (LlvmOp op) + + -- | Detect if overflow will occur in signed multiply of the two + -- CmmExpr's. This is the LLVM assembly equivalent of the NCG + -- implementation. Its much longer due to type information/safety. + -- This should actually compile to only about 3 asm instructions. + isSMulOK :: Width -> CmmExpr -> CmmExpr -> UniqSM ExprData + isSMulOK _ x y = do + (env1, vx, stmts1, top1) <- exprToVar env x + (env2, vy, stmts2, top2) <- exprToVar env1 y + + let word = getVarType vx + let word2 = LMInt $ 2 * (llvmWidthInBits $ getVarType vx) + let shift = llvmWidthInBits word + let shift1 = mkIntLit (shift - 1) llvmWord + let shift2 = mkIntLit shift llvmWord + + if isInt word + then do + (x1, s1) <- doExpr word2 $ Cast LM_Sext vx word2 + (y1, s2) <- doExpr word2 $ Cast LM_Sext vy word2 + (r1, s3) <- doExpr word2 $ LlvmOp LM_MO_Mul x1 y1 + (rlow1, s4) <- doExpr word $ Cast LM_Trunc r1 word + (rlow2, s5) <- doExpr word $ LlvmOp LM_MO_AShr rlow1 shift1 + (rhigh1, s6) <- doExpr word2 $ LlvmOp LM_MO_AShr r1 shift2 + (rhigh2, s7) <- doExpr word $ Cast LM_Trunc rhigh1 word + (dst, s8) <- doExpr word $ LlvmOp LM_MO_Sub rlow2 rhigh2 + let stmts = (unitOL s1) `snocOL` s2 `snocOL` s3 `snocOL` s4 + `snocOL` s5 `snocOL` s6 `snocOL` s7 `snocOL` s8 + return (env2, dst, stmts1 `appOL` stmts2 `appOL` stmts, + top1 ++ top2) + + else + panic $ "isSMulOK: Not bit type! (" ++ show word ++ ")" + + +-- More then two expression, invalid! +genMachOp _ _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!" + + +-- | Handle CmmLoad expression +genCmmLoad :: LlvmEnv -> CmmExpr -> CmmType -> UniqSM ExprData +genCmmLoad env e ty = do + (env', iptr, stmts, tops) <- exprToVar env e + let ety = getVarType iptr + case (isInt ety) of + True | llvmPtrBits == llvmWidthInBits ety -> do + let pty = LMPointer $ cmmToLlvmType ty + (ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty + (dvar, load) <- doExpr (cmmToLlvmType ty) $ Load ptr + return (env', dvar, stmts `snocOL` cast `snocOL` load, tops) + + | otherwise + -> pprPanic + ("exprToVar: can't cast to pointer as int not of " + ++ "pointer size!") + (PprCmm.pprExpr e <+> text ( + "Size of Ptr: " ++ show llvmPtrBits ++ + ", Size of var: " ++ show (llvmWidthInBits ety) ++ + ", Var: " ++ show iptr)) + + False -> panic "exprToVar: CmmLoad expression is not of type int!" + + +-- | Handle CmmReg expression +-- +-- We allocate CmmReg on the stack. This avoids having to map a CmmReg to an +-- equivalent SSA form and avoids having to deal with Phi node insertion. +-- This is also the approach recommended by llvm developers. +getCmmReg :: LlvmEnv -> CmmReg -> ExprData +getCmmReg env r@(CmmLocal (LocalReg un _)) + = let exists = varLookup un env + + (newv, stmts) = allocReg r + nenv = varInsert un (pLower $ getVarType newv) env + in case exists of + Just ety -> (env, (LMLocalVar un $ pLift ety), nilOL, []) + Nothing -> (nenv, newv, stmts, []) + +getCmmReg env (CmmGlobal g) = (env, lmGlobalRegVar g, nilOL, []) + + +-- | Allocate a CmmReg on the stack +allocReg :: CmmReg -> (LlvmVar, LlvmStatements) +allocReg (CmmLocal (LocalReg un ty)) + = let ty' = cmmToLlvmType ty + var = LMLocalVar un (LMPointer ty') + alc = Alloca ty' 1 + in (var, unitOL $ Assignment var alc) + +allocReg _ = panic $ "allocReg: Global reg encountered! Global registers should" + ++ " have been handled elsewhere!" + + +-- | Generate code for a literal +genLit :: LlvmEnv -> CmmLit -> UniqSM ExprData +genLit env (CmmInt i w) + = return (env, mkIntLit i (LMInt $ widthInBits w), nilOL, []) + +genLit env (CmmFloat r w) + = return (env, LMLitVar $ LMFloatLit r (widthToLlvmFloat w), nilOL, []) + +genLit env cmm@(CmmLabel l) + = let label = strCLabel_llvm l + ty = funLookup label env + lmty = cmmToLlvmType $ cmmLitType cmm + in case ty of + -- Make generic external label defenition and then pointer to it + Nothing -> do + let glob@(var, _) = genStringLabelRef label + let ldata = [CmmData Data [([glob], [])]] + let env' = funInsert label (pLower $ getVarType var) env + (v1, s1) <- doExpr lmty $ Cast LM_Ptrtoint var llvmWord + return (env', v1, unitOL s1, ldata) + -- Referenced data exists in this module, retrieve type and make + -- pointer to it. + Just ty' -> do + let var = LMGlobalVar label (LMPointer ty') ExternallyVisible + (v1, s1) <- doExpr lmty $ Cast LM_Ptrtoint var llvmWord + return (env, v1, unitOL s1, []) + +genLit env (CmmLabelOff label off) = do + (env', vlbl, stmts, stat) <- genLit env (CmmLabel label) + let voff = mkIntLit off llvmWord + (v1, s1) <- doExpr (getVarType vlbl) $ LlvmOp LM_MO_Add vlbl voff + return (env', v1, stmts `snocOL` s1, stat) + +genLit env (CmmLabelDiffOff l1 l2 off) = do + (env1, vl1, stmts1, stat1) <- genLit env (CmmLabel l1) + (env2, vl2, stmts2, stat2) <- genLit env1 (CmmLabel l2) + let voff = mkIntLit off llvmWord + let ty1 = getVarType vl1 + let ty2 = getVarType vl2 + if (isInt ty1) && (isInt ty2) + && (llvmWidthInBits ty1 == llvmWidthInBits ty2) + + then do + (v1, s1) <- doExpr (getVarType vl1) $ LlvmOp LM_MO_Sub vl1 vl2 + (v2, s2) <- doExpr (getVarType v1 ) $ LlvmOp LM_MO_Add v1 voff + return (env2, v2, stmts1 `appOL` stmts2 `snocOL` s1 `snocOL` s2, + stat1 ++ stat2) + + else + panic "genLit: CmmLabelDiffOff encountered with different label ty!" + +genLit env (CmmBlock b) + = genLit env (CmmLabel $ infoTblLbl b) + +genLit _ CmmHighStackMark + = panic "genStaticLit - CmmHighStackMark unsupported!" + + +-- ----------------------------------------------------------------------------- +-- * Misc +-- + +-- | Function prologue. Load STG arguments into variables for function. +funPrologue :: [LlvmStatement] +funPrologue = concat $ map getReg activeStgRegs + where getReg rr = + let reg = lmGlobalRegVar rr + arg = lmGlobalRegArg rr + alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1 + store = Store arg reg + in [alloc, store] + + +-- | Function epilogue. Load STG variables to use as argument for call. +funEpilogue :: UniqSM ([LlvmVar], LlvmStatements) +funEpilogue = do + let loadExpr r = do + (v,s) <- doExpr (pLower $ getVarType r) $ Load r + return (v, unitOL s) + loads <- mapM (loadExpr . lmGlobalRegVar) activeStgRegs + let (vars, stmts) = unzip loads + return (vars, concatOL stmts) + + +-- | Get a function pointer to the CLabel specified. +-- +-- This is for Haskell functions, function type is assumed, so doesn't work +-- with foreign functions. +getHsFunc :: LlvmEnv -> CLabel -> UniqSM ExprData +getHsFunc env lbl + = let fname = strCLabel_llvm lbl + ty = funLookup fname env + in case ty of + Just ty'@(LMFunction sig) -> do + -- Function in module in right form + let fun = LMGlobalVar fname ty' (funcLinkage sig) + return (env, fun, nilOL, []) + Just ty' -> do + -- label in module but not function pointer, convert + let fun = LMGlobalVar fname (pLift ty') ExternallyVisible + (v1, s1) <- doExpr (pLift llvmFunTy) $ Cast LM_Bitcast fun (pLift llvmFunTy) + return (env, v1, unitOL s1, []) + Nothing -> do + -- label not in module, create external reference + let ty' = LMFunction $ llvmFunSig lbl ExternallyVisible + let fun = LMGlobalVar fname ty' ExternallyVisible + let top = CmmData Data [([],[ty'])] + let env' = funInsert fname ty' env + return (env', fun, nilOL, [top]) + + +-- | Create a new local var +mkLocalVar :: LlvmType -> UniqSM LlvmVar +mkLocalVar ty = do + un <- getUniqueUs + return $ LMLocalVar un ty + + +-- | Execute an expression, assigning result to a var +doExpr :: LlvmType -> LlvmExpression -> UniqSM (LlvmVar, LlvmStatement) +doExpr ty expr = do + v <- mkLocalVar ty + return (v, Assignment v expr) + + +-- | Expand CmmRegOff +expandCmmReg :: (CmmReg, Int) -> CmmExpr +expandCmmReg (reg, off) + = let width = typeWidth (cmmRegType reg) + voff = CmmLit $ CmmInt (fromIntegral off) width + in CmmMachOp (MO_Add width) [CmmReg reg, voff] + + +-- | Convert a block id into a appropriate Llvm label +blockIdToLlvm :: BlockId -> LlvmVar +blockIdToLlvm bid = LMLocalVar (getUnique bid) LMLabel + + +-- | Create Llvm int Literal +mkIntLit :: Integral a => a -> LlvmType -> LlvmVar +mkIntLit i ty = LMLitVar $ LMIntLit (toInteger i) ty + + +-- | Error functions +panic :: String -> a +panic s = Outputable.panic $ "LlvmCodeGen.CodeGen." ++ s + +pprPanic :: String -> SDoc -> a +pprPanic s d = Outputable.pprPanic ("LlvmCodeGen.CodeGen." ++ s) d + diff --git a/compiler/llvmGen/LlvmCodeGen/Data.hs b/compiler/llvmGen/LlvmCodeGen/Data.hs new file mode 100644 index 0000000000..a5b82aadf2 --- /dev/null +++ b/compiler/llvmGen/LlvmCodeGen/Data.hs @@ -0,0 +1,190 @@ +-- ---------------------------------------------------------------------------- +-- | Handle conversion of CmmData to LLVM code. +-- + +module LlvmCodeGen.Data ( + genLlvmData, resolveLlvmDatas, resolveLlvmData + ) where + +#include "HsVersions.h" + +import Llvm +import LlvmCodeGen.Base + +import BlockId +import CLabel +import Cmm + +import DynFlags +import FastString +import qualified Outputable + +import Data.Maybe + + +-- ---------------------------------------------------------------------------- +-- * Constants +-- + +-- | The string appended to a variable name to create its structure type alias +structStr :: LMString +structStr = fsLit "_struct" + +-- ---------------------------------------------------------------------------- +-- * Top level +-- + +-- | Pass a CmmStatic section to an equivalent Llvm code. Can't +-- complete this completely though as we need to pass all CmmStatic +-- sections before all references can be resolved. This last step is +-- done by 'resolveLlvmData'. +genLlvmData :: DynFlags -> (Section, [CmmStatic]) -> LlvmUnresData +genLlvmData _ ( _ , (CmmDataLabel lbl):xs) = + let static = map genData xs + label = strCLabel_llvm lbl + + types = map getStatTypes static + getStatTypes (Left x) = cmmToLlvmType $ cmmLitType x + getStatTypes (Right x) = getStatType x + + strucTy = LMStruct types + alias = LMAlias (label `appendFS` structStr) strucTy + in (lbl, alias, static) + +genLlvmData _ _ = panic "genLlvmData: CmmData section doesn't start with label!" + +resolveLlvmDatas :: DynFlags -> LlvmEnv -> [LlvmUnresData] -> [LlvmData] + -> (LlvmEnv, [LlvmData]) +resolveLlvmDatas _ env [] ldata + = (env, ldata) + +resolveLlvmDatas dflags env (udata : rest) ldata + = let (env', ndata) = resolveLlvmData dflags env udata + in resolveLlvmDatas dflags env' rest (ldata ++ [ndata]) + +-- | Fix up CLabel references now that we should have passed all CmmData. +resolveLlvmData :: DynFlags -> LlvmEnv -> LlvmUnresData -> (LlvmEnv, LlvmData) +resolveLlvmData _ env (lbl, alias, unres) = + let (env', static, refs) = resDatas env unres ([], []) + refs' = catMaybes refs + struct = Just $ LMStaticStruc static alias + label = strCLabel_llvm lbl + link = if (externallyVisibleCLabel lbl) + then ExternallyVisible else Internal + glob = LMGlobalVar label alias link + in (env', (refs' ++ [(glob, struct)], [alias])) + + +-- ---------------------------------------------------------------------------- +-- ** Resolve Data/CLabel references +-- + +-- | Resolve data list +resDatas :: LlvmEnv -> [UnresStatic] -> ([LlvmStatic], [Maybe LMGlobal]) + -> (LlvmEnv, [LlvmStatic], [Maybe LMGlobal]) + +resDatas env [] (stat, glob) + = (env, stat, glob) + +resDatas env (cmm : rest) (stats, globs) + = let (env', nstat, nglob) = resData env cmm + in resDatas env' rest (stats ++ [nstat], globs ++ nglob) + +-- | Resolve an individual static label if it needs to be. +-- +-- We check the 'LlvmEnv' to see if the reference has been defined in this +-- module. If it has we can retrieve its type and make a pointer, otherwise +-- we introduce a generic external defenition for the referenced label and +-- then make a pointer. +resData :: LlvmEnv -> UnresStatic -> (LlvmEnv, LlvmStatic, [Maybe LMGlobal]) + +resData env (Right stat) = (env, stat, [Nothing]) + +resData env (Left cmm@(CmmLabel l)) = + let label = strCLabel_llvm l + ty = funLookup label env + lmty = cmmToLlvmType $ cmmLitType cmm + in case ty of + -- Make generic external label defenition and then pointer to it + Nothing -> + let glob@(var, _) = genStringLabelRef label + env' = funInsert label (pLower $ getVarType var) env + ptr = LMStaticPointer var + in (env', LMPtoI ptr lmty, [Just glob]) + -- Referenced data exists in this module, retrieve type and make + -- pointer to it. + Just ty' -> + let var = LMGlobalVar label (LMPointer ty') ExternallyVisible + ptr = LMStaticPointer var + in (env, LMPtoI ptr lmty, [Nothing]) + +resData env (Left (CmmLabelOff label off)) = + let (env', var, glob) = resData env (Left (CmmLabel label)) + offset = LMStaticLit $ LMIntLit (toInteger off) llvmWord + in (env', LMAdd var offset, glob) + +resData env (Left (CmmLabelDiffOff l1 l2 off)) = + let (env1, var1, glob1) = resData env (Left (CmmLabel l1)) + (env2, var2, glob2) = resData env1 (Left (CmmLabel l2)) + var = LMSub var1 var2 + offset = LMStaticLit $ LMIntLit (toInteger off) llvmWord + in (env2, LMAdd var offset, glob1 ++ glob2) + +resData _ _ = panic "resData: Non CLabel expr as left type!" + +-- ---------------------------------------------------------------------------- +-- * Generate static data +-- + +-- | Handle static data +-- Don't handle 'CmmAlign' or a 'CmmDataLabel'. +genData :: CmmStatic -> UnresStatic + +genData (CmmString str) = + let v = map (\x -> LMStaticLit $ LMIntLit (fromIntegral x) i8) str + ve = v ++ [LMStaticLit $ LMIntLit 0 i8] + in Right $ LMStaticArray ve (LMArray (length ve) i8) + +genData (CmmUninitialised bytes) + = Right $ LMUninitType (LMArray bytes i8) + +genData (CmmStaticLit lit) + = genStaticLit lit + +genData (CmmAlign _) + = panic "genData: Can't handle CmmAlign!" + +genData (CmmDataLabel _) + = panic "genData: Can't handle data labels not at top of data!" + + +-- | Generate Llvm code for a static literal. +-- +-- Will either generate the code or leave it unresolved if it is a 'CLabel' +-- which isn't yet known. +genStaticLit :: CmmLit -> UnresStatic +genStaticLit (CmmInt i w) + = Right $ LMStaticLit (LMIntLit i (LMInt $ widthInBits w)) + +genStaticLit (CmmFloat r w) + = Right $ LMStaticLit (LMFloatLit r (widthToLlvmFloat w)) + +-- Leave unresolved, will fix later +genStaticLit c@(CmmLabel _ ) = Left $ c +genStaticLit c@(CmmLabelOff _ _) = Left $ c +genStaticLit c@(CmmLabelDiffOff _ _ _) = Left $ c + +genStaticLit (CmmBlock b) = Left $ CmmLabel $ infoTblLbl b + +genStaticLit (CmmHighStackMark) + = panic "genStaticLit: CmmHighStackMark unsupported!" + + +-- ----------------------------------------------------------------------------- +-- * Misc +-- + +-- | Error Function +panic :: String -> a +panic s = Outputable.panic $ "LlvmCodeGen.Data." ++ s + diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs new file mode 100644 index 0000000000..bccc336093 --- /dev/null +++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs @@ -0,0 +1,91 @@ +-- ---------------------------------------------------------------------------- +-- | Pretty print helpers for the LLVM Code generator. +-- + +module LlvmCodeGen.Ppr ( + pprLlvmHeader, pprLlvmCmmTop, pprLlvmData + ) where + +#include "HsVersions.h" + +import Llvm +import LlvmCodeGen.Base +import LlvmCodeGen.Data + +import CLabel +import Cmm + +import DynFlags +import Pretty +import Unique + +-- ---------------------------------------------------------------------------- +-- * Top level +-- + +-- | LLVM module layout description for the host target +moduleLayout :: Doc +moduleLayout = +#ifdef i386_TARGET_ARCH + +#ifdef darwin_TARGET_OS + (text "target datalayout = \"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128\"") + $+$ (text "target triple = \"i386-apple-darwin9.8\"") +#else + (text "target datalayout = \"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32\"") + $+$ (text "target triple = \"i386-linux-gnu\"") +#endif + +#else + +#ifdef x86_64_TARGET_ARCH + (text "target datalayout = \"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128\"") + $+$ (text "target triple = \"x86_64-linux-gnu\"") + +#else /* Not i386 */ + -- FIX: Other targets + empty +#endif + +#endif + +-- | Header code for LLVM modules +pprLlvmHeader :: Doc +pprLlvmHeader = moduleLayout + +-- | Pretty print LLVM code +pprLlvmCmmTop :: DynFlags -> LlvmCmmTop -> Doc +pprLlvmCmmTop dflags (CmmData _ lmdata) + = vcat $ map (pprLlvmData dflags) lmdata + +pprLlvmCmmTop dflags (CmmProc info lbl _ (ListGraph blocks)) + = ( + let static = CmmDataLabel (entryLblToInfoLbl lbl) : info + in if not (null info) + then pprCmmStatic dflags static + else empty + ) $+$ ( + let link = if (externallyVisibleCLabel lbl) + then ExternallyVisible else Internal + funDec = llvmFunSig lbl link + lmblocks = map (\(BasicBlock id stmts) -> LlvmBlock (getUnique id) stmts) blocks + fun = LlvmFunction funDec [NoUnwind] lmblocks + in ppLlvmFunction fun + ) + + +-- | Pretty print LLVM data code +pprLlvmData :: DynFlags -> LlvmData -> Doc +pprLlvmData _ (globals, types ) = + let globals' = ppLlvmGlobals globals + types' = ppLlvmTypes types + in types' $+$ globals' + + +-- | Pretty print CmmStatic +pprCmmStatic :: DynFlags -> [CmmStatic] -> Doc +pprCmmStatic dflags stat + = let unres = genLlvmData dflags (Data,stat) + (_, ldata) = resolveLlvmData dflags initLlvmEnv unres + in pprLlvmData dflags ldata + diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs new file mode 100644 index 0000000000..b731a863e2 --- /dev/null +++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs @@ -0,0 +1,54 @@ +-- ---------------------------------------------------------------------------- +-- | Deal with Cmm registers +-- + +module LlvmCodeGen.Regs ( + lmGlobalRegArg, lmGlobalRegVar + ) where + +#include "HsVersions.h" + +import Llvm + +import CmmExpr +import Outputable ( panic ) +import FastString + +-- | Get the LlvmVar function variable storing the real register +lmGlobalRegVar :: GlobalReg -> LlvmVar +lmGlobalRegVar = lmGlobalReg "_Var" + +-- | Get the LlvmVar function argument storing the real register +lmGlobalRegArg :: GlobalReg -> LlvmVar +lmGlobalRegArg = (pVarLower . lmGlobalReg "_Arg") + +{- Need to make sure the names here can't conflict with the unique generated + names. Uniques generated names containing only base62 chars. So using say + the '_' char guarantees this. +-} +lmGlobalReg :: String -> GlobalReg -> LlvmVar +lmGlobalReg suf reg + = case reg of + BaseReg -> wordGlobal $ "Base" ++ suf + Sp -> wordGlobal $ "Sp" ++ suf + Hp -> wordGlobal $ "Hp" ++ suf + VanillaReg 1 _ -> wordGlobal $ "R1" ++ suf + VanillaReg 2 _ -> wordGlobal $ "R2" ++ suf + VanillaReg 3 _ -> wordGlobal $ "R3" ++ suf + VanillaReg 4 _ -> wordGlobal $ "R4" ++ suf + VanillaReg 5 _ -> wordGlobal $ "R5" ++ suf + VanillaReg 6 _ -> wordGlobal $ "R6" ++ suf + SpLim -> wordGlobal $ "SpLim" ++ suf + FloatReg 1 -> floatGlobal $"F1" ++ suf + FloatReg 2 -> floatGlobal $"F2" ++ suf + FloatReg 3 -> floatGlobal $"F3" ++ suf + FloatReg 4 -> floatGlobal $"F4" ++ suf + DoubleReg 1 -> doubleGlobal $ "D1" ++ suf + DoubleReg 2 -> doubleGlobal $ "D2" ++ suf + _other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg) + ++ ") not supported!" + where + wordGlobal name = LMNLocalVar (fsLit name) llvmWordPtr + floatGlobal name = LMNLocalVar (fsLit name) $ pLift LMFloat + doubleGlobal name = LMNLocalVar (fsLit name) $ pLift LMDouble + diff --git a/compiler/main/CodeOutput.lhs b/compiler/main/CodeOutput.lhs index 83f23cfbc3..40f4f11a81 100644 --- a/compiler/main/CodeOutput.lhs +++ b/compiler/main/CodeOutput.lhs @@ -9,10 +9,14 @@ module CodeOutput( codeOutput, outputForeignStubs ) where #include "HsVersions.h" #ifndef OMIT_NATIVE_CODEGEN -import UniqSupply ( mkSplitUniqSupply ) import AsmCodeGen ( nativeCodeGen ) #endif +import UniqSupply ( mkSplitUniqSupply ) +#ifndef GHCI_TABLES_NEXT_TO_CODE +import qualified LlvmCodeGen ( llvmCodeGen ) +#endif + #ifdef JAVA import JavaGen ( javaGen ) import qualified PrintJava @@ -81,6 +85,7 @@ codeOutput dflags this_mod location foreign_stubs pkg_deps flat_abstractC HscInterpreted -> return (); HscAsm -> outputAsm dflags filenm flat_abstractC; HscC -> outputC dflags filenm flat_abstractC pkg_deps; + HscLlvm -> outputLlvm dflags filenm flat_abstractC; HscJava -> #ifdef JAVA outputJava dflags filenm mod_name tycons core_binds; @@ -168,6 +173,30 @@ outputAsm _ _ _ %************************************************************************ %* * +\subsection{LLVM} +%* * +%************************************************************************ + +\begin{code} +outputLlvm :: DynFlags -> FilePath -> [RawCmm] -> IO () + +#ifndef GHCI_TABLES_NEXT_TO_CODE +outputLlvm dflags filenm flat_absC + = do ncg_uniqs <- mkSplitUniqSupply 'n' + doOutput filenm $ \f -> + LlvmCodeGen.llvmCodeGen dflags f ncg_uniqs flat_absC +#else +outputLlvm _ _ _ + = pprPanic "This compiler was built with the LLVM backend disabled" + (text ("This is because the TABLES_NEXT_TO_CODE optimisation is" + ++ " enabled, which the LLVM backend doesn't support right now.") + $+$ text "Use -fasm instead") +#endif +\end{code} + + +%************************************************************************ +%* * \subsection{Java} %* * %************************************************************************ diff --git a/compiler/main/DriverPhases.hs b/compiler/main/DriverPhases.hs index eb64134626..398da79b79 100644 --- a/compiler/main/DriverPhases.hs +++ b/compiler/main/DriverPhases.hs @@ -80,6 +80,9 @@ data Phase | SplitMangle -- after mangler if splitting | SplitAs | As + | LlvmAs -- LLVM assembly to bitcode file + | LlvmOpt -- Run LLVM opt tool over llvm assembly + | LlvmLlc -- LLVM bitcode to native assembly | CmmCpp -- pre-process Cmm source | Cmm -- parse & compile Cmm code @@ -109,6 +112,9 @@ eqPhase Mangle Mangle = True eqPhase SplitMangle SplitMangle = True eqPhase SplitAs SplitAs = True eqPhase As As = True +eqPhase LlvmAs LlvmAs = True +eqPhase LlvmOpt LlvmOpt = True +eqPhase LlvmLlc LlvmLlc = True eqPhase CmmCpp CmmCpp = True eqPhase Cmm Cmm = True eqPhase StopLn StopLn = True @@ -133,6 +139,9 @@ nextPhase HCc = Mangle nextPhase Mangle = SplitMangle nextPhase SplitMangle = As nextPhase As = SplitAs +nextPhase LlvmAs = LlvmOpt +nextPhase LlvmOpt = LlvmLlc +nextPhase LlvmLlc = As nextPhase SplitAs = StopLn nextPhase Ccpp = As nextPhase Cc = As @@ -160,6 +169,9 @@ startPhase "raw_s" = Mangle startPhase "split_s" = SplitMangle startPhase "s" = As startPhase "S" = As +startPhase "ll" = LlvmAs +startPhase "bc" = LlvmOpt +startPhase "opt_bc" = LlvmLlc startPhase "o" = StopLn startPhase "cmm" = CmmCpp startPhase "cmmcpp" = Cmm @@ -184,6 +196,9 @@ phaseInputExt Cc = "c" phaseInputExt Mangle = "raw_s" phaseInputExt SplitMangle = "split_s" -- not really generated phaseInputExt As = "s" +phaseInputExt LlvmAs = "ll" +phaseInputExt LlvmOpt = "bc" +phaseInputExt LlvmLlc = "opt_bc" phaseInputExt SplitAs = "split_s" -- not really generated phaseInputExt CmmCpp = "cmm" phaseInputExt Cmm = "cmmcpp" @@ -195,7 +210,7 @@ haskellish_src_suffixes, haskellish_suffixes, cish_suffixes, haskellish_src_suffixes = haskellish_user_src_suffixes ++ [ "hspp", "hscpp", "hcr", "cmm" ] haskellish_suffixes = haskellish_src_suffixes ++ ["hc", "raw_s"] -cish_suffixes = [ "c", "cpp", "C", "cc", "cxx", "s", "S" ] +cish_suffixes = [ "c", "cpp", "C", "cc", "cxx", "s", "S", "ll", "bc", "opt_bc" ] extcoreish_suffixes = [ "hcr" ] -- Will not be deleted as temp files: haskellish_user_src_suffixes = [ "hs", "lhs", "hs-boot", "lhs-boot" ] diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs index c6d3d0a47c..7274f2a9b2 100644 --- a/compiler/main/DriverPipeline.hs +++ b/compiler/main/DriverPipeline.hs @@ -605,6 +605,7 @@ getOutputFilename stop_phase output basename keep_hc = dopt Opt_KeepHcFiles dflags keep_raw_s = dopt Opt_KeepRawSFiles dflags keep_s = dopt Opt_KeepSFiles dflags + keep_bc = dopt Opt_KeepLlvmFiles dflags myPhaseInputExt HCc = hcsuf myPhaseInputExt StopLn = osuf @@ -615,11 +616,13 @@ getOutputFilename stop_phase output basename -- sometimes, we keep output from intermediate stages keep_this_output = case next_phase of - StopLn -> True - Mangle | keep_raw_s -> True - As | keep_s -> True - HCc | keep_hc -> True - _other -> False + StopLn -> True + Mangle | keep_raw_s -> True + As | keep_s -> True + LlvmAs | keep_bc -> True + LlvmOpt | keep_bc -> True + HCc | keep_hc -> True + _other -> False suffix = myPhaseInputExt next_phase @@ -1232,6 +1235,77 @@ runPhase SplitAs _stop hsc_env _basename _suff _input_fn get_output_fn maybe_loc return (StopLn, dflags, maybe_loc, output_fn) + +----------------------------------------------------------------------------- +-- LlvmAs phase + +runPhase LlvmAs _stop hsc_env _basename _suff input_fn get_output_fn maybe_loc + = liftIO $ do + let dflags = hsc_dflags hsc_env + let la_opts = getOpts dflags opt_la + + output_fn <- get_output_fn dflags LlvmOpt maybe_loc + + SysTools.runLlvmAs dflags + (map SysTools.Option la_opts + ++ [ SysTools.FileOption "" input_fn, + SysTools.Option "-o", SysTools.FileOption "" output_fn]) + + return (LlvmOpt, dflags, maybe_loc, output_fn) + + +----------------------------------------------------------------------------- +-- LlvmOpt phase + +runPhase LlvmOpt _stop hsc_env _basename _suff input_fn get_output_fn maybe_loc + = liftIO $ do + let dflags = hsc_dflags hsc_env + let lo_opts = getOpts dflags opt_lo + let opt_lvl = max 0 (min 2 $ optLevel dflags) + + -- only run if > 0 OR opt options given by user + if opt_lvl /= 0 || lo_opts /= [] + then do + output_fn <- get_output_fn dflags LlvmLlc maybe_loc + + SysTools.runLlvmOpt dflags + (map SysTools.Option lo_opts + ++ [ SysTools.FileOption "" input_fn, + SysTools.Option (llvmOpts !! opt_lvl), + SysTools.Option "-o", + SysTools.FileOption "" output_fn]) + + return (LlvmLlc, dflags, maybe_loc, output_fn) + + else + return (LlvmLlc, dflags, maybe_loc, input_fn) + where + llvmOpts = ["-O1", "-O2", "-O3"] + + +----------------------------------------------------------------------------- +-- LlvmLlc phase + +runPhase LlvmLlc _stop hsc_env _basename _suff input_fn get_output_fn maybe_loc + = liftIO $ do + let dflags = hsc_dflags hsc_env + let lc_opts = getOpts dflags opt_lc + let opt_lvl = max 0 (min 2 $ optLevel dflags) + + output_fn <- get_output_fn dflags As maybe_loc + + SysTools.runLlvmLlc dflags + (map SysTools.Option lc_opts + ++ [ -- SysTools.Option "-tailcallopt", + SysTools.Option (llvmOpts !! opt_lvl), + SysTools.FileOption "" input_fn, + SysTools.Option "-o", SysTools.FileOption "" output_fn]) + + return (As, dflags, maybe_loc, output_fn) + where + llvmOpts = ["", "-O2", "-O3"] + + -- warning suppression runPhase other _stop _dflags _basename _suff _input_fn _get_output_fn _maybe_loc = panic ("runPhase: don't know how to run phase " ++ show other) @@ -1832,6 +1906,7 @@ hscNextPhase dflags _ hsc_lang = HscC -> HCc HscAsm | dopt Opt_SplitObjs dflags -> SplitMangle | otherwise -> As + HscLlvm -> LlvmAs HscNothing -> StopLn HscInterpreted -> StopLn _other -> StopLn diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 1f83d29c34..70b135592e 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -107,6 +107,8 @@ data DynFlag | Opt_D_dump_asm_conflicts | Opt_D_dump_asm_stats | Opt_D_dump_asm_expanded + | Opt_D_dump_llvm + | Opt_D_dump_llvm_opt | Opt_D_dump_cpranal | Opt_D_dump_deriv | Opt_D_dump_ds @@ -332,6 +334,7 @@ data DynFlag | Opt_KeepRawSFiles | Opt_KeepTmpFiles | Opt_KeepRawTokenStream + | Opt_KeepLlvmFiles deriving (Eq, Show) @@ -420,6 +423,9 @@ data DynFlags = DynFlags { opt_a :: [String], opt_l :: [String], opt_windres :: [String], + opt_la :: [String], -- LLVM: llvm-as assembler + opt_lo :: [String], -- LLVM: llvm optimiser + opt_lc :: [String], -- LLVM: llc static compiler -- commands for particular phases pgm_L :: String, @@ -434,6 +440,9 @@ data DynFlags = DynFlags { pgm_T :: String, pgm_sysman :: String, pgm_windres :: String, + pgm_la :: (String,[Option]), -- LLVM: llvm-as assembler + pgm_lo :: (String,[Option]), -- LLVM: opt llvm optimiser + pgm_lc :: (String,[Option]), -- LLVM: llc static compiler -- For ghc -M depMakefile :: FilePath, @@ -498,6 +507,7 @@ wayNames = map wayName . ways data HscTarget = HscC -- ^ Generate C code. | HscAsm -- ^ Generate assembly using the native code generator. + | HscLlvm -- ^ Generate assembly using the llvm code generator. | HscJava -- ^ Generate Java bytecode. | HscInterpreted -- ^ Generate bytecode. (Requires 'LinkInMemory') | HscNothing -- ^ Don't generate any code. See notes above. @@ -507,6 +517,7 @@ data HscTarget isObjectTarget :: HscTarget -> Bool isObjectTarget HscC = True isObjectTarget HscAsm = True +isObjectTarget HscLlvm = True isObjectTarget _ = False -- | The 'GhcMode' tells us whether we're doing multi-module @@ -656,6 +667,9 @@ defaultDynFlags = opt_m = [], opt_l = [], opt_windres = [], + opt_la = [], + opt_lo = [], + opt_lc = [], extraPkgConfs = [], packageFlags = [], @@ -682,6 +696,9 @@ defaultDynFlags = pgm_T = panic "defaultDynFlags: No pgm_T", pgm_sysman = panic "defaultDynFlags: No pgm_sysman", pgm_windres = panic "defaultDynFlags: No pgm_windres", + pgm_la = panic "defaultDynFlags: No pgm_la", + pgm_lo = panic "defaultDynFlags: No pgm_lo", + pgm_lc = panic "defaultDynFlags: No pgm_lc", -- end of initSysTools values -- ghc -M values depMakefile = "Makefile", @@ -770,8 +787,9 @@ getVerbFlag dflags setObjectDir, setHiDir, setStubDir, setOutputDir, setDylibInstallName, setObjectSuf, setHiSuf, setHcSuf, parseDynLibLoaderMode, setPgmP, setPgmL, setPgmF, setPgmc, setPgmm, setPgms, setPgma, setPgml, setPgmdll, setPgmwindres, - addOptL, addOptP, addOptF, addOptc, addOptm, addOpta, addOptl, addOptwindres, - addCmdlineFramework, addHaddockOpts + setPgmla, setPgmlo, setPgmlc, + addOptL, addOptP, addOptF, addOptc, addOptm, addOpta, addOptl, addOptwindres, addOptla, addOptlo, + addOptlc, addCmdlineFramework, addHaddockOpts :: String -> DynFlags -> DynFlags setOutputFile, setOutputHi, setDumpPrefixForce :: Maybe String -> DynFlags -> DynFlags @@ -815,6 +833,9 @@ setPgma f d = d{ pgm_a = (f,[])} setPgml f d = d{ pgm_l = (f,[])} setPgmdll f d = d{ pgm_dll = (f,[])} setPgmwindres f d = d{ pgm_windres = f} +setPgmla f d = d{ pgm_la = (f,[])} +setPgmlo f d = d{ pgm_lo = (f,[])} +setPgmlc f d = d{ pgm_lc = (f,[])} addOptL f d = d{ opt_L = f : opt_L d} addOptP f d = d{ opt_P = f : opt_P d} @@ -824,6 +845,9 @@ addOptm f d = d{ opt_m = f : opt_m d} addOpta f d = d{ opt_a = f : opt_a d} addOptl f d = d{ opt_l = f : opt_l d} addOptwindres f d = d{ opt_windres = f : opt_windres d} +addOptla f d = d{ opt_la = f : opt_la d} +addOptlo f d = d{ opt_lo = f : opt_lo d} +addOptlc f d = d{ opt_lc = f : opt_lc d} setDepMakefile :: FilePath -> DynFlags -> DynFlags setDepMakefile f d = d { depMakefile = deOptDep f } @@ -1018,6 +1042,11 @@ dynamic_flags = [ , Flag "v" (OptIntSuffix setVerbosity) Supported ------- Specific phases -------------------------------------------- + -- need to appear before -pgmL to be parsed as LLVM flags. + , Flag "pgmla" (HasArg (upd . setPgmla)) Supported + , Flag "pgmlo" (HasArg (upd . setPgmlo)) Supported + , Flag "pgmlc" (HasArg (upd . setPgmlc)) Supported + , Flag "pgmL" (HasArg (upd . setPgmL)) Supported , Flag "pgmP" (HasArg (upd . setPgmP)) Supported , Flag "pgmF" (HasArg (upd . setPgmF)) Supported @@ -1029,6 +1058,11 @@ dynamic_flags = [ , Flag "pgmdll" (HasArg (upd . setPgmdll)) Supported , Flag "pgmwindres" (HasArg (upd . setPgmwindres)) Supported + -- need to appear before -optl/-opta to be parsed as LLVM flags. + , Flag "optla" (HasArg (upd . addOptla)) Supported + , Flag "optlo" (HasArg (upd . addOptlo)) Supported + , Flag "optlc" (HasArg (upd . addOptlc)) Supported + , Flag "optL" (HasArg (upd . addOptL)) Supported , Flag "optP" (HasArg (upd . addOptP)) Supported , Flag "optF" (HasArg (upd . addOptF)) Supported @@ -1102,6 +1136,8 @@ dynamic_flags = [ , Flag "keep-s-files" (NoArg (setDynFlag Opt_KeepSFiles)) Supported , Flag "keep-raw-s-file" (NoArg (setDynFlag Opt_KeepRawSFiles)) Supported , Flag "keep-raw-s-files" (NoArg (setDynFlag Opt_KeepRawSFiles)) Supported + , Flag "keep-llvm-file" (NoArg (setDynFlag Opt_KeepLlvmFiles)) Supported + , Flag "keep-llvm-files" (NoArg (setDynFlag Opt_KeepLlvmFiles)) Supported -- This only makes sense as plural , Flag "keep-tmp-files" (NoArg (setDynFlag Opt_KeepTmpFiles)) Supported @@ -1162,6 +1198,11 @@ dynamic_flags = [ Supported , Flag "ddump-asm-expanded" (setDumpFlag Opt_D_dump_asm_expanded) Supported + , Flag "ddump-llvm" (NoArg (do { setObjTarget HscLlvm + ; setDumpFlag' Opt_D_dump_llvm})) + Supported + , Flag "ddump-opt-llvm" (setDumpFlag Opt_D_dump_llvm_opt) + Supported , Flag "ddump-cpranal" (setDumpFlag Opt_D_dump_cpranal) Supported , Flag "ddump-deriv" (setDumpFlag Opt_D_dump_deriv) @@ -1384,6 +1425,7 @@ dynamic_flags = [ , Flag "fasm" (NoArg (setObjTarget HscAsm)) Supported , Flag "fvia-c" (NoArg (setObjTarget HscC)) Supported , Flag "fvia-C" (NoArg (setObjTarget HscC)) Supported + , Flag "fllvm" (NoArg (setObjTarget HscLlvm)) Supported , Flag "fno-code" (NoArg (do upd $ \d -> d{ ghcLink=NoLink } setTarget HscNothing)) @@ -1787,9 +1829,12 @@ unSetDynFlag f = upd (\dfs -> dopt_unset dfs f) -------------------------- setDumpFlag :: DynFlag -> OptKind DynP -setDumpFlag dump_flag - = NoArg (do { setDynFlag dump_flag - ; when want_recomp forceRecompile }) +setDumpFlag dump_flag = NoArg (setDumpFlag' dump_flag) + +setDumpFlag' :: DynFlag -> DynP () +setDumpFlag' dump_flag + = do { setDynFlag dump_flag + ; when want_recomp forceRecompile } where -- Certain dumpy-things are really interested in what's going -- on during recompilation checking, so in those cases we @@ -2185,6 +2230,7 @@ compilerInfo = [("Project name", String cProjectName), ("Have interpreter", String cGhcWithInterpreter), ("Object splitting", String cSplitObjs), ("Have native code generator", String cGhcWithNativeCodeGen), + ("Have llvm code generator", String cGhcWithLlvmCodeGen), ("Support SMP", String cGhcWithSMP), ("Unregisterised", String cGhcUnregisterised), ("Tables next to code", String cGhcEnableTablesNextToCode), diff --git a/compiler/main/SysTools.lhs b/compiler/main/SysTools.lhs index c479a66ab5..29889db4f3 100644 --- a/compiler/main/SysTools.lhs +++ b/compiler/main/SysTools.lhs @@ -18,6 +18,9 @@ module SysTools ( runAs, runLink, -- [Option] -> IO () runMkDLL, runWindres, + runLlvmAs, + runLlvmOpt, + runLlvmLlc, touch, -- String -> String -> IO () copy, @@ -219,6 +222,11 @@ initSysTools mbMinusB dflags0 ; let as_prog = gcc_prog ld_prog = gcc_prog + -- figure out llvm location. (TODO: Acutally implement). + ; let la_prog = "llvm-as" + lc_prog = "llc" + lo_prog = "opt" + ; return dflags1{ ghcUsagePath = ghc_usage_msg_path, ghciUsagePath = ghci_usage_msg_path, @@ -235,7 +243,10 @@ initSysTools mbMinusB dflags0 pgm_dll = (mkdll_prog,mkdll_args), pgm_T = touch_path, pgm_sysman = top_dir ++ "/ghc/rts/parallel/SysMan", - pgm_windres = windres_path + pgm_windres = windres_path, + pgm_la = (la_prog,[]), + pgm_lo = (lo_prog,[]), + pgm_lc = (lc_prog,[]) -- Hans: this isn't right in general, but you can -- elaborate it in the same way as the others } @@ -381,6 +392,21 @@ runAs dflags args = do mb_env <- getGccEnv args1 runSomethingFiltered dflags id "Assembler" p args1 mb_env +runLlvmAs :: DynFlags -> [Option] -> IO () +runLlvmAs dflags args = do + let (p,args0) = pgm_la dflags + runSomething dflags "LLVM Assembler" p (args0++args) + +runLlvmOpt :: DynFlags -> [Option] -> IO () +runLlvmOpt dflags args = do + let (p,args0) = pgm_lo dflags + runSomething dflags "LLVM Optimiser" p (args0++args) + +runLlvmLlc :: DynFlags -> [Option] -> IO () +runLlvmLlc dflags args = do + let (p,args0) = pgm_lc dflags + runSomething dflags "LLVM Compiler" p (args0++args) + runLink :: DynFlags -> [Option] -> IO () runLink dflags args = do let (p,args0) = pgm_l dflags diff --git a/compiler/nativeGen/Alpha/Regs.hs b/compiler/nativeGen/Alpha/Regs.hs index 2d85c5f141..ee490509de 100644 --- a/compiler/nativeGen/Alpha/Regs.hs +++ b/compiler/nativeGen/Alpha/Regs.hs @@ -27,7 +27,6 @@ import RegsBase import BlockId import Cmm -import CgUtils ( get_GlobalReg_addr ) import CLabel ( CLabel, mkMainCapabilityLabel ) import Pretty import Outputable ( Outputable(..), pprPanic, panic ) diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index d79fbb69e0..d73cb89984 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -73,6 +73,7 @@ import RegClass import NCGMonad import BlockId +import CgUtils ( fixStgRegisters ) import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) import PprCmm @@ -278,9 +279,9 @@ cmmNativeGen dflags us cmm count = do -- rewrite assignments to global regs - let (fixed_cmm, usFix) = - {-# SCC "fixAssignsTop" #-} - initUs us $ fixAssignsTop cmm + let fixed_cmm = + {-# SCC "fixStgRegisters" #-} + fixStgRegisters cmm -- cmm to cmm optimisations let (opt_cmm, imports) = @@ -294,13 +295,12 @@ cmmNativeGen dflags us cmm count -- generate native code from cmm let ((native, lastMinuteImports), usGen) = {-# SCC "genMachCode" #-} - initUs usFix $ genMachCode dflags opt_cmm + initUs us $ genMachCode dflags opt_cmm dumpIfSet_dyn dflags Opt_D_dump_asm_native "Native code" (vcat $ map (docToSDoc . pprNatCmmTop) native) - -- tag instructions with register liveness information let (withLiveness, usLive) = {-# SCC "regLiveness" #-} @@ -309,7 +309,6 @@ cmmNativeGen dflags us cmm count dumpIfSet_dyn dflags Opt_D_dump_asm_liveness "Liveness annotations added" (vcat $ map ppr withLiveness) - -- allocate registers (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <- @@ -323,7 +322,6 @@ cmmNativeGen dflags us cmm count emptyUFM $ allocatableRegs - -- do the graph coloring register allocation let ((alloced, regAllocStats), usAlloc) = {-# SCC "RegAlloc" #-} @@ -697,44 +695,6 @@ genMachCode dflags cmm_top else pprPanic "genMachCode: nonzero final delta" (int final_delta) } --- ----------------------------------------------------------------------------- --- Fixup assignments to global registers so that they assign to --- locations within the RegTable, if appropriate. - --- Note that we currently don't fixup reads here: they're done by --- the generic optimiser below, to avoid having two separate passes --- over the Cmm. - -fixAssignsTop :: RawCmmTop -> UniqSM RawCmmTop -fixAssignsTop top@(CmmData _ _) = returnUs top -fixAssignsTop (CmmProc info lbl params (ListGraph blocks)) = - mapUs fixAssignsBlock blocks `thenUs` \ blocks' -> - returnUs (CmmProc info lbl params (ListGraph blocks')) - -fixAssignsBlock :: CmmBasicBlock -> UniqSM CmmBasicBlock -fixAssignsBlock (BasicBlock id stmts) = - fixAssigns stmts `thenUs` \ stmts' -> - returnUs (BasicBlock id stmts') - -fixAssigns :: [CmmStmt] -> UniqSM [CmmStmt] -fixAssigns stmts = - mapUs fixAssign stmts `thenUs` \ stmtss -> - returnUs (concat stmtss) - -fixAssign :: CmmStmt -> UniqSM [CmmStmt] -fixAssign (CmmAssign (CmmGlobal reg) src) - | Left realreg <- reg_or_addr - = returnUs [CmmAssign (CmmGlobal reg) src] - | Right baseRegAddr <- reg_or_addr - = returnUs [CmmStore baseRegAddr src] - -- Replace register leaves with appropriate StixTrees for - -- the given target. GlobalRegs which map to a reg on this - -- arch are left unchanged. Assigning to BaseReg is always - -- illegal, so we check for that. - where - reg_or_addr = get_GlobalReg_reg_or_addr reg - -fixAssign other_stmt = returnUs [other_stmt] -- ----------------------------------------------------------------------------- -- Generic Cmm optimiser @@ -745,10 +705,7 @@ Here we do: (a) Constant folding (b) Simple inlining: a temporary which is assigned to and then used, once, can be shorted. - (c) Replacement of references to GlobalRegs which do not have - machine registers by the appropriate memory load (eg. - Hp ==> *(BaseReg + 34) ). - (d) Position independent code and dynamic linking + (c) Position independent code and dynamic linking (i) introduce the appropriate indirections and position independent refs (ii) compile a list of imported symbols @@ -883,42 +840,8 @@ cmmExprConFold referenceKind expr CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun"))) #endif - CmmReg (CmmGlobal mid) - -- Replace register leaves with appropriate StixTrees for - -- the given target. MagicIds which map to a reg on this - -- arch are left unchanged. For the rest, BaseReg is taken - -- to mean the address of the reg table in MainCapability, - -- and for all others we generate an indirection to its - -- location in the register table. - -> case get_GlobalReg_reg_or_addr mid of - Left realreg -> return expr - Right baseRegAddr - -> case mid of - BaseReg -> cmmExprConFold DataReference baseRegAddr - other -> cmmExprConFold DataReference - (CmmLoad baseRegAddr (globalRegType mid)) - -- eliminate zero offsets - CmmRegOff reg 0 - -> cmmExprConFold referenceKind (CmmReg reg) - - CmmRegOff (CmmGlobal mid) offset - -- RegOf leaves are just a shorthand form. If the reg maps - -- to a real reg, we keep the shorthand, otherwise, we just - -- expand it and defer to the above code. - -> case get_GlobalReg_reg_or_addr mid of - Left realreg -> return expr - Right baseRegAddr - -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [ - CmmReg (CmmGlobal mid), - CmmLit (CmmInt (fromIntegral offset) - wordWidth)]) other -> return other --- ----------------------------------------------------------------------------- --- Utils - -bind f x = x $! f - \end{code} diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index a1b55ce412..8a4228b578 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -180,12 +180,12 @@ getRegisterReg (CmmLocal (LocalReg u pk)) = RegVirtual $ mkVirtualReg u (cmmTypeSize pk) getRegisterReg (CmmGlobal mid) - = case get_GlobalReg_reg_or_addr mid of - Left reg -> reg - _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... + = case globalRegMaybe mid of + Just reg -> reg + Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... {- diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index d649d847f1..e00dd7e496 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -40,7 +40,6 @@ module PPC.Regs ( -- horrow show freeReg, globalRegMaybe, - get_GlobalReg_reg_or_addr, allocatableRegs ) @@ -55,7 +54,6 @@ import Reg import RegClass import Size -import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) @@ -595,20 +593,6 @@ globalRegMaybe _ = panic "PPC.Regs.globalRegMaybe: not defined" #endif /* powerpc_TARGET_ARCH */ --- We map STG registers onto appropriate CmmExprs. Either they map --- to real machine registers or stored as offsets from BaseReg. Given --- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real --- register it is in, on this platform, or a CmmExpr denoting the --- address in the register table holding it. --- (See also get_GlobalReg_addr in CgUtils.) - -get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr -get_GlobalReg_reg_or_addr mid - = case globalRegMaybe mid of - Just rr -> Left rr - Nothing -> Right (get_GlobalReg_addr mid) - - -- allocatableRegs is allMachRegNos with the fixed-use regs removed. -- i.e., these are the regs for which we are prepared to allow the -- register allocator to attempt to map VRegs to. diff --git a/compiler/nativeGen/SPARC/CodeGen/Base.hs b/compiler/nativeGen/SPARC/CodeGen/Base.hs index 54bbf9b51a..c85d8065ad 100644 --- a/compiler/nativeGen/SPARC/CodeGen/Base.hs +++ b/compiler/nativeGen/SPARC/CodeGen/Base.hs @@ -18,6 +18,7 @@ import SPARC.Instr import SPARC.Cond import SPARC.AddrMode import SPARC.Regs +import SPARC.RegPlate import Size import Reg @@ -95,11 +96,11 @@ getRegisterReg (CmmLocal (LocalReg u pk)) = RegVirtual $ mkVirtualReg u (cmmTypeSize pk) getRegisterReg (CmmGlobal mid) - = case get_GlobalReg_reg_or_addr mid of - Left rr -> RegReal rr - - _ -> pprPanic "SPARC.CodeGen.Base.getRegisterReg: global is in memory" - (ppr $ CmmGlobal mid) + = case globalRegMaybe mid of + Just reg -> RegReal reg + Nothing -> pprPanic + "SPARC.CodeGen.Base.getRegisterReg: global is in memory" + (ppr $ CmmGlobal mid) -- Expand CmmRegOff. ToDo: should we do it this way around, or convert diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs index cd19138aa4..98151ecfa5 100644 --- a/compiler/nativeGen/SPARC/Regs.hs +++ b/compiler/nativeGen/SPARC/Regs.hs @@ -18,7 +18,6 @@ module SPARC.Regs ( -- allocatable allocatableRegs, - get_GlobalReg_reg_or_addr, -- args argRegs, @@ -38,9 +37,7 @@ import Reg import RegClass import Size -import Cmm import PprCmm () -import CgUtils ( get_GlobalReg_addr ) import Unique import Outputable @@ -214,21 +211,6 @@ allocatableRegs in filter isFree allRealRegs - --- We map STG registers onto appropriate CmmExprs. Either they map --- to real machine registers or stored as offsets from BaseReg. Given --- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real --- register it is in, on this platform, or a CmmExpr denoting the --- address in the register table holding it. --- (See also get_GlobalReg_addr in CgUtils.) - -get_GlobalReg_reg_or_addr :: GlobalReg -> Either RealReg CmmExpr -get_GlobalReg_reg_or_addr mid - = case globalRegMaybe mid of - Just rr -> Left rr - Nothing -> Right (get_GlobalReg_addr mid) - - -- | The registers to place arguments for function calls, -- for some number of arguments. -- diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 2698406aac..89a26a9772 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -226,12 +226,12 @@ getRegisterReg use_sse2 (CmmLocal (LocalReg u pk)) else RegVirtual (mkVirtualReg u sz) getRegisterReg _ (CmmGlobal mid) - = case get_GlobalReg_reg_or_addr mid of - Left reg -> RegReal $ reg - _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... + = case globalRegMaybe mid of + Just reg -> RegReal $ reg + Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence ... -- | Memory addressing modes passed up the tree. diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index a04e854787..b9a23a6510 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -40,7 +40,6 @@ module X86.Regs ( freeReg, globalRegMaybe, - get_GlobalReg_reg_or_addr, allocatableRegs ) @@ -54,7 +53,6 @@ where import Reg import RegClass -import CgUtils ( get_GlobalReg_addr ) import BlockId import Cmm import CLabel ( CLabel ) @@ -662,20 +660,6 @@ callClobberedRegs = panic "X86.Regs.globalRegMaybe: not defined" #endif --- We map STG registers onto appropriate CmmExprs. Either they map --- to real machine registers or stored as offsets from BaseReg. Given --- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real --- register it is in, on this platform, or a CmmExpr denoting the --- address in the register table holding it. --- (See also get_GlobalReg_addr in CgUtils.) - -get_GlobalReg_reg_or_addr :: GlobalReg -> Either RealReg CmmExpr -get_GlobalReg_reg_or_addr mid - = case globalRegMaybe mid of - Just rr -> Left rr - Nothing -> Right (get_GlobalReg_addr mid) - - -- allocatableRegs is allMachRegNos with the fixed-use regs removed. -- i.e., these are the regs for which we are prepared to allow the -- register allocator to attempt to map VRegs to. diff --git a/compiler/typecheck/TcForeign.lhs b/compiler/typecheck/TcForeign.lhs index fdb7ce5f23..935127c587 100644 --- a/compiler/typecheck/TcForeign.lhs +++ b/compiler/typecheck/TcForeign.lhs @@ -94,7 +94,7 @@ tcCheckFIType :: Type -> [Type] -> Type -> ForeignImport -> TcM ForeignImport tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport _ safety _ (CLabel _)) = ASSERT( null arg_tys ) - do { checkCg checkCOrAsmOrInterp + do { checkCg checkCOrAsmOrLlvmOrInterp ; checkSafety safety ; check (isFFILabelTy res_ty) (illegalForeignTyErr empty sig_ty) ; return idecl } -- NB check res_ty not sig_ty! @@ -106,7 +106,7 @@ tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport cconv safety _ CWrapper) = do -- valid foreign type. For legacy reasons ft -> IO (Ptr ft) as well -- as ft -> IO Addr is accepted, too. The use of the latter two forms -- is DEPRECATED, though. - checkCg checkCOrAsmOrInterp + checkCg checkCOrAsmOrLlvmOrInterp checkCConv cconv checkSafety safety case arg_tys of @@ -121,7 +121,7 @@ tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport cconv safety _ CWrapper) = do tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport cconv safety _ (CFunction target)) | isDynamicTarget target = do -- Foreign import dynamic - checkCg checkCOrAsmOrInterp + checkCg checkCOrAsmOrLlvmOrInterp checkCConv cconv checkSafety safety case arg_tys of -- The first arg must be Ptr, FunPtr, or Addr @@ -139,7 +139,7 @@ tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport cconv safety _ (CFunction tar dflags <- getDOpts check (dopt Opt_GHCForeignImportPrim dflags) (text "Use -XGHCForeignImportPrim to allow `foreign import prim'.") - checkCg (checkCOrAsmOrDotNetOrInterp) + checkCg (checkCOrAsmOrLlvmOrDotNetOrInterp) checkCTarget target check (playSafe safety) (text "The safe/unsafe annotation should not be used with `foreign import prim'.") @@ -148,7 +148,7 @@ tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport cconv safety _ (CFunction tar checkForeignRes nonIOok (isFFIPrimResultTy dflags) res_ty return idecl | otherwise = do -- Normal foreign import - checkCg (checkCOrAsmOrDotNetOrInterp) + checkCg (checkCOrAsmOrLlvmOrDotNetOrInterp) checkCConv cconv checkSafety safety checkCTarget target @@ -163,7 +163,7 @@ tcCheckFIType sig_ty arg_tys res_ty idecl@(CImport cconv safety _ (CFunction tar -- that the C identifier is valid for C checkCTarget :: CCallTarget -> TcM () checkCTarget (StaticTarget str _) = do - checkCg checkCOrAsmOrDotNetOrInterp + checkCg checkCOrAsmOrLlvmOrDotNetOrInterp check (isCLabelString str) (badCName str) checkCTarget DynamicTarget = panic "checkCTarget DynamicTarget" @@ -247,7 +247,7 @@ tcFExport d = pprPanic "tcFExport" (ppr d) \begin{code} tcCheckFEType :: Type -> ForeignExport -> TcM () tcCheckFEType sig_ty (CExport (CExportStatic str cconv)) = do - checkCg checkCOrAsm + checkCg checkCOrAsmOrLlvm check (isCLabelString str) (badCName str) checkCConv cconv checkForeignArgs isFFIExternalTy arg_tys @@ -297,25 +297,28 @@ checkForeignRes non_io_result_ok pred_res_ty ty \end{code} \begin{code} -checkCOrAsm :: HscTarget -> Maybe SDoc -checkCOrAsm HscC = Nothing -checkCOrAsm HscAsm = Nothing -checkCOrAsm _ - = Just (text "requires via-C or native code generation (-fvia-C)") - -checkCOrAsmOrInterp :: HscTarget -> Maybe SDoc -checkCOrAsmOrInterp HscC = Nothing -checkCOrAsmOrInterp HscAsm = Nothing -checkCOrAsmOrInterp HscInterpreted = Nothing -checkCOrAsmOrInterp _ - = Just (text "requires interpreted, C or native code generation") - -checkCOrAsmOrDotNetOrInterp :: HscTarget -> Maybe SDoc -checkCOrAsmOrDotNetOrInterp HscC = Nothing -checkCOrAsmOrDotNetOrInterp HscAsm = Nothing -checkCOrAsmOrDotNetOrInterp HscInterpreted = Nothing -checkCOrAsmOrDotNetOrInterp _ - = Just (text "requires interpreted, C or native code generation") +checkCOrAsmOrLlvm :: HscTarget -> Maybe SDoc +checkCOrAsmOrLlvm HscC = Nothing +checkCOrAsmOrLlvm HscAsm = Nothing +checkCOrAsmOrLlvm HscLlvm = Nothing +checkCOrAsmOrLlvm _ + = Just (text "requires via-C, llvm (-fllvm) or native code generation (-fvia-C)") + +checkCOrAsmOrLlvmOrInterp :: HscTarget -> Maybe SDoc +checkCOrAsmOrLlvmOrInterp HscC = Nothing +checkCOrAsmOrLlvmOrInterp HscAsm = Nothing +checkCOrAsmOrLlvmOrInterp HscLlvm = Nothing +checkCOrAsmOrLlvmOrInterp HscInterpreted = Nothing +checkCOrAsmOrLlvmOrInterp _ + = Just (text "requires interpreted, C, Llvm or native code generation") + +checkCOrAsmOrLlvmOrDotNetOrInterp :: HscTarget -> Maybe SDoc +checkCOrAsmOrLlvmOrDotNetOrInterp HscC = Nothing +checkCOrAsmOrLlvmOrDotNetOrInterp HscAsm = Nothing +checkCOrAsmOrLlvmOrDotNetOrInterp HscLlvm = Nothing +checkCOrAsmOrLlvmOrDotNetOrInterp HscInterpreted = Nothing +checkCOrAsmOrLlvmOrDotNetOrInterp _ + = Just (text "requires interpreted, C, Llvm or native code generation") checkCg :: (HscTarget -> Maybe SDoc) -> TcM () checkCg check = do diff --git a/driver/mangler/ghc-asm.lprl b/driver/mangler/ghc-asm.lprl index 1ff78a4aa8..a354caada7 100644 --- a/driver/mangler/ghc-asm.lprl +++ b/driver/mangler/ghc-asm.lprl @@ -1445,7 +1445,8 @@ sub mangle_asm { # If this is an entry point with an info table, # eliminate the entry symbol and all directives involving it. - if (defined($infochk{$symb}) && $TargetPlatform !~ /^ia64-/m) { + if (defined($infochk{$symb}) && $TargetPlatform !~ /^ia64-/m + && $TABLES_NEXT_TO_CODE ~~ "YES") { @o = (); foreach $l (split(/\n/m,$c)) { next if $l =~ /^.*$symb_(entry|ret)${T_POST_LBL}/m; @@ -1880,7 +1881,8 @@ sub rev_tbl { # use vars '$discard1'; # Unused? local($symb, $tbl, $discard1) = @_; - return ($tbl) if ($TargetPlatform =~ /^ia64-/m); + return ($tbl) if ($TargetPlatform =~ /^ia64-/m + || $TABLES_NEXT_TO_CODE ~~ "NO"); local($before) = ''; local($label) = ''; diff --git a/mk/config.mk.in b/mk/config.mk.in index 40f1ea7c4f..e1a124c579 100644 --- a/mk/config.mk.in +++ b/mk/config.mk.in @@ -139,6 +139,9 @@ GhcWithNativeCodeGen=$(strip\ $(if $(filter YESYESNO,\ $(OsSupportsNCG)$(ArchSupportsNCG)$(GhcUnregisterised)),YES,NO)) +# Build a compiler with the llvm code generator backend +GhcWithLlvmCodeGen=NO + HaveLibDL = @HaveLibDL@ # ArchSupportsSMP should be set iff there is support for that arch in diff --git a/rules/build-perl.mk b/rules/build-perl.mk index cfa71cc53d..5ee1d2350b 100644 --- a/rules/build-perl.mk +++ b/rules/build-perl.mk @@ -45,6 +45,7 @@ $1/$2/$$($1_$2_PROG): $1/$2/$$($1_$2_PROG).prl "$$(RM)" $$(RM_OPTS) $$@ echo '#!$$(PERL)' >> $$@ echo '$$$$TARGETPLATFORM = "$$(TARGETPLATFORM)";' >> $$@ + echo '$$$$TABLES_NEXT_TO_CODE = "$(GhcEnableTablesNextToCode)";' >> $$@ cat $$< >> $$@ $$($1_$2_INPLACE): $1/$2/$$($1_$2_PROG) | $$$$(dir $$$$@)/. |