diff options
Diffstat (limited to 'compiler/cmm')
-rw-r--r-- | compiler/cmm/CLabel.hs | 831 | ||||
-rw-r--r-- | compiler/cmm/Cmm.hs | 322 | ||||
-rw-r--r-- | compiler/cmm/CmmLex.x | 311 | ||||
-rw-r--r-- | compiler/cmm/CmmLint.hs | 159 | ||||
-rw-r--r-- | compiler/cmm/CmmOpt.hs | 507 | ||||
-rw-r--r-- | compiler/cmm/CmmParse.y | 890 | ||||
-rw-r--r-- | compiler/cmm/CmmUtils.hs | 177 | ||||
-rw-r--r-- | compiler/cmm/MachOp.hs | 652 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 1028 | ||||
-rw-r--r-- | compiler/cmm/PprCmm.hs | 462 |
10 files changed, 5339 insertions, 0 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs new file mode 100644 index 0000000000..e42b92db5a --- /dev/null +++ b/compiler/cmm/CLabel.hs @@ -0,0 +1,831 @@ +----------------------------------------------------------------------------- +-- +-- Object-file symbols (called CLabel for histerical raisins). +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CLabel ( + CLabel, -- abstract type + + mkClosureLabel, + mkSRTLabel, + mkSRTDescLabel, + mkInfoTableLabel, + mkEntryLabel, + mkSlowEntryLabel, + mkConEntryLabel, + mkStaticConEntryLabel, + mkRednCountsLabel, + mkConInfoTableLabel, + mkStaticInfoTableLabel, + mkApEntryLabel, + mkApInfoTableLabel, + mkClosureTableLabel, + + mkLocalClosureLabel, + mkLocalInfoTableLabel, + mkLocalEntryLabel, + mkLocalConEntryLabel, + mkLocalStaticConEntryLabel, + mkLocalConInfoTableLabel, + mkLocalStaticInfoTableLabel, + mkLocalClosureTableLabel, + + mkReturnPtLabel, + mkReturnInfoLabel, + mkAltLabel, + mkDefaultLabel, + mkBitmapLabel, + mkStringLitLabel, + + mkAsmTempLabel, + + mkModuleInitLabel, + mkPlainModuleInitLabel, + + mkSplitMarkerLabel, + mkDirty_MUT_VAR_Label, + mkUpdInfoLabel, + mkSeqInfoLabel, + mkIndStaticInfoLabel, + mkMainCapabilityLabel, + mkMAP_FROZEN_infoLabel, + mkMAP_DIRTY_infoLabel, + mkEMPTY_MVAR_infoLabel, + + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkSECAFBlackHoleInfoTableLabel, + mkRtsPrimOpLabel, + mkRtsSlowTickyCtrLabel, + + moduleRegdLabel, + + mkSelectorInfoLabel, + mkSelectorEntryLabel, + + mkRtsInfoLabel, + mkRtsEntryLabel, + mkRtsRetInfoLabel, + mkRtsRetLabel, + mkRtsCodeLabel, + mkRtsDataLabel, + + mkRtsInfoLabelFS, + mkRtsEntryLabelFS, + mkRtsRetInfoLabelFS, + mkRtsRetLabelFS, + mkRtsCodeLabelFS, + mkRtsDataLabelFS, + + mkRtsApFastLabel, + + mkForeignLabel, + + mkCCLabel, mkCCSLabel, + + DynamicLinkerLabelInfo(..), + mkDynamicLinkerLabel, + dynamicLinkerLabelInfo, + + mkPicBaseLabel, + mkDeadStripPreventer, + + infoLblToEntryLbl, entryLblToInfoLbl, + needsCDecl, isAsmTemp, externallyVisibleCLabel, + CLabelType(..), labelType, labelDynamic, + + pprCLabel + ) where + + +#include "HsVersions.h" + +import Packages ( HomeModules ) +import StaticFlags ( opt_Static, opt_DoTickyProfiling ) +import Packages ( isHomeModule, isDllName ) +import DataCon ( ConTag ) +import Module ( moduleFS, Module ) +import Name ( Name, isExternalName ) +import Unique ( pprUnique, Unique ) +import PrimOp ( PrimOp ) +import Config ( cLeadingUnderscore ) +import CostCentre ( CostCentre, CostCentreStack ) +import Outputable +import FastString + +-- ----------------------------------------------------------------------------- +-- The CLabel type + +{- +CLabel is an abstract type that supports the following operations: + + - Pretty printing + + - In a C file, does it need to be declared before use? (i.e. is it + guaranteed to be already in scope in the places we need to refer to it?) + + - If it needs to be declared, what type (code or data) should it be + declared to have? + + - Is it visible outside this object file or not? + + - Is it "dynamic" (see details below) + + - Eq and Ord, so that we can make sets of CLabels (currently only + used in outputting C as far as I can tell, to avoid generating + more than one declaration for any given label). + + - Converting an info table label into an entry label. +-} + +data CLabel + = IdLabel -- A family of labels related to the + Name -- definition of a particular Id or Con + IdLabelInfo + + | DynIdLabel -- like IdLabel, but in a separate package, + Name -- and might therefore need a dynamic + IdLabelInfo -- reference. + + | CaseLabel -- A family of labels related to a particular + -- case expression. + {-# UNPACK #-} !Unique -- Unique says which case expression + CaseLabelInfo + + | AsmTempLabel + {-# UNPACK #-} !Unique + + | StringLitLabel + {-# UNPACK #-} !Unique + + | ModuleInitLabel + Module -- the module name + String -- its "way" + Bool -- True <=> is in a different package + -- at some point we might want some kind of version number in + -- the module init label, to guard against compiling modules in + -- the wrong order. We can't use the interface file version however, + -- because we don't always recompile modules which depend on a module + -- whose version has changed. + + | PlainModuleInitLabel -- without the vesrion & way info + Module + Bool -- True <=> is in a different package + + | ModuleRegdLabel + + | RtsLabel RtsLabelInfo + + | ForeignLabel FastString -- a 'C' (or otherwise foreign) label + (Maybe Int) -- possible '@n' suffix for stdcall functions + -- When generating C, the '@n' suffix is omitted, but when + -- generating assembler we must add it to the label. + Bool -- True <=> is dynamic + + | CC_Label CostCentre + | CCS_Label CostCentreStack + + -- Dynamic Linking in the NCG: + -- generated and used inside the NCG only, + -- see module PositionIndependentCode for details. + + | DynamicLinkerLabel DynamicLinkerLabelInfo CLabel + -- special variants of a label used for dynamic linking + + | PicBaseLabel -- a label used as a base for PIC calculations + -- on some platforms. + -- It takes the form of a local numeric + -- assembler label '1'; it is pretty-printed + -- as 1b, referring to the previous definition + -- of 1: in the assembler source file. + + | DeadStripPreventer CLabel + -- label before an info table to prevent excessive dead-stripping on darwin + + deriving (Eq, Ord) + +data IdLabelInfo + = Closure -- Label for closure + | SRT -- Static reference table + | SRTDesc -- Static reference table descriptor + | InfoTable -- Info tables for closures; always read-only + | Entry -- entry point + | Slow -- slow entry point + + | RednCounts -- Label of place to keep Ticky-ticky info for + -- this Id + + | Bitmap -- A bitmap (function or case return) + + | ConEntry -- constructor entry point + | ConInfoTable -- corresponding info table + | StaticConEntry -- static constructor entry point + | StaticInfoTable -- corresponding info table + + | ClosureTable -- table of closures for Enum tycons + + deriving (Eq, Ord) + + +data CaseLabelInfo + = CaseReturnPt + | CaseReturnInfo + | CaseAlt ConTag + | CaseDefault + deriving (Eq, Ord) + + +data RtsLabelInfo + = RtsSelectorInfoTable Bool{-updatable-} Int{-offset-} -- Selector thunks + | RtsSelectorEntry Bool{-updatable-} Int{-offset-} + + | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- AP thunks + | RtsApEntry Bool{-updatable-} Int{-arity-} + + | RtsPrimOp PrimOp + + | RtsInfo LitString -- misc rts info tables + | RtsEntry LitString -- misc rts entry points + | RtsRetInfo LitString -- misc rts ret info tables + | RtsRet LitString -- misc rts return points + | RtsData LitString -- misc rts data bits, eg CHARLIKE_closure + | RtsCode LitString -- misc rts code + + | RtsInfoFS FastString -- misc rts info tables + | RtsEntryFS FastString -- misc rts entry points + | RtsRetInfoFS FastString -- misc rts ret info tables + | RtsRetFS FastString -- misc rts return points + | RtsDataFS FastString -- misc rts data bits, eg CHARLIKE_closure + | RtsCodeFS FastString -- misc rts code + + | RtsApFast LitString -- _fast versions of generic apply + + | RtsSlowTickyCtr String + + deriving (Eq, Ord) + -- NOTE: Eq on LitString compares the pointer only, so this isn't + -- a real equality. + +data DynamicLinkerLabelInfo + = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt + | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo + | GotSymbolPtr -- ELF: foo@got + | GotSymbolOffset -- ELF: foo@gotoff + + deriving (Eq, Ord) + +-- ----------------------------------------------------------------------------- +-- Constructing CLabels + +-- These are always local: +mkSRTLabel name = IdLabel name SRT +mkSRTDescLabel name = IdLabel name SRTDesc +mkSlowEntryLabel name = IdLabel name Slow +mkBitmapLabel name = IdLabel name Bitmap +mkRednCountsLabel name = IdLabel name RednCounts + +-- These have local & (possibly) external variants: +mkLocalClosureLabel name = IdLabel name Closure +mkLocalInfoTableLabel name = IdLabel name InfoTable +mkLocalEntryLabel name = IdLabel name Entry +mkLocalClosureTableLabel name = IdLabel name ClosureTable + +mkClosureLabel hmods name + | isDllName hmods name = DynIdLabel name Closure + | otherwise = IdLabel name Closure + +mkInfoTableLabel hmods name + | isDllName hmods name = DynIdLabel name InfoTable + | otherwise = IdLabel name InfoTable + +mkEntryLabel hmods name + | isDllName hmods name = DynIdLabel name Entry + | otherwise = IdLabel name Entry + +mkClosureTableLabel hmods name + | isDllName hmods name = DynIdLabel name ClosureTable + | otherwise = IdLabel name ClosureTable + +mkLocalConInfoTableLabel con = IdLabel con ConInfoTable +mkLocalConEntryLabel con = IdLabel con ConEntry +mkLocalStaticInfoTableLabel con = IdLabel con StaticInfoTable +mkLocalStaticConEntryLabel con = IdLabel con StaticConEntry + +mkConInfoTableLabel name False = IdLabel name ConInfoTable +mkConInfoTableLabel name True = DynIdLabel name ConInfoTable + +mkStaticInfoTableLabel name False = IdLabel name StaticInfoTable +mkStaticInfoTableLabel name True = DynIdLabel name StaticInfoTable + +mkConEntryLabel hmods name + | isDllName hmods name = DynIdLabel name ConEntry + | otherwise = IdLabel name ConEntry + +mkStaticConEntryLabel hmods name + | isDllName hmods name = DynIdLabel name StaticConEntry + | otherwise = IdLabel name StaticConEntry + + +mkReturnPtLabel uniq = CaseLabel uniq CaseReturnPt +mkReturnInfoLabel uniq = CaseLabel uniq CaseReturnInfo +mkAltLabel uniq tag = CaseLabel uniq (CaseAlt tag) +mkDefaultLabel uniq = CaseLabel uniq CaseDefault + +mkStringLitLabel = StringLitLabel +mkAsmTempLabel = AsmTempLabel + +mkModuleInitLabel :: HomeModules -> Module -> String -> CLabel +mkModuleInitLabel hmods mod way + = ModuleInitLabel mod way $! (not (isHomeModule hmods mod)) + +mkPlainModuleInitLabel :: HomeModules -> Module -> CLabel +mkPlainModuleInitLabel hmods mod + = PlainModuleInitLabel mod $! (not (isHomeModule hmods mod)) + + -- Some fixed runtime system labels + +mkSplitMarkerLabel = RtsLabel (RtsCode SLIT("__stg_split_marker")) +mkDirty_MUT_VAR_Label = RtsLabel (RtsCode SLIT("dirty_MUT_VAR")) +mkUpdInfoLabel = RtsLabel (RtsInfo SLIT("stg_upd_frame")) +mkSeqInfoLabel = RtsLabel (RtsInfo SLIT("stg_seq_frame")) +mkIndStaticInfoLabel = RtsLabel (RtsInfo SLIT("stg_IND_STATIC")) +mkMainCapabilityLabel = RtsLabel (RtsData SLIT("MainCapability")) +mkMAP_FROZEN_infoLabel = RtsLabel (RtsInfo SLIT("stg_MUT_ARR_PTRS_FROZEN0")) +mkMAP_DIRTY_infoLabel = RtsLabel (RtsInfo SLIT("stg_MUT_ARR_PTRS_DIRTY")) +mkEMPTY_MVAR_infoLabel = RtsLabel (RtsInfo SLIT("stg_EMPTY_MVAR")) + +mkTopTickyCtrLabel = RtsLabel (RtsData SLIT("top_ct")) +mkCAFBlackHoleInfoTableLabel = RtsLabel (RtsInfo SLIT("stg_CAF_BLACKHOLE")) +mkSECAFBlackHoleInfoTableLabel = if opt_DoTickyProfiling then + RtsLabel (RtsInfo SLIT("stg_SE_CAF_BLACKHOLE")) + else -- RTS won't have info table unless -ticky is on + panic "mkSECAFBlackHoleInfoTableLabel requires -ticky" +mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) + +moduleRegdLabel = ModuleRegdLabel + +mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) +mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) + +mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd off) +mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off) + + -- Foreign labels + +mkForeignLabel :: FastString -> Maybe Int -> Bool -> CLabel +mkForeignLabel str mb_sz is_dynamic = ForeignLabel str mb_sz is_dynamic + + -- Cost centres etc. + +mkCCLabel cc = CC_Label cc +mkCCSLabel ccs = CCS_Label ccs + +mkRtsInfoLabel str = RtsLabel (RtsInfo str) +mkRtsEntryLabel str = RtsLabel (RtsEntry str) +mkRtsRetInfoLabel str = RtsLabel (RtsRetInfo str) +mkRtsRetLabel str = RtsLabel (RtsRet str) +mkRtsCodeLabel str = RtsLabel (RtsCode str) +mkRtsDataLabel str = RtsLabel (RtsData str) + +mkRtsInfoLabelFS str = RtsLabel (RtsInfoFS str) +mkRtsEntryLabelFS str = RtsLabel (RtsEntryFS str) +mkRtsRetInfoLabelFS str = RtsLabel (RtsRetInfoFS str) +mkRtsRetLabelFS str = RtsLabel (RtsRetFS str) +mkRtsCodeLabelFS str = RtsLabel (RtsCodeFS str) +mkRtsDataLabelFS str = RtsLabel (RtsDataFS str) + +mkRtsApFastLabel str = RtsLabel (RtsApFast str) + +mkRtsSlowTickyCtrLabel :: String -> CLabel +mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat) + + -- Dynamic linking + +mkDynamicLinkerLabel :: DynamicLinkerLabelInfo -> CLabel -> CLabel +mkDynamicLinkerLabel = DynamicLinkerLabel + +dynamicLinkerLabelInfo :: CLabel -> Maybe (DynamicLinkerLabelInfo, CLabel) +dynamicLinkerLabelInfo (DynamicLinkerLabel info lbl) = Just (info, lbl) +dynamicLinkerLabelInfo _ = Nothing + + -- Position independent code + +mkPicBaseLabel :: CLabel +mkPicBaseLabel = PicBaseLabel + +mkDeadStripPreventer :: CLabel -> CLabel +mkDeadStripPreventer lbl = DeadStripPreventer lbl + +-- ----------------------------------------------------------------------------- +-- Converting info labels to entry labels. + +infoLblToEntryLbl :: CLabel -> CLabel +infoLblToEntryLbl (IdLabel n InfoTable) = IdLabel n Entry +infoLblToEntryLbl (IdLabel n ConInfoTable) = IdLabel n ConEntry +infoLblToEntryLbl (IdLabel n StaticInfoTable) = IdLabel n StaticConEntry +infoLblToEntryLbl (DynIdLabel n InfoTable) = DynIdLabel n Entry +infoLblToEntryLbl (DynIdLabel n ConInfoTable) = DynIdLabel n ConEntry +infoLblToEntryLbl (DynIdLabel n StaticInfoTable) = DynIdLabel n StaticConEntry +infoLblToEntryLbl (CaseLabel n CaseReturnInfo) = CaseLabel n CaseReturnPt +infoLblToEntryLbl (RtsLabel (RtsInfo s)) = RtsLabel (RtsEntry s) +infoLblToEntryLbl (RtsLabel (RtsRetInfo s)) = RtsLabel (RtsRet s) +infoLblToEntryLbl (RtsLabel (RtsInfoFS s)) = RtsLabel (RtsEntryFS s) +infoLblToEntryLbl (RtsLabel (RtsRetInfoFS s)) = RtsLabel (RtsRetFS s) +infoLblToEntryLbl _ = panic "CLabel.infoLblToEntryLbl" + +entryLblToInfoLbl :: CLabel -> CLabel +entryLblToInfoLbl (IdLabel n Entry) = IdLabel n InfoTable +entryLblToInfoLbl (IdLabel n ConEntry) = IdLabel n ConInfoTable +entryLblToInfoLbl (IdLabel n StaticConEntry) = IdLabel n StaticInfoTable +entryLblToInfoLbl (DynIdLabel n Entry) = DynIdLabel n InfoTable +entryLblToInfoLbl (DynIdLabel n ConEntry) = DynIdLabel n ConInfoTable +entryLblToInfoLbl (DynIdLabel n StaticConEntry) = DynIdLabel n StaticInfoTable +entryLblToInfoLbl (CaseLabel n CaseReturnPt) = CaseLabel n CaseReturnInfo +entryLblToInfoLbl (RtsLabel (RtsEntry s)) = RtsLabel (RtsInfo s) +entryLblToInfoLbl (RtsLabel (RtsRet s)) = RtsLabel (RtsRetInfo s) +entryLblToInfoLbl (RtsLabel (RtsEntryFS s)) = RtsLabel (RtsInfoFS s) +entryLblToInfoLbl (RtsLabel (RtsRetFS s)) = RtsLabel (RtsRetInfoFS s) +entryLblToInfoLbl l = pprPanic "CLabel.entryLblToInfoLbl" (pprCLabel l) + +-- ----------------------------------------------------------------------------- +-- Does a CLabel need declaring before use or not? + +needsCDecl :: CLabel -> Bool + -- False <=> it's pre-declared; don't bother + -- don't bother declaring SRT & Bitmap labels, we always make sure + -- they are defined before use. +needsCDecl (IdLabel _ SRT) = False +needsCDecl (IdLabel _ SRTDesc) = False +needsCDecl (IdLabel _ Bitmap) = False +needsCDecl (IdLabel _ _) = True +needsCDecl (DynIdLabel _ _) = True +needsCDecl (CaseLabel _ _) = True +needsCDecl (ModuleInitLabel _ _ _) = True +needsCDecl (PlainModuleInitLabel _ _) = True +needsCDecl ModuleRegdLabel = False + +needsCDecl (StringLitLabel _) = False +needsCDecl (AsmTempLabel _) = False +needsCDecl (RtsLabel _) = False +needsCDecl (ForeignLabel _ _ _) = False +needsCDecl (CC_Label _) = True +needsCDecl (CCS_Label _) = True + +-- Whether the label is an assembler temporary: + +isAsmTemp :: CLabel -> Bool -- is a local temporary for native code generation +isAsmTemp (AsmTempLabel _) = True +isAsmTemp _ = False + +-- ----------------------------------------------------------------------------- +-- Is a CLabel visible outside this object file or not? + +-- From the point of view of the code generator, a name is +-- externally visible if it has to be declared as exported +-- in the .o file's symbol table; that is, made non-static. + +externallyVisibleCLabel :: CLabel -> Bool -- not C "static" +externallyVisibleCLabel (CaseLabel _ _) = False +externallyVisibleCLabel (StringLitLabel _) = False +externallyVisibleCLabel (AsmTempLabel _) = False +externallyVisibleCLabel (ModuleInitLabel _ _ _)= True +externallyVisibleCLabel (PlainModuleInitLabel _ _)= True +externallyVisibleCLabel ModuleRegdLabel = False +externallyVisibleCLabel (RtsLabel _) = True +externallyVisibleCLabel (ForeignLabel _ _ _) = True +externallyVisibleCLabel (IdLabel name _) = isExternalName name +externallyVisibleCLabel (DynIdLabel name _) = isExternalName name +externallyVisibleCLabel (CC_Label _) = True +externallyVisibleCLabel (CCS_Label _) = True +externallyVisibleCLabel (DynamicLinkerLabel _ _) = False + +-- ----------------------------------------------------------------------------- +-- Finding the "type" of a CLabel + +-- For generating correct types in label declarations: + +data CLabelType + = CodeLabel + | DataLabel + +labelType :: CLabel -> CLabelType +labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsData _)) = DataLabel +labelType (RtsLabel (RtsCode _)) = CodeLabel +labelType (RtsLabel (RtsInfo _)) = DataLabel +labelType (RtsLabel (RtsEntry _)) = CodeLabel +labelType (RtsLabel (RtsRetInfo _)) = DataLabel +labelType (RtsLabel (RtsRet _)) = CodeLabel +labelType (RtsLabel (RtsDataFS _)) = DataLabel +labelType (RtsLabel (RtsCodeFS _)) = CodeLabel +labelType (RtsLabel (RtsInfoFS _)) = DataLabel +labelType (RtsLabel (RtsEntryFS _)) = CodeLabel +labelType (RtsLabel (RtsRetInfoFS _)) = DataLabel +labelType (RtsLabel (RtsRetFS _)) = CodeLabel +labelType (RtsLabel (RtsApFast _)) = CodeLabel +labelType (CaseLabel _ CaseReturnInfo) = DataLabel +labelType (CaseLabel _ _) = CodeLabel +labelType (ModuleInitLabel _ _ _) = CodeLabel +labelType (PlainModuleInitLabel _ _) = CodeLabel + +labelType (IdLabel _ info) = idInfoLabelType info +labelType (DynIdLabel _ info) = idInfoLabelType info +labelType _ = DataLabel + +idInfoLabelType info = + case info of + InfoTable -> DataLabel + Closure -> DataLabel + Bitmap -> DataLabel + ConInfoTable -> DataLabel + StaticInfoTable -> DataLabel + ClosureTable -> DataLabel + _ -> CodeLabel + + +-- ----------------------------------------------------------------------------- +-- Does a CLabel need dynamic linkage? + +-- When referring to data in code, we need to know whether +-- that data resides in a DLL or not. [Win32 only.] +-- @labelDynamic@ returns @True@ if the label is located +-- in a DLL, be it a data reference or not. + +labelDynamic :: CLabel -> Bool +labelDynamic lbl = + case lbl of + RtsLabel _ -> not opt_Static -- i.e., is the RTS in a DLL or not? + IdLabel n k -> False + DynIdLabel n k -> True +#if mingw32_TARGET_OS + ForeignLabel _ _ d -> d +#else + -- On Mac OS X and on ELF platforms, false positives are OK, + -- so we claim that all foreign imports come from dynamic libraries + ForeignLabel _ _ _ -> True +#endif + ModuleInitLabel m _ dyn -> not opt_Static && dyn + PlainModuleInitLabel m dyn -> not opt_Static && dyn + + -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. + _ -> False + +{- +OLD?: These GRAN functions are needed for spitting out GRAN_FETCH() at the +right places. It is used to detect when the abstractC statement of an +CCodeBlock actually contains the code for a slow entry point. -- HWL + +We need at least @Eq@ for @CLabels@, because we want to avoid +duplicate declarations in generating C (see @labelSeenTE@ in +@PprAbsC@). +-} + +----------------------------------------------------------------------------- +-- Printing out CLabels. + +{- +Convention: + + <name>_<type> + +where <name> is <Module>_<name> for external names and <unique> for +internal names. <type> is one of the following: + + info Info table + srt Static reference table + srtd Static reference table descriptor + entry Entry code (function, closure) + slow Slow entry code (if any) + ret Direct return address + vtbl Vector table + <n>_alt Case alternative (tag n) + dflt Default case alternative + btm Large bitmap vector + closure Static closure + con_entry Dynamic Constructor entry code + con_info Dynamic Constructor info table + static_entry Static Constructor entry code + static_info Static Constructor info table + sel_info Selector info table + sel_entry Selector entry code + cc Cost centre + ccs Cost centre stack + +Many of these distinctions are only for documentation reasons. For +example, _ret is only distinguished from _entry to make it easy to +tell whether a code fragment is a return point or a closure/function +entry. +-} + +instance Outputable CLabel where + ppr = pprCLabel + +pprCLabel :: CLabel -> SDoc + +#if ! OMIT_NATIVE_CODEGEN +pprCLabel (AsmTempLabel u) + = getPprStyle $ \ sty -> + if asmStyle sty then + ptext asmTempLabelPrefix <> pprUnique u + else + char '_' <> pprUnique u + +pprCLabel (DynamicLinkerLabel info lbl) + = pprDynamicLinkerAsmLabel info lbl + +pprCLabel PicBaseLabel + = ptext SLIT("1b") + +pprCLabel (DeadStripPreventer lbl) + = pprCLabel lbl <> ptext SLIT("_dsp") +#endif + +pprCLabel lbl = +#if ! OMIT_NATIVE_CODEGEN + getPprStyle $ \ sty -> + if asmStyle sty then + maybe_underscore (pprAsmCLbl lbl) + else +#endif + pprCLbl lbl + +maybe_underscore doc + | underscorePrefix = pp_cSEP <> doc + | otherwise = doc + +#ifdef mingw32_TARGET_OS +-- In asm mode, we need to put the suffix on a stdcall ForeignLabel. +-- (The C compiler does this itself). +pprAsmCLbl (ForeignLabel fs (Just sz) _) + = ftext fs <> char '@' <> int sz +#endif +pprAsmCLbl lbl + = pprCLbl lbl + +pprCLbl (StringLitLabel u) + = pprUnique u <> ptext SLIT("_str") + +pprCLbl (CaseLabel u CaseReturnPt) + = hcat [pprUnique u, ptext SLIT("_ret")] +pprCLbl (CaseLabel u CaseReturnInfo) + = hcat [pprUnique u, ptext SLIT("_info")] +pprCLbl (CaseLabel u (CaseAlt tag)) + = hcat [pprUnique u, pp_cSEP, int tag, ptext SLIT("_alt")] +pprCLbl (CaseLabel u CaseDefault) + = hcat [pprUnique u, ptext SLIT("_dflt")] + +pprCLbl (RtsLabel (RtsCode str)) = ptext str +pprCLbl (RtsLabel (RtsData str)) = ptext str +pprCLbl (RtsLabel (RtsCodeFS str)) = ftext str +pprCLbl (RtsLabel (RtsDataFS str)) = ftext str + +pprCLbl (RtsLabel (RtsApFast str)) = ptext str <> ptext SLIT("_fast") + +pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) + = hcat [ptext SLIT("stg_sel_"), text (show offset), + ptext (if upd_reqd + then SLIT("_upd_info") + else SLIT("_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsSelectorEntry upd_reqd offset)) + = hcat [ptext SLIT("stg_sel_"), text (show offset), + ptext (if upd_reqd + then SLIT("_upd_entry") + else SLIT("_noupd_entry")) + ] + +pprCLbl (RtsLabel (RtsApInfoTable upd_reqd arity)) + = hcat [ptext SLIT("stg_ap_"), text (show arity), + ptext (if upd_reqd + then SLIT("_upd_info") + else SLIT("_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsApEntry upd_reqd arity)) + = hcat [ptext SLIT("stg_ap_"), text (show arity), + ptext (if upd_reqd + then SLIT("_upd_entry") + else SLIT("_noupd_entry")) + ] + +pprCLbl (RtsLabel (RtsInfo fs)) + = ptext fs <> ptext SLIT("_info") + +pprCLbl (RtsLabel (RtsEntry fs)) + = ptext fs <> ptext SLIT("_entry") + +pprCLbl (RtsLabel (RtsRetInfo fs)) + = ptext fs <> ptext SLIT("_info") + +pprCLbl (RtsLabel (RtsRet fs)) + = ptext fs <> ptext SLIT("_ret") + +pprCLbl (RtsLabel (RtsInfoFS fs)) + = ftext fs <> ptext SLIT("_info") + +pprCLbl (RtsLabel (RtsEntryFS fs)) + = ftext fs <> ptext SLIT("_entry") + +pprCLbl (RtsLabel (RtsRetInfoFS fs)) + = ftext fs <> ptext SLIT("_info") + +pprCLbl (RtsLabel (RtsRetFS fs)) + = ftext fs <> ptext SLIT("_ret") + +pprCLbl (RtsLabel (RtsPrimOp primop)) + = ppr primop <> ptext SLIT("_fast") + +pprCLbl (RtsLabel (RtsSlowTickyCtr pat)) + = ptext SLIT("SLOW_CALL_") <> text pat <> ptext SLIT("_ctr") + +pprCLbl ModuleRegdLabel + = ptext SLIT("_module_registered") + +pprCLbl (ForeignLabel str _ _) + = ftext str + +pprCLbl (IdLabel name flavor) = ppr name <> ppIdFlavor flavor +pprCLbl (DynIdLabel name flavor) = ppr name <> ppIdFlavor flavor + +pprCLbl (CC_Label cc) = ppr cc +pprCLbl (CCS_Label ccs) = ppr ccs + +pprCLbl (ModuleInitLabel mod way _) + = ptext SLIT("__stginit_") <> ppr mod + <> char '_' <> text way +pprCLbl (PlainModuleInitLabel mod _) + = ptext SLIT("__stginit_") <> ppr mod + +ppIdFlavor :: IdLabelInfo -> SDoc +ppIdFlavor x = pp_cSEP <> + (case x of + Closure -> ptext SLIT("closure") + SRT -> ptext SLIT("srt") + SRTDesc -> ptext SLIT("srtd") + InfoTable -> ptext SLIT("info") + Entry -> ptext SLIT("entry") + Slow -> ptext SLIT("slow") + RednCounts -> ptext SLIT("ct") + Bitmap -> ptext SLIT("btm") + ConEntry -> ptext SLIT("con_entry") + ConInfoTable -> ptext SLIT("con_info") + StaticConEntry -> ptext SLIT("static_entry") + StaticInfoTable -> ptext SLIT("static_info") + ClosureTable -> ptext SLIT("closure_tbl") + ) + + +pp_cSEP = char '_' + +-- ----------------------------------------------------------------------------- +-- Machine-dependent knowledge about labels. + +underscorePrefix :: Bool -- leading underscore on assembler labels? +underscorePrefix = (cLeadingUnderscore == "YES") + +asmTempLabelPrefix :: LitString -- for formatting labels +asmTempLabelPrefix = +#if alpha_TARGET_OS + {- The alpha assembler likes temporary labels to look like $L123 + instead of L123. (Don't toss the L, because then Lf28 + turns into $f28.) + -} + SLIT("$") +#elif darwin_TARGET_OS + SLIT("L") +#else + SLIT(".L") +#endif + +pprDynamicLinkerAsmLabel :: DynamicLinkerLabelInfo -> CLabel -> SDoc + +#if darwin_TARGET_OS +pprDynamicLinkerAsmLabel SymbolPtr lbl + = char 'L' <> pprCLabel lbl <> text "$non_lazy_ptr" +pprDynamicLinkerAsmLabel CodeStub lbl + = char 'L' <> pprCLabel lbl <> text "$stub" +#elif powerpc_TARGET_ARCH && linux_TARGET_OS +pprDynamicLinkerAsmLabel CodeStub lbl + = pprCLabel lbl <> text "@plt" +pprDynamicLinkerAsmLabel SymbolPtr lbl + = text ".LC_" <> pprCLabel lbl +#elif linux_TARGET_OS +pprDynamicLinkerAsmLabel CodeStub lbl + = pprCLabel lbl <> text "@plt" +pprDynamicLinkerAsmLabel GotSymbolPtr lbl + = pprCLabel lbl <> text "@got" +pprDynamicLinkerAsmLabel GotSymbolOffset lbl + = pprCLabel lbl <> text "@gotoff" +pprDynamicLinkerAsmLabel SymbolPtr lbl + = text ".LC_" <> pprCLabel lbl +#elif mingw32_TARGET_OS +pprDynamicLinkerAsmLabel SymbolPtr lbl + = text "__imp_" <> pprCLabel lbl +#endif +pprDynamicLinkerAsmLabel _ _ + = panic "pprDynamicLinkerAsmLabel" diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs new file mode 100644 index 0000000000..13961c15d3 --- /dev/null +++ b/compiler/cmm/Cmm.hs @@ -0,0 +1,322 @@ +----------------------------------------------------------------------------- +-- +-- Cmm data types +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module Cmm ( + GenCmm(..), Cmm, + GenCmmTop(..), CmmTop, + GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, + CmmStmt(..), + CmmCallTarget(..), + CmmStatic(..), Section(..), + CmmExpr(..), cmmExprRep, + CmmReg(..), cmmRegRep, + CmmLit(..), cmmLitRep, + LocalReg(..), localRegRep, + BlockId(..), + GlobalReg(..), globalRegRep, + + node, nodeReg, spReg, hpReg, + ) where + +#include "HsVersions.h" + +import MachOp +import CLabel ( CLabel ) +import ForeignCall ( CCallConv ) +import Unique ( Unique, Uniquable(..) ) +import FastString ( FastString ) +import DATA_WORD ( Word8 ) + +----------------------------------------------------------------------------- +-- Cmm, CmmTop, CmmBasicBlock +----------------------------------------------------------------------------- + +-- A file is a list of top-level chunks. These may be arbitrarily +-- re-orderd during code generation. + +-- GenCmm is abstracted over +-- (a) the type of static data elements +-- (b) the contents of a basic block. +-- We expect there to be two main instances of this type: +-- (a) Plain C--, i.e. populated with CmmLit and CmmExpr respectively, +-- (b) Native code, populated with instructions +-- +newtype GenCmm d i = Cmm [GenCmmTop d i] + +type Cmm = GenCmm CmmStatic CmmStmt + +-- A top-level chunk, abstracted over the type of the contents of +-- the basic blocks (Cmm or instructions are the likely instantiations). +data GenCmmTop d i + = CmmProc + [d] -- Info table, may be empty + CLabel -- Used to generate both info & entry labels + [LocalReg] -- Argument locals live on entry (C-- procedure params) + [GenBasicBlock i] -- Code, may be empty. The first block is + -- the entry point. The order is otherwise initially + -- unimportant, but at some point the code gen will + -- fix the order. + + -- the BlockId of the first block does not give rise + -- to a label. To jump to the first block in a Proc, + -- use the appropriate CLabel. + + -- some static data. + | CmmData Section [d] -- constant values only + +type CmmTop = GenCmmTop CmmStatic CmmStmt + +-- A basic block containing a single label, at the beginning. +-- The list of basic blocks in a top-level code block may be re-ordered. +-- Fall-through is not allowed: there must be an explicit jump at the +-- end of each basic block, but the code generator might rearrange basic +-- blocks in order to turn some jumps into fallthroughs. + +data GenBasicBlock i = BasicBlock BlockId [i] + -- ToDo: Julian suggests that we might need to annotate this type + -- with the out & in edges in the graph, i.e. two * [BlockId]. This + -- information can be derived from the contents, but it might be + -- helpful to cache it here. + +type CmmBasicBlock = GenBasicBlock CmmStmt + +blockId :: GenBasicBlock i -> BlockId +-- The branch block id is that of the first block in +-- the branch, which is that branch's entry point +blockId (BasicBlock blk_id _ ) = blk_id + +blockStmts :: GenBasicBlock i -> [i] +blockStmts (BasicBlock _ stmts) = stmts + + +----------------------------------------------------------------------------- +-- CmmStmt +-- A "statement". Note that all branches are explicit: there are no +-- control transfers to computed addresses, except when transfering +-- control to a new function. +----------------------------------------------------------------------------- + +data CmmStmt + = CmmNop + | CmmComment FastString + + | CmmAssign CmmReg CmmExpr -- Assign to register + + | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is + -- given by cmmExprRep of the rhs. + + | CmmCall -- A foreign call, with + CmmCallTarget + [(CmmReg,MachHint)] -- zero or more results + [(CmmExpr,MachHint)] -- zero or more arguments + (Maybe [GlobalReg]) -- Global regs that may need to be saved + -- if they will be clobbered by the call. + -- Nothing <=> save *all* globals that + -- might be clobbered. + + | CmmBranch BlockId -- branch to another BB in this fn + + | CmmCondBranch CmmExpr BlockId -- conditional branch + + | CmmSwitch CmmExpr [Maybe BlockId] -- Table branch + -- The scrutinee is zero-based; + -- zero -> first block + -- one -> second block etc + -- Undefined outside range, and when there's a Nothing + + | CmmJump CmmExpr [LocalReg] -- Jump to another function, with these + -- parameters. + +----------------------------------------------------------------------------- +-- CmmCallTarget +-- +-- The target of a CmmCall. +----------------------------------------------------------------------------- + +data CmmCallTarget + = CmmForeignCall -- Call to a foreign function + CmmExpr -- literal label <=> static call + -- other expression <=> dynamic call + CCallConv -- The calling convention + + | CmmPrim -- Call to a "primitive" (eg. sin, cos) + CallishMachOp -- These might be implemented as inline + -- code by the backend. + +----------------------------------------------------------------------------- +-- CmmExpr +-- An expression. Expressions have no side effects. +----------------------------------------------------------------------------- + +data CmmExpr + = CmmLit CmmLit -- Literal + | CmmLoad CmmExpr MachRep -- Read memory location + | CmmReg CmmReg -- Contents of register + | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) + | CmmRegOff CmmReg Int + -- CmmRegOff reg i + -- ** is shorthand only, meaning ** + -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) + -- where rep = cmmRegRep reg + +cmmExprRep :: CmmExpr -> MachRep +cmmExprRep (CmmLit lit) = cmmLitRep lit +cmmExprRep (CmmLoad _ rep) = rep +cmmExprRep (CmmReg reg) = cmmRegRep reg +cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op +cmmExprRep (CmmRegOff reg _) = cmmRegRep reg + +data CmmReg + = CmmLocal LocalReg + | CmmGlobal GlobalReg + deriving( Eq ) + +cmmRegRep :: CmmReg -> MachRep +cmmRegRep (CmmLocal reg) = localRegRep reg +cmmRegRep (CmmGlobal reg) = globalRegRep reg + +data LocalReg + = LocalReg !Unique MachRep + +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +localRegRep :: LocalReg -> MachRep +localRegRep (LocalReg _ rep) = rep + +data CmmLit + = CmmInt Integer MachRep + -- Interpretation: the 2's complement representation of the value + -- is truncated to the specified size. This is easier than trying + -- to keep the value within range, because we don't know whether + -- it will be used as a signed or unsigned value (the MachRep doesn't + -- distinguish between signed & unsigned). + | CmmFloat Rational MachRep + | CmmLabel CLabel -- Address of label + | CmmLabelOff CLabel Int -- Address of label + byte offset + + -- Due to limitations in the C backend, the following + -- MUST ONLY be used inside the info table indicated by label2 + -- (label2 must be the info label), and label1 must be an + -- SRT, a slow entrypoint or a large bitmap (see the Mangler) + -- Don't use it at all unless tablesNextToCode. + -- It is also used inside the NCG during when generating + -- position-independent code. + | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset + +cmmLitRep :: CmmLit -> MachRep +cmmLitRep (CmmInt _ rep) = rep +cmmLitRep (CmmFloat _ rep) = rep +cmmLitRep (CmmLabel _) = wordRep +cmmLitRep (CmmLabelOff _ _) = wordRep +cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep + +----------------------------------------------------------------------------- +-- A local label. + +-- Local labels must be unique within a single compilation unit. + +newtype BlockId = BlockId Unique + deriving (Eq,Ord) + +instance Uniquable BlockId where + getUnique (BlockId u) = u + +----------------------------------------------------------------------------- +-- Static Data +----------------------------------------------------------------------------- + +data Section + = Text + | Data + | ReadOnlyData + | RelocatableReadOnlyData + | UninitialisedData + | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned + | OtherSection String + +data CmmStatic + = CmmStaticLit CmmLit + -- a literal value, size given by cmmLitRep of the literal. + | CmmUninitialised Int + -- uninitialised data, N bytes long + | CmmAlign Int + -- align to next N-byte boundary (N must be a power of 2). + | CmmDataLabel CLabel + -- label the current position in this section. + | CmmString [Word8] + -- string of 8-bit values only, not zero terminated. + +----------------------------------------------------------------------------- +-- Global STG registers +----------------------------------------------------------------------------- + +data GlobalReg + -- Argument and return registers + = VanillaReg -- pointers, unboxed ints and chars + {-# UNPACK #-} !Int -- its number + + | FloatReg -- single-precision floating-point registers + {-# UNPACK #-} !Int -- its number + + | DoubleReg -- double-precision floating-point registers + {-# UNPACK #-} !Int -- its number + + | LongReg -- long int registers (64-bit, really) + {-# UNPACK #-} !Int -- its number + + -- STG registers + | Sp -- Stack ptr; points to last occupied stack location. + | SpLim -- Stack limit + | Hp -- Heap ptr; points to last occupied heap location. + | HpLim -- Heap limit register + | CurrentTSO -- pointer to current thread's TSO + | CurrentNursery -- pointer to allocation area + | HpAlloc -- allocation count for heap check failure + + -- We keep the address of some commonly-called + -- functions in the register table, to keep code + -- size down: + | GCEnter1 -- stg_gc_enter_1 + | GCFun -- stg_gc_fun + + -- Base offset for the register table, used for accessing registers + -- which do not have real registers assigned to them. This register + -- will only appear after we have expanded GlobalReg into memory accesses + -- (where necessary) in the native code generator. + | BaseReg + + -- Base Register for PIC (position-independent code) calculations + -- Only used inside the native code generator. It's exact meaning differs + -- from platform to platform (see module PositionIndependentCode). + | PicBaseReg + + deriving( Eq +#ifdef DEBUG + , Show +#endif + ) + +-- convenient aliases +spReg, hpReg, nodeReg :: CmmReg +spReg = CmmGlobal Sp +hpReg = CmmGlobal Hp +nodeReg = CmmGlobal node + +node :: GlobalReg +node = VanillaReg 1 + +globalRegRep :: GlobalReg -> MachRep +globalRegRep (VanillaReg _) = wordRep +globalRegRep (FloatReg _) = F32 +globalRegRep (DoubleReg _) = F64 +globalRegRep (LongReg _) = I64 +globalRegRep _ = wordRep diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x new file mode 100644 index 0000000000..c2efd17710 --- /dev/null +++ b/compiler/cmm/CmmLex.x @@ -0,0 +1,311 @@ +----------------------------------------------------------------------------- +-- (c) The University of Glasgow, 2004 +-- +-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there +-- are a few minor differences: +-- +-- * extra keywords for our macros, and float32/float64 types +-- * global registers (Sp,Hp, etc.) +-- +----------------------------------------------------------------------------- + +{ +module CmmLex ( + CmmToken(..), cmmlex, + ) where + +#include "HsVersions.h" + +import Cmm +import Lexer + +import SrcLoc +import UniqFM +import StringBuffer +import FastString +import Ctype +import Util ( readRational ) +--import TRACE +} + +$whitechar = [\ \t\n\r\f\v\xa0] +$white_no_nl = $whitechar # \n + +$ascdigit = 0-9 +$unidigit = \x01 +$digit = [$ascdigit $unidigit] +$octit = 0-7 +$hexit = [$digit A-F a-f] + +$unilarge = \x03 +$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$large = [$asclarge $unilarge] + +$unismall = \x04 +$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$small = [$ascsmall $unismall \_] + +$namebegin = [$large $small \_ \. \$ \@] +$namechar = [$namebegin $digit] + +@decimal = $digit+ +@octal = $octit+ +@hexadecimal = $hexit+ +@exponent = [eE] [\-\+]? @decimal + +@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent + +@escape = \\ ([abfnrt\\\'\"\?] | x @hexadecimal | @octal) +@strchar = ($printable # [\"\\]) | @escape + +cmm :- + +$white_no_nl+ ; +^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output + +^\# (line)? { begin line_prag } + +-- single-line line pragmas, of the form +-- # <line> "<file>" <extra-stuff> \n +<line_prag> $digit+ { setLine line_prag1 } +<line_prag1> \" ($printable # \")* \" { setFile line_prag2 } +<line_prag2> .* { pop } + +<0> { + \n ; + + [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char } + + ".." { kw CmmT_DotDot } + "::" { kw CmmT_DoubleColon } + ">>" { kw CmmT_Shr } + "<<" { kw CmmT_Shl } + ">=" { kw CmmT_Ge } + "<=" { kw CmmT_Le } + "==" { kw CmmT_Eq } + "!=" { kw CmmT_Ne } + "&&" { kw CmmT_BoolAnd } + "||" { kw CmmT_BoolOr } + + R@decimal { global_regN VanillaReg } + F@decimal { global_regN FloatReg } + D@decimal { global_regN DoubleReg } + L@decimal { global_regN LongReg } + Sp { global_reg Sp } + SpLim { global_reg SpLim } + Hp { global_reg Hp } + HpLim { global_reg HpLim } + CurrentTSO { global_reg CurrentTSO } + CurrentNursery { global_reg CurrentNursery } + HpAlloc { global_reg HpAlloc } + BaseReg { global_reg BaseReg } + + $namebegin $namechar* { name } + + 0 @octal { tok_octal } + @decimal { tok_decimal } + 0[xX] @hexadecimal { tok_hexadecimal } + @floating_point { strtoken tok_float } + + \" @strchar* \" { strtoken tok_string } +} + +{ +data CmmToken + = CmmT_SpecChar Char + | CmmT_DotDot + | CmmT_DoubleColon + | CmmT_Shr + | CmmT_Shl + | CmmT_Ge + | CmmT_Le + | CmmT_Eq + | CmmT_Ne + | CmmT_BoolAnd + | CmmT_BoolOr + | CmmT_CLOSURE + | CmmT_INFO_TABLE + | CmmT_INFO_TABLE_RET + | CmmT_INFO_TABLE_FUN + | CmmT_INFO_TABLE_CONSTR + | CmmT_INFO_TABLE_SELECTOR + | CmmT_else + | CmmT_export + | CmmT_section + | CmmT_align + | CmmT_goto + | CmmT_if + | CmmT_jump + | CmmT_foreign + | CmmT_import + | CmmT_switch + | CmmT_case + | CmmT_default + | CmmT_bits8 + | CmmT_bits16 + | CmmT_bits32 + | CmmT_bits64 + | CmmT_float32 + | CmmT_float64 + | CmmT_GlobalReg GlobalReg + | CmmT_Name FastString + | CmmT_String String + | CmmT_Int Integer + | CmmT_Float Rational + | CmmT_EOF +#ifdef DEBUG + deriving (Show) +#endif + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = SrcSpan -> StringBuffer -> Int -> P (Located CmmToken) + +begin :: Int -> Action +begin code _span _str _len = do pushLexState code; lexToken + +pop :: Action +pop _span _buf _len = do popLexState; lexToken + +special_char :: Action +special_char span buf len = return (L span (CmmT_SpecChar (currentChar buf))) + +kw :: CmmToken -> Action +kw tok span buf len = return (L span tok) + +global_regN :: (Int -> GlobalReg) -> Action +global_regN con span buf len + = return (L span (CmmT_GlobalReg (con (fromIntegral n)))) + where buf' = stepOn buf + n = parseInteger buf' (len-1) 10 octDecDigit + +global_reg :: GlobalReg -> Action +global_reg r span buf len = return (L span (CmmT_GlobalReg r)) + +strtoken :: (String -> CmmToken) -> Action +strtoken f span buf len = + return (L span $! (f $! lexemeToString buf len)) + +name :: Action +name span buf len = + case lookupUFM reservedWordsFM fs of + Just tok -> return (L span tok) + Nothing -> return (L span (CmmT_Name fs)) + where + fs = lexemeToFastString buf len + +reservedWordsFM = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "CLOSURE", CmmT_CLOSURE ), + ( "INFO_TABLE", CmmT_INFO_TABLE ), + ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ), + ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ), + ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ), + ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ), + ( "else", CmmT_else ), + ( "export", CmmT_export ), + ( "section", CmmT_section ), + ( "align", CmmT_align ), + ( "goto", CmmT_goto ), + ( "if", CmmT_if ), + ( "jump", CmmT_jump ), + ( "foreign", CmmT_foreign ), + ( "import", CmmT_import ), + ( "switch", CmmT_switch ), + ( "case", CmmT_case ), + ( "default", CmmT_default ), + ( "bits8", CmmT_bits8 ), + ( "bits16", CmmT_bits16 ), + ( "bits32", CmmT_bits32 ), + ( "bits64", CmmT_bits64 ), + ( "float32", CmmT_float32 ), + ( "float64", CmmT_float64 ) + ] + +tok_decimal span buf len + = return (L span (CmmT_Int $! parseInteger buf len 10 octDecDigit)) + +tok_octal span buf len + = return (L span (CmmT_Int $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) + +tok_hexadecimal span buf len + = return (L span (CmmT_Int $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) + +tok_float str = CmmT_Float $! readRational str + +tok_string str = CmmT_String (read str) + -- urk, not quite right, but it'll do for now + +-- ----------------------------------------------------------------------------- +-- Line pragmas + +setLine :: Int -> Action +setLine code span buf len = do + let line = parseInteger buf len 10 octDecDigit + setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) + -- subtract one: the line number refers to the *following* line + -- trace ("setLine " ++ show line) $ do + popLexState + pushLexState code + lexToken + +setFile :: Int -> Action +setFile code span buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) + popLexState + pushLexState code + lexToken + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +cmmlex :: (Located CmmToken -> P a) -> P a +cmmlex cont = do + tok@(L _ tok__) <- lexToken + --trace ("token: " ++ show tok__) $ do + cont tok + +lexToken :: P (Located CmmToken) +lexToken = do + inp@(loc1,buf) <- getInput + sc <- getLexState + case alexScan inp sc of + AlexEOF -> do let span = mkSrcSpan loc1 loc1 + setLastToken span 0 + return (L span CmmT_EOF) + AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(end,buf2) len t -> do + setInput inp2 + let span = mkSrcSpan loc1 end + span `seq` setLastToken span len + t span buf len + +-- ----------------------------------------------------------------------------- +-- Monad stuff + +-- Stuff that Alex needs to know about our input type: +type AlexInput = (SrcLoc,StringBuffer) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,s) = prevChar s '\n' + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (loc,s) + | atEnd s = Nothing + | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s')) + where c = currentChar s + loc' = advanceSrcLoc loc c + s' = stepOn s + +getInput :: P AlexInput +getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b) + +setInput :: AlexInput -> P () +setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } () +} diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs new file mode 100644 index 0000000000..fbfb14c165 --- /dev/null +++ b/compiler/cmm/CmmLint.hs @@ -0,0 +1,159 @@ +----------------------------------------------------------------------------- +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CmmLint ( + cmmLint, cmmLintTop + ) where + +#include "HsVersions.h" + +import Cmm +import CLabel ( pprCLabel ) +import MachOp +import Outputable +import PprCmm +import Unique ( getUnique ) +import Constants ( wORD_SIZE ) + +import Monad ( when ) + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: Cmm -> Maybe SDoc +cmmLint (Cmm tops) = runCmmLint $ mapM_ lintCmmTop tops + +cmmLintTop :: CmmTop -> Maybe SDoc +cmmLintTop top = runCmmLint $ lintCmmTop top + +runCmmLint :: CmmLint a -> Maybe SDoc +runCmmLint l = + case unCL l of + Left err -> Just (ptext SLIT("Cmm lint error:") $$ nest 2 err) + Right _ -> Nothing + +lintCmmTop (CmmProc _info lbl _args blocks) + = addLintInfo (text "in proc " <> pprCLabel lbl) $ + mapM_ lintCmmBlock blocks +lintCmmTop _other + = return () + +lintCmmBlock (BasicBlock id stmts) + = addLintInfo (text "in basic block " <> ppr (getUnique id)) $ + mapM_ lintCmmStmt stmts + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: CmmExpr -> CmmLint MachRep +lintCmmExpr (CmmLoad expr rep) = do + lintCmmExpr expr + when (machRepByteWidth rep >= wORD_SIZE) $ + cmmCheckWordAddress expr + return rep +lintCmmExpr expr@(CmmMachOp op args) = do + mapM_ lintCmmExpr args + if map cmmExprRep args == machOpArgReps op + then cmmCheckMachOp op args + else cmmLintMachOpErr expr +lintCmmExpr (CmmRegOff reg offset) + = lintCmmExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) + where rep = cmmRegRep reg +lintCmmExpr lit@(CmmLit (CmmInt _ rep)) + | isFloatingRep rep + = cmmLintErr (text "integer literal with floating MachRep: " <> ppr lit) +lintCmmExpr expr = + return (cmmExprRep expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp op args@[CmmReg reg, CmmLit (CmmInt i _)] + | isWordOffsetReg reg && isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset (CmmMachOp op args) +cmmCheckMachOp op [lit@(CmmLit (CmmInt i _)), reg@(CmmReg _)] + = cmmCheckMachOp op [reg, lit] +cmmCheckMachOp op@(MO_U_Conv from to) args + | isFloatingRep from || isFloatingRep to + = cmmLintErr (text "unsigned conversion from/to floating rep: " + <> ppr (CmmMachOp op args)) +cmmCheckMachOp op args + = return (resultRepOfMachOp op) + +isWordOffsetReg (CmmGlobal Sp) = True +isWordOffsetReg (CmmGlobal Hp) = True +isWordOffsetReg _ = False + +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. +cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) + | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset e +cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) + | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset e +cmmCheckWordAddress _ + = return () + + +lintCmmStmt :: CmmStmt -> CmmLint () +lintCmmStmt stmt@(CmmAssign reg expr) = do + erep <- lintCmmExpr expr + if (erep == cmmRegRep reg) + then return () + else cmmLintAssignErr stmt +lintCmmStmt (CmmStore l r) = do + lintCmmExpr l + lintCmmExpr r + return () +lintCmmStmt (CmmCall _target _res args _vols) = mapM_ (lintCmmExpr.fst) args +lintCmmStmt (CmmCondBranch e _id) = lintCmmExpr e >> return () +lintCmmStmt (CmmSwitch e _branches) = lintCmmExpr e >> return () +lintCmmStmt (CmmJump e _args) = lintCmmExpr e >> return () +lintCmmStmt _other = return () + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: Either SDoc a } + +instance Monad CmmLint where + CmmLint m >>= k = CmmLint $ case m of + Left e -> Left e + Right a -> unCL (k a) + return a = CmmLint (Right a) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $ + case unCL thing of + Left err -> Left (hang info 2 err) + Right a -> Right a + +cmmLintMachOpErr :: CmmExpr -> CmmLint a +cmmLintMachOpErr expr = cmmLintErr (text "in MachOp application: " $$ + nest 2 (pprExpr expr)) + +cmmLintAssignErr :: CmmStmt -> CmmLint a +cmmLintAssignErr stmt = cmmLintErr (text "in assignment: " $$ + nest 2 (pprStmt stmt)) + +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr + = cmmLintErr (text "offset is not a multiple of words: " $$ + nest 2 (pprExpr expr)) diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs new file mode 100644 index 0000000000..c454ff4c6a --- /dev/null +++ b/compiler/cmm/CmmOpt.hs @@ -0,0 +1,507 @@ +----------------------------------------------------------------------------- +-- +-- Cmm optimisation +-- +-- (c) The University of Glasgow 2006 +-- +----------------------------------------------------------------------------- + +module CmmOpt ( + cmmMiniInline, + cmmMachOpFold, + cmmLoopifyForC, + ) where + +#include "HsVersions.h" + +import Cmm +import CmmUtils ( hasNoGlobalRegs ) +import CLabel ( entryLblToInfoLbl ) +import MachOp +import SMRep ( tablesNextToCode ) + +import UniqFM +import Unique ( Unique ) +import Panic ( panic ) + +import Outputable + +import Bits +import Word +import Int +import GLAEXTS + + +-- ----------------------------------------------------------------------------- +-- The mini-inliner + +{- +This pass inlines assignments to temporaries that are used just +once. It works as follows: + + - count uses of each temporary + - for each temporary that occurs just once: + - attempt to push it forward to the statement that uses it + - only push forward past assignments to other temporaries + (assumes that temporaries are single-assignment) + - if we reach the statement that uses it, inline the rhs + and delete the original assignment. + +Possible generalisations: here is an example from factorial + +Fac_zdwfac_entry: + cmG: + _smi = R2; + if (_smi != 0) goto cmK; + R1 = R3; + jump I64[Sp]; + cmK: + _smn = _smi * R3; + R2 = _smi + (-1); + R3 = _smn; + jump Fac_zdwfac_info; + +We want to inline _smi and _smn. To inline _smn: + + - we must be able to push forward past assignments to global regs. + We can do this if the rhs of the assignment we are pushing + forward doesn't refer to the global reg being assigned to; easy + to test. + +To inline _smi: + + - It is a trivial replacement, reg for reg, but it occurs more than + once. + - We can inline trivial assignments even if the temporary occurs + more than once, as long as we don't eliminate the original assignment + (this doesn't help much on its own). + - We need to be able to propagate the assignment forward through jumps; + if we did this, we would find that it can be inlined safely in all + its occurrences. +-} + +cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock] +cmmMiniInline blocks = map do_inline blocks + where + blockUses (BasicBlock _ stmts) + = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts) + + uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks) + + do_inline (BasicBlock id stmts) + = BasicBlock id (cmmMiniInlineStmts uses stmts) + + +cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt] +cmmMiniInlineStmts uses [] = [] +cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts) + | Just 1 <- lookupUFM uses u, + Just stmts' <- lookForInline u expr stmts + = +#ifdef NCG_DEBUG + trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $ +#endif + cmmMiniInlineStmts uses stmts' + +cmmMiniInlineStmts uses (stmt:stmts) + = stmt : cmmMiniInlineStmts uses stmts + + +-- Try to inline a temporary assignment. We can skip over assignments to +-- other tempoararies, because we know that expressions aren't side-effecting +-- and temporaries are single-assignment. +lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest) + | u /= u' + = case lookupUFM (getExprUses rhs) u of + Just 1 -> Just (inlineStmt u expr stmt : rest) + _other -> case lookForInline u expr rest of + Nothing -> Nothing + Just stmts -> Just (stmt:stmts) + +lookForInline u expr (CmmNop : rest) + = lookForInline u expr rest + +lookForInline u expr (stmt:stmts) + = case lookupUFM (getStmtUses stmt) u of + Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts) + _other -> Nothing + where + -- we don't inline into CmmCall if the expression refers to global + -- registers. This is a HACK to avoid global registers clashing with + -- C argument-passing registers, really the back-end ought to be able + -- to handle it properly, but currently neither PprC nor the NCG can + -- do it. See also CgForeignCall:load_args_into_temps. + ok_to_inline = case stmt of + CmmCall{} -> hasNoGlobalRegs expr + _ -> True + +-- ----------------------------------------------------------------------------- +-- Boring Cmm traversals for collecting usage info and substitutions. + +getStmtUses :: CmmStmt -> UniqFM Int +getStmtUses (CmmAssign _ e) = getExprUses e +getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2) +getStmtUses (CmmCall target _ es _) + = plusUFM_C (+) (uses target) (getExprsUses (map fst es)) + where uses (CmmForeignCall e _) = getExprUses e + uses _ = emptyUFM +getStmtUses (CmmCondBranch e _) = getExprUses e +getStmtUses (CmmSwitch e _) = getExprUses e +getStmtUses (CmmJump e _) = getExprUses e +getStmtUses _ = emptyUFM + +getExprUses :: CmmExpr -> UniqFM Int +getExprUses (CmmReg (CmmLocal (LocalReg u _))) = unitUFM u 1 +getExprUses (CmmRegOff (CmmLocal (LocalReg u _)) _) = unitUFM u 1 +getExprUses (CmmLoad e _) = getExprUses e +getExprUses (CmmMachOp _ es) = getExprsUses es +getExprUses _other = emptyUFM + +getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es) + +inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt +inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) +inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2) +inlineStmt u a (CmmCall target regs es vols) + = CmmCall (infn target) regs es' vols + where infn (CmmForeignCall fn cconv) = CmmForeignCall fn cconv + infn (CmmPrim p) = CmmPrim p + es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ] +inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d +inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d +inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d +inlineStmt u a other_stmt = other_stmt + +inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr +inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _))) + | u == u' = a + | otherwise = e +inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off) + | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)] + | otherwise = e +inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep +inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es) +inlineExpr u a other_expr = other_expr + +-- ----------------------------------------------------------------------------- +-- MachOp constant folder + +-- Now, try to constant-fold the MachOps. The arguments have already +-- been optimized and folded. + +cmmMachOpFold + :: MachOp -- The operation from an CmmMachOp + -> [CmmExpr] -- The optimized arguments + -> CmmExpr + +cmmMachOpFold op arg@[CmmLit (CmmInt x rep)] + = case op of + MO_S_Neg r -> CmmLit (CmmInt (-x) rep) + MO_Not r -> CmmLit (CmmInt (complement x) rep) + + -- these are interesting: we must first narrow to the + -- "from" type, in order to truncate to the correct size. + -- The final narrow/widen to the destination type + -- is implicit in the CmmLit. + MO_S_Conv from to + | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to) + | otherwise -> CmmLit (CmmInt (narrowS from x) to) + MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + + _ -> panic "cmmMachOpFold: unknown unary op" + + +-- Eliminate conversion NOPs +cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x + +-- Eliminate nested conversions where possible +cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] + | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, + Just (_, rep3,signed2) <- isIntConversion conv_outer + = case () of + -- widen then narrow to the same size is a nop + _ | rep1 < rep2 && rep1 == rep3 -> x + -- Widen then narrow to different size: collapse to single conversion + -- but remember to use the signedness from the widening, just in case + -- the final conversion is a widen. + | rep1 < rep2 && rep2 > rep3 -> + cmmMachOpFold (intconv signed1 rep1 rep3) [x] + -- Nested widenings: collapse if the signedness is the same + | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> + cmmMachOpFold (intconv signed1 rep1 rep3) [x] + -- Nested narrowings: collapse + | rep1 > rep2 && rep2 > rep3 -> + cmmMachOpFold (MO_U_Conv rep1 rep3) [x] + | otherwise -> + CmmMachOp conv_outer args + where + isIntConversion (MO_U_Conv rep1 rep2) + | not (isFloatingRep rep1) && not (isFloatingRep rep2) + = Just (rep1,rep2,False) + isIntConversion (MO_S_Conv rep1 rep2) + | not (isFloatingRep rep1) && not (isFloatingRep rep2) + = Just (rep1,rep2,True) + isIntConversion _ = Nothing + + intconv True = MO_S_Conv + intconv False = MO_U_Conv + +-- ToDo: a narrow of a load can be collapsed into a narrow load, right? +-- but what if the architecture only supports word-sized loads, should +-- we do the transformation anyway? + +cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] + = case mop of + -- for comparisons: don't forget to narrow the arguments before + -- comparing, since they might be out of range. + MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep) + MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep) + + MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep) + MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep) + MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep) + MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep) + + MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep) + MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep) + MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep) + MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep) + + MO_Add r -> CmmLit (CmmInt (x + y) r) + MO_Sub r -> CmmLit (CmmInt (x - y) r) + MO_Mul r -> CmmLit (CmmInt (x * y) r) + MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r) + MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r) + + MO_And r -> CmmLit (CmmInt (x .&. y) r) + MO_Or r -> CmmLit (CmmInt (x .|. y) r) + MO_Xor r -> CmmLit (CmmInt (x `xor` y) r) + + MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r) + MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r) + MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r) + + other -> CmmMachOp mop args + + where + x_u = narrowU xrep x + y_u = narrowU xrep y + x_s = narrowS xrep x + y_s = narrowS xrep y + + +-- When possible, shift the constants to the right-hand side, so that we +-- can match for strength reductions. Note that the code generator will +-- also assume that constants have been shifted to the right when +-- possible. + +cmmMachOpFold op [x@(CmmLit _), y] + | not (isLit y) && isCommutableMachOp op + = cmmMachOpFold op [y, x] + +-- Turn (a+b)+c into a+(b+c) where possible. Because literals are +-- moved to the right, it is more likely that we will find +-- opportunities for constant folding when the expression is +-- right-associated. +-- +-- ToDo: this appears to introduce a quadratic behaviour due to the +-- nested cmmMachOpFold. Can we fix this? +-- +-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 +-- is also a lit (otherwise arg1 would be on the right). If we +-- put arg1 on the left of the rearranged expression, we'll get into a +-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... +-- +cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3] + | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1) + = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]] + +-- Make a RegOff if we can +cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] + = CmmRegOff reg (fromIntegral (narrowS rep n)) +cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = CmmRegOff reg (off + fromIntegral (narrowS rep n)) +cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] + = CmmRegOff reg (- fromIntegral (narrowS rep n)) +cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = CmmRegOff reg (off - fromIntegral (narrowS rep n)) + +-- Fold label(+/-)offset into a CmmLit where possible + +cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)] + = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i))) +cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)] + = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i))) +cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)] + = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i)))) + +-- We can often do something with constants of 0 and 1 ... + +cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))] + = case mop of + MO_Add r -> x + MO_Sub r -> x + MO_Mul r -> y + MO_And r -> y + MO_Or r -> x + MO_Xor r -> x + MO_Shl r -> x + MO_S_Shr r -> x + MO_U_Shr r -> x + MO_Ne r | isComparisonExpr x -> x + MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_U_Gt r | isComparisonExpr x -> x + MO_S_Gt r | isComparisonExpr x -> x + MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) + MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) + MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x' + other -> CmmMachOp mop args + +cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))] + = case mop of + MO_Mul r -> x + MO_S_Quot r -> x + MO_U_Quot r -> x + MO_S_Rem r -> CmmLit (CmmInt 0 rep) + MO_U_Rem r -> CmmLit (CmmInt 0 rep) + MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_Eq r | isComparisonExpr x -> x + MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x' + MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) + MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) + MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Ge r | isComparisonExpr x -> x + MO_S_Ge r | isComparisonExpr x -> x + other -> CmmMachOp mop args + +-- Now look for multiplication/division by powers of 2 (integers). + +cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] + = case mop of + MO_Mul rep + -> case exactLog2 n of + Nothing -> unchanged + Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)] + MO_S_Quot rep + -> case exactLog2 n of + Nothing -> unchanged + Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)] + other + -> unchanged + where + unchanged = CmmMachOp mop args + +-- Anything else is just too hard. + +cmmMachOpFold mop args = CmmMachOp mop args + +-- ----------------------------------------------------------------------------- +-- exactLog2 + +-- This algorithm for determining the $\log_2$ of exact powers of 2 comes +-- from GCC. It requires bit manipulation primitives, and we use GHC +-- extensions. Tough. +-- +-- Used to be in MachInstrs --SDM. +-- ToDo: remove use of unboxery --SDM. + +w2i x = word2Int# x +i2w x = int2Word# x + +exactLog2 :: Integer -> Maybe Integer +exactLog2 x + = if (x <= 0 || x >= 2147483648) then + Nothing + else + case fromInteger x of { I# x# -> + if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then + Nothing + else + Just (toInteger (I# (pow2 x#))) + } + where + pow2 x# | x# ==# 1# = 0# + | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#)) + + +-- ----------------------------------------------------------------------------- +-- widening / narrowing + +narrowU :: MachRep -> Integer -> Integer +narrowU I8 x = fromIntegral (fromIntegral x :: Word8) +narrowU I16 x = fromIntegral (fromIntegral x :: Word16) +narrowU I32 x = fromIntegral (fromIntegral x :: Word32) +narrowU I64 x = fromIntegral (fromIntegral x :: Word64) +narrowU _ _ = panic "narrowTo" + +narrowS :: MachRep -> Integer -> Integer +narrowS I8 x = fromIntegral (fromIntegral x :: Int8) +narrowS I16 x = fromIntegral (fromIntegral x :: Int16) +narrowS I32 x = fromIntegral (fromIntegral x :: Int32) +narrowS I64 x = fromIntegral (fromIntegral x :: Int64) +narrowS _ _ = panic "narrowTo" + +-- ----------------------------------------------------------------------------- +-- Loopify for C + +{- + This is a simple pass that replaces tail-recursive functions like this: + + fac() { + ... + jump fac(); + } + + with this: + + fac() { + L: + ... + goto L; + } + + the latter generates better C code, because the C compiler treats it + like a loop, and brings full loop optimisation to bear. + + In my measurements this makes little or no difference to anything + except factorial, but what the hell. +-} + +cmmLoopifyForC :: CmmTop -> CmmTop +cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _)) + | null info = p -- only if there's an info table, ignore case alts + | otherwise = +-- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $ + CmmProc info entry_lbl [] blocks' + where blocks' = [ BasicBlock id (map do_stmt stmts) + | BasicBlock id stmts <- blocks ] + + do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl + = CmmBranch top_id + do_stmt stmt = stmt + + jump_lbl | tablesNextToCode = entryLblToInfoLbl entry_lbl + | otherwise = entry_lbl + +cmmLoopifyForC top = top + +-- ----------------------------------------------------------------------------- +-- Utils + +isLit (CmmLit _) = True +isLit _ = False + +isComparisonExpr :: CmmExpr -> Bool +isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op +isComparisonExpr _other = False + +maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr +maybeInvertConditionalExpr (CmmMachOp op args) + | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args) +maybeInvertConditionalExpr _ = Nothing diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y new file mode 100644 index 0000000000..73618bc35b --- /dev/null +++ b/compiler/cmm/CmmParse.y @@ -0,0 +1,890 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 2004 +-- +-- Parser for concrete Cmm. +-- +----------------------------------------------------------------------------- + +{ +module CmmParse ( parseCmmFile ) where + +import CgMonad +import CgHeapery +import CgUtils +import CgProf +import CgTicky +import CgInfoTbls +import CgForeignCall +import CgTailCall ( pushUnboxedTuple ) +import CgStackery ( emitPushUpdateFrame ) +import ClosureInfo ( C_SRT(..) ) +import CgCallConv ( smallLiveness ) +import CgClosure ( emitBlackHoleCode ) +import CostCentre ( dontCareCCS ) + +import Cmm +import PprCmm +import CmmUtils ( mkIntCLit ) +import CmmLex +import CLabel +import MachOp +import SMRep ( fixedHdrSize, CgRep(..) ) +import Lexer + +import ForeignCall ( CCallConv(..), Safety(..) ) +import Literal ( mkMachInt ) +import Unique +import UniqFM +import SrcLoc +import DynFlags ( DynFlags, DynFlag(..) ) +import Packages ( HomeModules ) +import StaticFlags ( opt_SccProfilingOn ) +import ErrUtils ( printError, dumpIfSet_dyn, showPass ) +import StringBuffer ( hGetStringBuffer ) +import FastString +import Panic ( panic ) +import Constants ( wORD_SIZE ) +import Outputable + +import Monad ( when ) +import Data.Char ( ord ) + +#include "HsVersions.h" +} + +%token + ':' { L _ (CmmT_SpecChar ':') } + ';' { L _ (CmmT_SpecChar ';') } + '{' { L _ (CmmT_SpecChar '{') } + '}' { L _ (CmmT_SpecChar '}') } + '[' { L _ (CmmT_SpecChar '[') } + ']' { L _ (CmmT_SpecChar ']') } + '(' { L _ (CmmT_SpecChar '(') } + ')' { L _ (CmmT_SpecChar ')') } + '=' { L _ (CmmT_SpecChar '=') } + '`' { L _ (CmmT_SpecChar '`') } + '~' { L _ (CmmT_SpecChar '~') } + '/' { L _ (CmmT_SpecChar '/') } + '*' { L _ (CmmT_SpecChar '*') } + '%' { L _ (CmmT_SpecChar '%') } + '-' { L _ (CmmT_SpecChar '-') } + '+' { L _ (CmmT_SpecChar '+') } + '&' { L _ (CmmT_SpecChar '&') } + '^' { L _ (CmmT_SpecChar '^') } + '|' { L _ (CmmT_SpecChar '|') } + '>' { L _ (CmmT_SpecChar '>') } + '<' { L _ (CmmT_SpecChar '<') } + ',' { L _ (CmmT_SpecChar ',') } + '!' { L _ (CmmT_SpecChar '!') } + + '..' { L _ (CmmT_DotDot) } + '::' { L _ (CmmT_DoubleColon) } + '>>' { L _ (CmmT_Shr) } + '<<' { L _ (CmmT_Shl) } + '>=' { L _ (CmmT_Ge) } + '<=' { L _ (CmmT_Le) } + '==' { L _ (CmmT_Eq) } + '!=' { L _ (CmmT_Ne) } + '&&' { L _ (CmmT_BoolAnd) } + '||' { L _ (CmmT_BoolOr) } + + 'CLOSURE' { L _ (CmmT_CLOSURE) } + 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } + 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } + 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) } + 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) } + 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) } + 'else' { L _ (CmmT_else) } + 'export' { L _ (CmmT_export) } + 'section' { L _ (CmmT_section) } + 'align' { L _ (CmmT_align) } + 'goto' { L _ (CmmT_goto) } + 'if' { L _ (CmmT_if) } + 'jump' { L _ (CmmT_jump) } + 'foreign' { L _ (CmmT_foreign) } + 'import' { L _ (CmmT_import) } + 'switch' { L _ (CmmT_switch) } + 'case' { L _ (CmmT_case) } + 'default' { L _ (CmmT_default) } + 'bits8' { L _ (CmmT_bits8) } + 'bits16' { L _ (CmmT_bits16) } + 'bits32' { L _ (CmmT_bits32) } + 'bits64' { L _ (CmmT_bits64) } + 'float32' { L _ (CmmT_float32) } + 'float64' { L _ (CmmT_float64) } + + GLOBALREG { L _ (CmmT_GlobalReg $$) } + NAME { L _ (CmmT_Name $$) } + STRING { L _ (CmmT_String $$) } + INT { L _ (CmmT_Int $$) } + FLOAT { L _ (CmmT_Float $$) } + +%monad { P } { >>= } { return } +%lexer { cmmlex } { L _ CmmT_EOF } +%name cmmParse cmm +%tokentype { Located CmmToken } + +-- C-- operator precedences, taken from the C-- spec +%right '||' -- non-std extension, called %disjoin in C-- +%right '&&' -- non-std extension, called %conjoin in C-- +%right '!' +%nonassoc '>=' '>' '<=' '<' '!=' '==' +%left '|' +%left '^' +%left '&' +%left '>>' '<<' +%left '-' '+' +%left '/' '*' '%' +%right '~' + +%% + +cmm :: { ExtCode } + : {- empty -} { return () } + | cmmtop cmm { do $1; $2 } + +cmmtop :: { ExtCode } + : cmmproc { $1 } + | cmmdata { $1 } + | decl { $1 } + | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' + { do lits <- sequence $6; + staticClosure $3 $5 (map getLit lits) } + +-- The only static closures in the RTS are dummy closures like +-- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need +-- to provide the full generality of static closures here. +-- In particular: +-- * CCS can always be CCS_DONT_CARE +-- * closure is always extern +-- * payload is always empty +-- * we can derive closure and info table labels from a single NAME + +cmmdata :: { ExtCode } + : 'section' STRING '{' statics '}' + { do ss <- sequence $4; + code (emitData (section $2) (concat ss)) } + +statics :: { [ExtFCode [CmmStatic]] } + : {- empty -} { [] } + | static statics { $1 : $2 } + +-- Strings aren't used much in the RTS HC code, so it doesn't seem +-- worth allowing inline strings. C-- doesn't allow them anyway. +static :: { ExtFCode [CmmStatic] } + : NAME ':' { return [CmmDataLabel (mkRtsDataLabelFS $1)] } + | type expr ';' { do e <- $2; + return [CmmStaticLit (getLit e)] } + | type ';' { return [CmmUninitialised + (machRepByteWidth $1)] } + | 'bits8' '[' ']' STRING ';' { return [mkString $4] } + | 'bits8' '[' INT ']' ';' { return [CmmUninitialised + (fromIntegral $3)] } + | typenot8 '[' INT ']' ';' { return [CmmUninitialised + (machRepByteWidth $1 * + fromIntegral $3)] } + | 'align' INT ';' { return [CmmAlign (fromIntegral $2)] } + | 'CLOSURE' '(' NAME lits ')' + { do lits <- sequence $4; + return $ map CmmStaticLit $ + mkStaticClosure (mkRtsInfoLabelFS $3) + dontCareCCS (map getLit lits) [] [] [] } + -- arrays of closures required for the CHARLIKE & INTLIKE arrays + +lits :: { [ExtFCode CmmExpr] } + : {- empty -} { [] } + | ',' expr lits { $2 : $3 } + +cmmproc :: { ExtCode } + : info '{' body '}' + { do (info_lbl, info1, info2) <- $1; + stmts <- getCgStmtsEC (loopDecls $3) + blks <- code (cgStmtsToBlocks stmts) + code (emitInfoTableAndCode info_lbl info1 info2 [] blks) } + + | info ';' + { do (info_lbl, info1, info2) <- $1; + code (emitInfoTableAndCode info_lbl info1 info2 [] []) } + + | NAME '{' body '}' + { do stmts <- getCgStmtsEC (loopDecls $3); + blks <- code (cgStmtsToBlocks stmts) + code (emitProc [] (mkRtsCodeLabelFS $1) [] blks) } + +info :: { ExtFCode (CLabel, [CmmLit],[CmmLit]) } + : 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, closure type, description, type + { stdInfo $3 $5 $7 0 $9 $11 $13 } + + | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' + -- ptrs, nptrs, closure type, description, type, fun type + { funInfo $3 $5 $7 $9 $11 $13 $15 } + + | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, tag, closure type, description, type + { stdInfo $3 $5 $7 $9 $11 $13 $15 } + + | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')' + -- selector, closure type, description, type + { basicInfo $3 (mkIntCLit (fromIntegral $5)) 0 $7 $9 $11 } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ',' INT ',' INT maybe_vec ')' + { retInfo $3 $5 $7 $9 $10 } + +maybe_vec :: { [CmmLit] } + : {- empty -} { [] } + | ',' NAME maybe_vec { CmmLabel (mkRtsCodeLabelFS $2) : $3 } + +body :: { ExtCode } + : {- empty -} { return () } + | decl body { do $1; $2 } + | stmt body { do $1; $2 } + +decl :: { ExtCode } + : type names ';' { mapM_ (newLocal $1) $2 } + | 'import' names ';' { return () } -- ignore imports + | 'export' names ';' { return () } -- ignore exports + +names :: { [FastString] } + : NAME { [$1] } + | NAME ',' names { $1 : $3 } + +stmt :: { ExtCode } + : ';' { nopEC } + + | block_id ':' { code (labelC $1) } + + | lreg '=' expr ';' + { do reg <- $1; e <- $3; stmtEC (CmmAssign reg e) } + | type '[' expr ']' '=' expr ';' + { doStore $1 $3 $6 } + | 'foreign' STRING expr '(' hint_exprs0 ')' vols ';' + {% foreignCall $2 [] $3 $5 $7 } + | lreg '=' 'foreign' STRING expr '(' hint_exprs0 ')' vols ';' + {% let result = do r <- $1; return (r,NoHint) in + foreignCall $4 [result] $5 $7 $9 } + | STRING lreg '=' 'foreign' STRING expr '(' hint_exprs0 ')' vols ';' + {% do h <- parseHint $1; + let result = do r <- $2; return (r,h) in + foreignCall $5 [result] $6 $8 $10 } + -- stmt-level macros, stealing syntax from ordinary C-- function calls. + -- Perhaps we ought to use the %%-form? + | NAME '(' exprs0 ')' ';' + {% stmtMacro $1 $3 } + | 'switch' maybe_range expr '{' arms default '}' + { doSwitch $2 $3 $5 $6 } + | 'goto' block_id ';' + { stmtEC (CmmBranch $2) } + | 'jump' expr {-maybe_actuals-} ';' + { do e <- $2; stmtEC (CmmJump e []) } + | 'if' bool_expr '{' body '}' else + { ifThenElse $2 $4 $6 } + +bool_expr :: { ExtFCode BoolExpr } + : bool_op { $1 } + | expr { do e <- $1; return (BoolTest e) } + +bool_op :: { ExtFCode BoolExpr } + : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolAnd e1 e2) } + | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolOr e1 e2) } + | '!' bool_expr { do e <- $2; return (BoolNot e) } + | '(' bool_op ')' { $2 } + +-- This is not C-- syntax. What to do? +vols :: { Maybe [GlobalReg] } + : {- empty -} { Nothing } + | '[' ']' { Just [] } + | '[' globals ']' { Just $2 } + +globals :: { [GlobalReg] } + : GLOBALREG { [$1] } + | GLOBALREG ',' globals { $1 : $3 } + +maybe_range :: { Maybe (Int,Int) } + : '[' INT '..' INT ']' { Just (fromIntegral $2, fromIntegral $4) } + | {- empty -} { Nothing } + +arms :: { [([Int],ExtCode)] } + : {- empty -} { [] } + | arm arms { $1 : $2 } + +arm :: { ([Int],ExtCode) } + : 'case' ints ':' '{' body '}' { ($2, $5) } + +ints :: { [Int] } + : INT { [ fromIntegral $1 ] } + | INT ',' ints { fromIntegral $1 : $3 } + +default :: { Maybe ExtCode } + : 'default' ':' '{' body '}' { Just $4 } + -- taking a few liberties with the C-- syntax here; C-- doesn't have + -- 'default' branches + | {- empty -} { Nothing } + +else :: { ExtCode } + : {- empty -} { nopEC } + | 'else' '{' body '}' { $3 } + +-- we have to write this out longhand so that Happy's precedence rules +-- can kick in. +expr :: { ExtFCode CmmExpr } + : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } + | expr '*' expr { mkMachOp MO_Mul [$1,$3] } + | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } + | expr '-' expr { mkMachOp MO_Sub [$1,$3] } + | expr '+' expr { mkMachOp MO_Add [$1,$3] } + | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } + | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } + | expr '&' expr { mkMachOp MO_And [$1,$3] } + | expr '^' expr { mkMachOp MO_Xor [$1,$3] } + | expr '|' expr { mkMachOp MO_Or [$1,$3] } + | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } + | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } + | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } + | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } + | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } + | expr '==' expr { mkMachOp MO_Eq [$1,$3] } + | '~' expr { mkMachOp MO_Not [$2] } + | '-' expr { mkMachOp MO_S_Neg [$2] } + | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; + return (mkMachOp mo [$1,$5]) } } + | expr0 { $1 } + +expr0 :: { ExtFCode CmmExpr } + : INT maybe_ty { return (CmmLit (CmmInt $1 $2)) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 $2)) } + | STRING { do s <- code (mkStringCLit $1); + return (CmmLit s) } + | reg { $1 } + | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } + | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } + | '(' expr ')' { $2 } + + +-- leaving out the type of a literal gives you the native word size in C-- +maybe_ty :: { MachRep } + : {- empty -} { wordRep } + | '::' type { $2 } + +hint_exprs0 :: { [ExtFCode (CmmExpr, MachHint)] } + : {- empty -} { [] } + | hint_exprs { $1 } + +hint_exprs :: { [ExtFCode (CmmExpr, MachHint)] } + : hint_expr { [$1] } + | hint_expr ',' hint_exprs { $1 : $3 } + +hint_expr :: { ExtFCode (CmmExpr, MachHint) } + : expr { do e <- $1; return (e, inferHint e) } + | expr STRING {% do h <- parseHint $2; + return $ do + e <- $1; return (e,h) } + +exprs0 :: { [ExtFCode CmmExpr] } + : {- empty -} { [] } + | exprs { $1 } + +exprs :: { [ExtFCode CmmExpr] } + : expr { [ $1 ] } + | expr ',' exprs { $1 : $3 } + +reg :: { ExtFCode CmmExpr } + : NAME { lookupName $1 } + | GLOBALREG { return (CmmReg (CmmGlobal $1)) } + +lreg :: { ExtFCode CmmReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg r -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } + | GLOBALREG { return (CmmGlobal $1) } + +block_id :: { BlockId } + : NAME { BlockId (newTagUnique (getUnique $1) 'L') } + -- TODO: ugh. The unique of a FastString has a null + -- tag, so we have to put our own tag on. We should + -- really make a new unique for every label, and keep + -- them in an environment. + +type :: { MachRep } + : 'bits8' { I8 } + | typenot8 { $1 } + +typenot8 :: { MachRep } + : 'bits16' { I16 } + | 'bits32' { I32 } + | 'bits64' { I64 } + | 'float32' { F32 } + | 'float64' { F64 } +{ +section :: String -> Section +section "text" = Text +section "data" = Data +section "rodata" = ReadOnlyData +section "bss" = UninitialisedData +section s = OtherSection s + +mkString :: String -> CmmStatic +mkString s = CmmString (map (fromIntegral.ord) s) + +-- mkMachOp infers the type of the MachOp from the type of its first +-- argument. We assume that this is correct: for MachOps that don't have +-- symmetrical args (e.g. shift ops), the first arg determines the type of +-- the op. +mkMachOp :: (MachRep -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr +mkMachOp fn args = do + arg_exprs <- sequence args + return (CmmMachOp (fn (cmmExprRep (head arg_exprs))) arg_exprs) + +getLit :: CmmExpr -> CmmLit +getLit (CmmLit l) = l +getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r +getLit _ = panic "invalid literal" -- TODO messy failure + +nameToMachOp :: FastString -> P (MachRep -> MachOp) +nameToMachOp name = + case lookupUFM machOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just m -> return m + +exprOp :: FastString -> [ExtFCode CmmExpr] -> P (ExtFCode CmmExpr) +exprOp name args_code = + case lookupUFM exprMacros name of + Just f -> return $ do + args <- sequence args_code + return (f args) + Nothing -> do + mo <- nameToMachOp name + return $ mkMachOp mo args_code + +exprMacros :: UniqFM ([CmmExpr] -> CmmExpr) +exprMacros = listToUFM [ + ( FSLIT("ENTRY_CODE"), \ [x] -> entryCode x ), + ( FSLIT("INFO_PTR"), \ [x] -> closureInfoPtr x ), + ( FSLIT("STD_INFO"), \ [x] -> infoTable x ), + ( FSLIT("FUN_INFO"), \ [x] -> funInfoTable x ), + ( FSLIT("GET_ENTRY"), \ [x] -> entryCode (closureInfoPtr x) ), + ( FSLIT("GET_STD_INFO"), \ [x] -> infoTable (closureInfoPtr x) ), + ( FSLIT("GET_FUN_INFO"), \ [x] -> funInfoTable (closureInfoPtr x) ), + ( FSLIT("INFO_TYPE"), \ [x] -> infoTableClosureType x ), + ( FSLIT("INFO_PTRS"), \ [x] -> infoTablePtrs x ), + ( FSLIT("INFO_NPTRS"), \ [x] -> infoTableNonPtrs x ), + ( FSLIT("RET_VEC"), \ [info, conZ] -> retVec info conZ ) + ] + +-- we understand a subset of C-- primitives: +machOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "add", MO_Add ), + ( "sub", MO_Sub ), + ( "eq", MO_Eq ), + ( "ne", MO_Ne ), + ( "mul", MO_Mul ), + ( "neg", MO_S_Neg ), + ( "quot", MO_S_Quot ), + ( "rem", MO_S_Rem ), + ( "divu", MO_U_Quot ), + ( "modu", MO_U_Rem ), + + ( "ge", MO_S_Ge ), + ( "le", MO_S_Le ), + ( "gt", MO_S_Gt ), + ( "lt", MO_S_Lt ), + + ( "geu", MO_U_Ge ), + ( "leu", MO_U_Le ), + ( "gtu", MO_U_Gt ), + ( "ltu", MO_U_Lt ), + + ( "flt", MO_S_Lt ), + ( "fle", MO_S_Le ), + ( "feq", MO_Eq ), + ( "fne", MO_Ne ), + ( "fgt", MO_S_Gt ), + ( "fge", MO_S_Ge ), + ( "fneg", MO_S_Neg ), + + ( "and", MO_And ), + ( "or", MO_Or ), + ( "xor", MO_Xor ), + ( "com", MO_Not ), + ( "shl", MO_Shl ), + ( "shrl", MO_U_Shr ), + ( "shra", MO_S_Shr ), + + ( "lobits8", flip MO_U_Conv I8 ), + ( "lobits16", flip MO_U_Conv I16 ), + ( "lobits32", flip MO_U_Conv I32 ), + ( "lobits64", flip MO_U_Conv I64 ), + ( "sx16", flip MO_S_Conv I16 ), + ( "sx32", flip MO_S_Conv I32 ), + ( "sx64", flip MO_S_Conv I64 ), + ( "zx16", flip MO_U_Conv I16 ), + ( "zx32", flip MO_U_Conv I32 ), + ( "zx64", flip MO_U_Conv I64 ), + ( "f2f32", flip MO_S_Conv F32 ), -- TODO; rounding mode + ( "f2f64", flip MO_S_Conv F64 ), -- TODO; rounding mode + ( "f2i8", flip MO_S_Conv I8 ), + ( "f2i16", flip MO_S_Conv I8 ), + ( "f2i32", flip MO_S_Conv I8 ), + ( "f2i64", flip MO_S_Conv I8 ), + ( "i2f32", flip MO_S_Conv F32 ), + ( "i2f64", flip MO_S_Conv F64 ) + ] + +parseHint :: String -> P MachHint +parseHint "ptr" = return PtrHint +parseHint "signed" = return SignedHint +parseHint "float" = return FloatHint +parseHint str = fail ("unrecognised hint: " ++ str) + +-- labels are always pointers, so we might as well infer the hint +inferHint :: CmmExpr -> MachHint +inferHint (CmmLit (CmmLabel _)) = PtrHint +inferHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = PtrHint +inferHint _ = NoHint + +isPtrGlobalReg Sp = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg _ = False + +happyError :: P a +happyError = srcParseFail + +-- ----------------------------------------------------------------------------- +-- Statement-level macros + +stmtMacro :: FastString -> [ExtFCode CmmExpr] -> P ExtCode +stmtMacro fun args_code = do + case lookupUFM stmtMacros fun of + Nothing -> fail ("unknown macro: " ++ unpackFS fun) + Just fcode -> return $ do + args <- sequence args_code + code (fcode args) + +stmtMacros :: UniqFM ([CmmExpr] -> Code) +stmtMacros = listToUFM [ + ( FSLIT("CCS_ALLOC"), \[words,ccs] -> profAlloc words ccs ), + ( FSLIT("CLOSE_NURSERY"), \[] -> emitCloseNursery ), + ( FSLIT("ENTER_CCS_PAP_CL"), \[e] -> enterCostCentrePAP e ), + ( FSLIT("ENTER_CCS_THUNK"), \[e] -> enterCostCentreThunk e ), + ( FSLIT("HP_CHK_GEN"), \[words,liveness,reentry] -> + hpChkGen words liveness reentry ), + ( FSLIT("HP_CHK_NP_ASSIGN_SP0"), \[e,f] -> hpChkNodePointsAssignSp0 e f ), + ( FSLIT("LOAD_THREAD_STATE"), \[] -> emitLoadThreadState ), + ( FSLIT("LDV_ENTER"), \[e] -> ldvEnter e ), + ( FSLIT("LDV_RECORD_CREATE"), \[e] -> ldvRecordCreate e ), + ( FSLIT("OPEN_NURSERY"), \[] -> emitOpenNursery ), + ( FSLIT("PUSH_UPD_FRAME"), \[sp,e] -> emitPushUpdateFrame sp e ), + ( FSLIT("SAVE_THREAD_STATE"), \[] -> emitSaveThreadState ), + ( FSLIT("SET_HDR"), \[ptr,info,ccs] -> + emitSetDynHdr ptr info ccs ), + ( FSLIT("STK_CHK_GEN"), \[words,liveness,reentry] -> + stkChkGen words liveness reentry ), + ( FSLIT("STK_CHK_NP"), \[e] -> stkChkNodePoints e ), + ( FSLIT("TICK_ALLOC_PRIM"), \[hdr,goods,slop] -> + tickyAllocPrim hdr goods slop ), + ( FSLIT("TICK_ALLOC_PAP"), \[goods,slop] -> + tickyAllocPAP goods slop ), + ( FSLIT("TICK_ALLOC_UP_THK"), \[goods,slop] -> + tickyAllocThunk goods slop ), + ( FSLIT("UPD_BH_UPDATABLE"), \[] -> emitBlackHoleCode False ), + ( FSLIT("UPD_BH_SINGLE_ENTRY"), \[] -> emitBlackHoleCode True ), + + ( FSLIT("RET_P"), \[a] -> emitRetUT [(PtrArg,a)]), + ( FSLIT("RET_N"), \[a] -> emitRetUT [(NonPtrArg,a)]), + ( FSLIT("RET_PP"), \[a,b] -> emitRetUT [(PtrArg,a),(PtrArg,b)]), + ( FSLIT("RET_NN"), \[a,b] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b)]), + ( FSLIT("RET_NP"), \[a,b] -> emitRetUT [(NonPtrArg,a),(PtrArg,b)]), + ( FSLIT("RET_PPP"), \[a,b,c] -> emitRetUT [(PtrArg,a),(PtrArg,b),(PtrArg,c)]), + ( FSLIT("RET_NNP"), \[a,b,c] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(PtrArg,c)]), + ( FSLIT("RET_NNNP"), \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(NonPtrArg,c),(PtrArg,d)]), + ( FSLIT("RET_NPNP"), \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(PtrArg,b),(NonPtrArg,c),(PtrArg,d)]) + + ] + +-- ----------------------------------------------------------------------------- +-- Our extended FCode monad. + +-- We add a mapping from names to CmmExpr, to support local variable names in +-- the concrete C-- code. The unique supply of the underlying FCode monad +-- is used to grab a new unique for each local variable. + +-- In C--, a local variable can be declared anywhere within a proc, +-- and it scopes from the beginning of the proc to the end. Hence, we have +-- to collect declarations as we parse the proc, and feed the environment +-- back in circularly (to avoid a two-pass algorithm). + +type Decls = [(FastString,CmmExpr)] +type Env = UniqFM CmmExpr + +newtype ExtFCode a = EC { unEC :: Env -> Decls -> FCode (Decls, a) } + +type ExtCode = ExtFCode () + +returnExtFC a = EC $ \e s -> return (s, a) +thenExtFC (EC m) k = EC $ \e s -> do (s',r) <- m e s; unEC (k r) e s' + +instance Monad ExtFCode where + (>>=) = thenExtFC + return = returnExtFC + +-- This function takes the variable decarations and imports and makes +-- an environment, which is looped back into the computation. In this +-- way, we can have embedded declarations that scope over the whole +-- procedure, and imports that scope over the entire module. +loopDecls :: ExtFCode a -> ExtFCode a +loopDecls (EC fcode) = + EC $ \e s -> fixC (\ ~(decls,a) -> fcode (addListToUFM e decls) []) + +getEnv :: ExtFCode Env +getEnv = EC $ \e s -> return (s, e) + +addVarDecl :: FastString -> CmmExpr -> ExtCode +addVarDecl var expr = EC $ \e s -> return ((var,expr):s, ()) + +newLocal :: MachRep -> FastString -> ExtCode +newLocal ty name = do + u <- code newUnique + addVarDecl name (CmmReg (CmmLocal (LocalReg u ty))) + +-- Unknown names are treated as if they had been 'import'ed. +-- This saves us a lot of bother in the RTS sources, at the expense of +-- deferring some errors to link time. +lookupName :: FastString -> ExtFCode CmmExpr +lookupName name = do + env <- getEnv + return $ + case lookupUFM env name of + Nothing -> CmmLit (CmmLabel (mkRtsCodeLabelFS name)) + Just e -> e + +-- Lifting FCode computations into the ExtFCode monad: +code :: FCode a -> ExtFCode a +code fc = EC $ \e s -> do r <- fc; return (s, r) + +code2 :: (FCode (Decls,b) -> FCode ((Decls,b),c)) + -> ExtFCode b -> ExtFCode c +code2 f (EC ec) = EC $ \e s -> do ((s',b),c) <- f (ec e s); return (s',c) + +nopEC = code nopC +stmtEC stmt = code (stmtC stmt) +stmtsEC stmts = code (stmtsC stmts) +getCgStmtsEC = code2 getCgStmts' + +forkLabelledCodeEC ec = do + stmts <- getCgStmtsEC ec + code (forkCgStmts stmts) + +retInfo name size live_bits cl_type vector = do + let liveness = smallLiveness (fromIntegral size) (fromIntegral live_bits) + info_lbl = mkRtsRetInfoLabelFS name + (info1,info2) = mkRetInfoTable info_lbl liveness NoC_SRT + (fromIntegral cl_type) vector + return (info_lbl, info1, info2) + +stdInfo name ptrs nptrs srt_bitmap cl_type desc_str ty_str = + basicInfo name (packHalfWordsCLit ptrs nptrs) + srt_bitmap cl_type desc_str ty_str + +basicInfo name layout srt_bitmap cl_type desc_str ty_str = do + lit1 <- if opt_SccProfilingOn + then code $ mkStringCLit desc_str + else return (mkIntCLit 0) + lit2 <- if opt_SccProfilingOn + then code $ mkStringCLit ty_str + else return (mkIntCLit 0) + let info1 = mkStdInfoTable lit1 lit2 (fromIntegral cl_type) + (fromIntegral srt_bitmap) + layout + return (mkRtsInfoLabelFS name, info1, []) + +funInfo name ptrs nptrs cl_type desc_str ty_str fun_type = do + (label,info1,_) <- stdInfo name ptrs nptrs 0{-srt_bitmap-} + cl_type desc_str ty_str + let info2 = mkFunGenInfoExtraBits (fromIntegral fun_type) 0 zero zero zero + -- we leave most of the fields zero here. This is only used + -- to generate the BCO info table in the RTS at the moment. + return (label,info1,info2) + where + zero = mkIntCLit 0 + + +staticClosure :: FastString -> FastString -> [CmmLit] -> ExtCode +staticClosure cl_label info payload + = code $ emitDataLits (mkRtsDataLabelFS cl_label) lits + where lits = mkStaticClosure (mkRtsInfoLabelFS info) dontCareCCS payload [] [] [] + +foreignCall + :: String + -> [ExtFCode (CmmReg,MachHint)] + -> ExtFCode CmmExpr + -> [ExtFCode (CmmExpr,MachHint)] + -> Maybe [GlobalReg] -> P ExtCode +foreignCall "C" results_code expr_code args_code vols + = return $ do + results <- sequence results_code + expr <- expr_code + args <- sequence args_code + code (emitForeignCall' PlayRisky results + (CmmForeignCall expr CCallConv) args vols) +foreignCall conv _ _ _ _ + = fail ("unknown calling convention: " ++ conv) + +doStore :: MachRep -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode +doStore rep addr_code val_code + = do addr <- addr_code + val <- val_code + -- if the specified store type does not match the type of the expr + -- on the rhs, then we insert a coercion that will cause the type + -- mismatch to be flagged by cmm-lint. If we don't do this, then + -- the store will happen at the wrong type, and the error will not + -- be noticed. + let coerce_val + | cmmExprRep val /= rep = CmmMachOp (MO_U_Conv rep rep) [val] + | otherwise = val + stmtEC (CmmStore addr coerce_val) + +-- Return an unboxed tuple. +emitRetUT :: [(CgRep,CmmExpr)] -> Code +emitRetUT args = do + tickyUnboxedTupleReturn (length args) -- TICK + (sp, stmts) <- pushUnboxedTuple 0 args + emitStmts stmts + when (sp /= 0) $ stmtC (CmmAssign spReg (cmmRegOffW spReg (-sp))) + stmtC (CmmJump (entryCode (CmmLoad (cmmRegOffW spReg sp) wordRep)) []) + +-- ----------------------------------------------------------------------------- +-- If-then-else and boolean expressions + +data BoolExpr + = BoolExpr `BoolAnd` BoolExpr + | BoolExpr `BoolOr` BoolExpr + | BoolNot BoolExpr + | BoolTest CmmExpr + +-- ToDo: smart constructors which simplify the boolean expression. + +ifThenElse cond then_part else_part = do + then_id <- code newLabelC + join_id <- code newLabelC + c <- cond + emitCond c then_id + else_part + stmtEC (CmmBranch join_id) + code (labelC then_id) + then_part + -- fall through to join + code (labelC join_id) + +-- 'emitCond cond true_id' emits code to test whether the cond is true, +-- branching to true_id if so, and falling through otherwise. +emitCond (BoolTest e) then_id = do + stmtEC (CmmCondBranch e then_id) +emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id + | Just op' <- maybeInvertComparison op + = emitCond (BoolTest (CmmMachOp op' args)) then_id +emitCond (BoolNot e) then_id = do + else_id <- code newLabelC + emitCond e else_id + stmtEC (CmmBranch then_id) + code (labelC else_id) +emitCond (e1 `BoolOr` e2) then_id = do + emitCond e1 then_id + emitCond e2 then_id +emitCond (e1 `BoolAnd` e2) then_id = do + -- we'd like to invert one of the conditionals here to avoid an + -- extra branch instruction, but we can't use maybeInvertComparison + -- here because we can't look too closely at the expression since + -- we're in a loop. + and_id <- code newLabelC + else_id <- code newLabelC + emitCond e1 and_id + stmtEC (CmmBranch else_id) + code (labelC and_id) + emitCond e2 then_id + code (labelC else_id) + + +-- ----------------------------------------------------------------------------- +-- Table jumps + +-- We use a simplified form of C-- switch statements for now. A +-- switch statement always compiles to a table jump. Each arm can +-- specify a list of values (not ranges), and there can be a single +-- default branch. The range of the table is given either by the +-- optional range on the switch (eg. switch [0..7] {...}), or by +-- the minimum/maximum values from the branches. + +doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],ExtCode)] + -> Maybe ExtCode -> ExtCode +doSwitch mb_range scrut arms deflt + = do + -- Compile code for the default branch + dflt_entry <- + case deflt of + Nothing -> return Nothing + Just e -> do b <- forkLabelledCodeEC e; return (Just b) + + -- Compile each case branch + table_entries <- mapM emitArm arms + + -- Construct the table + let + all_entries = concat table_entries + ixs = map fst all_entries + (min,max) + | Just (l,u) <- mb_range = (l,u) + | otherwise = (minimum ixs, maximum ixs) + + entries = elems (accumArray (\_ a -> Just a) dflt_entry (min,max) + all_entries) + expr <- scrut + -- ToDo: check for out of range and jump to default if necessary + stmtEC (CmmSwitch expr entries) + where + emitArm :: ([Int],ExtCode) -> ExtFCode [(Int,BlockId)] + emitArm (ints,code) = do + blockid <- forkLabelledCodeEC code + return [ (i,blockid) | i <- ints ] + + +-- ----------------------------------------------------------------------------- +-- Putting it all together + +-- The initial environment: we define some constants that the compiler +-- knows about here. +initEnv :: Env +initEnv = listToUFM [ + ( FSLIT("SIZEOF_StgHeader"), + CmmLit (CmmInt (fromIntegral (fixedHdrSize * wORD_SIZE)) wordRep) ), + ( FSLIT("SIZEOF_StgInfoTable"), + CmmLit (CmmInt (fromIntegral stdInfoTableSizeB) wordRep) ) + ] + +parseCmmFile :: DynFlags -> HomeModules -> FilePath -> IO (Maybe Cmm) +parseCmmFile dflags hmods filename = do + showPass dflags "ParseCmm" + buf <- hGetStringBuffer filename + let + init_loc = mkSrcLoc (mkFastString filename) 1 0 + init_state = (mkPState buf init_loc dflags) { lex_state = [0] } + -- reset the lex_state: the Lexer monad leaves some stuff + -- in there we don't want. + case unP cmmParse init_state of + PFailed span err -> do printError span err; return Nothing + POk _ code -> do + cmm <- initC dflags hmods no_module (getCmm (unEC code initEnv [] >> return ())) + dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms [cmm]) + return (Just cmm) + where + no_module = panic "parseCmmFile: no module" +} diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs new file mode 100644 index 0000000000..a04935b279 --- /dev/null +++ b/compiler/cmm/CmmUtils.hs @@ -0,0 +1,177 @@ +----------------------------------------------------------------------------- +-- +-- Cmm utilities. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CmmUtils( + CmmStmts, noStmts, oneStmt, mkStmts, plusStmts, stmtList, + isNopStmt, + + isTrivialCmmExpr, hasNoGlobalRegs, + + cmmRegOff, cmmLabelOff, cmmOffset, cmmOffsetLit, cmmIndex, + cmmOffsetExpr, cmmIndexExpr, cmmLoadIndex, + + mkIntCLit, zeroCLit, + + mkLblExpr, + ) where + +#include "HsVersions.h" + +import CLabel ( CLabel ) +import Cmm +import MachOp +import OrdList +import Outputable + +--------------------------------------------------- +-- +-- CmmStmts +-- +--------------------------------------------------- + +type CmmStmts = OrdList CmmStmt + +noStmts :: CmmStmts +noStmts = nilOL + +oneStmt :: CmmStmt -> CmmStmts +oneStmt = unitOL + +mkStmts :: [CmmStmt] -> CmmStmts +mkStmts = toOL + +plusStmts :: CmmStmts -> CmmStmts -> CmmStmts +plusStmts = appOL + +stmtList :: CmmStmts -> [CmmStmt] +stmtList = fromOL + + +--------------------------------------------------- +-- +-- CmmStmt +-- +--------------------------------------------------- + +isNopStmt :: CmmStmt -> Bool +-- If isNopStmt returns True, the stmt is definitely a no-op; +-- but it might be a no-op even if isNopStmt returns False +isNopStmt CmmNop = True +isNopStmt (CmmAssign r e) = cheapEqReg r e +isNopStmt (CmmStore e1 (CmmLoad e2 _)) = cheapEqExpr e1 e2 +isNopStmt s = False + +cheapEqExpr :: CmmExpr -> CmmExpr -> Bool +cheapEqExpr (CmmReg r) e = cheapEqReg r e +cheapEqExpr (CmmRegOff r 0) e = cheapEqReg r e +cheapEqExpr (CmmRegOff r n) (CmmRegOff r' n') = r==r' && n==n' +cheapEqExpr e1 e2 = False + +cheapEqReg :: CmmReg -> CmmExpr -> Bool +cheapEqReg r (CmmReg r') = r==r' +cheapEqReg r (CmmRegOff r' 0) = r==r' +cheapEqReg r e = False + +--------------------------------------------------- +-- +-- CmmExpr +-- +--------------------------------------------------- + +isTrivialCmmExpr :: CmmExpr -> Bool +isTrivialCmmExpr (CmmLoad _ _) = False +isTrivialCmmExpr (CmmMachOp _ _) = False +isTrivialCmmExpr (CmmLit _) = True +isTrivialCmmExpr (CmmReg _) = True +isTrivialCmmExpr (CmmRegOff _ _) = True + +hasNoGlobalRegs :: CmmExpr -> Bool +hasNoGlobalRegs (CmmLoad e _) = hasNoGlobalRegs e +hasNoGlobalRegs (CmmMachOp _ es) = all hasNoGlobalRegs es +hasNoGlobalRegs (CmmLit _) = True +hasNoGlobalRegs (CmmReg (CmmLocal _)) = True +hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True +hasNoGlobalRegs _ = False + +--------------------------------------------------- +-- +-- Expr Construction helpers +-- +--------------------------------------------------- + +cmmOffsetExpr :: CmmExpr -> CmmExpr -> CmmExpr +-- assumes base and offset have the same MachRep +cmmOffsetExpr e (CmmLit (CmmInt n _)) = cmmOffset e (fromInteger n) +cmmOffsetExpr e byte_off = CmmMachOp (MO_Add (cmmExprRep e)) [e, byte_off] + +-- NB. Do *not* inspect the value of the offset in these smart constructors!!! +-- +-- because the offset is sometimes involved in a loop in the code generator +-- (we don't know the real Hp offset until we've generated code for the entire +-- basic block, for example). So we cannot eliminate zero offsets at this +-- stage; they're eliminated later instead (either during printing or +-- a later optimisation step on Cmm). +-- +cmmOffset :: CmmExpr -> Int -> CmmExpr +cmmOffset (CmmReg reg) byte_off = cmmRegOff reg byte_off +cmmOffset (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off) +cmmOffset (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off) +cmmOffset (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2 + = CmmMachOp (MO_Add rep) + [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] +cmmOffset expr byte_off + = CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (toInteger byte_off) rep)] + where + rep = cmmExprRep expr + +-- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. +cmmRegOff :: CmmReg -> Int -> CmmExpr +cmmRegOff reg byte_off = CmmRegOff reg byte_off + +cmmOffsetLit :: CmmLit -> Int -> CmmLit +cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off +cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off) +cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep +cmmOffsetLit other byte_off = pprPanic "cmmOffsetLit" (ppr byte_off) + +cmmLabelOff :: CLabel -> Int -> CmmLit +-- Smart constructor for CmmLabelOff +cmmLabelOff lbl 0 = CmmLabel lbl +cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off + +-- | Useful for creating an index into an array, with a staticaly known offset. +cmmIndex :: MachRep -> CmmExpr -> Int -> CmmExpr +cmmIndex rep base idx = cmmOffset base (idx * machRepByteWidth rep) + +-- | Useful for creating an index into an array, with an unknown offset. +cmmIndexExpr :: MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexExpr rep base (CmmLit (CmmInt n _)) = cmmIndex rep base (fromInteger n) +cmmIndexExpr rep base idx = + cmmOffsetExpr base byte_off + where + idx_rep = cmmExprRep idx + byte_off = CmmMachOp (MO_Shl idx_rep) [ + idx, CmmLit (mkIntCLit (machRepLogWidth rep))] + +cmmLoadIndex :: MachRep -> CmmExpr -> Int -> CmmExpr +cmmLoadIndex rep expr ix = CmmLoad (cmmIndex rep expr ix) rep + +--------------------------------------------------- +-- +-- Literal construction functions +-- +--------------------------------------------------- + +mkIntCLit :: Int -> CmmLit +mkIntCLit i = CmmInt (toInteger i) wordRep + +zeroCLit :: CmmLit +zeroCLit = CmmInt 0 wordRep + +mkLblExpr :: CLabel -> CmmExpr +mkLblExpr lbl = CmmLit (CmmLabel lbl) diff --git a/compiler/cmm/MachOp.hs b/compiler/cmm/MachOp.hs new file mode 100644 index 0000000000..5bbff6de78 --- /dev/null +++ b/compiler/cmm/MachOp.hs @@ -0,0 +1,652 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2002-2004 +-- +-- Low-level machine operations, used in the Cmm datatype. +-- +----------------------------------------------------------------------------- + +module MachOp ( + MachRep(..), + machRepBitWidth, + machRepByteWidth, + machRepLogWidth, + isFloatingRep, + + MachHint(..), + + MachOp(..), + pprMachOp, + isCommutableMachOp, + isAssociativeMachOp, + isComparisonMachOp, + resultRepOfMachOp, + machOpArgReps, + maybeInvertComparison, + + CallishMachOp(..), + pprCallishMachOp, + + wordRep, + halfWordRep, + cIntRep, cLongRep, + + mo_wordAdd, + mo_wordSub, + mo_wordEq, + mo_wordNe, + mo_wordMul, + mo_wordSQuot, + mo_wordSRem, + mo_wordSNeg, + mo_wordUQuot, + mo_wordURem, + + mo_wordSGe, + mo_wordSLe, + mo_wordSGt, + mo_wordSLt, + + mo_wordUGe, + mo_wordULe, + mo_wordUGt, + mo_wordULt, + + mo_wordAnd, + mo_wordOr, + mo_wordXor, + mo_wordNot, + mo_wordShl, + mo_wordSShr, + mo_wordUShr, + + mo_u_8To32, + mo_s_8To32, + mo_u_16To32, + mo_s_16To32, + + mo_u_8ToWord, + mo_s_8ToWord, + mo_u_16ToWord, + mo_s_16ToWord, + mo_u_32ToWord, + mo_s_32ToWord, + + mo_32To8, + mo_32To16, + mo_WordTo8, + mo_WordTo16, + mo_WordTo32, + ) where + +#include "HsVersions.h" + +import Constants +import Outputable + +-- ----------------------------------------------------------------------------- +-- MachRep + +{- | +A MachRep is the "representation" of a value in Cmm. It is used for +resource allocation: eg. which kind of register a value should be +stored in. + +The primary requirement is that there exists a function + + cmmExprRep :: CmmExpr -> MachRep + +This means that: + + - a register has an implicit MachRep + - a literal has an implicit MachRep + - an operation (MachOp) has an implicit result MachRep + +It also means that we can check that the arguments to a MachOp have +the correct MachRep, i.e. we can do a kind of lint-style type checking +on Cmm. +-} + +data MachRep + = I8 + | I16 + | I32 + | I64 + | I128 + | F32 + | F64 + | F80 -- extended double-precision, used in x86 native codegen only. + deriving (Eq, Ord, Show) + +mrStr I8 = SLIT("I8") +mrStr I16 = SLIT("I16") +mrStr I32 = SLIT("I32") +mrStr I64 = SLIT("I64") +mrStr I128 = SLIT("I128") +mrStr F32 = SLIT("F32") +mrStr F64 = SLIT("F64") +mrStr F80 = SLIT("F80") + +instance Outputable MachRep where + ppr rep = ptext (mrStr rep) + +{- +Implementation notes: + +It might suffice to keep just a width, without distinguishing between +floating and integer types. However, keeping the distinction will +help the native code generator to assign registers more easily. +-} + +{- +Should a MachRep include a signed vs. unsigned distinction? + +This is very much like a "hint" in C-- terminology: it isn't necessary +in order to generate correct code, but it might be useful in that the +compiler can generate better code if it has access to higher-level +hints about data. This is important at call boundaries, because the +definition of a function is not visible at all of its call sites, so +the compiler cannot infer the hints. + +Here in Cmm, we're taking a slightly different approach. We include +the int vs. float hint in the MachRep, because (a) the majority of +platforms have a strong distinction between float and int registers, +and (b) we don't want to do any heavyweight hint-inference in the +native code backend in order to get good code. We're treating the +hint more like a type: our Cmm is always completely consistent with +respect to hints. All coercions between float and int are explicit. + +What about the signed vs. unsigned hint? This information might be +useful if we want to keep sub-word-sized values in word-size +registers, which we must do if we only have word-sized registers. + +On such a system, there are two straightforward conventions for +representing sub-word-sized values: + +(a) Leave the upper bits undefined. Comparison operations must + sign- or zero-extend both operands before comparing them, + depending on whether the comparison is signed or unsigned. + +(b) Always keep the values sign- or zero-extended as appropriate. + Arithmetic operations must narrow the result to the appropriate + size. + +A clever compiler might not use either (a) or (b) exclusively, instead +it would attempt to minimize the coercions by analysis: the same kind +of analysis that propagates hints around. In Cmm we don't want to +have to do this, so we plump for having richer types and keeping the +type information consistent. + +If signed/unsigned hints are missing from MachRep, then the only +choice we have is (a), because we don't know whether the result of an +operation should be sign- or zero-extended. + +Many architectures have extending load operations, which work well +with (b). To make use of them with (a), you need to know whether the +value is going to be sign- or zero-extended by an enclosing comparison +(for example), which involves knowing above the context. This is +doable but more complex. + +Further complicating the issue is foreign calls: a foreign calling +convention can specify that signed 8-bit quantities are passed as +sign-extended 32 bit quantities, for example (this is the case on the +PowerPC). So we *do* need sign information on foreign call arguments. + +Pros for adding signed vs. unsigned to MachRep: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the MachRep for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different MachReps in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represnted by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + + +machRepBitWidth :: MachRep -> Int +machRepBitWidth I8 = 8 +machRepBitWidth I16 = 16 +machRepBitWidth I32 = 32 +machRepBitWidth I64 = 64 +machRepBitWidth I128 = 128 +machRepBitWidth F32 = 32 +machRepBitWidth F64 = 64 +machRepBitWidth F80 = 80 + +machRepByteWidth :: MachRep -> Int +machRepByteWidth I8 = 1 +machRepByteWidth I16 = 2 +machRepByteWidth I32 = 4 +machRepByteWidth I64 = 8 +machRepByteWidth I128 = 16 +machRepByteWidth F32 = 4 +machRepByteWidth F64 = 8 +machRepByteWidth F80 = 10 + +-- log_2 of the width in bytes, useful for generating shifts. +machRepLogWidth :: MachRep -> Int +machRepLogWidth I8 = 0 +machRepLogWidth I16 = 1 +machRepLogWidth I32 = 2 +machRepLogWidth I64 = 3 +machRepLogWidth I128 = 4 +machRepLogWidth F32 = 2 +machRepLogWidth F64 = 3 +machRepLogWidth F80 = panic "machRepLogWidth: F80" + +isFloatingRep :: MachRep -> Bool +isFloatingRep F32 = True +isFloatingRep F64 = True +isFloatingRep F80 = True +isFloatingRep _ = False + +-- ----------------------------------------------------------------------------- +-- Hints + +{- +A hint gives a little more information about a data value. Hints are +used on the arguments to a foreign call, where the code generator needs +to know some extra information on top of the MachRep of each argument in +order to generate a correct call. +-} + +data MachHint + = NoHint + | PtrHint + | SignedHint + | FloatHint + deriving Eq + +mhStr NoHint = SLIT("NoHint") +mhStr PtrHint = SLIT("PtrHint") +mhStr SignedHint = SLIT("SignedHint") +mhStr FloatHint = SLIT("FloatHint") + +instance Outputable MachHint where + ppr hint = ptext (mhStr hint) + +-- ----------------------------------------------------------------------------- +-- MachOp + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. Basically contains C's primops +and no others. + +Nomenclature: all ops indicate width and signedness, where +appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. +Nat means the operation works on STG word sized objects. +Signedness: S means signed, U means unsigned. For operations where +signedness is irrelevant or makes no difference (for example +integer add), the signedness component is omitted. + +An exception: NatP is a ptr-typed native word. From the point of +view of the native code generators this distinction is irrelevant, +but the C code generator sometimes needs this info to emit the +right casts. +-} + +data MachOp + + -- Integer operations + = MO_Add MachRep + | MO_Sub MachRep + | MO_Eq MachRep + | MO_Ne MachRep + | MO_Mul MachRep -- low word of multiply + | MO_S_MulMayOflo MachRep -- nonzero if signed multiply overflows + | MO_S_Quot MachRep -- signed / (same semantics as IntQuotOp) + | MO_S_Rem MachRep -- signed % (same semantics as IntRemOp) + | MO_S_Neg MachRep -- unary - + | MO_U_MulMayOflo MachRep -- nonzero if unsigned multiply overflows + | MO_U_Quot MachRep -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem MachRep -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons (floating-point comparisons also use these) + | MO_S_Ge MachRep + | MO_S_Le MachRep + | MO_S_Gt MachRep + | MO_S_Lt MachRep + + -- Unsigned comparisons + | MO_U_Ge MachRep + | MO_U_Le MachRep + | MO_U_Gt MachRep + | MO_U_Lt MachRep + + -- Bitwise operations. Not all of these may be supported at all sizes, + -- and only integral MachReps are valid. + | MO_And MachRep + | MO_Or MachRep + | MO_Xor MachRep + | MO_Not MachRep + | MO_Shl MachRep + | MO_U_Shr MachRep -- unsigned shift right + | MO_S_Shr MachRep -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_S_Conv MachRep{-from-} MachRep{-to-} -- signed conversion + | MO_U_Conv MachRep{-from-} MachRep{-to-} -- unsigned conversion + + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + +-- These MachOps tend to be implemented by foreign calls in some backends, +-- so we separate them out. In Cmm, these can only occur in a +-- statement position, in contrast to an ordinary MachOp which can occur +-- anywhere in an expression. +data CallishMachOp + = MO_F64_Pwr + | MO_F64_Sin + | MO_F64_Cos + | MO_F64_Tan + | MO_F64_Sinh + | MO_F64_Cosh + | MO_F64_Tanh + | MO_F64_Asin + | MO_F64_Acos + | MO_F64_Atan + | MO_F64_Log + | MO_F64_Exp + | MO_F64_Sqrt + | MO_F32_Pwr + | MO_F32_Sin + | MO_F32_Cos + | MO_F32_Tan + | MO_F32_Sinh + | MO_F32_Cosh + | MO_F32_Tanh + | MO_F32_Asin + | MO_F32_Acos + | MO_F32_Atan + | MO_F32_Log + | MO_F32_Exp + | MO_F32_Sqrt + deriving (Eq, Show) + +pprCallishMachOp :: CallishMachOp -> SDoc +pprCallishMachOp mo = text (show mo) + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. + +wordRep | wORD_SIZE == 4 = I32 + | wORD_SIZE == 8 = I64 + | otherwise = panic "MachOp.wordRep: Unknown word size" + +halfWordRep | wORD_SIZE == 4 = I16 + | wORD_SIZE == 8 = I32 + | otherwise = panic "MachOp.halfWordRep: Unknown word size" + +mo_wordAdd = MO_Add wordRep +mo_wordSub = MO_Sub wordRep +mo_wordEq = MO_Eq wordRep +mo_wordNe = MO_Ne wordRep +mo_wordMul = MO_Mul wordRep +mo_wordSQuot = MO_S_Quot wordRep +mo_wordSRem = MO_S_Rem wordRep +mo_wordSNeg = MO_S_Neg wordRep +mo_wordUQuot = MO_U_Quot wordRep +mo_wordURem = MO_U_Rem wordRep + +mo_wordSGe = MO_S_Ge wordRep +mo_wordSLe = MO_S_Le wordRep +mo_wordSGt = MO_S_Gt wordRep +mo_wordSLt = MO_S_Lt wordRep + +mo_wordUGe = MO_U_Ge wordRep +mo_wordULe = MO_U_Le wordRep +mo_wordUGt = MO_U_Gt wordRep +mo_wordULt = MO_U_Lt wordRep + +mo_wordAnd = MO_And wordRep +mo_wordOr = MO_Or wordRep +mo_wordXor = MO_Xor wordRep +mo_wordNot = MO_Not wordRep +mo_wordShl = MO_Shl wordRep +mo_wordSShr = MO_S_Shr wordRep +mo_wordUShr = MO_U_Shr wordRep + +mo_u_8To32 = MO_U_Conv I8 I32 +mo_s_8To32 = MO_S_Conv I8 I32 +mo_u_16To32 = MO_U_Conv I16 I32 +mo_s_16To32 = MO_S_Conv I16 I32 + +mo_u_8ToWord = MO_U_Conv I8 wordRep +mo_s_8ToWord = MO_S_Conv I8 wordRep +mo_u_16ToWord = MO_U_Conv I16 wordRep +mo_s_16ToWord = MO_S_Conv I16 wordRep +mo_s_32ToWord = MO_S_Conv I32 wordRep +mo_u_32ToWord = MO_U_Conv I32 wordRep + +mo_WordTo8 = MO_U_Conv wordRep I8 +mo_WordTo16 = MO_U_Conv wordRep I16 +mo_WordTo32 = MO_U_Conv wordRep I32 + +mo_32To8 = MO_U_Conv I32 I8 +mo_32To16 = MO_U_Conv I32 I16 + +-- cIntRep is the MachRep for a C-language 'int' +#if SIZEOF_INT == 4 +cIntRep = I32 +#elif SIZEOF_INT == 8 +cIntRep = I64 +#endif + +#if SIZEOF_LONG == 4 +cLongRep = I32 +#elif SIZEOF_LONG == 8 +cLongRep = I64 +#endif + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isAssociativeMachOp :: MachOp -> Bool +isAssociativeMachOp mop = + case mop of + MO_Add r -> not (isFloatingRep r) + MO_Mul r -> not (isFloatingRep r) + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isComparisonMachOp + +{- | +Returns 'True' if the MachOp is a comparison. + +If in doubt, return False. This generates worse code on the +native routes, but is otherwise harmless. +-} +isComparisonMachOp :: MachOp -> Bool +isComparisonMachOp mop = + case mop of + MO_Eq _ -> True + MO_Ne _ -> True + MO_S_Ge _ -> True + MO_S_Le _ -> True + MO_S_Gt _ -> True + MO_S_Lt _ -> True + MO_U_Ge _ -> True + MO_U_Le _ -> True + MO_U_Gt _ -> True + MO_U_Lt _ -> True + _other -> False + +-- ----------------------------------------------------------------------------- +-- Inverting conditions + +-- Sometimes it's useful to be able to invert the sense of a +-- condition. Not all conditional tests are invertible: in +-- particular, floating point conditionals cannot be inverted, because +-- there exist floating-point values which return False for both senses +-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). + +maybeInvertComparison :: MachOp -> Maybe MachOp +maybeInvertComparison op + = case op of + MO_Eq r | not (isFloatingRep r) -> Just (MO_Ne r) + MO_Ne r | not (isFloatingRep r) -> Just (MO_Eq r) + MO_U_Lt r | not (isFloatingRep r) -> Just (MO_U_Ge r) + MO_U_Gt r | not (isFloatingRep r) -> Just (MO_U_Le r) + MO_U_Le r | not (isFloatingRep r) -> Just (MO_U_Gt r) + MO_U_Ge r | not (isFloatingRep r) -> Just (MO_U_Lt r) + MO_S_Lt r | not (isFloatingRep r) -> Just (MO_S_Ge r) + MO_S_Gt r | not (isFloatingRep r) -> Just (MO_S_Le r) + MO_S_Le r | not (isFloatingRep r) -> Just (MO_S_Gt r) + MO_S_Ge r | not (isFloatingRep r) -> Just (MO_S_Lt r) + _other -> Nothing + +-- ---------------------------------------------------------------------------- +-- resultRepOfMachOp + +{- | +Returns the MachRep of the result of a MachOp. +-} +resultRepOfMachOp :: MachOp -> MachRep +resultRepOfMachOp mop = + case mop of + MO_Add r -> r + MO_Sub r -> r + MO_Eq r -> comparisonResultRep + MO_Ne r -> comparisonResultRep + MO_Mul r -> r + MO_S_MulMayOflo r -> r + MO_S_Quot r -> r + MO_S_Rem r -> r + MO_S_Neg r -> r + MO_U_MulMayOflo r -> r + MO_U_Quot r -> r + MO_U_Rem r -> r + + MO_S_Ge r -> comparisonResultRep + MO_S_Le r -> comparisonResultRep + MO_S_Gt r -> comparisonResultRep + MO_S_Lt r -> comparisonResultRep + + MO_U_Ge r -> comparisonResultRep + MO_U_Le r -> comparisonResultRep + MO_U_Gt r -> comparisonResultRep + MO_U_Lt r -> comparisonResultRep + + MO_And r -> r + MO_Or r -> r + MO_Xor r -> r + MO_Not r -> r + MO_Shl r -> r + MO_U_Shr r -> r + MO_S_Shr r -> r + + MO_S_Conv from to -> to + MO_U_Conv from to -> to + + +comparisonResultRep = wordRep -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. + +machOpArgReps :: MachOp -> [MachRep] +machOpArgReps op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r,wordRep] + MO_U_Shr r -> [r,wordRep] + MO_S_Shr r -> [r,wordRep] + + MO_S_Conv from to -> [from] + MO_U_Conv from to -> [from] diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs new file mode 100644 index 0000000000..a8d30668b7 --- /dev/null +++ b/compiler/cmm/PprC.hs @@ -0,0 +1,1028 @@ +----------------------------------------------------------------------------- +-- +-- Pretty-printing of Cmm as C, suitable for feeding gcc +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +-- +-- Print Cmm as real C, for -fvia-C +-- +-- This is simpler than the old PprAbsC, because Cmm is "macro-expanded" +-- relative to the old AbstractC, and many oddities/decorations have +-- disappeared from the data type. +-- + +-- ToDo: save/restore volatile registers around calls. + +module PprC ( + writeCs, + pprStringInCStyle + ) where + +#include "HsVersions.h" + +-- Cmm stuff +import Cmm +import CLabel +import MachOp +import ForeignCall + +-- Utils +import DynFlags ( DynFlags, DynFlag(..), dopt ) +import Unique ( getUnique ) +import UniqSet +import FiniteMap +import UniqFM ( eltsUFM ) +import FastString +import Outputable +import Constants +import StaticFlags ( opt_Unregisterised ) + +-- The rest +import Data.List ( intersperse, groupBy ) +import Data.Bits ( shiftR ) +import Char ( ord, chr ) +import IO ( Handle ) +import DATA_BITS +import Data.Word ( Word8 ) + +#ifdef DEBUG +import PprCmm () -- instances only +-- import Debug.Trace +#endif + +#if __GLASGOW_HASKELL__ >= 504 +import Data.Array.ST +#endif +import MONAD_ST + +-- -------------------------------------------------------------------------- +-- Top level + +pprCs :: DynFlags -> [Cmm] -> SDoc +pprCs dflags cmms + = pprCode CStyle (vcat $ map (\c -> split_marker $$ pprC c) cmms) + where + split_marker + | dopt Opt_SplitObjs dflags = ptext SLIT("__STG_SPLIT_MARKER") + | otherwise = empty + +writeCs :: DynFlags -> Handle -> [Cmm] -> IO () +writeCs dflags handle cmms + = printForC handle (pprCs dflags cmms) + +-- -------------------------------------------------------------------------- +-- Now do some real work +-- +-- for fun, we could call cmmToCmm over the tops... +-- + +pprC :: Cmm -> SDoc +pprC (Cmm tops) = vcat $ intersperse (text "") $ map pprTop tops + +-- +-- top level procs +-- +pprTop :: CmmTop -> SDoc +pprTop (CmmProc info clbl _params blocks) = + (if not (null info) + then pprDataExterns info $$ + pprWordArray (entryLblToInfoLbl clbl) info + else empty) $$ + (case blocks of + [] -> empty + -- the first block doesn't get a label: + (BasicBlock _ stmts : rest) -> vcat [ + text "", + extern_decls, + (if (externallyVisibleCLabel clbl) + then mkFN_ else mkIF_) (pprCLabel clbl) <+> lbrace, + nest 8 temp_decls, + nest 8 mkFB_, + nest 8 (vcat (map pprStmt stmts)) $$ + vcat (map pprBBlock rest), + nest 8 mkFE_, + rbrace ] + ) + where + (temp_decls, extern_decls) = pprTempAndExternDecls blocks + + +-- Chunks of static data. + +-- We only handle (a) arrays of word-sized things and (b) strings. + +pprTop (CmmData _section _ds@[CmmDataLabel lbl, CmmString str]) = + hcat [ + pprLocalness lbl, ptext SLIT("char "), pprCLabel lbl, + ptext SLIT("[] = "), pprStringInCStyle str, semi + ] + +pprTop (CmmData _section _ds@[CmmDataLabel lbl, CmmUninitialised size]) = + hcat [ + pprLocalness lbl, ptext SLIT("char "), pprCLabel lbl, + brackets (int size), semi + ] + +pprTop top@(CmmData _section (CmmDataLabel lbl : lits)) = + pprDataExterns lits $$ + pprWordArray lbl lits + +-- these shouldn't appear? +pprTop (CmmData _ _) = panic "PprC.pprTop: can't handle this data" + + +-- -------------------------------------------------------------------------- +-- BasicBlocks are self-contained entities: they always end in a jump. +-- +-- Like nativeGen/AsmCodeGen, we could probably reorder blocks to turn +-- as many jumps as possible into fall throughs. +-- + +pprBBlock :: CmmBasicBlock -> SDoc +pprBBlock (BasicBlock lbl stmts) = + if null stmts then + pprTrace "pprC.pprBBlock: curious empty code block for" + (pprBlockId lbl) empty + else + nest 4 (pprBlockId lbl <> colon) $$ + nest 8 (vcat (map pprStmt stmts)) + +-- -------------------------------------------------------------------------- +-- Info tables. Just arrays of words. +-- See codeGen/ClosureInfo, and nativeGen/PprMach + +pprWordArray :: CLabel -> [CmmStatic] -> SDoc +pprWordArray lbl ds + = hcat [ pprLocalness lbl, ptext SLIT("StgWord") + , space, pprCLabel lbl, ptext SLIT("[] = {") ] + $$ nest 8 (commafy (pprStatics ds)) + $$ ptext SLIT("};") + +-- +-- has to be static, if it isn't globally visible +-- +pprLocalness :: CLabel -> SDoc +pprLocalness lbl | not $ externallyVisibleCLabel lbl = ptext SLIT("static ") + | otherwise = empty + +-- -------------------------------------------------------------------------- +-- Statements. +-- + +pprStmt :: CmmStmt -> SDoc + +pprStmt stmt = case stmt of + CmmNop -> empty + CmmComment s -> (hang (ptext SLIT("/*")) 3 (ftext s)) $$ ptext SLIT("*/") + + CmmAssign dest src -> pprAssign dest src + + CmmStore dest src + | rep == I64 && wordRep /= I64 + -> ptext SLIT("ASSIGN_Word64") <> + parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi + + | rep == F64 && wordRep /= I64 + -> ptext SLIT("ASSIGN_DBL") <> + parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi + + | otherwise + -> hsep [ pprExpr (CmmLoad dest rep), equals, pprExpr src <> semi ] + where + rep = cmmExprRep src + + CmmCall (CmmForeignCall fn cconv) results args volatile -> + -- Controversial: leave this out for now. + -- pprUndef fn $$ + + pprCall ppr_fn cconv results args volatile + where + ppr_fn = case fn of + CmmLit (CmmLabel lbl) -> pprCLabel lbl + _other -> parens (cCast (pprCFunType cconv results args) fn) + -- for a dynamic call, cast the expression to + -- a function of the right type (we hope). + + -- we #undef a function before calling it: the FFI is supposed to be + -- an interface specifically to C, not to C+CPP. For one thing, this + -- makes the via-C route more compatible with the NCG. If macros + -- are being used for optimisation, then inline functions are probably + -- better anyway. + pprUndef (CmmLit (CmmLabel lbl)) = + ptext SLIT("#undef") <+> pprCLabel lbl + pprUndef _ = empty + + CmmCall (CmmPrim op) results args volatile -> + pprCall ppr_fn CCallConv results args volatile + where + ppr_fn = pprCallishMachOp_for_C op + + CmmBranch ident -> pprBranch ident + CmmCondBranch expr ident -> pprCondBranch expr ident + CmmJump lbl _params -> mkJMP_(pprExpr lbl) <> semi + CmmSwitch arg ids -> pprSwitch arg ids + +pprCFunType :: CCallConv -> [(CmmReg,MachHint)] -> [(CmmExpr,MachHint)] -> SDoc +pprCFunType cconv ress args + = hcat [ + res_type ress, + parens (text (ccallConvAttribute cconv) <> char '*'), + parens (commafy (map arg_type args)) + ] + where + res_type [] = ptext SLIT("void") + res_type [(one,hint)] = machRepHintCType (cmmRegRep one) hint + + arg_type (expr,hint) = machRepHintCType (cmmExprRep expr) hint + +-- --------------------------------------------------------------------- +-- unconditional branches +pprBranch :: BlockId -> SDoc +pprBranch ident = ptext SLIT("goto") <+> pprBlockId ident <> semi + + +-- --------------------------------------------------------------------- +-- conditional branches to local labels +pprCondBranch :: CmmExpr -> BlockId -> SDoc +pprCondBranch expr ident + = hsep [ ptext SLIT("if") , parens(pprExpr expr) , + ptext SLIT("goto") , (pprBlockId ident) <> semi ] + + +-- --------------------------------------------------------------------- +-- a local table branch +-- +-- we find the fall-through cases +-- +-- N.B. we remove Nothing's from the list of branches, as they are +-- 'undefined'. However, they may be defined one day, so we better +-- document this behaviour. +-- +pprSwitch :: CmmExpr -> [ Maybe BlockId ] -> SDoc +pprSwitch e maybe_ids + = let pairs = [ (ix, ident) | (ix,Just ident) <- zip [0..] maybe_ids ] + pairs2 = [ (map fst as, snd (head as)) | as <- groupBy sndEq pairs ] + in + (hang (ptext SLIT("switch") <+> parens ( pprExpr e ) <+> lbrace) + 4 (vcat ( map caseify pairs2 ))) + $$ rbrace + + where + sndEq (_,x) (_,y) = x == y + + -- fall through case + caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix + where + do_fallthrough ix = + hsep [ ptext SLIT("case") , pprHexVal ix wordRep <> colon , + ptext SLIT("/* fall through */") ] + + final_branch ix = + hsep [ ptext SLIT("case") , pprHexVal ix wordRep <> colon , + ptext SLIT("goto") , (pprBlockId ident) <> semi ] + +-- --------------------------------------------------------------------- +-- Expressions. +-- + +-- C Types: the invariant is that the C expression generated by +-- +-- pprExpr e +-- +-- has a type in C which is also given by +-- +-- machRepCType (cmmExprRep e) +-- +-- (similar invariants apply to the rest of the pretty printer). + +pprExpr :: CmmExpr -> SDoc +pprExpr e = case e of + CmmLit lit -> pprLit lit + + CmmLoad e I64 | wordRep /= I64 + -> ptext SLIT("PK_Word64") <> parens (mkP_ <> pprExpr1 e) + + CmmLoad e F64 | wordRep /= I64 + -> ptext SLIT("PK_DBL") <> parens (mkP_ <> pprExpr1 e) + + CmmLoad (CmmReg r) rep + | isPtrReg r && rep == wordRep + -> char '*' <> pprAsPtrReg r + + CmmLoad (CmmRegOff r 0) rep + | isPtrReg r && rep == wordRep + -> char '*' <> pprAsPtrReg r + + CmmLoad (CmmRegOff r off) rep + | isPtrReg r && rep == wordRep + -- ToDo: check that the offset is a word multiple? + -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) + + CmmLoad expr rep -> + -- the general case: + char '*' <> parens (cCast (machRepPtrCType rep) expr) + + CmmReg reg -> pprCastReg reg + CmmRegOff reg 0 -> pprCastReg reg + + CmmRegOff reg i + | i > 0 -> pprRegOff (char '+') i + | otherwise -> pprRegOff (char '-') (-i) + where + pprRegOff op i' = pprCastReg reg <> op <> int i' + + CmmMachOp mop args -> pprMachOpApp mop args + +pprExpr1 :: CmmExpr -> SDoc +pprExpr1 (CmmLit lit) = pprLit1 lit +pprExpr1 e@(CmmReg _reg) = pprExpr e +pprExpr1 other = parens (pprExpr other) + +-- -------------------------------------------------------------------------- +-- MachOp applications + +pprMachOpApp :: MachOp -> [CmmExpr] -> SDoc + +pprMachOpApp op args + | isMulMayOfloOp op + = ptext SLIT("mulIntMayOflo") <> parens (commafy (map pprExpr args)) + where isMulMayOfloOp (MO_U_MulMayOflo _) = True + isMulMayOfloOp (MO_S_MulMayOflo _) = True + isMulMayOfloOp _ = False + +pprMachOpApp mop args + = case args of + -- dyadic + [x,y] -> pprArg x <+> pprMachOp_for_C mop <+> pprArg y + + -- unary + [x] -> pprMachOp_for_C mop <> parens (pprArg x) + + _ -> panic "PprC.pprMachOp : machop with wrong number of args" + + where + pprArg e | signedOp mop = cCast (machRepSignedCType (cmmExprRep e)) e + | otherwise = pprExpr1 e + +-- -------------------------------------------------------------------------- +-- Literals + +pprLit :: CmmLit -> SDoc +pprLit lit = case lit of + CmmInt i rep -> pprHexVal i rep + CmmFloat f rep -> parens (machRepCType rep) <> (rational f) + CmmLabel clbl -> mkW_ <> pprCLabelAddr clbl + CmmLabelOff clbl i -> mkW_ <> pprCLabelAddr clbl <> char '+' <> int i + CmmLabelDiffOff clbl1 clbl2 i + -- WARNING: + -- * the lit must occur in the info table clbl2 + -- * clbl1 must be an SRT, a slow entry point or a large bitmap + -- The Mangler is expected to convert any reference to an SRT, + -- a slow entry point or a large bitmap + -- from an info table to an offset. + -> mkW_ <> pprCLabelAddr clbl1 <> char '+' <> int i + +pprCLabelAddr lbl = char '&' <> pprCLabel lbl + +pprLit1 :: CmmLit -> SDoc +pprLit1 lit@(CmmLabelOff _ _) = parens (pprLit lit) +pprLit1 lit@(CmmLabelDiffOff _ _ _) = parens (pprLit lit) +pprLit1 lit@(CmmFloat _ _) = parens (pprLit lit) +pprLit1 other = pprLit other + +-- --------------------------------------------------------------------------- +-- Static data + +pprStatics :: [CmmStatic] -> [SDoc] +pprStatics [] = [] +pprStatics (CmmStaticLit (CmmFloat f F32) : rest) + = pprLit1 (floatToWord f) : pprStatics rest +pprStatics (CmmStaticLit (CmmFloat f F64) : rest) + = map pprLit1 (doubleToWords f) ++ pprStatics rest +pprStatics (CmmStaticLit (CmmInt i I64) : rest) + | machRepByteWidth I32 == wORD_SIZE +#ifdef WORDS_BIGENDIAN + = pprStatics (CmmStaticLit (CmmInt q I32) : + CmmStaticLit (CmmInt r I32) : rest) +#else + = pprStatics (CmmStaticLit (CmmInt r I32) : + CmmStaticLit (CmmInt q I32) : rest) +#endif + where r = i .&. 0xffffffff + q = i `shiftR` 32 +pprStatics (CmmStaticLit lit : rest) + = pprLit1 lit : pprStatics rest +pprStatics (other : rest) + = pprPanic "pprWord" (pprStatic other) + +pprStatic :: CmmStatic -> SDoc +pprStatic s = case s of + + CmmStaticLit lit -> nest 4 (pprLit lit) + CmmAlign i -> nest 4 (ptext SLIT("/* align */") <+> int i) + CmmDataLabel clbl -> pprCLabel clbl <> colon + CmmUninitialised i -> nest 4 (mkC_ <> brackets (int i)) + + -- these should be inlined, like the old .hc + CmmString s' -> nest 4 (mkW_ <> parens(pprStringInCStyle s')) + + +-- --------------------------------------------------------------------------- +-- Block Ids + +pprBlockId :: BlockId -> SDoc +pprBlockId b = char '_' <> ppr (getUnique b) + +-- -------------------------------------------------------------------------- +-- Print a MachOp in a way suitable for emitting via C. +-- + +pprMachOp_for_C :: MachOp -> SDoc + +pprMachOp_for_C mop = case mop of + + -- Integer operations + MO_Add _ -> char '+' + MO_Sub _ -> char '-' + MO_Eq _ -> ptext SLIT("==") + MO_Ne _ -> ptext SLIT("!=") + MO_Mul _ -> char '*' + + MO_S_Quot _ -> char '/' + MO_S_Rem _ -> char '%' + MO_S_Neg _ -> char '-' + + MO_U_Quot _ -> char '/' + MO_U_Rem _ -> char '%' + + -- Signed comparisons (floating-point comparisons also use these) + -- & Unsigned comparisons + MO_S_Ge _ -> ptext SLIT(">=") + MO_S_Le _ -> ptext SLIT("<=") + MO_S_Gt _ -> char '>' + MO_S_Lt _ -> char '<' + + MO_U_Ge _ -> ptext SLIT(">=") + MO_U_Le _ -> ptext SLIT("<=") + MO_U_Gt _ -> char '>' + MO_U_Lt _ -> char '<' + + -- Bitwise operations. Not all of these may be supported at all + -- sizes, and only integral MachReps are valid. + MO_And _ -> char '&' + MO_Or _ -> char '|' + MO_Xor _ -> char '^' + MO_Not _ -> char '~' + MO_Shl _ -> ptext SLIT("<<") + MO_U_Shr _ -> ptext SLIT(">>") -- unsigned shift right + MO_S_Shr _ -> ptext SLIT(">>") -- signed shift right + +-- Conversions. Some of these will be NOPs. +-- Floating-point conversions use the signed variant. +-- We won't know to generate (void*) casts here, but maybe from +-- context elsewhere + +-- noop casts + MO_U_Conv I8 I8 -> empty + MO_U_Conv I16 I16 -> empty + MO_U_Conv I32 I32 -> empty + MO_U_Conv I64 I64 -> empty + MO_U_Conv I128 I128 -> empty + MO_S_Conv I8 I8 -> empty + MO_S_Conv I16 I16 -> empty + MO_S_Conv I32 I32 -> empty + MO_S_Conv I64 I64 -> empty + MO_S_Conv I128 I128 -> empty + + MO_U_Conv _from to -> parens (machRepCType to) + MO_S_Conv _from to -> parens (machRepSignedCType to) + + _ -> panic "PprC.pprMachOp_for_C: unknown machop" + +signedOp :: MachOp -> Bool +signedOp (MO_S_Quot _) = True +signedOp (MO_S_Rem _) = True +signedOp (MO_S_Neg _) = True +signedOp (MO_S_Ge _) = True +signedOp (MO_S_Le _) = True +signedOp (MO_S_Gt _) = True +signedOp (MO_S_Lt _) = True +signedOp (MO_S_Shr _) = True +signedOp (MO_S_Conv _ _) = True +signedOp _ = False + +-- --------------------------------------------------------------------- +-- tend to be implemented by foreign calls + +pprCallishMachOp_for_C :: CallishMachOp -> SDoc + +pprCallishMachOp_for_C mop + = case mop of + MO_F64_Pwr -> ptext SLIT("pow") + MO_F64_Sin -> ptext SLIT("sin") + MO_F64_Cos -> ptext SLIT("cos") + MO_F64_Tan -> ptext SLIT("tan") + MO_F64_Sinh -> ptext SLIT("sinh") + MO_F64_Cosh -> ptext SLIT("cosh") + MO_F64_Tanh -> ptext SLIT("tanh") + MO_F64_Asin -> ptext SLIT("asin") + MO_F64_Acos -> ptext SLIT("acos") + MO_F64_Atan -> ptext SLIT("atan") + MO_F64_Log -> ptext SLIT("log") + MO_F64_Exp -> ptext SLIT("exp") + MO_F64_Sqrt -> ptext SLIT("sqrt") + MO_F32_Pwr -> ptext SLIT("powf") + MO_F32_Sin -> ptext SLIT("sinf") + MO_F32_Cos -> ptext SLIT("cosf") + MO_F32_Tan -> ptext SLIT("tanf") + MO_F32_Sinh -> ptext SLIT("sinhf") + MO_F32_Cosh -> ptext SLIT("coshf") + MO_F32_Tanh -> ptext SLIT("tanhf") + MO_F32_Asin -> ptext SLIT("asinf") + MO_F32_Acos -> ptext SLIT("acosf") + MO_F32_Atan -> ptext SLIT("atanf") + MO_F32_Log -> ptext SLIT("logf") + MO_F32_Exp -> ptext SLIT("expf") + MO_F32_Sqrt -> ptext SLIT("sqrtf") + +-- --------------------------------------------------------------------- +-- Useful #defines +-- + +mkJMP_, mkFN_, mkIF_ :: SDoc -> SDoc + +mkJMP_ i = ptext SLIT("JMP_") <> parens i +mkFN_ i = ptext SLIT("FN_") <> parens i -- externally visible function +mkIF_ i = ptext SLIT("IF_") <> parens i -- locally visible + + +mkFB_, mkFE_ :: SDoc +mkFB_ = ptext SLIT("FB_") -- function code begin +mkFE_ = ptext SLIT("FE_") -- function code end + +-- from includes/Stg.h +-- +mkC_,mkW_,mkP_,mkPP_,mkI_,mkA_,mkD_,mkF_,mkB_,mkL_,mkLI_,mkLW_ :: SDoc + +mkC_ = ptext SLIT("(C_)") -- StgChar +mkW_ = ptext SLIT("(W_)") -- StgWord +mkP_ = ptext SLIT("(P_)") -- StgWord* +mkPP_ = ptext SLIT("(PP_)") -- P_* +mkI_ = ptext SLIT("(I_)") -- StgInt +mkA_ = ptext SLIT("(A_)") -- StgAddr +mkD_ = ptext SLIT("(D_)") -- const StgWord* +mkF_ = ptext SLIT("(F_)") -- StgFunPtr +mkB_ = ptext SLIT("(B_)") -- StgByteArray +mkL_ = ptext SLIT("(L_)") -- StgClosurePtr + +mkLI_ = ptext SLIT("(LI_)") -- StgInt64 +mkLW_ = ptext SLIT("(LW_)") -- StgWord64 + + +-- --------------------------------------------------------------------- +-- +-- Assignments +-- +-- Generating assignments is what we're all about, here +-- +pprAssign :: CmmReg -> CmmExpr -> SDoc + +-- dest is a reg, rhs is a reg +pprAssign r1 (CmmReg r2) + | not (isStrangeTypeReg r1) && not (isStrangeTypeReg r2) + || isPtrReg r1 && isPtrReg r2 + = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, semi ] + +-- dest is a reg, rhs is a CmmRegOff +pprAssign r1 (CmmRegOff r2 off) + | not (isStrangeTypeReg r1) && not (isStrangeTypeReg r2) + || isPtrReg r1 && isPtrReg r2 + = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, op, int off', semi ] + where + off1 | isPtrReg r2 = off `shiftR` wordShift + | otherwise = off + + (op,off') | off >= 0 = (char '+', off1) + | otherwise = (char '-', -off1) + +-- dest is a reg, rhs is anything. +-- We can't cast the lvalue, so we have to cast the rhs if necessary. Casting +-- the lvalue elicits a warning from new GCC versions (3.4+). +pprAssign r1 r2 + | isPtrReg r1 + = pprAsPtrReg r1 <> ptext SLIT(" = ") <> mkP_ <> pprExpr1 r2 <> semi + | Just ty <- strangeRegType r1 + = pprReg r1 <> ptext SLIT(" = ") <> parens ty <> pprExpr1 r2 <> semi + | otherwise + = pprReg r1 <> ptext SLIT(" = ") <> pprExpr r2 <> semi + +-- --------------------------------------------------------------------- +-- Registers + +pprCastReg reg + | isStrangeTypeReg reg = mkW_ <> pprReg reg + | otherwise = pprReg reg + +-- True if the register has type StgPtr in C, otherwise it has an +-- integer type. We need to take care with pointer arithmetic on registers +-- with type StgPtr. +isPtrReg :: CmmReg -> Bool +isPtrReg (CmmLocal _) = False +isPtrReg (CmmGlobal r) = isPtrGlobalReg r + +isPtrGlobalReg :: GlobalReg -> Bool +isPtrGlobalReg (VanillaReg n) = True +isPtrGlobalReg Sp = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg _ = False + +-- True if in C this register doesn't have the type given by +-- (machRepCType (cmmRegRep reg)), so it has to be cast. +isStrangeTypeReg :: CmmReg -> Bool +isStrangeTypeReg (CmmLocal _) = False +isStrangeTypeReg (CmmGlobal g) = isStrangeTypeGlobal g + +isStrangeTypeGlobal :: GlobalReg -> Bool +isStrangeTypeGlobal CurrentTSO = True +isStrangeTypeGlobal CurrentNursery = True +isStrangeTypeGlobal BaseReg = True +isStrangeTypeGlobal r = isPtrGlobalReg r + +strangeRegType :: CmmReg -> Maybe SDoc +strangeRegType (CmmGlobal CurrentTSO) = Just (ptext SLIT("struct StgTSO_ *")) +strangeRegType (CmmGlobal CurrentNursery) = Just (ptext SLIT("struct bdescr_ *")) +strangeRegType (CmmGlobal BaseReg) = Just (ptext SLIT("struct StgRegTable_ *")) +strangeRegType _ = Nothing + +-- pprReg just prints the register name. +-- +pprReg :: CmmReg -> SDoc +pprReg r = case r of + CmmLocal local -> pprLocalReg local + CmmGlobal global -> pprGlobalReg global + +pprAsPtrReg :: CmmReg -> SDoc +pprAsPtrReg (CmmGlobal (VanillaReg n)) = char 'R' <> int n <> ptext SLIT(".p") +pprAsPtrReg other_reg = pprReg other_reg + +pprGlobalReg :: GlobalReg -> SDoc +pprGlobalReg gr = case gr of + VanillaReg n -> char 'R' <> int n <> ptext SLIT(".w") + FloatReg n -> char 'F' <> int n + DoubleReg n -> char 'D' <> int n + LongReg n -> char 'L' <> int n + Sp -> ptext SLIT("Sp") + SpLim -> ptext SLIT("SpLim") + Hp -> ptext SLIT("Hp") + HpLim -> ptext SLIT("HpLim") + CurrentTSO -> ptext SLIT("CurrentTSO") + CurrentNursery -> ptext SLIT("CurrentNursery") + HpAlloc -> ptext SLIT("HpAlloc") + BaseReg -> ptext SLIT("BaseReg") + GCEnter1 -> ptext SLIT("stg_gc_enter_1") + GCFun -> ptext SLIT("stg_gc_fun") + +pprLocalReg :: LocalReg -> SDoc +pprLocalReg (LocalReg uniq _rep) = char '_' <> ppr uniq + +-- ----------------------------------------------------------------------------- +-- Foreign Calls + +pprCall :: SDoc -> CCallConv -> [(CmmReg,MachHint)] -> [(CmmExpr,MachHint)] + -> Maybe [GlobalReg] -> SDoc + +pprCall ppr_fn cconv results args vols + | not (is_cish cconv) + = panic "pprCall: unknown calling convention" + + | otherwise + = save vols $$ + ptext SLIT("CALLER_SAVE_SYSTEM") $$ +#if x86_64_TARGET_ARCH + -- HACK around gcc optimisations. + -- x86_64 needs a __DISCARD__() here, to create a barrier between + -- putting the arguments into temporaries and passing the arguments + -- to the callee, because the argument expressions may refer to + -- machine registers that are also used for passing arguments in the + -- C calling convention. + (if (not opt_Unregisterised) + then ptext SLIT("__DISCARD__();") + else empty) $$ +#endif + ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi $$ + ptext SLIT("CALLER_RESTORE_SYSTEM") $$ + restore vols + where + ppr_assign [] rhs = rhs + ppr_assign [(reg@(CmmGlobal BaseReg), hint)] rhs + | Just ty <- strangeRegType reg + = ptext SLIT("ASSIGN_BaseReg") <> parens (parens ty <> rhs) + -- BaseReg is special, sometimes it isn't an lvalue and we + -- can't assign to it. + ppr_assign [(one,hint)] rhs + | Just ty <- strangeRegType one + = pprReg one <> ptext SLIT(" = ") <> parens ty <> rhs + | otherwise + = pprReg one <> ptext SLIT(" = ") + <> pprUnHint hint (cmmRegRep one) <> rhs + ppr_assign _other _rhs = panic "pprCall: multiple results" + + pprArg (expr, PtrHint) + = cCast (ptext SLIT("void *")) expr + -- see comment by machRepHintCType below + pprArg (expr, SignedHint) + = cCast (machRepSignedCType (cmmExprRep expr)) expr + pprArg (expr, _other) + = pprExpr expr + + pprUnHint PtrHint rep = parens (machRepCType rep) + pprUnHint SignedHint rep = parens (machRepCType rep) + pprUnHint _ _ = empty + + save = save_restore SLIT("CALLER_SAVE") + restore = save_restore SLIT("CALLER_RESTORE") + + -- Nothing says "I don't know what's live; save everything" + -- CALLER_SAVE_USER is defined in ghc/includes/Regs.h + save_restore txt Nothing = ptext txt <> ptext SLIT("_USER") + save_restore txt (Just these) = vcat (map saveRestoreGlobal these) + where saveRestoreGlobal r = ptext txt <> char '_' <> pprGlobalRegName r + +pprGlobalRegName :: GlobalReg -> SDoc +pprGlobalRegName gr = case gr of + VanillaReg n -> char 'R' <> int n -- without the .w suffix + _ -> pprGlobalReg gr + +-- Currently we only have these two calling conventions, but this might +-- change in the future... +is_cish CCallConv = True +is_cish StdCallConv = True + +-- --------------------------------------------------------------------- +-- Find and print local and external declarations for a list of +-- Cmm statements. +-- +pprTempAndExternDecls :: [CmmBasicBlock] -> (SDoc{-temps-}, SDoc{-externs-}) +pprTempAndExternDecls stmts + = (vcat (map pprTempDecl (eltsUFM temps)), + vcat (map (pprExternDecl False{-ToDo-}) (keysFM lbls))) + where (temps, lbls) = runTE (mapM_ te_BB stmts) + +pprDataExterns :: [CmmStatic] -> SDoc +pprDataExterns statics + = vcat (map (pprExternDecl False{-ToDo-}) (keysFM lbls)) + where (_, lbls) = runTE (mapM_ te_Static statics) + +pprTempDecl :: LocalReg -> SDoc +pprTempDecl l@(LocalReg _uniq rep) + = hcat [ machRepCType rep, space, pprLocalReg l, semi ] + +pprExternDecl :: Bool -> CLabel -> SDoc +pprExternDecl in_srt lbl + -- do not print anything for "known external" things + | not (needsCDecl lbl) = empty + | otherwise = + hcat [ visibility, label_type (labelType lbl), + lparen, dyn_wrapper (pprCLabel lbl), text ");" ] + where + dyn_wrapper d + | in_srt && labelDynamic lbl = text "DLL_IMPORT_DATA_VAR" <> parens d + | otherwise = d + + label_type CodeLabel = ptext SLIT("F_") + label_type DataLabel = ptext SLIT("I_") + + visibility + | externallyVisibleCLabel lbl = char 'E' + | otherwise = char 'I' + + +type TEState = (UniqSet LocalReg, FiniteMap CLabel ()) +newtype TE a = TE { unTE :: TEState -> (a, TEState) } + +instance Monad TE where + TE m >>= k = TE $ \s -> case m s of (a, s') -> unTE (k a) s' + return a = TE $ \s -> (a, s) + +te_lbl :: CLabel -> TE () +te_lbl lbl = TE $ \(temps,lbls) -> ((), (temps, addToFM lbls lbl ())) + +te_temp :: LocalReg -> TE () +te_temp r = TE $ \(temps,lbls) -> ((), (addOneToUniqSet temps r, lbls)) + +runTE :: TE () -> TEState +runTE (TE m) = snd (m (emptyUniqSet, emptyFM)) + +te_Static :: CmmStatic -> TE () +te_Static (CmmStaticLit lit) = te_Lit lit +te_Static _ = return () + +te_BB :: CmmBasicBlock -> TE () +te_BB (BasicBlock _ ss) = mapM_ te_Stmt ss + +te_Lit :: CmmLit -> TE () +te_Lit (CmmLabel l) = te_lbl l +te_Lit (CmmLabelOff l _) = te_lbl l +te_Lit (CmmLabelDiffOff l1 l2 _) = te_lbl l1 +te_Lit _ = return () + +te_Stmt :: CmmStmt -> TE () +te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e +te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r +te_Stmt (CmmCall _ rs es _) = mapM_ (te_Reg.fst) rs >> + mapM_ (te_Expr.fst) es +te_Stmt (CmmCondBranch e _) = te_Expr e +te_Stmt (CmmSwitch e _) = te_Expr e +te_Stmt (CmmJump e _) = te_Expr e +te_Stmt _ = return () + +te_Expr :: CmmExpr -> TE () +te_Expr (CmmLit lit) = te_Lit lit +te_Expr (CmmLoad e _) = te_Expr e +te_Expr (CmmReg r) = te_Reg r +te_Expr (CmmMachOp _ es) = mapM_ te_Expr es +te_Expr (CmmRegOff r _) = te_Reg r + +te_Reg :: CmmReg -> TE () +te_Reg (CmmLocal l) = te_temp l +te_Reg _ = return () + + +-- --------------------------------------------------------------------- +-- C types for MachReps + +cCast :: SDoc -> CmmExpr -> SDoc +cCast ty expr = parens ty <> pprExpr1 expr + +-- This is for finding the types of foreign call arguments. For a pointer +-- argument, we always cast the argument to (void *), to avoid warnings from +-- the C compiler. +machRepHintCType :: MachRep -> MachHint -> SDoc +machRepHintCType rep PtrHint = ptext SLIT("void *") +machRepHintCType rep SignedHint = machRepSignedCType rep +machRepHintCType rep _other = machRepCType rep + +machRepPtrCType :: MachRep -> SDoc +machRepPtrCType r | r == wordRep = ptext SLIT("P_") + | otherwise = machRepCType r <> char '*' + +machRepCType :: MachRep -> SDoc +machRepCType r | r == wordRep = ptext SLIT("W_") + | otherwise = sized_type + where sized_type = case r of + I8 -> ptext SLIT("StgWord8") + I16 -> ptext SLIT("StgWord16") + I32 -> ptext SLIT("StgWord32") + I64 -> ptext SLIT("StgWord64") + F32 -> ptext SLIT("StgFloat") -- ToDo: correct? + F64 -> ptext SLIT("StgDouble") + _ -> panic "machRepCType" + +machRepSignedCType :: MachRep -> SDoc +machRepSignedCType r | r == wordRep = ptext SLIT("I_") + | otherwise = sized_type + where sized_type = case r of + I8 -> ptext SLIT("StgInt8") + I16 -> ptext SLIT("StgInt16") + I32 -> ptext SLIT("StgInt32") + I64 -> ptext SLIT("StgInt64") + F32 -> ptext SLIT("StgFloat") -- ToDo: correct? + F64 -> ptext SLIT("StgDouble") + _ -> panic "machRepCType" + +-- --------------------------------------------------------------------- +-- print strings as valid C strings + +pprStringInCStyle :: [Word8] -> SDoc +pprStringInCStyle s = doubleQuotes (text (concatMap charToC s)) + +charToC :: Word8 -> String +charToC w = + case chr (fromIntegral w) of + '\"' -> "\\\"" + '\'' -> "\\\'" + '\\' -> "\\\\" + c | c >= ' ' && c <= '~' -> [c] + | otherwise -> ['\\', + chr (ord '0' + ord c `div` 64), + chr (ord '0' + ord c `div` 8 `mod` 8), + chr (ord '0' + ord c `mod` 8)] + +-- --------------------------------------------------------------------------- +-- Initialising static objects with floating-point numbers. We can't +-- just emit the floating point number, because C will cast it to an int +-- by rounding it. We want the actual bit-representation of the float. + +-- This is a hack to turn the floating point numbers into ints that we +-- can safely initialise to static locations. + +big_doubles + | machRepByteWidth F64 == 2 * wORD_SIZE = True + | machRepByteWidth F64 == wORD_SIZE = False + | otherwise = panic "big_doubles" + +#if __GLASGOW_HASKELL__ >= 504 +newFloatArray :: (Int,Int) -> ST s (STUArray s Int Float) +newFloatArray = newArray_ + +newDoubleArray :: (Int,Int) -> ST s (STUArray s Int Double) +newDoubleArray = newArray_ + +castFloatToIntArray :: STUArray s Int Float -> ST s (STUArray s Int Int) +castFloatToIntArray = castSTUArray + +castDoubleToIntArray :: STUArray s Int Double -> ST s (STUArray s Int Int) +castDoubleToIntArray = castSTUArray + +writeFloatArray :: STUArray s Int Float -> Int -> Float -> ST s () +writeFloatArray = writeArray + +writeDoubleArray :: STUArray s Int Double -> Int -> Double -> ST s () +writeDoubleArray = writeArray + +readIntArray :: STUArray s Int Int -> Int -> ST s Int +readIntArray = readArray + +#else + +castFloatToIntArray :: MutableByteArray s t -> ST s (MutableByteArray s t) +castFloatToIntArray = return + +castDoubleToIntArray :: MutableByteArray s t -> ST s (MutableByteArray s t) +castDoubleToIntArray = return + +#endif + +-- floats are always 1 word +floatToWord :: Rational -> CmmLit +floatToWord r + = runST (do + arr <- newFloatArray ((0::Int),0) + writeFloatArray arr 0 (fromRational r) + arr' <- castFloatToIntArray arr + i <- readIntArray arr' 0 + return (CmmInt (toInteger i) wordRep) + ) + +doubleToWords :: Rational -> [CmmLit] +doubleToWords r + | big_doubles -- doubles are 2 words + = runST (do + arr <- newDoubleArray ((0::Int),1) + writeDoubleArray arr 0 (fromRational r) + arr' <- castDoubleToIntArray arr + i1 <- readIntArray arr' 0 + i2 <- readIntArray arr' 1 + return [ CmmInt (toInteger i1) wordRep + , CmmInt (toInteger i2) wordRep + ] + ) + | otherwise -- doubles are 1 word + = runST (do + arr <- newDoubleArray ((0::Int),0) + writeDoubleArray arr 0 (fromRational r) + arr' <- castDoubleToIntArray arr + i <- readIntArray arr' 0 + return [ CmmInt (toInteger i) wordRep ] + ) + +-- --------------------------------------------------------------------------- +-- Utils + +wordShift :: Int +wordShift = machRepLogWidth wordRep + +commafy :: [SDoc] -> SDoc +commafy xs = hsep $ punctuate comma xs + +-- Print in C hex format: 0x13fa +pprHexVal :: Integer -> MachRep -> SDoc +pprHexVal 0 _ = ptext SLIT("0x0") +pprHexVal w rep + | w < 0 = parens (char '-' <> ptext SLIT("0x") <> go (-w) <> repsuffix rep) + | otherwise = ptext SLIT("0x") <> go w <> repsuffix rep + where + -- type suffix for literals: + -- Integer literals are unsigned in Cmm/C. We explicitly cast to + -- signed values for doing signed operations, but at all other + -- times values are unsigned. This also helps eliminate occasional + -- warnings about integer overflow from gcc. + + -- on 32-bit platforms, add "ULL" to 64-bit literals + repsuffix I64 | wORD_SIZE == 4 = ptext SLIT("ULL") + -- on 64-bit platforms with 32-bit int, add "L" to 64-bit literals + repsuffix I64 | cINT_SIZE == 4 = ptext SLIT("UL") + repsuffix _ = char 'U' + + go 0 = empty + go w' = go q <> dig + where + (q,r) = w' `quotRem` 16 + dig | r < 10 = char (chr (fromInteger r + ord '0')) + | otherwise = char (chr (fromInteger r - 10 + ord 'a')) + diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs new file mode 100644 index 0000000000..6e8367d662 --- /dev/null +++ b/compiler/cmm/PprCmm.hs @@ -0,0 +1,462 @@ +---------------------------------------------------------------------------- +-- +-- Pretty-printing of Cmm as (a superset of) C-- +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +-- +-- This is where we walk over Cmm emitting an external representation, +-- suitable for parsing, in a syntax strongly reminiscent of C--. This +-- is the "External Core" for the Cmm layer. +-- +-- As such, this should be a well-defined syntax: we want it to look nice. +-- Thus, we try wherever possible to use syntax defined in [1], +-- "The C-- Reference Manual", http://www.cminusminus.org/. We differ +-- slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- than C--'s bits8 .. bits64. +-- +-- We try to ensure that all information available in the abstract +-- syntax is reproduced, or reproducible, in the concrete syntax. +-- Data that is not in printed out can be reconstructed according to +-- conventions used in the pretty printer. There are at least two such +-- cases: +-- 1) if a value has wordRep type, the type is not appended in the +-- output. +-- 2) MachOps that operate over wordRep type are printed in a +-- C-style, rather than as their internal MachRep name. +-- +-- These conventions produce much more readable Cmm output. +-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +module PprCmm ( + writeCmms, pprCmms, pprCmm, pprStmt, pprExpr + ) where + +#include "HsVersions.h" + +import Cmm +import CmmUtils ( isTrivialCmmExpr ) +import MachOp ( MachOp(..), pprMachOp, MachRep(..), wordRep ) +import CLabel ( pprCLabel, mkForeignLabel, entryLblToInfoLbl ) + +import ForeignCall ( CCallConv(..) ) +import Unique ( getUnique ) +import Outputable +import FastString ( mkFastString ) + +import Data.List ( intersperse, groupBy ) +import IO ( Handle ) +import Maybe ( isJust ) +import Data.Char ( chr ) + +pprCmms :: [Cmm] -> SDoc +pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) + where + separator = space $$ ptext SLIT("-------------------") $$ space + +writeCmms :: Handle -> [Cmm] -> IO () +writeCmms handle cmms = printForC handle (pprCmms cmms) + +----------------------------------------------------------------------------- + +instance Outputable Cmm where + ppr c = pprCmm c + +instance Outputable CmmTop where + ppr t = pprTop t + +instance Outputable CmmBasicBlock where + ppr b = pprBBlock b + +instance Outputable CmmStmt where + ppr s = pprStmt s + +instance Outputable CmmExpr where + ppr e = pprExpr e + +instance Outputable CmmReg where + ppr e = pprReg e + +instance Outputable GlobalReg where + ppr e = pprGlobalReg e + +----------------------------------------------------------------------------- + +pprCmm :: Cmm -> SDoc +pprCmm (Cmm tops) = vcat $ intersperse (text "") $ map pprTop tops + +-- -------------------------------------------------------------------------- +-- Top level `procedure' blocks. The info tables, if not null, are +-- printed in the style of C--'s 'stackdata' declaration, just inside +-- the proc body, and are labelled with the procedure name ++ "_info". +-- +pprTop :: CmmTop -> SDoc +pprTop (CmmProc info lbl params blocks ) + + = vcat [ pprCLabel lbl <> parens (commafy $ map pprLocalReg params) <+> lbrace + , nest 8 $ pprInfo info lbl + , nest 4 $ vcat (map ppr blocks) + , rbrace ] + + where + pprInfo [] _ = empty + pprInfo i label = + (hang (pprCLabel (entryLblToInfoLbl label) <+> lbrace ) + 4 $ vcat (map pprStatic i)) + $$ rbrace + +-- -------------------------------------------------------------------------- +-- We follow [1], 4.5 +-- +-- section "data" { ... } +-- +pprTop (CmmData section ds) = + (hang (pprSection section <+> lbrace) 4 (vcat (map pprStatic ds))) + $$ rbrace + + +-- -------------------------------------------------------------------------- +-- Basic blocks look like assembly blocks. +-- lbl: stmt ; stmt ; .. +pprBBlock :: CmmBasicBlock -> SDoc +pprBBlock (BasicBlock ident stmts) = + hang (pprBlockId ident <> colon) 4 (vcat (map ppr stmts)) + +-- -------------------------------------------------------------------------- +-- Statements. C-- usually, exceptions to this should be obvious. +-- +pprStmt :: CmmStmt -> SDoc +pprStmt stmt = case stmt of + + -- ; + CmmNop -> semi + + -- // text + CmmComment s -> text "//" <+> ftext s + + -- reg = expr; + CmmAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = ppr ( cmmExprRep expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + CmmCall (CmmForeignCall fn cconv) results args _volatile -> + hcat [ ptext SLIT("call"), space, + doubleQuotes(ppr cconv), space, + target fn, parens ( commafy $ map ppr args ), + (if null results + then empty + else brackets( commafy $ map ppr results)), semi ] + where + target (CmmLit lit) = pprLit lit + target fn' = parens (ppr fn') + + CmmCall (CmmPrim op) results args volatile -> + pprStmt (CmmCall (CmmForeignCall (CmmLit lbl) CCallConv) + results args volatile) + where + lbl = CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False) + + CmmBranch ident -> genBranch ident + CmmCondBranch expr ident -> genCondBranch expr ident + CmmJump expr params -> genJump expr params + CmmSwitch arg ids -> genSwitch arg ids + +-- -------------------------------------------------------------------------- +-- goto local label. [1], section 6.6 +-- +-- goto lbl; +-- +genBranch :: BlockId -> SDoc +genBranch ident = + ptext SLIT("goto") <+> pprBlockId ident <> semi + +-- -------------------------------------------------------------------------- +-- Conditional. [1], section 6.4 +-- +-- if (expr) { goto lbl; } +-- +genCondBranch :: CmmExpr -> BlockId -> SDoc +genCondBranch expr ident = + hsep [ ptext SLIT("if") + , parens(ppr expr) + , ptext SLIT("goto") + , pprBlockId ident <> semi ] + +-- -------------------------------------------------------------------------- +-- A tail call. [1], Section 6.9 +-- +-- jump foo(a, b, c); +-- +genJump :: CmmExpr -> [LocalReg] -> SDoc +genJump expr actuals = + + hcat [ ptext SLIT("jump") + , space + , if isTrivialCmmExpr expr + then pprExpr expr + else case expr of + CmmLoad (CmmReg _) _ -> pprExpr expr + _ -> parens (pprExpr expr) + , pprActuals actuals + , semi ] + + where + pprActuals [] = empty + pprActuals as = parens ( commafy $ map pprLocalReg as ) + +-- -------------------------------------------------------------------------- +-- Tabled jump to local label +-- +-- The syntax is from [1], section 6.5 +-- +-- switch [0 .. n] (expr) { case ... ; } +-- +genSwitch :: CmmExpr -> [Maybe BlockId] -> SDoc +genSwitch expr maybe_ids + + = let pairs = groupBy snds (zip [0 .. ] maybe_ids ) + + in hang (hcat [ ptext SLIT("switch [0 .. ") + , int (length maybe_ids - 1) + , ptext SLIT("] ") + , if isTrivialCmmExpr expr + then pprExpr expr + else parens (pprExpr expr) + , ptext SLIT(" {") + ]) + 4 (vcat ( map caseify pairs )) $$ rbrace + + where + snds a b = (snd a) == (snd b) + + caseify :: [(Int,Maybe BlockId)] -> SDoc + caseify ixs@((i,Nothing):_) + = ptext SLIT("/* impossible: ") <> hcat (intersperse comma (map (int.fst) ixs)) + <> ptext SLIT(" */") + caseify as + = let (is,ids) = unzip as + in hsep [ ptext SLIT("case") + , hcat (punctuate comma (map int is)) + , ptext SLIT(": goto") + , pprBlockId (head [ id | Just id <- ids]) <> semi ] + +-- -------------------------------------------------------------------------- +-- Expressions +-- + +pprExpr :: CmmExpr -> SDoc +pprExpr e + = case e of + CmmRegOff reg i -> + pprExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) + where rep = cmmRegRep reg + CmmLit lit -> pprLit lit + _other -> pprExpr1 e + +-- Here's the precedence table from CmmParse.y: +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +-- %left '|' +-- %left '^' +-- %left '&' +-- %left '>>' '<<' +-- %left '-' '+' +-- %left '/' '*' '%' +-- %right '~' + +-- We just cope with the common operators for now, the rest will get +-- a default conservative behaviour. + +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +pprExpr1 (CmmMachOp op [x,y]) | Just doc <- infixMachOp1 op + = pprExpr7 x <+> doc <+> pprExpr7 y +pprExpr1 e = pprExpr7 e + +infixMachOp1 (MO_Eq _) = Just (ptext SLIT("==")) +infixMachOp1 (MO_Ne _) = Just (ptext SLIT("!=")) +infixMachOp1 (MO_Shl _) = Just (ptext SLIT("<<")) +infixMachOp1 (MO_U_Shr _) = Just (ptext SLIT(">>")) +infixMachOp1 (MO_U_Ge _) = Just (ptext SLIT(">=")) +infixMachOp1 (MO_U_Le _) = Just (ptext SLIT("<=")) +infixMachOp1 (MO_U_Gt _) = Just (char '>') +infixMachOp1 (MO_U_Lt _) = Just (char '<') +infixMachOp1 _ = Nothing + +-- %left '-' '+' +pprExpr7 (CmmMachOp op [x,y]) | Just doc <- infixMachOp7 op + = pprExpr7 x <+> doc <+> pprExpr8 y +pprExpr7 e = pprExpr8 e + +infixMachOp7 (MO_Add _) = Just (char '+') +infixMachOp7 (MO_Sub _) = Just (char '-') +infixMachOp7 _ = Nothing + +-- %left '/' '*' '%' +pprExpr8 (CmmMachOp op [x,y]) | Just doc <- infixMachOp8 op + = pprExpr8 x <+> doc <+> pprExpr9 y +pprExpr8 e = pprExpr9 e + +infixMachOp8 (MO_U_Quot _) = Just (char '/') +infixMachOp8 (MO_Mul _) = Just (char '*') +infixMachOp8 (MO_U_Rem _) = Just (char '%') +infixMachOp8 _ = Nothing + +pprExpr9 :: CmmExpr -> SDoc +pprExpr9 e = + case e of + CmmLit lit -> pprLit1 lit + CmmLoad expr rep -> ppr rep <> brackets( ppr expr ) + CmmReg reg -> ppr reg + CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off) + CmmMachOp mop args -> genMachOp mop args + +genMachOp :: MachOp -> [CmmExpr] -> SDoc +genMachOp mop args + | Just doc <- infixMachOp mop = case args of + -- dyadic + [x,y] -> pprExpr9 x <+> doc <+> pprExpr9 y + + -- unary + [x] -> doc <> pprExpr9 x + + _ -> pprTrace "PprCmm.genMachOp: machop with strange number of args" + (pprMachOp mop <+> + parens (hcat $ punctuate comma (map pprExpr args))) + empty + + | isJust (infixMachOp1 mop) + || isJust (infixMachOp7 mop) + || isJust (infixMachOp8 mop) = parens (pprExpr (CmmMachOp mop args)) + + | otherwise = char '%' <> pprMachOp mop <> parens (commafy (map pprExpr args)) + +-- +-- Unsigned ops on the word size of the machine get nice symbols. +-- All else get dumped in their ugly format. +-- +infixMachOp :: MachOp -> Maybe SDoc +infixMachOp mop + = case mop of + MO_And _ -> Just $ char '&' + MO_Or _ -> Just $ char '|' + MO_Xor _ -> Just $ char '^' + MO_Not _ -> Just $ char '~' + MO_S_Neg _ -> Just $ char '-' -- there is no unsigned neg :) + _ -> Nothing + +-- -------------------------------------------------------------------------- +-- Literals. +-- To minimise line noise we adopt the convention that if the literal +-- has the natural machine word size, we do not append the type +-- +pprLit :: CmmLit -> SDoc +pprLit lit = case lit of + CmmInt i rep -> + hcat [ (if i < 0 then parens else id)(integer i) + , (if rep == wordRep + then empty + else space <> dcolon <+> ppr rep) ] + + CmmFloat f rep -> hsep [ rational f, dcolon, ppr rep ] + CmmLabel clbl -> pprCLabel clbl + CmmLabelOff clbl i -> pprCLabel clbl <> ppr_offset i + CmmLabelDiffOff clbl1 clbl2 i -> pprCLabel clbl1 <> char '-' + <> pprCLabel clbl2 <> ppr_offset i + +pprLit1 lit@(CmmLabelOff clbl i) = parens (pprLit lit) +pprLit1 lit = pprLit lit + +ppr_offset :: Int -> SDoc +ppr_offset i + | i==0 = empty + | i>=0 = char '+' <> int i + | otherwise = char '-' <> int (-i) + +-- -------------------------------------------------------------------------- +-- Static data. +-- Strings are printed as C strings, and we print them as I8[], +-- following C-- +-- +pprStatic :: CmmStatic -> SDoc +pprStatic s = case s of + CmmStaticLit lit -> nest 4 $ ptext SLIT("const") <+> pprLit lit <> semi + CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i) + CmmAlign i -> nest 4 $ text "align" <+> int i + CmmDataLabel clbl -> pprCLabel clbl <> colon + CmmString s' -> nest 4 $ text "I8[]" <+> + doubleQuotes (text (map (chr.fromIntegral) s')) + +-- -------------------------------------------------------------------------- +-- Registers, whether local (temps) or global +-- +pprReg :: CmmReg -> SDoc +pprReg r + = case r of + CmmLocal local -> pprLocalReg local + CmmGlobal global -> pprGlobalReg global + +-- +-- We only print the type of the local reg if it isn't wordRep +-- +pprLocalReg :: LocalReg -> SDoc +pprLocalReg (LocalReg uniq rep) + = hcat [ char '_', ppr uniq, + (if rep == wordRep + then empty else dcolon <> ppr rep) ] + +-- needs to be kept in syn with Cmm.hs.GlobalReg +-- +pprGlobalReg :: GlobalReg -> SDoc +pprGlobalReg gr + = case gr of + VanillaReg n -> char 'R' <> int n + FloatReg n -> char 'F' <> int n + DoubleReg n -> char 'D' <> int n + LongReg n -> char 'L' <> int n + Sp -> ptext SLIT("Sp") + SpLim -> ptext SLIT("SpLim") + Hp -> ptext SLIT("Hp") + HpLim -> ptext SLIT("HpLim") + CurrentTSO -> ptext SLIT("CurrentTSO") + CurrentNursery -> ptext SLIT("CurrentNursery") + HpAlloc -> ptext SLIT("HpAlloc") + GCEnter1 -> ptext SLIT("stg_gc_enter_1") + GCFun -> ptext SLIT("stg_gc_fun") + BaseReg -> ptext SLIT("BaseReg") + PicBaseReg -> ptext SLIT("PicBaseReg") + +-- -------------------------------------------------------------------------- +-- data sections +-- +pprSection :: Section -> SDoc +pprSection s = case s of + Text -> section <+> doubleQuotes (ptext SLIT("text")) + Data -> section <+> doubleQuotes (ptext SLIT("data")) + ReadOnlyData -> section <+> doubleQuotes (ptext SLIT("readonly")) + RelocatableReadOnlyData + -> section <+> doubleQuotes (ptext SLIT("relreadonly")) + UninitialisedData -> section <+> doubleQuotes (ptext SLIT("uninitialised")) + OtherSection s' -> section <+> doubleQuotes (text s') + where + section = ptext SLIT("section") + +-- -------------------------------------------------------------------------- +-- Basic block ids +-- +pprBlockId :: BlockId -> SDoc +pprBlockId b = ppr $ getUnique b + +----------------------------------------------------------------------------- + +commafy :: [SDoc] -> SDoc +commafy xs = hsep $ punctuate comma xs + |