diff options
author | dias@eecs.harvard.edu <unknown> | 2008-08-14 12:40:27 +0000 |
---|---|---|
committer | dias@eecs.harvard.edu <unknown> | 2008-08-14 12:40:27 +0000 |
commit | 176fa33f17dd78355cc572e006d2ab26898e2c69 (patch) | |
tree | 54f951a515eac57626f8f15d57f7bc75f1096a7a /compiler/cmm/CmmExpr.hs | |
parent | e06951a75a1f519e8f015880c363a8dedc08ff9c (diff) | |
download | haskell-176fa33f17dd78355cc572e006d2ab26898e2c69.tar.gz |
Merging in the new codegen branch
This merge does not turn on the new codegen (which only compiles
a select few programs at this point),
but it does introduce some changes to the old code generator.
The high bits:
1. The Rep Swamp patch is finally here.
The highlight is that the representation of types at the
machine level has changed.
Consequently, this patch contains updates across several back ends.
2. The new Stg -> Cmm path is here, although it appears to have a
fair number of bugs lurking.
3. Many improvements along the CmmCPSZ path, including:
o stack layout
o some code for infotables, half of which is right and half wrong
o proc-point splitting
Diffstat (limited to 'compiler/cmm/CmmExpr.hs')
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 991 |
1 files changed, 894 insertions, 97 deletions
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 69a4952ed6..5893843a20 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -1,22 +1,56 @@ module CmmExpr - ( CmmExpr(..), cmmExprRep, maybeInvertCmmExpr - , CmmReg(..), cmmRegRep - , CmmLit(..), cmmLitRep - , LocalReg(..), localRegRep, localRegGCFollow, GCKind(..) - , GlobalReg(..), globalRegRep, spReg, hpReg, spLimReg, nodeReg, node + ( CmmType -- Abstract + , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord + , cInt, cLong + , cmmBits, cmmFloat + , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood + , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32 + + , Width(..) + , widthInBits, widthInBytes, widthInLog + , wordWidth, halfWordWidth, cIntWidth, cLongWidth + + , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr + , CmmReg(..), cmmRegType + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node + , VGcPtr(..), vgcFlag -- Temporary! , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed + , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet - , Area(..), StackSlotMap, getSlot, mkCallArea, outgoingSlot, areaId, areaSize - ) where + , Area(..), AreaId(..), SubArea, StackSlotMap, getSlot + + -- MachOp + , MachOp(..) + , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, machOpResultType + , machOpArgReps, maybeInvertComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + ) +where + +#include "HsVersions.h" import BlockId import CLabel +import Constants +import FastString import FiniteMap -import MachOp import Maybes import Monad +import Outputable import Panic import Unique import UniqSet @@ -28,16 +62,24 @@ import UniqSet data CmmExpr = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr MachRep -- Read memory location + | CmmLoad CmmExpr CmmType -- Read memory location | CmmReg CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) + | CmmStackSlot Area Int -- addressing expression of a stack slot | CmmRegOff CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) - -- where rep = cmmRegRep reg - | CmmStackSlot Area Int - deriving Eq + -- where rep = cmmRegType reg + +instance Eq CmmExpr where -- Equality ignores the types + CmmLit l1 == CmmLit l2 = l1==l2 + CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 + CmmReg r1 == CmmReg r2 = r1==r2 + CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 + CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 + _e1 == _e2 = False data CmmReg = CmmLocal LocalReg @@ -48,17 +90,24 @@ data CmmReg -- or the stack space where function arguments and results are passed. data Area = RegSlot LocalReg - | CallArea BlockId Int Int + | CallArea AreaId deriving (Eq, Ord) +data AreaId + = Old -- entry parameters, jumps, and returns share one call area at old end of stack + | Young BlockId + deriving (Eq, Ord) + +type SubArea = (Area, Int, Int) -- area, offset, width + data CmmLit - = CmmInt Integer MachRep + = CmmInt Integer Width -- Interpretation: the 2's complement representation of the value -- is truncated to the specified size. This is easier than trying -- to keep the value within range, because we don't know whether - -- it will be used as a signed or unsigned value (the MachRep doesn't + -- it will be used as a signed or unsigned value (the CmmType doesn't -- distinguish between signed & unsigned). - | CmmFloat Rational MachRep + | CmmFloat Rational Width | CmmLabel CLabel -- Address of label | CmmLabelOff CLabel Int -- Address of label + byte offset @@ -72,14 +121,27 @@ data CmmLit | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset deriving Eq -instance Eq LocalReg where - (LocalReg u1 _ _) == (LocalReg u2 _ _) = u1 == u2 +cmmExprType :: CmmExpr -> CmmType +cmmExprType (CmmLit lit) = cmmLitType lit +cmmExprType (CmmLoad _ rep) = rep +cmmExprType (CmmReg reg) = cmmRegType reg +cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args) +cmmExprType (CmmRegOff reg _) = cmmRegType reg +cmmExprType (CmmStackSlot _ _) = bWord -- an address -instance Ord LocalReg where - compare (LocalReg u1 _ _) (LocalReg u2 _ _) = compare u1 u2 +cmmLitType :: CmmLit -> CmmType +cmmLitType (CmmInt _ width) = cmmBits width +cmmLitType (CmmFloat _ width) = cmmFloat width +cmmLitType (CmmLabel lbl) = cmmLabelType lbl +cmmLitType (CmmLabelOff lbl _) = cmmLabelType lbl +cmmLitType (CmmLabelDiffOff {}) = bWord -instance Uniquable LocalReg where - getUnique (LocalReg uniq _ _) = uniq +cmmLabelType :: CLabel -> CmmType +cmmLabelType lbl | isGcPtrLabel lbl = gcWord + | otherwise = bWord + +cmmExprWidth :: CmmExpr -> Width +cmmExprWidth e = typeWidth (cmmExprType e) -------- --- Negation for conditional branches @@ -93,17 +155,33 @@ maybeInvertCmmExpr _ = Nothing -- Local registers ----------------------------------------------------------------------------- --- | Whether a 'LocalReg' is a GC followable pointer -data GCKind = GCKindPtr | GCKindNonPtr deriving (Eq) - data LocalReg - = LocalReg !Unique MachRep GCKind + = LocalReg !Unique CmmType -- ^ Parameters: -- 1. Identifier -- 2. Type - -- 3. Should the GC follow as a pointer --- Sets of local registers +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +instance Ord LocalReg where + compare (LocalReg u1 _) (LocalReg u2 _) = compare u1 u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +cmmRegType :: CmmReg -> CmmType +cmmRegType (CmmLocal reg) = localRegType reg +cmmRegType (CmmGlobal reg) = globalRegType reg + +localRegType :: LocalReg -> CmmType +localRegType (LocalReg _ rep) = rep + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +-- | Sets of local registers type RegSet = UniqSet LocalReg emptyRegSet :: RegSet elemRegSet :: LocalReg -> RegSet -> Bool @@ -121,45 +199,6 @@ minusRegSet = minusUniqSet plusRegSet = unionUniqSets timesRegSet = intersectUniqSets ------------------------------------------------------------------------------ --- Stack slots ------------------------------------------------------------------------------ - -mkVarSlot :: LocalReg -> CmmExpr -mkVarSlot r = CmmStackSlot (RegSlot r) 0 - --- Usually, we either want to lookup a variable's spill slot in an environment --- or else allocate it and add it to the environment. --- For a variable, we just need a single area of the appropriate size. -type StackSlotMap = FiniteMap LocalReg CmmExpr -getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) -getSlot map r = case lookupFM map r of - Just s -> (map, s) - Nothing -> (addToFM map r s, s) where s = mkVarSlot r - --- Eventually, we'll want something proper that takes arguments and formals --- and gives you back the calling convention code, as well as the stack area. -mkCallArea :: BlockId -> [a] -> Maybe [b] -> Area -mkCallArea id as fs = CallArea id (length as) (liftM length fs `orElse` 0) - --- Return the last slot in the outgoing parameter area. -outgoingSlot :: Area -> CmmExpr -outgoingSlot a@(RegSlot _) = CmmStackSlot a 0 -outgoingSlot a@(CallArea _ outN _) = CmmStackSlot a outN - -areaId :: Area -> BlockId -areaId (RegSlot _) = panic "Register stack slots don't have IDs!" -areaId (CallArea id _ _) = id - -areaSize :: Area -> Int -areaSize (RegSlot _) = 1 -areaSize (CallArea _ outN inN) = max outN inN - - ------------------------------------------------------------------------------ --- Register-use information for expressions and other types ------------------------------------------------------------------------------ - class UserOfLocalRegs a where foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -205,46 +244,69 @@ instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where foldRegsDefd _ set [] = set foldRegsDefd f set (x:xs) = foldRegsDefd f (foldRegsDefd f set x) xs + ----------------------------------------------------------------------------- --- MachRep +-- Stack slots ----------------------------------------------------------------------------- +mkVarSlot :: LocalReg -> CmmExpr +mkVarSlot r = CmmStackSlot (RegSlot r) 0 +-- Usually, we either want to lookup a variable's spill slot in an environment +-- or else allocate it and add it to the environment. +-- For a variable, we just need a single area of the appropriate size. +type StackSlotMap = FiniteMap LocalReg CmmExpr +getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) +getSlot map r = case lookupFM map r of + Just s -> (map, s) + Nothing -> (addToFM map r s, s) where s = mkVarSlot r -cmmExprRep :: CmmExpr -> MachRep -cmmExprRep (CmmLit lit) = cmmLitRep lit -cmmExprRep (CmmLoad _ rep) = rep -cmmExprRep (CmmReg reg) = cmmRegRep reg -cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op -cmmExprRep (CmmRegOff reg _) = cmmRegRep reg -cmmExprRep (CmmStackSlot _ _) = wordRep +----------------------------------------------------------------------------- +-- Stack slot use information for expressions and other types [_$_] +----------------------------------------------------------------------------- -cmmRegRep :: CmmReg -> MachRep -cmmRegRep (CmmLocal reg) = localRegRep reg -cmmRegRep (CmmGlobal reg) = globalRegRep reg -localRegRep :: LocalReg -> MachRep -localRegRep (LocalReg _ rep _) = rep +-- Fold over the area, the offset into the area, and the width of the subarea. +class UserOfSlots a where + foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b +class DefinerOfSlots a where + foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b -localRegGCFollow :: LocalReg -> GCKind -localRegGCFollow (LocalReg _ _ p) = p +instance UserOfSlots CmmExpr where + foldSlotsUsed f z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) + expr z (CmmLoad addr _) = foldSlotsUsed f z addr + expr z (CmmReg _) = z + expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs + expr z (CmmRegOff _ _) = z + expr z (CmmStackSlot _ _) = z + +instance UserOfSlots a => UserOfSlots [a] where + foldSlotsUsed _ set [] = set + foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs + + +----------------------------------------------------------------------------- +-- Global STG registers +----------------------------------------------------------------------------- -cmmLitRep :: CmmLit -> MachRep -cmmLitRep (CmmInt _ rep) = rep -cmmLitRep (CmmFloat _ rep) = rep -cmmLitRep (CmmLabel _) = wordRep -cmmLitRep (CmmLabelOff _ _) = wordRep -cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep +data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) + -- TEMPORARY!!! ----------------------------------------------------------------------------- -- Global STG registers ----------------------------------------------------------------------------- +vgcFlag :: CmmType -> VGcPtr +vgcFlag ty | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr data GlobalReg -- Argument and return registers = VanillaReg -- pointers, unboxed ints and chars {-# UNPACK #-} !Int -- its number + VGcPtr | FloatReg -- single-precision floating-point registers {-# UNPACK #-} !Int -- its number @@ -282,7 +344,71 @@ data GlobalReg -- from platform to platform (see module PositionIndependentCode). | PicBaseReg - deriving( Eq, Ord, Show ) + deriving( Show ) + +instance Eq GlobalReg where + VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes + FloatReg i == FloatReg j = i==j + DoubleReg i == DoubleReg j = i==j + LongReg i == LongReg j = i==j + Sp == Sp = True + SpLim == SpLim = True + Hp == Hp = True + HpLim == HpLim = True + CurrentTSO == CurrentTSO = True + CurrentNursery == CurrentNursery = True + HpAlloc == HpAlloc = True + GCEnter1 == GCEnter1 = True + GCFun == GCFun = True + BaseReg == BaseReg = True + PicBaseReg == PicBaseReg = True + _r1 == _r2 = False + +instance Ord GlobalReg where + compare (VanillaReg i _) (VanillaReg j _) = compare i j + -- Ignore type when seeking clashes + compare (FloatReg i) (FloatReg j) = compare i j + compare (DoubleReg i) (DoubleReg j) = compare i j + compare (LongReg i) (LongReg j) = compare i j + compare Sp Sp = EQ + compare SpLim SpLim = EQ + compare Hp Hp = EQ + compare HpLim HpLim = EQ + compare CurrentTSO CurrentTSO = EQ + compare CurrentNursery CurrentNursery = EQ + compare HpAlloc HpAlloc = EQ + compare GCEnter1 GCEnter1 = EQ + compare GCFun GCFun = EQ + compare BaseReg BaseReg = EQ + compare PicBaseReg PicBaseReg = EQ + compare (VanillaReg _ _) _ = LT + compare _ (VanillaReg _ _) = GT + compare (FloatReg _) _ = LT + compare _ (FloatReg _) = GT + compare (DoubleReg _) _ = LT + compare _ (DoubleReg _) = GT + compare (LongReg _) _ = LT + compare _ (LongReg _) = GT + compare Sp _ = LT + compare _ Sp = GT + compare SpLim _ = LT + compare _ SpLim = GT + compare Hp _ = LT + compare _ Hp = GT + compare HpLim _ = LT + compare _ HpLim = GT + compare CurrentTSO _ = LT + compare _ CurrentTSO = GT + compare CurrentNursery _ = LT + compare _ CurrentNursery = GT + compare HpAlloc _ = LT + compare _ HpAlloc = GT + compare GCEnter1 _ = LT + compare _ GCEnter1 = GT + compare GCFun _ = LT + compare _ GCFun = GT + compare BaseReg _ = LT + compare _ BaseReg = GT -- convenient aliases spReg, hpReg, spLimReg, nodeReg :: CmmReg @@ -292,11 +418,682 @@ spLimReg = CmmGlobal SpLim nodeReg = CmmGlobal node node :: GlobalReg -node = VanillaReg 1 - -globalRegRep :: GlobalReg -> MachRep -globalRegRep (VanillaReg _) = wordRep -globalRegRep (FloatReg _) = F32 -globalRegRep (DoubleReg _) = F64 -globalRegRep (LongReg _) = I64 -globalRegRep _ = wordRep +node = VanillaReg 1 VGcPtr + +globalRegType :: GlobalReg -> CmmType +globalRegType (VanillaReg _ VGcPtr) = gcWord +globalRegType (VanillaReg _ VNonGcPtr) = bWord +globalRegType (FloatReg _) = cmmFloat W32 +globalRegType (DoubleReg _) = cmmFloat W64 +globalRegType (LongReg _) = cmmBits W64 +globalRegType Hp = gcWord -- The initialiser for all + -- dynamically allocated closures +globalRegType _ = bWord + + +----------------------------------------------------------------------------- +-- CmmType +----------------------------------------------------------------------------- + + -- NOTE: CmmType is an abstract type, not exported from this + -- module so you can easily change its representation + -- + -- However Width is exported in a concrete way, + -- and is used extensively in pattern-matching + +data CmmType -- The important one! + = CmmType CmmCat Width + +data CmmCat -- "Category" (not exported) + = GcPtrCat -- GC pointer + | BitsCat -- Non-pointer + | FloatCat -- Float + deriving( Eq ) + -- See Note [Signed vs unsigned] at the end + +instance Outputable CmmType where + ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid) + +instance Outputable CmmCat where + ppr FloatCat = ptext $ sLit("F") + ppr _ = ptext $ sLit("I") +-- Temp Jan 08 +-- ppr FloatCat = ptext $ sLit("float") +-- ppr BitsCat = ptext $ sLit("bits") +-- ppr GcPtrCat = ptext $ sLit("gcptr") + +-- Why is CmmType stratified? For native code generation, +-- most of the time you just want to know what sort of register +-- to put the thing in, and for this you need to know how +-- many bits thing has and whether it goes in a floating-point +-- register. By contrast, the distinction between GcPtr and +-- GcNonPtr is of interest to only a few parts of the code generator. + +-------- Equality on CmmType -------------- +-- CmmType is *not* an instance of Eq; sometimes we care about the +-- Gc/NonGc distinction, and sometimes we don't +-- So we use an explicit function to force you to think about it +cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality +cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2 + +cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool + -- This equality is temporary; used in CmmLint + -- but the RTS files are not yet well-typed wrt pointers +cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2) + = c1 `weak_eq` c2 && w1==w2 + where + FloatCat `weak_eq` FloatCat = True + FloatCat `weak_eq` _other = False + _other `weak_eq` FloatCat = False + _word1 `weak_eq` _word2 = True -- Ignores GcPtr + +--- Simple operations on CmmType ----- +typeWidth :: CmmType -> Width +typeWidth (CmmType _ w) = w + +cmmBits, cmmFloat :: Width -> CmmType +cmmBits = CmmType BitsCat +cmmFloat = CmmType FloatCat + +-------- Common CmmTypes ------------ +-- Floats and words of specific widths +b8, b16, b32, b64, f32, f64 :: CmmType +b8 = cmmBits W8 +b16 = cmmBits W16 +b32 = cmmBits W32 +b64 = cmmBits W64 +f32 = cmmFloat W32 +f64 = cmmFloat W64 + +-- CmmTypes of native word widths +bWord, bHalfWord, gcWord :: CmmType +bWord = cmmBits wordWidth +bHalfWord = cmmBits halfWordWidth +gcWord = CmmType GcPtrCat wordWidth + +cInt, cLong :: CmmType +cInt = cmmBits cIntWidth +cLong = cmmBits cLongWidth + + +------------ Predicates ---------------- +isFloatType, isGcPtrType :: CmmType -> Bool +isFloatType (CmmType FloatCat _) = True +isFloatType _other = False + +isGcPtrType (CmmType GcPtrCat _) = True +isGcPtrType _other = False + +isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool +-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) +-- isFloat32 and 64 are obvious + +isWord64 (CmmType BitsCat W64) = True +isWord64 (CmmType GcPtrCat W64) = True +isWord64 _other = False + +isWord32 (CmmType BitsCat W32) = True +isWord32 (CmmType GcPtrCat W32) = True +isWord32 _other = False + +isFloat32 (CmmType FloatCat W32) = True +isFloat32 _other = False + +isFloat64 (CmmType FloatCat W64) = True +isFloat64 _other = False + +----------------------------------------------------------------------------- +-- Width +----------------------------------------------------------------------------- + +data Width = W8 | W16 | W32 | W64 + | W80 -- Extended double-precision float, + -- used in x86 native codegen only. + -- (we use Ord, so it'd better be in this order) + | W128 + deriving (Eq, Ord, Show) + +instance Outputable Width where + ppr rep = ptext (mrStr rep) + +mrStr :: Width -> LitString +mrStr W8 = sLit("W8") +mrStr W16 = sLit("W16") +mrStr W32 = sLit("W32") +mrStr W64 = sLit("W64") +mrStr W128 = sLit("W128") +mrStr W80 = sLit("W80") + + +-------- Common Widths ------------ +wordWidth, halfWordWidth :: Width +wordWidth | wORD_SIZE == 4 = W32 + | wORD_SIZE == 8 = W64 + | otherwise = panic "MachOp.wordRep: Unknown word size" + +halfWordWidth | wORD_SIZE == 4 = W16 + | wORD_SIZE == 8 = W32 + | otherwise = panic "MachOp.halfWordRep: Unknown word size" + +-- cIntRep is the Width for a C-language 'int' +cIntWidth, cLongWidth :: Width +#if SIZEOF_INT == 4 +cIntWidth = W32 +#elif SIZEOF_INT == 8 +cIntWidth = W64 +#endif + +#if SIZEOF_LONG == 4 +cLongWidth = W32 +#elif SIZEOF_LONG == 8 +cLongWidth = W64 +#endif + +widthInBits :: Width -> Int +widthInBits W8 = 8 +widthInBits W16 = 16 +widthInBits W32 = 32 +widthInBits W64 = 64 +widthInBits W128 = 128 +widthInBits W80 = 80 + +widthInBytes :: Width -> Int +widthInBytes W8 = 1 +widthInBytes W16 = 2 +widthInBytes W32 = 4 +widthInBytes W64 = 8 +widthInBytes W128 = 16 +widthInBytes W80 = 10 + +-- log_2 of the width in bytes, useful for generating shifts. +widthInLog :: Width -> Int +widthInLog W8 = 0 +widthInLog W16 = 1 +widthInLog W32 = 2 +widthInLog W64 = 3 +widthInLog W128 = 4 +widthInLog W80 = panic "widthInLog: F80" + + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- +Implementation notes: + +It might suffice to keep just a width, without distinguishing between +floating and integer types. However, keeping the distinction will +help the native code generator to assign registers more easily. +-} + + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. Basically contains C's primops +and no others. + +Nomenclature: all ops indicate width and signedness, where +appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. +Nat means the operation works on STG word sized objects. +Signedness: S means signed, U means unsigned. For operations where +signedness is irrelevant or makes no difference (for example +integer add), the signedness component is omitted. + +An exception: NatP is a ptr-typed native word. From the point of +view of the native code generators this distinction is irrelevant, +but the C code generator sometimes needs this info to emit the +right casts. +-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. + | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_FF_Conv Width Width -- Float -> Float + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. + +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + :: MachOp + +mo_wordAdd = MO_Add wordWidth +mo_wordSub = MO_Sub wordWidth +mo_wordEq = MO_Eq wordWidth +mo_wordNe = MO_Ne wordWidth +mo_wordMul = MO_Mul wordWidth +mo_wordSQuot = MO_S_Quot wordWidth +mo_wordSRem = MO_S_Rem wordWidth +mo_wordSNeg = MO_S_Neg wordWidth +mo_wordUQuot = MO_U_Quot wordWidth +mo_wordURem = MO_U_Rem wordWidth + +mo_wordSGe = MO_S_Ge wordWidth +mo_wordSLe = MO_S_Le wordWidth +mo_wordSGt = MO_S_Gt wordWidth +mo_wordSLt = MO_S_Lt wordWidth + +mo_wordUGe = MO_U_Ge wordWidth +mo_wordULe = MO_U_Le wordWidth +mo_wordUGt = MO_U_Gt wordWidth +mo_wordULt = MO_U_Lt wordWidth + +mo_wordAnd = MO_And wordWidth +mo_wordOr = MO_Or wordWidth +mo_wordXor = MO_Xor wordWidth +mo_wordNot = MO_Not wordWidth +mo_wordShl = MO_Shl wordWidth +mo_wordSShr = MO_S_Shr wordWidth +mo_wordUShr = MO_U_Shr wordWidth + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord = MO_UU_Conv W8 wordWidth +mo_s_8ToWord = MO_SS_Conv W8 wordWidth +mo_u_16ToWord = MO_UU_Conv W16 wordWidth +mo_s_16ToWord = MO_SS_Conv W16 wordWidth +mo_s_32ToWord = MO_SS_Conv W32 wordWidth +mo_u_32ToWord = MO_UU_Conv W32 wordWidth + +mo_WordTo8 = MO_UU_Conv wordWidth W8 +mo_WordTo16 = MO_UU_Conv wordWidth W16 +mo_WordTo32 = MO_UU_Conv wordWidth W32 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isAssociativeMachOp :: MachOp -> Bool +isAssociativeMachOp mop = + case mop of + MO_Add {} -> True -- NB: does not include + MO_Mul {} -> True -- floatint point! + MO_And {} -> True + MO_Or {} -> True + MO_Xor {} -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isComparisonMachOp + +{- | +Returns 'True' if the MachOp is a comparison. + +If in doubt, return False. This generates worse code on the +native routes, but is otherwise harmless. +-} +isComparisonMachOp :: MachOp -> Bool +isComparisonMachOp mop = + case mop of + MO_Eq _ -> True + MO_Ne _ -> True + MO_S_Ge _ -> True + MO_S_Le _ -> True + MO_S_Gt _ -> True + MO_S_Lt _ -> True + MO_U_Ge _ -> True + MO_U_Le _ -> True + MO_U_Gt _ -> True + MO_U_Lt _ -> True + MO_F_Eq {} -> True + MO_F_Ne {} -> True + MO_F_Ge {} -> True + MO_F_Le {} -> True + MO_F_Gt {} -> True + MO_F_Lt {} -> True + _other -> False + +-- ----------------------------------------------------------------------------- +-- Inverting conditions + +-- Sometimes it's useful to be able to invert the sense of a +-- condition. Not all conditional tests are invertible: in +-- particular, floating point conditionals cannot be inverted, because +-- there exist floating-point values which return False for both senses +-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). + +maybeInvertComparison :: MachOp -> Maybe MachOp +maybeInvertComparison op + = case op of -- None of these Just cases include floating point + MO_Eq r -> Just (MO_Ne r) + MO_Ne r -> Just (MO_Eq r) + MO_U_Lt r -> Just (MO_U_Ge r) + MO_U_Gt r -> Just (MO_U_Le r) + MO_U_Le r -> Just (MO_U_Gt r) + MO_U_Ge r -> Just (MO_U_Lt r) + MO_S_Lt r -> Just (MO_S_Ge r) + MO_S_Gt r -> Just (MO_S_Le r) + MO_S_Le r -> Just (MO_S_Gt r) + MO_S_Ge r -> Just (MO_S_Lt r) + MO_F_Eq r -> Just (MO_F_Ne r) + MO_F_Ne r -> Just (MO_F_Eq r) + MO_F_Ge r -> Just (MO_F_Le r) + MO_F_Le r -> Just (MO_F_Ge r) + MO_F_Gt r -> Just (MO_F_Lt r) + MO_F_Lt r -> Just (MO_F_Gt r) + _other -> Nothing + +-- ---------------------------------------------------------------------------- +-- machOpResultType + +{- | +Returns the MachRep of the result of a MachOp. +-} +machOpResultType :: MachOp -> [CmmType] -> CmmType +machOpResultType mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep + MO_Ne {} -> comparisonResultRep + MO_S_Ge {} -> comparisonResultRep + MO_S_Le {} -> comparisonResultRep + MO_S_Gt {} -> comparisonResultRep + MO_S_Lt {} -> comparisonResultRep + + MO_U_Ge {} -> comparisonResultRep + MO_U_Le {} -> comparisonResultRep + MO_U_Gt {} -> comparisonResultRep + MO_U_Lt {} -> comparisonResultRep + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep + MO_F_Ne {} -> comparisonResultRep + MO_F_Ge {} -> comparisonResultRep + MO_F_Le {} -> comparisonResultRep + MO_F_Gt {} -> comparisonResultRep + MO_F_Lt {} -> comparisonResultRep + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + where + (ty1:_) = tys + +comparisonResultRep :: CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. + +machOpArgReps :: MachOp -> [Width] +machOpArgReps op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r,wordWidth] + MO_U_Shr r -> [r,wordWidth] + MO_S_Shr r -> [r,wordWidth] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + +------------------------------------------------------------------------- +{- Note [Signed vs unsigned] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +Should a CmmType include a signed vs. unsigned distinction? + +This is very much like a "hint" in C-- terminology: it isn't necessary +in order to generate correct code, but it might be useful in that the +compiler can generate better code if it has access to higher-level +hints about data. This is important at call boundaries, because the +definition of a function is not visible at all of its call sites, so +the compiler cannot infer the hints. + +Here in Cmm, we're taking a slightly different approach. We include +the int vs. float hint in the MachRep, because (a) the majority of +platforms have a strong distinction between float and int registers, +and (b) we don't want to do any heavyweight hint-inference in the +native code backend in order to get good code. We're treating the +hint more like a type: our Cmm is always completely consistent with +respect to hints. All coercions between float and int are explicit. + +What about the signed vs. unsigned hint? This information might be +useful if we want to keep sub-word-sized values in word-size +registers, which we must do if we only have word-sized registers. + +On such a system, there are two straightforward conventions for +representing sub-word-sized values: + +(a) Leave the upper bits undefined. Comparison operations must + sign- or zero-extend both operands before comparing them, + depending on whether the comparison is signed or unsigned. + +(b) Always keep the values sign- or zero-extended as appropriate. + Arithmetic operations must narrow the result to the appropriate + size. + +A clever compiler might not use either (a) or (b) exclusively, instead +it would attempt to minimize the coercions by analysis: the same kind +of analysis that propagates hints around. In Cmm we don't want to +have to do this, so we plump for having richer types and keeping the +type information consistent. + +If signed/unsigned hints are missing from MachRep, then the only +choice we have is (a), because we don't know whether the result of an +operation should be sign- or zero-extended. + +Many architectures have extending load operations, which work well +with (b). To make use of them with (a), you need to know whether the +value is going to be sign- or zero-extended by an enclosing comparison +(for example), which involves knowing above the context. This is +doable but more complex. + +Further complicating the issue is foreign calls: a foreign calling +convention can specify that signed 8-bit quantities are passed as +sign-extended 32 bit quantities, for example (this is the case on the +PowerPC). So we *do* need sign information on foreign call arguments. + +Pros for adding signed vs. unsigned to MachRep: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the MachRep for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different MachReps in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represnted by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + |