diff options
-rw-r--r-- | compiler/cmm/CmmOpt.hs | 83 | ||||
-rw-r--r-- | compiler/cmm/CmmSink.hs | 39 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmClosure.hs | 2 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 28 | ||||
-rw-r--r-- | compiler/prelude/PrimOp.hs | 7 |
5 files changed, 121 insertions, 38 deletions
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 38c399ebc4..3cb28217f2 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -284,48 +284,68 @@ cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] maybe_comparison _ _ _ = Nothing -- We can often do something with constants of 0 and 1 ... +-- See Note [Comparison operators] cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))] = case mop of - MO_Add _ -> Just x - MO_Sub _ -> Just x - MO_Mul _ -> Just y - MO_And _ -> Just y - MO_Or _ -> Just x - MO_Xor _ -> Just x - MO_Shl _ -> Just x - MO_S_Shr _ -> Just x + -- Arithmetic + MO_Add _ -> Just x -- x + 0 = x + MO_Sub _ -> Just x -- x - 0 = x + MO_Mul _ -> Just y -- x * 0 = 0 + + -- Logical operations + MO_And _ -> Just y -- x & 0 = 0 + MO_Or _ -> Just x -- x | 0 = x + MO_Xor _ -> Just x -- x `xor` 0 = x + + -- Shifts + MO_Shl _ -> Just x -- x << 0 = x + MO_S_Shr _ -> Just x -- ditto shift-right MO_U_Shr _ -> Just x - MO_Ne _ | isComparisonExpr x -> Just x - MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' - MO_U_Gt _ | isComparisonExpr x -> Just x - MO_S_Gt _ | isComparisonExpr x -> Just x - MO_U_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags)) - MO_S_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags)) - MO_U_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags)) - MO_S_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags)) - MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' + + -- Comparisons; these ones are trickier + -- See Note [Comparison operators] + MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y + MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y + MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y + MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto + MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0 + MO_S_Lt _ | isComparisonExpr x -> Just zero + MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1 + MO_S_Ge _ | isComparisonExpr x -> Just one + + MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))] = case mop of + -- Arithmetic: x*1 = x, etc MO_Mul _ -> Just x MO_S_Quot _ -> Just x MO_U_Quot _ -> Just x MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep) MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep) - MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' - MO_Eq _ | isComparisonExpr x -> Just x - MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' - MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' - MO_U_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags)) - MO_S_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags)) - MO_U_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags)) - MO_S_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags)) - MO_U_Ge _ | isComparisonExpr x -> Just x + + -- Comparisons; trickier + -- See Note [Comparison operators] + MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y + MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y + MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y + MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto + MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0 + MO_S_Gt _ | isComparisonExpr x -> Just zero + MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1 + MO_S_Le _ | isComparisonExpr x -> Just one + MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y MO_S_Ge _ | isComparisonExpr x -> Just x _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) -- Now look for multiplication/division by powers of 2 (integers). @@ -376,6 +396,17 @@ cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] cmmMachOpFoldM _ _ _ = Nothing +{- Note [Comparison operators] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + CmmCondBranch ((x>#y) == 1) t f +we really want to convert to + CmmCondBranch (x>#y) t f + +That's what the constant-folding operations on comparison operators do above. +-} + + -- ----------------------------------------------------------------------------- -- Utils diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index eb8eedb8e2..d21f2422e7 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -378,6 +378,8 @@ dropAssignments dflags should_drop state assigs -- ----------------------------------------------------------------------------- -- Try to inline assignments into a node. +-- This also does constant folding for primpops, since +-- inlining opens up opportunities for doing so. tryToInline :: DynFlags @@ -432,14 +434,39 @@ tryToInline dflags live node assigs = go usages node [] assigs occurs_once = not l_live && l_usages == Just 1 occurs_none = not l_live && l_usages == Nothing - inl_node = mapExpDeep inline node - -- mapExpDeep is where the inlining actually takes place! - where inline (CmmReg (CmmLocal l')) | l == l' = rhs - inline (CmmRegOff (CmmLocal l') off) | l == l' + inl_node = case mapExpDeep inl_exp node of + -- See Note [Improving conditionals] + CmmCondBranch (CmmMachOp (MO_Ne w) args) + ti fi l + -> CmmCondBranch (cmmMachOpFold dflags (MO_Eq w) args) + fi ti l + node' -> node' + + inl_exp :: CmmExpr -> CmmExpr + -- inl_exp is where the inlining actually takes place! + inl_exp (CmmReg (CmmLocal l')) | l == l' = rhs + inl_exp (CmmRegOff (CmmLocal l') off) | l == l' = cmmOffset dflags rhs off -- re-constant fold after inlining - inline (CmmMachOp op args) = cmmMachOpFold dflags op args - inline other = other + inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args + inl_exp other = other + +{- Note [Improving conditionals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Given + CmmCondBranch ((a >## b) != 1) t f +where a,b, are Floats, the constant folder /cannot/ turn it into + CmmCondBranch (a <=## b) t f +because comparison on floats are not invertible +(see CmmMachOp.maybeInvertComparison). + +What we want instead is simply to reverse the true/false branches thus + CmmCondBranch ((a >## b) != 1) t f +--> + CmmCondBranch (a >## b) f t + +And we do that right here in tryToInline, just as we do cmmMachOpFold. +-} -- Note [dependent assignments] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index bc5e473d20..37572b7d4e 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -405,7 +405,7 @@ isLFReEntrant _ = False lfClosureType :: LambdaFormInfo -> ClosureTypeInfo lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd lfClosureType (LFCon con) = Constr (dataConTagZ con) - (dataConIdentity con) + (dataConIdentity con) lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel lfClosureType _ = panic "lfClosureType" diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 39edd05a8e..edf97eeb0a 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -304,6 +304,7 @@ cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alts + -- See Note [GC for conditionals] ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) ; return AssignedDirectly } @@ -469,7 +470,8 @@ cgCase scrut bndr alt_type alts ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts alt_regs = map (idToReg dflags) ret_bndrs ; simple_scrut <- isSimpleScrut scrut alt_type - ; let do_gc | not simple_scrut = True + ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals] + | not simple_scrut = True | isSingleton alts = False | up_hp_usg > 0 = False | otherwise = True @@ -484,11 +486,29 @@ cgCase scrut bndr alt_type alts ; _ <- bindArgsToRegs ret_bndrs ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts } + where + is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op + is_cmp_op _ = False + +{- Note [GC for conditionals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For boolean conditionals it seems that we have always done NoGcInAlts. +That is, we have always done the GC check before the conditional. +This is enshrined in the special case for + case tagToEnum# (a>b) of ... +See Note [case on bool] + +It's odd, and it's flagrantly inconsistent with the rules described +Note [Compiling case expressions]. However, after eliminating the +tagToEnum# (Trac #13397) we will have: + case (a>b) of ... +Rather than make it behave quite differently, I am testing for a +comparison operator here in in the general case as well. + +ToDo: figure out what the Right Rule should be. - -{- Note [scrut sequel] - +~~~~~~~~~~~~~~~~~~~ The job of the scrutinee is to assign its value(s) to alt_regs. Additionally, if we plan to do a heap-check in the alternatives (see Note [Compiling case expressions]), then we *must* retreat Hp to diff --git a/compiler/prelude/PrimOp.hs b/compiler/prelude/PrimOp.hs index cc632169ef..3a849060ff 100644 --- a/compiler/prelude/PrimOp.hs +++ b/compiler/prelude/PrimOp.hs @@ -22,7 +22,7 @@ module PrimOp ( primOpOkForSpeculation, primOpOkForSideEffects, primOpIsCheap, primOpFixity, - getPrimOpResultInfo, PrimOpResultInfo(..), + getPrimOpResultInfo, isComparisonPrimOp, PrimOpResultInfo(..), PrimCall(..) ) where @@ -552,6 +552,11 @@ primOpOcc op = case primOpInfo op of Compare occ _ -> occ GenPrimOp occ _ _ _ -> occ +isComparisonPrimOp :: PrimOp -> Bool +isComparisonPrimOp op = case primOpInfo op of + Compare {} -> True + _ -> False + -- primOpSig is like primOpType but gives the result split apart: -- (type variables, argument types, result type) -- It also gives arity, strictness info |