summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmOpt.hs83
-rw-r--r--compiler/cmm/CmmSink.hs39
-rw-r--r--compiler/codeGen/StgCmmClosure.hs2
-rw-r--r--compiler/codeGen/StgCmmExpr.hs28
-rw-r--r--compiler/prelude/PrimOp.hs7
5 files changed, 121 insertions, 38 deletions
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 38c399ebc4..3cb28217f2 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -284,48 +284,68 @@ cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
maybe_comparison _ _ _ = Nothing
-- We can often do something with constants of 0 and 1 ...
+-- See Note [Comparison operators]
cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
= case mop of
- MO_Add _ -> Just x
- MO_Sub _ -> Just x
- MO_Mul _ -> Just y
- MO_And _ -> Just y
- MO_Or _ -> Just x
- MO_Xor _ -> Just x
- MO_Shl _ -> Just x
- MO_S_Shr _ -> Just x
+ -- Arithmetic
+ MO_Add _ -> Just x -- x + 0 = x
+ MO_Sub _ -> Just x -- x - 0 = x
+ MO_Mul _ -> Just y -- x * 0 = 0
+
+ -- Logical operations
+ MO_And _ -> Just y -- x & 0 = 0
+ MO_Or _ -> Just x -- x | 0 = x
+ MO_Xor _ -> Just x -- x `xor` 0 = x
+
+ -- Shifts
+ MO_Shl _ -> Just x -- x << 0 = x
+ MO_S_Shr _ -> Just x -- ditto shift-right
MO_U_Shr _ -> Just x
- MO_Ne _ | isComparisonExpr x -> Just x
- MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x'
- MO_U_Gt _ | isComparisonExpr x -> Just x
- MO_S_Gt _ | isComparisonExpr x -> Just x
- MO_U_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
- MO_S_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
- MO_U_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
- MO_S_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
- MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
+
+ -- Comparisons; these ones are trickier
+ -- See Note [Comparison operators]
+ MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y
+ MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y
+ MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y
+ MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto
+ MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0
+ MO_S_Lt _ | isComparisonExpr x -> Just zero
+ MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1
+ MO_S_Ge _ | isComparisonExpr x -> Just one
+
+ MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y
MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
_ -> Nothing
+ where
+ zero = CmmLit (CmmInt 0 (wordWidth dflags))
+ one = CmmLit (CmmInt 1 (wordWidth dflags))
cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
= case mop of
+ -- Arithmetic: x*1 = x, etc
MO_Mul _ -> Just x
MO_S_Quot _ -> Just x
MO_U_Quot _ -> Just x
MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
- MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x'
- MO_Eq _ | isComparisonExpr x -> Just x
- MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
- MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
- MO_U_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
- MO_S_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
- MO_U_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
- MO_S_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
- MO_U_Ge _ | isComparisonExpr x -> Just x
+
+ -- Comparisons; trickier
+ -- See Note [Comparison operators]
+ MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y
+ MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y
+ MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y
+ MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto
+ MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0
+ MO_S_Gt _ | isComparisonExpr x -> Just zero
+ MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1
+ MO_S_Le _ | isComparisonExpr x -> Just one
+ MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y
MO_S_Ge _ | isComparisonExpr x -> Just x
_ -> Nothing
+ where
+ zero = CmmLit (CmmInt 0 (wordWidth dflags))
+ one = CmmLit (CmmInt 1 (wordWidth dflags))
-- Now look for multiplication/division by powers of 2 (integers).
@@ -376,6 +396,17 @@ cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
cmmMachOpFoldM _ _ _ = Nothing
+{- Note [Comparison operators]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+ CmmCondBranch ((x>#y) == 1) t f
+we really want to convert to
+ CmmCondBranch (x>#y) t f
+
+That's what the constant-folding operations on comparison operators do above.
+-}
+
+
-- -----------------------------------------------------------------------------
-- Utils
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index eb8eedb8e2..d21f2422e7 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -378,6 +378,8 @@ dropAssignments dflags should_drop state assigs
-- -----------------------------------------------------------------------------
-- Try to inline assignments into a node.
+-- This also does constant folding for primpops, since
+-- inlining opens up opportunities for doing so.
tryToInline
:: DynFlags
@@ -432,14 +434,39 @@ tryToInline dflags live node assigs = go usages node [] assigs
occurs_once = not l_live && l_usages == Just 1
occurs_none = not l_live && l_usages == Nothing
- inl_node = mapExpDeep inline node
- -- mapExpDeep is where the inlining actually takes place!
- where inline (CmmReg (CmmLocal l')) | l == l' = rhs
- inline (CmmRegOff (CmmLocal l') off) | l == l'
+ inl_node = case mapExpDeep inl_exp node of
+ -- See Note [Improving conditionals]
+ CmmCondBranch (CmmMachOp (MO_Ne w) args)
+ ti fi l
+ -> CmmCondBranch (cmmMachOpFold dflags (MO_Eq w) args)
+ fi ti l
+ node' -> node'
+
+ inl_exp :: CmmExpr -> CmmExpr
+ -- inl_exp is where the inlining actually takes place!
+ inl_exp (CmmReg (CmmLocal l')) | l == l' = rhs
+ inl_exp (CmmRegOff (CmmLocal l') off) | l == l'
= cmmOffset dflags rhs off
-- re-constant fold after inlining
- inline (CmmMachOp op args) = cmmMachOpFold dflags op args
- inline other = other
+ inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args
+ inl_exp other = other
+
+{- Note [Improving conditionals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Given
+ CmmCondBranch ((a >## b) != 1) t f
+where a,b, are Floats, the constant folder /cannot/ turn it into
+ CmmCondBranch (a <=## b) t f
+because comparison on floats are not invertible
+(see CmmMachOp.maybeInvertComparison).
+
+What we want instead is simply to reverse the true/false branches thus
+ CmmCondBranch ((a >## b) != 1) t f
+-->
+ CmmCondBranch (a >## b) f t
+
+And we do that right here in tryToInline, just as we do cmmMachOpFold.
+-}
-- Note [dependent assignments]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index bc5e473d20..37572b7d4e 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -405,7 +405,7 @@ isLFReEntrant _ = False
lfClosureType :: LambdaFormInfo -> ClosureTypeInfo
lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd
lfClosureType (LFCon con) = Constr (dataConTagZ con)
- (dataConIdentity con)
+ (dataConIdentity con)
lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel
lfClosureType _ = panic "lfClosureType"
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 39edd05a8e..edf97eeb0a 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -304,6 +304,7 @@ cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
(NonVoid bndr) alts
+ -- See Note [GC for conditionals]
; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
; return AssignedDirectly
}
@@ -469,7 +470,8 @@ cgCase scrut bndr alt_type alts
; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
alt_regs = map (idToReg dflags) ret_bndrs
; simple_scrut <- isSimpleScrut scrut alt_type
- ; let do_gc | not simple_scrut = True
+ ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals]
+ | not simple_scrut = True
| isSingleton alts = False
| up_hp_usg > 0 = False
| otherwise = True
@@ -484,11 +486,29 @@ cgCase scrut bndr alt_type alts
; _ <- bindArgsToRegs ret_bndrs
; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
}
+ where
+ is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
+ is_cmp_op _ = False
+
+{- Note [GC for conditionals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For boolean conditionals it seems that we have always done NoGcInAlts.
+That is, we have always done the GC check before the conditional.
+This is enshrined in the special case for
+ case tagToEnum# (a>b) of ...
+See Note [case on bool]
+
+It's odd, and it's flagrantly inconsistent with the rules described
+Note [Compiling case expressions]. However, after eliminating the
+tagToEnum# (Trac #13397) we will have:
+ case (a>b) of ...
+Rather than make it behave quite differently, I am testing for a
+comparison operator here in in the general case as well.
+
+ToDo: figure out what the Right Rule should be.
-
-{-
Note [scrut sequel]
-
+~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
diff --git a/compiler/prelude/PrimOp.hs b/compiler/prelude/PrimOp.hs
index cc632169ef..3a849060ff 100644
--- a/compiler/prelude/PrimOp.hs
+++ b/compiler/prelude/PrimOp.hs
@@ -22,7 +22,7 @@ module PrimOp (
primOpOkForSpeculation, primOpOkForSideEffects,
primOpIsCheap, primOpFixity,
- getPrimOpResultInfo, PrimOpResultInfo(..),
+ getPrimOpResultInfo, isComparisonPrimOp, PrimOpResultInfo(..),
PrimCall(..)
) where
@@ -552,6 +552,11 @@ primOpOcc op = case primOpInfo op of
Compare occ _ -> occ
GenPrimOp occ _ _ _ -> occ
+isComparisonPrimOp :: PrimOp -> Bool
+isComparisonPrimOp op = case primOpInfo op of
+ Compare {} -> True
+ _ -> False
+
-- primOpSig is like primOpType but gives the result split apart:
-- (type variables, argument types, result type)
-- It also gives arity, strictness info