summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2022-06-08 18:07:30 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-08-08 16:46:37 -0400
commit742292e461e4040faecf3482349a4574a9184239 (patch)
tree3d54ae153d7b0afd041a16c4c083718e7e6901f3
parent20457d775885d6c3df020d204da9a7acfb3c2e5a (diff)
downloadhaskell-742292e461e4040faecf3482349a4574a9184239.tar.gz
dataToTag#: Skip runtime tag check if argument is infered tagged
This addresses one part of #21710.
-rw-r--r--compiler/GHC/Stg/InferTags/Rewrite.hs18
-rw-r--r--compiler/GHC/StgToCmm/Expr.hs30
-rw-r--r--testsuite/tests/codeGen/should_compile/T21710a.hs15
-rw-r--r--testsuite/tests/codeGen/should_compile/T21710a.stderr446
-rw-r--r--testsuite/tests/codeGen/should_compile/all.T1
5 files changed, 499 insertions, 11 deletions
diff --git a/compiler/GHC/Stg/InferTags/Rewrite.hs b/compiler/GHC/Stg/InferTags/Rewrite.hs
index 340ff2fff0..a9a7677e40 100644
--- a/compiler/GHC/Stg/InferTags/Rewrite.hs
+++ b/compiler/GHC/Stg/InferTags/Rewrite.hs
@@ -20,6 +20,7 @@ where
import GHC.Prelude
+import GHC.Builtin.PrimOps ( PrimOp(..) )
import GHC.Types.Id
import GHC.Types.Name
import GHC.Types.Unique.Supply
@@ -346,6 +347,19 @@ fvArgs args = do
type IsScrut = Bool
+rewriteArgs :: [StgArg] -> RM [StgArg]
+rewriteArgs = mapM rewriteArg
+rewriteArg :: StgArg -> RM StgArg
+rewriteArg (StgVarArg v) = StgVarArg <$!> rewriteId v
+rewriteArg (lit@StgLitArg{}) = return lit
+
+-- Attach a tagSig if it's tagged
+rewriteId :: Id -> RM Id
+rewriteId v = do
+ is_tagged <- isTagged v
+ if is_tagged then return $! setIdTagSig v (TagSig TagProper)
+ else return v
+
rewriteExpr :: IsScrut -> InferStgExpr -> RM TgStgExpr
rewriteExpr _ (e@StgCase {}) = rewriteCase e
rewriteExpr _ (e@StgLet {}) = rewriteLet e
@@ -355,8 +369,11 @@ rewriteExpr _ e@(StgConApp {}) = rewriteConApp e
rewriteExpr isScrut e@(StgApp {}) = rewriteApp isScrut e
rewriteExpr _ (StgLit lit) = return $! (StgLit lit)
+rewriteExpr _ (StgOpApp op@(StgPrimOp DataToTagOp) args res_ty) = do
+ (StgOpApp op) <$!> rewriteArgs args <*> pure res_ty
rewriteExpr _ (StgOpApp op args res_ty) = return $! (StgOpApp op args res_ty)
+
rewriteCase :: InferStgExpr -> RM TgStgExpr
rewriteCase (StgCase scrut bndr alt_type alts) =
withBinder NotTopLevel bndr $
@@ -415,6 +432,7 @@ rewriteApp True (StgApp f []) = do
-- isTagged looks at more than the result of our analysis.
-- So always update here if useful.
let f' = if f_tagged
+ -- TODO: We might consisder using a subst env instead of setting the sig only for select places.
then setIdTagSig f (TagSig TagProper)
else f
return $! StgApp f' []
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 0e7c52f68d..030655c8e2 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -76,6 +76,8 @@ cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
-- dataToTag# :: a -> Int#
-- See Note [dataToTag# magic] in GHC.Core.Opt.ConstantFold
+-- TODO: There are some more optimization ideas for this code path
+-- in #21710
cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
platform <- getPlatform
emitComment (mkFastString "dataToTag#")
@@ -92,15 +94,7 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
-- the constructor index is too large to fit in the pointer and therefore
-- we must look in the info table. See Note [Tagging big families].
- slow_path <- getCode $ do
- tmp <- newTemp (bWord platform)
- _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
- profile <- getProfile
- align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
- emitAssign (CmmLocal result_reg)
- $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp)))
-
- fast_path <- getCode $ do
+ (fast_path :: CmmAGraph) <- getCode $ do
-- Return the constructor index from the pointer tag
return_ptr_tag <- getCode $ do
emitAssign (CmmLocal result_reg)
@@ -113,8 +107,22 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
$ getConstrTag profile align_check (cmmUntag platform amode)
emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)
-
- emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
+ -- If we know the argument is already tagged there is no need to generate code to evaluate it
+ -- so we skip straight to the fast path. If we don't know if there is a tag we take the slow
+ -- path which evaluates the argument before fetching the tag.
+ case (idTagSig_maybe a) of
+ Just sig
+ | isTaggedSig sig
+ -> emit fast_path
+ _ -> do
+ slow_path <- getCode $ do
+ tmp <- newTemp (bWord platform)
+ _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
+ profile <- getProfile
+ align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
+ emitAssign (CmmLocal result_reg)
+ $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp)))
+ emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
emitReturn [CmmReg $ CmmLocal result_reg]
diff --git a/testsuite/tests/codeGen/should_compile/T21710a.hs b/testsuite/tests/codeGen/should_compile/T21710a.hs
new file mode 100644
index 0000000000..b98b3f39f3
--- /dev/null
+++ b/testsuite/tests/codeGen/should_compile/T21710a.hs
@@ -0,0 +1,15 @@
+{-# LANGUAGE MagicHash, UnboxedTuples #-}
+{-# OPTIONS_GHC -O #-}
+
+module M where
+
+import GHC.Exts
+
+data E = A | B | C | D | E
+
+foo x =
+ case x of
+ A -> 2#
+ B -> 42#
+ -- In this branch we already now `x` is evaluated, so we shouldn't generate an extra `call` for it.
+ _ -> dataToTag# x
diff --git a/testsuite/tests/codeGen/should_compile/T21710a.stderr b/testsuite/tests/codeGen/should_compile/T21710a.stderr
new file mode 100644
index 0000000000..1495876f42
--- /dev/null
+++ b/testsuite/tests/codeGen/should_compile/T21710a.stderr
@@ -0,0 +1,446 @@
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'E2_bytes" {
+ M.$tc'E2_bytes:
+ I8[] "'E"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'D2_bytes" {
+ M.$tc'D2_bytes:
+ I8[] "'D"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'C2_bytes" {
+ M.$tc'C2_bytes:
+ I8[] "'C"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'B2_bytes" {
+ M.$tc'B2_bytes:
+ I8[] "'B"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'A3_bytes" {
+ M.$tc'A3_bytes:
+ I8[] "'A"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tcE2_bytes" {
+ M.$tcE2_bytes:
+ I8[] "E"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$trModule2_bytes" {
+ M.$trModule2_bytes:
+ I8[] "M"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$trModule4_bytes" {
+ M.$trModule4_bytes:
+ I8[] "main"
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.foo_entry() { // [R2]
+ { info_tbls: [(cBa,
+ label: block_cBa_info
+ rep: StackRep []
+ srt: Nothing),
+ (cBi,
+ label: M.foo_info
+ rep: HeapRep static { Fun {arity: 1 fun_type: ArgSpec 5} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cBi: // global
+ if ((Sp + -8) < SpLim) (likely: False) goto cBj; else goto cBk; // CmmCondBranch
+ cBj: // global
+ R1 = M.foo_closure; // CmmAssign
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; // CmmCall
+ cBk: // global
+ I64[Sp - 8] = cBa; // CmmStore
+ R1 = R2; // CmmAssign
+ Sp = Sp - 8; // CmmAssign
+ if (R1 & 7 != 0) goto cBa; else goto cBb; // CmmCondBranch
+ cBb: // global
+ call (I64[R1])(R1) returns to cBa, args: 8, res: 8, upd: 8; // CmmCall
+ cBa: // global
+ _cBh::P64 = R1 & 7; // CmmAssign
+ if (_cBh::P64 != 1) goto uBz; else goto cBf; // CmmCondBranch
+ uBz: // global
+ if (_cBh::P64 != 2) goto cBe; else goto cBg; // CmmCondBranch
+ cBe: // global
+ // dataToTag#
+ _cBn::P64 = R1 & 7; // CmmAssign
+ if (_cBn::P64 == 7) (likely: False) goto cBs; else goto cBr; // CmmCondBranch
+ cBs: // global
+ _cBo::I64 = %MO_UU_Conv_W32_W64(I32[I64[R1 & (-8)] - 4]); // CmmAssign
+ goto cBq; // CmmBranch
+ cBr: // global
+ _cBo::I64 = _cBn::P64 - 1; // CmmAssign
+ goto cBq; // CmmBranch
+ cBq: // global
+ R1 = _cBo::I64; // CmmAssign
+ Sp = Sp + 8; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ cBg: // global
+ R1 = 42; // CmmAssign
+ Sp = Sp + 8; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ cBf: // global
+ R1 = 2; // CmmAssign
+ Sp = Sp + 8; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ },
+ section ""data" . M.foo_closure" {
+ M.foo_closure:
+ const M.foo_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$trModule3_closure" {
+ M.$trModule3_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$trModule4_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$trModule1_closure" {
+ M.$trModule1_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$trModule2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$trModule_closure" {
+ M.$trModule_closure:
+ const GHC.Types.Module_con_info;
+ const M.$trModule3_closure+1;
+ const M.$trModule1_closure+1;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tcE1_closure" {
+ M.$tcE1_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$tcE2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tcE_closure" {
+ M.$tcE_closure:
+ const GHC.Types.TyCon_con_info;
+ const M.$trModule_closure+1;
+ const M.$tcE1_closure+1;
+ const GHC.Types.krep$*_closure+5;
+ const 10475418246443540865;
+ const 12461417314693222409;
+ const 0;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'A1_closure" {
+ M.$tc'A1_closure:
+ const GHC.Types.KindRepTyConApp_con_info;
+ const M.$tcE_closure+1;
+ const GHC.Types.[]_closure+1;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'A2_closure" {
+ M.$tc'A2_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$tc'A3_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'A_closure" {
+ M.$tc'A_closure:
+ const GHC.Types.TyCon_con_info;
+ const M.$trModule_closure+1;
+ const M.$tc'A2_closure+1;
+ const M.$tc'A1_closure+1;
+ const 10991425535368257265;
+ const 3459663971500179679;
+ const 0;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'B1_closure" {
+ M.$tc'B1_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$tc'B2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'B_closure" {
+ M.$tc'B_closure:
+ const GHC.Types.TyCon_con_info;
+ const M.$trModule_closure+1;
+ const M.$tc'B1_closure+1;
+ const M.$tc'A1_closure+1;
+ const 13038863156169552918;
+ const 13430333535161531545;
+ const 0;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'C1_closure" {
+ M.$tc'C1_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$tc'C2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'C_closure" {
+ M.$tc'C_closure:
+ const GHC.Types.TyCon_con_info;
+ const M.$trModule_closure+1;
+ const M.$tc'C1_closure+1;
+ const M.$tc'A1_closure+1;
+ const 8482817676735632621;
+ const 8146597712321241387;
+ const 0;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'D1_closure" {
+ M.$tc'D1_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$tc'D2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'D_closure" {
+ M.$tc'D_closure:
+ const GHC.Types.TyCon_con_info;
+ const M.$trModule_closure+1;
+ const M.$tc'D1_closure+1;
+ const M.$tc'A1_closure+1;
+ const 7525207739284160575;
+ const 13746130127476219356;
+ const 0;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'E1_closure" {
+ M.$tc'E1_closure:
+ const GHC.Types.TrNameS_con_info;
+ const M.$tc'E2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'E_closure" {
+ M.$tc'E_closure:
+ const GHC.Types.TyCon_con_info;
+ const M.$trModule_closure+1;
+ const M.$tc'E1_closure+1;
+ const M.$tc'A1_closure+1;
+ const 6748545530683684316;
+ const 10193016702094081137;
+ const 0;
+ const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.A_closure" {
+ M.A_closure:
+ const M.A_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.B_closure" {
+ M.B_closure:
+ const M.B_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.C_closure" {
+ M.C_closure:
+ const M.C_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.D_closure" {
+ M.D_closure:
+ const M.D_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.E_closure" {
+ M.E_closure:
+ const M.E_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""relreadonly" . M.E_closure_tbl" {
+ M.E_closure_tbl:
+ const M.A_closure+1;
+ const M.B_closure+2;
+ const M.C_closure+3;
+ const M.D_closure+4;
+ const M.E_closure+5;
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.A_con_entry() { // []
+ { info_tbls: [(cC5,
+ label: M.A_con_info
+ rep: HeapRep 1 nonptrs { Con {tag: 0 descr:"main:M.A"} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cC5: // global
+ R1 = R1 + 1; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.B_con_entry() { // []
+ { info_tbls: [(cCa,
+ label: M.B_con_info
+ rep: HeapRep 1 nonptrs { Con {tag: 1 descr:"main:M.B"} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cCa: // global
+ R1 = R1 + 2; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.C_con_entry() { // []
+ { info_tbls: [(cCf,
+ label: M.C_con_info
+ rep: HeapRep 1 nonptrs { Con {tag: 2 descr:"main:M.C"} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cCf: // global
+ R1 = R1 + 3; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.D_con_entry() { // []
+ { info_tbls: [(cCk,
+ label: M.D_con_info
+ rep: HeapRep 1 nonptrs { Con {tag: 3 descr:"main:M.D"} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cCk: // global
+ R1 = R1 + 4; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.E_con_entry() { // []
+ { info_tbls: [(cCp,
+ label: M.E_con_info
+ rep: HeapRep 1 nonptrs { Con {tag: 4 descr:"main:M.E"} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cCp: // global
+ R1 = R1 + 5; // CmmAssign
+ call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
diff --git a/testsuite/tests/codeGen/should_compile/all.T b/testsuite/tests/codeGen/should_compile/all.T
index 78d96f83ff..9569a0e64a 100644
--- a/testsuite/tests/codeGen/should_compile/all.T
+++ b/testsuite/tests/codeGen/should_compile/all.T
@@ -108,3 +108,4 @@ test('T18614', normal, compile, [''])
test('mk-big-obj',
[unless(opsys('mingw32'), skip), pre_cmd('$PYTHON mk-big-obj.py > mk-big-obj.c')],
multimod_compile, ['mk-big-obj.c', '-c -v0 -no-hs-main'])
+test('T21710a', [ only_ways(['optasm']), when(wordsize(32), skip), grep_errmsg('(call)',[1]) ], compile, ['-ddump-cmm -dno-typeable-binds'])