diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2022-06-08 18:07:30 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-08-08 16:46:37 -0400 |
commit | 742292e461e4040faecf3482349a4574a9184239 (patch) | |
tree | 3d54ae153d7b0afd041a16c4c083718e7e6901f3 | |
parent | 20457d775885d6c3df020d204da9a7acfb3c2e5a (diff) | |
download | haskell-742292e461e4040faecf3482349a4574a9184239.tar.gz |
dataToTag#: Skip runtime tag check if argument is infered tagged
This addresses one part of #21710.
-rw-r--r-- | compiler/GHC/Stg/InferTags/Rewrite.hs | 18 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Expr.hs | 30 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/T21710a.hs | 15 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/T21710a.stderr | 446 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/all.T | 1 |
5 files changed, 499 insertions, 11 deletions
diff --git a/compiler/GHC/Stg/InferTags/Rewrite.hs b/compiler/GHC/Stg/InferTags/Rewrite.hs index 340ff2fff0..a9a7677e40 100644 --- a/compiler/GHC/Stg/InferTags/Rewrite.hs +++ b/compiler/GHC/Stg/InferTags/Rewrite.hs @@ -20,6 +20,7 @@ where import GHC.Prelude +import GHC.Builtin.PrimOps ( PrimOp(..) ) import GHC.Types.Id import GHC.Types.Name import GHC.Types.Unique.Supply @@ -346,6 +347,19 @@ fvArgs args = do type IsScrut = Bool +rewriteArgs :: [StgArg] -> RM [StgArg] +rewriteArgs = mapM rewriteArg +rewriteArg :: StgArg -> RM StgArg +rewriteArg (StgVarArg v) = StgVarArg <$!> rewriteId v +rewriteArg (lit@StgLitArg{}) = return lit + +-- Attach a tagSig if it's tagged +rewriteId :: Id -> RM Id +rewriteId v = do + is_tagged <- isTagged v + if is_tagged then return $! setIdTagSig v (TagSig TagProper) + else return v + rewriteExpr :: IsScrut -> InferStgExpr -> RM TgStgExpr rewriteExpr _ (e@StgCase {}) = rewriteCase e rewriteExpr _ (e@StgLet {}) = rewriteLet e @@ -355,8 +369,11 @@ rewriteExpr _ e@(StgConApp {}) = rewriteConApp e rewriteExpr isScrut e@(StgApp {}) = rewriteApp isScrut e rewriteExpr _ (StgLit lit) = return $! (StgLit lit) +rewriteExpr _ (StgOpApp op@(StgPrimOp DataToTagOp) args res_ty) = do + (StgOpApp op) <$!> rewriteArgs args <*> pure res_ty rewriteExpr _ (StgOpApp op args res_ty) = return $! (StgOpApp op args res_ty) + rewriteCase :: InferStgExpr -> RM TgStgExpr rewriteCase (StgCase scrut bndr alt_type alts) = withBinder NotTopLevel bndr $ @@ -415,6 +432,7 @@ rewriteApp True (StgApp f []) = do -- isTagged looks at more than the result of our analysis. -- So always update here if useful. let f' = if f_tagged + -- TODO: We might consisder using a subst env instead of setting the sig only for select places. then setIdTagSig f (TagSig TagProper) else f return $! StgApp f' [] diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 0e7c52f68d..030655c8e2 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -76,6 +76,8 @@ cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = -- dataToTag# :: a -> Int# -- See Note [dataToTag# magic] in GHC.Core.Opt.ConstantFold +-- TODO: There are some more optimization ideas for this code path +-- in #21710 cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do platform <- getPlatform emitComment (mkFastString "dataToTag#") @@ -92,15 +94,7 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do -- the constructor index is too large to fit in the pointer and therefore -- we must look in the info table. See Note [Tagging big families]. - slow_path <- getCode $ do - tmp <- newTemp (bWord platform) - _ <- withSequel (AssignTo [tmp] False) (cgIdApp a []) - profile <- getProfile - align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig - emitAssign (CmmLocal result_reg) - $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp))) - - fast_path <- getCode $ do + (fast_path :: CmmAGraph) <- getCode $ do -- Return the constructor index from the pointer tag return_ptr_tag <- getCode $ do emitAssign (CmmLocal result_reg) @@ -113,8 +107,22 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do $ getConstrTag profile align_check (cmmUntag platform amode) emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False) - - emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True) + -- If we know the argument is already tagged there is no need to generate code to evaluate it + -- so we skip straight to the fast path. If we don't know if there is a tag we take the slow + -- path which evaluates the argument before fetching the tag. + case (idTagSig_maybe a) of + Just sig + | isTaggedSig sig + -> emit fast_path + _ -> do + slow_path <- getCode $ do + tmp <- newTemp (bWord platform) + _ <- withSequel (AssignTo [tmp] False) (cgIdApp a []) + profile <- getProfile + align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig + emitAssign (CmmLocal result_reg) + $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp))) + emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True) emitReturn [CmmReg $ CmmLocal result_reg] diff --git a/testsuite/tests/codeGen/should_compile/T21710a.hs b/testsuite/tests/codeGen/should_compile/T21710a.hs new file mode 100644 index 0000000000..b98b3f39f3 --- /dev/null +++ b/testsuite/tests/codeGen/should_compile/T21710a.hs @@ -0,0 +1,15 @@ +{-# LANGUAGE MagicHash, UnboxedTuples #-} +{-# OPTIONS_GHC -O #-} + +module M where + +import GHC.Exts + +data E = A | B | C | D | E + +foo x = + case x of + A -> 2# + B -> 42# + -- In this branch we already now `x` is evaluated, so we shouldn't generate an extra `call` for it. + _ -> dataToTag# x diff --git a/testsuite/tests/codeGen/should_compile/T21710a.stderr b/testsuite/tests/codeGen/should_compile/T21710a.stderr new file mode 100644 index 0000000000..1495876f42 --- /dev/null +++ b/testsuite/tests/codeGen/should_compile/T21710a.stderr @@ -0,0 +1,446 @@ + +==================== Output Cmm ==================== +[section ""cstring" . M.$tc'E2_bytes" { + M.$tc'E2_bytes: + I8[] "'E" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$tc'D2_bytes" { + M.$tc'D2_bytes: + I8[] "'D" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$tc'C2_bytes" { + M.$tc'C2_bytes: + I8[] "'C" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$tc'B2_bytes" { + M.$tc'B2_bytes: + I8[] "'B" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$tc'A3_bytes" { + M.$tc'A3_bytes: + I8[] "'A" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$tcE2_bytes" { + M.$tcE2_bytes: + I8[] "E" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$trModule2_bytes" { + M.$trModule2_bytes: + I8[] "M" + }] + + + +==================== Output Cmm ==================== +[section ""cstring" . M.$trModule4_bytes" { + M.$trModule4_bytes: + I8[] "main" + }] + + + +==================== Output Cmm ==================== +[M.foo_entry() { // [R2] + { info_tbls: [(cBa, + label: block_cBa_info + rep: StackRep [] + srt: Nothing), + (cBi, + label: M.foo_info + rep: HeapRep static { Fun {arity: 1 fun_type: ArgSpec 5} } + srt: Nothing)] + stack_info: arg_space: 8 + } + {offset + cBi: // global + if ((Sp + -8) < SpLim) (likely: False) goto cBj; else goto cBk; // CmmCondBranch + cBj: // global + R1 = M.foo_closure; // CmmAssign + call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; // CmmCall + cBk: // global + I64[Sp - 8] = cBa; // CmmStore + R1 = R2; // CmmAssign + Sp = Sp - 8; // CmmAssign + if (R1 & 7 != 0) goto cBa; else goto cBb; // CmmCondBranch + cBb: // global + call (I64[R1])(R1) returns to cBa, args: 8, res: 8, upd: 8; // CmmCall + cBa: // global + _cBh::P64 = R1 & 7; // CmmAssign + if (_cBh::P64 != 1) goto uBz; else goto cBf; // CmmCondBranch + uBz: // global + if (_cBh::P64 != 2) goto cBe; else goto cBg; // CmmCondBranch + cBe: // global + // dataToTag# + _cBn::P64 = R1 & 7; // CmmAssign + if (_cBn::P64 == 7) (likely: False) goto cBs; else goto cBr; // CmmCondBranch + cBs: // global + _cBo::I64 = %MO_UU_Conv_W32_W64(I32[I64[R1 & (-8)] - 4]); // CmmAssign + goto cBq; // CmmBranch + cBr: // global + _cBo::I64 = _cBn::P64 - 1; // CmmAssign + goto cBq; // CmmBranch + cBq: // global + R1 = _cBo::I64; // CmmAssign + Sp = Sp + 8; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + cBg: // global + R1 = 42; // CmmAssign + Sp = Sp + 8; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + cBf: // global + R1 = 2; // CmmAssign + Sp = Sp + 8; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + } + }, + section ""data" . M.foo_closure" { + M.foo_closure: + const M.foo_info; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$trModule3_closure" { + M.$trModule3_closure: + const GHC.Types.TrNameS_con_info; + const M.$trModule4_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$trModule1_closure" { + M.$trModule1_closure: + const GHC.Types.TrNameS_con_info; + const M.$trModule2_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$trModule_closure" { + M.$trModule_closure: + const GHC.Types.Module_con_info; + const M.$trModule3_closure+1; + const M.$trModule1_closure+1; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tcE1_closure" { + M.$tcE1_closure: + const GHC.Types.TrNameS_con_info; + const M.$tcE2_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tcE_closure" { + M.$tcE_closure: + const GHC.Types.TyCon_con_info; + const M.$trModule_closure+1; + const M.$tcE1_closure+1; + const GHC.Types.krep$*_closure+5; + const 10475418246443540865; + const 12461417314693222409; + const 0; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'A1_closure" { + M.$tc'A1_closure: + const GHC.Types.KindRepTyConApp_con_info; + const M.$tcE_closure+1; + const GHC.Types.[]_closure+1; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'A2_closure" { + M.$tc'A2_closure: + const GHC.Types.TrNameS_con_info; + const M.$tc'A3_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'A_closure" { + M.$tc'A_closure: + const GHC.Types.TyCon_con_info; + const M.$trModule_closure+1; + const M.$tc'A2_closure+1; + const M.$tc'A1_closure+1; + const 10991425535368257265; + const 3459663971500179679; + const 0; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'B1_closure" { + M.$tc'B1_closure: + const GHC.Types.TrNameS_con_info; + const M.$tc'B2_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'B_closure" { + M.$tc'B_closure: + const GHC.Types.TyCon_con_info; + const M.$trModule_closure+1; + const M.$tc'B1_closure+1; + const M.$tc'A1_closure+1; + const 13038863156169552918; + const 13430333535161531545; + const 0; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'C1_closure" { + M.$tc'C1_closure: + const GHC.Types.TrNameS_con_info; + const M.$tc'C2_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'C_closure" { + M.$tc'C_closure: + const GHC.Types.TyCon_con_info; + const M.$trModule_closure+1; + const M.$tc'C1_closure+1; + const M.$tc'A1_closure+1; + const 8482817676735632621; + const 8146597712321241387; + const 0; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'D1_closure" { + M.$tc'D1_closure: + const GHC.Types.TrNameS_con_info; + const M.$tc'D2_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'D_closure" { + M.$tc'D_closure: + const GHC.Types.TyCon_con_info; + const M.$trModule_closure+1; + const M.$tc'D1_closure+1; + const M.$tc'A1_closure+1; + const 7525207739284160575; + const 13746130127476219356; + const 0; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'E1_closure" { + M.$tc'E1_closure: + const GHC.Types.TrNameS_con_info; + const M.$tc'E2_bytes; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.$tc'E_closure" { + M.$tc'E_closure: + const GHC.Types.TyCon_con_info; + const M.$trModule_closure+1; + const M.$tc'E1_closure+1; + const M.$tc'A1_closure+1; + const 6748545530683684316; + const 10193016702094081137; + const 0; + const 3; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.A_closure" { + M.A_closure: + const M.A_con_info; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.B_closure" { + M.B_closure: + const M.B_con_info; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.C_closure" { + M.C_closure: + const M.C_con_info; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.D_closure" { + M.D_closure: + const M.D_con_info; + }] + + + +==================== Output Cmm ==================== +[section ""data" . M.E_closure" { + M.E_closure: + const M.E_con_info; + }] + + + +==================== Output Cmm ==================== +[section ""relreadonly" . M.E_closure_tbl" { + M.E_closure_tbl: + const M.A_closure+1; + const M.B_closure+2; + const M.C_closure+3; + const M.D_closure+4; + const M.E_closure+5; + }] + + + +==================== Output Cmm ==================== +[M.A_con_entry() { // [] + { info_tbls: [(cC5, + label: M.A_con_info + rep: HeapRep 1 nonptrs { Con {tag: 0 descr:"main:M.A"} } + srt: Nothing)] + stack_info: arg_space: 8 + } + {offset + cC5: // global + R1 = R1 + 1; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + } + }] + + + +==================== Output Cmm ==================== +[M.B_con_entry() { // [] + { info_tbls: [(cCa, + label: M.B_con_info + rep: HeapRep 1 nonptrs { Con {tag: 1 descr:"main:M.B"} } + srt: Nothing)] + stack_info: arg_space: 8 + } + {offset + cCa: // global + R1 = R1 + 2; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + } + }] + + + +==================== Output Cmm ==================== +[M.C_con_entry() { // [] + { info_tbls: [(cCf, + label: M.C_con_info + rep: HeapRep 1 nonptrs { Con {tag: 2 descr:"main:M.C"} } + srt: Nothing)] + stack_info: arg_space: 8 + } + {offset + cCf: // global + R1 = R1 + 3; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + } + }] + + + +==================== Output Cmm ==================== +[M.D_con_entry() { // [] + { info_tbls: [(cCk, + label: M.D_con_info + rep: HeapRep 1 nonptrs { Con {tag: 3 descr:"main:M.D"} } + srt: Nothing)] + stack_info: arg_space: 8 + } + {offset + cCk: // global + R1 = R1 + 4; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + } + }] + + + +==================== Output Cmm ==================== +[M.E_con_entry() { // [] + { info_tbls: [(cCp, + label: M.E_con_info + rep: HeapRep 1 nonptrs { Con {tag: 4 descr:"main:M.E"} } + srt: Nothing)] + stack_info: arg_space: 8 + } + {offset + cCp: // global + R1 = R1 + 5; // CmmAssign + call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall + } + }] + + diff --git a/testsuite/tests/codeGen/should_compile/all.T b/testsuite/tests/codeGen/should_compile/all.T index 78d96f83ff..9569a0e64a 100644 --- a/testsuite/tests/codeGen/should_compile/all.T +++ b/testsuite/tests/codeGen/should_compile/all.T @@ -108,3 +108,4 @@ test('T18614', normal, compile, ['']) test('mk-big-obj', [unless(opsys('mingw32'), skip), pre_cmd('$PYTHON mk-big-obj.py > mk-big-obj.c')], multimod_compile, ['mk-big-obj.c', '-c -v0 -no-hs-main']) +test('T21710a', [ only_ways(['optasm']), when(wordsize(32), skip), grep_errmsg('(call)',[1]) ], compile, ['-ddump-cmm -dno-typeable-binds']) |