diff options
Diffstat (limited to 'compiler/GHC/StgToCmm/Expr.hs')
-rw-r--r-- | compiler/GHC/StgToCmm/Expr.hs | 216 |
1 files changed, 195 insertions, 21 deletions
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 51058c6186..07113a4e82 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP, BangPatterns #-} ----------------------------------------------------------------------------- -- @@ -32,10 +32,11 @@ import StgSyn import MkGraph import BlockId -import Cmm +import Cmm hiding ( succ ) import CmmInfo import CoreSyn import DataCon +import DynFlags ( mAX_PTR_TAG ) import ForeignCall import Id import PrimOp @@ -48,8 +49,9 @@ import Util import FastString import Outputable -import Control.Monad (unless,void) -import Control.Arrow (first) +import Control.Monad ( unless, void ) +import Control.Arrow ( first ) +import Data.List ( partition ) ------------------------------------------------------------------------ -- cgExpr: the main function @@ -631,29 +633,152 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts - ; let fam_sz = tyConFamilySize tycon - bndr_reg = CmmLocal (idToReg dflags bndr) - - -- Is the constructor tag in the node reg? - ; if isSmallFamily dflags fam_sz - then do - let -- Yes, bndr_reg has constr. tag in ls bits - tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg) - branches' = [(tag+1,branch) | (tag,branch) <- branches] - emitSwitch tag_expr branches' mb_deflt 1 fam_sz - - else -- No, get tag from info table - let -- Note that ptr _always_ has tag 1 - -- when the family size is big enough - untagged_ptr = cmmRegOffB bndr_reg (-1) - tag_expr = getConstrTag dflags (untagged_ptr) - in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + ; let !fam_sz = tyConFamilySize tycon + !bndr_reg = CmmLocal (idToReg dflags bndr) + !ptag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg) + !branches' = first succ <$> branches + !maxpt = mAX_PTR_TAG dflags + (!via_ptr, !via_info) = partition ((< maxpt) . fst) branches' + !small = isSmallFamily dflags fam_sz + + -- Is the constructor tag in the node reg? + -- See Note [Tagging big families] + ; if small || null via_info + then -- Yes, bndr_reg has constructor tag in ls bits + emitSwitch ptag_expr branches' mb_deflt 1 + (if small then fam_sz else maxpt) + + else -- No, the get exact tag from info table when mAX_PTR_TAG + -- See Note [Double switching for big families] + do + let !untagged_ptr = cmmUntag dflags (CmmReg bndr_reg) + !itag_expr = getConstrTag dflags untagged_ptr + !info0 = first pred <$> via_info + if null via_ptr then + emitSwitch itag_expr info0 mb_deflt 0 (fam_sz - 1) + else do + infos_lbl <- newBlockId + infos_scp <- getTickScope + + let spillover = (maxpt, (mkBranch infos_lbl, infos_scp)) + + (mb_shared_deflt, mb_shared_branch) <- case mb_deflt of + (Just (stmts, scp)) -> + do lbl <- newBlockId + return ( Just (mkLabel lbl scp <*> stmts, scp) + , Just (mkBranch lbl, scp)) + _ -> return (Nothing, Nothing) + -- Switch on pointer tag + emitSwitch ptag_expr (spillover : via_ptr) mb_shared_deflt 1 maxpt + join_lbl <- newBlockId + emit (mkBranch join_lbl) + -- Switch on info table tag + emitLabel infos_lbl + emitSwitch itag_expr info0 mb_shared_branch + (maxpt - 1) (fam_sz - 1) + emitLabel join_lbl ; return AssignedDirectly } cgAlts _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative +-- Note [Double switching for big families] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- An algebraic data type can have a n >= 0 summands +-- (or alternatives), which are identified (labeled) by +-- constructors. In memory they are kept apart by tags +-- (see Note [Data constructor dynamic tags] in GHC.StgToCmm.Closure). +-- Due to the characteristics of the platform that +-- contribute to the alignment of memory objects, there +-- is a natural limit of information about constructors +-- that can be encoded in the pointer tag. When the mapping +-- of constructors to the pointer tag range 1..mAX_PTR_TAG +-- is not injective, then we have a "big data type", also +-- called a "big (constructor) family" in the literature. +-- Constructor tags residing in the info table are injective, +-- but considerably more expensive to obtain, due to additional +-- memory access(es). +-- +-- When doing case analysis on a value of a "big data type" +-- we need two nested switch statements to make up for the lack +-- of injectivity of pointer tagging, also taking the info +-- table tag into account. The exact mechanism is described next. +-- +-- In the general case, switching on big family alternatives +-- is done by two nested switch statements. According to +-- Note [Tagging big families], the outer switch +-- looks at the pointer tag and the inner dereferences the +-- pointer and switches on the info table tag. +-- +-- We can handle a simple case first, namely when none +-- of the case alternatives mention a constructor having +-- a pointer tag of 1..mAX_PTR_TAG-1. In this case we +-- simply emit a switch on the info table tag. +-- Note that the other simple case is when all mentioned +-- alternatives lie in 1..mAX_PTR_TAG-1, in which case we can +-- switch on the ptr tag only, just like in the small family case. +-- +-- There is a single intricacy with a nested switch: +-- Both should branch to the same default alternative, and as such +-- avoid duplicate codegen of potentially heavy code. The outer +-- switch generates the actual code with a prepended fresh label, +-- while the inner one only generates a jump to that label. +-- +-- For example, let's assume a 64-bit architecture, so that all +-- heap objects are 8-byte aligned, and hence the address of a +-- heap object ends in `000` (three zero bits). +-- +-- Then consider the following data type +-- +-- > data Big = T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 +-- Ptr tag: 1 2 3 4 5 6 7 7 7 +-- As bits: 001 010 011 100 101 110 111 111 111 +-- Info pointer tag (zero based): +-- 0 1 2 3 4 5 6 7 8 +-- +-- Then \case T2 -> True; T8 -> True; _ -> False +-- will result in following code (slightly cleaned-up and +-- commented -ddump-cmm-from-stg): +{- + R1 = _sqI::P64; -- scrutinee + if (R1 & 7 != 0) goto cqO; else goto cqP; + cqP: // global -- enter + call (I64[R1])(R1) returns to cqO, args: 8, res: 8, upd: 8; + cqO: // global -- already WHNF + _sqJ::P64 = R1; + _cqX::P64 = _sqJ::P64 & 7; -- extract pointer tag + switch [1 .. 7] _cqX::P64 { + case 3 : goto cqW; + case 7 : goto cqR; + default: {goto cqS;} + } + cqR: // global + _cr2 = I32[I64[_sqJ::P64 & (-8)] - 4]; -- tag from info pointer + switch [6 .. 8] _cr2::I64 { + case 8 : goto cr1; + default: {goto cr0;} + } + cr1: // global + R1 = GHC.Types.True_closure+2; + call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8; + cr0: // global -- technically necessary label + goto cqS; + cqW: // global + R1 = GHC.Types.True_closure+2; + call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8; + cqS: // global + R1 = GHC.Types.False_closure+1; + call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8; +-} +-- +-- For 32-bit systems we only have 2 tag bits in the pointers at our disposal, +-- so the performance win is dubious, especially in face of the increased code +-- size due to double switching. But we can take the viewpoint that 32-bit +-- architectures are not relevant for performance any more, so this can be +-- considered as moot. + -- Note [alg-alt heap check] -- @@ -675,6 +800,55 @@ cgAlts _ _ _ _ = panic "cgAlts" -- x = R1 -- goto L1 + +-- Note [Tagging big families] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Both the big and the small constructor families are tagged, +-- that is, greater unions which overflow the tag space of TAG_BITS +-- (i.e. 3 on 32 resp. 7 constructors on 64 bit archs). +-- +-- For example, let's assume a 64-bit architecture, so that all +-- heap objects are 8-byte aligned, and hence the address of a +-- heap object ends in `000` (three zero bits). Then consider +-- > data Maybe a = Nothing | Just a +-- > data Day a = Mon | Tue | Wed | Thu | Fri | Sat | Sun +-- > data Grade = G1 | G2 | G3 | G4 | G5 | G6 | G7 | G8 | G9 | G10 +-- +-- Since `Grade` has more than 7 constructors, it counts as a +-- "big data type" (also referred to as "big constructor family" in papers). +-- On the other hand, `Maybe` and `Day` have 7 constructors or fewer, so they +-- are "small data types". +-- +-- Then +-- * A pointer to an unevaluated thunk of type `Maybe Int`, `Day` or `Grade` will end in `000` +-- * A tagged pointer to a `Nothing`, `Mon` or `G1` will end in `001` +-- * A tagged pointer to a `Just x`, `Tue` or `G2` will end in `010` +-- * A tagged pointer to `Wed` or `G3` will end in `011` +-- ... +-- * A tagged pointer to `Sat` or `G6` will end in `110` +-- * A tagged pointer to `Sun` or `G7` or `G8` or `G9` or `G10` will end in `111` +-- +-- For big families we employ a mildly clever way of combining pointer and +-- info-table tagging. We use 1..MAX_PTR_TAG-1 as pointer-resident tags where +-- the tags in the pointer and the info table are in a one-to-one +-- relation, whereas tag MAX_PTR_TAG is used as "spill over", signifying +-- we have to fall back and get the precise constructor tag from the +-- info-table. +-- +-- Consequently we now cascade switches, because we have to check +-- the pointer tag first, and when it is MAX_PTR_TAG, fetch the precise +-- tag from the info table, and switch on that. The only technically +-- tricky part is that the default case needs (logical) duplication. +-- To do this we emit an extra label for it and branch to that from +-- the second switch. This avoids duplicated codegen. See Trac #14373. +-- See note [Double switching for big families] for the mechanics +-- involved. +-- +-- Also see note [Data constructor dynamic tags] +-- and the wiki https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/haskell-execution/pointer-tagging +-- + ------------------- cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt] -> FCode ( Maybe CmmAGraphScoped |