summaryrefslogtreecommitdiff
path: root/compiler/GHC/StgToCmm/Expr.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/StgToCmm/Expr.hs')
-rw-r--r--compiler/GHC/StgToCmm/Expr.hs216
1 files changed, 195 insertions, 21 deletions
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 51058c6186..07113a4e82 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP, BangPatterns #-}
-----------------------------------------------------------------------------
--
@@ -32,10 +32,11 @@ import StgSyn
import MkGraph
import BlockId
-import Cmm
+import Cmm hiding ( succ )
import CmmInfo
import CoreSyn
import DataCon
+import DynFlags ( mAX_PTR_TAG )
import ForeignCall
import Id
import PrimOp
@@ -48,8 +49,9 @@ import Util
import FastString
import Outputable
-import Control.Monad (unless,void)
-import Control.Arrow (first)
+import Control.Monad ( unless, void )
+import Control.Arrow ( first )
+import Data.List ( partition )
------------------------------------------------------------------------
-- cgExpr: the main function
@@ -631,29 +633,152 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts
; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
- ; let fam_sz = tyConFamilySize tycon
- bndr_reg = CmmLocal (idToReg dflags bndr)
-
- -- Is the constructor tag in the node reg?
- ; if isSmallFamily dflags fam_sz
- then do
- let -- Yes, bndr_reg has constr. tag in ls bits
- tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
- branches' = [(tag+1,branch) | (tag,branch) <- branches]
- emitSwitch tag_expr branches' mb_deflt 1 fam_sz
-
- else -- No, get tag from info table
- let -- Note that ptr _always_ has tag 1
- -- when the family size is big enough
- untagged_ptr = cmmRegOffB bndr_reg (-1)
- tag_expr = getConstrTag dflags (untagged_ptr)
- in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
+ ; let !fam_sz = tyConFamilySize tycon
+ !bndr_reg = CmmLocal (idToReg dflags bndr)
+ !ptag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
+ !branches' = first succ <$> branches
+ !maxpt = mAX_PTR_TAG dflags
+ (!via_ptr, !via_info) = partition ((< maxpt) . fst) branches'
+ !small = isSmallFamily dflags fam_sz
+
+ -- Is the constructor tag in the node reg?
+ -- See Note [Tagging big families]
+ ; if small || null via_info
+ then -- Yes, bndr_reg has constructor tag in ls bits
+ emitSwitch ptag_expr branches' mb_deflt 1
+ (if small then fam_sz else maxpt)
+
+ else -- No, the get exact tag from info table when mAX_PTR_TAG
+ -- See Note [Double switching for big families]
+ do
+ let !untagged_ptr = cmmUntag dflags (CmmReg bndr_reg)
+ !itag_expr = getConstrTag dflags untagged_ptr
+ !info0 = first pred <$> via_info
+ if null via_ptr then
+ emitSwitch itag_expr info0 mb_deflt 0 (fam_sz - 1)
+ else do
+ infos_lbl <- newBlockId
+ infos_scp <- getTickScope
+
+ let spillover = (maxpt, (mkBranch infos_lbl, infos_scp))
+
+ (mb_shared_deflt, mb_shared_branch) <- case mb_deflt of
+ (Just (stmts, scp)) ->
+ do lbl <- newBlockId
+ return ( Just (mkLabel lbl scp <*> stmts, scp)
+ , Just (mkBranch lbl, scp))
+ _ -> return (Nothing, Nothing)
+ -- Switch on pointer tag
+ emitSwitch ptag_expr (spillover : via_ptr) mb_shared_deflt 1 maxpt
+ join_lbl <- newBlockId
+ emit (mkBranch join_lbl)
+ -- Switch on info table tag
+ emitLabel infos_lbl
+ emitSwitch itag_expr info0 mb_shared_branch
+ (maxpt - 1) (fam_sz - 1)
+ emitLabel join_lbl
; return AssignedDirectly }
cgAlts _ _ _ _ = panic "cgAlts"
-- UbxTupAlt and PolyAlt have only one alternative
+-- Note [Double switching for big families]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- An algebraic data type can have a n >= 0 summands
+-- (or alternatives), which are identified (labeled) by
+-- constructors. In memory they are kept apart by tags
+-- (see Note [Data constructor dynamic tags] in GHC.StgToCmm.Closure).
+-- Due to the characteristics of the platform that
+-- contribute to the alignment of memory objects, there
+-- is a natural limit of information about constructors
+-- that can be encoded in the pointer tag. When the mapping
+-- of constructors to the pointer tag range 1..mAX_PTR_TAG
+-- is not injective, then we have a "big data type", also
+-- called a "big (constructor) family" in the literature.
+-- Constructor tags residing in the info table are injective,
+-- but considerably more expensive to obtain, due to additional
+-- memory access(es).
+--
+-- When doing case analysis on a value of a "big data type"
+-- we need two nested switch statements to make up for the lack
+-- of injectivity of pointer tagging, also taking the info
+-- table tag into account. The exact mechanism is described next.
+--
+-- In the general case, switching on big family alternatives
+-- is done by two nested switch statements. According to
+-- Note [Tagging big families], the outer switch
+-- looks at the pointer tag and the inner dereferences the
+-- pointer and switches on the info table tag.
+--
+-- We can handle a simple case first, namely when none
+-- of the case alternatives mention a constructor having
+-- a pointer tag of 1..mAX_PTR_TAG-1. In this case we
+-- simply emit a switch on the info table tag.
+-- Note that the other simple case is when all mentioned
+-- alternatives lie in 1..mAX_PTR_TAG-1, in which case we can
+-- switch on the ptr tag only, just like in the small family case.
+--
+-- There is a single intricacy with a nested switch:
+-- Both should branch to the same default alternative, and as such
+-- avoid duplicate codegen of potentially heavy code. The outer
+-- switch generates the actual code with a prepended fresh label,
+-- while the inner one only generates a jump to that label.
+--
+-- For example, let's assume a 64-bit architecture, so that all
+-- heap objects are 8-byte aligned, and hence the address of a
+-- heap object ends in `000` (three zero bits).
+--
+-- Then consider the following data type
+--
+-- > data Big = T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8
+-- Ptr tag: 1 2 3 4 5 6 7 7 7
+-- As bits: 001 010 011 100 101 110 111 111 111
+-- Info pointer tag (zero based):
+-- 0 1 2 3 4 5 6 7 8
+--
+-- Then \case T2 -> True; T8 -> True; _ -> False
+-- will result in following code (slightly cleaned-up and
+-- commented -ddump-cmm-from-stg):
+{-
+ R1 = _sqI::P64; -- scrutinee
+ if (R1 & 7 != 0) goto cqO; else goto cqP;
+ cqP: // global -- enter
+ call (I64[R1])(R1) returns to cqO, args: 8, res: 8, upd: 8;
+ cqO: // global -- already WHNF
+ _sqJ::P64 = R1;
+ _cqX::P64 = _sqJ::P64 & 7; -- extract pointer tag
+ switch [1 .. 7] _cqX::P64 {
+ case 3 : goto cqW;
+ case 7 : goto cqR;
+ default: {goto cqS;}
+ }
+ cqR: // global
+ _cr2 = I32[I64[_sqJ::P64 & (-8)] - 4]; -- tag from info pointer
+ switch [6 .. 8] _cr2::I64 {
+ case 8 : goto cr1;
+ default: {goto cr0;}
+ }
+ cr1: // global
+ R1 = GHC.Types.True_closure+2;
+ call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
+ cr0: // global -- technically necessary label
+ goto cqS;
+ cqW: // global
+ R1 = GHC.Types.True_closure+2;
+ call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
+ cqS: // global
+ R1 = GHC.Types.False_closure+1;
+ call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8;
+-}
+--
+-- For 32-bit systems we only have 2 tag bits in the pointers at our disposal,
+-- so the performance win is dubious, especially in face of the increased code
+-- size due to double switching. But we can take the viewpoint that 32-bit
+-- architectures are not relevant for performance any more, so this can be
+-- considered as moot.
+
-- Note [alg-alt heap check]
--
@@ -675,6 +800,55 @@ cgAlts _ _ _ _ = panic "cgAlts"
-- x = R1
-- goto L1
+
+-- Note [Tagging big families]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Both the big and the small constructor families are tagged,
+-- that is, greater unions which overflow the tag space of TAG_BITS
+-- (i.e. 3 on 32 resp. 7 constructors on 64 bit archs).
+--
+-- For example, let's assume a 64-bit architecture, so that all
+-- heap objects are 8-byte aligned, and hence the address of a
+-- heap object ends in `000` (three zero bits). Then consider
+-- > data Maybe a = Nothing | Just a
+-- > data Day a = Mon | Tue | Wed | Thu | Fri | Sat | Sun
+-- > data Grade = G1 | G2 | G3 | G4 | G5 | G6 | G7 | G8 | G9 | G10
+--
+-- Since `Grade` has more than 7 constructors, it counts as a
+-- "big data type" (also referred to as "big constructor family" in papers).
+-- On the other hand, `Maybe` and `Day` have 7 constructors or fewer, so they
+-- are "small data types".
+--
+-- Then
+-- * A pointer to an unevaluated thunk of type `Maybe Int`, `Day` or `Grade` will end in `000`
+-- * A tagged pointer to a `Nothing`, `Mon` or `G1` will end in `001`
+-- * A tagged pointer to a `Just x`, `Tue` or `G2` will end in `010`
+-- * A tagged pointer to `Wed` or `G3` will end in `011`
+-- ...
+-- * A tagged pointer to `Sat` or `G6` will end in `110`
+-- * A tagged pointer to `Sun` or `G7` or `G8` or `G9` or `G10` will end in `111`
+--
+-- For big families we employ a mildly clever way of combining pointer and
+-- info-table tagging. We use 1..MAX_PTR_TAG-1 as pointer-resident tags where
+-- the tags in the pointer and the info table are in a one-to-one
+-- relation, whereas tag MAX_PTR_TAG is used as "spill over", signifying
+-- we have to fall back and get the precise constructor tag from the
+-- info-table.
+--
+-- Consequently we now cascade switches, because we have to check
+-- the pointer tag first, and when it is MAX_PTR_TAG, fetch the precise
+-- tag from the info table, and switch on that. The only technically
+-- tricky part is that the default case needs (logical) duplication.
+-- To do this we emit an extra label for it and branch to that from
+-- the second switch. This avoids duplicated codegen. See Trac #14373.
+-- See note [Double switching for big families] for the mechanics
+-- involved.
+--
+-- Also see note [Data constructor dynamic tags]
+-- and the wiki https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/haskell-execution/pointer-tagging
+--
+
-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
-> FCode ( Maybe CmmAGraphScoped