diff options
Diffstat (limited to 'compiler/GHC/Wasm')
-rw-r--r-- | compiler/GHC/Wasm/ControlFlow.hs | 152 | ||||
-rw-r--r-- | compiler/GHC/Wasm/ControlFlow/FromCmm.hs | 354 | ||||
-rw-r--r-- | compiler/GHC/Wasm/ControlFlow/ToAsm.hs | 98 |
3 files changed, 604 insertions, 0 deletions
diff --git a/compiler/GHC/Wasm/ControlFlow.hs b/compiler/GHC/Wasm/ControlFlow.hs new file mode 100644 index 0000000000..2ef025574d --- /dev/null +++ b/compiler/GHC/Wasm/ControlFlow.hs @@ -0,0 +1,152 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators, KindSignatures #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE PatternSynonyms #-} + +module GHC.Wasm.ControlFlow + ( WasmControl(..), (<>), pattern WasmIf, wasmReturn + , BrTableInterval(..), inclusiveInterval + , brTableLimit + + , WasmType(..), WasmTypeTag(..) + , TypeList(..) + , WasmFunctionType(..) + ) +where + +import GHC.Prelude + +import Data.Kind + +import GHC.Utils.Outputable hiding ((<>)) +import GHC.Utils.Panic + +{-| +Module : GHC.Wasm.ControlFlow +Description : Representation of control-flow portion of the WebAssembly instruction set +-} + +-- | WebAssembly type of a WebAssembly value that WebAssembly code +-- could either expect on the evaluation stack or leave on the evaluation +-- stack. At present we support only 32-bit values. + +data WasmType = I32 | F32 + deriving (Eq, Show) + + +-- | Singleton type useful for programming with `WasmType` at the type level. + +data WasmTypeTag :: WasmType -> Type where + TagI32 :: WasmTypeTag 'I32 + TagF32 :: WasmTypeTag 'F32 + +-- | List of WebAssembly types used to describe the sequence of WebAssembly +-- values that a block of code may expect on the stack or leave on the stack. + +data TypeList :: [WasmType] -> Type where + TypeListNil :: TypeList '[] + TypeListCons :: WasmTypeTag t -> TypeList ts -> TypeList (t : ts) + +-- | The type of a WebAssembly function, loop, block, or conditional. +-- This type says what values the code expects to pop off the stack and +-- what values it promises to push. The WebAssembly standard requires +-- that this type appear explicitly in the code. + +data WasmFunctionType pre post = + WasmFunctionType { ft_pops :: TypeList pre + , ft_pushes :: TypeList post + } + + +-- | Representation of WebAssembly control flow. +-- Normally written as +-- @ +-- WasmControl s e pre post +-- @ +-- Type parameter `s` is the type of (unspecified) statements. +-- It might be instantiated with an open Cmm block or with a sequence +-- of Wasm instructions. +-- Parameter `e` is the type of expressions. +-- Parameter `pre` represents the values that are expected on the +-- WebAssembly stack when the code runs, and `post` represents +-- the state of the stack on completion. + +data WasmControl :: Type -> Type -> [WasmType] -> [WasmType] -> Type where + + WasmPush :: WasmTypeTag t -> e -> WasmControl s e stack (t ': stack) + + WasmBlock :: WasmFunctionType pre post + -> WasmControl s e pre post + -> WasmControl s e pre post + WasmLoop :: WasmFunctionType pre post + -> WasmControl s e pre post + -> WasmControl s e pre post + WasmIfTop :: WasmFunctionType pre post + -> WasmControl s e pre post + -> WasmControl s e pre post + -> WasmControl s e ('I32 ': pre) post + + WasmBr :: Int -> WasmControl s e dropped destination -- not typechecked + WasmFallthrough :: WasmControl s e dropped destination + -- generates no code, but has the same type as a branch + + WasmBrTable :: e + -> BrTableInterval -- for testing + -> [Int] -- targets + -> Int -- default target + -> WasmControl s e dropped destination + -- invariant: the table interval is contained + -- within [0 .. pred (length targets)] + WasmReturnTop :: WasmTypeTag t + -> WasmControl s e (t ': t1star) t2star -- as per type system + + WasmActions :: s -> WasmControl s e stack stack -- basic block: one entry, one exit + WasmSeq :: WasmControl s e pre mid -> WasmControl s e mid post -> WasmControl s e pre post + +data BrTableInterval + = BrTableInterval { bti_lo :: Integer, bti_count :: Integer } + deriving (Show) + +instance Outputable BrTableInterval where + ppr range = brackets $ hcat[integer (bti_lo range), text "..", integer hi] + where hi = bti_lo range + bti_count range - 1 + +brTableLimit :: Int + -- ^ Size of the largest table that is deemed acceptable in a `br_table` instruction. + -- + -- Source: https://chromium.googlesource.com/v8/v8/+/master/src/wasm/wasm-limits.h#51 + -- See also discussion at https://github.com/WebAssembly/spec/issues/607, which shows + -- that major browsers agree. +brTableLimit = 65520 + +inclusiveInterval :: Integer -> Integer -> BrTableInterval +inclusiveInterval lo hi + | lo <= hi = let count = hi - lo + 1 + in if count > toInteger brTableLimit then + panic "interval too large in br_table instruction" + else + BrTableInterval lo count + | otherwise = panic "GHC.Wasm.ControlFlow: empty interval" + +(<>) :: forall s e pre mid post + . WasmControl s e pre mid + -> WasmControl s e mid post + -> WasmControl s e pre post +(<>) = WasmSeq +-- N.B. Fallthrough can't be optimized away because of type checking. + + + +-- Syntactic sugar. +pattern WasmIf :: WasmFunctionType pre post + -> e + -> WasmControl s e pre post + -> WasmControl s e pre post + -> WasmControl s e pre post + +pattern WasmIf ty e t f = + WasmPush TagI32 e `WasmSeq` WasmIfTop ty t f + +-- More syntactic sugar. +wasmReturn :: WasmTypeTag t -> e -> WasmControl s e (t ': t1star) t2star +wasmReturn tag e = WasmPush tag e `WasmSeq` WasmReturnTop tag diff --git a/compiler/GHC/Wasm/ControlFlow/FromCmm.hs b/compiler/GHC/Wasm/ControlFlow/FromCmm.hs new file mode 100644 index 0000000000..741ab35560 --- /dev/null +++ b/compiler/GHC/Wasm/ControlFlow/FromCmm.hs @@ -0,0 +1,354 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE DataKinds #-} + +module GHC.Wasm.ControlFlow.FromCmm + ( structuredControl + ) +where + +import GHC.Prelude hiding (succ) + +import Data.Function +import Data.List (sortBy) +import qualified Data.Tree as Tree + +import GHC.Cmm +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dominators +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Switch + +import GHC.Platform + +import GHC.Utils.Misc +import GHC.Utils.Panic +import GHC.Utils.Outputable ( Outputable, text, (<+>), ppr + , pprWithCommas + , showSDocUnsafe + ) + +import GHC.Wasm.ControlFlow + + +{-| +Module : GHC.Wasm.ControlFlow.FromCmm +Description : Translation of (reducible) Cmm control flow to WebAssembly + +Code in this module can translate any _reducible_ Cmm control-flow +graph to the structured control flow that is required by WebAssembly. +The algorithm is subtle and is described in detail in a draft paper +to be found at https://www.cs.tufts.edu/~nr/pubs/relooper.pdf. +-} + +--------------------- Abstraction of Cmm control flow ----------------------- + +-- | Abstracts the kind of control flow we understand how to convert. +-- A block can be left in one of four ways: +-- +-- * Unconditionally +-- +-- * Conditionally on a predicate of type `e` +-- +-- * To a location determined by the value of a scrutinee of type `e` +-- +-- * Not at all. + +data ControlFlow e = Unconditional Label + | Conditional e Label Label + | Switch { _scrutinee :: e + , _range :: BrTableInterval + , _targets :: [Maybe Label] -- from 0 + , _defaultTarget :: Maybe Label + } + | TailCall e + +flowLeaving :: Platform -> CmmBlock -> ControlFlow CmmExpr +flowLeaving platform b = + case lastNode b of + CmmBranch l -> Unconditional l + CmmCondBranch c t f _ -> Conditional c t f + CmmSwitch e targets -> + let (offset, target_labels) = switchTargetsToTable targets + (lo, hi) = switchTargetsRange targets + default_label = switchTargetsDefault targets + scrutinee = smartPlus platform e offset + range = inclusiveInterval (lo+toInteger offset) (hi+toInteger offset) + in Switch scrutinee range (atMost brTableLimit target_labels) default_label + + CmmCall { cml_cont = Just l } -> Unconditional l + CmmCall { cml_cont = Nothing, cml_target = e } -> TailCall e + CmmForeignCall { succ = l } -> Unconditional l + + where atMost :: Int -> [a] -> [a] + atMost k xs = if xs `hasAtLeast` k then + panic "switch table is too big for WebAssembly" + else + xs + + hasAtLeast :: [a] -> Int -> Bool + hasAtLeast _ 0 = True + hasAtLeast [] _ = False + hasAtLeast (_:xs) k = hasAtLeast xs (k - 1) + + +----------------------- Evaluation contexts ------------------------------ + +-- | The syntactic constructs in which Wasm code may be contained. +-- A list of these constructs represents an evaluation context, +-- which is used to determined what level of `br` instruction +-- reaches a given label. + +data ContainingSyntax + = BlockFollowedBy Label + | LoopHeadedBy Label + | IfThenElse (Maybe Label) -- ^ Carries the label that follows `if...end`, if any + +matchesFrame :: Label -> ContainingSyntax -> Bool +matchesFrame label (BlockFollowedBy l) = label == l +matchesFrame label (LoopHeadedBy l) = label == l +matchesFrame label (IfThenElse (Just l)) = label == l +matchesFrame _ _ = False + +data Context = Context { enclosing :: [ContainingSyntax] + , fallthrough :: Maybe Label -- the label can + -- be reached just by "falling through" + -- the hole + } + +instance Outputable Context where + ppr c | Just l <- fallthrough c = + pprWithCommas ppr (enclosing c) <+> text "fallthrough to" <+> ppr l + | otherwise = pprWithCommas ppr (enclosing c) + +emptyContext :: Context +emptyContext = Context [] Nothing + +inside :: ContainingSyntax -> Context -> Context +withFallthrough :: Context -> Label -> Context + +inside frame c = c { enclosing = frame : enclosing c } +withFallthrough c l = c { fallthrough = Just l } + +type CmmActions = Block CmmNode O O +type CfgNode = CmmBlock + +type FT pre post = WasmFunctionType pre post + +returns :: FT '[] '[ 'I32] +doesn'tReturn :: FT '[] '[] + +returns = WasmFunctionType TypeListNil (TypeListCons TagI32 TypeListNil) +doesn'tReturn = WasmFunctionType TypeListNil TypeListNil + +emptyPost :: FT pre post -> Bool +emptyPost (WasmFunctionType _ TypeListNil) = True +emptyPost _ = False + +----------------------- Translation ------------------------------ + +-- | Convert a Cmm CFG to WebAssembly's structured control flow. + +structuredControl :: forall expr stmt . + Platform -- ^ needed for offset calculation + -> (Label -> CmmExpr -> expr) -- ^ translator for expressions + -> (Label -> CmmActions -> stmt) -- ^ translator for straight-line code + -> CmmGraph -- ^ CFG to be translated + -> WasmControl stmt expr '[] '[ 'I32] +structuredControl platform txExpr txBlock g = + doTree returns dominatorTree emptyContext + where + gwd :: GraphWithDominators CmmNode + gwd = graphWithDominators g + + dominatorTree :: Tree.Tree CfgNode-- Dominator tree in which children are sorted + -- with highest reverse-postorder number first + dominatorTree = fmap blockLabeled $ sortTree $ gwdDominatorTree gwd + + doTree :: FT '[] post -> Tree.Tree CfgNode -> Context -> WasmControl stmt expr '[] post + nodeWithin :: forall post . + FT '[] post -> CfgNode -> [Tree.Tree CfgNode] -> Maybe Label + -> Context -> WasmControl stmt expr '[] post + doBranch :: FT '[] post -> Label -> Label -> Context -> WasmControl stmt expr '[] post + + doTree fty (Tree.Node x children) context = + let codeForX = nodeWithin fty x selectedChildren Nothing + in if isLoopHeader x then + WasmLoop fty (codeForX loopContext) + else + codeForX context + where selectedChildren = case lastNode x of + CmmSwitch {} -> children + -- N.B. Unlike `if`, translation of Switch uses only labels. + _ -> filter hasMergeRoot children + loopContext = LoopHeadedBy (entryLabel x) `inside` + (context `withFallthrough` entryLabel x) + hasMergeRoot = isMergeNode . Tree.rootLabel + + nodeWithin fty x (y_n:ys) (Just zlabel) context = + WasmBlock fty $ nodeWithin fty x (y_n:ys) Nothing context' + where context' = BlockFollowedBy zlabel `inside` context + nodeWithin fty x (y_n:ys) Nothing context = + nodeWithin doesn'tReturn x ys (Just ylabel) (context `withFallthrough` ylabel) <> + doTree fty y_n context + where ylabel = treeEntryLabel y_n + nodeWithin fty x [] (Just zlabel) context + | not (generatesIf x) = + WasmBlock fty (nodeWithin fty x [] Nothing context') + where context' = BlockFollowedBy zlabel `inside` context + nodeWithin fty x [] maybeMarks context = + translationOfX context + where xlabel = entryLabel x + + translationOfX :: Context -> WasmControl stmt expr '[] post + translationOfX context = + WasmActions (txBlock xlabel $ nodeBody x) <> + case flowLeaving platform x of + Unconditional l -> doBranch fty xlabel l context + Conditional e t f -> + WasmIf fty + (txExpr xlabel e) + (doBranch fty xlabel t (IfThenElse maybeMarks `inside` context)) + (doBranch fty xlabel f (IfThenElse maybeMarks `inside` context)) + TailCall e -> WasmPush TagI32 (txExpr xlabel e) <> WasmReturnTop TagI32 + Switch e range targets default' -> + WasmBrTable (txExpr xlabel e) + range + (map switchIndex targets) + (switchIndex default') + where switchIndex :: Maybe Label -> Int + switchIndex Nothing = 0 -- arbitrary; GHC won't go here + switchIndex (Just lbl) = index lbl (enclosing context) + + doBranch fty from to context + | to `elem` fallthrough context && emptyPost fty = WasmFallthrough + -- optimization: `br` is not needed, but it typechecks + -- only if nothing is expected to be left on the stack + + | isBackward from to = WasmBr i -- continue + | isMergeLabel to = WasmBr i -- exit + | otherwise = doTree fty (subtreeAt to) context -- inline the code here + where i = index to (enclosing context) + + generatesIf :: CmmBlock -> Bool + generatesIf x = case flowLeaving platform x of Conditional {} -> True + _ -> False + + ---- everything else is utility functions + + treeEntryLabel :: Tree.Tree CfgNode -> Label + treeEntryLabel = entryLabel . Tree.rootLabel + + sortTree :: Tree.Tree Label -> Tree.Tree Label + -- Sort highest rpnum first + sortTree (Tree.Node label children) = + Tree.Node label $ sortBy (flip compare `on` (rpnum . Tree.rootLabel)) $ + map sortTree children + + subtreeAt :: Label -> Tree.Tree CfgNode + blockLabeled :: Label -> CfgNode + rpnum :: Label -> RPNum-- reverse postorder number of the labeled block + isMergeLabel :: Label -> Bool + isMergeNode :: CfgNode -> Bool + isLoopHeader :: CfgNode -> Bool-- identify loop headers + -- all nodes whose immediate dominator is the given block. + -- They are produced with the largest RP number first, + -- so the largest RP number is pushed on the context first. + dominates :: Label -> Label -> Bool + -- Domination relation (not just immediate domination) + + blockmap :: LabelMap CfgNode + GMany NothingO blockmap NothingO = g_graph g + + blockLabeled l = findLabelIn l blockmap + + rpblocks :: [CfgNode] + rpblocks = revPostorderFrom blockmap (g_entry g) + + foldEdges :: forall a . (Label -> Label -> a -> a) -> a -> a + foldEdges f a = + foldl (\a (from, to) -> f from to a) + a + [(entryLabel from, to) | from <- rpblocks, to <- successors from] + + isMergeLabel l = setMember l mergeBlockLabels + isMergeNode = isMergeLabel . entryLabel + + isBackward :: Label -> Label -> Bool + isBackward from to = rpnum to <= rpnum from -- self-edge counts as a backward edge + + subtreeAt label = findLabelIn label subtrees + subtrees :: LabelMap (Tree.Tree CfgNode) + subtrees = addSubtree mapEmpty dominatorTree + where addSubtree map t@(Tree.Node root children) = + foldl addSubtree (mapInsert (entryLabel root) t map) children + + mergeBlockLabels :: LabelSet + -- N.B. A block is a merge node if it is where control flow merges. + -- That means it is entered by multiple control-flow edges, _except_ + -- back edges don't count. There must be multiple paths that enter the + -- block _without_ passing through the block itself. + mergeBlockLabels = + setFromList [entryLabel n | n <- rpblocks, big (forwardPreds (entryLabel n))] + where big [] = False + big [_] = False + big (_ : _ : _) = True + + forwardPreds :: Label -> [Label] -- reachable predecessors of reachable blocks, + -- via forward edges only + forwardPreds = \l -> mapFindWithDefault [] l predmap + where predmap :: LabelMap [Label] + predmap = foldEdges addForwardEdge mapEmpty + addForwardEdge from to pm + | isBackward from to = pm + | otherwise = addToList (from :) to pm + + isLoopHeader = isHeaderLabel . entryLabel + isHeaderLabel = (`setMember` headers) -- loop headers + where headers :: LabelSet + headers = foldMap headersPointedTo blockmap + headersPointedTo block = + setFromList [label | label <- successors block, + dominates label (entryLabel block)] + + index :: Label -> [ContainingSyntax] -> Int + index _ [] = panic "destination label not in evaluation context" + index label (frame : context) + | label `matchesFrame` frame = 0 + | otherwise = 1 + index label context + + rpnum = gwdRPNumber gwd + dominates lbl blockname = + lbl == blockname || dominatorsMember lbl (gwdDominatorsOf gwd blockname) + + + +nodeBody :: CfgNode -> CmmActions +nodeBody (BlockCC _first middle _last) = middle + + +smartPlus :: Platform -> CmmExpr -> Int -> CmmExpr +smartPlus _ e 0 = e +smartPlus platform e k = + CmmMachOp (MO_Add width) [e, CmmLit (CmmInt (fromIntegral k) width)] + where width = cmmExprWidth platform e + +addToList :: (IsMap map) => ([a] -> [a]) -> KeyOf map -> map [a] -> map [a] +addToList consx = mapAlter add + where add Nothing = Just (consx []) + add (Just xs) = Just (consx xs) + +------------------------------------------------------------------ +--- everything below here is for diagnostics in case of panic + +instance Outputable ContainingSyntax where + ppr (BlockFollowedBy l) = text "node" <+> ppr l + ppr (LoopHeadedBy l) = text "loop" <+> ppr l + ppr (IfThenElse l) = text "if-then-else" <+> ppr l + +findLabelIn :: HasDebugCallStack => Label -> LabelMap a -> a +findLabelIn lbl = mapFindWithDefault failed lbl + where failed = + panic $ "label " ++ showSDocUnsafe (ppr lbl) ++ " not found in control-flow graph" diff --git a/compiler/GHC/Wasm/ControlFlow/ToAsm.hs b/compiler/GHC/Wasm/ControlFlow/ToAsm.hs new file mode 100644 index 0000000000..fbf387753a --- /dev/null +++ b/compiler/GHC/Wasm/ControlFlow/ToAsm.hs @@ -0,0 +1,98 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module GHC.Wasm.ControlFlow.ToAsm + ( toIndentedAsm + , noIndentation + ) +where + +{-| +Module : GHC.Wasm.ControlFlow.ToAsm +Description : Convert WebAssembly control-flow instructions to GNU assembler syntax. +-} + +import GHC.Prelude + +import Data.ByteString.Builder (Builder) +import qualified Data.ByteString.Builder as BS +import Data.List (intersperse) +import Data.Monoid + +import GHC.Utils.Panic + +import GHC.Wasm.ControlFlow hiding ((<>)) + +type Indentation = Builder + +standardIndentation :: Indentation +standardIndentation = " " + +noIndentation :: Indentation +noIndentation = "" + + +-- | Assuming that the type of a construct can be rendered as inline +-- syntax, return the syntax. For every type our translator +-- generates, the assumption should hold. +wasmFunctionType :: WasmFunctionType pre post -> Builder +wasmFunctionType (WasmFunctionType TypeListNil TypeListNil) = "void" +wasmFunctionType (WasmFunctionType TypeListNil (TypeListCons t TypeListNil)) = tagBuilder t +wasmFunctionType _ = panic "function type needs to be externalized" + -- Anything other then [] -> [], [] -> [t] needs to be put into a + -- type table and referred to by number. + +-- | Tag used in GNU assembly to name a WebAssembly type +tagBuilder :: WasmTypeTag a -> Builder +tagBuilder TagI32 = "i32" +tagBuilder TagF32 = "f32" + + +type Printer a = Indentation -> a -> Builder + +-- | Converts WebAssembly control-flow code into GNU (Clang) assembly +-- syntax, indented for readability. For ease of combining with other +-- output, the result does not have a trailing newline or preceding +-- indentation. (The indentation argument simply gives the blank +-- string that follows each emitted newline.) +-- +-- The initial `Indentation` argument specifies the indentation of the +-- entire output; for most use cases it will likely be `mempty`. + +toIndentedAsm :: forall s e pre post + . Printer s -> Printer e -> Printer (WasmControl s e pre post) +toIndentedAsm ps pe indent s = print s + where print, shift :: WasmControl s e pre' post' -> Builder + newline :: Builder -> Builder -> Builder + (<+>) :: Builder -> Builder -> Builder + ty = wasmFunctionType + + -- cases meant to avoid generating any output for `WasmFallthrough` + print (WasmFallthrough `WasmSeq` s) = print s + print (s `WasmSeq` WasmFallthrough) = print s + print (WasmIfTop t s WasmFallthrough) = + "if" <+> ty t `newline` shift s `newline` "end_if" + print (WasmIfTop t WasmFallthrough s) = + "if" <+> ty t `newline` "else" `newline` shift s `newline` "end_if" + + -- all the other cases + print (WasmPush _ e) = pe indent e + print (WasmBlock t s) = "block" <+> ty t `newline` shift s `newline` "end_block" + print (WasmLoop t s) = "loop" <+> ty t `newline` shift s `newline` "end_loop" + print (WasmIfTop t ts fs) = "if" <+> ty t `newline` shift ts `newline` + "else" `newline` shift fs `newline` "end_if" + print (WasmBr l) = "br" <+> BS.intDec l + print (WasmBrTable e _ ts t) = + pe indent e `newline` "br_table {" <+> + mconcat (intersperse ", " [BS.intDec i | i <- ts <> [t]]) <+> + "}" + print (WasmReturnTop _) = "return" + print (WasmActions as) = ps indent as + print (s `WasmSeq` s') = print s `newline` print s' + + print WasmFallthrough = "// fallthrough" -- rare + + newline s s' = s <> "\n" <> indent <> s' + shift s = standardIndentation <> toIndentedAsm ps pe (indent <> standardIndentation) s + s <+> s' = s <> " " <> s' |