add new modules for reducibility and WebAssembly translationwip/nr/wasm-control-flow

also includes an emitter for GNU assembler code and some regression tests
author: Norman Ramsey <Norman.Ramsey@tweag.io> 2022-05-17 15:07:05 -0400
committer: Norman Ramsey <Norman.Ramsey@tweag.io> 2022-08-09 16:36:50 -0400
commit: a8a3a8785fa1a58436bcf6460c2f3bcc8412cd53 (patch)
tree: dc62e83e947c3b94800c533177aa2ce5503c9983 /compiler/GHC
parent: 56d3201996ccd9e858267dad7b6af577f3a71e56 (diff)
download: haskell-wip/nr/wasm-control-flow.tar.gz
5 files changed, 1097 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/Reducibility.hs b/compiler/GHC/Cmm/Reducibility.hs
new file mode 100644
index 0000000000..82a6616f0f
--- /dev/null
+++ b/compiler/GHC/Cmm/Reducibility.hs
@@ -0,0 +1,229 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+
+
+module GHC.Cmm.Reducibility
+  ( Reducibility(..)
+  , reducibility
+
+  , asReducible
+  )
+where
+
+import GHC.Prelude hiding (splitAt, succ)
+
+import Control.Monad
+import Data.List (nub)
+import Data.Maybe
+import Data.Semigroup
+import qualified Data.Sequence as Seq
+
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dominators
+import GHC.Cmm.Dataflow.Graph hiding (addBlock)
+import GHC.Cmm.Dataflow.Label
+import GHC.Data.Graph.Collapse
+import GHC.Data.Graph.Inductive.Graph
+import GHC.Data.Graph.Inductive.PatriciaTree
+import GHC.Types.Unique.Supply
+import GHC.Utils.Panic
+
+{-|
+Module      : GHC.Cmm.Reducibility
+Description : Tell if a `CmmGraph` is reducible, or make it so
+
+Test a Cmm control-flow graph for reducibility.  And provide a
+function that, when given an arbitrary control-flow graph, returns an
+equivalent, reducible control-flow graph.  The equivalent graph is
+obtained by "splitting" (copying) nodes of the original graph.
+The resulting equivalent graph has the same dynamic behavior as the
+original, but it is larger.
+
+Documentation uses the language of control-flow analysis, in which a
+basic block is called a "node."  These "nodes" are `CmmBlock`s or
+equivalent; they have nothing to do with a `CmmNode`.
+
+For more on reducibility and related analyses and algorithms, see
+Note [Reducibility resources]
+-}
+
+
+
+
+-- | Represents the result of a reducibility analysis.
+data Reducibility = Reducible | Irreducible
+  deriving (Eq, Show)
+
+-- | Given a graph, say whether the graph is reducible.  The graph must
+-- be bundled with a dominator analysis and a reverse postorder
+-- numbering, as these results are needed to perform the test.
+
+reducibility :: NonLocal node
+             => GraphWithDominators node
+             -> Reducibility
+reducibility gwd =
+    if all goodBlock blockmap then Reducible else Irreducible
+  where goodBlock b = all (goodEdge (entryLabel b)) (successors b)
+        goodEdge from to = rpnum to > rpnum from || to `dominates` from
+        rpnum = gwdRPNumber gwd
+        blockmap = graphMap $ gwd_graph gwd
+        dominators = gwdDominatorsOf gwd
+        dominates lbl blockname =
+            lbl == blockname || dominatorsMember lbl (dominators blockname)
+
+-- | Given a graph, return an equivalent reducible graph, by
+-- "splitting" (copying) nodes if necessary.  The input
+-- graph must be bundled with a dominator analysis and a reverse
+-- postorder numbering.  The computation is monadic because when a
+-- node is split, the new copy needs a fresh label.
+--
+-- Use this function whenever a downstream algorithm needs a reducible
+-- control-flow graph.
+
+asReducible :: GraphWithDominators CmmNode
+            -> UniqSM (GraphWithDominators CmmNode)
+asReducible gwd = case reducibility gwd of
+                    Reducible -> return gwd
+                    Irreducible -> assertReducible <$> nodeSplit gwd
+
+assertReducible :: GraphWithDominators CmmNode -> GraphWithDominators CmmNode
+assertReducible gwd = case reducibility gwd of
+                        Reducible -> gwd
+                        Irreducible -> panic "result not reducible"
+
+----------------------------------------------------------------
+
+-- | Split one or more nodes of the given graph, which must be
+-- irreducible.
+
+nodeSplit :: GraphWithDominators CmmNode
+          -> UniqSM (GraphWithDominators CmmNode)
+nodeSplit gwd =
+    graphWithDominators <$> inflate (g_entry g) <$> runNullCollapse collapsed
+  where g = gwd_graph gwd
+        collapsed :: NullCollapseViz (Gr CmmSuper ())
+        collapsed = collapseInductiveGraph (cgraphOfCmm g)
+
+type CGraph = Gr CmmSuper ()
+
+-- | Turn a collapsed supernode back into a control-flow graph
+inflate :: Label -> CGraph -> CmmGraph
+inflate entry cg = CmmGraph entry graph
+  where graph = GMany NothingO body NothingO
+        body :: LabelMap CmmBlock
+        body = foldl (\map block -> mapInsert (entryLabel block) block map) mapEmpty $
+               blocks super
+        super = case labNodes cg of
+                  [(_, s)] -> s
+                  _ -> panic "graph given to `inflate` is not singleton"
+
+
+-- | Convert a `CmmGraph` into an inductive graph.
+-- (The function coalesces duplicate edges into a single edge.)
+cgraphOfCmm :: CmmGraph -> CGraph
+cgraphOfCmm g = foldl' addSuccEdges (mkGraph cnodes []) blocks
+   where blocks = zip [0..] $ revPostorderFrom (graphMap g) (g_entry g)
+         cnodes = [(k, super block) | (k, block) <- blocks]
+          where super block = Nodes (entryLabel block) (Seq.singleton block)
+         labelNumber = \lbl -> fromJust $ mapLookup lbl numbers
+             where numbers :: LabelMap Int
+                   numbers = mapFromList $ map swap blocks
+                   swap (k, block) = (entryLabel block, k)
+         addSuccEdges :: CGraph -> (Node, CmmBlock) -> CGraph
+         addSuccEdges graph (k, block) =
+             insEdges [(k, labelNumber lbl, ()) | lbl <- nub $ successors block] graph
+{-
+
+Note [Reducibility resources]
+-----------------------------
+
+*Flow Analysis of Computer Programs.* Matthew S. Hecht North Holland, 1977.
+Available to borrow from archive.org.
+
+Matthew S. Hecht and Jeffrey D. Ullman (1972).
+Flow Graph Reducibility. SIAM J. Comput., 1(2), 188–202.
+https://doi.org/10.1137/0201014
+
+Johan Janssen and Henk Corporaal. 1997. Making graphs reducible with
+controlled node splitting. ACM TOPLAS 19, 6 (Nov. 1997),
+1031–1052. DOI:https://doi.org/10.1145/267959.269971
+
+Sebastian Unger and Frank Mueller. 2002. Handling irreducible loops:
+optimized node splitting versus DJ-graphs. ACM TOPLAS 24, 4 (July
+2002), 299–333. https://doi.org/10.1145/567097.567098.  (This one
+contains the most detailed account of how the Hecht/Ullman algorithm
+is used to modify an actual control-flow graph.  But still not much detail.)
+
+https://rgrig.blogspot.com/2009/10/dtfloatleftclearleft-summary-of-some.html
+ (Nice summary of useful facts)
+
+-}
+
+
+
+type Seq = Seq.Seq
+
+-- | A "supernode" contains a single-entry, multiple-exit, reducible subgraph.
+-- The entry point is the given label, and the block with that label
+-- dominates all the other blocks in the supernode.  When an entire
+-- graph is collapsed into a single supernode, the graph is reducible.
+-- More detail can be found in "GHC.Data.Graph.Collapse".
+
+data CmmSuper
+    = Nodes { label :: Label
+            , blocks :: Seq CmmBlock
+            }
+
+instance Semigroup CmmSuper where
+  s <> s' = Nodes (label s) (blocks s <> blocks s')
+
+instance PureSupernode CmmSuper where
+  superLabel = label
+  mapLabels = changeLabels
+
+instance Supernode CmmSuper NullCollapseViz where
+  freshen s = liftUniqSM $ relabel s
+
+
+-- | Return all labels defined within a supernode.
+definedLabels :: CmmSuper -> Seq Label
+definedLabels = fmap entryLabel . blocks
+
+
+
+-- | Map the given function over every use and definition of a label
+-- in the given supernode.
+changeLabels :: (Label -> Label) -> (CmmSuper -> CmmSuper)
+changeLabels f (Nodes l blocks) = Nodes (f l) (fmap (changeBlockLabels f) blocks)
+
+-- | Map the given function over every use and definition of a label
+-- in the given block.
+changeBlockLabels :: (Label -> Label) -> CmmBlock -> CmmBlock
+changeBlockLabels f block = blockJoin entry' middle exit'
+  where (entry, middle, exit) = blockSplit block
+        entry' = let CmmEntry l scope = entry
+                 in  CmmEntry (f l) scope
+        exit' = case exit of
+                  -- unclear why mapSuccessors doesn't touch these
+                  CmmCall { cml_cont = Just l } -> exit { cml_cont = Just (f l) }
+                  CmmForeignCall { succ = l } -> exit { succ = f l }
+                  _ -> mapSuccessors f exit
+
+
+-- | Within the given supernode, replace every defined label (and all
+-- of its uses) with a fresh label.
+
+relabel :: CmmSuper -> UniqSM CmmSuper
+relabel node = do
+     finite_map <- foldM addPair mapEmpty $ definedLabels node
+     return $ changeLabels (labelChanger finite_map) node
+  where addPair :: LabelMap Label -> Label -> UniqSM (LabelMap Label)
+        addPair map old = do new <- newBlockId
+                             return $ mapInsert old new map
+        labelChanger :: LabelMap Label -> (Label -> Label)
+        labelChanger mapping = \lbl -> mapFindWithDefault lbl lbl mapping
diff --git a/compiler/GHC/Data/Graph/Collapse.hs b/compiler/GHC/Data/Graph/Collapse.hs
new file mode 100644
index 0000000000..a19b5fa4b7
--- /dev/null
+++ b/compiler/GHC/Data/Graph/Collapse.hs
@@ -0,0 +1,264 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+module GHC.Data.Graph.Collapse
+  ( PureSupernode(..)
+  , Supernode(..)
+  , collapseInductiveGraph
+  , VizCollapseMonad(..)
+  , NullCollapseViz(..)
+  , runNullCollapse
+  , MonadUniqSM(..)
+  )
+where
+
+import GHC.Prelude
+
+import Control.Exception
+import Control.Monad
+import Data.List (delete, union, insert, intersect)
+import Data.Semigroup
+
+import GHC.Cmm.Dataflow.Label
+import GHC.Data.Graph.Inductive.Graph
+import GHC.Types.Unique.Supply
+import GHC.Utils.Panic
+
+
+{-|
+Module      : GHC.Data.Graph.Collapse
+Description : Implement the "collapsing" algorithm Hecht and Ullman
+
+A control-flow graph is reducible if and only if it is collapsible
+according to the definition of Hecht and Ullman (1972).   This module
+implements the collapsing algorithm of Hecht and Ullman, and if it
+encounters a graph that is not collapsible, it splits nodes until the
+graph is fully collapsed.  It then reports what nodes (if any) had to
+be split in order to collapse the graph.  The information is used
+upstream to node-split Cmm graphs.
+
+The module uses the inductive graph representation cloned from the
+Functional Graph Library (Hackage package `fgl`, modules
+`GHC.Data.Graph.Inductive.*`.)
+
+-}
+
+-- Full reference to paper: Matthew S. Hecht and Jeffrey D. Ullman
+-- (1972).  Flow Graph Reducibility. SIAM J. Comput., 1(2), 188–202.
+-- https://doi.org/10.1137/0201014
+
+
+------------------ Graph-splitting monad -----------------------
+
+-- | If you want to visualize the graph-collapsing algorithm, create
+-- an instance of monad `VizCollapseMonad`.  Each step in the
+-- algorithm is announced to the monad as a side effect.  If you don't
+-- care about visualization, you would use the `NullCollapseViz`
+-- monad, in which these operations are no-ops.
+
+class (Monad m) => MonadUniqSM m where
+  liftUniqSM :: UniqSM a -> m a
+
+class (MonadUniqSM m, Graph gr, Supernode s m) => VizCollapseMonad m gr s where
+  consumeByInGraph :: Node -> Node -> gr s () -> m ()
+  splitGraphAt :: gr s () -> LNode s -> m ()
+  finalGraph :: gr s () -> m ()
+
+
+
+-- | The identity monad as a `VizCollapseMonad`.  Use this monad when
+-- you want efficiency in graph collapse.
+newtype NullCollapseViz a = NullCollapseViz { unNCV :: UniqSM a }
+  deriving (Functor, Applicative, Monad, MonadUnique)
+
+instance MonadUniqSM NullCollapseViz where
+  liftUniqSM = NullCollapseViz
+
+instance (Graph gr, Supernode s NullCollapseViz) =>
+    VizCollapseMonad NullCollapseViz gr s where
+  consumeByInGraph _ _ _ = return ()
+  splitGraphAt _ _ = return ()
+  finalGraph _ = return ()
+
+runNullCollapse :: NullCollapseViz a -> UniqSM a
+runNullCollapse = unNCV
+
+
+------------------ Utility functions on graphs -----------------------
+
+
+-- | Tell if a `Node` has a single predecessor.
+singlePred :: Graph gr => gr a b -> Node -> Bool
+singlePred gr n
+    | ([_], _, _, _) <- context gr n = True
+    | otherwise = False
+
+-- | Use this function to extract information about a `Node` that you
+-- know is in a `Graph`.  It's like `match` from `Graph`, but it must
+-- succeed.
+forceMatch :: (Graph gr)
+           => Node -> gr s b -> (Context s b, gr s b)
+forceMatch node g = case match node g of (Just c, g') -> (c, g')
+                                         _ -> panicDump node g
+ where panicDump :: Graph gr => Node -> gr s b -> any
+       panicDump k _g =
+         panic $ "GHC.Data.Graph.Collapse failed to match node " ++ show k
+
+-- | Rewrite the label of a given node.
+updateNode :: DynGraph gr => (s -> s) -> Node -> gr s b -> gr s b
+updateNode relabel node g = (preds, n, relabel this, succs) & g'
+    where ((preds, n, this, succs), g') = forceMatch node g
+
+
+-- | Test if a graph has but a single node.
+singletonGraph :: Graph gr => gr a b -> Bool
+singletonGraph g = case labNodes g of [_] -> True
+                                      _ -> False
+
+
+----------------  Supernodes ------------------------------------
+
+-- | A "supernode" stands for a collection of one or more nodes (basic
+-- blocks) that have been coalesced by the Hecht-Ullman algorithm.
+-- A collection in a supernode constitutes a /reducible/ subgraph of a
+-- control-flow graph.  (When an entire control-flow graph is collapsed
+-- to a single supernode, the flow graph is reducible.)
+--
+-- The idea of node splitting is to collapse a control-flow graph down
+-- to a single supernode, then materialize (``inflate'') the reducible
+-- equivalent graph from that supernode.  The `Supernode` class
+-- defines only the methods needed to collapse; rematerialization is
+-- the responsiblity of the client.
+--
+-- During the Hecht-Ullman algorithm, every supernode has a unique
+-- entry point, which is given by `superLabel`.  But this invariant is
+-- not guaranteed by the class methods and is not a law of the class.
+-- The `mapLabels` function rewrites all labels that appear in a
+-- supernode (both definitions and uses).  The `freshen` function
+-- replaces every appearance of a /defined/ label with a fresh label.
+-- (Appearances include both definitions and uses.)
+--
+-- Laws:
+-- @
+--    superLabel (n <> n') == superLabel n
+--    blocks (n <> n') == blocks n `union` blocks n'
+--    mapLabels f (n <> n') = mapLabels f n <> mapLabels f n'
+--    mapLabels id == id
+--    mapLabels (f . g) == mapLabels f . mapLabels g
+-- @
+--
+-- (We expect `freshen` to distribute over `<>`, but because of
+-- the fresh names involved, formulating a precise law is a bit
+-- challenging.)
+
+class (Semigroup node) => PureSupernode node where
+  superLabel :: node -> Label
+  mapLabels :: (Label -> Label) -> (node -> node)
+
+class (MonadUnique m, PureSupernode node) => Supernode node m where
+  freshen :: node -> m node
+
+  -- ghost method
+  -- blocks :: node -> Set Block
+
+------------------ Functions specific to the algorithm -----------------------
+
+-- | Merge two nodes, return new graph plus list of nodes that newly have a single
+-- predecessor.  This function implements transformation $T_2$ from
+-- the Hecht and Ullman paper (merge the node into its unique
+-- predecessor).  It then also removes self-edges (transformation $T_1$ from
+-- the Hecht and Ullman paper).  There is no need for a separate
+-- implementation of $T_1$.
+--
+-- `consumeBy v u g` returns the graph that results when node v is
+-- consumed by node u in graph g.  Both v and u are replaced with a new node u'
+-- with these properties:
+--
+--    LABELS(u') = LABELS(u) `union` LABELS(v)
+--    SUCC(u') = SUCC(u) `union` SUCC(v) - { u }
+--    every node that previously points to u now points to u'
+--
+-- It also returns a list of nodes in the result graph that
+-- are *newly* single-predecessor nodes.
+
+consumeBy :: (DynGraph gr, PureSupernode s)
+          => Node -> Node -> gr s () -> (gr s (), [Node])
+consumeBy toNode fromNode g =
+    assert (toPreds == [((), fromNode)]) $
+    (newGraph, newCandidates)
+  where ((toPreds,   _, to,   toSuccs),   g')  = forceMatch toNode   g
+        ((fromPreds, _, from, fromSuccs), g'') = forceMatch fromNode g'
+        context = ( fromPreds -- by construction, can't have `toNode`
+                  , fromNode
+                  , from <> to
+                  , delete ((), fromNode) toSuccs `union` fromSuccs
+                  )
+        newGraph = context & g''
+        newCandidates = filter (singlePred newGraph) changedNodes
+        changedNodes = fromNode `insert` map snd (toSuccs `intersect` fromSuccs)
+
+-- | Split a given node.  The node is replaced with a collection of replicas,
+-- one for each predecessor.  After the split, every predecessor
+-- points to a unique replica.
+split :: forall gr s b m . (DynGraph gr, Supernode s m)
+      => Node -> gr s b -> m (gr s b)
+split node g = assert (isMultiple preds) $ foldM addReplica g' newNodes
+  where ((preds, _, this, succs), g') = forceMatch node g
+        newNodes :: [((b, Node), Node)]
+        newNodes = zip preds [maxNode+1..]
+        (_, maxNode) = nodeRange g
+        thisLabel = superLabel this
+        addReplica :: gr s b -> ((b, Node), Node) -> m (gr s b)
+        addReplica g ((b, pred), newNode) = do
+            newSuper <- freshen this
+            return $ add newSuper
+          where add newSuper =
+                  updateNode (thisLabel `replacedWith` superLabel newSuper) pred $
+                  ([(b, pred)], newNode, newSuper, succs) & g
+
+replacedWith :: PureSupernode s => Label -> Label -> s -> s
+replacedWith old new = mapLabels (\l -> if l == old then new else l)
+
+
+-- | Does a list have more than one element? (in constant time).
+isMultiple :: [a] -> Bool
+isMultiple [] = False
+isMultiple [_] = False
+isMultiple (_:_:_) = True
+
+-- | Find a candidate for splitting by finding a node that has multiple predecessors.
+
+anySplittable :: forall gr a b . Graph gr => gr a b -> LNode a
+anySplittable g = case splittable of
+                    n : _ -> n
+                    [] -> panic "anySplittable found no splittable nodes"
+  where splittable = filter (isMultiple . pre g . fst) $ labNodes g
+        splittable :: [LNode a]
+
+
+------------------ The collapsing algorithm -----------------------
+
+-- | Using the algorithm of Hecht and Ullman (1972), collapse a graph
+-- into a single node, splitting nodes as needed.  Record
+-- visualization events in monad `m`.
+collapseInductiveGraph :: (DynGraph gr, Supernode s m, VizCollapseMonad m gr s)
+                       => gr s () -> m (gr s ())
+collapseInductiveGraph g = drain g worklist
+  where worklist :: [[Node]] -- nodes with exactly one predecessor
+        worklist = [filter (singlePred g) $ nodes g]
+
+        drain g [] = if singletonGraph g then finalGraph g >> return g
+                     else let (n, super) = anySplittable g
+                          in  do splitGraphAt g (n, super)
+                                 collapseInductiveGraph =<< split n g
+        drain g ([]:nss) = drain g nss
+        drain g ((n:ns):nss) = let (g', ns') = consumeBy n (theUniquePred n) g
+                               in  do consumeByInGraph n (theUniquePred n) g
+                                      drain g' (ns':ns:nss)
+         where theUniquePred n
+                 | ([(_, p)], _, _, _) <- context g n = p
+                 | otherwise =
+                     panic "node claimed to have a unique predecessor; it doesn't"
diff --git a/compiler/GHC/Wasm/ControlFlow.hs b/compiler/GHC/Wasm/ControlFlow.hs
new file mode 100644
index 0000000000..2ef025574d
--- /dev/null
+++ b/compiler/GHC/Wasm/ControlFlow.hs
@@ -0,0 +1,152 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators, KindSignatures #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE PatternSynonyms #-}
+
+module GHC.Wasm.ControlFlow
+  ( WasmControl(..), (<>), pattern WasmIf, wasmReturn
+  , BrTableInterval(..), inclusiveInterval
+  , brTableLimit
+
+  , WasmType(..), WasmTypeTag(..)
+  , TypeList(..)
+  , WasmFunctionType(..)
+  )
+where
+
+import GHC.Prelude
+
+import Data.Kind
+
+import GHC.Utils.Outputable hiding ((<>))
+import GHC.Utils.Panic
+
+{-|
+Module      : GHC.Wasm.ControlFlow
+Description : Representation of control-flow portion of the WebAssembly instruction set
+-}
+
+-- | WebAssembly type of a WebAssembly value that WebAssembly code
+-- could either expect on the evaluation stack or leave on the evaluation
+-- stack.  At present we support only 32-bit values.
+
+data WasmType = I32 | F32
+  deriving (Eq, Show)
+
+
+-- | Singleton type useful for programming with `WasmType` at the type level.
+
+data WasmTypeTag :: WasmType -> Type where
+  TagI32 :: WasmTypeTag 'I32
+  TagF32 :: WasmTypeTag 'F32
+
+-- | List of WebAssembly types used to describe the sequence of WebAssembly
+-- values that a block of code may expect on the stack or leave on the stack.
+
+data TypeList :: [WasmType] -> Type where
+  TypeListNil :: TypeList '[]
+  TypeListCons :: WasmTypeTag t -> TypeList ts -> TypeList (t : ts)
+
+-- | The type of a WebAssembly function, loop, block, or conditional.
+-- This type says what values the code expects to pop off the stack and
+-- what values it promises to push.  The WebAssembly standard requires
+-- that this type appear explicitly in the code.
+
+data WasmFunctionType pre post =
+  WasmFunctionType { ft_pops :: TypeList pre
+                   , ft_pushes :: TypeList post
+                   }
+
+
+-- | Representation of WebAssembly control flow.
+-- Normally written as
+-- @
+--   WasmControl s e pre post
+-- @
+-- Type parameter `s` is the type of (unspecified) statements.
+-- It might be instantiated with an open Cmm block or with a sequence
+-- of Wasm instructions.
+-- Parameter `e` is the type of expressions.
+-- Parameter `pre` represents the values that are expected on the
+-- WebAssembly stack when the code runs, and `post` represents
+-- the state of the stack on completion.
+
+data WasmControl :: Type -> Type -> [WasmType] -> [WasmType] -> Type where
+
+  WasmPush  :: WasmTypeTag t -> e -> WasmControl s e stack (t ': stack)
+
+  WasmBlock :: WasmFunctionType pre post
+            -> WasmControl s e pre post
+            -> WasmControl s e pre post
+  WasmLoop  :: WasmFunctionType pre post
+            -> WasmControl s e pre post
+            -> WasmControl s e pre post
+  WasmIfTop :: WasmFunctionType pre post
+            -> WasmControl s e pre post
+            -> WasmControl s e pre post
+            -> WasmControl s e ('I32 ': pre) post
+
+  WasmBr   :: Int -> WasmControl s e dropped destination -- not typechecked
+  WasmFallthrough :: WasmControl s e dropped destination
+          -- generates no code, but has the same type as a branch
+
+  WasmBrTable :: e
+              -> BrTableInterval -- for testing
+              -> [Int]           -- targets
+              -> Int             -- default target
+              -> WasmControl s e dropped destination
+              -- invariant: the table interval is contained
+              -- within [0 .. pred (length targets)]
+  WasmReturnTop :: WasmTypeTag t
+                -> WasmControl s e (t ': t1star) t2star -- as per type system
+
+  WasmActions :: s -> WasmControl s e stack stack   -- basic block: one entry, one exit
+  WasmSeq :: WasmControl s e pre mid -> WasmControl s e mid post -> WasmControl s e pre post
+
+data BrTableInterval
+    = BrTableInterval { bti_lo :: Integer, bti_count :: Integer }
+  deriving (Show)
+
+instance Outputable BrTableInterval where
+  ppr range = brackets $ hcat[integer (bti_lo range), text "..", integer hi]
+      where hi = bti_lo range + bti_count range - 1
+
+brTableLimit :: Int
+  -- ^ Size of the largest table that is deemed acceptable in a `br_table` instruction.
+  --
+  -- Source: https://chromium.googlesource.com/v8/v8/+/master/src/wasm/wasm-limits.h#51
+  -- See also discussion at https://github.com/WebAssembly/spec/issues/607, which shows
+  -- that major browsers agree.
+brTableLimit = 65520
+
+inclusiveInterval :: Integer -> Integer -> BrTableInterval
+inclusiveInterval lo hi
+    | lo <= hi = let count = hi - lo + 1
+                 in  if count > toInteger brTableLimit then
+                         panic "interval too large in br_table instruction"
+                     else
+                         BrTableInterval lo count
+    | otherwise = panic "GHC.Wasm.ControlFlow: empty interval"
+
+(<>) :: forall s e pre mid post
+      . WasmControl s e pre mid
+     -> WasmControl s e mid post
+     -> WasmControl s e pre post
+(<>) = WasmSeq
+-- N.B. Fallthrough can't be optimized away because of type checking.
+
+
+
+-- Syntactic sugar.
+pattern WasmIf :: WasmFunctionType pre post
+               -> e
+               -> WasmControl s e pre post
+               -> WasmControl s e pre post
+               -> WasmControl s e pre post
+
+pattern WasmIf ty e t f =
+    WasmPush TagI32 e `WasmSeq` WasmIfTop ty t f
+
+-- More syntactic sugar.
+wasmReturn :: WasmTypeTag t -> e -> WasmControl s e (t ': t1star) t2star
+wasmReturn tag e = WasmPush tag e `WasmSeq` WasmReturnTop tag
diff --git a/compiler/GHC/Wasm/ControlFlow/FromCmm.hs b/compiler/GHC/Wasm/ControlFlow/FromCmm.hs
new file mode 100644
index 0000000000..741ab35560
--- /dev/null
+++ b/compiler/GHC/Wasm/ControlFlow/FromCmm.hs
@@ -0,0 +1,354 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE DataKinds #-}
+
+module GHC.Wasm.ControlFlow.FromCmm
+  ( structuredControl
+  )
+where
+
+import GHC.Prelude hiding (succ)
+
+import Data.Function
+import Data.List (sortBy)
+import qualified Data.Tree as Tree
+
+import GHC.Cmm
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dominators
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Switch
+
+import GHC.Platform
+
+import GHC.Utils.Misc
+import GHC.Utils.Panic
+import GHC.Utils.Outputable ( Outputable, text, (<+>), ppr
+                            , pprWithCommas
+                            , showSDocUnsafe
+                            )
+
+import GHC.Wasm.ControlFlow
+
+
+{-|
+Module      : GHC.Wasm.ControlFlow.FromCmm
+Description : Translation of (reducible) Cmm control flow to WebAssembly
+
+Code in this module can translate any _reducible_ Cmm control-flow
+graph to the structured control flow that is required by WebAssembly.
+The algorithm is subtle and is described in detail in a draft paper
+to be found at https://www.cs.tufts.edu/~nr/pubs/relooper.pdf.
+-}
+
+--------------------- Abstraction of Cmm control flow -----------------------
+
+-- | Abstracts the kind of control flow we understand how to convert.
+-- A block can be left in one of four ways:
+--
+--   * Unconditionally
+--
+--   * Conditionally on a predicate of type `e`
+--
+--   * To a location determined by the value of a scrutinee of type `e`
+--
+--   * Not at all.
+
+data ControlFlow e = Unconditional Label
+                   | Conditional e Label Label
+                   | Switch { _scrutinee :: e
+                            , _range :: BrTableInterval
+                            , _targets :: [Maybe Label] -- from 0
+                            , _defaultTarget :: Maybe Label
+                            }
+                   | TailCall e
+
+flowLeaving :: Platform -> CmmBlock -> ControlFlow CmmExpr
+flowLeaving platform b =
+    case lastNode b of
+      CmmBranch l -> Unconditional l
+      CmmCondBranch c t f _ -> Conditional c t f
+      CmmSwitch e targets ->
+          let (offset, target_labels) = switchTargetsToTable targets
+              (lo, hi) = switchTargetsRange targets
+              default_label = switchTargetsDefault targets
+              scrutinee = smartPlus platform e offset
+              range = inclusiveInterval (lo+toInteger offset) (hi+toInteger offset)
+          in  Switch scrutinee range (atMost brTableLimit target_labels) default_label
+
+      CmmCall { cml_cont = Just l } -> Unconditional l
+      CmmCall { cml_cont = Nothing, cml_target = e } -> TailCall e
+      CmmForeignCall { succ = l } -> Unconditional l
+
+  where atMost :: Int -> [a] -> [a]
+        atMost k xs = if xs `hasAtLeast` k then
+                          panic "switch table is too big for WebAssembly"
+                      else
+                          xs
+
+        hasAtLeast :: [a] -> Int -> Bool
+        hasAtLeast _ 0 = True
+        hasAtLeast [] _ = False
+        hasAtLeast (_:xs) k = hasAtLeast xs (k - 1)
+
+
+----------------------- Evaluation contexts ------------------------------
+
+-- | The syntactic constructs in which Wasm code may be contained.
+-- A list of these constructs represents an evaluation context,
+-- which is used to determined what level of `br` instruction
+-- reaches a given label.
+
+data ContainingSyntax
+    = BlockFollowedBy Label
+    | LoopHeadedBy Label
+    | IfThenElse (Maybe Label) -- ^ Carries the label that follows `if...end`, if any
+
+matchesFrame :: Label -> ContainingSyntax -> Bool
+matchesFrame label (BlockFollowedBy l) = label == l
+matchesFrame label (LoopHeadedBy l) = label == l
+matchesFrame label (IfThenElse (Just l)) = label == l
+matchesFrame _ _ = False
+
+data Context = Context { enclosing :: [ContainingSyntax]
+                       , fallthrough :: Maybe Label  -- the label can
+                                                     -- be reached just by "falling through"
+                                                     -- the hole
+                       }
+
+instance Outputable Context where
+  ppr c | Just l <- fallthrough c =
+                    pprWithCommas ppr (enclosing c) <+> text "fallthrough to" <+> ppr l
+        | otherwise = pprWithCommas ppr (enclosing c)
+
+emptyContext :: Context
+emptyContext = Context [] Nothing
+
+inside :: ContainingSyntax -> Context -> Context
+withFallthrough :: Context -> Label -> Context
+
+inside frame c = c { enclosing = frame : enclosing c }
+withFallthrough c l = c { fallthrough = Just l }
+
+type CmmActions = Block CmmNode O O
+type CfgNode = CmmBlock
+
+type FT pre post = WasmFunctionType pre post
+
+returns :: FT '[] '[ 'I32]
+doesn'tReturn :: FT '[] '[]
+
+returns = WasmFunctionType TypeListNil (TypeListCons TagI32 TypeListNil)
+doesn'tReturn = WasmFunctionType TypeListNil TypeListNil
+
+emptyPost :: FT pre post -> Bool
+emptyPost (WasmFunctionType _ TypeListNil) = True
+emptyPost _ = False
+
+----------------------- Translation ------------------------------
+
+-- | Convert a Cmm CFG to WebAssembly's structured control flow.
+
+structuredControl :: forall expr stmt .
+                     Platform  -- ^ needed for offset calculation
+                  -> (Label -> CmmExpr -> expr) -- ^ translator for expressions
+                  -> (Label -> CmmActions -> stmt) -- ^ translator for straight-line code
+                  -> CmmGraph -- ^ CFG to be translated
+                  -> WasmControl stmt expr '[] '[ 'I32]
+structuredControl platform txExpr txBlock g =
+   doTree returns dominatorTree emptyContext
+ where
+   gwd :: GraphWithDominators CmmNode
+   gwd = graphWithDominators g
+
+   dominatorTree :: Tree.Tree CfgNode-- Dominator tree in which children are sorted
+                                       -- with highest reverse-postorder number first
+   dominatorTree = fmap blockLabeled $ sortTree $ gwdDominatorTree gwd
+
+   doTree     :: FT '[] post -> Tree.Tree CfgNode -> Context -> WasmControl stmt expr '[] post
+   nodeWithin :: forall post .
+                 FT '[] post -> CfgNode -> [Tree.Tree CfgNode] -> Maybe Label
+                                                   -> Context -> WasmControl stmt expr '[] post
+   doBranch   :: FT '[] post -> Label -> Label     -> Context -> WasmControl stmt expr '[] post
+
+   doTree fty (Tree.Node x children) context =
+       let codeForX = nodeWithin fty x selectedChildren Nothing
+       in  if isLoopHeader x then
+             WasmLoop fty (codeForX loopContext)
+           else
+             codeForX context
+     where selectedChildren = case lastNode x of
+                                CmmSwitch {} -> children
+                                   -- N.B. Unlike `if`, translation of Switch uses only labels.
+                                _ -> filter hasMergeRoot children
+           loopContext = LoopHeadedBy (entryLabel x) `inside`
+                           (context `withFallthrough` entryLabel x)
+           hasMergeRoot = isMergeNode . Tree.rootLabel
+
+   nodeWithin fty x (y_n:ys) (Just zlabel) context =
+       WasmBlock fty $ nodeWithin fty x (y_n:ys) Nothing context'
+     where context' = BlockFollowedBy zlabel `inside` context
+   nodeWithin fty x (y_n:ys) Nothing context =
+       nodeWithin doesn'tReturn x ys (Just ylabel) (context `withFallthrough` ylabel) <>
+       doTree fty y_n context
+     where ylabel = treeEntryLabel y_n
+   nodeWithin fty x [] (Just zlabel) context
+     | not (generatesIf x) =
+         WasmBlock fty (nodeWithin fty x [] Nothing context')
+     where context' = BlockFollowedBy zlabel `inside` context
+   nodeWithin fty x [] maybeMarks context =
+       translationOfX context
+     where xlabel = entryLabel x
+
+           translationOfX :: Context -> WasmControl stmt expr '[] post
+           translationOfX context =
+             WasmActions (txBlock xlabel $ nodeBody x) <>
+             case flowLeaving platform x of
+               Unconditional l -> doBranch fty xlabel l context
+               Conditional e t f ->
+                 WasmIf fty
+                        (txExpr xlabel e)
+                        (doBranch fty xlabel t (IfThenElse maybeMarks `inside` context))
+                        (doBranch fty xlabel f (IfThenElse maybeMarks `inside` context))
+               TailCall e -> WasmPush TagI32 (txExpr xlabel e) <> WasmReturnTop TagI32
+               Switch e range targets default' ->
+                   WasmBrTable (txExpr xlabel e)
+                               range
+                               (map switchIndex targets)
+                               (switchIndex default')
+            where switchIndex :: Maybe Label -> Int
+                  switchIndex Nothing = 0 -- arbitrary; GHC won't go here
+                  switchIndex (Just lbl) = index lbl (enclosing context)
+
+   doBranch fty from to context
+      | to `elem` fallthrough context && emptyPost fty = WasmFallthrough
+                -- optimization: `br` is not needed, but it typechecks
+                -- only if nothing is expected to be left on the stack
+
+      | isBackward from to = WasmBr i -- continue
+      | isMergeLabel to = WasmBr i -- exit
+      | otherwise = doTree fty (subtreeAt to) context -- inline the code here
+     where i = index to (enclosing context)
+
+   generatesIf :: CmmBlock -> Bool
+   generatesIf x = case flowLeaving platform x of Conditional {} -> True
+                                                  _ -> False
+
+   ---- everything else is utility functions
+
+   treeEntryLabel :: Tree.Tree CfgNode -> Label
+   treeEntryLabel = entryLabel . Tree.rootLabel
+
+   sortTree :: Tree.Tree Label -> Tree.Tree Label
+    -- Sort highest rpnum first
+   sortTree (Tree.Node label children) =
+      Tree.Node label $ sortBy (flip compare `on` (rpnum . Tree.rootLabel)) $
+                        map sortTree children
+
+   subtreeAt :: Label -> Tree.Tree CfgNode
+   blockLabeled :: Label -> CfgNode
+   rpnum :: Label -> RPNum-- reverse postorder number of the labeled block
+   isMergeLabel :: Label -> Bool
+   isMergeNode :: CfgNode -> Bool
+   isLoopHeader :: CfgNode -> Bool-- identify loop headers
+    -- all nodes whose immediate dominator is the given block.
+     -- They are produced with the largest RP number first,
+     -- so the largest RP number is pushed on the context first.
+   dominates :: Label -> Label -> Bool
+    -- Domination relation (not just immediate domination)
+
+   blockmap :: LabelMap CfgNode
+   GMany NothingO blockmap NothingO = g_graph g
+
+   blockLabeled l = findLabelIn l blockmap
+
+   rpblocks :: [CfgNode]
+   rpblocks = revPostorderFrom blockmap (g_entry g)
+
+   foldEdges :: forall a . (Label -> Label -> a -> a) -> a -> a
+   foldEdges f a =
+     foldl (\a (from, to) -> f from to a)
+           a
+           [(entryLabel from, to) | from <- rpblocks, to <- successors from]
+
+   isMergeLabel l = setMember l mergeBlockLabels
+   isMergeNode = isMergeLabel . entryLabel
+
+   isBackward :: Label -> Label -> Bool
+   isBackward from to = rpnum to <= rpnum from -- self-edge counts as a backward edge
+
+   subtreeAt label = findLabelIn label subtrees
+   subtrees :: LabelMap (Tree.Tree CfgNode)
+   subtrees = addSubtree mapEmpty dominatorTree
+     where addSubtree map t@(Tree.Node root children) =
+               foldl addSubtree (mapInsert (entryLabel root) t map) children
+
+   mergeBlockLabels :: LabelSet
+   -- N.B. A block is a merge node if it is where control flow merges.
+   -- That means it is entered by multiple control-flow edges, _except_
+   -- back edges don't count.  There must be multiple paths that enter the
+   -- block _without_ passing through the block itself.
+   mergeBlockLabels =
+       setFromList [entryLabel n | n <- rpblocks, big (forwardPreds (entryLabel n))]
+    where big [] = False
+          big [_] = False
+          big (_ : _ : _) = True
+
+          forwardPreds :: Label -> [Label] -- reachable predecessors of reachable blocks,
+                                           -- via forward edges only
+          forwardPreds = \l -> mapFindWithDefault [] l predmap
+              where predmap :: LabelMap [Label]
+                    predmap = foldEdges addForwardEdge mapEmpty
+                    addForwardEdge from to pm
+                        | isBackward from to = pm
+                        | otherwise = addToList (from :) to pm
+
+   isLoopHeader = isHeaderLabel . entryLabel
+   isHeaderLabel = (`setMember` headers)  -- loop headers
+      where headers :: LabelSet
+            headers = foldMap headersPointedTo blockmap
+            headersPointedTo block =
+                setFromList [label | label <- successors block,
+                                              dominates label (entryLabel block)]
+
+   index :: Label -> [ContainingSyntax] -> Int
+   index _ [] = panic "destination label not in evaluation context"
+   index label (frame : context)
+       | label `matchesFrame` frame = 0
+       | otherwise = 1 + index label context
+
+   rpnum = gwdRPNumber gwd
+   dominates lbl blockname =
+       lbl == blockname || dominatorsMember lbl (gwdDominatorsOf gwd blockname)
+
+
+
+nodeBody :: CfgNode -> CmmActions
+nodeBody (BlockCC _first middle _last) = middle
+
+
+smartPlus :: Platform -> CmmExpr -> Int -> CmmExpr
+smartPlus _ e 0 = e
+smartPlus platform e k =
+    CmmMachOp (MO_Add width) [e, CmmLit (CmmInt (fromIntegral k) width)]
+  where width = cmmExprWidth platform e
+
+addToList :: (IsMap map) => ([a] -> [a]) -> KeyOf map -> map [a] -> map [a]
+addToList consx = mapAlter add
+    where add Nothing = Just (consx [])
+          add (Just xs) = Just (consx xs)
+
+------------------------------------------------------------------
+--- everything below here is for diagnostics in case of panic
+
+instance Outputable ContainingSyntax where
+    ppr (BlockFollowedBy l) = text "node" <+> ppr l
+    ppr (LoopHeadedBy l) = text "loop" <+> ppr l
+    ppr (IfThenElse l) = text "if-then-else" <+> ppr l
+
+findLabelIn :: HasDebugCallStack => Label -> LabelMap a -> a
+findLabelIn lbl = mapFindWithDefault failed lbl
+  where failed =
+            panic $ "label " ++ showSDocUnsafe (ppr lbl) ++ " not found in control-flow graph"
diff --git a/compiler/GHC/Wasm/ControlFlow/ToAsm.hs b/compiler/GHC/Wasm/ControlFlow/ToAsm.hs
new file mode 100644
index 0000000000..fbf387753a
--- /dev/null
+++ b/compiler/GHC/Wasm/ControlFlow/ToAsm.hs
@@ -0,0 +1,98 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module GHC.Wasm.ControlFlow.ToAsm
+  ( toIndentedAsm
+  , noIndentation
+  )
+where
+
+{-|
+Module      : GHC.Wasm.ControlFlow.ToAsm
+Description : Convert WebAssembly control-flow instructions to GNU assembler syntax.
+-}
+
+import GHC.Prelude
+
+import Data.ByteString.Builder (Builder)
+import qualified Data.ByteString.Builder as BS
+import Data.List (intersperse)
+import Data.Monoid
+
+import GHC.Utils.Panic
+
+import GHC.Wasm.ControlFlow hiding ((<>))
+
+type Indentation = Builder
+
+standardIndentation :: Indentation
+standardIndentation = "  "
+
+noIndentation :: Indentation
+noIndentation = ""
+
+
+-- | Assuming that the type of a construct can be rendered as inline
+-- syntax, return the syntax.  For every type our translator
+-- generates, the assumption should hold.
+wasmFunctionType :: WasmFunctionType pre post -> Builder
+wasmFunctionType (WasmFunctionType TypeListNil TypeListNil) = "void"
+wasmFunctionType (WasmFunctionType TypeListNil (TypeListCons t TypeListNil)) = tagBuilder t
+wasmFunctionType _ = panic "function type needs to be externalized"
+  -- Anything other then [] -> [], [] -> [t] needs to be put into a
+  -- type table and referred to by number.
+
+-- | Tag used in GNU assembly to name a WebAssembly type
+tagBuilder :: WasmTypeTag a -> Builder
+tagBuilder TagI32 = "i32"
+tagBuilder TagF32 = "f32"
+
+
+type Printer a = Indentation -> a -> Builder
+
+-- | Converts WebAssembly control-flow code into GNU (Clang) assembly
+-- syntax, indented for readability.  For ease of combining with other
+-- output, the result does not have a trailing newline or preceding
+-- indentation.  (The indentation argument simply gives the blank
+-- string that follows each emitted newline.)
+--
+-- The initial `Indentation` argument specifies the indentation of the
+-- entire output; for most use cases it will likely be `mempty`.
+
+toIndentedAsm :: forall s e pre post
+               . Printer s -> Printer e -> Printer (WasmControl s e pre post)
+toIndentedAsm ps pe indent s = print s
+  where print, shift :: WasmControl s e pre' post' -> Builder
+        newline :: Builder -> Builder -> Builder
+        (<+>) :: Builder -> Builder -> Builder
+        ty = wasmFunctionType
+
+        -- cases meant to avoid generating any output for `WasmFallthrough`
+        print (WasmFallthrough `WasmSeq` s) = print s
+        print (s `WasmSeq` WasmFallthrough) = print s
+        print (WasmIfTop t s WasmFallthrough) =
+            "if" <+> ty t `newline` shift s `newline` "end_if"
+        print (WasmIfTop t WasmFallthrough s) =
+            "if" <+> ty t `newline` "else" `newline` shift s `newline` "end_if"
+
+        -- all the other cases
+        print (WasmPush _ e) = pe indent e
+        print (WasmBlock t s) = "block" <+> ty t `newline` shift s `newline` "end_block"
+        print (WasmLoop  t s) = "loop"  <+> ty t `newline` shift s `newline` "end_loop"
+        print (WasmIfTop t ts fs) = "if" <+> ty t `newline` shift ts `newline`
+                                    "else" `newline` shift fs `newline` "end_if"
+        print (WasmBr l) = "br" <+> BS.intDec l
+        print (WasmBrTable e _ ts t) =
+          pe indent e `newline` "br_table {" <+>
+          mconcat (intersperse ", " [BS.intDec i | i <- ts <> [t]]) <+>
+          "}"
+        print (WasmReturnTop _) = "return"
+        print (WasmActions as) = ps indent as
+        print (s `WasmSeq` s') = print s `newline` print s'
+
+        print WasmFallthrough = "// fallthrough" -- rare
+
+        newline s s' = s <> "\n" <> indent <> s'
+        shift s = standardIndentation <> toIndentedAsm ps pe (indent <> standardIndentation) s
+        s <+> s' = s <> " " <> s'
author	Norman Ramsey <Norman.Ramsey@tweag.io>	2022-05-17 15:07:05 -0400
committer	Norman Ramsey <Norman.Ramsey@tweag.io>	2022-08-09 16:36:50 -0400
commit	a8a3a8785fa1a58436bcf6460c2f3bcc8412cd53 (patch)
tree	dc62e83e947c3b94800c533177aa2ce5503c9983 /compiler/GHC
parent	56d3201996ccd9e858267dad7b6af577f3a71e56 (diff)
download	haskell-wip/nr/wasm-control-flow.tar.gz