15 files changed, 39 insertions, 60 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index fd9f019e04..3acace8be2 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -478,7 +478,7 @@ data IdLabelInfo
                         -- Note [Bytes label].
   | BlockInfoTable      -- ^ Like LocalInfoTable but for a proc-point block
                         -- instead of a closure entry-point.
-                        -- See Note [Proc-point local block entry-point].
+                        -- See Note [Proc-point local block entry-points].
 
   deriving (Eq, Ord)
 
@@ -587,7 +587,7 @@ mkBytesLabel name                 = IdLabel name NoCafRefs Bytes
 
 mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel
 mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
-                               -- See Note [Proc-point local block entry-point].
+                               -- See Note [Proc-point local block entry-points].
 
 -- Constructing Cmm Labels
 mkDirty_MUT_VAR_Label,
@@ -865,7 +865,7 @@ toEntryLbl platform lbl = case lbl of
    IdLabel n c (ConInfoTable k)  -> IdLabel n c (ConEntry k)
 
    IdLabel n _ BlockInfoTable    -> mkLocalBlockLabel (nameUnique n)
-                   -- See Note [Proc-point local block entry-point].
+                   -- See Note [Proc-point local block entry-points].
    IdLabel n c _                 -> IdLabel n c Entry
    CmmLabel m ext str CmmInfo    -> CmmLabel m ext str CmmEntry
    CmmLabel m ext str CmmRetInfo -> CmmLabel m ext str CmmRet
@@ -898,7 +898,6 @@ hasCAF _                            = False
 
 -- Note [ticky for LNE]
 -- ~~~~~~~~~~~~~~~~~~~~~
-
 -- Until 14 Feb 2013, every ticky counter was associated with a
 -- closure. Thus, ticky labels used IdLabel. It is odd that
 -- GHC.Cmm.Info.Build.cafTransfers would consider such a ticky label
@@ -1465,7 +1464,6 @@ pprCLabel !platform !sty lbl = -- see Note [Bangs in CLabel]
 
 -- Note [Internal proc labels]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Some tools (e.g. the `perf` utility on Linux) rely on the symbol table
 -- for resolution of function names. To help these tools we provide the
 -- (enabled by default) -fexpose-all-symbols flag which causes GHC to produce
diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs
index 73c13d2040..350f94c818 100644
--- a/compiler/GHC/Cmm/ContFlowOpt.hs
+++ b/compiler/GHC/Cmm/ContFlowOpt.hs
@@ -29,7 +29,6 @@ import Control.Monad
 
 -- Note [What is shortcutting]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Consider this Cmm code:
 --
 -- L1: ...
@@ -53,7 +52,6 @@ import Control.Monad
 
 -- Note [Control-flow optimisations]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- This optimisation does three things:
 --
 --   - If a block finishes in an unconditional branch to another block
@@ -80,7 +78,6 @@ import Control.Monad
 
 -- Note [Shortcut call returns]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
 -- we go, and also a mapping from BlockId to BlockId, representing
 -- continuation labels that we have renamed.  This latter mapping is
@@ -106,7 +103,6 @@ import Control.Monad
 
 -- Note [Shortcut call returns and proc-points]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Consider this code that you might get from a recursive
 -- let-no-escape:
 --
diff --git a/compiler/GHC/Cmm/Dataflow.hs b/compiler/GHC/Cmm/Dataflow.hs
index 3e310fefcb..ad1c37ace2 100644
--- a/compiler/GHC/Cmm/Dataflow.hs
+++ b/compiler/GHC/Cmm/Dataflow.hs
@@ -294,7 +294,7 @@ sortBlocks direction entry blockmap =
     fwd = revPostorderFrom blockmap entry
 
 -- Note [Backward vs forward analysis]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The forward and backward cases are not dual.  In the forward case, the entry
 -- points are known, and one simply traverses the body blocks from those points.
 -- In the backward case, something is known about the exit points, but a
@@ -350,7 +350,7 @@ updateFact
 updateFact fact_join dep_blocks (todo, fbase) lbl new_fact
   = case lookupFact lbl fbase of
       Nothing ->
-          -- Note [No old fact]
+          -- See Note [No old fact]
           let !z = mapInsert lbl new_fact fbase in (changed, z)
       Just old_fact ->
           case fact_join (OldFact old_fact) (NewFact new_fact) of
@@ -362,7 +362,7 @@ updateFact fact_join dep_blocks (todo, fbase) lbl new_fact
 
 {-
 Note [No old fact]
-
+~~~~~~~~~~~~~~~~~~
 We know that the new_fact is >= _|_, so we don't need to join.  However,
 if the new fact is also _|_, and we have already analysed its block,
 we don't need to record a change.  So there's a tradeoff here.  It turns
diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs
index 52cb63c901..f63ef62dab 100644
--- a/compiler/GHC/Cmm/Expr.hs
+++ b/compiler/GHC/Cmm/Expr.hs
@@ -86,7 +86,7 @@ data CmmReg
 data Area
   = Old            -- See Note [Old Area]
   | Young {-# UNPACK #-} !BlockId  -- Invariant: must be a continuation BlockId
-                   -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+                   -- See Note [Continuation BlockIds] in GHC.Cmm.Node.
   deriving (Eq, Ord, Show)
 
 {- Note [Old Area]
@@ -203,7 +203,7 @@ data CmmLit
 
   | CmmBlock {-# UNPACK #-} !BlockId     -- Code label
         -- Invariant: must be a continuation BlockId
-        -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+        -- See Note [Continuation BlockIds] in GHC.Cmm.Node.
 
   | CmmHighStackMark -- A late-bound constant that stands for the max
                      -- #bytes of stack space used during a procedure.
@@ -410,7 +410,7 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
 -----------------------------------------------------------------------------
 {-
 Note [Overlapping global registers]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The backend might not faithfully implement the abstraction of the STG
 machine with independent registers for different values of type
 GlobalReg. Specifically, certain pairs of registers (r1, r2) may
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
index ef8ae7f26b..ff9391a7fe 100644
--- a/compiler/GHC/Cmm/Graph.hs
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -425,7 +425,7 @@ copyOutOflow profile conv transfer area actuals updfr_off extra_stack_stuff
 
 
 -- Note [Width of parameters]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- Consider passing a small (< word width) primitive like Int8# to a function.
 -- It's actually non-trivial to do this without extending/narrowing:
 -- * Global registers are considered to have native word width (i.e., 64-bits on
diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs
index 01f3c2a3ff..571a1faae7 100644
--- a/compiler/GHC/Cmm/Info/Build.hs
+++ b/compiler/GHC/Cmm/Info/Build.hs
@@ -55,7 +55,6 @@ import GHC.Types.Name.Set
 
 {- Note [SRTs]
    ~~~~~~~~~~~
-
 SRTs are the mechanism by which the garbage collector can determine
 the live CAFs in the program.
 
@@ -925,7 +924,7 @@ doSCC cfg staticFuns static_data (CyclicSCC nodes) = do
 
 
 {- Note [recursive SRTs]
-
+   ~~~~~~~~~~~~~~~~~~~~~
 If the dependency analyser has found us a recursive group of
 declarations, then we build a single SRT for the whole group, on the
 grounds that everything in the group is reachable from everything
diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs
index ad13e8f431..1bd00ed65a 100644
--- a/compiler/GHC/Cmm/LayoutStack.hs
+++ b/compiler/GHC/Cmm/LayoutStack.hs
@@ -39,7 +39,7 @@ import Data.Array as Array
 import Data.List (nub)
 
 {- Note [Stack Layout]
-
+   ~~~~~~~~~~~~~~~~~~~
 The job of this pass is to
 
  - replace references to abstract stack Areas with fixed offsets from Sp.
@@ -141,7 +141,7 @@ Pass 2:
 
 
 Note [Two pass approach]
-
+~~~~~~~~~~~~~~~~~~~~~~~~
 The main reason for Pass 2 is being able to insert only the reloads that are
 needed and the fact that the two passes need different liveness information.
 Let's consider an example:
@@ -510,7 +510,7 @@ handleLastNode cfg procpoints liveness cont_info stackmaps
                                 , LabelMap StackMap )
 
      handleBranches
-         -- Note [diamond proc point]
+         -- See Note [diamond proc point]
        | Just l <- futureContinuation middle
        , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
        = do
@@ -644,9 +644,8 @@ setupStackFrame platform lbl liveness updfr_off ret_args stack0
                          }
 
 
--- -----------------------------------------------------------------------------
 -- Note [diamond proc point]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
 -- This special case looks for the pattern we get from a typical
 -- tagged case expression:
 --
@@ -895,7 +894,7 @@ maybeAddSpAdj cfg sp0 sp_off block =
       where sp_unwind = CmmRegOff spReg (sp0 - platformWordSizeInBytes platform - sp_off)
 
 {- Note [SP old/young offsets]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Sp(L) is the Sp offset on entry to block L relative to the base of the
 OLD area.
 
@@ -1098,7 +1097,7 @@ insertReloads platform stackmap live =
 
 {-
 Note [Lower safe foreign calls]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We start with
 
    Sp[young(L1)] = L1
diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs
index cd2d331a58..0bd3ac1111 100644
--- a/compiler/GHC/Cmm/MachOp.hs
+++ b/compiler/GHC/Cmm/MachOp.hs
@@ -340,9 +340,8 @@ isFloatComparison mop =
     MO_F_Lt {} -> True
     _other     -> False
 
--- -----------------------------------------------------------------------------
--- Inverting conditions
-
+-- Note [Inverting conditions]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- Sometimes it's useful to be able to invert the sense of a
 -- condition.  Not all conditional tests are invertible: in
 -- particular, floating point conditionals cannot be inverted, because
diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs
index fe6eac3223..d7d35a8bfc 100644
--- a/compiler/GHC/Cmm/Node.hs
+++ b/compiler/GHC/Cmm/Node.hs
@@ -105,7 +105,7 @@ data CmmNode e x where
 
   CmmSwitch
     :: CmmExpr       -- Scrutinee, of some integral type
-    -> SwitchTargets -- Cases. See [Note SwitchTargets]
+    -> SwitchTargets -- Cases. See Note [SwitchTargets]
     -> CmmNode O C
 
   CmmCall :: {                -- A native call or tail call
@@ -114,7 +114,9 @@ data CmmNode e x where
       cml_cont :: Maybe Label,
           -- Label of continuation (Nothing for return or tail call)
           --
-          -- Note [Continuation BlockIds]: these BlockIds are called
+          -- Note [Continuation BlockIds]
+          -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+          -- These BlockIds are called
           -- Continuation BlockIds, and are the only BlockIds that can
           -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
           -- (CmmStackSlot (Young b) _).
@@ -196,7 +198,6 @@ sequence.
 
 {- Note [Unsafe foreign calls clobber caller-save registers]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 A foreign call is defined to clobber any GlobalRegs that are mapped to
 caller-saves machine registers (according to the prevailing C ABI).
 GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves.
@@ -386,7 +387,6 @@ instance DefinerOfRegs GlobalReg (CmmNode e x) where
 
 -- Note [Safe foreign calls clobber STG registers]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- During stack layout phase every safe foreign call is expanded into a block
 -- that contains unsafe foreign call (instead of safe foreign call) and ends
 -- with a normal call (See Note [Foreign calls]). This means that we must
@@ -642,8 +642,8 @@ data CmmTickScope
     -- the new block could have a combined tick scope a/c+b/d, which
     -- both tick<2> and tick<3> apply to.
 
--- Note [CmmTick scoping details]:
---
+-- Note [CmmTick scoping details]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
 -- same block. Note that as a result of this, optimisations making
 -- tick scopes more specific can *reduce* the amount of code a tick
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index ed9492aa32..68d5821309 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -6,9 +6,9 @@
 --
 -----------------------------------------------------------------------------
 
-{- -----------------------------------------------------------------------------
+{-
 Note [Syntax of .cmm files]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 NOTE: You are very much on your own in .cmm.  There is very little
 error checking at all:
 
diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs
index 270a281461..585606fcb2 100644
--- a/compiler/GHC/Cmm/Pipeline.hs
+++ b/compiler/GHC/Cmm/Pipeline.hs
@@ -175,7 +175,6 @@ cpsTop logger platform cfg proc =
 
 -- Note [Sinking after stack layout]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- In the past we considered running sinking pass also before stack
 -- layout, but after making some measurements we realized that:
 --
@@ -301,7 +300,7 @@ cpsTop logger platform cfg proc =
 --
 
 {- Note [inconsistent-pic-reg]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 On x86/Darwin, PIC is implemented by inserting a sequence like
 
     call 1f
@@ -329,7 +328,7 @@ _GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.
 -}
 
 {- Note [unreachable blocks]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
 The control-flow optimiser sometimes leaves unreachable blocks behind
 containing junk code.  These aren't necessarily a problem, but
 removing them is good because it might save time in the native code
diff --git a/compiler/GHC/Cmm/ProcPoint.hs b/compiler/GHC/Cmm/ProcPoint.hs
index 0cabea1536..cd55b4d255 100644
--- a/compiler/GHC/Cmm/ProcPoint.hs
+++ b/compiler/GHC/Cmm/ProcPoint.hs
@@ -428,7 +428,7 @@ attachContInfoTables _ other_decl
 
 {-
 Note [Direct reachability]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 Block B is directly reachable from proc point P iff control can flow
 from P to B without passing through an intervening proc point.
 -}
@@ -437,7 +437,7 @@ from P to B without passing through an intervening proc point.
 
 {-
 Note [No simple dataflow]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~
 Sadly, it seems impossible to compute the proc points using a single
 dataflow pass.  One might attempt to use this simple lattice:
 
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
index 7d90967132..0f3d979716 100644
--- a/compiler/GHC/Cmm/Sink.hs
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -472,7 +472,7 @@ tryToInline platform liveAfter node assigs =
 
   go usages live node skipped (a@(l,rhs,_) : rest)
    | cannot_inline            = dont_inline
-   | occurs_none              = discard  -- Note [discard during inlining]
+   | occurs_none              = discard  -- See Note [discard during inlining]
    | occurs_once              = inline_and_discard
    | isTrivial platform rhs   = inline_and_keep
    | otherwise                = dont_inline
@@ -496,7 +496,7 @@ tryToInline platform liveAfter node assigs =
                 live' = inline foldLocalRegsUsed platform (\m r -> insertLRegSet r m)
                                             live rhs
 
-        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
+        cannot_inline = skipped `regsUsedIn` rhs -- See Note [dependent assignments]
                         || l `elemLRegSet` skipped
                         || not (okToInline platform rhs node)
 
@@ -519,8 +519,7 @@ tryToInline platform liveAfter node assigs =
         inl_exp other = other
 
 {- Note [Keeping assignemnts mentioned in skipped RHSs]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     If we have to assignments: [z = y, y = e1] and we skip
     z we *must* retain the assignment y = e1. This is because
     we might inline "z = y" into another node later on so we
@@ -541,7 +540,7 @@ tryToInline platform liveAfter node assigs =
 -}
 
 {- Note [improveConditional]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
 cmmMachOpFold tries to simplify conditionals to turn things like
   (a == b) != 1
 into
@@ -579,7 +578,6 @@ improveConditional other = other
 
 -- Note [dependent assignments]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- If our assignment list looks like
 --
 --    [ y = e,  x = ... y ... ]
@@ -690,7 +688,6 @@ conflicts platform (r, rhs, addr) node
 
 {- Note [Inlining foldRegsDefd]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    foldRegsDefd is, after optimization, *not* a small function so
    it's only marked INLINEABLE, but not INLINE.
 
@@ -720,7 +717,6 @@ localRegistersConflict platform expr node =
 
 -- Note [Sinking and calls]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
 -- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
 -- stack layout (see Note [Sinking after stack layout]) which leads to two
@@ -803,7 +799,6 @@ data AbsMem
 
 -- Note [Foreign calls clobber heap]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- It is tempting to say that foreign calls clobber only
 -- non-heap/stack memory, but unfortunately we break this invariant in
 -- the RTS.  For example, in stg_catch_retry_frame we call
diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs
index 7bef1e293a..f8c6c674ef 100644
--- a/compiler/GHC/Cmm/Switch.hs
+++ b/compiler/GHC/Cmm/Switch.hs
@@ -26,7 +26,6 @@ import qualified Data.Map as M
 
 -- Note [Cmm Switches, the general plan]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Compiling a high-level switch statement, as it comes out of a STG case
 -- expression, for example, allows for a surprising amount of design decisions.
 -- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
@@ -51,10 +50,9 @@ import qualified Data.Map as M
 -- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are
 -- separated.
 
------------------------------------------------------------------------------
+
 -- Note [Magic Constants in GHC.Cmm.Switch]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- There are a lot of heuristics here that depend on magic values where it is
 -- hard to determine the "best" value (for whatever that means). These are the
 -- magic values:
@@ -83,7 +81,6 @@ minJumpTableOffset = 2
 
 -- Note [SwitchTargets]
 -- ~~~~~~~~~~~~~~~~~~~~
---
 -- The branches of a switch are stored in a SwitchTargets, which consists of an
 -- (optional) default jump target, and a map from values to jump targets.
 --
@@ -175,7 +172,6 @@ switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
 
 -- Note [Jump Table Offset]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Usually, the code for a jump table starting at x will first subtract x from
 -- the value, to avoid a large amount of empty entries. But if x is very small,
 -- the extra entries are no worse than the subtraction in terms of code size, and
@@ -239,7 +235,6 @@ data SwitchPlan
 --
 -- Note [createSwitchPlan]
 -- ~~~~~~~~~~~~~~~~~~~~~~~
---
 -- A SwitchPlan describes how a Switch statement is to be broken down into
 -- smaller pieces suitable for code generation.
 --
diff --git a/compiler/GHC/Cmm/Switch/Implement.hs b/compiler/GHC/Cmm/Switch/Implement.hs
index 87dfc1cdaa..30265dc234 100644
--- a/compiler/GHC/Cmm/Switch/Implement.hs
+++ b/compiler/GHC/Cmm/Switch/Implement.hs
@@ -57,16 +57,15 @@ visitSwitches platform block
 
 -- Note [Floating switch expressions]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 -- When we translate a sparse switch into a search tree we would like
 -- to compute the value we compare against only once.
-
+--
 -- For this purpose we assign the switch expression to a local register
 -- and then use this register when constructing the actual binary tree.
-
+--
 -- This is important as the expression could contain expensive code like
 -- memory loads or divisions which we REALLY don't want to duplicate.
-
+--
 -- This happened in parts of the handwritten RTS Cmm code. See also #16933
 
 -- See Note [Floating switch expressions]