Diffstat (limited to 'compiler/nativeGen/X86/CodeGen.hs')
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs | 251
1 file changed, 167 insertions(+), 84 deletions(-)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 38dc76090d..37080b990e 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -20,6 +20,7 @@ module X86.CodeGen (
cmmTopCodeGen,
generateJumpTableForInstr,
extractUnwindPoints,
+ invertCondBranches,
InstrBlock
)
@@ -36,13 +37,21 @@ import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
+
+--TODO: Remove - Just for development/debugging
+import X86.Ppr()
+
import CodeGen.Platform
import CPrim
import Debug ( DebugBlock(..), UnwindPoint(..), UnwindTable
, UnwindExpr(UwReg), toUnwindExpr )
import Instruction
import PIC
-import NCGMonad
+import NCGMonad ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
+ , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat
+ , getPicBaseMaybeNat, getDebugBlock, getFileId
+ , addImmediateSuccessorNat, updateCfgNat)
+import CFG
import Format
import Reg
import Platform
@@ -56,7 +65,9 @@ import CmmUtils
import CmmSwitch
import Cmm
import Hoopl.Block
+import Hoopl.Collections
import Hoopl.Graph
+import Hoopl.Label
import CLabel
import CoreSyn ( Tickish(..) )
import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
@@ -137,8 +148,8 @@ basicBlockCodeGen block = do
let line = srcSpanStartLine span; col = srcSpanStartCol span
return $ unitOL $ LOCATION fileId line col name
_ -> return nilOL
- mid_instrs <- stmtsToInstrs stmts
- tail_instrs <- stmtToInstrs tail
+ mid_instrs <- stmtsToInstrs id stmts
+ tail_instrs <- stmtToInstrs id tail
let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
instrs' <- fold <$> traverse addSpUnwindings instrs
-- code generation may introduce new basic block boundaries, which
@@ -169,14 +180,15 @@ addSpUnwindings instr@(DELTA d) = do
else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr
-stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
-stmtsToInstrs stmts
- = do instrss <- mapM stmtToInstrs stmts
+stmtsToInstrs :: BlockId -> [CmmNode e x] -> NatM InstrBlock
+stmtsToInstrs bid stmts
+ = do instrss <- mapM (stmtToInstrs bid) stmts
return (concatOL instrss)
-
-stmtToInstrs :: CmmNode e x -> NatM InstrBlock
-stmtToInstrs stmt = do
+-- | `bid` refers to the current block and is used to update the CFG
+-- if new blocks are inserted in the control flow.
+stmtToInstrs :: BlockId -> CmmNode e x -> NatM InstrBlock
+stmtToInstrs bid stmt = do
dflags <- getDynFlags
is32Bit <- is32BitPlatform
case stmt of
@@ -207,16 +219,13 @@ stmtToInstrs stmt = do
format = cmmTypeFormat ty
CmmUnsafeForeignCall target result_regs args
- -> genCCall dflags is32Bit target result_regs args
+ -> genCCall dflags is32Bit target result_regs args bid
- CmmBranch id -> genBranch id
+ CmmBranch id -> return $ genBranch id
--We try to arrange blocks such that the likely branch is the fallthrough
--in CmmContFlowOpt. So we can assume the condition is likely false here.
- CmmCondBranch arg true false _ -> do
- b1 <- genCondJump true arg
- b2 <- genBranch false
- return (b1 `appOL` b2)
+ CmmCondBranch arg true false _ -> genCondBranch bid true false arg
CmmSwitch arg ids -> do dflags <- getDynFlags
genSwitch dflags arg ids
CmmCall { cml_target = arg
@@ -1673,13 +1682,13 @@ genJump expr regs = do
-- -----------------------------------------------------------------------------
-- Unconditional branches
-genBranch :: BlockId -> NatM InstrBlock
-genBranch = return . toOL . mkJumpInstr
+genBranch :: BlockId -> InstrBlock
+genBranch = toOL . mkJumpInstr
-- -----------------------------------------------------------------------------
--- Conditional jumps
+-- Conditional jumps/branches
{-
Conditional jumps are always to local labels, so we can use branch
@@ -1690,19 +1699,24 @@ I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}
-genCondJump
- :: BlockId -- the branch target
+
+genCondBranch
+ :: BlockId -- the source of the jump
+ -> BlockId -- the true branch target
+ -> BlockId -- the false branch target
-> CmmExpr -- the condition on which to branch
- -> NatM InstrBlock
+ -> NatM InstrBlock -- Instructions
-genCondJump id expr = do
+genCondBranch bid id false expr = do
is32Bit <- is32BitPlatform
- genCondJump' is32Bit id expr
+ genCondBranch' is32Bit bid id false expr
-genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock
+-- | We return the instructions generated.
+genCondBranch' :: Bool -> BlockId -> BlockId -> BlockId -> CmmExpr
+ -> NatM InstrBlock
-- 64-bit integer comparisons on 32-bit
-genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
+genCondBranch' is32Bit _bid true false (CmmMachOp mop [e1,e2])
| is32Bit, Just W64 <- maybeIntComparison mop = do
ChildCode64 code1 r1_lo <- iselExpr64 e1
ChildCode64 code2 r2_lo <- iselExpr64 e2
@@ -1710,52 +1724,52 @@ genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
r2_hi = getHiVRegFromLo r2_lo
cond = machOpToCond mop
Just cond' = maybeFlipCond cond
- false <- getBlockIdNat
- return $ code1 `appOL` code2 `appOL` toOL [
- CMP II32 (OpReg r2_hi) (OpReg r1_hi),
- JXX cond true,
- JXX cond' false,
- CMP II32 (OpReg r2_lo) (OpReg r1_lo),
- JXX cond true,
- NEWBLOCK false ]
-
-genCondJump' _ id bool = do
+ --TODO: Update CFG for x86
+ let code = code1 `appOL` code2 `appOL` toOL [
+ CMP II32 (OpReg r2_hi) (OpReg r1_hi),
+ JXX cond true,
+ JXX cond' false,
+ CMP II32 (OpReg r2_lo) (OpReg r1_lo),
+ JXX cond true] `appOL` genBranch false
+ return code
+
+genCondBranch' _ bid id false bool = do
CondCode is_float cond cond_code <- getCondCode bool
use_sse2 <- sse2Enabled
if not is_float || not use_sse2
then
- return (cond_code `snocOL` JXX cond id)
+ return (cond_code `snocOL` JXX cond id `appOL` genBranch false)
else do
- lbl <- getBlockIdNat
-
-- See Note [SSE Parity Checks]
- let code = case cond of
- NE -> or_unordered
- GU -> plain_test
- GEU -> plain_test
- -- Use ASSERT so we don't break releases if
- -- LTT/LE creep in somehow.
- LTT ->
- ASSERT2(False, ppr "Should have been turned into >")
- and_ordered
- LE ->
- ASSERT2(False, ppr "Should have been turned into >=")
- and_ordered
- _ -> and_ordered
+ let jmpFalse = genBranch false
+ code
+ = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ -- Use ASSERT so we don't break releases if
+ -- LTT/LE creep in somehow.
+ LTT ->
+ ASSERT2(False, ppr "Should have been turned into >")
+ and_ordered
+ LE ->
+ ASSERT2(False, ppr "Should have been turned into >=")
+ and_ordered
+ _ -> and_ordered
plain_test = unitOL (
JXX cond id
- )
+ ) `appOL` jmpFalse
or_unordered = toOL [
JXX cond id,
JXX PARITY id
- ]
+ ] `appOL` jmpFalse
and_ordered = toOL [
- JXX PARITY lbl,
+ JXX PARITY false,
JXX cond id,
- JXX ALWAYS lbl,
- NEWBLOCK lbl
+ JXX ALWAYS false
]
+ updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false)
return (cond_code `appOL` code)
-- -----------------------------------------------------------------------------
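
As a reading aid for the parity-check hunk above: with the locally synthesized lbl block gone, genCondBranch' now ends every shape with an explicit jump to the false target. Below is a minimal sketch of the three emitted jump shapes, using simplified stand-in types rather than the real X86.Instr and X86.Cond definitions:

    module ParityShapes where

    -- Simplified stand-ins; the real types live in X86.Instr / X86.Cond.
    data Cond = NE | GU | GEU | PARITY | ALWAYS deriving Show
    newtype BlockId = BlockId String deriving Show
    data Instr = JXX Cond BlockId deriving Show

    -- Unordered operands (NaN) must not take the branch, so the plain test suffices.
    plainTest :: Cond -> BlockId -> BlockId -> [Instr]
    plainTest cond true false = [JXX cond true, JXX ALWAYS false]

    -- NE: an unordered result also counts as "not equal", so PARITY jumps to true.
    orUnordered :: Cond -> BlockId -> BlockId -> [Instr]
    orUnordered cond true false = [JXX cond true, JXX PARITY true, JXX ALWAYS false]

    -- Ordered comparisons: a PARITY (NaN) result must reach the false block first.
    andOrdered :: Cond -> BlockId -> BlockId -> [Instr]
    andOrdered cond true false = [JXX PARITY false, JXX cond true, JXX ALWAYS false]

The updateCfgNat call at the end of that hunk then bumps the weight of the bid -> false edge by 3, matching the earlier assumption that the false branch is the likely (fallthrough) one.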
@@ -1774,6 +1788,7 @@ genCCall
-> ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
-> [CmmActual] -- arguments (of mixed type)
+ -> BlockId -- The block we are in
-> NatM InstrBlock
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -1782,7 +1797,7 @@ genCCall
-- least DWORD aligned and the number of bytes to copy isn't too
-- large. Otherwise, call C's memcpy.
genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
- [dst, src, CmmLit (CmmInt n _)]
+ [dst, src, CmmLit (CmmInt n _)] _
| fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat format
@@ -1832,6 +1847,7 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
[dst,
CmmLit (CmmInt c _),
CmmLit (CmmInt n _)]
+ _
| fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat format
@@ -1872,13 +1888,13 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
(ImmInteger (n - i))
-genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
+genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
-genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
+genCCall _ _ (PrimTarget MO_Touch) _ _ _ = return nilOL
-genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
+genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ =
case n of
0 -> genPrefetch src $ PREFETCH NTA format
1 -> genPrefetch src $ PREFETCH Lvl2 format
@@ -1899,7 +1915,7 @@ genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
-- prefetch always takes an address
-genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
+genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do
let platform = targetPlatform dflags
let dst_r = getRegisterReg platform False (CmmLocal dst)
case width of
@@ -1922,7 +1938,7 @@ genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
format = intFormat width
genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
- args@[src] = do
+ args@[src] bid = do
sse4_2 <- sse4_2Enabled
let platform = targetPlatform dflags
if sse4_2
@@ -1947,13 +1963,13 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
let target = ForeignTarget targetExpr (ForeignConvention CCallConv
[NoHint] [NoHint]
CmmMayReturn)
- genCCall dflags is32Bit target dest_regs args
+ genCCall dflags is32Bit target dest_regs args bid
where
format = intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
- args@[src, mask] = do
+ args@[src, mask] bid = do
let platform = targetPlatform dflags
if isBmi2Enabled dflags
then do code_src <- getAnyReg src
@@ -1980,13 +1996,13 @@ genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
let target = ForeignTarget targetExpr (ForeignConvention CCallConv
[NoHint] [NoHint]
CmmMayReturn)
- genCCall dflags is32Bit target dest_regs args
+ genCCall dflags is32Bit target dest_regs args bid
where
format = intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
- args@[src, mask] = do
+ args@[src, mask] bid = do
let platform = targetPlatform dflags
if isBmi2Enabled dflags
then do code_src <- getAnyReg src
@@ -2013,19 +2029,19 @@ genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
let target = ForeignTarget targetExpr (ForeignConvention CCallConv
[NoHint] [NoHint]
CmmMayReturn)
- genCCall dflags is32Bit target dest_regs args
+ genCCall dflags is32Bit target dest_regs args bid
where
format = intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
-genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
+genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] bid
| is32Bit && width == W64 = do
-- Fallback to `hs_clz64` on i386
targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
let target = ForeignTarget targetExpr (ForeignConvention CCallConv
[NoHint] [NoHint]
CmmMayReturn)
- genCCall dflags is32Bit target dest_regs args
+ genCCall dflags is32Bit target dest_regs args bid
| otherwise = do
code_src <- getAnyReg src
@@ -2050,7 +2066,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
format = if width == W8 then II16 else intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
-genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
+genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
| is32Bit, width == W64 = do
ChildCode64 vcode rlo <- iselExpr64 src
let rhi = getHiVRegFromLo rlo
@@ -2059,6 +2075,14 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
lbl2 <- getBlockIdNat
tmp_r <- getNewRegNat format
+ -- New CFG Edges:
+ -- bid -> lbl2
+ -- bid -> lbl1 -> lbl2
+ -- We also change edges originating at bid to start at lbl2 instead.
+ updateCfgNat (addWeightEdge bid lbl1 110 .
+ addWeightEdge lbl1 lbl2 110 .
+ addImmediateSuccessor bid lbl2)
+
-- The following instruction sequence corresponds to the pseudo-code
--
-- if (src) {
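
The "New CFG Edges" comment in the hunk above describes the bookkeeping for the two fresh blocks lbl1 and lbl2. Purely as an illustration of that idea, here is a toy weighted-edge map with the two helpers it relies on; GHC's real CFG module tracks richer per-edge information and picks its own default weights:

    module CfgSketch where

    import qualified Data.Map.Strict as M

    type BlockId = Int
    type Weight  = Int
    type CFG     = M.Map (BlockId, BlockId) Weight   -- edge -> weight

    addWeightEdge :: BlockId -> BlockId -> Weight -> CFG -> CFG
    addWeightEdge from to w = M.insert (from, to) w

    -- Insert 'new' between 'bid' and its current successors: edges leaving
    -- 'bid' now leave 'new', and 'bid' gets a single edge to 'new'.
    addImmediateSuccessor :: BlockId -> BlockId -> CFG -> CFG
    addImmediateSuccessor bid new cfg =
      let (outOfBid, rest) = M.partitionWithKey (\(from, _) _ -> from == bid) cfg
          moved            = M.mapKeys (\(_, to) -> (new, to)) outOfBid
      in addWeightEdge bid new 100 (M.union moved rest)   -- weight 100 is arbitrary here

Under that reading, addImmediateSuccessor bid lbl2 makes lbl2 inherit bid's outgoing edges, and the two addWeightEdge calls add the fallback path bid -> lbl1 -> lbl2.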
@@ -2104,17 +2128,18 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
platform = targetPlatform dflags
format = if width == W8 then II16 else intFormat width
-genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
+genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do
targetExpr <- cmmMakeDynamicReference dflags
CallReference lbl
let target = ForeignTarget targetExpr (ForeignConvention CCallConv
[NoHint] [NoHint]
CmmMayReturn)
- genCCall dflags is32Bit target dest_regs args
+ genCCall dflags is32Bit target dest_regs args bid
where
lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
-genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
+genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
+ [dst] [addr, n] bid = do
Amode amode addr_code <-
if amop `elem` [AMO_Add, AMO_Sub]
then getAmode addr
@@ -2157,6 +2182,11 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] =
cmpxchg_code instrs = do
lbl <- getBlockIdNat
tmp <- getNewRegNat format
+
+ --Record inserted blocks
+ addImmediateSuccessorNat bid lbl
+ updateCfgNat (addWeightEdge lbl lbl 0)
+
return $ toOL
[ MOV format (OpAddr amode) (OpReg eax)
, JXX ALWAYS lbl
@@ -2172,17 +2202,17 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] =
format = intFormat width
-genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
+genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do
load_code <- intLoadCode (MOV (intFormat width)) addr
let platform = targetPlatform dflags
use_sse2 <- sse2Enabled
return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
-genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
+genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do
code <- assignMem_IntCode (intFormat width) addr val
return $ code `snocOL` MFENCE
-genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
+genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = do
-- On x86 we don't have enough registers to use cmpxchg with a
-- complicated addressing mode, so on that architecture we
-- pre-compute the address first.
@@ -2204,14 +2234,14 @@ genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] =
where
format = intFormat width
-genCCall _ is32Bit target dest_regs args = do
+genCCall _ is32Bit target dest_regs args bid = do
dflags <- getDynFlags
let platform = targetPlatform dflags
sse2 = isSse2Enabled dflags
case (target, dest_regs) of
-- void return type prim op
(PrimTarget op, []) ->
- outOfLineCmmOp op Nothing args
+ outOfLineCmmOp bid op Nothing args
-- we only cope with a single result for foreign calls
(PrimTarget op, [r])
| sse2 -> case op of
@@ -2224,12 +2254,12 @@ genCCall _ is32Bit target dest_regs args = do
MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
- _other_op -> outOfLineCmmOp op (Just r) args
+ _other_op -> outOfLineCmmOp bid op (Just r) args
| otherwise -> do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
if sse2
- then outOfLineCmmOp op (Just r) args
+ then outOfLineCmmOp bid op (Just r) args
else case op of
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
@@ -2243,7 +2273,7 @@ genCCall _ is32Bit target dest_regs args = do
MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
- _other_op -> outOfLineCmmOp op (Just r) args
+ _other_op -> outOfLineCmmOp bid op (Just r) args
where
actuallyInlineFloatOp = actuallyInlineFloatOp' False
@@ -2813,15 +2843,16 @@ maybePromoteCArg dflags wto arg
where
wfrom = cmmExprWidth dflags arg
-outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
-outOfLineCmmOp mop res args
+outOfLineCmmOp :: BlockId -> CallishMachOp -> Maybe CmmFormal -> [CmmActual]
+ -> NatM InstrBlock
+outOfLineCmmOp bid mop res args
= do
dflags <- getDynFlags
targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
let target = ForeignTarget targetExpr
(ForeignConvention CCallConv [] [] CmmMayReturn)
- stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
+ stmtToInstrs bid (CmmUnsafeForeignCall target (catMaybes [res]) args)
where
-- Assume we can call these functions directly, and that they're not in a dynamic library.
-- TODO: Why is this ok? Under linux this code will be in libm.so
@@ -3389,3 +3420,55 @@ needLlvm :: NatM a
needLlvm =
sorry $ unlines ["The native code generator does not support vector"
,"instructions. Please use -fllvm."]
+
+-- | This works on the invariant that all jumps in the given blocks are required.
+-- Starting from there we try to make a few more jumps redundant by reordering
+-- them.
+invertCondBranches :: CFG -> LabelMap a -> [NatBasicBlock Instr]
+ -> [NatBasicBlock Instr]
+invertCondBranches cfg keep bs =
+ --trace "Foo" $
+ invert bs
+ where
+ invert :: [NatBasicBlock Instr] -> [NatBasicBlock Instr]
+ invert ((BasicBlock lbl1 ins@(_:_:_xs)):b2@(BasicBlock lbl2 _):bs)
+ | --pprTrace "Block" (ppr lbl1) True,
+ (jmp1,jmp2) <- last2 ins
+ , JXX cond1 target1 <- jmp1
+ , target1 == lbl2
+ --, pprTrace "CutChance" (ppr b1) True
+ , JXX ALWAYS target2 <- jmp2
+ -- We have enough information to check if we can perform the inversion
+ -- TODO: We could also check for the last asm instruction which sets
+ -- status flags instead. Which I suspect is worse in terms of compiler
+ -- performance, but might be applicable to more cases
+ , Just edgeInfo1 <- getEdgeInfo lbl1 target1 cfg
+ , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg
+ -- Both jumps come from the same cmm statement
+ , transitionSource edgeInfo1 == transitionSource edgeInfo2
+ , (CmmSource cmmCondBranch) <- transitionSource edgeInfo1
+
+ --Int comparisons are invertible
+ , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch
+ , Just _ <- maybeIntComparison op
+ , Just invCond <- maybeInvertCond cond1
+
+ --Swap the last two jumps, invert the conditional jumps condition.
+ = let jumps =
+ case () of
+ -- We are free to eliminate the jmp. So we do so.
+ _ | not (mapMember target1 keep)
+ -> [JXX invCond target2]
+ -- If the conditional target is unlikely we put the other
+ -- target at the front.
+ | edgeWeight edgeInfo2 > edgeWeight edgeInfo1
+ -> [JXX invCond target2, JXX ALWAYS target1]
+ -- Keep things as-is otherwise
+ | otherwise
+ -> [jmp1, jmp2]
+ in --pprTrace "Cutable" (ppr [jmp1,jmp2] <+> text "=>" <+> ppr jumps) $
+ (BasicBlock lbl1
+ (dropTail 2 ins ++ jumps))
+ : invert (b2:bs)
+ invert (b:bs) = b : invert bs
+ invert [] = []
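
To make the effect of invertCondBranches concrete: when a block ends in a conditional jump to the very next block in the layout followed by an unconditional jump elsewhere, inverting the condition turns the conditional target into the fallthrough. Below is a minimal sketch of just that core rewrite, with a simplified instruction type and ignoring the keep set and edge-weight comparison that the real code consults:

    module InvertSketch where

    data Cond = EQQ | NE | LTT | GE | ALWAYS deriving (Eq, Show)
    newtype BlockId = BlockId Int deriving (Eq, Show)
    data Instr = JXX Cond BlockId deriving (Eq, Show)

    maybeInvertCond :: Cond -> Maybe Cond
    maybeInvertCond EQQ = Just NE
    maybeInvertCond NE  = Just EQQ
    maybeInvertCond LTT = Just GE
    maybeInvertCond GE  = Just LTT
    maybeInvertCond _   = Nothing

    -- If a block ends in [JXX cond next, JXX ALWAYS other] and 'next' is laid
    -- out directly after it, flip the condition: the unconditional jump is
    -- redirected and 'next' is reached by falling through.
    invertTail :: BlockId -> [Instr] -> [Instr]
    invertTail next [JXX cond t1, JXX ALWAYS t2]
      | t1 == next
      , Just cond' <- maybeInvertCond cond
      = [JXX cond' t2]
    invertTail _ jumps = jumps

    -- e.g. invertTail (BlockId 2) [JXX EQQ (BlockId 2), JXX ALWAYS (BlockId 3)]
    --        == [JXX NE (BlockId 3)]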