compiler/GHC/CmmToAsm/SPARC/CodeGen/Gen32.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692

-- | Evaluation of 32 bit values.
module GHC.CmmToAsm.SPARC.CodeGen.Gen32 (
        getSomeReg,
        getRegister
)

where

import GhcPrelude

import GHC.CmmToAsm.SPARC.CodeGen.CondCode
import GHC.CmmToAsm.SPARC.CodeGen.Amode
import GHC.CmmToAsm.SPARC.CodeGen.Gen64
import GHC.CmmToAsm.SPARC.CodeGen.Base
import GHC.CmmToAsm.SPARC.Stack
import GHC.CmmToAsm.SPARC.Instr
import GHC.CmmToAsm.SPARC.Cond
import GHC.CmmToAsm.SPARC.AddrMode
import GHC.CmmToAsm.SPARC.Imm
import GHC.CmmToAsm.SPARC.Regs
import GHC.CmmToAsm.SPARC.Base
import GHC.CmmToAsm.Monad
import GHC.CmmToAsm.Format
import GHC.Platform.Reg

import GHC.Cmm

import Control.Monad (liftM)
import GHC.Driver.Session
import OrdList
import Outputable

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


-- | Make code to evaluate a 32 bit expression.
--
getRegister :: CmmExpr -> NatM Register

getRegister (CmmReg reg)
  = do dflags <- getDynFlags
       let platform = targetPlatform dflags
       return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
                     (getRegisterReg platform reg) nilOL)

getRegister tree@(CmmRegOff _ _)
  = do dflags <- getDynFlags
       getRegister (mangleIndexTree dflags tree)

getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code


-- Load a literal float into a float register.
--      The actual literal is stored in a new data area, and we load it
--      at runtime.
getRegister (CmmLit (CmmFloat f W32)) = do

    -- a label for the new data area
    lbl <- getNewLabelNat
    tmp <- getNewRegNat II32

    let code dst = toOL [
            -- the data area
            LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl
                         [CmmStaticLit (CmmFloat f W32)],

            -- load the literal
            SETHI (HI (ImmCLbl lbl)) tmp,
            LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]

    return (Any FF32 code)

getRegister (CmmLit (CmmFloat d W64)) = do
    lbl <- getNewLabelNat
    tmp <- getNewRegNat II32
    let code dst = toOL [
            LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl
                         [CmmStaticLit (CmmFloat d W64)],
            SETHI (HI (ImmCLbl lbl)) tmp,
            LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
    return (Any FF64 code)


-- Unary machine ops
getRegister (CmmMachOp mop [x])
  = case mop of
        -- Floating point negation -------------------------
        MO_F_Neg W32            -> trivialUFCode FF32 (FNEG FF32) x
        MO_F_Neg W64            -> trivialUFCode FF64 (FNEG FF64) x


        -- Integer negation --------------------------------
        MO_S_Neg rep            -> trivialUCode (intFormat rep) (SUB False False g0) x
        MO_Not rep              -> trivialUCode (intFormat rep) (XNOR False g0) x


        -- Float word size conversion ----------------------
        MO_FF_Conv W64 W32      -> coerceDbl2Flt x
        MO_FF_Conv W32 W64      -> coerceFlt2Dbl x


        -- Float <-> Signed Int conversion -----------------
        MO_FS_Conv from to      -> coerceFP2Int from to x
        MO_SF_Conv from to      -> coerceInt2FP from to x


        -- Unsigned integer word size conversions ----------

        -- If it's the same size, then nothing needs to be done.
        MO_UU_Conv from to
         | from == to           -> conversionNop (intFormat to)  x

        -- To narrow an unsigned word, mask out the high bits to simulate what would
        --      happen if we copied the value into a smaller register.
        MO_UU_Conv W16 W8       -> trivialCode W8  (AND False) x (CmmLit (CmmInt 255 W8))
        MO_UU_Conv W32 W8       -> trivialCode W8  (AND False) x (CmmLit (CmmInt 255 W8))

        -- for narrowing 32 bit to 16 bit, don't use a literal mask value like the W16->W8
        --      case because the only way we can load it is via SETHI, which needs 2 ops.
        --      Do some shifts to chop out the high bits instead.
        MO_UU_Conv W32 W16
         -> do  tmpReg          <- getNewRegNat II32
                (xReg, xCode)   <- getSomeReg x
                let code dst
                        =       xCode
                        `appOL` toOL
                                [ SLL xReg   (RIImm $ ImmInt 16) tmpReg
                                , SRL tmpReg (RIImm $ ImmInt 16) dst]

                return  $ Any II32 code

                --       trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16))

        -- To widen an unsigned word we don't have to do anything.
        --      Just leave it in the same register and mark the result as the new size.
        MO_UU_Conv W8  W16      -> conversionNop (intFormat W16)  x
        MO_UU_Conv W8  W32      -> conversionNop (intFormat W32)  x
        MO_UU_Conv W16 W32      -> conversionNop (intFormat W32)  x


        -- Signed integer word size conversions ------------

        -- Mask out high bits when narrowing them
        MO_SS_Conv W16 W8       -> trivialCode W8  (AND False) x (CmmLit (CmmInt 255 W8))
        MO_SS_Conv W32 W8       -> trivialCode W8  (AND False) x (CmmLit (CmmInt 255 W8))
        MO_SS_Conv W32 W16      -> trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16))

        -- Sign extend signed words when widening them.
        MO_SS_Conv W8  W16      -> integerExtend W8  W16 x
        MO_SS_Conv W8  W32      -> integerExtend W8  W32 x
        MO_SS_Conv W16 W32      -> integerExtend W16 W32 x

        _                       -> panic ("Unknown unary mach op: " ++ show mop)


-- Binary machine ops
getRegister (CmmMachOp mop [x, y])
  = case mop of
      MO_Eq _           -> condIntReg EQQ x y
      MO_Ne _           -> condIntReg NE x y

      MO_S_Gt _         -> condIntReg GTT x y
      MO_S_Ge _         -> condIntReg GE x y
      MO_S_Lt _         -> condIntReg LTT x y
      MO_S_Le _         -> condIntReg LE x y

      MO_U_Gt W32       -> condIntReg GU  x y
      MO_U_Ge W32       -> condIntReg GEU x y
      MO_U_Lt W32       -> condIntReg LU  x y
      MO_U_Le W32       -> condIntReg LEU x y

      MO_U_Gt W16       -> condIntReg GU  x y
      MO_U_Ge W16       -> condIntReg GEU x y
      MO_U_Lt W16       -> condIntReg LU  x y
      MO_U_Le W16       -> condIntReg LEU x y

      MO_Add W32        -> trivialCode W32 (ADD False False) x y
      MO_Sub W32        -> trivialCode W32 (SUB False False) x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_S_Quot W32     -> idiv True  False x y
      MO_U_Quot W32     -> idiv False False x y

      MO_S_Rem  W32     -> irem True  x y
      MO_U_Rem  W32     -> irem False x y

      MO_F_Eq _         -> condFltReg EQQ x y
      MO_F_Ne _         -> condFltReg NE x y

      MO_F_Gt _         -> condFltReg GTT x y
      MO_F_Ge _         -> condFltReg GE x y
      MO_F_Lt _         -> condFltReg LTT x y
      MO_F_Le _         -> condFltReg LE x y

      MO_F_Add  w       -> trivialFCode w FADD x y
      MO_F_Sub  w       -> trivialFCode w FSUB x y
      MO_F_Mul  w       -> trivialFCode w FMUL x y
      MO_F_Quot w       -> trivialFCode w FDIV x y

      MO_And rep        -> trivialCode rep (AND False) x y
      MO_Or  rep        -> trivialCode rep (OR  False) x y
      MO_Xor rep        -> trivialCode rep (XOR False) x y

      MO_Mul rep        -> trivialCode rep (SMUL False) x y

      MO_Shl rep        -> trivialCode rep SLL  x y
      MO_U_Shr rep      -> trivialCode rep SRL x y
      MO_S_Shr rep      -> trivialCode rep SRA x y

      _                 -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)

getRegister (CmmLoad mem pk) = do
    Amode src code <- getAmode mem
    let
        code__2 dst     = code `snocOL` LD (cmmTypeFormat pk) src dst
    return (Any (cmmTypeFormat pk) code__2)

getRegister (CmmLit (CmmInt i _))
  | fits13Bits i
  = let
        src = ImmInt (fromInteger i)
        code dst = unitOL (OR False g0 (RIImm src) dst)
    in
        return (Any II32 code)

getRegister (CmmLit lit)
  = let imm = litToImm lit
        code dst = toOL [
            SETHI (HI imm) dst,
            OR False dst (RIImm (LO imm)) dst]
    in return (Any II32 code)


getRegister _
        = panic "SPARC.CodeGen.Gen32.getRegister: no match"


-- | sign extend and widen
integerExtend
        :: Width                -- ^ width of source expression
        -> Width                -- ^ width of result
        -> CmmExpr              -- ^ source expression
        -> NatM Register

integerExtend from to expr
 = do   -- load the expr into some register
        (reg, e_code)   <- getSomeReg expr
        tmp             <- getNewRegNat II32
        let bitCount
                = case (from, to) of
                        (W8,  W32)      -> 24
                        (W16, W32)      -> 16
                        (W8,  W16)      -> 24
                        _               -> panic "SPARC.CodeGen.Gen32: no match"
        let code dst
                = e_code

                -- local shift word left to load the sign bit
                `snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp

                -- arithmetic shift right to sign extend
                `snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst

        return (Any (intFormat to) code)


-- | For nop word format conversions we set the resulting value to have the
--      required size, but don't need to generate any actual code.
--
conversionNop
        :: Format -> CmmExpr -> NatM Register

conversionNop new_rep expr
 = do   e_code <- getRegister expr
        return (setFormatOfRegister e_code new_rep)


-- | Generate an integer division instruction.
idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register

-- For unsigned division with a 32 bit numerator,
--              we can just clear the Y register.
idiv False cc x y
 = do
        (a_reg, a_code)         <- getSomeReg x
        (b_reg, b_code)         <- getSomeReg y

        let code dst
                =       a_code
                `appOL` b_code
                `appOL` toOL
                        [ WRY  g0 g0
                        , UDIV cc a_reg (RIReg b_reg) dst]

        return (Any II32 code)


-- For _signed_ division with a 32 bit numerator,
--              we have to sign extend the numerator into the Y register.
idiv True cc x y
 = do
        (a_reg, a_code)         <- getSomeReg x
        (b_reg, b_code)         <- getSomeReg y

        tmp                     <- getNewRegNat II32

        let code dst
                =       a_code
                `appOL` b_code
                `appOL` toOL
                        [ SRA  a_reg (RIImm (ImmInt 16)) tmp            -- sign extend
                        , SRA  tmp   (RIImm (ImmInt 16)) tmp

                        , WRY  tmp g0
                        , SDIV cc a_reg (RIReg b_reg) dst]

        return (Any II32 code)


-- | Do an integer remainder.
--
--       NOTE:  The SPARC v8 architecture manual says that integer division
--              instructions _may_ generate a remainder, depending on the implementation.
--              If so it is _recommended_ that the remainder is placed in the Y register.
--
--          The UltraSparc 2007 manual says Y is _undefined_ after division.
--
--              The SPARC T2 doesn't store the remainder, not sure about the others.
--              It's probably best not to worry about it, and just generate our own
--              remainders.
--
irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register

-- For unsigned operands:
--              Division is between a 64 bit numerator and a 32 bit denominator,
--              so we still have to clear the Y register.
irem False x y
 = do
        (a_reg, a_code) <- getSomeReg x
        (b_reg, b_code) <- getSomeReg y

        tmp_reg         <- getNewRegNat II32

        let code dst
                =       a_code
                `appOL` b_code
                `appOL` toOL
                        [ WRY   g0 g0
                        , UDIV  False         a_reg (RIReg b_reg) tmp_reg
                        , UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
                        , SUB   False False   a_reg (RIReg tmp_reg) dst]

        return  (Any II32 code)


-- For signed operands:
--              Make sure to sign extend into the Y register, or the remainder
--              will have the wrong sign when the numerator is negative.
--
--      TODO:   When sign extending, GCC only shifts the a_reg right by 17 bits,
--              not the full 32. Not sure why this is, something to do with overflow?
--              If anyone cares enough about the speed of signed remainder they
--              can work it out themselves (then tell me). -- BL 2009/01/20
irem True x y
 = do
        (a_reg, a_code) <- getSomeReg x
        (b_reg, b_code) <- getSomeReg y

        tmp1_reg        <- getNewRegNat II32
        tmp2_reg        <- getNewRegNat II32

        let code dst
                =       a_code
                `appOL` b_code
                `appOL` toOL
                        [ SRA   a_reg      (RIImm (ImmInt 16)) tmp1_reg -- sign extend
                        , SRA   tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg -- sign extend
                        , WRY   tmp1_reg g0

                        , SDIV  False          a_reg (RIReg b_reg)    tmp2_reg
                        , SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
                        , SUB   False False    a_reg (RIReg tmp2_reg) dst]

        return (Any II32 code)


imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo rep a b
 = do
        (a_reg, a_code) <- getSomeReg a
        (b_reg, b_code) <- getSomeReg b
        res_lo <- getNewRegNat II32
        res_hi <- getNewRegNat II32

        let shift_amt  = case rep of
                          W32 -> 31
                          W64 -> 63
                          _ -> panic "shift_amt"

        let code dst = a_code `appOL` b_code `appOL`
                       toOL [
                           SMUL False a_reg (RIReg b_reg) res_lo,
                           RDY res_hi,
                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
                           SUB False False res_lo (RIReg res_hi) dst
                        ]
        return (Any II32 code)


-- -----------------------------------------------------------------------------
-- 'trivial*Code': deal with trivial instructions

-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
-- Only look for constants on the right hand side, because that's
-- where the generic optimizer will have put them.

-- Similarly, for unary instructions, we don't have to worry about
-- matching an StInt as the argument, because genericOpt will already
-- have handled the constant-folding.

trivialCode
        :: Width
        -> (Reg -> RI -> Reg -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register

trivialCode _ instr x (CmmLit (CmmInt y _))
  | fits13Bits y
  = do
      (src1, code) <- getSomeReg x
      let
        src2 = ImmInt (fromInteger y)
        code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
      return (Any II32 code__2)


trivialCode _ instr x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
        code__2 dst = code1 `appOL` code2 `snocOL`
                      instr src1 (RIReg src2) dst
    return (Any II32 code__2)


trivialFCode
        :: Width
        -> (Format -> Reg -> Reg -> Reg -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register

trivialFCode pk instr x y = do
    dflags <- getDynFlags
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp <- getNewRegNat FF64
    let
        promote x = FxTOy FF32 FF64 x tmp

        pk1   = cmmExprType dflags x
        pk2   = cmmExprType dflags y

        code__2 dst =
                if pk1 `cmmEqType` pk2 then
                    code1 `appOL` code2 `snocOL`
                    instr (floatFormat pk) src1 src2 dst
                else if typeWidth pk1 == W32 then
                    code1 `snocOL` promote src1 `appOL` code2 `snocOL`
                    instr FF64 tmp src2 dst
                else
                    code1 `appOL` code2 `snocOL` promote src2 `snocOL`
                    instr FF64 src1 tmp dst
    return (Any (cmmTypeFormat $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
                code__2)


trivialUCode
        :: Format
        -> (RI -> Reg -> Instr)
        -> CmmExpr
        -> NatM Register

trivialUCode format instr x = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `snocOL` instr (RIReg src) dst
    return (Any format code__2)


trivialUFCode
        :: Format
        -> (Reg -> Reg -> Instr)
        -> CmmExpr
        -> NatM Register

trivialUFCode pk instr x = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `snocOL` instr src dst
    return (Any pk code__2)


-- Coercions -------------------------------------------------------------------

-- | Coerce a integer value to floating point
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP width1 width2 x = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `appOL` toOL [
            ST (intFormat width1) src (spRel (-2)),
            LD (intFormat width1) (spRel (-2)) dst,
            FxTOy (intFormat width1) (floatFormat width2) dst dst]
    return (Any (floatFormat $ width2) code__2)


-- | Coerce a floating point value to integer
--
--   NOTE: On sparc v9 there are no instructions to move a value from an
--         FP register directly to an int register, so we have to use a load/store.
--
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int width1 width2 x
 = do   let fformat1      = floatFormat width1
            fformat2      = floatFormat width2

            iformat2      = intFormat   width2

        (fsrc, code)    <- getSomeReg x
        fdst            <- getNewRegNat fformat2

        let code2 dst
                =       code
                `appOL` toOL
                        -- convert float to int format, leaving it in a float reg.
                        [ FxTOy fformat1 iformat2 fsrc fdst

                        -- store the int into mem, then load it back to move
                        --      it into an actual int reg.
                        , ST    fformat2 fdst (spRel (-2))
                        , LD    iformat2 (spRel (-2)) dst]

        return (Any iformat2 code2)


-- | Coerce a double precision floating point value to single precision.
coerceDbl2Flt :: CmmExpr -> NatM Register
coerceDbl2Flt x = do
    (src, code) <- getSomeReg x
    return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))


-- | Coerce a single precision floating point value to double precision
coerceFlt2Dbl :: CmmExpr -> NatM Register
coerceFlt2Dbl x = do
    (src, code) <- getSomeReg x
    return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))


-- Condition Codes -------------------------------------------------------------
--
-- Evaluate a comparison, and get the result into a register.
--
-- Do not fill the delay slots here. you will confuse the register allocator.
--
condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
condIntReg EQQ x (CmmLit (CmmInt 0 _)) = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `appOL` toOL [
            SUB False True g0 (RIReg src) g0,
            SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any II32 code__2)

condIntReg EQQ x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
        code__2 dst = code1 `appOL` code2 `appOL` toOL [
            XOR False src1 (RIReg src2) dst,
            SUB False True g0 (RIReg dst) g0,
            SUB True False g0 (RIImm (ImmInt (-1))) dst]
    return (Any II32 code__2)

condIntReg NE x (CmmLit (CmmInt 0 _)) = do
    (src, code) <- getSomeReg x
    let
        code__2 dst = code `appOL` toOL [
            SUB False True g0 (RIReg src) g0,
            ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any II32 code__2)

condIntReg NE x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
        code__2 dst = code1 `appOL` code2 `appOL` toOL [
            XOR False src1 (RIReg src2) dst,
            SUB False True g0 (RIReg dst) g0,
            ADD True False g0 (RIImm (ImmInt 0)) dst]
    return (Any II32 code__2)

condIntReg cond x y = do
    bid1 <- liftM (\a -> seq a a) getBlockIdNat
    bid2 <- liftM (\a -> seq a a) getBlockIdNat
    CondCode _ cond cond_code <- condIntCode cond x y
    let
        code__2 dst
         =      cond_code
          `appOL` toOL
                [ BI cond False bid1
                , NOP

                , OR False g0 (RIImm (ImmInt 0)) dst
                , BI ALWAYS False bid2
                , NOP

                , NEWBLOCK bid1
                , OR False g0 (RIImm (ImmInt 1)) dst
                , BI ALWAYS False bid2
                , NOP

                , NEWBLOCK bid2]

    return (Any II32 code__2)


condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
condFltReg cond x y = do
    bid1 <- liftM (\a -> seq a a) getBlockIdNat
    bid2 <- liftM (\a -> seq a a) getBlockIdNat

    CondCode _ cond cond_code <- condFltCode cond x y
    let
        code__2 dst
         =      cond_code
          `appOL` toOL
                [ NOP
                , BF cond False bid1
                , NOP

                , OR False g0 (RIImm (ImmInt 0)) dst
                , BI ALWAYS False bid2
                , NOP

                , NEWBLOCK bid1
                , OR False g0 (RIImm (ImmInt 1)) dst
                , BI ALWAYS False bid2
                , NOP

                , NEWBLOCK bid2 ]

    return (Any II32 code__2)