summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/RegAllocInfo.hs
blob: 98c4e2dfe061310d19ec1cfc1481f1c1e819e5c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
-----------------------------------------------------------------------------
--
-- Machine-specific parts of the register allocator
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

#include "nativeGen/NCG.h"

module RegAllocInfo (
	RegUsage(..),
	noUsage,
	regUsage,
	patchRegs,
	jumpDests,
	patchJump,
	isRegRegMove,

	maxSpillSlots,
	mkSpillInstr,
	mkLoadInstr,
	mkRegRegMoveInstr,
	mkBranchInstr
    ) where

#include "HsVersions.h"

import Cmm		( BlockId )
import MachOp           ( MachRep(..), wordRep )
import MachInstrs
import MachRegs
import Outputable
import Constants	( rESERVED_C_STACK_BYTES )
import FastTypes

-- -----------------------------------------------------------------------------
-- RegUsage type

-- @regUsage@ returns the sets of src and destination registers used
-- by a particular instruction.  Machine registers that are
-- pre-allocated to stgRegs are filtered out, because they are
-- uninteresting from a register allocation standpoint.  (We wouldn't
-- want them to end up on the free list!)  As far as we are concerned,
-- the fixed registers simply don't exist (for allocation purposes,
-- anyway).

-- regUsage doesn't need to do any trickery for jumps and such.  Just
-- state precisely the regs read and written by that insn.  The
-- consequences of control flow transfers, as far as register
-- allocation goes, are taken care of by the register allocator.

data RegUsage = RU [Reg] [Reg]

noUsage :: RegUsage
noUsage  = RU [] []

regUsage :: Instr -> RegUsage

interesting (VirtualRegI  _)  = True
interesting (VirtualRegHi _)  = True
interesting (VirtualRegF  _)  = True
interesting (VirtualRegD  _)  = True
interesting (RealReg i)       = isFastTrue (freeReg i)


#if alpha_TARGET_ARCH
regUsage instr = case instr of
    LD B reg addr	-> usage (regAddr addr, [reg, t9])
    LD Bu reg addr	-> usage (regAddr addr, [reg, t9])
--  LD W reg addr	-> usage (regAddr addr, [reg, t9]) : UNUSED
--  LD Wu reg addr	-> usage (regAddr addr, [reg, t9]) : UNUSED
    LD sz reg addr	-> usage (regAddr addr, [reg])
    LDA reg addr	-> usage (regAddr addr, [reg])
    LDAH reg addr	-> usage (regAddr addr, [reg])
    LDGP reg addr	-> usage (regAddr addr, [reg])
    LDI sz reg imm	-> usage ([], [reg])
    ST B reg addr	-> usage (reg : regAddr addr, [t9, t10])
--  ST W reg addr	-> usage (reg : regAddr addr, [t9, t10]) : UNUSED
    ST sz reg addr	-> usage (reg : regAddr addr, [])
    CLR reg		-> usage ([], [reg])
    ABS sz ri reg	-> usage (regRI ri, [reg])
    NEG sz ov ri reg	-> usage (regRI ri, [reg])
    ADD sz ov r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SADD sz sc r1 ar r2 -> usage (r1 : regRI ar, [r2])
    SUB sz ov r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SSUB sz sc r1 ar r2 -> usage (r1 : regRI ar, [r2])
    MUL sz ov r1 ar r2	-> usage (r1 : regRI ar, [r2])
    DIV sz un r1 ar r2	-> usage (r1 : regRI ar, [r2, t9, t10, t11, t12])
    REM sz un r1 ar r2	-> usage (r1 : regRI ar, [r2, t9, t10, t11, t12])
    NOT ri reg		-> usage (regRI ri, [reg])
    AND r1 ar r2	-> usage (r1 : regRI ar, [r2])
    ANDNOT r1 ar r2	-> usage (r1 : regRI ar, [r2])
    OR r1 ar r2		-> usage (r1 : regRI ar, [r2])
    ORNOT r1 ar r2	-> usage (r1 : regRI ar, [r2])
    XOR r1 ar r2	-> usage (r1 : regRI ar, [r2])
    XORNOT r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SLL r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SRL r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SRA r1 ar r2	-> usage (r1 : regRI ar, [r2])
    ZAP r1 ar r2	-> usage (r1 : regRI ar, [r2])
    ZAPNOT r1 ar r2	-> usage (r1 : regRI ar, [r2])
    CMP co r1 ar r2	-> usage (r1 : regRI ar, [r2])
    FCLR reg		-> usage ([], [reg])
    FABS r1 r2		-> usage ([r1], [r2])
    FNEG sz r1 r2	-> usage ([r1], [r2])
    FADD sz r1 r2 r3	-> usage ([r1, r2], [r3])
    FDIV sz r1 r2 r3	-> usage ([r1, r2], [r3])
    FMUL sz r1 r2 r3	-> usage ([r1, r2], [r3])
    FSUB sz r1 r2 r3	-> usage ([r1, r2], [r3])
    CVTxy sz1 sz2 r1 r2 -> usage ([r1], [r2])
    FCMP sz co r1 r2 r3 -> usage ([r1, r2], [r3])
    FMOV r1 r2		-> usage ([r1], [r2])


    -- We assume that all local jumps will be BI/BF/BR.	 JMP must be out-of-line.
    BI cond reg lbl	-> usage ([reg], [])
    BF cond reg lbl	-> usage ([reg], [])
    JMP reg addr hint	-> RU (mkRegSet (filter interesting (regAddr addr))) freeRegSet

    BSR _ n		-> RU (argRegSet n) callClobberedRegSet
    JSR reg addr n	-> RU (argRegSet n) callClobberedRegSet

    _			-> noUsage

  where
    usage (src, dst) = RU (mkRegSet (filter interesting src))
			  (mkRegSet (filter interesting dst))

    interesting (FixedReg _) = False
    interesting _ = True

    regAddr (AddrReg r1)      = [r1]
    regAddr (AddrRegImm r1 _) = [r1]
    regAddr (AddrImm _)	      = []

    regRI (RIReg r) = [r]
    regRI  _	= []

#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

regUsage instr = case instr of
    MOV    sz src dst	-> usageRW src dst
    MOVZxL sz src dst	-> usageRW src dst
    MOVSxL sz src dst	-> usageRW src dst
    LEA    sz src dst	-> usageRW src dst
    ADD    sz src dst	-> usageRM src dst
    ADC    sz src dst	-> usageRM src dst
    SUB    sz src dst	-> usageRM src dst
    IMUL   sz src dst	-> usageRM src dst
    IMUL2  sz src       -> mkRU (eax:use_R src) [eax,edx]
    MUL    sz src dst	-> usageRM src dst
    DIV    sz op	-> mkRU (eax:edx:use_R op) [eax,edx]
    IDIV   sz op	-> mkRU (eax:edx:use_R op) [eax,edx]
    AND    sz src dst	-> usageRM src dst
    OR     sz src dst	-> usageRM src dst
    XOR    sz src dst	-> usageRM src dst
    NOT    sz op	-> usageM op
    NEGI   sz op	-> usageM op
    SHL    sz imm dst	-> usageRM imm dst
    SAR    sz imm dst	-> usageRM imm dst
    SHR    sz imm dst	-> usageRM imm dst
    BT     sz imm src	-> mkRUR (use_R src)

    PUSH   sz op	-> mkRUR (use_R op)
    POP    sz op	-> mkRU [] (def_W op)
    TEST   sz src dst	-> mkRUR (use_R src ++ use_R dst)
    CMP    sz src dst	-> mkRUR (use_R src ++ use_R dst)
    SETCC  cond op	-> mkRU [] (def_W op)
    JXX    cond lbl	-> mkRU [] []
    JMP    op		-> mkRUR (use_R op)
    JMP_TBL op ids      -> mkRUR (use_R op)
    CALL (Left imm)  params -> mkRU params callClobberedRegs
    CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
    CLTD   sz		-> mkRU [eax] [edx]
    NOP			-> mkRU [] []

#if i386_TARGET_ARCH
    GMOV   src dst	-> mkRU [src] [dst]
    GLD    sz src dst	-> mkRU (use_EA src) [dst]
    GST    sz src dst	-> mkRUR (src : use_EA dst)

    GLDZ   dst		-> mkRU [] [dst]
    GLD1   dst		-> mkRU [] [dst]

    GFTOI  src dst	-> mkRU [src] [dst]
    GDTOI  src dst	-> mkRU [src] [dst]

    GITOF  src dst	-> mkRU [src] [dst]
    GITOD  src dst	-> mkRU [src] [dst]

    GADD   sz s1 s2 dst	-> mkRU [s1,s2] [dst]
    GSUB   sz s1 s2 dst	-> mkRU [s1,s2] [dst]
    GMUL   sz s1 s2 dst	-> mkRU [s1,s2] [dst]
    GDIV   sz s1 s2 dst	-> mkRU [s1,s2] [dst]

    GCMP   sz src1 src2	-> mkRUR [src1,src2]
    GABS   sz src dst	-> mkRU [src] [dst]
    GNEG   sz src dst	-> mkRU [src] [dst]
    GSQRT  sz src dst	-> mkRU [src] [dst]
    GSIN   sz src dst	-> mkRU [src] [dst]
    GCOS   sz src dst	-> mkRU [src] [dst]
    GTAN   sz src dst	-> mkRU [src] [dst]
#endif

#if x86_64_TARGET_ARCH
    CVTSS2SD src dst	-> mkRU [src] [dst]
    CVTSD2SS src dst	-> mkRU [src] [dst]
    CVTSS2SI src dst	-> mkRU (use_R src) [dst]
    CVTSD2SI src dst	-> mkRU (use_R src) [dst]
    CVTSI2SS src dst	-> mkRU (use_R src) [dst]
    CVTSI2SD src dst	-> mkRU (use_R src) [dst]
    FDIV sz src dst     -> usageRM src dst
#endif    

    FETCHGOT reg        -> mkRU [] [reg]
    FETCHPC  reg        -> mkRU [] [reg]

    COMMENT _		-> noUsage
    DELTA   _           -> noUsage

    _other		-> panic "regUsage: unrecognised instr"

 where
#if x86_64_TARGET_ARCH
	-- call parameters: include %eax, because it is used
	-- to pass the number of SSE reg arguments to varargs fns.
    params = eax : allArgRegs ++ allFPArgRegs
#endif

    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg) = mkRU (use_R op) [reg]
    usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
    usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg)    = mkRU [reg] [reg]
    usageM (OpAddr ea)    = mkRUR (use_EA ea)

    -- Registers defd when an operand is written.
    def_W (OpReg reg)  = [reg]
    def_W (OpAddr ea)  = []

    -- Registers used when an operand is read.
    use_R (OpReg reg)  = [reg]
    use_R (OpImm imm)  = []
    use_R (OpAddr ea)  = use_EA ea

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) = []
    use_EA (AddrBaseIndex base index _) = 
	use_base base $! use_index index
	where use_base (EABaseReg r) x = r : x
	      use_base _ x             = x
	      use_index EAIndexNone   = []
	      use_index (EAIndex i _) = [i]

    mkRUR src = src' `seq` RU src' []
	where src' = filter interesting src

    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
	where src' = filter interesting src
	      dst' = filter interesting dst

#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

regUsage instr = case instr of
    LD    sz addr reg  	-> usage (regAddr addr, [reg])
    ST    sz reg addr  	-> usage (reg : regAddr addr, [])
    ADD   x cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SUB   x cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
    UMUL    cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
    SMUL    cc r1 ar r2	-> usage (r1 : regRI ar, [r2])
    RDY   rd            -> usage ([], [rd])
    AND   b r1 ar r2  	-> usage (r1 : regRI ar, [r2])
    ANDN  b r1 ar r2 	-> usage (r1 : regRI ar, [r2])
    OR    b r1 ar r2   	-> usage (r1 : regRI ar, [r2])
    ORN   b r1 ar r2  	-> usage (r1 : regRI ar, [r2])
    XOR   b r1 ar r2  	-> usage (r1 : regRI ar, [r2])
    XNOR  b r1 ar r2 	-> usage (r1 : regRI ar, [r2])
    SLL   r1 ar r2    	-> usage (r1 : regRI ar, [r2])
    SRL   r1 ar r2    	-> usage (r1 : regRI ar, [r2])
    SRA   r1 ar r2    	-> usage (r1 : regRI ar, [r2])
    SETHI imm reg   	-> usage ([], [reg])
    FABS  s r1 r2    	-> usage ([r1], [r2])
    FADD  s r1 r2 r3 	-> usage ([r1, r2], [r3])
    FCMP  e s r1 r2  	-> usage ([r1, r2], [])
    FDIV  s r1 r2 r3 	-> usage ([r1, r2], [r3])
    FMOV  s r1 r2    	-> usage ([r1], [r2])
    FMUL  s r1 r2 r3 	-> usage ([r1, r2], [r3])
    FNEG  s r1 r2    	-> usage ([r1], [r2])
    FSQRT s r1 r2   	-> usage ([r1], [r2])
    FSUB  s r1 r2 r3 	-> usage ([r1, r2], [r3])
    FxTOy s1 s2 r1 r2 	-> usage ([r1], [r2])

    -- We assume that all local jumps will be BI/BF.  JMP must be out-of-line.
    JMP   addr 	        -> usage (regAddr addr, [])

    CALL  (Left imm)  n True  -> noUsage
    CALL  (Left imm)  n False -> usage (argRegs n, callClobberedRegs)
    CALL  (Right reg) n True  -> usage ([reg], [])
    CALL  (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)

    _ 	    	    	-> noUsage
  where
    usage (src, dst) = RU (filter interesting src)
    	    	    	 (filter interesting dst)

    regAddr (AddrRegReg r1 r2) = [r1, r2]
    regAddr (AddrRegImm r1 _)  = [r1]

    regRI (RIReg r) = [r]
    regRI  _	= []

#endif /* sparc_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if powerpc_TARGET_ARCH

regUsage instr = case instr of
    LD    sz reg addr  	-> usage (regAddr addr, [reg])
    LA    sz reg addr  	-> usage (regAddr addr, [reg])
    ST    sz reg addr  	-> usage (reg : regAddr addr, [])
    STU    sz reg addr  -> usage (reg : regAddr addr, [])
    LIS   reg imm	-> usage ([], [reg])
    LI    reg imm	-> usage ([], [reg])
    MR	  reg1 reg2     -> usage ([reg2], [reg1])
    CMP   sz reg ri	-> usage (reg : regRI ri,[])
    CMPL  sz reg ri	-> usage (reg : regRI ri,[])
    BCC	  cond lbl	-> noUsage
    MTCTR reg		-> usage ([reg],[])
    BCTR  targets	-> noUsage
    BL    imm params	-> usage (params, callClobberedRegs)
    BCTRL params	-> usage (params, callClobberedRegs)
    ADD	  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    ADDC  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
    ADDE  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
    ADDIS reg1 reg2 imm -> usage ([reg2], [reg1])
    SUBF  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
    MULLW reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    DIVW  reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
    DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
    MULLW_MayOflo reg1 reg2 reg3        
                        -> usage ([reg2,reg3], [reg1])
    AND	  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    OR	  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    XOR	  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    XORIS reg1 reg2 imm -> usage ([reg2], [reg1])
    EXTS  siz reg1 reg2 -> usage ([reg2], [reg1])
    NEG	  reg1 reg2	-> usage ([reg2], [reg1])
    NOT	  reg1 reg2	-> usage ([reg2], [reg1])
    SLW	  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    SRW	  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    SRAW  reg1 reg2 ri  -> usage (reg2 : regRI ri, [reg1])
    RLWINM reg1 reg2 sh mb me
                        -> usage ([reg2], [reg1])
    FADD  sz r1 r2 r3   -> usage ([r2,r3], [r1])
    FSUB  sz r1 r2 r3   -> usage ([r2,r3], [r1])
    FMUL  sz r1 r2 r3   -> usage ([r2,r3], [r1])
    FDIV  sz r1 r2 r3   -> usage ([r2,r3], [r1])
    FNEG  r1 r2		-> usage ([r2], [r1])
    FCMP  r1 r2		-> usage ([r1,r2], [])
    FCTIWZ r1 r2	-> usage ([r2], [r1])
    FRSP r1 r2		-> usage ([r2], [r1])
    MFCR reg            -> usage ([], [reg])
    MFLR reg            -> usage ([], [reg])
    FETCHPC reg         -> usage ([], [reg])
    _ 	    	    	-> noUsage
  where
    usage (src, dst) = RU (filter interesting src)
    	    	    	  (filter interesting dst)
    regAddr (AddrRegReg r1 r2) = [r1, r2]
    regAddr (AddrRegImm r1 _)  = [r1]

    regRI (RIReg r) = [r]
    regRI  _	= []
#endif /* powerpc_TARGET_ARCH */


-- -----------------------------------------------------------------------------
-- Determine the possible destinations from the current instruction.

-- (we always assume that the next instruction is also a valid destination;
-- if this isn't the case then the jump should be at the end of the basic
-- block).

jumpDests :: Instr -> [BlockId] -> [BlockId]
jumpDests insn acc
  = case insn of
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
	JXX _ id	-> id : acc
	JMP_TBL _ ids	-> ids ++ acc
#elif powerpc_TARGET_ARCH
        BCC _ id        -> id : acc
        BCTR targets    -> targets ++ acc
#endif
	_other		-> acc

patchJump :: Instr -> BlockId -> BlockId -> Instr

patchJump insn old new
  = case insn of
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
	JXX cc id | id == old -> JXX cc new
	JMP_TBL op ids -> error "Cannot patch JMP_TBL"
#elif powerpc_TARGET_ARCH
        BCC cc id | id == old -> BCC cc new
        BCTR targets -> error "Cannot patch BCTR"
#endif
	_other		-> insn

-- -----------------------------------------------------------------------------
-- 'patchRegs' function

-- 'patchRegs' takes an instruction and applies the given mapping to
-- all the register references.

patchRegs :: Instr -> (Reg -> Reg) -> Instr

#if alpha_TARGET_ARCH

patchRegs instr env = case instr of
    LD sz reg addr -> LD sz (env reg) (fixAddr addr)
    LDA reg addr -> LDA (env reg) (fixAddr addr)
    LDAH reg addr -> LDAH (env reg) (fixAddr addr)
    LDGP reg addr -> LDGP (env reg) (fixAddr addr)
    LDI sz reg imm -> LDI sz (env reg) imm
    ST sz reg addr -> ST sz (env reg) (fixAddr addr)
    CLR reg -> CLR (env reg)
    ABS sz ar reg -> ABS sz (fixRI ar) (env reg)
    NEG sz ov ar reg -> NEG sz ov (fixRI ar) (env reg)
    ADD sz ov r1 ar r2 -> ADD sz ov (env r1) (fixRI ar) (env r2)
    SADD sz sc r1 ar r2 -> SADD sz sc (env r1) (fixRI ar) (env r2)
    SUB sz ov r1 ar r2 -> SUB sz ov (env r1) (fixRI ar) (env r2)
    SSUB sz sc r1 ar r2 -> SSUB sz sc (env r1) (fixRI ar) (env r2)
    MUL sz ov r1 ar r2 -> MUL sz ov (env r1) (fixRI ar) (env r2)
    DIV sz un r1 ar r2 -> DIV sz un (env r1) (fixRI ar) (env r2)
    REM sz un r1 ar r2 -> REM sz un (env r1) (fixRI ar) (env r2)
    NOT ar reg -> NOT (fixRI ar) (env reg)
    AND r1 ar r2 -> AND (env r1) (fixRI ar) (env r2)
    ANDNOT r1 ar r2 -> ANDNOT (env r1) (fixRI ar) (env r2)
    OR r1 ar r2 -> OR (env r1) (fixRI ar) (env r2)
    ORNOT r1 ar r2 -> ORNOT (env r1) (fixRI ar) (env r2)
    XOR r1 ar r2 -> XOR (env r1) (fixRI ar) (env r2)
    XORNOT r1 ar r2 -> XORNOT (env r1) (fixRI ar) (env r2)
    SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
    SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
    SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
    ZAP r1 ar r2 -> ZAP (env r1) (fixRI ar) (env r2)
    ZAPNOT r1 ar r2 -> ZAPNOT (env r1) (fixRI ar) (env r2)
    CMP co r1 ar r2 -> CMP co (env r1) (fixRI ar) (env r2)
    FCLR reg -> FCLR (env reg)
    FABS r1 r2 -> FABS (env r1) (env r2)
    FNEG s r1 r2 -> FNEG s (env r1) (env r2)
    FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
    FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
    FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
    FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
    CVTxy s1 s2 r1 r2 -> CVTxy s1 s2 (env r1) (env r2)
    FCMP s co r1 r2 r3 -> FCMP s co (env r1) (env r2) (env r3)
    FMOV r1 r2 -> FMOV (env r1) (env r2)
    BI cond reg lbl -> BI cond (env reg) lbl
    BF cond reg lbl -> BF cond (env reg) lbl
    JMP reg addr hint -> JMP (env reg) (fixAddr addr) hint
    JSR reg addr i -> JSR (env reg) (fixAddr addr) i
    _ -> instr
  where
    fixAddr (AddrReg r1)       = AddrReg (env r1)
    fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i
    fixAddr other	       = other

    fixRI (RIReg r) = RIReg (env r)
    fixRI other	= other

#endif /* alpha_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH

patchRegs instr env = case instr of
    MOV  sz src dst	-> patch2 (MOV  sz) src dst
    MOVZxL sz src dst	-> patch2 (MOVZxL sz) src dst
    MOVSxL sz src dst	-> patch2 (MOVSxL sz) src dst
    LEA  sz src dst	-> patch2 (LEA  sz) src dst
    ADD  sz src dst	-> patch2 (ADD  sz) src dst
    ADC  sz src dst	-> patch2 (ADC  sz) src dst
    SUB  sz src dst	-> patch2 (SUB  sz) src dst
    IMUL sz src dst 	-> patch2 (IMUL sz) src dst
    IMUL2 sz src        -> patch1 (IMUL2 sz) src
    MUL sz src dst 	-> patch2 (MUL sz) src dst
    IDIV sz op		-> patch1 (IDIV sz) op
    DIV sz op		-> patch1 (DIV sz) op
    AND  sz src dst	-> patch2 (AND  sz) src dst
    OR   sz src dst	-> patch2 (OR   sz) src dst
    XOR  sz src dst	-> patch2 (XOR  sz) src dst
    NOT  sz op 		-> patch1 (NOT  sz) op
    NEGI sz op		-> patch1 (NEGI sz) op
    SHL  sz imm dst 	-> patch1 (SHL sz imm) dst
    SAR  sz imm dst 	-> patch1 (SAR sz imm) dst
    SHR  sz imm dst 	-> patch1 (SHR sz imm) dst
    BT   sz imm src     -> patch1 (BT  sz imm) src
    TEST sz src dst	-> patch2 (TEST sz) src dst
    CMP  sz src dst	-> patch2 (CMP  sz) src dst
    PUSH sz op		-> patch1 (PUSH sz) op
    POP  sz op		-> patch1 (POP  sz) op
    SETCC cond op	-> patch1 (SETCC cond) op
    JMP op		-> patch1 JMP op
    JMP_TBL op ids      -> patch1 JMP_TBL op $ ids

#if i386_TARGET_ARCH
    GMOV src dst	-> GMOV (env src) (env dst)
    GLD sz src dst	-> GLD sz (lookupAddr src) (env dst)
    GST sz src dst	-> GST sz (env src) (lookupAddr dst)

    GLDZ dst		-> GLDZ (env dst)
    GLD1 dst		-> GLD1 (env dst)

    GFTOI src dst	-> GFTOI (env src) (env dst)
    GDTOI src dst	-> GDTOI (env src) (env dst)

    GITOF src dst	-> GITOF (env src) (env dst)
    GITOD src dst	-> GITOD (env src) (env dst)

    GADD sz s1 s2 dst	-> GADD sz (env s1) (env s2) (env dst)
    GSUB sz s1 s2 dst	-> GSUB sz (env s1) (env s2) (env dst)
    GMUL sz s1 s2 dst	-> GMUL sz (env s1) (env s2) (env dst)
    GDIV sz s1 s2 dst	-> GDIV sz (env s1) (env s2) (env dst)

    GCMP sz src1 src2	-> GCMP sz (env src1) (env src2)
    GABS sz src dst	-> GABS sz (env src) (env dst)
    GNEG sz src dst	-> GNEG sz (env src) (env dst)
    GSQRT sz src dst	-> GSQRT sz (env src) (env dst)
    GSIN sz src dst	-> GSIN sz (env src) (env dst)
    GCOS sz src dst	-> GCOS sz (env src) (env dst)
    GTAN sz src dst	-> GTAN sz (env src) (env dst)
#endif

#if x86_64_TARGET_ARCH
    CVTSS2SD src dst	-> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst	-> CVTSD2SS (env src) (env dst)
    CVTSS2SI src dst	-> CVTSS2SI (patchOp src) (env dst)
    CVTSD2SI src dst	-> CVTSD2SI (patchOp src) (env dst)
    CVTSI2SS src dst	-> CVTSI2SS (patchOp src) (env dst)
    CVTSI2SD src dst	-> CVTSI2SD (patchOp src) (env dst)
    FDIV sz src dst	-> FDIV sz (patchOp src) (patchOp dst)
#endif    

    CALL (Left imm)  _	-> instr
    CALL (Right reg) p	-> CALL (Right (env reg)) p
    
    FETCHGOT reg        -> FETCHGOT (env reg)
    FETCHPC  reg        -> FETCHPC  (env reg)
   
    NOP			-> instr
    COMMENT _		-> instr
    DELTA _ 		-> instr
    JXX _ _		-> instr
    CLTD _		-> instr

    _other		-> panic "patchRegs: unrecognised instr"

  where
    patch1 insn op      = insn $! patchOp op
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    patchOp (OpReg  reg) = OpReg $! env reg
    patchOp (OpImm  imm) = OpImm imm
    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea

    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
	lookupBase EABaseNone       = EABaseNone
	lookupBase EABaseRip        = EABaseRip
	lookupBase (EABaseReg r)    = EABaseReg (env r)
				 
	lookupIndex EAIndexNone     = EAIndexNone
	lookupIndex (EAIndex r i)   = EAIndex (env r) i

#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH*/
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if sparc_TARGET_ARCH

patchRegs instr env = case instr of
    LD    sz addr reg   -> LD sz (fixAddr addr) (env reg)
    ST    sz reg addr   -> ST sz (env reg) (fixAddr addr)
    ADD   x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
    SUB   x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
    UMUL    cc r1 ar r2	-> UMUL cc (env r1) (fixRI ar) (env r2)
    SMUL    cc r1 ar r2	-> SMUL cc (env r1) (fixRI ar) (env r2)
    RDY   rd            -> RDY (env rd)
    AND   b r1 ar r2    -> AND b (env r1) (fixRI ar) (env r2)
    ANDN  b r1 ar r2    -> ANDN b (env r1) (fixRI ar) (env r2)
    OR    b r1 ar r2    -> OR b (env r1) (fixRI ar) (env r2)
    ORN   b r1 ar r2    -> ORN b (env r1) (fixRI ar) (env r2)
    XOR   b r1 ar r2    -> XOR b (env r1) (fixRI ar) (env r2)
    XNOR  b r1 ar r2    -> XNOR b (env r1) (fixRI ar) (env r2)
    SLL   r1 ar r2      -> SLL (env r1) (fixRI ar) (env r2)
    SRL   r1 ar r2      -> SRL (env r1) (fixRI ar) (env r2)
    SRA   r1 ar r2      -> SRA (env r1) (fixRI ar) (env r2)
    SETHI imm reg       -> SETHI imm (env reg)
    FABS  s r1 r2       -> FABS s (env r1) (env r2)
    FADD  s r1 r2 r3    -> FADD s (env r1) (env r2) (env r3)
    FCMP  e s r1 r2     -> FCMP e s (env r1) (env r2)
    FDIV  s r1 r2 r3    -> FDIV s (env r1) (env r2) (env r3)
    FMOV  s r1 r2       -> FMOV s (env r1) (env r2)
    FMUL  s r1 r2 r3    -> FMUL s (env r1) (env r2) (env r3)
    FNEG  s r1 r2       -> FNEG s (env r1) (env r2)
    FSQRT s r1 r2       -> FSQRT s (env r1) (env r2)
    FSUB  s r1 r2 r3    -> FSUB s (env r1) (env r2) (env r3)
    FxTOy s1 s2 r1 r2   -> FxTOy s1 s2 (env r1) (env r2)
    JMP   addr          -> JMP (fixAddr addr)
    CALL  (Left i) n t  -> CALL (Left i) n t
    CALL  (Right r) n t -> CALL (Right (env r)) n t
    _ -> instr
  where
    fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
    fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i

    fixRI (RIReg r) = RIReg (env r)
    fixRI other	= other

#endif /* sparc_TARGET_ARCH */
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#if powerpc_TARGET_ARCH

patchRegs instr env = case instr of
    LD    sz reg addr   -> LD sz (env reg) (fixAddr addr)
    LA    sz reg addr   -> LA sz (env reg) (fixAddr addr)
    ST    sz reg addr   -> ST sz (env reg) (fixAddr addr)
    STU    sz reg addr  -> STU sz (env reg) (fixAddr addr)
    LIS   reg imm	-> LIS (env reg) imm
    LI    reg imm	-> LI (env reg) imm
    MR	  reg1 reg2     -> MR (env reg1) (env reg2)
    CMP	  sz reg ri	-> CMP sz (env reg) (fixRI ri)
    CMPL  sz reg ri	-> CMPL sz (env reg) (fixRI ri)
    BCC	  cond lbl	-> BCC cond lbl
    MTCTR reg		-> MTCTR (env reg)
    BCTR  targets	-> BCTR targets
    BL    imm argRegs	-> BL imm argRegs	-- argument regs
    BCTRL argRegs	-> BCTRL argRegs 	-- cannot be remapped
    ADD	  reg1 reg2 ri	-> ADD (env reg1) (env reg2) (fixRI ri)
    ADDC  reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
    ADDE  reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
    ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
    SUBF  reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
    MULLW reg1 reg2 ri	-> MULLW (env reg1) (env reg2) (fixRI ri)
    DIVW  reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
    DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
    MULLW_MayOflo reg1 reg2 reg3
                        -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
    AND	  reg1 reg2 ri	-> AND (env reg1) (env reg2) (fixRI ri)
    OR 	  reg1 reg2 ri	-> OR  (env reg1) (env reg2) (fixRI ri)
    XOR	  reg1 reg2 ri	-> XOR (env reg1) (env reg2) (fixRI ri)
    XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
    EXTS  sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
    NEG	  reg1 reg2	-> NEG (env reg1) (env reg2)
    NOT	  reg1 reg2	-> NOT (env reg1) (env reg2)
    SLW	  reg1 reg2 ri	-> SLW (env reg1) (env reg2) (fixRI ri)
    SRW	  reg1 reg2 ri	-> SRW (env reg1) (env reg2) (fixRI ri)
    SRAW  reg1 reg2 ri	-> SRAW (env reg1) (env reg2) (fixRI ri)
    RLWINM reg1 reg2 sh mb me
                        -> RLWINM (env reg1) (env reg2) sh mb me
    FADD  sz r1 r2 r3   -> FADD sz (env r1) (env r2) (env r3)
    FSUB  sz r1 r2 r3   -> FSUB sz (env r1) (env r2) (env r3)
    FMUL  sz r1 r2 r3   -> FMUL sz (env r1) (env r2) (env r3)
    FDIV  sz r1 r2 r3   -> FDIV sz (env r1) (env r2) (env r3)
    FNEG  r1 r2		-> FNEG (env r1) (env r2)
    FCMP  r1 r2		-> FCMP (env r1) (env r2)
    FCTIWZ r1 r2	-> FCTIWZ (env r1) (env r2)
    FRSP r1 r2		-> FRSP (env r1) (env r2)
    MFCR reg            -> MFCR (env reg)
    MFLR reg            -> MFLR (env reg)
    FETCHPC reg         -> FETCHPC (env reg)
    _ -> instr
  where
    fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
    fixAddr (AddrRegImm r1 i)  = AddrRegImm (env r1) i

    fixRI (RIReg r) = RIReg (env r)
    fixRI other	= other
#endif /* powerpc_TARGET_ARCH */

-- -----------------------------------------------------------------------------
-- Detecting reg->reg moves

-- The register allocator attempts to eliminate reg->reg moves whenever it can,
-- by assigning the src and dest temporaries to the same real register.

isRegRegMove :: Instr -> Maybe (Reg,Reg)
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- TMP:
isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
#elif powerpc_TARGET_ARCH
isRegRegMove (MR dst src) = Just (src,dst)
#else
#warning ToDo: isRegRegMove
#endif
isRegRegMove _ = Nothing

-- -----------------------------------------------------------------------------
-- Generating spill instructions

mkSpillInstr
   :: Reg		-- register to spill (should be a real)
   -> Int		-- current stack delta
   -> Int		-- spill slot to use
   -> Instr
mkSpillInstr reg delta slot
  = ASSERT(isRealReg reg)
    let	
        off     = spillSlotToOffset slot
    in
#ifdef alpha_TARGET_ARCH
    {-Alpha: spill below the stack pointer (?)-}
    ST sz dyn (spRel (- (off `div` 8)))
#endif
#ifdef i386_TARGET_ARCH
    let off_w = (off-delta) `div` 4
    in case regClass reg of
	   RcInteger -> MOV I32 (OpReg reg) (OpAddr (spRel off_w))
	   _         -> GST F80 reg (spRel off_w) {- RcFloat/RcDouble -}
#endif
#ifdef x86_64_TARGET_ARCH
    let off_w = (off-delta) `div` 8
    in case regClass reg of
	   RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w))
	   RcDouble  -> MOV F64 (OpReg reg) (OpAddr (spRel off_w))
		-- ToDo: will it work to always spill as a double?
		-- does that cause a stall if the data was a float?
#endif
#ifdef sparc_TARGET_ARCH
	{-SPARC: spill below frame pointer leaving 2 words/spill-}
                        let{off_w = 1 + (off `div` 4);
                            sz = case regClass reg of {
                                    RcInteger -> I32;
				    RcFloat   -> F32;
                                    RcDouble  -> F64}}
                        in ST sz reg (fpRel (- off_w))
#endif
#ifdef powerpc_TARGET_ARCH
    let sz = case regClass reg of
                RcInteger -> I32
                RcDouble -> F64
    in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
#endif


mkLoadInstr
   :: Reg		-- register to load (should be a real)
   -> Int		-- current stack delta
   -> Int		-- spill slot to use
   -> Instr
mkLoadInstr reg delta slot
  = ASSERT(isRealReg reg)
    let
        off     = spillSlotToOffset slot
    in
#if alpha_TARGET_ARCH
	 LD  sz dyn (spRel (- (off `div` 8)))
#endif
#if i386_TARGET_ARCH
	let off_w = (off-delta) `div` 4
        in case regClass reg of {
              RcInteger -> MOV I32 (OpAddr (spRel off_w)) (OpReg reg);
              _         -> GLD F80 (spRel off_w) reg} {- RcFloat/RcDouble -}
#endif
#if x86_64_TARGET_ARCH
	let off_w = (off-delta) `div` 8
        in case regClass reg of
              RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg)
              _         -> MOV F64 (OpAddr (spRel off_w)) (OpReg reg)
#endif
#if sparc_TARGET_ARCH
        let{off_w = 1 + (off `div` 4);
            sz = case regClass reg of {
                   RcInteger -> I32;
		   RcFloat   -> F32;
                   RcDouble  -> F64}}
        in LD sz (fpRel (- off_w)) reg
#endif
#if powerpc_TARGET_ARCH
    let sz = case regClass reg of
                RcInteger -> I32
                RcDouble -> F64
    in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
#endif

mkRegRegMoveInstr
    :: Reg
    -> Reg
    -> Instr
mkRegRegMoveInstr src dst
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
    = case regClass src of
        RcInteger -> MOV wordRep (OpReg src) (OpReg dst)
#if i386_TARGET_ARCH
        RcDouble  -> GMOV src dst
#else
        RcDouble  -> MOV F64 (OpReg src) (OpReg dst)
#endif
#elif powerpc_TARGET_ARCH
    = MR dst src
#endif

mkBranchInstr
    :: BlockId
    -> [Instr]
#if alpha_TARGET_ARCH
mkBranchInstr id = [BR id]
#endif

#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
mkBranchInstr id = [JXX ALWAYS id]
#endif

#if sparc_TARGET_ARCH
mkBranchInstr (BlockId id) = [BI ALWAYS False (ImmCLbl (mkAsmTempLabel id)), NOP]
#endif

#if powerpc_TARGET_ARCH
mkBranchInstr id = [BCC ALWAYS id]
#endif


spillSlotSize :: Int
spillSlotSize = IF_ARCH_i386(12, 8)

maxSpillSlots :: Int
maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1

-- convert a spill slot number to a *byte* offset, with no sign:
-- decide on a per arch basis whether you are spilling above or below
-- the C stack pointer.
spillSlotToOffset :: Int -> Int
spillSlotToOffset slot
   | slot >= 0 && slot < maxSpillSlots
   = 64 + spillSlotSize * slot
   | otherwise
   = pprPanic "spillSlotToOffset:" 
              (text "invalid spill location: " <> int slot)