summary | refs | log | tree | commit | diff
path: root/includes
diff options
context:
space:
mode:
author: Geoffrey Mainland <gmainlan@microsoft.com> 2012-10-05 15:19:55 +0100
committer: Geoffrey Mainland <gmainlan@microsoft.com> 2012-10-30 20:50:48 +0000
commit: e2f6bbd3a27685bc667655fdb093734cb565b4cf (patch)
tree: 7b111be1d9e14a61073d3ca30c1d7333d494d127 /includes
parent: 5ee08ddffbbe596d7716a09306888004e6baf2b1 (diff)
download: haskell-e2f6bbd3a27685bc667655fdb093734cb565b4cf.tar.gz
Draw STG F and D registers from the same pool of available SSE registers on x86-64.
On x86-64, F and D registers are both drawn from SSE registers, so there is no reason not to draw them from the same pool of available SSE registers. This means that, whereas previously a function could only receive two Double arguments in registers even if it did not have any Float arguments, it can now receive up to 6 arguments in registers, in any mix of Float and Double. This patch breaks the LLVM back end. The next patch will fix this breakage.
Diffstat (limited to 'includes')
-rw-r--r-- includes/Cmm.h 16
-rw-r--r-- includes/CodeGen.Platform.hs 161
-rw-r--r-- includes/mkDerivedConstants.c 8
-rw-r--r-- includes/rts/Constants.h 5
-rw-r--r-- includes/stg/MachRegs.h 62
-rw-r--r-- includes/stg/Regs.h 42
6 files changed, 284 insertions, 10 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 36aae9b8dc..211d2a86fe 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -682,8 +682,8 @@
#define SAVE_STGREGS \
W_ r1, r2, r3, r4, r5, r6, r7, r8; \
- F_ f1, f2, f3, f4; \
- D_ d1, d2; \
+ F_ f1, f2, f3, f4, f5, f6; \
+ D_ d1, d2, d3, d4, d5, d6; \
L_ l1; \
\
r1 = R1; \
@@ -699,9 +699,15 @@
f2 = F2; \
f3 = F3; \
f4 = F4; \
+ f5 = F5; \
+ f6 = F6; \
\
d1 = D1; \
d2 = D2; \
+ d3 = D3; \
+ d4 = D4; \
+ d5 = D5; \
+ d6 = D6; \
\
l1 = L1;
@@ -720,9 +726,15 @@
F2 = f2; \
F3 = f3; \
F4 = f4; \
+ F5 = f5; \
+ F6 = f6; \
\
D1 = d1; \
D2 = d2; \
+ D3 = d3; \
+ D4 = d4; \
+ D5 = d5; \
+ D6 = d6; \
\
L1 = l1;
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index 0ba57a46d7..b038f822c2 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -286,12 +286,30 @@ callerSaves (FloatReg 3) = True
#ifdef CALLER_SAVES_F4
callerSaves (FloatReg 4) = True
#endif
+#ifdef CALLER_SAVES_F5
+callerSaves (FloatReg 5) = True
+#endif
+#ifdef CALLER_SAVES_F6
+callerSaves (FloatReg 6) = True
+#endif
#ifdef CALLER_SAVES_D1
callerSaves (DoubleReg 1) = True
#endif
#ifdef CALLER_SAVES_D2
callerSaves (DoubleReg 2) = True
#endif
+#ifdef CALLER_SAVES_D3
+callerSaves (DoubleReg 3) = True
+#endif
+#ifdef CALLER_SAVES_D4
+callerSaves (DoubleReg 4) = True
+#endif
+#ifdef CALLER_SAVES_D5
+callerSaves (DoubleReg 5) = True
+#endif
+#ifdef CALLER_SAVES_D6
+callerSaves (DoubleReg 6) = True
+#endif
#ifdef CALLER_SAVES_L1
callerSaves (LongReg 1) = True
#endif
@@ -362,24 +380,81 @@ activeStgRegs = [
#ifdef REG_SpLim
,SpLim
#endif
+#if MAX_REAL_SSE_REG != 0
#ifdef REG_F1
,FloatReg 1
#endif
+#ifdef REG_D1
+ ,DoubleReg 1
+#endif
#ifdef REG_F2
,FloatReg 2
#endif
+#ifdef REG_D2
+ ,DoubleReg 2
+#endif
#ifdef REG_F3
,FloatReg 3
#endif
+#ifdef REG_D3
+ ,DoubleReg 3
+#endif
#ifdef REG_F4
,FloatReg 4
#endif
+#ifdef REG_D4
+ ,DoubleReg 4
+#endif
+#ifdef REG_F5
+ ,FloatReg 5
+#endif
+#ifdef REG_D5
+ ,DoubleReg 5
+#endif
+#ifdef REG_F6
+ ,FloatReg 6
+#endif
+#ifdef REG_D6
+ ,DoubleReg 6
+#endif
+#else /* MAX_REAL_SSE_REG == 0 */
+#ifdef REG_F1
+ ,FloatReg 1
+#endif
+#ifdef REG_F2
+ ,FloatReg 2
+#endif
+#ifdef REG_F3
+ ,FloatReg 3
+#endif
+#ifdef REG_F4
+ ,FloatReg 4
+#endif
+#ifdef REG_F5
+ ,FloatReg 5
+#endif
+#ifdef REG_F6
+ ,FloatReg 6
+#endif
#ifdef REG_D1
,DoubleReg 1
#endif
#ifdef REG_D2
,DoubleReg 2
#endif
+#ifdef REG_D3
+ ,DoubleReg 3
+#endif
+#ifdef REG_D4
+ ,DoubleReg 4
+#endif
+#ifdef REG_D5
+ ,DoubleReg 5
+#endif
+#ifdef REG_D6
+ ,DoubleReg 6
+#endif
+#endif /* MAX_REAL_SSE_REG == 0 */
]
haveRegBase :: Bool
@@ -439,6 +514,12 @@ globalRegMaybe (FloatReg 3) = Just (RealRegSingle REG_F3)
# ifdef REG_F4
globalRegMaybe (FloatReg 4) = Just (RealRegSingle REG_F4)
# endif
+# ifdef REG_F5
+globalRegMaybe (FloatReg 5) = Just (RealRegSingle REG_F5)
+# endif
+# ifdef REG_F6
+globalRegMaybe (FloatReg 6) = Just (RealRegSingle REG_F6)
+# endif
# ifdef REG_D1
globalRegMaybe (DoubleReg 1) =
# if MACHREGS_sparc
@@ -455,6 +536,38 @@ globalRegMaybe (DoubleReg 2) =
Just (RealRegSingle REG_D2)
# endif
# endif
+# ifdef REG_D3
+globalRegMaybe (DoubleReg 3) =
+# if MACHREGS_sparc
+ Just (RealRegPair REG_D3 (REG_D3 + 1))
+# else
+ Just (RealRegSingle REG_D3)
+# endif
+# endif
+# ifdef REG_D4
+globalRegMaybe (DoubleReg 4) =
+# if MACHREGS_sparc
+ Just (RealRegPair REG_D4 (REG_D4 + 1))
+# else
+ Just (RealRegSingle REG_D4)
+# endif
+# endif
+# ifdef REG_D5
+globalRegMaybe (DoubleReg 5) =
+# if MACHREGS_sparc
+ Just (RealRegPair REG_D5 (REG_D5 + 1))
+# else
+ Just (RealRegSingle REG_D5)
+# endif
+# endif
+# ifdef REG_D6
+globalRegMaybe (DoubleReg 6) =
+# if MACHREGS_sparc
+ Just (RealRegPair REG_D6 (REG_D6 + 1))
+# else
+ Just (RealRegSingle REG_D6)
+# endif
+# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
# endif
@@ -588,12 +701,30 @@ freeReg REG_F3 = fastBool False
# ifdef REG_F4
freeReg REG_F4 = fastBool False
# endif
+# ifdef REG_F5
+freeReg REG_F5 = fastBool False
+# endif
+# ifdef REG_F6
+freeReg REG_F6 = fastBool False
+# endif
# ifdef REG_D1
freeReg REG_D1 = fastBool False
# endif
# ifdef REG_D2
freeReg REG_D2 = fastBool False
# endif
+# ifdef REG_D3
+freeReg REG_D3 = fastBool False
+# endif
+# ifdef REG_D4
+freeReg REG_D4 = fastBool False
+# endif
+# ifdef REG_D5
+freeReg REG_D5 = fastBool False
+# endif
+# ifdef REG_D6
+freeReg REG_D6 = fastBool False
+# endif
# ifdef REG_Sp
freeReg REG_Sp = fastBool False
# endif
@@ -698,6 +829,12 @@ freeReg REG_F3 = fastBool False
# ifdef REG_F4
freeReg REG_F4 = fastBool False
# endif
+# ifdef REG_F5
+freeReg REG_F5 = fastBool False
+# endif
+# ifdef REG_F6
+freeReg REG_F6 = fastBool False
+# endif
# ifdef REG_D1
freeReg REG_D1 = fastBool False
# endif
@@ -710,6 +847,30 @@ freeReg REG_D2 = fastBool False
# ifdef REG_D2_2
freeReg REG_D2_2 = fastBool False
# endif
+# ifdef REG_D3
+freeReg REG_D3 = fastBool False
+# endif
+# ifdef REG_D3_2
+freeReg REG_D3_2 = fastBool False
+# endif
+# ifdef REG_D4
+freeReg REG_D4 = fastBool False
+# endif
+# ifdef REG_D4_2
+freeReg REG_D4_2 = fastBool False
+# endif
+# ifdef REG_D5
+freeReg REG_D5 = fastBool False
+# endif
+# ifdef REG_D5_2
+freeReg REG_D5_2 = fastBool False
+# endif
+# ifdef REG_D6
+freeReg REG_D6 = fastBool False
+# endif
+# ifdef REG_D6_2
+freeReg REG_D6_2 = fastBool False
+# endif
# ifdef REG_Sp
freeReg REG_Sp = fastBool False
# endif
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index 1393112595..79242d9b41 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -458,8 +458,14 @@ main(int argc, char *argv[])
field_offset(StgRegTable, rF2);
field_offset(StgRegTable, rF3);
field_offset(StgRegTable, rF4);
+ field_offset(StgRegTable, rF5);
+ field_offset(StgRegTable, rF6);
field_offset(StgRegTable, rD1);
field_offset(StgRegTable, rD2);
+ field_offset(StgRegTable, rD3);
+ field_offset(StgRegTable, rD4);
+ field_offset(StgRegTable, rD5);
+ field_offset(StgRegTable, rD6);
field_offset(StgRegTable, rL1);
field_offset(StgRegTable, rSp);
field_offset(StgRegTable, rSpLim);
@@ -736,9 +742,11 @@ main(int argc, char *argv[])
constantInt("mAX_Float_REG", MAX_FLOAT_REG);
constantInt("mAX_Double_REG", MAX_DOUBLE_REG);
constantInt("mAX_Long_REG", MAX_LONG_REG);
+ constantInt("mAX_SSE_REG", MAX_SSE_REG);
constantInt("mAX_Real_Vanilla_REG", MAX_REAL_VANILLA_REG);
constantInt("mAX_Real_Float_REG", MAX_REAL_FLOAT_REG);
constantInt("mAX_Real_Double_REG", MAX_REAL_DOUBLE_REG);
+ constantInt("mAX_Real_SSE_REG", MAX_REAL_SSE_REG);
constantInt("mAX_Real_Long_REG", MAX_REAL_LONG_REG);
// This tells the native code generator the size of the spill
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h
index 2fab041c22..5ff4d4e51e 100644
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -81,9 +81,10 @@
-------------------------------------------------------------------------- */
#define MAX_VANILLA_REG 10
-#define MAX_FLOAT_REG 4
-#define MAX_DOUBLE_REG 2
+#define MAX_FLOAT_REG 6
+#define MAX_DOUBLE_REG 6
#define MAX_LONG_REG 1
+#define MAX_SSE_REG 6
/* -----------------------------------------------------------------------------
Semi-Tagging constants
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index 6a70d08d66..8cefe9bc64 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -92,6 +92,7 @@
#define MAX_REAL_FLOAT_REG 0
#define MAX_REAL_DOUBLE_REG 0
#define MAX_REAL_LONG_REG 0
+#define MAX_REAL_SSE_REG 0
/* -----------------------------------------------------------------------------
The x86-64 register mapping
@@ -141,9 +142,22 @@
#define REG_F2 xmm2
#define REG_F3 xmm3
#define REG_F4 xmm4
-
-#define REG_D1 xmm5
-#define REG_D2 xmm6
+#define REG_F5 xmm5
+#define REG_F6 xmm6
+
+#define REG_D1 xmm1
+#define REG_D2 xmm2
+#define REG_D3 xmm3
+#define REG_D4 xmm4
+#define REG_D5 xmm5
+#define REG_D6 xmm6
+
+#define REG_SSE1 xmm1
+#define REG_SSE2 xmm2
+#define REG_SSE3 xmm3
+#define REG_SSE4 xmm4
+#define REG_SSE5 xmm5
+#define REG_SSE6 xmm6
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
@@ -156,16 +170,34 @@
#define CALLER_SAVES_F2
#define CALLER_SAVES_F3
#define CALLER_SAVES_F4
+#define CALLER_SAVES_F5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_F6
+#endif
#define CALLER_SAVES_D1
-#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_D2
+#define CALLER_SAVES_D3
+#define CALLER_SAVES_D4
+#define CALLER_SAVES_D5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_D6
+#endif
+
+#define CALLER_SAVES_SSE1
+#define CALLER_SAVES_SSE2
+#define CALLER_SAVES_SSE3
+#define CALLER_SAVES_SSE4
+#define CALLER_SAVES_SSE5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_SSE6
#endif
#define MAX_REAL_VANILLA_REG 6
-#define MAX_REAL_FLOAT_REG 4
-#define MAX_REAL_DOUBLE_REG 2
+#define MAX_REAL_FLOAT_REG 6
+#define MAX_REAL_DOUBLE_REG 6
#define MAX_REAL_LONG_REG 0
+#define MAX_REAL_SSE_REG 6
/* -----------------------------------------------------------------------------
The PowerPC register mapping
@@ -518,6 +550,24 @@
# endif
#endif
+#ifndef MAX_REAL_SSE_REG
+# if defined(REG_SSE6)
+# define MAX_REAL_SSE_REG 6
+# elif defined(REG_SSE5)
+# define MAX_REAL_SSE_REG 5
+# elif defined(REG_SSE4)
+# define MAX_REAL_SSE_REG 4
+# elif defined(REG_SSE3)
+# define MAX_REAL_SSE_REG 3
+# elif defined(REG_SSE2)
+# define MAX_REAL_SSE_REG 2
+# elif defined(REG_SSE1)
+# define MAX_REAL_SSE_REG 1
+# else
+# define MAX_REAL_SSE_REG 0
+# endif
+#endif
+
/* define NO_ARG_REGS if we have no argument registers at all (we can
* optimise certain code paths using this predicate).
*/
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 70e93d3234..fd1577e71a 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -73,8 +73,14 @@ typedef struct {
StgFloat rF2;
StgFloat rF3;
StgFloat rF4;
+ StgFloat rF5;
+ StgFloat rF6;
StgDouble rD1;
StgDouble rD2;
+ StgDouble rD3;
+ StgDouble rD4;
+ StgDouble rD5;
+ StgDouble rD6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
@@ -216,6 +222,18 @@ GLOBAL_REG_DECL(StgFloat,F4,REG_F4)
#define F4 (BaseReg->rF4)
#endif
+#if defined(REG_F5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F5,REG_F5)
+#else
+#define F5 (BaseReg->rF5)
+#endif
+
+#if defined(REG_F6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F6,REG_F6)
+#else
+#define F6 (BaseReg->rF6)
+#endif
+
#if defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgDouble,D1,REG_D1)
#else
@@ -228,6 +246,30 @@ GLOBAL_REG_DECL(StgDouble,D2,REG_D2)
#define D2 (BaseReg->rD2)
#endif
+#if defined(REG_D3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D3,REG_D3)
+#else
+#define D3 (BaseReg->rD3)
+#endif
+
+#if defined(REG_D4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D4,REG_D4)
+#else
+#define D4 (BaseReg->rD4)
+#endif
+
+#if defined(REG_D5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D5,REG_D5)
+#else
+#define D5 (BaseReg->rD5)
+#endif
+
+#if defined(REG_D6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D6,REG_D6)
+#else
+#define D6 (BaseReg->rD6)
+#endif
+
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else