diff options
author | Geoffrey Mainland <gmainlan@microsoft.com> | 2012-10-05 15:19:55 +0100 |
---|---|---|
committer | Geoffrey Mainland <gmainlan@microsoft.com> | 2012-10-30 20:50:48 +0000 |
commit | e2f6bbd3a27685bc667655fdb093734cb565b4cf (patch) | |
tree | 7b111be1d9e14a61073d3ca30c1d7333d494d127 /includes | |
parent | 5ee08ddffbbe596d7716a09306888004e6baf2b1 (diff) | |
download | haskell-e2f6bbd3a27685bc667655fdb093734cb565b4cf.tar.gz |
Draw STG F and D registers from the same pool of available SSE registers on x86-64.
On x86-64, F and D registers are both drawn from SSE registers, so there is no
reason not to draw them from the same pool of available SSE registers. This
means that, whereas previously a function could receive only two Double arguments
in registers even if it had no Float arguments, it can now receive up to six
arguments in registers, in any mix of Float and Double.
This patch breaks the LLVM back end. The next patch will fix this breakage.
Diffstat (limited to 'includes')
-rw-r--r-- | includes/Cmm.h | 16 | ||||
-rw-r--r-- | includes/CodeGen.Platform.hs | 161 | ||||
-rw-r--r-- | includes/mkDerivedConstants.c | 8 | ||||
-rw-r--r-- | includes/rts/Constants.h | 5 | ||||
-rw-r--r-- | includes/stg/MachRegs.h | 62 | ||||
-rw-r--r-- | includes/stg/Regs.h | 42 |
6 files changed, 284 insertions, 10 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h index 36aae9b8dc..211d2a86fe 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -682,8 +682,8 @@ #define SAVE_STGREGS \ W_ r1, r2, r3, r4, r5, r6, r7, r8; \ - F_ f1, f2, f3, f4; \ - D_ d1, d2; \ + F_ f1, f2, f3, f4, f5, f6; \ + D_ d1, d2, d3, d4, d5, d6; \ L_ l1; \ \ r1 = R1; \ @@ -699,9 +699,15 @@ f2 = F2; \ f3 = F3; \ f4 = F4; \ + f5 = F5; \ + f6 = F6; \ \ d1 = D1; \ d2 = D2; \ + d3 = D3; \ + d4 = D4; \ + d5 = D5; \ + d6 = D6; \ \ l1 = L1; @@ -720,9 +726,15 @@ F2 = f2; \ F3 = f3; \ F4 = f4; \ + F5 = f5; \ + F6 = f6; \ \ D1 = d1; \ D2 = d2; \ + D3 = d3; \ + D4 = d4; \ + D5 = d5; \ + D6 = d6; \ \ L1 = l1; diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index 0ba57a46d7..b038f822c2 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -286,12 +286,30 @@ callerSaves (FloatReg 3) = True #ifdef CALLER_SAVES_F4 callerSaves (FloatReg 4) = True #endif +#ifdef CALLER_SAVES_F5 +callerSaves (FloatReg 5) = True +#endif +#ifdef CALLER_SAVES_F6 +callerSaves (FloatReg 6) = True +#endif #ifdef CALLER_SAVES_D1 callerSaves (DoubleReg 1) = True #endif #ifdef CALLER_SAVES_D2 callerSaves (DoubleReg 2) = True #endif +#ifdef CALLER_SAVES_D3 +callerSaves (DoubleReg 3) = True +#endif +#ifdef CALLER_SAVES_D4 +callerSaves (DoubleReg 4) = True +#endif +#ifdef CALLER_SAVES_D5 +callerSaves (DoubleReg 5) = True +#endif +#ifdef CALLER_SAVES_D6 +callerSaves (DoubleReg 6) = True +#endif #ifdef CALLER_SAVES_L1 callerSaves (LongReg 1) = True #endif @@ -362,24 +380,81 @@ activeStgRegs = [ #ifdef REG_SpLim ,SpLim #endif +#if MAX_REAL_SSE_REG != 0 #ifdef REG_F1 ,FloatReg 1 #endif +#ifdef REG_D1 + ,DoubleReg 1 +#endif #ifdef REG_F2 ,FloatReg 2 #endif +#ifdef REG_D2 + ,DoubleReg 2 +#endif #ifdef REG_F3 ,FloatReg 3 #endif +#ifdef REG_D3 + ,DoubleReg 3 +#endif #ifdef REG_F4 ,FloatReg 4 #endif +#ifdef REG_D4 + ,DoubleReg 4 +#endif +#ifdef REG_F5 + ,FloatReg 5 +#endif +#ifdef REG_D5 + ,DoubleReg 5 +#endif +#ifdef 
REG_F6 + ,FloatReg 6 +#endif +#ifdef REG_D6 + ,DoubleReg 6 +#endif +#else /* MAX_REAL_SSE_REG == 0 */ +#ifdef REG_F1 + ,FloatReg 1 +#endif +#ifdef REG_F2 + ,FloatReg 2 +#endif +#ifdef REG_F3 + ,FloatReg 3 +#endif +#ifdef REG_F4 + ,FloatReg 4 +#endif +#ifdef REG_F5 + ,FloatReg 5 +#endif +#ifdef REG_F6 + ,FloatReg 6 +#endif #ifdef REG_D1 ,DoubleReg 1 #endif #ifdef REG_D2 ,DoubleReg 2 #endif +#ifdef REG_D3 + ,DoubleReg 3 +#endif +#ifdef REG_D4 + ,DoubleReg 4 +#endif +#ifdef REG_D5 + ,DoubleReg 5 +#endif +#ifdef REG_D6 + ,DoubleReg 6 +#endif +#endif /* MAX_REAL_SSE_REG == 0 */ ] haveRegBase :: Bool @@ -439,6 +514,12 @@ globalRegMaybe (FloatReg 3) = Just (RealRegSingle REG_F3) # ifdef REG_F4 globalRegMaybe (FloatReg 4) = Just (RealRegSingle REG_F4) # endif +# ifdef REG_F5 +globalRegMaybe (FloatReg 5) = Just (RealRegSingle REG_F5) +# endif +# ifdef REG_F6 +globalRegMaybe (FloatReg 6) = Just (RealRegSingle REG_F6) +# endif # ifdef REG_D1 globalRegMaybe (DoubleReg 1) = # if MACHREGS_sparc @@ -455,6 +536,38 @@ globalRegMaybe (DoubleReg 2) = Just (RealRegSingle REG_D2) # endif # endif +# ifdef REG_D3 +globalRegMaybe (DoubleReg 3) = +# if MACHREGS_sparc + Just (RealRegPair REG_D3 (REG_D3 + 1)) +# else + Just (RealRegSingle REG_D3) +# endif +# endif +# ifdef REG_D4 +globalRegMaybe (DoubleReg 4) = +# if MACHREGS_sparc + Just (RealRegPair REG_D4 (REG_D4 + 1)) +# else + Just (RealRegSingle REG_D4) +# endif +# endif +# ifdef REG_D5 +globalRegMaybe (DoubleReg 5) = +# if MACHREGS_sparc + Just (RealRegPair REG_D5 (REG_D5 + 1)) +# else + Just (RealRegSingle REG_D5) +# endif +# endif +# ifdef REG_D6 +globalRegMaybe (DoubleReg 6) = +# if MACHREGS_sparc + Just (RealRegPair REG_D6 (REG_D6 + 1)) +# else + Just (RealRegSingle REG_D6) +# endif +# endif # ifdef REG_Sp globalRegMaybe Sp = Just (RealRegSingle REG_Sp) # endif @@ -588,12 +701,30 @@ freeReg REG_F3 = fastBool False # ifdef REG_F4 freeReg REG_F4 = fastBool False # endif +# ifdef REG_F5 +freeReg REG_F5 = fastBool False +# endif +# 
ifdef REG_F6 +freeReg REG_F6 = fastBool False +# endif # ifdef REG_D1 freeReg REG_D1 = fastBool False # endif # ifdef REG_D2 freeReg REG_D2 = fastBool False # endif +# ifdef REG_D3 +freeReg REG_D3 = fastBool False +# endif +# ifdef REG_D4 +freeReg REG_D4 = fastBool False +# endif +# ifdef REG_D5 +freeReg REG_D5 = fastBool False +# endif +# ifdef REG_D6 +freeReg REG_D6 = fastBool False +# endif # ifdef REG_Sp freeReg REG_Sp = fastBool False # endif @@ -698,6 +829,12 @@ freeReg REG_F3 = fastBool False # ifdef REG_F4 freeReg REG_F4 = fastBool False # endif +# ifdef REG_F5 +freeReg REG_F5 = fastBool False +# endif +# ifdef REG_F6 +freeReg REG_F6 = fastBool False +# endif # ifdef REG_D1 freeReg REG_D1 = fastBool False # endif @@ -710,6 +847,30 @@ freeReg REG_D2 = fastBool False # ifdef REG_D2_2 freeReg REG_D2_2 = fastBool False # endif +# ifdef REG_D3 +freeReg REG_D3 = fastBool False +# endif +# ifdef REG_D3_2 +freeReg REG_D3_2 = fastBool False +# endif +# ifdef REG_D4 +freeReg REG_D4 = fastBool False +# endif +# ifdef REG_D4_2 +freeReg REG_D4_2 = fastBool False +# endif +# ifdef REG_D5 +freeReg REG_D5 = fastBool False +# endif +# ifdef REG_D5_2 +freeReg REG_D5_2 = fastBool False +# endif +# ifdef REG_D6 +freeReg REG_D6 = fastBool False +# endif +# ifdef REG_D6_2 +freeReg REG_D6_2 = fastBool False +# endif # ifdef REG_Sp freeReg REG_Sp = fastBool False # endif diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index 1393112595..79242d9b41 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -458,8 +458,14 @@ main(int argc, char *argv[]) field_offset(StgRegTable, rF2); field_offset(StgRegTable, rF3); field_offset(StgRegTable, rF4); + field_offset(StgRegTable, rF5); + field_offset(StgRegTable, rF6); field_offset(StgRegTable, rD1); field_offset(StgRegTable, rD2); + field_offset(StgRegTable, rD3); + field_offset(StgRegTable, rD4); + field_offset(StgRegTable, rD5); + field_offset(StgRegTable, rD6); 
field_offset(StgRegTable, rL1); field_offset(StgRegTable, rSp); field_offset(StgRegTable, rSpLim); @@ -736,9 +742,11 @@ main(int argc, char *argv[]) constantInt("mAX_Float_REG", MAX_FLOAT_REG); constantInt("mAX_Double_REG", MAX_DOUBLE_REG); constantInt("mAX_Long_REG", MAX_LONG_REG); + constantInt("mAX_SSE_REG", MAX_SSE_REG); constantInt("mAX_Real_Vanilla_REG", MAX_REAL_VANILLA_REG); constantInt("mAX_Real_Float_REG", MAX_REAL_FLOAT_REG); constantInt("mAX_Real_Double_REG", MAX_REAL_DOUBLE_REG); + constantInt("mAX_Real_SSE_REG", MAX_REAL_SSE_REG); constantInt("mAX_Real_Long_REG", MAX_REAL_LONG_REG); // This tells the native code generator the size of the spill diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index 2fab041c22..5ff4d4e51e 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -81,9 +81,10 @@ -------------------------------------------------------------------------- */ #define MAX_VANILLA_REG 10 -#define MAX_FLOAT_REG 4 -#define MAX_DOUBLE_REG 2 +#define MAX_FLOAT_REG 6 +#define MAX_DOUBLE_REG 6 #define MAX_LONG_REG 1 +#define MAX_SSE_REG 6 /* ----------------------------------------------------------------------------- Semi-Tagging constants diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index 6a70d08d66..8cefe9bc64 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -92,6 +92,7 @@ #define MAX_REAL_FLOAT_REG 0 #define MAX_REAL_DOUBLE_REG 0 #define MAX_REAL_LONG_REG 0 +#define MAX_REAL_SSE_REG 0 /* ----------------------------------------------------------------------------- The x86-64 register mapping @@ -141,9 +142,22 @@ #define REG_F2 xmm2 #define REG_F3 xmm3 #define REG_F4 xmm4 - -#define REG_D1 xmm5 -#define REG_D2 xmm6 +#define REG_F5 xmm5 +#define REG_F6 xmm6 + +#define REG_D1 xmm1 +#define REG_D2 xmm2 +#define REG_D3 xmm3 +#define REG_D4 xmm4 +#define REG_D5 xmm5 +#define REG_D6 xmm6 + +#define REG_SSE1 xmm1 +#define REG_SSE2 xmm2 +#define REG_SSE3 xmm3 +#define REG_SSE4 xmm4 
+#define REG_SSE5 xmm5 +#define REG_SSE6 xmm6 #if !defined(mingw32_HOST_OS) #define CALLER_SAVES_R3 @@ -156,16 +170,34 @@ #define CALLER_SAVES_F2 #define CALLER_SAVES_F3 #define CALLER_SAVES_F4 +#define CALLER_SAVES_F5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_F6 +#endif #define CALLER_SAVES_D1 -#if !defined(mingw32_HOST_OS) #define CALLER_SAVES_D2 +#define CALLER_SAVES_D3 +#define CALLER_SAVES_D4 +#define CALLER_SAVES_D5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_D6 +#endif + +#define CALLER_SAVES_SSE1 +#define CALLER_SAVES_SSE2 +#define CALLER_SAVES_SSE3 +#define CALLER_SAVES_SSE4 +#define CALLER_SAVES_SSE5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_SSE6 #endif #define MAX_REAL_VANILLA_REG 6 -#define MAX_REAL_FLOAT_REG 4 -#define MAX_REAL_DOUBLE_REG 2 +#define MAX_REAL_FLOAT_REG 6 +#define MAX_REAL_DOUBLE_REG 6 #define MAX_REAL_LONG_REG 0 +#define MAX_REAL_SSE_REG 6 /* ----------------------------------------------------------------------------- The PowerPC register mapping @@ -518,6 +550,24 @@ # endif #endif +#ifndef MAX_REAL_SSE_REG +# if defined(REG_SSE6) +# define MAX_REAL_SSE_REG 6 +# elif defined(REG_SSE5) +# define MAX_REAL_SSE_REG 5 +# elif defined(REG_SSE4) +# define MAX_REAL_SSE_REG 4 +# elif defined(REG_SSE3) +# define MAX_REAL_SSE_REG 3 +# elif defined(REG_SSE2) +# define MAX_REAL_SSE_REG 2 +# elif defined(REG_SSE1) +# define MAX_REAL_SSE_REG 1 +# else +# define MAX_REAL_SSE_REG 0 +# endif +#endif + /* define NO_ARG_REGS if we have no argument registers at all (we can * optimise certain code paths using this predicate). 
*/ diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index 70e93d3234..fd1577e71a 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -73,8 +73,14 @@ typedef struct { StgFloat rF2; StgFloat rF3; StgFloat rF4; + StgFloat rF5; + StgFloat rF6; StgDouble rD1; StgDouble rD2; + StgDouble rD3; + StgDouble rD4; + StgDouble rD5; + StgDouble rD6; StgWord64 rL1; StgPtr rSp; StgPtr rSpLim; @@ -216,6 +222,18 @@ GLOBAL_REG_DECL(StgFloat,F4,REG_F4) #define F4 (BaseReg->rF4) #endif +#if defined(REG_F5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F5,REG_F5) +#else +#define F5 (BaseReg->rF5) +#endif + +#if defined(REG_F6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F6,REG_F6) +#else +#define F6 (BaseReg->rF6) +#endif + #if defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS) GLOBAL_REG_DECL(StgDouble,D1,REG_D1) #else @@ -228,6 +246,30 @@ GLOBAL_REG_DECL(StgDouble,D2,REG_D2) #define D2 (BaseReg->rD2) #endif +#if defined(REG_D3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D3,REG_D3) +#else +#define D3 (BaseReg->rD3) +#endif + +#if defined(REG_D4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D4,REG_D4) +#else +#define D4 (BaseReg->rD4) +#endif + +#if defined(REG_D5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D5,REG_D5) +#else +#define D5 (BaseReg->rD5) +#endif + +#if defined(REG_D6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D6,REG_D6) +#else +#define D6 (BaseReg->rD6) +#endif + #if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS) GLOBAL_REG_DECL(StgWord64,L1,REG_L1) #else |