summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/nativeGen/MachCodeGen.hs14
-rw-r--r--compiler/nativeGen/MachInstrs.hs8
-rw-r--r--compiler/nativeGen/PprMach.hs65
-rw-r--r--compiler/nativeGen/RegAllocInfo.hs12
4 files changed, 69 insertions, 30 deletions
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index 3abe820c9e..d86fe7a01b 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -3050,18 +3050,20 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- we only cope with a single result for foreign calls
genCCall (CmmPrim op) [CmmKinded r _] args = do
+ l1 <- getNewLabelNat
+ l2 <- getNewLabelNat
case op of
MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args
- MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32) args
- MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64) args
+ MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32 l1 l2) args
+ MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args
- MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32) args
- MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64) args
+ MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32 l1 l2) args
+ MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args
- MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32) args
- MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64) args
+ MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32 l1 l2) args
+ MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args
other_op -> outOfLineFloatOp op r args
where
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs
index 5a8d3f17f8..716a52148e 100644
--- a/compiler/nativeGen/MachInstrs.hs
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -483,9 +483,9 @@ bit or 64 bit precision.
| GABS MachRep Reg Reg -- src, dst
| GNEG MachRep Reg Reg -- src, dst
| GSQRT MachRep Reg Reg -- src, dst
- | GSIN MachRep Reg Reg -- src, dst
- | GCOS MachRep Reg Reg -- src, dst
- | GTAN MachRep Reg Reg -- src, dst
+ | GSIN MachRep CLabel CLabel Reg Reg -- src, dst
+ | GCOS MachRep CLabel CLabel Reg Reg -- src, dst
+ | GTAN MachRep CLabel CLabel Reg Reg -- src, dst
| GFREE -- do ffree on all x86 regs; an ugly hack
#endif
@@ -583,7 +583,7 @@ is_G_instr instr
GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
GCMP _ _ _ -> True; GABS _ _ _ -> True
GNEG _ _ _ -> True; GSQRT _ _ _ -> True
- GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
+ GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
GFREE -> panic "is_G_instr: GFREE (!)"
other -> False
#endif /* i386_TARGET_ARCH */
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
index 38267d077b..1995cd0d61 100644
--- a/compiler/nativeGen/PprMach.hs
+++ b/compiler/nativeGen/PprMach.hs
@@ -1504,17 +1504,12 @@ pprInstr g@(GNEG sz src dst)
pprInstr g@(GSQRT sz src dst)
= pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
hcat [gtab, gcoerceto sz, gpop dst 1])
-pprInstr g@(GSIN sz src dst)
- = pprG g (hcat [gtab, gpush src 0, text " ; fsin"] $$
- hcat [gtab, gcoerceto sz, gpop dst 1])
-pprInstr g@(GCOS sz src dst)
- = pprG g (hcat [gtab, gpush src 0, text " ; fcos"] $$
- hcat [gtab, gcoerceto sz, gpop dst 1])
-pprInstr g@(GTAN sz src dst)
- = pprG g (hcat [gtab, text "ffree %st(6) ; ",
- gpush src 0, text " ; fptan ; ",
- text " fstp %st(0)"] $$
- hcat [gtab, gcoerceto sz, gpop dst 1])
+pprInstr g@(GSIN sz l1 l2 src dst)
+ = pprG g (pprTrigOp "fsin" False l1 l2 src dst sz)
+pprInstr g@(GCOS sz l1 l2 src dst)
+ = pprG g (pprTrigOp "fcos" False l1 l2 src dst sz)
+pprInstr g@(GTAN sz l1 l2 src dst)
+ = pprG g (pprTrigOp "fptan" True l1 l2 src dst sz)
-- In the translations for GADD, GMUL, GSUB and GDIV,
-- the first two cases are mere optimisations. The otherwise clause
@@ -1585,6 +1580,48 @@ pprInstr GFREE
ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)")
]
+pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> MachRep -> Doc
+pprTrigOp op -- fsin, fcos or fptan
+ isTan -- we need a couple of extra steps if we're doing tan
+ l1 l2 -- internal labels for us to use
+ src dst sz
+ = -- We'll be needing %eax later on
+ hcat [gtab, text "pushl %eax;"] $$
+ -- tan is going to use an extra space on the FP stack
+ (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
+ -- First put the value in %st(0) and try to apply the op to it
+ hcat [gpush src 0, text ("; " ++ op)] $$
+ -- Now look to see if C2 was set (overflow, |value| >= 2^63)
+ hcat [gtab, text "fnstsw %ax"] $$
+ hcat [gtab, text "test $0x400,%eax"] $$
+ -- If we were in bounds then jump to the end
+ hcat [gtab, text "je " <> pprCLabel_asm l1] $$
+ -- Otherwise we need to shrink the value. Start by
+ -- loading pi, doubleing it (by adding it to itself),
+ -- and then swapping pi with the value, so the value we
+ -- want to apply op to is in %st(0) again
+ hcat [gtab, text "ffree %st(7); fldpi"] $$
+ hcat [gtab, text "fadd %st(0),%st"] $$
+ hcat [gtab, text "fxch %st(1)"] $$
+ -- Now we have a loop in which we make the value smaller,
+ -- see if it's small enough, and loop if not
+ (pprCLabel_asm l2 <> char ':') $$
+ hcat [gtab, text "fprem1"] $$
+ -- My Debian libc uses fstsw here for the tan code, but I can't
+ -- see any reason why it should need to be different for tan.
+ hcat [gtab, text "fnstsw %ax"] $$
+ hcat [gtab, text "test $0x400,%eax"] $$
+ hcat [gtab, text "jne " <> pprCLabel_asm l2] $$
+ hcat [gtab, text "fstp %st(1)"] $$
+ hcat [gtab, text op] $$
+ (pprCLabel_asm l1 <> char ':') $$
+ -- Pop the 1.0 tan gave us
+ (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
+ -- Restore %eax
+ hcat [gtab, text "popl %eax;"] $$
+ -- And finally make the result the right size
+ hcat [gtab, gcoerceto sz, gpop dst 1]
+
--------------------------
-- coerce %st(0) to the specified size
@@ -1626,9 +1663,9 @@ pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") F64 co src dst
pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst
pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst
pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst
-pprGInstr (GSIN sz src dst) = pprSizeRegReg (sLit "gsin") sz src dst
-pprGInstr (GCOS sz src dst) = pprSizeRegReg (sLit "gcos") sz src dst
-pprGInstr (GTAN sz src dst) = pprSizeRegReg (sLit "gtan") sz src dst
+pprGInstr (GSIN sz _ _ src dst) = pprSizeRegReg (sLit "gsin") sz src dst
+pprGInstr (GCOS sz _ _ src dst) = pprSizeRegReg (sLit "gcos") sz src dst
+pprGInstr (GTAN sz _ _ src dst) = pprSizeRegReg (sLit "gtan") sz src dst
pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg (sLit "gadd") sz src1 src2 dst
pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg (sLit "gsub") sz src1 src2 dst
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
index 2361e7762c..da876c38fa 100644
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ b/compiler/nativeGen/RegAllocInfo.hs
@@ -215,9 +215,9 @@ regUsage instr = case instr of
GABS sz src dst -> mkRU [src] [dst]
GNEG sz src dst -> mkRU [src] [dst]
GSQRT sz src dst -> mkRU [src] [dst]
- GSIN sz src dst -> mkRU [src] [dst]
- GCOS sz src dst -> mkRU [src] [dst]
- GTAN sz src dst -> mkRU [src] [dst]
+ GSIN sz _ _ src dst -> mkRU [src] [dst]
+ GCOS sz _ _ src dst -> mkRU [src] [dst]
+ GTAN sz _ _ src dst -> mkRU [src] [dst]
#endif
#if x86_64_TARGET_ARCH
@@ -599,9 +599,9 @@ patchRegs instr env = case instr of
GABS sz src dst -> GABS sz (env src) (env dst)
GNEG sz src dst -> GNEG sz (env src) (env dst)
GSQRT sz src dst -> GSQRT sz (env src) (env dst)
- GSIN sz src dst -> GSIN sz (env src) (env dst)
- GCOS sz src dst -> GCOS sz (env src) (env dst)
- GTAN sz src dst -> GTAN sz (env src) (env dst)
+ GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
+ GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
+ GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
#endif
#if x86_64_TARGET_ARCH