summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Johan Tibell <johan.tibell@gmail.com> 2013-01-10 15:30:21 -0800
committer Johan Tibell <johan.tibell@gmail.com> 2013-01-10 15:30:21 -0800
commit bab8dc7925a374ef91df3bca3d9a2aa9755212e1 (patch)
tree 6fd1d42e1c62bba11d9f23880822ea85d5d1ee2e
parent ccd8c6f00d6a4ebb2f11383aaff1d444a66131b4 (diff)
download haskell-bab8dc7925a374ef91df3bca3d9a2aa9755212e1.tar.gz
Add preprocessor defines when SSE is enabled
This will add the following preprocessor defines when Haskell source files are compiled:
 * __SSE__ - If any version of SSE is enabled
 * __SSE2__ - If SSE2 or greater is enabled
 * __SSE4_2__ - If SSE4.2 is enabled
Note that SSE2 is enabled by default on x86-64.
-rw-r--r-- compiler/main/DriverPipeline.hs 8
-rw-r--r-- compiler/main/DynFlags.hs 27
-rw-r--r-- compiler/nativeGen/X86/CodeGen.hs 12
3 files changed, 37 insertions, 10 deletions
diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs
index 866ae8cd0e..feaac3bc88 100644
--- a/compiler/main/DriverPipeline.hs
+++ b/compiler/main/DriverPipeline.hs
@@ -1962,12 +1962,20 @@ doCpp dflags raw include_cc_opts input_fn output_fn = do
-- remember, in code we *compile*, the HOST is the same our TARGET,
-- and BUILD is the same as our HOST.
+ let sse2 = isSse2Enabled dflags
+ sse4_2 = isSse4_2Enabled dflags
+ sse_defs =
+ [ "-D__SSE__=1" | sse2 || sse4_2 ] ++
+ [ "-D__SSE2__=1" | sse2 || sse4_2 ] ++
+ [ "-D__SSE4_2__=1" | sse4_2 ]
+
cpp_prog ( map SysTools.Option verbFlags
++ map SysTools.Option include_paths
++ map SysTools.Option hsSourceCppOpts
++ map SysTools.Option target_defs
++ map SysTools.Option hscpp_opts
++ map SysTools.Option cc_opts
+ ++ map SysTools.Option sse_defs
++ [ SysTools.Option "-x"
, SysTools.Option "c"
, SysTools.Option input_fn
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 35386296f0..216ca10449 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -118,6 +118,10 @@ module DynFlags (
tAG_MASK,
mAX_PTR_TAG,
tARGET_MIN_INT, tARGET_MAX_INT, tARGET_MAX_WORD,
+
+ -- * SSE
+ isSse2Enabled,
+ isSse4_2Enabled,
) where
#include "HsVersions.h"
@@ -2153,6 +2157,11 @@ dynamic_flags = [
, Flag "monly-4-regs" (NoArg (addWarn "The -monly-4-regs flag does nothing; it will be removed in a future GHC release"))
, Flag "msse2" (NoArg (setGeneralFlag Opt_SSE2))
, Flag "msse4.2" (NoArg (setGeneralFlag Opt_SSE4_2))
+ -- at some point we should probably have a single SSE flag that
+ -- contains the SSE version, instead of having a different flag
+ -- per version. That would make it easier to e.g. check if SSE2 is
+ -- enabled as you wouldn't have to check if either Opt_SSE2 or
+ -- Opt_SSE4_2 is set (as the latter implies the former).
------ Warning opts -------------------------------------------------
, Flag "W" (NoArg (mapM_ setWarningFlag minusWOpts))
@@ -3371,3 +3380,21 @@ makeDynFlagsConsistent dflags
arch = platformArch platform
os = platformOS platform
+-- -----------------------------------------------------------------------------
+-- SSE
+
+isSse2Enabled :: DynFlags -> Bool
+isSse2Enabled dflags = isSse4_2Enabled dflags || isSse2Enabled'
+ where
+ isSse2Enabled' = case platformArch (targetPlatform dflags) of
+ ArchX86_64 -> -- SSE2 is fixed on for x86_64. It would be
+ -- possible to make it optional, but we'd need to
+ -- fix at least the foreign call code where the
+ -- calling convention specifies the use of xmm regs,
+ -- and possibly other places.
+ True
+ ArchX86 -> gopt Opt_SSE2 dflags
+ _ -> False
+
+isSse4_2Enabled :: DynFlags -> Bool
+isSse4_2Enabled dflags = gopt Opt_SSE4_2 dflags
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 30cf060e74..d01470926b 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -71,20 +71,12 @@ is32BitPlatform = do
sse2Enabled :: NatM Bool
sse2Enabled = do
dflags <- getDynFlags
- case platformArch (targetPlatform dflags) of
- ArchX86_64 -> -- SSE2 is fixed on for x86_64. It would be
- -- possible to make it optional, but we'd need to
- -- fix at least the foreign call code where the
- -- calling convention specifies the use of xmm regs,
- -- and possibly other places.
- return True
- ArchX86 -> return (gopt Opt_SSE2 dflags || gopt Opt_SSE4_2 dflags)
- _ -> panic "sse2Enabled: Not an X86* arch"
+ return (isSse2Enabled dflags)
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
dflags <- getDynFlags
- return (gopt Opt_SSE4_2 dflags)
+ return (isSse4_2Enabled dflags)
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do