summaryrefslogtreecommitdiff
path: root/testsuite/tests/codeGen
diff options
context:
space:
mode:
authorAbhiroop Sarkar <asiamgenius@gmail.com>2018-09-27 15:28:46 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2019-07-03 09:33:39 -0400
commitacd795583625401c5554f8e04ec7efca18814011 (patch)
tree545e529eed21e78592ff326d4ebf9804095ad2cb /testsuite/tests/codeGen
parentdf3e5b744db29c085f5bc05f8b609197bcbf9b0c (diff)
downloadhaskell-acd795583625401c5554f8e04ec7efca18814011.tar.gz
Add support for SIMD operations in the NCG
This adds support for constructing vector types from Float#, Double# etc and performing arithmetic operations on them Cleaned-Up-By: Ben Gamari <ben@well-typed.com>
Diffstat (limited to 'testsuite/tests/codeGen')
-rw-r--r--testsuite/tests/codeGen/should_run/all.T10
-rw-r--r--testsuite/tests/codeGen/should_run/cgrun083.hs70
-rw-r--r--testsuite/tests/codeGen/should_run/cgrun083.stdout2
-rw-r--r--testsuite/tests/codeGen/should_run/simd000.hs21
-rw-r--r--testsuite/tests/codeGen/should_run/simd000.stdout4
-rw-r--r--testsuite/tests/codeGen/should_run/simd001.hs49
-rw-r--r--testsuite/tests/codeGen/should_run/simd001.stdout6
-rw-r--r--testsuite/tests/codeGen/should_run/simd002.hs33
-rw-r--r--testsuite/tests/codeGen/should_run/simd002.stdout10
-rw-r--r--testsuite/tests/codeGen/should_run/simd003.hs25
-rw-r--r--testsuite/tests/codeGen/should_run/simd003.stdout2
-rw-r--r--testsuite/tests/codeGen/should_run/simd004.hs95
-rw-r--r--testsuite/tests/codeGen/should_run/simd004.stdout20
-rw-r--r--testsuite/tests/codeGen/should_run/simd005.hs93
-rw-r--r--testsuite/tests/codeGen/should_run/simd005.stdout20
15 files changed, 459 insertions, 1 deletions
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 20ac9cc59e..eb6fee544f 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -80,11 +80,20 @@ test('cgrun069',
test('cgrun070', normal, compile_and_run, [''])
test('cgrun071', [when(have_cpu_feature('sse4_2'), extra_hc_opts('-msse4.2'))], compile_and_run, [''])
test('cgrun072', normal, compile_and_run, [''])
+test('cgrun074', normal, compile_and_run, [''])
test('cgrun075', normal, compile_and_run, [''])
test('cgrun076', normal, compile_and_run, [''])
test('cgrun077', [when(have_cpu_feature('bmi2'), extra_hc_opts('-mbmi2'))], compile_and_run, [''])
test('cgrun078', omit_ways(['ghci']), compile_and_run, [''])
+# N.B. Only NCG and LLVM backends support SIMD operations
+test('simd000', when(unregisterised(), skip), compile_and_run, [''])
+test('simd001', when(unregisterised(), skip), compile_and_run, [''])
+test('simd002', when(unregisterised(), skip), compile_and_run, [''])
+test('simd003', when(unregisterised(), skip), compile_and_run, [''])
+test('simd004', when(unregisterised(), skip), compile_and_run, [''])
+test('simd005', when(unregisterised(), skip), compile_and_run, [''])
+
test('T1852', normal, compile_and_run, [''])
test('T1861', extra_run_opts('0'), compile_and_run, [''])
test('T2080', normal, compile_and_run, [''])
@@ -143,7 +152,6 @@ test('T9001', normal, compile_and_run, [''])
test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples
compile_and_run, [''])
test('T9340', normal, compile_and_run, [''])
-test('cgrun074', normal, compile_and_run, [''])
test('CmmSwitchTest32', unless(wordsize(32), skip), compile_and_run, [''])
test('CmmSwitchTest64', unless(wordsize(64), skip), compile_and_run, [''])
# Skipping WAY=ghci, because it is not broken.
diff --git a/testsuite/tests/codeGen/should_run/cgrun083.hs b/testsuite/tests/codeGen/should_run/cgrun083.hs
new file mode 100644
index 0000000000..cac889ec02
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun083.hs
@@ -0,0 +1,70 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- tests for SSE based vector load/stores operations
+
+import GHC.Exts
+import GHC.IO
+
+data ByteArray = BA (MutableByteArray# RealWorld)
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+ (FX4# a) == (FX4# b)
+ = case (unpackFloatX4# a) of
+ (# a1, a2, a3, a4 #) ->
+ case (unpackFloatX4# b) of
+ (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+ (F# a2) == (F# b2) &&
+ (F# a3) == (F# b3) &&
+ (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+ (DX2# a) == (DX2# b)
+ = case (unpackDoubleX2# a) of
+ (# a1, a2 #) ->
+ case (unpackDoubleX2# b) of
+ (# b1, b2 #) -> (D# a1) == (D# b1) &&
+ (D# a2) == (D# b2)
+
+writeFloatArray :: ByteArray -> Int -> Float -> IO ()
+writeFloatArray (BA ba) (I# i) (F# n) = IO $ \s ->
+ case writeFloatArray# ba i n s of s' -> (# s', () #)
+
+readFloatX4 :: ByteArray -> Int -> IO FloatX4
+readFloatX4 (BA ba) (I# i) = IO $ \s ->
+ case readFloatArrayAsFloatX4# ba i s of (# s', r #) -> (# s', FX4# r #)
+
+writeDoubleArray :: ByteArray -> Int -> Double -> IO ()
+writeDoubleArray (BA ba) (I# i) (D# n) = IO $ \s ->
+ case writeDoubleArray# ba i n s of s' -> (# s', () #)
+
+readDoubleX2 :: ByteArray -> Int -> IO DoubleX2
+readDoubleX2 (BA ba) (I# i) = IO $ \s ->
+ case readDoubleArrayAsDoubleX2# ba i s of (# s', r #) -> (# s', DX2# r #)
+
+main :: IO ()
+main = do
+ ba <- IO $ \s -> case newAlignedPinnedByteArray# 64# 64# s of (# s', ba #) -> (# s', BA ba #)
+
+ mapM_ (\i -> writeFloatArray ba i (realToFrac i + realToFrac i / 10)) [0..16]
+ print =<< readFloatX4 ba 0
+
+ mapM_ (\i -> writeDoubleArray ba i (realToFrac i + realToFrac i / 10)) [0..8]
+ print =<< readDoubleX2 ba 0
diff --git a/testsuite/tests/codeGen/should_run/cgrun083.stdout b/testsuite/tests/codeGen/should_run/cgrun083.stdout
new file mode 100644
index 0000000000..bc41b3d2d3
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun083.stdout
@@ -0,0 +1,2 @@
+(0.0,1.1,2.2,3.3)
+(0.0,1.1)
diff --git a/testsuite/tests/codeGen/should_run/simd000.hs b/testsuite/tests/codeGen/should_run/simd000.hs
new file mode 100644
index 0000000000..47d69497c0
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd000.hs
@@ -0,0 +1,21 @@
+{-# OPTIONS_GHC -mavx #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test broadcasting, packing and unpacking for vector types
+
+import GHC.Exts
+
+main :: IO ()
+main = do
+ -- FloatX4#
+ case unpackFloatX4# (broadcastFloatX4# 1.5#) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+ case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+ -- DoubleX2#
+ case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+ (# a, b #) -> print (D# a, D# b)
+ case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+ (# a, b #) -> print (D# a, D# b)
diff --git a/testsuite/tests/codeGen/should_run/simd000.stdout b/testsuite/tests/codeGen/should_run/simd000.stdout
new file mode 100644
index 0000000000..e5f9d383ec
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd000.stdout
@@ -0,0 +1,4 @@
+(1.5,1.5,1.5,1.5)
+(4.5,7.8,2.3,6.5)
+(6.5,6.5)
+(8.9,7.2)
diff --git a/testsuite/tests/codeGen/should_run/simd001.hs b/testsuite/tests/codeGen/should_run/simd001.hs
new file mode 100644
index 0000000000..c45e3bf922
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd001.hs
@@ -0,0 +1,49 @@
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test the lifting of unlifted vector types and
+-- defining various typeclass instances for the lifted types
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+ (FX4# a) == (FX4# b)
+ = case (unpackFloatX4# a) of
+ (# a1, a2, a3, a4 #) ->
+ case (unpackFloatX4# b) of
+ (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+ (F# a2) == (F# b2) &&
+ (F# a3) == (F# b3) &&
+ (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+ (DX2# a) == (DX2# b)
+ = case (unpackDoubleX2# a) of
+ (# a1, a2 #) ->
+ case (unpackDoubleX2# b) of
+ (# b1, b2 #) -> (D# a1) == (D# b1) &&
+ (D# a2) == (D# b2)
+
+main :: IO ()
+main = do
+ print (FX4# (broadcastFloatX4# 1.5#))
+ print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
+ print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
+
+ print (DX2# (broadcastDoubleX2# 2.5##))
+ print $ (DX2# (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
+ print $ (DX2# (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
diff --git a/testsuite/tests/codeGen/should_run/simd001.stdout b/testsuite/tests/codeGen/should_run/simd001.stdout
new file mode 100644
index 0000000000..899f900506
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd001.stdout
@@ -0,0 +1,6 @@
+(1.5,1.5,1.5,1.5)
+False
+True
+(2.5,2.5)
+False
+True
diff --git a/testsuite/tests/codeGen/should_run/simd002.hs b/testsuite/tests/codeGen/should_run/simd002.hs
new file mode 100644
index 0000000000..8c61546381
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd002.hs
@@ -0,0 +1,33 @@
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test arithmetic vector operations
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+
+main :: IO ()
+main = do
+ print (FX4# (plusFloatX4# (broadcastFloatX4# 1.3#) (broadcastFloatX4# 2.2#)))
+ print (FX4# (minusFloatX4# (broadcastFloatX4# 3.5#) (broadcastFloatX4# 2.2#)))
+ print (FX4# (timesFloatX4# (broadcastFloatX4# 2.4#) (broadcastFloatX4# 2.2#)))
+ print (FX4# (divideFloatX4# (broadcastFloatX4# 9.2#) (broadcastFloatX4# 4.0#)))
+ print (FX4# (negateFloatX4# (broadcastFloatX4# 3.5#)))
+
+ print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##) (broadcastDoubleX2# 2.2##)))
+ print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##) (broadcastDoubleX2# 2.2##)))
+ print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##) (broadcastDoubleX2# 2.2##)))
+ print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##) (broadcastDoubleX2# 4.0##)))
+ print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd002.stdout b/testsuite/tests/codeGen/should_run/simd002.stdout
new file mode 100644
index 0000000000..302d71a13f
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd002.stdout
@@ -0,0 +1,10 @@
+(3.5,3.5,3.5,3.5)
+(1.3,1.3,1.3,1.3)
+(5.28,5.28,5.28,5.28)
+(2.3,2.3,2.3,2.3)
+(-3.5,-3.5,-3.5,-3.5)
+(3.5,3.5)
+(1.2999999999999998,1.2999999999999998)
+(5.28,5.28)
+(2.3,2.3)
+(-3.5,-3.5)
diff --git a/testsuite/tests/codeGen/should_run/simd003.hs b/testsuite/tests/codeGen/should_run/simd003.hs
new file mode 100644
index 0000000000..de3ae5aeb4
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd003.hs
@@ -0,0 +1,25 @@
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test the packing of floats and doubles into a vector
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+
+main :: IO ()
+main = do
+ print (FX4# (packFloatX4# (# 9.2#, 8.15#, 7.0#, 6.4# #)))
+ print (DX2# (packDoubleX2# (# 7.2##, 9.3## #)))
diff --git a/testsuite/tests/codeGen/should_run/simd003.stdout b/testsuite/tests/codeGen/should_run/simd003.stdout
new file mode 100644
index 0000000000..230e4658c4
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd003.stdout
@@ -0,0 +1,2 @@
+(9.2,8.15,7.0,6.4)
+(7.2,9.3)
diff --git a/testsuite/tests/codeGen/should_run/simd004.hs b/testsuite/tests/codeGen/should_run/simd004.hs
new file mode 100644
index 0000000000..5216822ec4
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd004.hs
@@ -0,0 +1,95 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test if enabling -O2 produces wrong results while
+-- packing , broadcasting, unpacking vectors and for
+-- arithmetic operations as well for avx instructions
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+ (FX4# a) == (FX4# b)
+ = case (unpackFloatX4# a) of
+ (# a1, a2, a3, a4 #) ->
+ case (unpackFloatX4# b) of
+ (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+ (F# a2) == (F# b2) &&
+ (F# a3) == (F# b3) &&
+ (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+ (DX2# a) == (DX2# b)
+ = case (unpackDoubleX2# a) of
+ (# a1, a2 #) ->
+ case (unpackDoubleX2# b) of
+ (# b1, b2 #) -> (D# a1) == (D# b1) &&
+ (D# a2) == (D# b2)
+
+
+main :: IO ()
+main = do
+
+ -- !!! test broadcasting, packing and unpacking for vector types
+ -- FloatX4#
+ case unpackFloatX4# (broadcastFloatX4# 1.5#) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+ case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+ -- DoubleX2#
+ case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+ (# a, b #) -> print (D# a, D# b)
+ case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+ (# a, b #) -> print (D# a, D# b)
+
+
+ -- !!! test the lifting of unlifted vector types and
+ -- defining various typeclass instances for the lifted types
+
+ print (FX4# (broadcastFloatX4# 1.5#))
+ print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
+ print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
+
+ print (DX2# (broadcastDoubleX2# 2.5##))
+ print $ (DX2#
+ (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
+ print $ (DX2#
+ (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
+
+
+ -- !!! test arithmetic vector operations
+ print (FX4# (plusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (minusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (timesFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (divideFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (negateFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))))
+
+ print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##)
+ (broadcastDoubleX2# 2.2##)))
+ print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##)
+ (broadcastDoubleX2# 2.2##)))
+ print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##)
+ (broadcastDoubleX2# 2.2##)))
+ print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##)
+ (broadcastDoubleX2# 4.0##)))
+ print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd004.stdout b/testsuite/tests/codeGen/should_run/simd004.stdout
new file mode 100644
index 0000000000..ee90e738ca
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd004.stdout
@@ -0,0 +1,20 @@
+(1.5,1.5,1.5,1.5)
+(4.5,7.8,2.3,6.5)
+(6.5,6.5)
+(8.9,7.2)
+(1.5,1.5,1.5,1.5)
+False
+True
+(2.5,2.5)
+False
+True
+(12.7,14.1,7.0,15.7)
+(-3.6999998,1.5,-2.3999999,-2.6999998)
+(36.899998,49.140003,10.809999,59.8)
+(0.5487805,1.2380953,0.4893617,0.70652175)
+(-4.5,-7.8,-2.3,-6.5)
+(3.5,3.5)
+(1.2999999999999998,1.2999999999999998)
+(5.28,5.28)
+(2.3,2.3)
+(-3.5,-3.5) \ No newline at end of file
diff --git a/testsuite/tests/codeGen/should_run/simd005.hs b/testsuite/tests/codeGen/should_run/simd005.hs
new file mode 100644
index 0000000000..b074066d24
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd005.hs
@@ -0,0 +1,93 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- tests for SSE based vector operations
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+ show (FX4# f) = case (unpackFloatX4# f) of
+ (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+ (FX4# a) == (FX4# b)
+ = case (unpackFloatX4# a) of
+ (# a1, a2, a3, a4 #) ->
+ case (unpackFloatX4# b) of
+ (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+ (F# a2) == (F# b2) &&
+ (F# a3) == (F# b3) &&
+ (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+ show (DX2# d) = case (unpackDoubleX2# d) of
+ (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+ (DX2# a) == (DX2# b)
+ = case (unpackDoubleX2# a) of
+ (# a1, a2 #) ->
+ case (unpackDoubleX2# b) of
+ (# b1, b2 #) -> (D# a1) == (D# b1) &&
+ (D# a2) == (D# b2)
+
+main :: IO ()
+main = do
+
+ -- !!! test broadcasting, packing and unpacking for vector types
+ -- FloatX4#
+ case unpackFloatX4# (broadcastFloatX4# 1.5#) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+ case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+ -- DoubleX2#
+ case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+ (# a, b #) -> print (D# a, D# b)
+ case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+ (# a, b #) -> print (D# a, D# b)
+
+
+ -- !!! test the lifting of unlifted vector types and
+ -- defining various typeclass instances for the lifted types
+
+ print (FX4# (broadcastFloatX4# 1.5#))
+ print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
+ print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
+
+ print (DX2# (broadcastDoubleX2# 2.5##))
+ print $ (DX2#
+ (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
+ print $ (DX2#
+ (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
+
+
+ -- !!! test arithmetic vector operations
+ print (FX4# (plusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (minusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (timesFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (divideFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+ (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+ print (FX4# (negateFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))))
+
+ print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##)
+ (broadcastDoubleX2# 2.2##)))
+ print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##)
+ (broadcastDoubleX2# 2.2##)))
+ print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##)
+ (broadcastDoubleX2# 2.2##)))
+ print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##)
+ (broadcastDoubleX2# 4.0##)))
+ print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd005.stdout b/testsuite/tests/codeGen/should_run/simd005.stdout
new file mode 100644
index 0000000000..84386823f8
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd005.stdout
@@ -0,0 +1,20 @@
+(1.5,1.5,1.5,1.5)
+(4.5,7.8,2.3,6.5)
+(6.5,6.5)
+(8.9,7.2)
+(1.5,1.5,1.5,1.5)
+False
+True
+(2.5,2.5)
+False
+True
+(12.7,14.1,7.0,15.7)
+(-3.6999998,1.5,-2.3999999,-2.6999998)
+(36.899998,49.140003,10.809999,59.8)
+(0.5487805,1.2380953,0.4893617,0.70652175)
+(-4.5,-7.8,-2.3,-6.5)
+(3.5,3.5)
+(1.2999999999999998,1.2999999999999998)
+(5.28,5.28)
+(2.3,2.3)
+(-3.5,-3.5)