Add support for SIMD operations in the NCG

This adds support for constructing vector types from Float#, Double# etc and performing arithmetic operations on them Cleaned-Up-By: Ben Gamari <ben@well-typed.com>
author: Abhiroop Sarkar <asiamgenius@gmail.com> 2018-09-27 15:28:46 -0400
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2019-07-03 09:33:39 -0400
commit: acd795583625401c5554f8e04ec7efca18814011 (patch)
tree: 545e529eed21e78592ff326d4ebf9804095ad2cb /testsuite/tests/codeGen
parent: df3e5b744db29c085f5bc05f8b609197bcbf9b0c (diff)
download: haskell-acd795583625401c5554f8e04ec7efca18814011.tar.gz
15 files changed, 459 insertions, 1 deletions
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 20ac9cc59e..eb6fee544f 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -80,11 +80,20 @@ test('cgrun069',
 test('cgrun070', normal, compile_and_run, [''])
 test('cgrun071', [when(have_cpu_feature('sse4_2'), extra_hc_opts('-msse4.2'))], compile_and_run, [''])
 test('cgrun072', normal, compile_and_run, [''])
+test('cgrun074', normal, compile_and_run, [''])
 test('cgrun075', normal, compile_and_run, [''])
 test('cgrun076', normal, compile_and_run, [''])
 test('cgrun077', [when(have_cpu_feature('bmi2'), extra_hc_opts('-mbmi2'))], compile_and_run, [''])
 test('cgrun078', omit_ways(['ghci']), compile_and_run, [''])
 
+# N.B. Only NCG and LLVM backends support SIMD operations
+test('simd000', when(unregisterised(), skip), compile_and_run, [''])
+test('simd001', when(unregisterised(), skip), compile_and_run, [''])
+test('simd002', when(unregisterised(), skip), compile_and_run, [''])
+test('simd003', when(unregisterised(), skip), compile_and_run, [''])
+test('simd004', when(unregisterised(), skip), compile_and_run, [''])
+test('simd005', when(unregisterised(), skip), compile_and_run, [''])
+
 test('T1852', normal, compile_and_run, [''])
 test('T1861', extra_run_opts('0'), compile_and_run, [''])
 test('T2080', normal, compile_and_run, [''])
@@ -143,7 +152,6 @@ test('T9001', normal, compile_and_run, [''])
 test('T9013', omit_ways(['ghci']),  # ghci doesn't support unboxed tuples
      compile_and_run, [''])
 test('T9340', normal, compile_and_run, [''])
-test('cgrun074', normal, compile_and_run, [''])
 test('CmmSwitchTest32', unless(wordsize(32), skip), compile_and_run, [''])
 test('CmmSwitchTest64', unless(wordsize(64), skip), compile_and_run, [''])
 # Skipping WAY=ghci, because it is not broken.
diff --git a/testsuite/tests/codeGen/should_run/cgrun083.hs b/testsuite/tests/codeGen/should_run/cgrun083.hs
new file mode 100644
index 0000000000..cac889ec02
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun083.hs
@@ -0,0 +1,70 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- tests for SSE based vector load/stores operations
+
+import GHC.Exts
+import GHC.IO
+
+data ByteArray = BA (MutableByteArray# RealWorld)
+
+data FloatX4  = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+  (FX4# a) == (FX4# b)
+    = case (unpackFloatX4# a) of
+        (# a1, a2, a3, a4 #) ->
+          case (unpackFloatX4# b) of
+            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+                                    (F# a2) == (F# b2) &&
+                                    (F# a3) == (F# b3) &&
+                                    (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+  (DX2# a) == (DX2# b)
+    = case (unpackDoubleX2# a) of
+        (# a1, a2 #) ->
+          case (unpackDoubleX2# b) of
+            (# b1, b2 #) -> (D# a1) == (D# b1) &&
+                            (D# a2) == (D# b2)
+
+writeFloatArray :: ByteArray -> Int -> Float -> IO ()
+writeFloatArray (BA ba) (I# i) (F# n) = IO $ \s ->
+    case writeFloatArray# ba i n s of s' -> (# s', () #)
+
+readFloatX4 :: ByteArray -> Int -> IO FloatX4
+readFloatX4 (BA ba) (I# i) = IO $ \s ->
+    case readFloatArrayAsFloatX4# ba i s of (# s', r #) -> (# s', FX4# r #)
+
+writeDoubleArray :: ByteArray -> Int -> Double -> IO ()
+writeDoubleArray (BA ba) (I# i) (D# n) = IO $ \s ->
+    case writeDoubleArray# ba i n s of s' -> (# s', () #)
+
+readDoubleX2 :: ByteArray -> Int -> IO DoubleX2
+readDoubleX2 (BA ba) (I# i) = IO $ \s ->
+    case readDoubleArrayAsDoubleX2# ba i s of (# s', r #) -> (# s', DX2# r #)
+
+main :: IO ()
+main = do
+    ba <- IO $ \s -> case newAlignedPinnedByteArray# 64# 64# s of (# s', ba #) -> (# s', BA ba #)
+
+    mapM_ (\i -> writeFloatArray ba i (realToFrac i + realToFrac i / 10)) [0..16]
+    print =<< readFloatX4 ba 0
+
+    mapM_ (\i -> writeDoubleArray ba i (realToFrac i + realToFrac i / 10)) [0..8]
+    print =<< readDoubleX2 ba 0
diff --git a/testsuite/tests/codeGen/should_run/cgrun083.stdout b/testsuite/tests/codeGen/should_run/cgrun083.stdout
new file mode 100644
index 0000000000..bc41b3d2d3
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun083.stdout
@@ -0,0 +1,2 @@
+(0.0,1.1,2.2,3.3)
+(0.0,1.1)
diff --git a/testsuite/tests/codeGen/should_run/simd000.hs b/testsuite/tests/codeGen/should_run/simd000.hs
new file mode 100644
index 0000000000..47d69497c0
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd000.hs
@@ -0,0 +1,21 @@
+{-# OPTIONS_GHC -mavx #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test broadcasting, packing and unpacking for vector types
+
+import GHC.Exts
+
+main :: IO ()
+main = do
+    -- FloatX4#
+    case unpackFloatX4# (broadcastFloatX4# 1.5#) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+    -- DoubleX2#
+    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+        (# a, b #) -> print (D# a, D# b)
+    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+        (# a, b #) -> print (D# a, D# b)
diff --git a/testsuite/tests/codeGen/should_run/simd000.stdout b/testsuite/tests/codeGen/should_run/simd000.stdout
new file mode 100644
index 0000000000..e5f9d383ec
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd000.stdout
@@ -0,0 +1,4 @@
+(1.5,1.5,1.5,1.5)
+(4.5,7.8,2.3,6.5)
+(6.5,6.5)
+(8.9,7.2)
diff --git a/testsuite/tests/codeGen/should_run/simd001.hs b/testsuite/tests/codeGen/should_run/simd001.hs
new file mode 100644
index 0000000000..c45e3bf922
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd001.hs
@@ -0,0 +1,49 @@
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test the lifting of unlifted vector types and
+-- defining various typeclass instances for the lifted types
+
+import GHC.Exts
+
+data FloatX4  = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+  (FX4# a) == (FX4# b)
+    = case (unpackFloatX4# a) of
+        (# a1, a2, a3, a4 #) ->
+          case (unpackFloatX4# b) of
+            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+                                    (F# a2) == (F# b2) &&
+                                    (F# a3) == (F# b3) &&
+                                    (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+  (DX2# a) == (DX2# b)
+    = case (unpackDoubleX2# a) of
+        (# a1, a2 #) ->
+          case (unpackDoubleX2# b) of
+            (# b1, b2 #) -> (D# a1) == (D# b1) &&
+                            (D# a2) == (D# b2)
+
+main :: IO ()
+main = do
+  print (FX4# (broadcastFloatX4# 1.5#))
+  print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
+  print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
+
+  print (DX2# (broadcastDoubleX2# 2.5##))
+  print $ (DX2# (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
+  print $ (DX2# (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
diff --git a/testsuite/tests/codeGen/should_run/simd001.stdout b/testsuite/tests/codeGen/should_run/simd001.stdout
new file mode 100644
index 0000000000..899f900506
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd001.stdout
@@ -0,0 +1,6 @@
+(1.5,1.5,1.5,1.5)
+False
+True
+(2.5,2.5)
+False
+True
diff --git a/testsuite/tests/codeGen/should_run/simd002.hs b/testsuite/tests/codeGen/should_run/simd002.hs
new file mode 100644
index 0000000000..8c61546381
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd002.hs
@@ -0,0 +1,33 @@
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test arithmetic vector operations
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+
+main :: IO ()
+main = do
+  print (FX4# (plusFloatX4# (broadcastFloatX4# 1.3#) (broadcastFloatX4# 2.2#)))
+  print (FX4# (minusFloatX4# (broadcastFloatX4# 3.5#) (broadcastFloatX4# 2.2#)))
+  print (FX4# (timesFloatX4# (broadcastFloatX4# 2.4#) (broadcastFloatX4# 2.2#)))
+  print (FX4# (divideFloatX4# (broadcastFloatX4# 9.2#) (broadcastFloatX4# 4.0#)))
+  print (FX4# (negateFloatX4# (broadcastFloatX4# 3.5#)))
+
+  print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##) (broadcastDoubleX2# 2.2##)))
+  print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##) (broadcastDoubleX2# 2.2##)))
+  print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##) (broadcastDoubleX2# 2.2##)))
+  print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##) (broadcastDoubleX2# 4.0##)))
+  print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd002.stdout b/testsuite/tests/codeGen/should_run/simd002.stdout
new file mode 100644
index 0000000000..302d71a13f
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd002.stdout
@@ -0,0 +1,10 @@
+(3.5,3.5,3.5,3.5)
+(1.3,1.3,1.3,1.3)
+(5.28,5.28,5.28,5.28)
+(2.3,2.3,2.3,2.3)
+(-3.5,-3.5,-3.5,-3.5)
+(3.5,3.5)
+(1.2999999999999998,1.2999999999999998)
+(5.28,5.28)
+(2.3,2.3)
+(-3.5,-3.5)
diff --git a/testsuite/tests/codeGen/should_run/simd003.hs b/testsuite/tests/codeGen/should_run/simd003.hs
new file mode 100644
index 0000000000..de3ae5aeb4
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd003.hs
@@ -0,0 +1,25 @@
+{-# OPTIONS_GHC -msse4 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test the packing of floats and doubles into a vector
+
+import GHC.Exts
+
+data FloatX4 = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+
+main :: IO ()
+main = do
+  print (FX4# (packFloatX4# (# 9.2#, 8.15#, 7.0#, 6.4# #)))
+  print (DX2# (packDoubleX2# (# 7.2##, 9.3## #)))
diff --git a/testsuite/tests/codeGen/should_run/simd003.stdout b/testsuite/tests/codeGen/should_run/simd003.stdout
new file mode 100644
index 0000000000..230e4658c4
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd003.stdout
@@ -0,0 +1,2 @@
+(9.2,8.15,7.0,6.4)
+(7.2,9.3)
diff --git a/testsuite/tests/codeGen/should_run/simd004.hs b/testsuite/tests/codeGen/should_run/simd004.hs
new file mode 100644
index 0000000000..5216822ec4
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd004.hs
@@ -0,0 +1,95 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -mavx #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- !!! test if enabling -O2 produces wrong results while
+--     packing , broadcasting, unpacking vectors and for
+--     arithmetic operations as well for avx instructions
+
+import GHC.Exts
+
+data FloatX4  = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+  (FX4# a) == (FX4# b)
+    = case (unpackFloatX4# a) of
+        (# a1, a2, a3, a4 #) ->
+          case (unpackFloatX4# b) of
+            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+                                    (F# a2) == (F# b2) &&
+                                    (F# a3) == (F# b3) &&
+                                    (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+  (DX2# a) == (DX2# b)
+    = case (unpackDoubleX2# a) of
+        (# a1, a2 #) ->
+          case (unpackDoubleX2# b) of
+            (# b1, b2 #) -> (D# a1) == (D# b1) &&
+                            (D# a2) == (D# b2)
+
+
+main :: IO ()
+main = do
+
+    -- !!! test broadcasting, packing and unpacking for vector types
+    -- FloatX4#
+    case unpackFloatX4# (broadcastFloatX4# 1.5#) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+    -- DoubleX2#
+    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+        (# a, b #) -> print (D# a, D# b)
+    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+        (# a, b #) -> print (D# a, D# b)
+
+
+    -- !!! test the lifting of unlifted vector types and
+    -- defining various typeclass instances for the lifted types
+
+    print (FX4# (broadcastFloatX4# 1.5#))
+    print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
+    print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
+
+    print (DX2# (broadcastDoubleX2# 2.5##))
+    print $ (DX2#
+             (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
+    print $ (DX2#
+             (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
+
+
+    -- !!! test arithmetic vector operations
+    print (FX4# (plusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                              (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (minusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (timesFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (divideFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                                (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (negateFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))))
+
+    print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##)
+                               (broadcastDoubleX2# 2.2##)))
+    print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##)
+                                (broadcastDoubleX2# 2.2##)))
+    print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##)
+                                (broadcastDoubleX2# 2.2##)))
+    print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##)
+                                 (broadcastDoubleX2# 4.0##)))
+    print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd004.stdout b/testsuite/tests/codeGen/should_run/simd004.stdout
new file mode 100644
index 0000000000..ee90e738ca
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd004.stdout
@@ -0,0 +1,20 @@
+(1.5,1.5,1.5,1.5)
+(4.5,7.8,2.3,6.5)
+(6.5,6.5)
+(8.9,7.2)
+(1.5,1.5,1.5,1.5)
+False
+True
+(2.5,2.5)
+False
+True
+(12.7,14.1,7.0,15.7)
+(-3.6999998,1.5,-2.3999999,-2.6999998)
+(36.899998,49.140003,10.809999,59.8)
+(0.5487805,1.2380953,0.4893617,0.70652175)
+(-4.5,-7.8,-2.3,-6.5)
+(3.5,3.5)
+(1.2999999999999998,1.2999999999999998)
+(5.28,5.28)
+(2.3,2.3)
+(-3.5,-3.5)
+\ No newline at end of file
diff --git a/testsuite/tests/codeGen/should_run/simd005.hs b/testsuite/tests/codeGen/should_run/simd005.hs
new file mode 100644
index 0000000000..b074066d24
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd005.hs
@@ -0,0 +1,93 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- tests for SSE based vector operations
+
+import GHC.Exts
+
+data FloatX4  = FX4# FloatX4#
+
+instance Show FloatX4 where
+  show (FX4# f) = case (unpackFloatX4# f) of
+    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
+
+
+instance Eq FloatX4 where
+  (FX4# a) == (FX4# b)
+    = case (unpackFloatX4# a) of
+        (# a1, a2, a3, a4 #) ->
+          case (unpackFloatX4# b) of
+            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
+                                    (F# a2) == (F# b2) &&
+                                    (F# a3) == (F# b3) &&
+                                    (F# a4) == (F# b4)
+
+data DoubleX2 = DX2# DoubleX2#
+
+instance Show DoubleX2 where
+  show (DX2# d) = case (unpackDoubleX2# d) of
+    (# a, b #) -> show ((D# a), (D# b))
+
+
+instance Eq DoubleX2 where
+  (DX2# a) == (DX2# b)
+    = case (unpackDoubleX2# a) of
+        (# a1, a2 #) ->
+          case (unpackDoubleX2# b) of
+            (# b1, b2 #) -> (D# a1) == (D# b1) &&
+                            (D# a2) == (D# b2)
+
+main :: IO ()
+main = do
+
+    -- !!! test broadcasting, packing and unpacking for vector types
+    -- FloatX4#
+    case unpackFloatX4# (broadcastFloatX4# 1.5#) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
+        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+    -- DoubleX2#
+    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
+        (# a, b #) -> print (D# a, D# b)
+    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
+        (# a, b #) -> print (D# a, D# b)
+
+
+    -- !!! test the lifting of unlifted vector types and
+    -- defining various typeclass instances for the lifted types
+
+    print (FX4# (broadcastFloatX4# 1.5#))
+    print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
+    print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
+
+    print (DX2# (broadcastDoubleX2# 2.5##))
+    print $ (DX2#
+             (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
+    print $ (DX2#
+             (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
+
+
+    -- !!! test arithmetic vector operations
+    print (FX4# (plusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                              (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (minusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (timesFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (divideFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
+                                (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
+    print (FX4# (negateFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))))
+
+    print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##)
+                               (broadcastDoubleX2# 2.2##)))
+    print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##)
+                                (broadcastDoubleX2# 2.2##)))
+    print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##)
+                                (broadcastDoubleX2# 2.2##)))
+    print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##)
+                                 (broadcastDoubleX2# 4.0##)))
+    print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd005.stdout b/testsuite/tests/codeGen/should_run/simd005.stdout
new file mode 100644
index 0000000000..84386823f8
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/simd005.stdout
@@ -0,0 +1,20 @@
+(1.5,1.5,1.5,1.5)
+(4.5,7.8,2.3,6.5)
+(6.5,6.5)
+(8.9,7.2)
+(1.5,1.5,1.5,1.5)
+False
+True
+(2.5,2.5)
+False
+True
+(12.7,14.1,7.0,15.7)
+(-3.6999998,1.5,-2.3999999,-2.6999998)
+(36.899998,49.140003,10.809999,59.8)
+(0.5487805,1.2380953,0.4893617,0.70652175)
+(-4.5,-7.8,-2.3,-6.5)
+(3.5,3.5)
+(1.2999999999999998,1.2999999999999998)
+(5.28,5.28)
+(2.3,2.3)
+(-3.5,-3.5)
author	Abhiroop Sarkar <asiamgenius@gmail.com>	2018-09-27 15:28:46 -0400
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2019-07-03 09:33:39 -0400
commit	acd795583625401c5554f8e04ec7efca18814011 (patch)
tree	545e529eed21e78592ff326d4ebf9804095ad2cb /testsuite/tests/codeGen
parent	df3e5b744db29c085f5bc05f8b609197bcbf9b0c (diff)
download	haskell-acd795583625401c5554f8e04ec7efca18814011.tar.gz