45 files changed, 246 insertions, 1512 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 6df910edfa..4e6a9d293a 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -64,20 +64,13 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
       assign_regs assts (r:rs) regs | isVecType ty   = vec
                                     | isFloatType ty = float
                                     | otherwise      = int
-        where vec = case regs of
-                      (vs, fs, ds, ls, s:ss)
-                        | passVectorInReg w dflags
-                          -> let elt_ty = vecElemType ty
-                                 reg_ty = if isFloatType elt_ty
-                                          then Float else Integer
-                                 reg_class = case w of
-                                               W128 -> XmmReg
-                                               W256 -> YmmReg
-                                               W512 -> ZmmReg
-                                               _    -> panic "CmmCallConv.assignArgumentsPos: Invalid vector width"
-                              in k (RegisterParam
-                                     (reg_class s (vecLength ty) (typeWidth elt_ty) reg_ty),
-                                     (vs, fs, ds, ls, ss))
+        where vec = case (w, regs) of
+                      (W128, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+                      (W256, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+                      (W512, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
                       _ -> (assts, (r:rs))
               float = case (w, regs) of
                         (W32, (vs, fs, ds, ls, s:ss))
@@ -96,7 +89,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
                       (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags)
                           -> k (RegisterParam l, (vs, fs, ds, ls, ss))
                       _   -> (assts, (r:rs))
-
               k (asst, regs') = assign_regs ((r, asst) : assts) rs regs'
               ty = arg_ty r
               w  = typeWidth ty
@@ -210,13 +202,11 @@ nodeOnly = ([VanillaReg 1], [], [], [], [])
 -- only use this functionality in hand-written C-- code in the RTS.
 realArgRegsCover :: DynFlags -> [GlobalReg]
 realArgRegsCover dflags
-    | passFloatArgsInXmm dflags
-      = map ($VGcPtr) (realVanillaRegs dflags) ++
-        realLongRegs dflags ++
-        map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags)
-    | otherwise
-      = map ($VGcPtr) (realVanillaRegs dflags) ++
-        realFloatRegs dflags ++
-        realDoubleRegs dflags ++
-        realLongRegs dflags ++
-        map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags)
+    | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
+                                  realLongRegs dflags ++
+                                  map XmmReg (realXmmRegNos dflags)
+    | otherwise                 = map ($VGcPtr) (realVanillaRegs dflags) ++
+                                  realFloatRegs dflags ++
+                                  realDoubleRegs dflags ++
+                                  realLongRegs dflags ++
+                                  map XmmReg (realXmmRegNos dflags)
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 79eaf8f89c..901df5d908 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -14,7 +14,6 @@ module CmmExpr
     , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg
     , node, baseReg
     , VGcPtr(..)
-    , GlobalVecRegTy(..)
 
     , DefinerOfRegs, UserOfRegs
     , foldRegsDefd, foldRegsUsed
@@ -42,7 +41,6 @@ import Outputable (panic)
 import Unique
 
 import Data.Set (Set)
-import Data.Monoid ((<>))
 import qualified Data.Set as Set
 
 import BasicTypes (Alignment, mkAlignment, alignmentOf)
@@ -394,7 +392,6 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
 -----------------------------------------------------------------------------
 {-
 Note [Overlapping global registers]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The backend might not faithfully implement the abstraction of the STG
 machine with independent registers for different values of type
@@ -416,26 +413,6 @@ on a particular platform. The instance Eq GlobalReg is syntactic
 equality of STG registers and does not take overlap into
 account. However it is still used in UserOfRegs/DefinerOfRegs and
 there are likely still bugs there, beware!
-
-
-Note [SIMD registers]
-~~~~~~~~~~~~~~~~~~~~~
-
-GHC's treatment of SIMD registers is heavily modelled after the x86_64
-architecture. Namely we have 128- (XMM), 256- (YMM), and 512-bit (ZMM)
-registers. Furthermore, we treat each possible format in these registers as a
-distinct register which overlaps with the others. For instance, we XMM1 as a
-2xI64 register is distinct from but overlaps with (in the sense defined in Note
-[Overlapping global registers]) its use as a 4xI32 register.
-
-This model makes it easier to fit SIMD registers into the NCG, which generally
-expects that each global register has a single, known CmmType.
-
-In the future we could consider further refactoring this to eliminate the
-XMM, YMM, and ZMM register names (which are quite x86-specific) and instead just
-having a set of NxM-bit vector registers (e.g. Vec2x64A, Vec2x64B, ...,
-Vec4x32A, ..., Vec4x64A).
-
 -}
 
 data GlobalReg
@@ -455,15 +432,12 @@ data GlobalReg
 
   | XmmReg                      -- 128-bit SIMD vector register
         {-# UNPACK #-} !Int     -- its number
-        !Length !Width !GlobalVecRegTy
 
   | YmmReg                      -- 256-bit SIMD vector register
         {-# UNPACK #-} !Int     -- its number
-        !Length !Width !GlobalVecRegTy
 
   | ZmmReg                      -- 512-bit SIMD vector register
         {-# UNPACK #-} !Int     -- its number
-        !Length !Width !GlobalVecRegTy
 
   -- STG registers
   | Sp                  -- Stack ptr; points to last occupied stack location.
@@ -504,17 +478,17 @@ data GlobalReg
 
   deriving( Show )
 
-data GlobalVecRegTy = Integer | Float
-  deriving (Show, Eq, Ord)
-
 instance Eq GlobalReg where
    VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
    FloatReg i == FloatReg j = i==j
    DoubleReg i == DoubleReg j = i==j
    LongReg i == LongReg j = i==j
-   XmmReg i l w grt == XmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
-   YmmReg i l w grt == YmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
-   ZmmReg i l w grt == ZmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
+   -- NOTE: XMM, YMM, ZMM registers actually are the same registers
+   -- at least with respect to store at YMM i and then read from XMM i
+   -- and similarly for ZMM etc.
+   XmmReg i == XmmReg j = i==j
+   YmmReg i == YmmReg j = i==j
+   ZmmReg i == ZmmReg j = i==j
    Sp == Sp = True
    SpLim == SpLim = True
    Hp == Hp = True
@@ -538,21 +512,9 @@ instance Ord GlobalReg where
    compare (FloatReg i)  (FloatReg  j) = compare i j
    compare (DoubleReg i) (DoubleReg j) = compare i j
    compare (LongReg i)   (LongReg   j) = compare i j
-   compare (XmmReg i l w grt)
-           (XmmReg j l' w' grt')       = compare i j
-                                         <> compare l l'
-                                         <> compare w w'
-                                         <> compare grt grt'
-   compare (YmmReg i l w grt)
-           (YmmReg j l' w' grt')       = compare i j
-                                         <> compare l l'
-                                         <> compare w w'
-                                         <> compare grt grt'
-   compare (ZmmReg i l w grt)
-           (ZmmReg j l' w' grt')       = compare i j
-                                         <> compare l l'
-                                         <> compare w w'
-                                         <> compare grt grt'
+   compare (XmmReg i)    (XmmReg    j) = compare i j
+   compare (YmmReg i)    (YmmReg    j) = compare i j
+   compare (ZmmReg i)    (ZmmReg    j) = compare i j
    compare Sp Sp = EQ
    compare SpLim SpLim = EQ
    compare Hp Hp = EQ
@@ -576,12 +538,12 @@ instance Ord GlobalReg where
    compare _ (DoubleReg _)    = GT
    compare (LongReg _) _      = LT
    compare _ (LongReg _)      = GT
-   compare (XmmReg _ _ _ _) _ = LT
-   compare _ (XmmReg _ _ _ _) = GT
-   compare (YmmReg _ _ _ _) _ = LT
-   compare _ (YmmReg _ _ _ _) = GT
-   compare (ZmmReg _ _ _ _) _ = LT
-   compare _ (ZmmReg _ _ _ _) = GT
+   compare (XmmReg _) _       = LT
+   compare _ (XmmReg _)       = GT
+   compare (YmmReg _) _       = LT
+   compare _ (YmmReg _)       = GT
+   compare (ZmmReg _) _       = LT
+   compare _ (ZmmReg _)       = GT
    compare Sp _ = LT
    compare _ Sp = GT
    compare SpLim _ = LT
@@ -634,15 +596,12 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
 globalRegType _      (FloatReg _)      = cmmFloat W32
 globalRegType _      (DoubleReg _)     = cmmFloat W64
 globalRegType _      (LongReg _)       = cmmBits W64
-globalRegType _      (XmmReg _ l w ty) = case ty of
-                                           Integer -> cmmVec l (cmmBits w)
-                                           Float   -> cmmVec l (cmmFloat w)
-globalRegType _      (YmmReg _ l w ty) = case ty of
-                                           Integer -> cmmVec l (cmmBits w)
-                                           Float   -> cmmVec l (cmmFloat w)
-globalRegType _      (ZmmReg _ l w ty) = case ty of
-                                           Integer -> cmmVec l (cmmBits w)
-                                           Float   -> cmmVec l (cmmFloat w)
+-- TODO: improve the internal model of SIMD/vectorized registers
+-- the right design SHOULd improve handling of float and double code too.
+-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim
+globalRegType _      (XmmReg _)        = cmmVec 4 (cmmBits W32)
+globalRegType _      (YmmReg _)        = cmmVec 8 (cmmBits W32)
+globalRegType _      (ZmmReg _)        = cmmVec 16 (cmmBits W32)
 
 globalRegType dflags Hp                = gcWord dflags
                                             -- The initialiser for all
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 53dcd70b7b..d5c3f84443 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -148,13 +148,9 @@ lintCmmMiddle node = case node of
             dflags <- getDynFlags
             erep <- lintCmmExpr expr
             let reg_ty = cmmRegType dflags reg
-            case isVecCatType reg_ty of
-              True -> if ((typeWidth reg_ty) == (typeWidth erep))
-                         then return ()
-                         else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
-              _    -> if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
-                         then return ()
-                          else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
+            if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
+                then return ()
+                else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
 
   CmmStore l r -> do
             _ <- lintCmmExpr l
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 38d9edb480..9740d21bef 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -136,9 +136,8 @@ data MachOp
   | MO_VU_Rem  Length Width
 
   -- Floting point vector element insertion and extraction operations
-  | MO_VF_Broadcast Length Width   -- Broadcast a scalar into a vector
-  | MO_VF_Insert    Length Width   -- Insert scalar into vector
-  | MO_VF_Extract   Length Width   -- Extract scalar from vector
+  | MO_VF_Insert  Length Width   -- Insert scalar into vector
+  | MO_VF_Extract Length Width   -- Extract scalar from vector
 
   -- Floating point vector operations
   | MO_VF_Add  Length Width
@@ -431,7 +430,6 @@ machOpResultType dflags mop tys =
     MO_VU_Quot l w      -> cmmVec l (cmmBits w)
     MO_VU_Rem  l w      -> cmmVec l (cmmBits w)
 
-    MO_VF_Broadcast l w -> cmmVec l (cmmFloat w)
     MO_VF_Insert  l w   -> cmmVec l (cmmFloat w)
     MO_VF_Extract _ w   -> cmmFloat w
 
@@ -524,21 +522,16 @@ machOpArgReps dflags op =
     MO_VU_Quot _ r      -> [r,r]
     MO_VU_Rem  _ r      -> [r,r]
 
-    -- offset is always W32 as mentioned in StgCmmPrim.hs
-    MO_VF_Broadcast l r -> [vecwidth l r, r]
-    MO_VF_Insert    l r -> [vecwidth l r, r, W32]
-    MO_VF_Extract   l r -> [vecwidth l r, W32]
+    MO_VF_Insert  l r   -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
+    MO_VF_Extract l r   -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
 
-    -- NOTE: The below is owing to the fact that floats use the SSE registers
-    MO_VF_Add  l w      -> [vecwidth l w, vecwidth l w]
-    MO_VF_Sub  l w      -> [vecwidth l w, vecwidth l w]
-    MO_VF_Mul  l w      -> [vecwidth l w, vecwidth l w]
-    MO_VF_Quot l w      -> [vecwidth l w, vecwidth l w]
-    MO_VF_Neg  l w      -> [vecwidth l w]
+    MO_VF_Add  _ r      -> [r,r]
+    MO_VF_Sub  _ r      -> [r,r]
+    MO_VF_Mul  _ r      -> [r,r]
+    MO_VF_Quot _ r      -> [r,r]
+    MO_VF_Neg  _ r      -> [r]
 
     MO_AlignmentCheck _ r -> [r]
-    where
-      vecwidth l w = widthFromBytes (l*widthInBytes w)
 
 -----------------------------------------------------------------------------
 -- CallishMachOp
diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs
index 17b588720f..43d23c7ee7 100644
--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -6,7 +6,6 @@ module CmmType
     , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
     , isFloatType, isGcPtrType, isBitsType
     , isWord32, isWord64, isFloat64, isFloat32
-    , isVecCatType
 
     , Width(..)
     , widthInBits, widthInBytes, widthInLog, widthFromBytes
@@ -134,7 +133,7 @@ cInt :: DynFlags -> CmmType
 cInt dflags = cmmBits (cIntWidth  dflags)
 
 ------------ Predicates ----------------
-isFloatType, isGcPtrType, isBitsType, isVecCatType :: CmmType -> Bool
+isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool
 isFloatType (CmmType FloatCat    _) = True
 isFloatType _other                  = False
 
@@ -144,9 +143,6 @@ isGcPtrType _other               = False
 isBitsType (CmmType BitsCat _) = True
 isBitsType _                   = False
 
-isVecCatType (CmmType (VecCat _ _) _) = True
-isVecCatType _other                   = False
-
 isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
 -- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
 -- isFloat32 and 64 are obvious
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index a60a26229b..7227edd57e 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -713,10 +713,6 @@ pprMachOp_for_C mop = case mop of
                                 (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
                                       ++ " should have been handled earlier!")
 
-        MO_VF_Broadcast {} -> pprTrace "offending mop:"
-                                 (text "MO_VF_Broadcast")
-                                 (panic $ "PprC.pprMachOp_for_C: MO_VF_Broadcast"
-                                      ++ " should have been handled earlier!")
         MO_VF_Insert {}   -> pprTrace "offending mop:"
                                 (text "MO_VF_Insert")
                                 (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 2080c1f5d8..7bf73f1ca6 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -261,9 +261,9 @@ pprGlobalReg gr
         FloatReg   n   -> char 'F' <> int n
         DoubleReg  n   -> char 'D' <> int n
         LongReg    n   -> char 'L' <> int n
-        XmmReg     n _ _ _ -> text "XMM" <> int n
-        YmmReg     n _ _ _ -> text "YMM" <> int n
-        ZmmReg     n _ _ _ -> text "ZMM" <> int n
+        XmmReg     n   -> text "XMM" <> int n
+        YmmReg     n   -> text "YMM" <> int n
+        ZmmReg     n   -> text "ZMM" <> int n
         Sp             -> text "Sp"
         SpLim          -> text "SpLim"
         Hp             -> text "Hp"
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index 2cbcfc66a9..0ff9bd8b56 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -57,27 +57,27 @@ baseRegOffset dflags (DoubleReg 4)       = oFFSET_StgRegTable_rD4 dflags
 baseRegOffset dflags (DoubleReg 5)       = oFFSET_StgRegTable_rD5 dflags
 baseRegOffset dflags (DoubleReg 6)       = oFFSET_StgRegTable_rD6 dflags
 baseRegOffset _      (DoubleReg n)       = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
-baseRegOffset dflags (XmmReg 1 _ _ _)    = oFFSET_StgRegTable_rXMM1 dflags
-baseRegOffset dflags (XmmReg 2 _ _ _)    = oFFSET_StgRegTable_rXMM2 dflags
-baseRegOffset dflags (XmmReg 3 _ _ _)    = oFFSET_StgRegTable_rXMM3 dflags
-baseRegOffset dflags (XmmReg 4 _ _ _)    = oFFSET_StgRegTable_rXMM4 dflags
-baseRegOffset dflags (XmmReg 5 _ _ _)    = oFFSET_StgRegTable_rXMM5 dflags
-baseRegOffset dflags (XmmReg 6 _ _ _)    = oFFSET_StgRegTable_rXMM6 dflags
-baseRegOffset _      (XmmReg n _ _ _)    = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
-baseRegOffset dflags (YmmReg 1 _ _ _)    = oFFSET_StgRegTable_rYMM1 dflags
-baseRegOffset dflags (YmmReg 2 _ _ _)    = oFFSET_StgRegTable_rYMM2 dflags
-baseRegOffset dflags (YmmReg 3 _ _ _)    = oFFSET_StgRegTable_rYMM3 dflags
-baseRegOffset dflags (YmmReg 4 _ _ _)    = oFFSET_StgRegTable_rYMM4 dflags
-baseRegOffset dflags (YmmReg 5 _ _ _)    = oFFSET_StgRegTable_rYMM5 dflags
-baseRegOffset dflags (YmmReg 6 _ _ _)    = oFFSET_StgRegTable_rYMM6 dflags
-baseRegOffset _      (YmmReg n _ _ _)    = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
-baseRegOffset dflags (ZmmReg 1 _ _ _)    = oFFSET_StgRegTable_rZMM1 dflags
-baseRegOffset dflags (ZmmReg 2 _ _ _)    = oFFSET_StgRegTable_rZMM2 dflags
-baseRegOffset dflags (ZmmReg 3 _ _ _)    = oFFSET_StgRegTable_rZMM3 dflags
-baseRegOffset dflags (ZmmReg 4 _ _ _)    = oFFSET_StgRegTable_rZMM4 dflags
-baseRegOffset dflags (ZmmReg 5 _ _ _)    = oFFSET_StgRegTable_rZMM5 dflags
-baseRegOffset dflags (ZmmReg 6 _ _ _)    = oFFSET_StgRegTable_rZMM6 dflags
-baseRegOffset _      (ZmmReg n _ _ _)    = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
+baseRegOffset dflags (XmmReg 1)          = oFFSET_StgRegTable_rXMM1 dflags
+baseRegOffset dflags (XmmReg 2)          = oFFSET_StgRegTable_rXMM2 dflags
+baseRegOffset dflags (XmmReg 3)          = oFFSET_StgRegTable_rXMM3 dflags
+baseRegOffset dflags (XmmReg 4)          = oFFSET_StgRegTable_rXMM4 dflags
+baseRegOffset dflags (XmmReg 5)          = oFFSET_StgRegTable_rXMM5 dflags
+baseRegOffset dflags (XmmReg 6)          = oFFSET_StgRegTable_rXMM6 dflags
+baseRegOffset _      (XmmReg n)          = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
+baseRegOffset dflags (YmmReg 1)          = oFFSET_StgRegTable_rYMM1 dflags
+baseRegOffset dflags (YmmReg 2)          = oFFSET_StgRegTable_rYMM2 dflags
+baseRegOffset dflags (YmmReg 3)          = oFFSET_StgRegTable_rYMM3 dflags
+baseRegOffset dflags (YmmReg 4)          = oFFSET_StgRegTable_rYMM4 dflags
+baseRegOffset dflags (YmmReg 5)          = oFFSET_StgRegTable_rYMM5 dflags
+baseRegOffset dflags (YmmReg 6)          = oFFSET_StgRegTable_rYMM6 dflags
+baseRegOffset _      (YmmReg n)          = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
+baseRegOffset dflags (ZmmReg 1)          = oFFSET_StgRegTable_rZMM1 dflags
+baseRegOffset dflags (ZmmReg 2)          = oFFSET_StgRegTable_rZMM2 dflags
+baseRegOffset dflags (ZmmReg 3)          = oFFSET_StgRegTable_rZMM3 dflags
+baseRegOffset dflags (ZmmReg 4)          = oFFSET_StgRegTable_rZMM4 dflags
+baseRegOffset dflags (ZmmReg 5)          = oFFSET_StgRegTable_rZMM5 dflags
+baseRegOffset dflags (ZmmReg 6)          = oFFSET_StgRegTable_rZMM6 dflags
+baseRegOffset _      (ZmmReg n)          = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
 baseRegOffset dflags Sp                  = oFFSET_StgRegTable_rSp dflags
 baseRegOffset dflags SpLim               = oFFSET_StgRegTable_rSpLim dflags
 baseRegOffset dflags (LongReg 1)         = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 9a6cf6c2e5..c3f9d5a279 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -669,7 +669,7 @@ emitPrimOp _      [res] Word2DoubleOp [w] = emitPrimCall [res]
 -- SIMD primops
 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
     checkVecCompatibility dflags vcat n w
-    doVecBroadcastOp (vecElemInjectCast dflags vcat w) ty zeros e res
+    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
   where
     zeros :: CmmExpr
     zeros = CmmLit $ CmmVec (replicate n zero)
@@ -1765,8 +1765,9 @@ vecElemProjectCast _      _        _   =  Nothing
 
 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
 checkVecCompatibility dflags vcat l w = do
-    when (hscTarget dflags /= HscLlvm && hscTarget dflags /= HscAsm) $ do
-        sorry "SIMD vector instructions not supported for the C backend or GHCi"
+    when (hscTarget dflags /= HscLlvm) $ do
+        sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
+                         ,"Please use -fllvm."]
     check vecWidth vcat l w
   where
     check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
@@ -1791,38 +1792,6 @@ checkVecCompatibility dflags vcat l w = do
 
 ------------------------------------------------------------------------------
 -- Helpers for translating vector packing and unpacking.
-doVecBroadcastOp :: Maybe MachOp  -- Cast from element to vector component
-                 -> CmmType       -- Type of vector
-                 -> CmmExpr       -- Initial vector
-                 -> CmmExpr     -- Elements
-                 -> CmmFormal     -- Destination for result
-                 -> FCode ()
-doVecBroadcastOp maybe_pre_write_cast ty z es res = do
-    dst <- newTemp ty
-    emitAssign (CmmLocal dst) z
-    vecBroadcast dst es 0
-  where
-    vecBroadcast :: CmmFormal -> CmmExpr -> Int -> FCode ()
-    vecBroadcast src e _ = do
-        dst <- newTemp ty
-        if isFloatType (vecElemType ty)
-          then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Broadcast len wid)
-                                                    [CmmReg (CmmLocal src), cast e])
-               --TODO : Add the MachOp MO_V_Broadcast
-          else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
-                                                    [CmmReg (CmmLocal src), cast e])
-        emitAssign (CmmLocal res) (CmmReg (CmmLocal dst))
-
-    cast :: CmmExpr -> CmmExpr
-    cast val = case maybe_pre_write_cast of
-                 Nothing   -> val
-                 Just cast -> CmmMachOp cast [val]
-
-    len :: Length
-    len = vecLength ty
-
-    wid :: Width
-    wid = typeWidth (vecElemType ty)
 
 doVecPackOp :: Maybe MachOp  -- Cast from element to vector component
             -> CmmType       -- Type of vector
@@ -1840,16 +1809,16 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
         emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
 
     vecPack src (e : es) i = do
-      dst <- newTemp ty
-      if isFloatType (vecElemType ty)
-        then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
-                                         [CmmReg (CmmLocal src), cast e, iLit])
-        else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
-                                         [CmmReg (CmmLocal src), cast e, iLit])
-      vecPack dst es (i + 1)
+        dst <- newTemp ty
+        if isFloatType (vecElemType ty)
+          then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
+                                                    [CmmReg (CmmLocal src), cast e, iLit])
+          else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
+                                                    [CmmReg (CmmLocal src), cast e, iLit])
+        vecPack dst es (i + 1)
       where
         -- vector indices are always 32-bits
-        iLit = CmmLit (CmmInt ((toInteger i) * 16) W32)
+        iLit = CmmLit (CmmInt (toInteger i) W32)
 
     cast :: CmmExpr -> CmmExpr
     cast val = case maybe_pre_write_cast of
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index a5a5683a3e..81f3b9f84c 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -152,12 +152,12 @@ llvmFunArgs dflags live =
     where platform = targetPlatform dflags
           isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
           isPassed r = not (isSSE r) || isLive r
-          isSSE (FloatReg _)      = True
-          isSSE (DoubleReg _)     = True
-          isSSE (XmmReg _ _ _ _ ) = True
-          isSSE (YmmReg _ _ _ _ ) = True
-          isSSE (ZmmReg _ _ _ _ ) = True
-          isSSE _                 = False
+          isSSE (FloatReg _)  = True
+          isSSE (DoubleReg _) = True
+          isSSE (XmmReg _)    = True
+          isSSE (YmmReg _)    = True
+          isSSE (ZmmReg _)    = True
+          isSSE _             = False
 
 -- | Llvm standard fun attributes
 llvmStdFunAttrs :: [LlvmFuncAttr]
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 8fea6e0b17..86a59381b2 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -1287,7 +1287,6 @@ genMachOp _ op [x] = case op of
     MO_VU_Quot    _ _ -> panicOp
     MO_VU_Rem     _ _ -> panicOp
 
-    MO_VF_Broadcast _ _ -> panicOp
     MO_VF_Insert  _ _ -> panicOp
     MO_VF_Extract _ _ -> panicOp
 
@@ -1484,7 +1483,6 @@ genMachOp_slow opt op [x, y] = case op of
 
     MO_VS_Neg {} -> panicOp
 
-    MO_VF_Broadcast  {} -> panicOp
     MO_VF_Insert  {} -> panicOp
     MO_VF_Extract {} -> panicOp
 
@@ -1846,9 +1844,9 @@ funEpilogue live = do
     let liveRegs = alwaysLive ++ live
         isSSE (FloatReg _)  = True
         isSSE (DoubleReg _) = True
-        isSSE (XmmReg _ _ _ _) = True
-        isSSE (YmmReg _ _ _ _) = True
-        isSSE (ZmmReg _ _ _ _) = True
+        isSSE (XmmReg _)    = True
+        isSSE (YmmReg _)    = True
+        isSSE (ZmmReg _)    = True
         isSSE _             = False
 
     -- Set to value or "undef" depending on whether the register is
diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs
index 6d188d908f..8cdf3c6869 100644
--- a/compiler/llvmGen/LlvmCodeGen/Regs.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs
@@ -60,24 +60,24 @@ lmGlobalReg dflags suf reg
         DoubleReg 4    -> doubleGlobal $ "D4" ++ suf
         DoubleReg 5    -> doubleGlobal $ "D5" ++ suf
         DoubleReg 6    -> doubleGlobal $ "D6" ++ suf
-        XmmReg 1 _ _ _ -> xmmGlobal $ "XMM1" ++ suf
-        XmmReg 2 _ _ _ -> xmmGlobal $ "XMM2" ++ suf
-        XmmReg 3 _ _ _ -> xmmGlobal $ "XMM3" ++ suf
-        XmmReg 4 _ _ _ -> xmmGlobal $ "XMM4" ++ suf
-        XmmReg 5 _ _ _ -> xmmGlobal $ "XMM5" ++ suf
-        XmmReg 6 _ _ _ -> xmmGlobal $ "XMM6" ++ suf
-        YmmReg 1 _ _ _ -> ymmGlobal $ "YMM1" ++ suf
-        YmmReg 2 _ _ _ -> ymmGlobal $ "YMM2" ++ suf
-        YmmReg 3 _ _ _ -> ymmGlobal $ "YMM3" ++ suf
-        YmmReg 4 _ _ _ -> ymmGlobal $ "YMM4" ++ suf
-        YmmReg 5 _ _ _ -> ymmGlobal $ "YMM5" ++ suf
-        YmmReg 6 _ _ _ -> ymmGlobal $ "YMM6" ++ suf
-        ZmmReg 1 _ _ _ -> zmmGlobal $ "ZMM1" ++ suf
-        ZmmReg 2 _ _ _ -> zmmGlobal $ "ZMM2" ++ suf
-        ZmmReg 3 _ _ _ -> zmmGlobal $ "ZMM3" ++ suf
-        ZmmReg 4 _ _ _ -> zmmGlobal $ "ZMM4" ++ suf
-        ZmmReg 5 _ _ _ -> zmmGlobal $ "ZMM5" ++ suf
-        ZmmReg 6 _ _ _ -> zmmGlobal $ "ZMM6" ++ suf
+        XmmReg 1       -> xmmGlobal $ "XMM1" ++ suf
+        XmmReg 2       -> xmmGlobal $ "XMM2" ++ suf
+        XmmReg 3       -> xmmGlobal $ "XMM3" ++ suf
+        XmmReg 4       -> xmmGlobal $ "XMM4" ++ suf
+        XmmReg 5       -> xmmGlobal $ "XMM5" ++ suf
+        XmmReg 6       -> xmmGlobal $ "XMM6" ++ suf
+        YmmReg 1       -> ymmGlobal $ "YMM1" ++ suf
+        YmmReg 2       -> ymmGlobal $ "YMM2" ++ suf
+        YmmReg 3       -> ymmGlobal $ "YMM3" ++ suf
+        YmmReg 4       -> ymmGlobal $ "YMM4" ++ suf
+        YmmReg 5       -> ymmGlobal $ "YMM5" ++ suf
+        YmmReg 6       -> ymmGlobal $ "YMM6" ++ suf
+        ZmmReg 1       -> zmmGlobal $ "ZMM1" ++ suf
+        ZmmReg 2       -> zmmGlobal $ "ZMM2" ++ suf
+        ZmmReg 3       -> zmmGlobal $ "ZMM3" ++ suf
+        ZmmReg 4       -> zmmGlobal $ "ZMM4" ++ suf
+        ZmmReg 5       -> zmmGlobal $ "ZMM5" ++ suf
+        ZmmReg 6       -> zmmGlobal $ "ZMM6" ++ suf
         MachSp         -> wordGlobal $ "MachSp" ++ suf
         _other         -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
                                 ++ ") not supported!"
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 2315be3519..1f7de170a9 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -219,7 +219,6 @@ module DynFlags (
         -- * SSE and AVX
         isSseEnabled,
         isSse2Enabled,
-        isSse4_1Enabled,
         isSse4_2Enabled,
         isBmiEnabled,
         isBmi2Enabled,
@@ -5903,8 +5902,6 @@ isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
     ArchX86    -> True
     _          -> False
 
-isSse4_1Enabled :: DynFlags -> Bool
-isSse4_1Enabled dflags = sseVersion dflags >= Just SSE4
 
 isSse4_2Enabled :: DynFlags -> Bool
 isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42
diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs
index a0e4e99f80..31472893e7 100644
--- a/compiler/nativeGen/Format.hs
+++ b/compiler/nativeGen/Format.hs
@@ -10,11 +10,9 @@
 --
 module Format (
     Format(..),
-    ScalarFormat(..),
     intFormat,
     floatFormat,
     isFloatFormat,
-    isVecFormat,
     cmmTypeFormat,
     formatToWidth,
     formatInBytes
@@ -27,29 +25,6 @@ import GhcPrelude
 import Cmm
 import Outputable
 
-
--- Note [GHC's data format representations]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- GHC has severals types that represent various aspects of data format.
--- These include:
---
---  * 'CmmType.CmmType': The data classification used throughout the C--
---    pipeline. This is a pair of a CmmCat and a Width.
---
---  * 'CmmType.CmmCat': What the bits in a C-- value mean (e.g. a pointer, integer, or floating-point value)
---
---  * 'CmmType.Width': The width of a C-- value.
---
---  * 'CmmType.Length': The width (measured in number of scalars) of a vector value.
---
---  * 'Format.Format': The data format representation used by much of the backend.
---
---  * 'Format.ScalarFormat': The format of a 'Format.VecFormat'\'s scalar.
---
---  * 'RegClass.RegClass': Whether a register is an integer, float-point, or vector register
---
-
 -- It looks very like the old MachRep, but it's now of purely local
 -- significance, here in the native code generator.  You can change it
 -- without global consequences.
@@ -72,16 +47,8 @@ data Format
         | II64
         | FF32
         | FF64
-        | VecFormat !Length !ScalarFormat !Width
         deriving (Show, Eq)
 
-data ScalarFormat = FmtInt8
-                  | FmtInt16
-                  | FmtInt32
-                  | FmtInt64
-                  | FmtFloat
-                  | FmtDouble
-                  deriving (Show, Eq)
 
 -- | Get the integer format of this width.
 intFormat :: Width -> Format
@@ -114,33 +81,13 @@ isFloatFormat format
         FF64    -> True
         _       -> False
 
--- | Check if a format represents a vector
-isVecFormat :: Format -> Bool
-isVecFormat (VecFormat {}) = True
-isVecFormat _              = False
 
 -- | Convert a Cmm type to a Format.
 cmmTypeFormat :: CmmType -> Format
 cmmTypeFormat ty
         | isFloatType ty        = floatFormat (typeWidth ty)
-        | isVecType ty          = vecFormat ty
         | otherwise             = intFormat (typeWidth ty)
 
-vecFormat :: CmmType -> Format
-vecFormat ty =
-  let l      = vecLength ty
-      elemTy = vecElemType ty
-   in if isFloatType elemTy
-      then case typeWidth elemTy of
-             W32 -> VecFormat l FmtFloat  W32
-             W64 -> VecFormat l FmtDouble W64
-             _   -> pprPanic "Incorrect vector element width" (ppr elemTy)
-      else case typeWidth elemTy of
-             W8  -> VecFormat l FmtInt8  W8
-             W16 -> VecFormat l FmtInt16 W16
-             W32 -> VecFormat l FmtInt32 W32
-             W64 -> VecFormat l FmtInt64 W64
-             _   -> pprPanic "Incorrect vector element width" (ppr elemTy)
 
 -- | Get the Width of a Format.
 formatToWidth :: Format -> Width
@@ -152,7 +99,7 @@ formatToWidth format
         II64            -> W64
         FF32            -> W32
         FF64            -> W64
-        VecFormat l _ w -> widthFromBytes (l*widthInBytes w)
+
 
 formatInBytes :: Format -> Int
 formatInBytes = widthInBytes . formatToWidth
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs
index 67730aa59b..3680c1c7b0 100644
--- a/compiler/nativeGen/NCGMonad.hs
+++ b/compiler/nativeGen/NCGMonad.hs
@@ -250,6 +250,7 @@ getNewRegNat rep
       dflags <- getDynFlags
       return (RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep)
 
+
 getNewRegPairNat :: Format -> NatM (Reg,Reg)
 getNewRegPairNat rep
  = do u <- getUniqueNat
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 7e5df6a76c..a49526c93a 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1909,8 +1909,6 @@ genCCall' dflags gcp target dest_regs args
                           FF32 -> (1, 1, 4, fprs)
                           FF64 -> (2, 1, 8, fprs)
                           II64 -> panic "genCCall' passArguments II64"
-                          VecFormat {}
-                               -> panic "genCCall' passArguments vector format"
 
                       GCP32ELF ->
                           case cmmTypeFormat rep of
@@ -1921,8 +1919,6 @@ genCCall' dflags gcp target dest_regs args
                           FF32 -> (0, 1, 4, fprs)
                           FF64 -> (0, 1, 8, fprs)
                           II64 -> panic "genCCall' passArguments II64"
-                          VecFormat {}
-                               -> panic "genCCall' passArguments vector format"
                       GCP64ELF _ ->
                           case cmmTypeFormat rep of
                           II8  -> (1, 0, 8, gprs)
@@ -1934,8 +1930,6 @@ genCCall' dflags gcp target dest_regs args
                           -- the FPRs.
                           FF32 -> (1, 1, 8, fprs)
                           FF64 -> (1, 1, 8, fprs)
-                          VecFormat {}
-                               -> panic "genCCall' passArguments vector format"
 
         moveResult reduceToFF32 =
             case dest_regs of
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index b7316e6bc6..4254f23122 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -29,7 +29,7 @@ import BlockId
 import CLabel
 import PprCmmExpr ()
 
-import Unique                ( getUnique )
+import Unique                ( pprUniqueAlways, getUnique )
 import GHC.Platform
 import FastString
 import Outputable
@@ -168,7 +168,10 @@ pprReg r
   = case r of
       RegReal    (RealRegSingle i) -> ppr_reg_no i
       RegReal    (RealRegPair{})   -> panic "PPC.pprReg: no reg pairs on this arch"
-      RegVirtual v                 -> ppr v
+      RegVirtual (VirtualRegI  u)  -> text "%vI_"   <> pprUniqueAlways u
+      RegVirtual (VirtualRegHi u)  -> text "%vHi_"  <> pprUniqueAlways u
+      RegVirtual (VirtualRegF  u)  -> text "%vF_"   <> pprUniqueAlways u
+      RegVirtual (VirtualRegD  u)  -> text "%vD_"   <> pprUniqueAlways u
 
   where
     ppr_reg_no :: Int -> SDoc
@@ -187,8 +190,7 @@ pprFormat x
                 II32 -> sLit "w"
                 II64 -> sLit "d"
                 FF32 -> sLit "fs"
-                FF64 -> sLit "fd"
-                VecFormat _ _ _ -> panic "PPC.Ppr.pprFormat: VecFormat")
+                FF64 -> sLit "fd")
 
 
 pprCond :: Cond -> SDoc
@@ -373,7 +375,6 @@ pprInstr (LD fmt reg addr) = hcat [
             II64 -> sLit "d"
             FF32 -> sLit "fs"
             FF64 -> sLit "fd"
-            VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat"
             ),
         case addr of AddrRegImm _ _ -> empty
                      AddrRegReg _ _ -> char 'x',
@@ -413,7 +414,6 @@ pprInstr (LA fmt reg addr) = hcat [
             II64 -> sLit "d"
             FF32 -> sLit "fs"
             FF64 -> sLit "fd"
-            VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat"
             ),
         case addr of AddrRegImm _ _ -> empty
                      AddrRegReg _ _ -> char 'x',
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs
index dff2f07bf4..7f69ea01a4 100644
--- a/compiler/nativeGen/Reg.hs
+++ b/compiler/nativeGen/Reg.hs
@@ -56,7 +56,6 @@ data VirtualReg
         | VirtualRegHi {-# UNPACK #-} !Unique  -- High part of 2-word register
         | VirtualRegF  {-# UNPACK #-} !Unique
         | VirtualRegD  {-# UNPACK #-} !Unique
-        | VirtualRegVec {-# UNPACK #-} !Unique
 
         deriving (Eq, Show)
 
@@ -70,7 +69,6 @@ instance Ord VirtualReg where
   compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
   compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
   compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
-  compare (VirtualRegVec a) (VirtualRegVec b) = nonDetCmpUnique a b
 
   compare VirtualRegI{} _ = LT
   compare _ VirtualRegI{} = GT
@@ -78,8 +76,7 @@ instance Ord VirtualReg where
   compare _ VirtualRegHi{} = GT
   compare VirtualRegF{} _ = LT
   compare _ VirtualRegF{} = GT
-  compare VirtualRegVec{} _ = LT
-  compare _ VirtualRegVec{} = GT
+
 
 
 instance Uniquable VirtualReg where
@@ -89,7 +86,6 @@ instance Uniquable VirtualReg where
                 VirtualRegHi u  -> u
                 VirtualRegF u   -> u
                 VirtualRegD u   -> u
-                VirtualRegVec u -> u
 
 instance Outputable VirtualReg where
         ppr reg
@@ -99,9 +95,8 @@ instance Outputable VirtualReg where
                 -- this code is kinda wrong on x86
                 -- because float and double occupy the same register set
                 -- namely SSE2 register xmm0 .. xmm15
-                VirtualRegF  u  -> text "%vFloat_"  <> pprUniqueAlways u
-                VirtualRegD  u  -> text "%vDouble_" <> pprUniqueAlways u
-                VirtualRegVec u -> text "%vVec_"    <> pprUniqueAlways u
+                VirtualRegF  u  -> text "%vFloat_"   <> pprUniqueAlways u
+                VirtualRegD  u  -> text "%vDouble_"   <> pprUniqueAlways u
 
 
 
@@ -112,7 +107,6 @@ renameVirtualReg u r
         VirtualRegHi _  -> VirtualRegHi u
         VirtualRegF _   -> VirtualRegF  u
         VirtualRegD _   -> VirtualRegD  u
-        VirtualRegVec _ -> VirtualRegVec u
 
 
 classOfVirtualReg :: VirtualReg -> RegClass
@@ -122,8 +116,6 @@ classOfVirtualReg vr
         VirtualRegHi{}  -> RcInteger
         VirtualRegF{}   -> RcFloat
         VirtualRegD{}   -> RcDouble
-        -- Below is an awful, largely x86-specific hack
-        VirtualRegVec{} -> RcDouble
 
 
 
diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
index 23d7c6b421..5d4fd418c3 100644
--- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -195,6 +195,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu
 
 
 
+
 -- Specification Code ----------------------------------------------------------
 --
 --      The trivColorable function for each particular architecture should
diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
index d452edfdc6..5a4f1c65a8 100644
--- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
@@ -2,7 +2,6 @@
 
 module RegAlloc.Linear.FreeRegs (
     FR(..),
-    allFreeRegs,
     maxSpillSlots
 )
 
@@ -70,10 +69,6 @@ instance FR SPARC.FreeRegs where
     frInitFreeRegs = SPARC.initFreeRegs
     frReleaseReg   = SPARC.releaseReg
 
--- | For debugging output.
-allFreeRegs :: FR freeRegs => Platform -> freeRegs -> [RealReg]
-allFreeRegs plat fr = foldMap (\rcls -> frGetFreeRegs plat rcls fr) allRegClasses
-
 maxSpillSlots :: DynFlags -> Int
 maxSpillSlots dflags
               = case platformArch (targetPlatform dflags) of
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index b29712e0e0..cdaf738d68 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -884,10 +884,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
                         $ vcat
                                 [ text "allocating vreg:  " <> text (show r)
                                 , text "assignment:       " <> ppr assig
-                                , text "freeRegs:         " <> text (showRegs freeRegs)
-                                , text "initFreeRegs:     " <> text (showRegs (frInitFreeRegs platform `asTypeOf` freeRegs))
-                                ]
-                        where showRegs = show . map (\reg -> (reg, targetClassOfRealReg platform reg)) . allFreeRegs platform
+                                , text "freeRegs:         " <> text (show freeRegs)
+                                , text "initFreeRegs:     " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ]
 
                 result
 
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs
index d73a3409ac..fbbb786817 100644
--- a/compiler/nativeGen/RegClass.hs
+++ b/compiler/nativeGen/RegClass.hs
@@ -1,14 +1,15 @@
 -- | An architecture independent description of a register's class.
 module RegClass
-        ( RegClass(..)
-        , allRegClasses
-        ) where
+        ( RegClass (..) )
+
+where
 
 import GhcPrelude
 
 import  Outputable
 import  Unique
 
+
 -- | The class of a register.
 --      Used in the register allocator.
 --      We treat all registers in a class as being interchangable.
@@ -17,11 +18,7 @@ data RegClass
         = RcInteger
         | RcFloat
         | RcDouble
-        deriving (Eq, Show)
-
-allRegClasses :: [RegClass]
-allRegClasses =
-    [ RcInteger, RcFloat, RcDouble ]
+        deriving Eq
 
 
 instance Uniquable RegClass where
@@ -30,6 +27,6 @@ instance Uniquable RegClass where
     getUnique RcDouble  = mkRegClassUnique 2
 
 instance Outputable RegClass where
-    ppr RcInteger         = Outputable.text "I"
-    ppr RcFloat           = Outputable.text "F"
-    ppr RcDouble          = Outputable.text "D"
+    ppr RcInteger       = Outputable.text "I"
+    ppr RcFloat         = Outputable.text "F"
+    ppr RcDouble        = Outputable.text "D"
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index aa355f97cb..fc67f77541 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -45,6 +45,7 @@ import CLabel
 import Hoopl.Label
 import Hoopl.Collections
 
+import Unique           ( pprUniqueAlways )
 import Outputable
 import GHC.Platform
 import FastString
@@ -147,7 +148,12 @@ pprReg :: Reg -> SDoc
 pprReg reg
  = case reg of
         RegVirtual vr
-         -> ppr vr
+         -> case vr of
+                VirtualRegI   u -> text "%vI_"   <> pprUniqueAlways u
+                VirtualRegHi  u -> text "%vHi_"  <> pprUniqueAlways u
+                VirtualRegF   u -> text "%vF_"   <> pprUniqueAlways u
+                VirtualRegD   u -> text "%vD_"   <> pprUniqueAlways u
+
 
         RegReal rr
          -> case rr of
@@ -215,8 +221,7 @@ pprFormat x
         II32    -> sLit ""
         II64    -> sLit "d"
         FF32    -> sLit ""
-        FF64    -> sLit "d"
-        VecFormat _ _ _ -> panic "SPARC.Ppr.pprFormat: VecFormat")
+        FF64    -> sLit "d")
 
 
 -- | Pretty print a format for an instruction suffix.
@@ -230,8 +235,7 @@ pprStFormat x
         II32  -> sLit ""
         II64  -> sLit "x"
         FF32  -> sLit ""
-        FF64  -> sLit "d"
-        VecFormat _ _ _ -> panic "SPARC.Ppr.pprFormat: VecFormat")
+        FF64  -> sLit "d")
 
 
 
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
index e46dbd0d38..0d7edc346a 100644
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -104,6 +104,7 @@ virtualRegSqueeze cls vr
                 VirtualRegD{}           -> 1
                 _other                  -> 0
 
+
 {-# INLINE realRegSqueeze #-}
 realRegSqueeze :: RegClass -> RealReg -> Int
 
@@ -133,6 +134,7 @@ realRegSqueeze cls rr
 
                 RealRegPair{}           -> 1
 
+
 -- | All the allocatable registers in the machine,
 --      including register pairs.
 allRealRegs :: [RealReg]
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index ed3684e074..13662f6807 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -111,25 +111,12 @@ sse2Enabled = do
     ArchX86    -> return True
     _          -> panic "trying to generate x86/x86_64 on the wrong platform"
 
-sse4_1Enabled :: NatM Bool
-sse4_1Enabled = do
-  dflags <- getDynFlags
-  return (isSse4_1Enabled dflags)
 
 sse4_2Enabled :: NatM Bool
 sse4_2Enabled = do
   dflags <- getDynFlags
   return (isSse4_2Enabled dflags)
 
-sseEnabled :: NatM Bool
-sseEnabled = do
-  dflags <- getDynFlags
-  return (isSseEnabled dflags)
-
-avxEnabled :: NatM Bool
-avxEnabled = do
-  dflags <- getDynFlags
-  return (isAvxEnabled dflags)
 
 cmmTopCodeGen
         :: RawCmmDecl
@@ -228,7 +215,6 @@ stmtToInstrs bid stmt = do
     CmmAssign reg src
       | isFloatType ty         -> assignReg_FltCode format reg src
       | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
-      | isVecType ty           -> assignReg_VecCode format reg src
       | otherwise              -> assignReg_IntCode format reg src
         where ty = cmmRegType dflags reg
               format = cmmTypeFormat ty
@@ -236,7 +222,6 @@ stmtToInstrs bid stmt = do
     CmmStore addr src
       | isFloatType ty         -> assignMem_FltCode format addr src
       | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
-      | isVecType ty           -> assignMem_VecCode format addr src
       | otherwise              -> assignMem_IntCode format addr src
         where ty = cmmExprType dflags src
               format = cmmTypeFormat ty
@@ -323,15 +308,6 @@ getRegisterReg platform  (CmmGlobal mid)
         -- platform.  Hence ...
 
 
-getVecRegisterReg :: Platform -> Bool -> Format -> CmmReg -> Reg
-getVecRegisterReg _ use_avx format (CmmLocal (LocalReg u pk))
-  | isVecType pk && use_avx = RegVirtual (mkVirtualReg u format)
-  | otherwise               = pprPanic
-                              (unlines ["avx flag is not enabled" ,
-                                        "or this is not a vector register"])
-                              (ppr pk)
-getVecRegisterReg platform _use_avx _format c = getRegisterReg platform c
-
 -- | Memory addressing modes passed up the tree.
 data Amode
         = Amode AddrMode InstrBlock
@@ -527,13 +503,6 @@ iselExpr64 expr
 
 
 --------------------------------------------------------------------------------
-
--- This is a helper data type which helps reduce the code duplication for
--- the code generation of arithmetic operations. This is not specifically
--- targetted for any particular type like Int8, Int32 etc
-data VectorArithInstns = VA_Add | VA_Sub | VA_Mul | VA_Div
-
-
 getRegister :: CmmExpr -> NatM Register
 getRegister e = do dflags <- getDynFlags
                    is32Bit <- is32BitPlatform
@@ -551,24 +520,16 @@ getRegister' dflags is32Bit (CmmReg reg)
             do reg' <- getPicBaseNat (archWordFormat is32Bit)
                return (Fixed (archWordFormat is32Bit) reg' nilOL)
         _ ->
-            do use_sse2 <- sse2Enabled
-               use_avx <- avxEnabled
-               let cmmregtype = cmmRegType dflags reg
-               if isVecType cmmregtype
-                 then return (vectorRegister cmmregtype use_avx use_sse2)
-                 else return (standardRegister cmmregtype)
-  where
-    vectorRegister :: CmmType -> Bool -> Bool -> Register
-    vectorRegister reg_ty use_avx use_sse2
-      | use_avx || use_sse2 =
-        let vecfmt   = cmmTypeFormat reg_ty
-            platform = targetPlatform dflags
-        in (Fixed vecfmt (getVecRegisterReg platform True vecfmt reg) nilOL)
-      | otherwise = panic "Please enable the -mavx or -msse2 flag"
-
-    standardRegister crt =
-      let platform = targetPlatform dflags
-       in (Fixed (cmmTypeFormat crt) (getRegisterReg platform reg) nilOL)
+            do
+               let
+                 fmt = cmmTypeFormat (cmmRegType dflags reg)
+                 format  = fmt
+               --
+               let platform = targetPlatform dflags
+               return (Fixed format
+                             (getRegisterReg platform  reg)
+                             nilOL)
+
 
 getRegister' dflags is32Bit (CmmRegOff r n)
   = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
@@ -670,69 +631,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
       return $ Any II64 (\dst -> unitOL $
         LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
 
-getRegister' _ _ (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
-  sse4_1 <- sse4_1Enabled
-  sse2   <- sse2Enabled
-  sse    <- sseEnabled
-  case mop of
-    MO_VF_Insert l W32  | sse4_1 && sse -> vector_float_pack l W32 x y z
-                        | otherwise
-                          -> sorry "Please enable the -msse4 and -msse flag"
-    MO_VF_Insert l W64  | sse2   && sse -> vector_float_pack l W64 x y z
-                        | otherwise
-                          -> sorry "Please enable the -msse2 and -msse flag"
-    _other                              -> incorrectOperands
-    where
-    vector_float_pack :: Length
-                      -> Width
-                      -> CmmExpr
-                      -> CmmExpr
-                      -> CmmExpr
-                      -> NatM Register
-    vector_float_pack len W32 expr1 expr2 (CmmLit offset)
-      = do
-      fn          <- getAnyReg expr1
-      (r, exp)    <- getSomeReg expr2
-      let f        = VecFormat len FmtFloat W32
-          imm      = litToImm offset
-          code dst = exp `appOL`
-                     (fn dst) `snocOL`
-                     (INSERTPS f (OpImm imm) (OpReg r) dst)
-       in return $ Any f code
-    vector_float_pack len W64 expr1 expr2 (CmmLit offset)
-      = do
-      Amode addr addr_code <- getAmode expr2
-      (r, exp) <- getSomeReg expr1
-
-      -- fn <- getAnyReg expr1
-      -- (r, exp) <- getSomeReg expr2
-      let f = VecFormat len FmtDouble W64
-          code dst
-            = case offset of
-                CmmInt 0  _ -> exp `appOL` addr_code `snocOL`
-                               (MOVL f (OpAddr addr) (OpReg r)) `snocOL`
-                               (MOVU f (OpReg r) (OpReg dst))
-                CmmInt 16 _ -> exp `appOL` addr_code `snocOL`
-                               (MOVH f (OpAddr addr) (OpReg r)) `snocOL`
-                               (MOVU f (OpReg r) (OpReg dst))
-                _ -> panic "Error in offset while packing"
-          -- code dst
-          --   = case offset of
-          --       CmmInt 0  _ -> exp `appOL`
-          --                      (fn dst) `snocOL`
-          --                      (MOVL f (OpReg r) (OpReg dst))
-          --       CmmInt 16 _ -> exp `appOL`
-          --                      (fn dst) `snocOL`
-          --                      (MOVH f (OpReg r) (OpReg dst))
-          --       _ -> panic "Error in offset while packing"
-       in return $ Any f code
-    vector_float_pack _ _ _ c _
-      = pprPanic "Pack not supported for : " (ppr c)
-
 getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
-    sse2   <- sse2Enabled
-    sse    <- sseEnabled
-    avx    <- avxEnabled
     case mop of
       MO_F_Neg w  -> sse2NegCode w x
 
@@ -809,28 +708,23 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
       MO_FS_Conv from to -> coerceFP2Int from to x
       MO_SF_Conv from to -> coerceInt2FP from to x
 
-      MO_V_Insert {}      -> needLlvm
-      MO_V_Extract {}     -> needLlvm
-      MO_V_Add {}         -> needLlvm
-      MO_V_Sub {}         -> needLlvm
-      MO_V_Mul {}         -> needLlvm
-      MO_VS_Quot {}       -> needLlvm
-      MO_VS_Rem {}        -> needLlvm
-      MO_VS_Neg {}        -> needLlvm
-      MO_VU_Quot {}       -> needLlvm
-      MO_VU_Rem {}        -> needLlvm
-      MO_VF_Broadcast {}  -> incorrectOperands
-      MO_VF_Insert {}     -> incorrectOperands
-      MO_VF_Extract {}    -> incorrectOperands
-      MO_VF_Add {}        -> incorrectOperands
-      MO_VF_Sub {}        -> incorrectOperands
-      MO_VF_Mul {}        -> incorrectOperands
-      MO_VF_Quot {}       -> incorrectOperands
-
-      MO_VF_Neg l w  | avx           -> vector_float_negate_avx l w x
-                     | sse && sse2   -> vector_float_negate_sse l w x
-                     | otherwise
-                       -> sorry "Please enable the -mavx or -msse, -msse2 flag"
+      MO_V_Insert {}   -> needLlvm
+      MO_V_Extract {}  -> needLlvm
+      MO_V_Add {}      -> needLlvm
+      MO_V_Sub {}      -> needLlvm
+      MO_V_Mul {}      -> needLlvm
+      MO_VS_Quot {}    -> needLlvm
+      MO_VS_Rem {}     -> needLlvm
+      MO_VS_Neg {}     -> needLlvm
+      MO_VU_Quot {}    -> needLlvm
+      MO_VU_Rem {}     -> needLlvm
+      MO_VF_Insert {}  -> needLlvm
+      MO_VF_Extract {} -> needLlvm
+      MO_VF_Add {}     -> needLlvm
+      MO_VF_Sub {}     -> needLlvm
+      MO_VF_Mul {}     -> needLlvm
+      MO_VF_Quot {}    -> needLlvm
+      MO_VF_Neg {}     -> needLlvm
 
       _other -> pprPanic "getRegister" (pprMachOp mop)
    where
@@ -868,45 +762,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
             = do e_code <- getRegister' dflags is32Bit expr
                  return (swizzleRegisterRep e_code new_format)
 
-        vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register
-        vector_float_negate_avx l w expr = do
-          tmp                  <- getNewRegNat (VecFormat l FmtFloat w)
-          (reg, exp)           <- getSomeReg expr
-          Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes W32) (CmmFloat 0.0 W32)
-          let format   = case w of
-                           W32 -> VecFormat l FmtFloat w
-                           W64 -> VecFormat l FmtDouble w
-                           _ -> pprPanic "Cannot negate vector of width" (ppr w)
-              code dst = case w of
-                           W32 -> exp `appOL` addr_code `snocOL`
-                                  (VBROADCAST format addr tmp) `snocOL`
-                                  (VSUB format (OpReg reg) tmp dst)
-                           W64 -> exp `appOL` addr_code `snocOL`
-                                  (MOVL format (OpAddr addr) (OpReg tmp)) `snocOL`
-                                  (MOVH format (OpAddr addr) (OpReg tmp)) `snocOL`
-                                  (VSUB format (OpReg reg) tmp dst)
-                           _ -> pprPanic "Cannot negate vector of width" (ppr w)
-          return (Any format code)
-
-        vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register
-        vector_float_negate_sse l w expr = do
-          tmp                  <- getNewRegNat (VecFormat l FmtFloat w)
-          (reg, exp)           <- getSomeReg expr
-          let format   = case w of
-                           W32 -> VecFormat l FmtFloat w
-                           W64 -> VecFormat l FmtDouble w
-                           _ -> pprPanic "Cannot negate vector of width" (ppr w)
-              code dst = exp `snocOL`
-                         (XOR format (OpReg tmp) (OpReg tmp)) `snocOL`
-                         (MOVU format (OpReg tmp) (OpReg dst)) `snocOL`
-                         (SUB format (OpReg reg) (OpReg dst))
-          return (Any format code)
 
 getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
-  sse4_1 <- sse4_1Enabled
-  sse2   <- sse2Enabled
-  sse    <- sseEnabled
-  avx    <- avxEnabled
   case mop of
       MO_F_Eq _ -> condFltReg is32Bit EQQ x y
       MO_F_Ne _ -> condFltReg is32Bit NE  x y
@@ -971,49 +828,13 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       MO_VS_Quot {}    -> needLlvm
       MO_VS_Rem {}     -> needLlvm
       MO_VS_Neg {}     -> needLlvm
-
-      MO_VF_Broadcast l W32 | avx       -> vector_float_broadcast_avx l W32 x y
-                            | sse4_1    -> vector_float_broadcast_sse l W32 x y
-                            | otherwise
-                              -> sorry "Please enable the -mavx or -msse4 flag"
-
-      MO_VF_Broadcast l W64 | sse2      -> vector_float_broadcast_avx l W64 x y
-                            | otherwise -> sorry "Please enable the -msse2 flag"
-
-      MO_VF_Extract l W32   | avx       -> vector_float_unpack l W32 x y
-                            | sse       -> vector_float_unpack_sse l W32 x y
-                            | otherwise
-                              -> sorry "Please enable the -mavx or -msse flag"
-
-      MO_VF_Extract l W64   | sse2      -> vector_float_unpack l W64 x y
-                            | otherwise -> sorry "Please enable the -msse2 flag"
-
-      MO_VF_Add l w         | avx              -> vector_float_op_avx VA_Add l w x y
-                            | sse  && w == W32 -> vector_float_op_sse VA_Add l w x y
-                            | sse2 && w == W64 -> vector_float_op_sse VA_Add l w x y
-                            | otherwise
-                              -> sorry "Please enable the -mavx or -msse flag"
-
-      MO_VF_Sub l w         | avx              -> vector_float_op_avx VA_Sub l w x y
-                            | sse  && w == W32 -> vector_float_op_sse VA_Sub l w x y
-                            | sse2 && w == W64 -> vector_float_op_sse VA_Sub l w x y
-                            | otherwise
-                              -> sorry "Please enable the -mavx or -msse flag"
-
-      MO_VF_Mul l w         | avx              -> vector_float_op_avx VA_Mul l w x y
-                            | sse  && w == W32 -> vector_float_op_sse VA_Mul l w x y
-                            | sse2 && w == W64 -> vector_float_op_sse VA_Mul l w x y
-                            | otherwise
-                              -> sorry "Please enable the -mavx or -msse flag"
-
-      MO_VF_Quot l w        | avx              -> vector_float_op_avx VA_Div l w x y
-                            | sse  && w == W32 -> vector_float_op_sse VA_Div l w x y
-                            | sse2 && w == W64 -> vector_float_op_sse VA_Div l w x y
-                            | otherwise
-                              -> sorry "Please enable the -mavx or -msse flag"
-
-      MO_VF_Insert {}                  -> incorrectOperands
-      MO_VF_Neg {}                     -> incorrectOperands
+      MO_VF_Insert {}  -> needLlvm
+      MO_VF_Extract {} -> needLlvm
+      MO_VF_Add {}     -> needLlvm
+      MO_VF_Sub {}     -> needLlvm
+      MO_VF_Mul {}     -> needLlvm
+      MO_VF_Quot {}    -> needLlvm
+      MO_VF_Neg {}     -> needLlvm
 
       _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
   where
@@ -1109,171 +930,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
     -- TODO: There are other interesting patterns we want to replace
     --     with a LEA, e.g. `(x + offset) + (y << shift)`.
 
-    -----------------------
-    -- Vector operations---
-    vector_float_op_avx :: VectorArithInstns
-                        -> Length
-                        -> Width
-                        -> CmmExpr
-                        -> CmmExpr
-                        -> NatM Register
-    vector_float_op_avx op l w expr1 expr2 = do
-      (reg1, exp1) <- getSomeReg expr1
-      (reg2, exp2) <- getSomeReg expr2
-      let format   = case w of
-                       W32 -> VecFormat l FmtFloat  W32
-                       W64 -> VecFormat l FmtDouble W64
-                       _ -> pprPanic "Operation not supported for width " (ppr w)
-          code dst = case op of
-            VA_Add -> arithInstr VADD
-            VA_Sub -> arithInstr VSUB
-            VA_Mul -> arithInstr VMUL
-            VA_Div -> arithInstr VDIV
-            where
-              -- opcode src2 src1 dst <==> dst = src1 `opcode` src2
-              arithInstr instr = exp1 `appOL` exp2 `snocOL`
-                                 (instr format (OpReg reg2) reg1 dst)
-      return (Any format code)
-
-    vector_float_op_sse :: VectorArithInstns
-                        -> Length
-                        -> Width
-                        -> CmmExpr
-                        -> CmmExpr
-                        -> NatM Register
-    vector_float_op_sse op l w expr1 expr2 = do
-      (reg1, exp1) <- getSomeReg expr1
-      (reg2, exp2) <- getSomeReg expr2
-      let format   = case w of
-                       W32 -> VecFormat l FmtFloat  W32
-                       W64 -> VecFormat l FmtDouble W64
-                       _ -> pprPanic "Operation not supported for width " (ppr w)
-          code dst = case op of
-            VA_Add -> arithInstr ADD
-            VA_Sub -> arithInstr SUB
-            VA_Mul -> arithInstr MUL
-            VA_Div -> arithInstr FDIV
-            where
-              -- opcode src2 src1 <==> src1 = src1 `opcode` src2
-              arithInstr instr
-                = exp1 `appOL` exp2 `snocOL`
-                  (MOVU format (OpReg reg1) (OpReg dst)) `snocOL`
-                  (instr format (OpReg reg2) (OpReg dst))
-      return (Any format code)
     --------------------
-    vector_float_unpack :: Length
-                        -> Width
-                        -> CmmExpr
-                        -> CmmExpr
-                        -> NatM Register
-    vector_float_unpack l W32 expr (CmmLit lit)
-      = do
-      (r, exp) <- getSomeReg expr
-      let format   = VecFormat l FmtFloat W32
-          imm      = litToImm lit
-          code dst
-            = case lit of
-                CmmInt 0 _ -> exp `snocOL` (VMOVU format (OpReg r) (OpReg dst))
-                CmmInt _ _ -> exp `snocOL` (VPSHUFD format (OpImm imm) (OpReg r) dst)
-                _          -> panic "Error in offset while unpacking"
-      return (Any format code)
-    vector_float_unpack l W64 expr (CmmLit lit)
-      = do
-      dflags <- getDynFlags
-      (r, exp) <- getSomeReg expr
-      let format   = VecFormat l FmtDouble W64
-          addr     = spRel dflags 0
-          code dst
-            = case lit of
-                CmmInt 0 _ -> exp `snocOL`
-                              (MOVL format (OpReg r) (OpAddr addr)) `snocOL`
-                              (MOV FF64 (OpAddr addr) (OpReg dst))
-                CmmInt 1 _ -> exp `snocOL`
-                              (MOVH format (OpReg r) (OpAddr addr)) `snocOL`
-                              (MOV FF64 (OpAddr addr) (OpReg dst))
-                _          -> panic "Error in offset while unpacking"
-      return (Any format code)
-    vector_float_unpack _ w c e
-      = pprPanic "Unpack not supported for : " (ppr c $$ ppr e $$ ppr w)
-    -----------------------
-
-    vector_float_unpack_sse :: Length
-                            -> Width
-                            -> CmmExpr
-                            -> CmmExpr
-                            -> NatM Register
-    vector_float_unpack_sse l W32 expr (CmmLit lit)
-      = do
-      (r,exp) <- getSomeReg expr
-      let format   = VecFormat l FmtFloat W32
-          imm      = litToImm lit
-          code dst
-            = case lit of
-                CmmInt 0 _ -> exp `snocOL` (MOVU format (OpReg r) (OpReg dst))
-                CmmInt _ _ -> exp `snocOL` (PSHUFD format (OpImm imm) (OpReg r) dst)
-                _          -> panic "Error in offset while unpacking"
-      return (Any format code)
-    vector_float_unpack_sse _ w c e
-      = pprPanic "Unpack not supported for : " (ppr c $$ ppr e $$ ppr w)
-    -----------------------
-    vector_float_broadcast_avx :: Length
-                           -> Width
-                           -> CmmExpr
-                           -> CmmExpr
-                           -> NatM Register
-    vector_float_broadcast_avx len W32 expr1 expr2
-      = do
-      dflags    <- getDynFlags
-      fn        <- getAnyReg expr1
-      (r', exp) <- getSomeReg expr2
-      let f    = VecFormat len FmtFloat W32
-          addr = spRel dflags 0
-       in return $ Any f (\r -> exp    `appOL`
-                                (fn r) `snocOL`
-                                (MOVU f (OpReg r') (OpAddr addr)) `snocOL`
-                                (VBROADCAST f addr r))
-    vector_float_broadcast_avx len W64 expr1 expr2
-      = do
-      dflags    <- getDynFlags
-      fn        <- getAnyReg  expr1
-      (r', exp) <- getSomeReg expr2
-      let f    = VecFormat len FmtDouble W64
-          addr = spRel dflags 0
-       in return $ Any f (\r -> exp    `appOL`
-                                (fn r) `snocOL`
-                                (MOVU f (OpReg r') (OpAddr addr)) `snocOL`
-                                (MOVL f (OpAddr addr) (OpReg r)) `snocOL`
-                                (MOVH f (OpAddr addr) (OpReg r)))
-    vector_float_broadcast_avx _ _ c _
-      = pprPanic "Broadcast not supported for : " (ppr c)
-    -----------------------
-    vector_float_broadcast_sse :: Length
-                               -> Width
-                               -> CmmExpr
-                               -> CmmExpr
-                               -> NatM Register
-    vector_float_broadcast_sse len W32 expr1 expr2
-      = do
-      dflags   <- getDynFlags
-      fn       <- getAnyReg  expr1  -- destination
-      (r, exp) <- getSomeReg expr2  -- source
-      let f        = VecFormat len FmtFloat W32
-          addr     = spRel dflags 0
-          code dst = exp `appOL`
-                     (fn dst) `snocOL`
-                     (MOVU f (OpReg r) (OpAddr addr)) `snocOL`
-                     (insertps 0) `snocOL`
-                     (insertps 16) `snocOL`
-                     (insertps 32) `snocOL`
-                     (insertps 48)
-            where
-              insertps off =
-                INSERTPS f (OpImm $ litToImm $ CmmInt off W32) (OpAddr addr) dst
-
-       in return $ Any f code
-    vector_float_broadcast_sse _ _ c _
-      = pprPanic "Broadcast not supported for : " (ppr c)
-    -----------------------
     sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
     sub_code rep x (CmmLit (CmmInt y _))
         | is32BitInteger (-y) = add_int rep x (-y)
@@ -1326,21 +983,6 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
 
            return (Fixed format result code)
 
-getRegister' _ _ (CmmLoad mem pk)
-  | isVecType pk = do
-      use_avx <- avxEnabled
-      use_sse <- sseEnabled
-      Amode addr mem_code <- getAmode mem
-      let format = cmmTypeFormat pk
-          code dst
-            | use_avx = mem_code `snocOL`
-                        VMOVU format (OpAddr addr) (OpReg dst)
-            | use_sse = mem_code `snocOL`
-                        MOVU format (OpAddr addr) (OpReg dst)
-            | otherwise = pprPanic (unlines ["avx or sse flag not enabled",
-                                            "for loading to "])
-                          (ppr pk)
-      return (Any format code)
 
 getRegister' _ _ (CmmLoad mem pk)
   | isFloatType pk
@@ -1407,24 +1049,10 @@ getRegister' dflags is32Bit (CmmLit lit)
         -- small memory model (see gcc docs, -mcmodel=small).
 
 getRegister' dflags _ (CmmLit lit)
-  | isVecType cmmtype = vectorRegister cmmtype
-  | otherwise         = standardRegister cmmtype
-  where
-    cmmtype = cmmLitType dflags lit
-    vectorRegister ctype
-      = do
-      --NOTE:
-      -- This operation is only used to zero a register. For loading a
-      -- vector literal there are pack and broadcast operations
-      let format = cmmTypeFormat ctype
-          code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
-      return (Any format code)
-    standardRegister ctype
-      = do
-      let format = cmmTypeFormat ctype
-          imm = litToImm lit
-          code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
-      return (Any format code)
+  = do let format = cmmTypeFormat (cmmLitType dflags lit)
+           imm = litToImm lit
+           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
+       return (Any format code)
 
 getRegister' _ _ other
     | isVecExpr other  = needLlvm
@@ -1490,14 +1118,8 @@ getNonClobberedReg expr = do
                 return (reg, code)
 
 reg2reg :: Format -> Reg -> Reg -> Instr
-reg2reg format@(VecFormat _ FmtFloat W32) src dst
-  = VMOVU format (OpReg src) (OpReg dst)
-reg2reg format@(VecFormat _ FmtDouble W64) src dst
-  = VMOVU format (OpReg src) (OpReg dst)
-reg2reg (VecFormat _ _ _) _ _
-  = panic "MOV operation not implemented for vectors"
-reg2reg format src dst
-  = MOV format (OpReg src) (OpReg dst)
+reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
+
 
 --------------------------------------------------------------------------------
 getAmode :: CmmExpr -> NatM Amode
@@ -1559,9 +1181,6 @@ getAmode' _ (CmmMachOp (MO_Add _)
 getAmode' _ (CmmMachOp (MO_Add _) [x,y])
   = x86_complex_amode x y 0 0
 
-getAmode' _ (CmmLit lit@(CmmFloat _ w))
-  = memConstant (mkAlignment $ widthInBytes w) lit
-
 getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
   = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
 
@@ -1942,8 +1561,7 @@ assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
 assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
 
-assignMem_VecCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_VecCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+
 -- integer assignment to memory
 
 -- specific case of adding/subtracting an integer to a particular address.
@@ -2020,29 +1638,6 @@ assignReg_FltCode _ reg src = do
   let platform = targetPlatform dflags
   return (src_code (getRegisterReg platform  reg))
 
-assignMem_VecCode pk addr src = do
-  (src_reg, src_code) <- getNonClobberedReg src
-  Amode addr addr_code <- getAmode addr
-  use_avx <- avxEnabled
-  use_sse <- sseEnabled
-  let
-        code | use_avx   = src_code `appOL`
-                           addr_code `snocOL`
-                           (VMOVU pk (OpReg src_reg) (OpAddr addr))
-             | use_sse   = src_code `appOL`
-                           addr_code `snocOL`
-                           (MOVU pk (OpReg src_reg) (OpAddr addr))
-             | otherwise = sorry "Please enable the -mavx or -msse flag"
-  return code
-
-assignReg_VecCode format reg src = do
-  use_avx <- avxEnabled
-  use_sse <- sseEnabled
-  src_code <- getAnyReg src
-  dflags <- getDynFlags
-  let platform = targetPlatform dflags
-      flag     = use_avx || use_sse
-  return (src_code (getVecRegisterReg platform flag format reg))
 
 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
 
@@ -3767,7 +3362,6 @@ sse2NegCode w x = do
       x@II16 -> wrongFmt x
       x@II32 -> wrongFmt x
       x@II64 -> wrongFmt x
-      x@VecFormat {} -> wrongFmt x
 
       where
         wrongFmt x = panic $ "sse2NegCode: " ++ show x
@@ -3782,33 +3376,29 @@ sse2NegCode w x = do
   return (Any fmt code)
 
 isVecExpr :: CmmExpr -> Bool
-isVecExpr (CmmMachOp (MO_V_Insert {}) _)     = True
-isVecExpr (CmmMachOp (MO_V_Extract {}) _)    = True
-isVecExpr (CmmMachOp (MO_V_Add {}) _)        = True
-isVecExpr (CmmMachOp (MO_V_Sub {}) _)        = True
-isVecExpr (CmmMachOp (MO_V_Mul {}) _)        = True
-isVecExpr (CmmMachOp (MO_VS_Quot {}) _)      = True
-isVecExpr (CmmMachOp (MO_VS_Rem {}) _)       = True
-isVecExpr (CmmMachOp (MO_VS_Neg {}) _)       = True
-isVecExpr (CmmMachOp (MO_VF_Broadcast {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Insert {}) _)    = True
-isVecExpr (CmmMachOp (MO_VF_Extract {}) _)   = True
-isVecExpr (CmmMachOp (MO_VF_Add {}) _)       = True
-isVecExpr (CmmMachOp (MO_VF_Sub {}) _)       = True
-isVecExpr (CmmMachOp (MO_VF_Mul {}) _)       = True
-isVecExpr (CmmMachOp (MO_VF_Quot {}) _)      = True
-isVecExpr (CmmMachOp (MO_VF_Neg {}) _)       = True
-isVecExpr (CmmMachOp _ [e])                  = isVecExpr e
-isVecExpr _                                  = False
+isVecExpr (CmmMachOp (MO_V_Insert {}) _)   = True
+isVecExpr (CmmMachOp (MO_V_Extract {}) _)  = True
+isVecExpr (CmmMachOp (MO_V_Add {}) _)      = True
+isVecExpr (CmmMachOp (MO_V_Sub {}) _)      = True
+isVecExpr (CmmMachOp (MO_V_Mul {}) _)      = True
+isVecExpr (CmmMachOp (MO_VS_Quot {}) _)    = True
+isVecExpr (CmmMachOp (MO_VS_Rem {}) _)     = True
+isVecExpr (CmmMachOp (MO_VS_Neg {}) _)     = True
+isVecExpr (CmmMachOp (MO_VF_Insert {}) _)  = True
+isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Add {}) _)     = True
+isVecExpr (CmmMachOp (MO_VF_Sub {}) _)     = True
+isVecExpr (CmmMachOp (MO_VF_Mul {}) _)     = True
+isVecExpr (CmmMachOp (MO_VF_Quot {}) _)    = True
+isVecExpr (CmmMachOp (MO_VF_Neg {}) _)     = True
+isVecExpr (CmmMachOp _ [e])                = isVecExpr e
+isVecExpr _                                = False
 
 needLlvm :: NatM a
 needLlvm =
     sorry $ unlines ["The native code generator does not support vector"
                     ,"instructions. Please use -fllvm."]
 
-incorrectOperands :: NatM a
-incorrectOperands = sorry "Incorrect number of operands"
-
 -- | This works on the invariant that all jumps in the given blocks are required.
 --   Starting from there we try to make a few more jumps redundant by reordering
 --   them.
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 47b62e62e7..6e5d656beb 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -328,36 +328,6 @@ data Instr
         | CMPXCHG     Format Operand Operand -- src (r), dst (r/m), eax implicit
         | MFENCE
 
-        -- Vector Instructions --
-        -- NOTE: Instructions follow the AT&T syntax
-        -- Constructors and deconstructors
-        | VBROADCAST  Format AddrMode Reg
-        | VEXTRACT    Format Operand Reg Operand
-        | INSERTPS    Format Operand Operand Reg
-
-        -- move operations
-        | VMOVU       Format Operand Operand
-        | MOVU        Format Operand Operand
-        | MOVL        Format Operand Operand
-        | MOVH        Format Operand Operand
-
-        -- logic operations
-        | VPXOR       Format Reg Reg Reg
-
-        -- Arithmetic
-        | VADD       Format Operand Reg Reg
-        | VSUB       Format Operand Reg Reg
-        | VMUL       Format Operand Reg Reg
-        | VDIV       Format Operand Reg Reg
-
-        -- Shuffle
-        | VPSHUFD    Format Operand Operand Reg
-        | PSHUFD     Format Operand Operand Reg
-
-        -- Shift
-        | PSLLDQ     Format Operand Reg
-        | PSRLDQ     Format Operand Reg
-
 data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
 
 
@@ -460,31 +430,6 @@ x86_regUsageOfInstr platform instr
     CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
     MFENCE -> noUsage
 
-    -- vector instructions
-    VBROADCAST _ src dst   -> mkRU (use_EA src []) [dst]
-    VEXTRACT     _ off src dst -> mkRU ((use_R off []) ++ [src]) (use_R dst [])
-    INSERTPS     _ off src dst
-      -> mkRU ((use_R off []) ++ (use_R src []) ++ [dst]) [dst]
-
-    VMOVU        _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    MOVU         _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    MOVL         _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    MOVH         _ src dst   -> mkRU (use_R src []) (use_R dst [])
-    VPXOR        _ s1 s2 dst -> mkRU [s1,s2] [dst]
-
-    VADD         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-    VSUB         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-    VMUL         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-    VDIV         _ s1 s2 dst -> mkRU ((use_R s1 []) ++ [s2]) [dst]
-
-    VPSHUFD      _ off src dst
-      -> mkRU (concatMap (\op -> use_R op []) [off, src]) [dst]
-    PSHUFD       _ off src dst
-      -> mkRU (concatMap (\op -> use_R op []) [off, src]) [dst]
-
-    PSLLDQ       _ off dst -> mkRU (use_R off []) [dst]
-    PSRLDQ       _ off dst -> mkRU (use_R off []) [dst]
-
     _other              -> panic "regUsage: unrecognised instr"
  where
     -- # Definitions
@@ -643,32 +588,6 @@ x86_patchRegsOfInstr instr env
     CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
     MFENCE               -> instr
 
-    -- vector instructions
-    VBROADCAST   fmt src dst   -> VBROADCAST fmt (lookupAddr src) (env dst)
-    VEXTRACT     fmt off src dst
-      -> VEXTRACT fmt (patchOp off) (env src) (patchOp dst)
-    INSERTPS    fmt off src dst
-      -> INSERTPS fmt (patchOp off) (patchOp src) (env dst)
-
-    VMOVU      fmt src dst   -> VMOVU fmt (patchOp src) (patchOp dst)
-    MOVU       fmt src dst   -> MOVU  fmt (patchOp src) (patchOp dst)
-    MOVL       fmt src dst   -> MOVL  fmt (patchOp src) (patchOp dst)
-    MOVH       fmt src dst   -> MOVH  fmt (patchOp src) (patchOp dst)
-    VPXOR      fmt s1 s2 dst -> VPXOR fmt (env s1) (env s2) (env dst)
-
-    VADD       fmt s1 s2 dst -> VADD fmt (patchOp s1) (env s2) (env dst)
-    VSUB       fmt s1 s2 dst -> VSUB fmt (patchOp s1) (env s2) (env dst)
-    VMUL       fmt s1 s2 dst -> VMUL fmt (patchOp s1) (env s2) (env dst)
-    VDIV       fmt s1 s2 dst -> VDIV fmt (patchOp s1) (env s2) (env dst)
-
-    VPSHUFD      fmt off src dst
-      -> VPSHUFD fmt (patchOp off) (patchOp src) (env dst)
-    PSHUFD       fmt off src dst
-      -> PSHUFD  fmt (patchOp off) (patchOp src) (env dst)
-    PSLLDQ       fmt off dst
-      -> PSLLDQ  fmt (patchOp off) (env dst)
-    PSRLDQ       fmt off dst
-      -> PSRLDQ  fmt (patchOp off) (env dst)
     _other              -> panic "patchRegs: unrecognised instr"
 
   where
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index a3f27ba471..095d9eba7c 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -41,6 +41,7 @@ import DynFlags
 import Cmm              hiding (topInfoTable)
 import BlockId
 import CLabel
+import Unique           ( pprUniqueAlways )
 import GHC.Platform
 import FastString
 import Outputable
@@ -279,7 +280,10 @@ pprReg f r
           if target32Bit platform then ppr32_reg_no f i
                                   else ppr64_reg_no f i
       RegReal    (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
-      RegVirtual v                 -> ppr v
+      RegVirtual (VirtualRegI  u)  -> text "%vI_"   <> pprUniqueAlways u
+      RegVirtual (VirtualRegHi u)  -> text "%vHi_"  <> pprUniqueAlways u
+      RegVirtual (VirtualRegF  u)  -> text "%vF_"   <> pprUniqueAlways u
+      RegVirtual (VirtualRegD  u)  -> text "%vD_"   <> pprUniqueAlways u
 
   where
     ppr32_reg_no :: Format -> Int -> SDoc
@@ -391,11 +395,6 @@ pprFormat x
                 II64  -> sLit "q"
                 FF32  -> sLit "ss"      -- "scalar single-precision float" (SSE2)
                 FF64  -> sLit "sd"      -- "scalar double-precision float" (SSE2)
-
-                VecFormat _ FmtFloat W32  -> sLit "ps"
-                VecFormat _ FmtDouble W64 -> sLit "pd"
-                -- TODO: Add Ints and remove panic
-                VecFormat {} -> panic "Incorrect width"
                 )
 
 pprFormat_x87 :: Format -> SDoc
@@ -784,41 +783,6 @@ pprInstr (IDIV fmt op)   = pprFormatOp (sLit "idiv") fmt op
 pprInstr (DIV fmt op)    = pprFormatOp (sLit "div")  fmt op
 pprInstr (IMUL2 fmt op)  = pprFormatOp (sLit "imul") fmt op
 
--- Vector Instructions
-
-pprInstr (VADD format s1 s2 dst)
-  = pprFormatOpRegReg (sLit "vadd") format s1 s2 dst
-pprInstr (VSUB format s1 s2 dst)
-  = pprFormatOpRegReg (sLit "vsub") format s1 s2 dst
-pprInstr (VMUL format s1 s2 dst)
-  = pprFormatOpRegReg (sLit "vmul") format s1 s2 dst
-pprInstr (VDIV format s1 s2 dst)
-  = pprFormatOpRegReg (sLit "vdiv") format s1 s2 dst
-pprInstr (VBROADCAST format from to)
-  = pprBroadcast (sLit "vbroadcast") format from to
-pprInstr (VMOVU format from to)
-  = pprFormatOpOp (sLit "vmovu") format from to
-pprInstr (MOVU format from to)
-  = pprFormatOpOp (sLit "movu") format from to
-pprInstr (MOVL format from to)
-  = pprFormatOpOp (sLit "movl") format from to
-pprInstr (MOVH format from to)
-  = pprFormatOpOp (sLit "movh") format from to
-pprInstr (VPXOR format s1 s2 dst)
-  = pprXor (sLit "vpxor") format s1 s2 dst
-pprInstr (VEXTRACT format offset from to)
-  = pprFormatOpRegOp (sLit "vextract") format offset from to
-pprInstr (INSERTPS format offset addr dst)
-  = pprInsert (sLit "insertps") format offset addr dst
-pprInstr (VPSHUFD format offset src dst)
-  = pprShuf (sLit "vpshufd") format offset src dst
-pprInstr (PSHUFD format offset src dst)
-  = pprShuf (sLit "pshufd") format offset src dst
-pprInstr (PSLLDQ format offset dst)
-  = pprShiftLeft (sLit "pslldq") format offset dst
-pprInstr (PSRLDQ format offset dst)
-  = pprShiftRight (sLit "psrldq") format offset dst
-
 -- x86_64 only
 pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2
 pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op
@@ -911,23 +875,6 @@ pprMnemonic  :: PtrString -> Format -> SDoc
 pprMnemonic name format =
    char '\t' <> ptext name <> pprFormat format <> space
 
-pprGenMnemonic  :: PtrString -> Format -> SDoc
-pprGenMnemonic name _ =
-   char '\t' <> ptext name <> ptext (sLit "") <> space
-
-pprBroadcastMnemonic  :: PtrString -> Format -> SDoc
-pprBroadcastMnemonic name format =
-   char '\t' <> ptext name <> pprBroadcastFormat format <> space
-
-pprBroadcastFormat :: Format -> SDoc
-pprBroadcastFormat x
-  = ptext (case x of
-             VecFormat _ FmtFloat W32  -> sLit "ss"
-             VecFormat _ FmtDouble W64 -> sLit "sd"
-             -- TODO: Add Ints and remove panic
-             VecFormat {} -> panic "Incorrect width"
-             _ -> panic "Scalar Format invading vector operation"
-          )
 
 pprFormatImmOp :: PtrString -> Format -> Imm -> Operand -> SDoc
 pprFormatImmOp name format imm op1
@@ -974,16 +921,7 @@ pprOpOp name format op1 op2
         pprOperand format op2
     ]
 
-pprFormatOpRegOp :: PtrString -> Format -> Operand -> Reg -> Operand -> SDoc
-pprFormatOpRegOp name format off reg1 op2
-  = hcat [
-        pprMnemonic name format,
-        pprOperand format off,
-        comma,
-        pprReg format reg1,
-        comma,
-        pprOperand format op2
-    ]
+
 
 pprRegReg :: PtrString -> Reg -> Reg -> SDoc
 pprRegReg name reg1 reg2
@@ -1006,17 +944,6 @@ pprFormatOpReg name format op1 reg2
         pprReg (archWordFormat (target32Bit platform)) reg2
     ]
 
-pprFormatOpRegReg :: PtrString -> Format -> Operand -> Reg -> Reg -> SDoc
-pprFormatOpRegReg name format op1 reg2 reg3
-  = hcat [
-        pprMnemonic name format,
-        pprOperand format op1,
-        comma,
-        pprReg format reg2,
-        comma,
-        pprReg format reg3
-    ]
-
 pprCondOpReg :: PtrString -> Format -> Cond -> Operand -> Reg -> SDoc
 pprCondOpReg name format cond op1 reg2
   = hcat [
@@ -1081,68 +1008,3 @@ pprFormatOpOpCoerce name format1 format2 op1 op2
 pprCondInstr :: PtrString -> Cond -> SDoc -> SDoc
 pprCondInstr name cond arg
   = hcat [ char '\t', ptext name, pprCond cond, space, arg]
-
-
--- Custom pretty printers
--- These instructions currently don't follow a uniform suffix pattern
--- in their names, so we have custom pretty printers for them.
-
-pprBroadcast :: PtrString -> Format -> AddrMode -> Reg -> SDoc
-pprBroadcast name format op dst
-  = hcat [
-        pprBroadcastMnemonic name format,
-        pprAddr op,
-        comma,
-        pprReg format dst
-    ]
-
-pprXor :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc
-pprXor name format reg1 reg2 reg3
-  = hcat [
-        pprGenMnemonic name format,
-        pprReg format reg1,
-        comma,
-        pprReg format reg2,
-        comma,
-        pprReg format reg3
-    ]
-
-pprInsert :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc
-pprInsert name format off src dst
-  = hcat [
-        pprGenMnemonic name format,
-        pprOperand format off,
-        comma,
-        pprOperand format src,
-        comma,
-        pprReg format dst
-    ]
-
-pprShuf :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc
-pprShuf name format op1 op2 reg3
-  = hcat [
-        pprGenMnemonic name format,
-        pprOperand format op1,
-        comma,
-        pprOperand format op2,
-        comma,
-        pprReg format reg3
-    ]
-
-pprShiftLeft :: PtrString -> Format -> Operand -> Reg -> SDoc
-pprShiftLeft name format off reg
-  = hcat [
-        pprGenMnemonic name format,
-        pprOperand format off,
-        comma,
-        pprReg format reg
-    ]
-
-pprShiftRight :: PtrString -> Format -> Operand -> Reg -> SDoc
-pprShiftRight name format off reg
-  = hcat [
-        pprGenMnemonic name format,
-        pprOperand format off,
-        comma,
-        pprReg format reg
-    ]
diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs
index a7784bacad..19056be4fa 100644
--- a/compiler/nativeGen/X86/RegInfo.hs
+++ b/compiler/nativeGen/X86/RegInfo.hs
@@ -22,8 +22,6 @@ import UniqFM
 import X86.Regs
 
 
---TODO:
--- Add VirtualRegAVX and inspect VecFormat and allocate
 mkVirtualReg :: Unique -> Format -> VirtualReg
 mkVirtualReg u format
    = case format of
@@ -33,7 +31,6 @@ mkVirtualReg u format
         -- For now we map both to being allocated as "Double" Registers
         -- on X86/X86_64
         FF64    -> VirtualRegD u
-        VecFormat {} -> VirtualRegVec u
         _other  -> VirtualRegI u
 
 regDotColor :: Platform -> RealReg -> SDoc
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index f0e4c7d5f6..2d9fd88c8e 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -84,7 +84,6 @@ virtualRegSqueeze cls vr
          -> case vr of
                 VirtualRegD{}           -> 1
                 VirtualRegF{}           -> 0
-                VirtualRegVec{}         -> 1
                 _other                  -> 0
 
 
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index 34c943d053..27a9324438 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -495,13 +495,13 @@ activeStgRegs = [
     ,DoubleReg 1
 #endif
 #if defined(REG_XMM1)
-    ,XmmReg 1 2 W64 Integer
+    ,XmmReg 1
 #endif
 #if defined(REG_YMM1)
-    ,YmmReg 1 4 W64 Integer
+    ,YmmReg 1
 #endif
 #if defined(REG_ZMM1)
-    ,ZmmReg 1 8 W64 Integer
+    ,ZmmReg 1
 #endif
 #if defined(REG_F2)
     ,FloatReg 2
@@ -510,13 +510,13 @@ activeStgRegs = [
     ,DoubleReg 2
 #endif
 #if defined(REG_XMM2)
-    ,XmmReg 2 2 W64 Integer
+    ,XmmReg 2
 #endif
 #if defined(REG_YMM2)
-    ,YmmReg 2 4 W64 Integer
+    ,YmmReg 2
 #endif
 #if defined(REG_ZMM2)
-    ,ZmmReg 2 8 W64 Integer
+    ,ZmmReg 2
 #endif
 #if defined(REG_F3)
     ,FloatReg 3
@@ -525,13 +525,13 @@ activeStgRegs = [
     ,DoubleReg 3
 #endif
 #if defined(REG_XMM3)
-    ,XmmReg 3 2 W64 Integer
+    ,XmmReg 3
 #endif
 #if defined(REG_YMM3)
-    ,YmmReg 3 4 W64 Integer
+    ,YmmReg 3
 #endif
 #if defined(REG_ZMM3)
-    ,ZmmReg 3 8 W64 Integer
+    ,ZmmReg 3
 #endif
 #if defined(REG_F4)
     ,FloatReg 4
@@ -540,13 +540,13 @@ activeStgRegs = [
     ,DoubleReg 4
 #endif
 #if defined(REG_XMM4)
-    ,XmmReg 4 2 W64 Integer
+    ,XmmReg 4
 #endif
 #if defined(REG_YMM4)
-    ,YmmReg 4 4 W64 Integer
+    ,YmmReg 4
 #endif
 #if defined(REG_ZMM4)
-    ,ZmmReg 4 8 W64 Integer
+    ,ZmmReg 4
 #endif
 #if defined(REG_F5)
     ,FloatReg 5
@@ -555,13 +555,13 @@ activeStgRegs = [
     ,DoubleReg 5
 #endif
 #if defined(REG_XMM5)
-    ,XmmReg 5 2 W64 Integer
+    ,XmmReg 5
 #endif
 #if defined(REG_YMM5)
-    ,YmmReg 5 4 W64 Integer
+    ,YmmReg 5
 #endif
 #if defined(REG_ZMM5)
-    ,ZmmReg 5 8 W64 Integer
+    ,ZmmReg 5
 #endif
 #if defined(REG_F6)
     ,FloatReg 6
@@ -570,13 +570,13 @@ activeStgRegs = [
     ,DoubleReg 6
 #endif
 #if defined(REG_XMM6)
-    ,XmmReg 6 2 W64 Integer
+    ,XmmReg 6
 #endif
 #if defined(REG_YMM6)
-    ,YmmReg 6 4 W64 Integer
+    ,YmmReg 6
 #endif
 #if defined(REG_ZMM6)
-    ,ZmmReg 6 8 W64 Integer
+    ,ZmmReg 6
 #endif
 #else /* MAX_REAL_XMM_REG == 0 */
 #if defined(REG_F1)
@@ -733,62 +733,62 @@ globalRegMaybe (DoubleReg 6)            =
 # endif
 # if MAX_REAL_XMM_REG != 0
 #  if defined(REG_XMM1)
-globalRegMaybe (XmmReg 1 _ _ _)         = Just (RealRegSingle REG_XMM1)
+globalRegMaybe (XmmReg 1)               = Just (RealRegSingle REG_XMM1)
 #  endif
 #  if defined(REG_XMM2)
-globalRegMaybe (XmmReg 2 _ _ _)         = Just (RealRegSingle REG_XMM2)
+globalRegMaybe (XmmReg 2)               = Just (RealRegSingle REG_XMM2)
 #  endif
 #  if defined(REG_XMM3)
-globalRegMaybe (XmmReg 3 _ _ _)         = Just (RealRegSingle REG_XMM3)
+globalRegMaybe (XmmReg 3)               = Just (RealRegSingle REG_XMM3)
 #  endif
 #  if defined(REG_XMM4)
-globalRegMaybe (XmmReg 4 _ _ _)         = Just (RealRegSingle REG_XMM4)
+globalRegMaybe (XmmReg 4)               = Just (RealRegSingle REG_XMM4)
 #  endif
 #  if defined(REG_XMM5)
-globalRegMaybe (XmmReg 5 _ _ _)         = Just (RealRegSingle REG_XMM5)
+globalRegMaybe (XmmReg 5)               = Just (RealRegSingle REG_XMM5)
 #  endif
 #  if defined(REG_XMM6)
-globalRegMaybe (XmmReg 6 _ _ _)         = Just (RealRegSingle REG_XMM6)
+globalRegMaybe (XmmReg 6)               = Just (RealRegSingle REG_XMM6)
 #  endif
 # endif
 # if defined(MAX_REAL_YMM_REG) && MAX_REAL_YMM_REG != 0
 #  if defined(REG_YMM1)
-globalRegMaybe (YmmReg 1 _ _ _)         = Just (RealRegSingle REG_YMM1)
+globalRegMaybe (YmmReg 1)               = Just (RealRegSingle REG_YMM1)
 #  endif
 #  if defined(REG_YMM2)
-globalRegMaybe (YmmReg 2 _ _ _)         = Just (RealRegSingle REG_YMM2)
+globalRegMaybe (YmmReg 2)               = Just (RealRegSingle REG_YMM2)
 #  endif
 #  if defined(REG_YMM3)
-globalRegMaybe (YmmReg 3 _ _ _)         = Just (RealRegSingle REG_YMM3)
+globalRegMaybe (YmmReg 3)               = Just (RealRegSingle REG_YMM3)
 #  endif
 #  if defined(REG_YMM4)
-globalRegMaybe (YmmReg 4 _ _ _)         = Just (RealRegSingle REG_YMM4)
+globalRegMaybe (YmmReg 4)               = Just (RealRegSingle REG_YMM4)
 #  endif
 #  if defined(REG_YMM5)
-globalRegMaybe (YmmReg 5 _ _ _)         = Just (RealRegSingle REG_YMM5)
+globalRegMaybe (YmmReg 5)               = Just (RealRegSingle REG_YMM5)
 #  endif
 #  if defined(REG_YMM6)
-globalRegMaybe (YmmReg 6 _ _ _)         = Just (RealRegSingle REG_YMM6)
+globalRegMaybe (YmmReg 6)               = Just (RealRegSingle REG_YMM6)
 #  endif
 # endif
 # if defined(MAX_REAL_ZMM_REG) && MAX_REAL_ZMM_REG != 0
 #  if defined(REG_ZMM1)
-globalRegMaybe (ZmmReg 1 _ _ _)         = Just (RealRegSingle REG_ZMM1)
+globalRegMaybe (ZmmReg 1)               = Just (RealRegSingle REG_ZMM1)
 #  endif
 #  if defined(REG_ZMM2)
-globalRegMaybe (ZmmReg 2 _ _ _)         = Just (RealRegSingle REG_ZMM2)
+globalRegMaybe (ZmmReg 2)               = Just (RealRegSingle REG_ZMM2)
 #  endif
 #  if defined(REG_ZMM3)
-globalRegMaybe (ZmmReg 3 _ _ _)         = Just (RealRegSingle REG_ZMM3)
+globalRegMaybe (ZmmReg 3)               = Just (RealRegSingle REG_ZMM3)
 #  endif
 #  if defined(REG_ZMM4)
-globalRegMaybe (ZmmReg 4 _ _ _)         = Just (RealRegSingle REG_ZMM4)
+globalRegMaybe (ZmmReg 4)               = Just (RealRegSingle REG_ZMM4)
 #  endif
 #  if defined(REG_ZMM5)
-globalRegMaybe (ZmmReg 5 _ _ _)         = Just (RealRegSingle REG_ZMM5)
+globalRegMaybe (ZmmReg 5)               = Just (RealRegSingle REG_ZMM5)
 #  endif
 #  if defined(REG_ZMM6)
-globalRegMaybe (ZmmReg 6 _ _ _)         = Just (RealRegSingle REG_ZMM6)
+globalRegMaybe (ZmmReg 6)               = Just (RealRegSingle REG_ZMM6)
 #  endif
 # endif
 # if defined(REG_Sp)
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index eb6fee544f..20ac9cc59e 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -80,20 +80,11 @@ test('cgrun069',
 test('cgrun070', normal, compile_and_run, [''])
 test('cgrun071', [when(have_cpu_feature('sse4_2'), extra_hc_opts('-msse4.2'))], compile_and_run, [''])
 test('cgrun072', normal, compile_and_run, [''])
-test('cgrun074', normal, compile_and_run, [''])
 test('cgrun075', normal, compile_and_run, [''])
 test('cgrun076', normal, compile_and_run, [''])
 test('cgrun077', [when(have_cpu_feature('bmi2'), extra_hc_opts('-mbmi2'))], compile_and_run, [''])
 test('cgrun078', omit_ways(['ghci']), compile_and_run, [''])
 
-# N.B. Only NCG and LLVM backends support SIMD operations
-test('simd000', when(unregisterised(), skip), compile_and_run, [''])
-test('simd001', when(unregisterised(), skip), compile_and_run, [''])
-test('simd002', when(unregisterised(), skip), compile_and_run, [''])
-test('simd003', when(unregisterised(), skip), compile_and_run, [''])
-test('simd004', when(unregisterised(), skip), compile_and_run, [''])
-test('simd005', when(unregisterised(), skip), compile_and_run, [''])
-
 test('T1852', normal, compile_and_run, [''])
 test('T1861', extra_run_opts('0'), compile_and_run, [''])
 test('T2080', normal, compile_and_run, [''])
@@ -152,6 +143,7 @@ test('T9001', normal, compile_and_run, [''])
 test('T9013', omit_ways(['ghci']),  # ghci doesn't support unboxed tuples
      compile_and_run, [''])
 test('T9340', normal, compile_and_run, [''])
+test('cgrun074', normal, compile_and_run, [''])
 test('CmmSwitchTest32', unless(wordsize(32), skip), compile_and_run, [''])
 test('CmmSwitchTest64', unless(wordsize(64), skip), compile_and_run, [''])
 # Skipping WAY=ghci, because it is not broken.
diff --git a/testsuite/tests/codeGen/should_run/cgrun083.hs b/testsuite/tests/codeGen/should_run/cgrun083.hs
deleted file mode 100644
index cac889ec02..0000000000
--- a/testsuite/tests/codeGen/should_run/cgrun083.hs
+++ /dev/null
@@ -1,70 +0,0 @@
-{-# OPTIONS_GHC -O2 #-}
-{-# OPTIONS_GHC -msse #-}
-{-# OPTIONS_GHC -msse2 #-}
-{-# OPTIONS_GHC -msse4 #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- tests for SSE based vector load/stores operations
-
-import GHC.Exts
-import GHC.IO
-
-data ByteArray = BA (MutableByteArray# RealWorld)
-
-data FloatX4  = FX4# FloatX4#
-
-instance Show FloatX4 where
-  show (FX4# f) = case (unpackFloatX4# f) of
-    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
-
-
-instance Eq FloatX4 where
-  (FX4# a) == (FX4# b)
-    = case (unpackFloatX4# a) of
-        (# a1, a2, a3, a4 #) ->
-          case (unpackFloatX4# b) of
-            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
-                                    (F# a2) == (F# b2) &&
-                                    (F# a3) == (F# b3) &&
-                                    (F# a4) == (F# b4)
-
-data DoubleX2 = DX2# DoubleX2#
-
-instance Show DoubleX2 where
-  show (DX2# d) = case (unpackDoubleX2# d) of
-    (# a, b #) -> show ((D# a), (D# b))
-
-
-instance Eq DoubleX2 where
-  (DX2# a) == (DX2# b)
-    = case (unpackDoubleX2# a) of
-        (# a1, a2 #) ->
-          case (unpackDoubleX2# b) of
-            (# b1, b2 #) -> (D# a1) == (D# b1) &&
-                            (D# a2) == (D# b2)
-
-writeFloatArray :: ByteArray -> Int -> Float -> IO ()
-writeFloatArray (BA ba) (I# i) (F# n) = IO $ \s ->
-    case writeFloatArray# ba i n s of s' -> (# s', () #)
-
-readFloatX4 :: ByteArray -> Int -> IO FloatX4
-readFloatX4 (BA ba) (I# i) = IO $ \s ->
-    case readFloatArrayAsFloatX4# ba i s of (# s', r #) -> (# s', FX4# r #)
-
-writeDoubleArray :: ByteArray -> Int -> Double -> IO ()
-writeDoubleArray (BA ba) (I# i) (D# n) = IO $ \s ->
-    case writeDoubleArray# ba i n s of s' -> (# s', () #)
-
-readDoubleX2 :: ByteArray -> Int -> IO DoubleX2
-readDoubleX2 (BA ba) (I# i) = IO $ \s ->
-    case readDoubleArrayAsDoubleX2# ba i s of (# s', r #) -> (# s', DX2# r #)
-
-main :: IO ()
-main = do
-    ba <- IO $ \s -> case newAlignedPinnedByteArray# 64# 64# s of (# s', ba #) -> (# s', BA ba #)
-
-    mapM_ (\i -> writeFloatArray ba i (realToFrac i + realToFrac i / 10)) [0..16]
-    print =<< readFloatX4 ba 0
-
-    mapM_ (\i -> writeDoubleArray ba i (realToFrac i + realToFrac i / 10)) [0..8]
-    print =<< readDoubleX2 ba 0
diff --git a/testsuite/tests/codeGen/should_run/cgrun083.stdout b/testsuite/tests/codeGen/should_run/cgrun083.stdout
deleted file mode 100644
index bc41b3d2d3..0000000000
--- a/testsuite/tests/codeGen/should_run/cgrun083.stdout
+++ /dev/null
@@ -1,2 +0,0 @@
-(0.0,1.1,2.2,3.3)
-(0.0,1.1)
diff --git a/testsuite/tests/codeGen/should_run/simd000.hs b/testsuite/tests/codeGen/should_run/simd000.hs
deleted file mode 100644
index 47d69497c0..0000000000
--- a/testsuite/tests/codeGen/should_run/simd000.hs
+++ /dev/null
@@ -1,21 +0,0 @@
-{-# OPTIONS_GHC -mavx #-}
-{-# OPTIONS_GHC -msse4 #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- !!! test broadcasting, packing and unpacking for vector types
-
-import GHC.Exts
-
-main :: IO ()
-main = do
-    -- FloatX4#
-    case unpackFloatX4# (broadcastFloatX4# 1.5#) of
-        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
-    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
-        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
-
-    -- DoubleX2#
-    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
-        (# a, b #) -> print (D# a, D# b)
-    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
-        (# a, b #) -> print (D# a, D# b)
diff --git a/testsuite/tests/codeGen/should_run/simd000.stdout b/testsuite/tests/codeGen/should_run/simd000.stdout
deleted file mode 100644
index e5f9d383ec..0000000000
--- a/testsuite/tests/codeGen/should_run/simd000.stdout
+++ /dev/null
@@ -1,4 +0,0 @@
-(1.5,1.5,1.5,1.5)
-(4.5,7.8,2.3,6.5)
-(6.5,6.5)
-(8.9,7.2)
diff --git a/testsuite/tests/codeGen/should_run/simd001.hs b/testsuite/tests/codeGen/should_run/simd001.hs
deleted file mode 100644
index c45e3bf922..0000000000
--- a/testsuite/tests/codeGen/should_run/simd001.hs
+++ /dev/null
@@ -1,49 +0,0 @@
-{-# OPTIONS_GHC -mavx #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- !!! test the lifting of unlifted vector types and
--- defining various typeclass instances for the lifted types
-
-import GHC.Exts
-
-data FloatX4  = FX4# FloatX4#
-
-instance Show FloatX4 where
-  show (FX4# f) = case (unpackFloatX4# f) of
-    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
-
-
-instance Eq FloatX4 where
-  (FX4# a) == (FX4# b)
-    = case (unpackFloatX4# a) of
-        (# a1, a2, a3, a4 #) ->
-          case (unpackFloatX4# b) of
-            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
-                                    (F# a2) == (F# b2) &&
-                                    (F# a3) == (F# b3) &&
-                                    (F# a4) == (F# b4)
-
-data DoubleX2 = DX2# DoubleX2#
-
-instance Show DoubleX2 where
-  show (DX2# d) = case (unpackDoubleX2# d) of
-    (# a, b #) -> show ((D# a), (D# b))
-
-
-instance Eq DoubleX2 where
-  (DX2# a) == (DX2# b)
-    = case (unpackDoubleX2# a) of
-        (# a1, a2 #) ->
-          case (unpackDoubleX2# b) of
-            (# b1, b2 #) -> (D# a1) == (D# b1) &&
-                            (D# a2) == (D# b2)
-
-main :: IO ()
-main = do
-  print (FX4# (broadcastFloatX4# 1.5#))
-  print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
-  print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
-
-  print (DX2# (broadcastDoubleX2# 2.5##))
-  print $ (DX2# (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
-  print $ (DX2# (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
diff --git a/testsuite/tests/codeGen/should_run/simd001.stdout b/testsuite/tests/codeGen/should_run/simd001.stdout
deleted file mode 100644
index 899f900506..0000000000
--- a/testsuite/tests/codeGen/should_run/simd001.stdout
+++ /dev/null
@@ -1,6 +0,0 @@
-(1.5,1.5,1.5,1.5)
-False
-True
-(2.5,2.5)
-False
-True
diff --git a/testsuite/tests/codeGen/should_run/simd002.hs b/testsuite/tests/codeGen/should_run/simd002.hs
deleted file mode 100644
index 8c61546381..0000000000
--- a/testsuite/tests/codeGen/should_run/simd002.hs
+++ /dev/null
@@ -1,33 +0,0 @@
-{-# OPTIONS_GHC -mavx #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- !!! test arithmetic vector operations
-
-import GHC.Exts
-
-data FloatX4 = FX4# FloatX4#
-
-instance Show FloatX4 where
-  show (FX4# f) = case (unpackFloatX4# f) of
-    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
-
-data DoubleX2 = DX2# DoubleX2#
-
-instance Show DoubleX2 where
-  show (DX2# d) = case (unpackDoubleX2# d) of
-    (# a, b #) -> show ((D# a), (D# b))
-
-
-main :: IO ()
-main = do
-  print (FX4# (plusFloatX4# (broadcastFloatX4# 1.3#) (broadcastFloatX4# 2.2#)))
-  print (FX4# (minusFloatX4# (broadcastFloatX4# 3.5#) (broadcastFloatX4# 2.2#)))
-  print (FX4# (timesFloatX4# (broadcastFloatX4# 2.4#) (broadcastFloatX4# 2.2#)))
-  print (FX4# (divideFloatX4# (broadcastFloatX4# 9.2#) (broadcastFloatX4# 4.0#)))
-  print (FX4# (negateFloatX4# (broadcastFloatX4# 3.5#)))
-
-  print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##) (broadcastDoubleX2# 2.2##)))
-  print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##) (broadcastDoubleX2# 2.2##)))
-  print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##) (broadcastDoubleX2# 2.2##)))
-  print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##) (broadcastDoubleX2# 4.0##)))
-  print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd002.stdout b/testsuite/tests/codeGen/should_run/simd002.stdout
deleted file mode 100644
index 302d71a13f..0000000000
--- a/testsuite/tests/codeGen/should_run/simd002.stdout
+++ /dev/null
@@ -1,10 +0,0 @@
-(3.5,3.5,3.5,3.5)
-(1.3,1.3,1.3,1.3)
-(5.28,5.28,5.28,5.28)
-(2.3,2.3,2.3,2.3)
-(-3.5,-3.5,-3.5,-3.5)
-(3.5,3.5)
-(1.2999999999999998,1.2999999999999998)
-(5.28,5.28)
-(2.3,2.3)
-(-3.5,-3.5)
diff --git a/testsuite/tests/codeGen/should_run/simd003.hs b/testsuite/tests/codeGen/should_run/simd003.hs
deleted file mode 100644
index de3ae5aeb4..0000000000
--- a/testsuite/tests/codeGen/should_run/simd003.hs
+++ /dev/null
@@ -1,25 +0,0 @@
-{-# OPTIONS_GHC -msse4 #-}
-{-# OPTIONS_GHC -mavx #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- !!! test the packing of floats and doubles into a vector
-
-import GHC.Exts
-
-data FloatX4 = FX4# FloatX4#
-
-instance Show FloatX4 where
-  show (FX4# f) = case (unpackFloatX4# f) of
-    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
-
-data DoubleX2 = DX2# DoubleX2#
-
-instance Show DoubleX2 where
-  show (DX2# d) = case (unpackDoubleX2# d) of
-    (# a, b #) -> show ((D# a), (D# b))
-
-
-main :: IO ()
-main = do
-  print (FX4# (packFloatX4# (# 9.2#, 8.15#, 7.0#, 6.4# #)))
-  print (DX2# (packDoubleX2# (# 7.2##, 9.3## #)))
diff --git a/testsuite/tests/codeGen/should_run/simd003.stdout b/testsuite/tests/codeGen/should_run/simd003.stdout
deleted file mode 100644
index 230e4658c4..0000000000
--- a/testsuite/tests/codeGen/should_run/simd003.stdout
+++ /dev/null
@@ -1,2 +0,0 @@
-(9.2,8.15,7.0,6.4)
-(7.2,9.3)
diff --git a/testsuite/tests/codeGen/should_run/simd004.hs b/testsuite/tests/codeGen/should_run/simd004.hs
deleted file mode 100644
index 5216822ec4..0000000000
--- a/testsuite/tests/codeGen/should_run/simd004.hs
+++ /dev/null
@@ -1,95 +0,0 @@
-{-# OPTIONS_GHC -O2 #-}
-{-# OPTIONS_GHC -mavx #-}
-{-# OPTIONS_GHC -msse4 #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- !!! test if enabling -O2 produces wrong results while
---     packing , broadcasting, unpacking vectors and for
---     arithmetic operations as well for avx instructions
-
-import GHC.Exts
-
-data FloatX4  = FX4# FloatX4#
-
-instance Show FloatX4 where
-  show (FX4# f) = case (unpackFloatX4# f) of
-    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
-
-
-instance Eq FloatX4 where
-  (FX4# a) == (FX4# b)
-    = case (unpackFloatX4# a) of
-        (# a1, a2, a3, a4 #) ->
-          case (unpackFloatX4# b) of
-            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
-                                    (F# a2) == (F# b2) &&
-                                    (F# a3) == (F# b3) &&
-                                    (F# a4) == (F# b4)
-
-data DoubleX2 = DX2# DoubleX2#
-
-instance Show DoubleX2 where
-  show (DX2# d) = case (unpackDoubleX2# d) of
-    (# a, b #) -> show ((D# a), (D# b))
-
-
-instance Eq DoubleX2 where
-  (DX2# a) == (DX2# b)
-    = case (unpackDoubleX2# a) of
-        (# a1, a2 #) ->
-          case (unpackDoubleX2# b) of
-            (# b1, b2 #) -> (D# a1) == (D# b1) &&
-                            (D# a2) == (D# b2)
-
-
-main :: IO ()
-main = do
-
-    -- !!! test broadcasting, packing and unpacking for vector types
-    -- FloatX4#
-    case unpackFloatX4# (broadcastFloatX4# 1.5#) of
-        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
-    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
-        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
-
-    -- DoubleX2#
-    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
-        (# a, b #) -> print (D# a, D# b)
-    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
-        (# a, b #) -> print (D# a, D# b)
-
-
-    -- !!! test the lifting of unlifted vector types and
-    -- defining various typeclass instances for the lifted types
-
-    print (FX4# (broadcastFloatX4# 1.5#))
-    print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
-    print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
-
-    print (DX2# (broadcastDoubleX2# 2.5##))
-    print $ (DX2#
-             (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
-    print $ (DX2#
-             (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
-
-
-    -- !!! test arithmetic vector operations
-    print (FX4# (plusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                              (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (minusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (timesFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (divideFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                                (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (negateFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))))
-
-    print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##)
-                               (broadcastDoubleX2# 2.2##)))
-    print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##)
-                                (broadcastDoubleX2# 2.2##)))
-    print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##)
-                                (broadcastDoubleX2# 2.2##)))
-    print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##)
-                                 (broadcastDoubleX2# 4.0##)))
-    print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd004.stdout b/testsuite/tests/codeGen/should_run/simd004.stdout
deleted file mode 100644
index ee90e738ca..0000000000
--- a/testsuite/tests/codeGen/should_run/simd004.stdout
+++ /dev/null
@@ -1,20 +0,0 @@
-(1.5,1.5,1.5,1.5)
-(4.5,7.8,2.3,6.5)
-(6.5,6.5)
-(8.9,7.2)
-(1.5,1.5,1.5,1.5)
-False
-True
-(2.5,2.5)
-False
-True
-(12.7,14.1,7.0,15.7)
-(-3.6999998,1.5,-2.3999999,-2.6999998)
-(36.899998,49.140003,10.809999,59.8)
-(0.5487805,1.2380953,0.4893617,0.70652175)
-(-4.5,-7.8,-2.3,-6.5)
-(3.5,3.5)
-(1.2999999999999998,1.2999999999999998)
-(5.28,5.28)
-(2.3,2.3)
-(-3.5,-3.5)
-\ No newline at end of file
diff --git a/testsuite/tests/codeGen/should_run/simd005.hs b/testsuite/tests/codeGen/should_run/simd005.hs
deleted file mode 100644
index b074066d24..0000000000
--- a/testsuite/tests/codeGen/should_run/simd005.hs
+++ /dev/null
@@ -1,93 +0,0 @@
-{-# OPTIONS_GHC -O2 #-}
-{-# OPTIONS_GHC -msse #-}
-{-# OPTIONS_GHC -msse2 #-}
-{-# OPTIONS_GHC -msse4 #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE UnboxedTuples #-}
--- tests for SSE based vector operations
-
-import GHC.Exts
-
-data FloatX4  = FX4# FloatX4#
-
-instance Show FloatX4 where
-  show (FX4# f) = case (unpackFloatX4# f) of
-    (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d))
-
-
-instance Eq FloatX4 where
-  (FX4# a) == (FX4# b)
-    = case (unpackFloatX4# a) of
-        (# a1, a2, a3, a4 #) ->
-          case (unpackFloatX4# b) of
-            (# b1, b2, b3, b4 #) -> (F# a1) == (F# b1) &&
-                                    (F# a2) == (F# b2) &&
-                                    (F# a3) == (F# b3) &&
-                                    (F# a4) == (F# b4)
-
-data DoubleX2 = DX2# DoubleX2#
-
-instance Show DoubleX2 where
-  show (DX2# d) = case (unpackDoubleX2# d) of
-    (# a, b #) -> show ((D# a), (D# b))
-
-
-instance Eq DoubleX2 where
-  (DX2# a) == (DX2# b)
-    = case (unpackDoubleX2# a) of
-        (# a1, a2 #) ->
-          case (unpackDoubleX2# b) of
-            (# b1, b2 #) -> (D# a1) == (D# b1) &&
-                            (D# a2) == (D# b2)
-
-main :: IO ()
-main = do
-
-    -- !!! test broadcasting, packing and unpacking for vector types
-    -- FloatX4#
-    case unpackFloatX4# (broadcastFloatX4# 1.5#) of
-        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
-    case unpackFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #)) of
-        (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
-
-    -- DoubleX2#
-    case unpackDoubleX2# (broadcastDoubleX2# 6.5##) of
-        (# a, b #) -> print (D# a, D# b)
-    case unpackDoubleX2# (packDoubleX2# (# 8.9##,7.2## #)) of
-        (# a, b #) -> print (D# a, D# b)
-
-
-    -- !!! test the lifting of unlifted vector types and
-    -- defining various typeclass instances for the lifted types
-
-    print (FX4# (broadcastFloatX4# 1.5#))
-    print $ (FX4# (broadcastFloatX4# 1.5#)) == (FX4# (broadcastFloatX4# 2.5#))
-    print $ (FX4# (broadcastFloatX4# 3.5#)) == (FX4# (broadcastFloatX4# 3.5#))
-
-    print (DX2# (broadcastDoubleX2# 2.5##))
-    print $ (DX2#
-             (broadcastDoubleX2# 1.5##)) == (DX2# (broadcastDoubleX2# 2.5##))
-    print $ (DX2#
-             (broadcastDoubleX2# 3.5##)) == (DX2# (broadcastDoubleX2# 3.5##))
-
-
-    -- !!! test arithmetic vector operations
-    print (FX4# (plusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                              (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (minusFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (timesFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                               (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (divideFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))
-                                (packFloatX4# (# 8.2#,6.3#, 4.7#, 9.2# #))))
-    print (FX4# (negateFloatX4# (packFloatX4# (# 4.5#,7.8#, 2.3#, 6.5# #))))
-
-    print (DX2# (plusDoubleX2# (broadcastDoubleX2# 1.3##)
-                               (broadcastDoubleX2# 2.2##)))
-    print (DX2# (minusDoubleX2# (broadcastDoubleX2# 3.5##)
-                                (broadcastDoubleX2# 2.2##)))
-    print (DX2# (timesDoubleX2# (broadcastDoubleX2# 2.4##)
-                                (broadcastDoubleX2# 2.2##)))
-    print (DX2# (divideDoubleX2# (broadcastDoubleX2# 9.2##)
-                                 (broadcastDoubleX2# 4.0##)))
-    print (DX2# (negateDoubleX2# (broadcastDoubleX2# 3.5##)))
diff --git a/testsuite/tests/codeGen/should_run/simd005.stdout b/testsuite/tests/codeGen/should_run/simd005.stdout
deleted file mode 100644
index 84386823f8..0000000000
--- a/testsuite/tests/codeGen/should_run/simd005.stdout
+++ /dev/null
@@ -1,20 +0,0 @@
-(1.5,1.5,1.5,1.5)
-(4.5,7.8,2.3,6.5)
-(6.5,6.5)
-(8.9,7.2)
-(1.5,1.5,1.5,1.5)
-False
-True
-(2.5,2.5)
-False
-True
-(12.7,14.1,7.0,15.7)
-(-3.6999998,1.5,-2.3999999,-2.6999998)
-(36.899998,49.140003,10.809999,59.8)
-(0.5487805,1.2380953,0.4893617,0.70652175)
-(-4.5,-7.8,-2.3,-6.5)
-(3.5,3.5)
-(1.2999999999999998,1.2999999999999998)
-(5.28,5.28)
-(2.3,2.3)
-(-3.5,-3.5)