6 files changed, 71 insertions, 48 deletions
diff --git a/compiler/basicTypes/BasicTypes.hs b/compiler/basicTypes/BasicTypes.hs
index ded9c0d9cf..f2d6f2b46a 100644
--- a/compiler/basicTypes/BasicTypes.hs
+++ b/compiler/basicTypes/BasicTypes.hs
@@ -26,7 +26,7 @@ module BasicTypes(
 
         Arity, RepArity, JoinArity,
 
-        Alignment,
+        Alignment, mkAlignment, alignmentOf, alignmentBytes,
 
         PromotionFlag(..), isPromoted,
         FunctionOrData(..),
@@ -116,6 +116,7 @@ import Outputable
 import SrcLoc ( Located,unLoc )
 import Data.Data hiding (Fixity, Prefix, Infix)
 import Data.Function (on)
+import Data.Bits
 
 {-
 ************************************************************************
@@ -196,8 +197,39 @@ fIRST_TAG =  1
 ************************************************************************
 -}
 
-type Alignment = Int -- align to next N-byte boundary (N must be a power of 2).
-
+-- | A power-of-two alignment
+newtype Alignment = Alignment { alignmentBytes :: Int } deriving (Eq, Ord)
+
+-- Builds an alignment, throws on non power of 2 input. This is not
+-- ideal, but convenient for internal use and better then silently
+-- passing incorrect data.
+mkAlignment :: Int -> Alignment
+mkAlignment n
+  | n == 1 = Alignment 1
+  | n == 2 = Alignment 2
+  | n == 4 = Alignment 4
+  | n == 8 = Alignment 8
+  | n == 16 = Alignment 16
+  | n == 32 = Alignment 32
+  | n == 64 = Alignment 64
+  | n == 128 = Alignment 128
+  | n == 256 = Alignment 256
+  | n == 512 = Alignment 512
+  | otherwise = panic "mkAlignment: received either a non power of 2 argument or > 512"
+
+-- Calculates an alignment of a number. x is aligned at N bytes means
+-- the remainder from x / N is zero. Currently, interested in N <= 8,
+-- but can be expanded to N <= 16 or N <= 32 if used within SSE or AVX
+-- context.
+alignmentOf :: Int -> Alignment
+alignmentOf x = case x .&. 7 of
+  0 -> Alignment 8
+  4 -> Alignment 4
+  2 -> Alignment 2
+  _ -> Alignment 1
+
+instance Outputable Alignment where
+  ppr (Alignment m) = ppr m
 {-
 ************************************************************************
 *                                                                      *
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 1abef3a90a..63d8f7bc0e 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -2075,16 +2075,15 @@ doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
                  -> FCode ()
 doSetByteArrayOp ba off len c = do
     dflags <- getDynFlags
-    let maxAlign = wORD_SIZE dflags
-        align = minimum [maxAlign, possibleAlign]
 
-    p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
+    let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
+        offsetAlignment = case off of
+            CmmLit (CmmInt intOff _) -> alignmentOf (fromInteger intOff)
+            _ -> mkAlignment 1
+        align = min byteArrayAlignment offsetAlignment
 
+    p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
     emitMemsetCall p c len align
-  where
-    possibleAlign = case off of
-      CmmLit (CmmInt intOff _) -> fromIntegral $ byteAlignment (fromIntegral intOff)
-      _ -> 1
 
 -- ----------------------------------------------------------------------------
 -- Allocating arrays
@@ -2355,7 +2354,7 @@ emitSetCards dst_start dst_cards_start n = do
     emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
         (mkIntExpr dflags 1)
         (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
-        1 -- no alignment (1 byte)
+        (mkAlignment 1) -- no alignment (1 byte)
 
 -- Convert an element index to a card index
 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
@@ -2481,11 +2480,11 @@ emitMemmoveCall dst src n align = do
 
 -- | Emit a call to @memset@.  The second argument must fit inside an
 -- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
 emitMemsetCall dst c n align = do
     emitPrimCall
         [ {- no results -} ]
-        (MO_Memset align)
+        (MO_Memset (alignmentBytes align))
         [ dst, c, n ]
 
 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 66a5335db6..68c0735f71 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -147,6 +147,7 @@ module DynFlags (
 #include "GHCConstantsHaskellExports.hs"
         bLOCK_SIZE_W,
         wORD_SIZE_IN_BITS,
+        wordAlignment,
         tAG_MASK,
         mAX_PTR_TAG,
         tARGET_MIN_INT, tARGET_MAX_INT, tARGET_MAX_WORD,
@@ -205,7 +206,7 @@ import Maybes
 import MonadUtils
 import qualified Pretty
 import SrcLoc
-import BasicTypes       ( IntWithInf, treatZeroAsInf )
+import BasicTypes       ( Alignment, alignmentOf, IntWithInf, treatZeroAsInf )
 import FastString
 import Fingerprint
 import Outputable
@@ -5661,6 +5662,9 @@ bLOCK_SIZE_W dflags = bLOCK_SIZE dflags `quot` wORD_SIZE dflags
 wORD_SIZE_IN_BITS :: DynFlags -> Int
 wORD_SIZE_IN_BITS dflags = wORD_SIZE dflags * 8
 
+wordAlignment :: DynFlags -> Alignment
+wordAlignment dflags = alignmentOf (wORD_SIZE dflags)
+
 tAG_MASK :: DynFlags -> Int
 tAG_MASK dflags = (1 `shiftL` tAG_BITS dflags) - 1
 
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 06ebd2adb5..61686186f1 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -128,7 +128,7 @@ cmmTopCodeGen (CmmProc info lab live graph) = do
       Nothing -> return tops
 
 cmmTopCodeGen (CmmData sec dat) = do
-  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
+  return [CmmData sec (mkAlignment 1, dat)]  -- no translation, we just use CmmStatic
 
 
 basicBlockCodeGen
@@ -569,7 +569,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
       return (Any format code)
 
    | otherwise = do
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       loadFloatAmode True w addr code
 
   float_const_x87 = case w of
@@ -583,7 +583,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
         in  return (Any FF80 code)
 
     _otherwise -> do
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       loadFloatAmode False w addr code
 
 -- catch simple cases of zero- or sign-extended load
@@ -1247,7 +1247,7 @@ getNonClobberedOperand (CmmLit lit) = do
   if use_sse2 && isSuitableFloatingPointLit lit
     then do
       let CmmFloat _ w = lit
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       return (OpAddr addr, code)
      else do
 
@@ -1303,7 +1303,7 @@ getOperand (CmmLit lit) = do
   if (use_sse2 && isSuitableFloatingPointLit lit)
     then do
       let CmmFloat _ w = lit
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       return (OpAddr addr, code)
     else do
 
@@ -1351,7 +1351,7 @@ addAlignmentCheck align reg =
              , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
              ]
 
-memConstant :: Int -> CmmLit -> NatM Amode
+memConstant :: Alignment -> CmmLit -> NatM Amode
 memConstant align lit = do
   lbl <- getNewLabelNat
   let rosection = Section ReadOnlyData lbl
@@ -1843,7 +1843,7 @@ genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
         dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                    (ImmInteger (n - i))
 
-genCCall dflags is32Bit (PrimTarget (MO_Memset align)) _
+genCCall dflags _ (PrimTarget (MO_Memset align)) _
          [dst,
           CmmLit (CmmInt c _),
           CmmLit (CmmInt n _)]
@@ -1861,11 +1861,9 @@ genCCall dflags is32Bit (PrimTarget (MO_Memset align)) _
           return $ code_dst dst_r `appOL`
                    go4 dst_r (fromInteger n)
   where
-    format = case byteAlignment (fromIntegral align) of
-        8  -> if is32Bit then II32 else II64
-        4  -> II32
-        2 -> II16
-        _ -> II8
+    maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported
+    effectiveAlignment = min (alignmentOf align) maxAlignment
+    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
     c2 = c `shiftL` 8 .|. c
     c4 = c2 `shiftL` 16 .|. c2
     c8 = c4 `shiftL` 32 .|. c4
@@ -2352,7 +2350,7 @@ genCCall _ is32Bit target dest_regs args bid = do
           let
             const | FF32 <- fmt = CmmInt 0x7fffffff W32
                   | otherwise   = CmmInt 0x7fffffffffffffff W64
-          Amode amode amode_code <- memConstant (widthInBytes w) const
+          Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
           tmp <- getNewRegNat fmt
           let
             code dst = x_code dst `appOL` amode_code `appOL` toOL [
@@ -3081,7 +3079,7 @@ createJumpTable dflags ids section lbl
                           where blockLabel = blockLbl blockid
                   in map jumpTableEntryRel ids
             | otherwise = map (jumpTableEntry dflags) ids
-      in CmmData section (1, Statics lbl jumpTable)
+      in CmmData section (mkAlignment 1, Statics lbl jumpTable)
 
 extractUnwindPoints :: [Instr] -> [UnwindPoint]
 extractUnwindPoints instrs =
@@ -3448,7 +3446,7 @@ sse2NegCode w x = do
       x@FF80 -> wrongFmt x
       where
         wrongFmt x = panic $ "sse2NegCode: " ++ show x
-  Amode amode amode_code <- memConstant (widthInBytes w) const
+  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
   tmp <- getNewRegNat fmt
   let
     code dst = x_code dst `appOL` amode_code `appOL` toOL [
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 4f5a5f273e..83d53be553 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -36,7 +36,7 @@ import PprBase
 
 import Hoopl.Collections
 import Hoopl.Label
-import BasicTypes       (Alignment)
+import BasicTypes       (Alignment, mkAlignment, alignmentBytes)
 import DynFlags
 import Cmm              hiding (topInfoTable)
 import BlockId
@@ -72,7 +72,7 @@ import Data.Bits
 
 pprProcAlignment :: SDoc
 pprProcAlignment = sdocWithDynFlags $ \dflags ->
-  (maybe empty pprAlign . cmmProcAlignment $ dflags)
+  (maybe empty (pprAlign . mkAlignment) (cmmProcAlignment dflags))
 
 pprNatCmmDecl :: NatCmmDecl (Alignment, CmmStatics) Instr -> SDoc
 pprNatCmmDecl (CmmData section dats) =
@@ -236,14 +236,15 @@ pprLabel lbl = pprGloblDecl lbl
             $$ pprTypeDecl lbl
             $$ (ppr lbl <> char ':')
 
-pprAlign :: Int -> SDoc
-pprAlign bytes
+pprAlign :: Alignment -> SDoc
+pprAlign alignment
         = sdocWithPlatform $ \platform ->
-          text ".align " <> int (alignment platform)
+          text ".align " <> int (alignmentOn platform)
   where
-        alignment platform = if platformOS platform == OSDarwin
-                             then log2 bytes
-                             else      bytes
+        bytes = alignmentBytes alignment
+        alignmentOn platform = if platformOS platform == OSDarwin
+                               then log2 bytes
+                               else      bytes
 
         log2 :: Int -> Int  -- cache the common ones
         log2 1 = 0
diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
index 6f7a9e5d07..c07b87f547 100644
--- a/compiler/utils/Util.hs
+++ b/compiler/utils/Util.hs
@@ -87,7 +87,6 @@ module Util (
 
         -- * Integers
         exactLog2,
-        byteAlignment,
 
         -- * Floating point
         readRational,
@@ -1150,16 +1149,6 @@ exactLog2 x
     pow2 x | x == 1 = 0
            | otherwise = 1 + pow2 (x `shiftR` 1)
 
--- x is aligned at N bytes means the remainder from x / N is zero.
--- Currently, interested in N <= 8, but can be expanded to N <= 16 or
--- N <= 32 if used within SSE or AVX context.
-byteAlignment :: Integer -> Integer
-byteAlignment x = case x .&. 7 of
-  0 -> 8
-  4 -> 4
-  2 -> 2
-  _ -> 1
-
 {-
 -- -----------------------------------------------------------------------------
 -- Floats