From af4cea7f1411e5b99e2417d7c2d3d0e697093103 Mon Sep 17 00:00:00 2001
From: Artem Pyanykh <artem.pyanykh@gmail.com>
Date: Thu, 4 Apr 2019 13:43:38 +0300
Subject: codegen: fix memset unroll for small bytearrays, add 64-bit sets

Fixes #16052

When the offset in `setByteArray#` is statically known, we can provide
better alignment guarantees than just 1 byte.

Also, memset can now do 64-bit wide sets.

The current memset intrinsic is not optimal however and can be
improved for the case when we know that we deal with

(baseAddress at known alignment) + offset

For instance, on 64-bit

`setByteArray# s 1# 23# 0#`

given that the bytearray is 8-byte aligned, could be unrolled into
`movb, movw, movl, movq, movq`; but currently it is
`movb x23` since alignment of 1 is all we can embed into MO_Memset op.
---
 compiler/utils/Util.hs | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'compiler/utils')

diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
index 9e67a43bf5..6f7a9e5d07 100644
--- a/compiler/utils/Util.hs
+++ b/compiler/utils/Util.hs
@@ -87,6 +87,7 @@ module Util (
 
         -- * Integers
         exactLog2,
+        byteAlignment,
 
         -- * Floating point
         readRational,
@@ -1149,6 +1150,15 @@ exactLog2 x
     pow2 x | x == 1 = 0
            | otherwise = 1 + pow2 (x `shiftR` 1)
 
+-- | Largest N in {1, 2, 4, 8} such that x is a multiple of N, i.e. x is
+-- N-byte aligned. Capped at 8 for now; could be raised to 16 or 32 for
+-- SSE/AVX use. Only the low three bits of x matter.
+byteAlignment :: Integer -> Integer
+byteAlignment x = case x .&. 7 of
+  0 -> 8
+  4 -> 4
+  r | even r -> 2  -- low bits 2 or 6: even, but not a multiple of 4
+  _ -> 1
 
 {-
 -- -----------------------------------------------------------------------------
-- 
cgit v1.2.1