summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Klebinger <klebinger.andreas@gmx.at> 2022-03-04 17:21:09 +0100
committer: Andreas Klebinger <klebinger.andreas@gmx.at> 2022-08-08 14:42:26 +0200
commit: 20457d775885d6c3df020d204da9a7acfb3c2e5a (patch)
tree: 46be70cbecee6e4568c9d24a641dc733bc9b87d2
parent: 78d04cfadfd728bb088b08b1e88905b43cc0360c (diff)
download: haskell-20457d775885d6c3df020d204da9a7acfb3c2e5a.tar.gz
NCG(x86): Compile add+shift as lea if possible. (branch: wip/andreask/add_mul_lea)
-rw-r--r-- compiler/GHC/CmmToAsm/X86/CodeGen.hs 36
-rw-r--r-- testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm 46
-rw-r--r-- testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs 12
-rw-r--r-- testsuite/tests/codeGen/should_gen_asm/all.T 1
4 files changed, 95 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index de0a7c56e3..295cd9f555 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -1048,10 +1048,29 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
--------------------
add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ -- x + imm
add_code rep x (CmmLit (CmmInt y _))
  | is32BitInteger y
  , rep /= W8 -- LEA doesn't support byte size (#18614)
  = add_int rep x y
+ -- x + (y << imm), in either operand order: compiled to an LEA by add_shiftL.
+ add_code rep x y
+   -- Byte size is not supported and 16bit size is slow when computed via LEA
+   | rep /= W8 && rep /= W16
+   , Just (base, index, shift_bits) <- get_shift x y
+   = add_shiftL rep base index (fromIntegral shift_bits)
+   where
+     -- Split the operands into (unshifted operand, shifted operand, shift
+     -- amount) when one of them is a left shift by a literal LEA can encode.
+     -- x + (y << imm)
+     get_shift a (CmmMachOp (MO_Shl _w) [b, CmmLit (CmmInt shift_bits _)])
+       | is_lea_shift shift_bits
+       = Just (a, b, shift_bits)
+     -- (y << imm) + x
+     get_shift (CmmMachOp (MO_Shl _w) [b, CmmLit (CmmInt shift_bits _)]) a
+       | is_lea_shift shift_bits
+       = Just (a, b, shift_bits)
+     get_shift _ _
+       = Nothing
+     -- 2^3 = 8 is the highest multiplicator supported by LEA. Negative
+     -- literals are rejected as well: add_shiftL computes the scale as
+     -- 2 ^ shift_bits, and (^) fails on a negative exponent, so such an
+     -- expression must fall through to the plain ADD below instead.
+     is_lea_shift n = 0 <= n && n <= 3
add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
  where format = intFormat rep
-- TODO: There are other interesting patterns we want to replace
@@ -1066,6 +1085,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
-- our three-operand add instruction:
+ add_int :: (Width -> CmmExpr -> Integer -> NatM Register)
add_int width x y = do
(x_reg, x_code) <- getSomeReg x
let
@@ -1079,6 +1099,22 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
--
return (Any format code)
+ -- Compute x + (y << shift_bits) with LEA: x's register is used as the
+ -- base and y's register as the index, scaled by 2 ^ shift_bits, with a
+ -- zero displacement. The call site in add_code only passes shifts up to 3.
+ add_shiftL :: (Width -> CmmExpr -> CmmExpr -> Int -> NatM Register)
+ add_shiftL width x y shift_bits = do
+   (base_reg, base_code) <- getSomeReg x
+   (index_reg, index_code) <- getSomeReg y
+   let
+     format = intFormat width
+     -- base + index * 2^shift_bits, no displacement
+     addr = AddrBaseIndex (EABaseReg base_reg)
+                          (EAIndex index_reg (2 ^ shift_bits))
+                          (ImmInt 0)
+     -- code for both operands first, then the LEA into the destination
+     operand_code = base_code `appOL` index_code
+     code dst = operand_code `snocOL` LEA format (OpAddr addr) (OpReg dst)
+   return (Any format code)
+
----------------------
-- See Note [DIV/IDIV for bytes]
diff --git a/testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
new file mode 100644
index 0000000000..d11e6809fc
--- /dev/null
+++ b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
@@ -0,0 +1,46 @@
+.section .text
+.align 8
+.align 8
+ .quad 8589934604
+ .quad 0
+ .long 14
+ .long 0
+.globl AddMulX86_f_info
+.type AddMulX86_f_info, @function
+AddMulX86_f_info:
+.LcAx:
+ leaq (%r14,%rsi,8),%rbx
+ jmp *(%rbp)
+ .size AddMulX86_f_info, .-AddMulX86_f_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_f_closure
+.type AddMulX86_f_closure, @object
+AddMulX86_f_closure:
+ .quad AddMulX86_f_info
+.section .text
+.align 8
+.align 8
+ .quad 8589934604
+ .quad 0
+ .long 14
+ .long 0
+.globl AddMulX86_g_info
+.type AddMulX86_g_info, @function
+AddMulX86_g_info:
+.LcAL:
+ leaq (%r14,%rsi,8),%rbx
+ jmp *(%rbp)
+ .size AddMulX86_g_info, .-AddMulX86_g_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_g_closure
+.type AddMulX86_g_closure, @object
+AddMulX86_g_closure:
+ .quad AddMulX86_g_info
+.section .note.GNU-stack,"",@progbits
+.ident "GHC 9.3.20220228"
+
+
diff --git a/testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
new file mode 100644
index 0000000000..552940956e
--- /dev/null
+++ b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
@@ -0,0 +1,12 @@
+{-# LANGUAGE MagicHash #-}
+
+module AddMulX86 where
+
+import GHC.Exts
+
+ -- x + y * 8, with the multiplication as the right operand of the addition.
+ f :: Int# -> Int# -> Int#
+ f x y =
+ x +# (y *# 8#) -- Should result in a lea instruction; the test is registered with compile_cmp_asm, so the generated assembly is compared against AddMulX86.asm.
+
+ -- y * 8 + x, with the multiplication as the left operand of the addition.
+ g :: Int# -> Int# -> Int#
+ g x y =
+   (y *# 8#) +# x -- Should result in a lea instruction; the test is registered with compile_cmp_asm, so the generated assembly is compared against AddMulX86.asm.
diff --git a/testsuite/tests/codeGen/should_gen_asm/all.T b/testsuite/tests/codeGen/should_gen_asm/all.T
index fa3ed1ccf5..1faa7d62ab 100644
--- a/testsuite/tests/codeGen/should_gen_asm/all.T
+++ b/testsuite/tests/codeGen/should_gen_asm/all.T
@@ -10,3 +10,4 @@ test('memset-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
test('bytearray-memset-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
test('bytearray-memcpy-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
test('T18137', [when(opsys('darwin'), skip), only_ways(llvm_ways)], compile_grep_asm, ['hs', False, '-fllvm -split-sections'])
+test('AddMulX86', is_amd64_codegen, compile_cmp_asm, ['hs', '-dno-typeable-binds'])