diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2022-03-04 17:21:09 +0100 |
---|---|---|
committer | Andreas Klebinger <klebinger.andreas@gmx.at> | 2022-08-08 14:42:26 +0200 |
commit | 20457d775885d6c3df020d204da9a7acfb3c2e5a (patch) | |
tree | 46be70cbecee6e4568c9d24a641dc733bc9b87d2 | |
parent | 78d04cfadfd728bb088b08b1e88905b43cc0360c (diff) | |
download | haskell-20457d775885d6c3df020d204da9a7acfb3c2e5a.tar.gz |
NCG(x86): Compile add+shift as lea if possible. (branch: wip/andreask/add_mul_lea)
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/CodeGen.hs | 36 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm | 46 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs | 12 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/all.T | 1 |
4 files changed, 95 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index de0a7c56e3..295cd9f555 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -1048,10 +1048,29 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
 
     --------------------
     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    -- x + imm
     add_code rep x (CmmLit (CmmInt y _))
         | is32BitInteger y
         , rep /= W8 -- LEA doesn't support byte size (#18614)
         = add_int rep x y
+    -- x + (y << imm)
+    add_code rep x y
+        -- Byte size is not supported and 16bit size is slow when computed via LEA
+        | rep /= W8 && rep /= W16
+        -- 2^3 = 8 is the highest multiplicator supported by LEA.
+        , Just (x,y,shift_bits) <- get_shift x y
+        = add_shiftL rep x y (fromIntegral shift_bits)
+        where
+          -- x + (y << imm)
+          get_shift x (CmmMachOp (MO_Shl _w) [y, CmmLit (CmmInt shift_bits _)])
+            | shift_bits <= 3
+            = Just (x, y, shift_bits)
+          -- (y << imm) + x
+          get_shift (CmmMachOp (MO_Shl _w) [y, CmmLit (CmmInt shift_bits _)]) x
+            | shift_bits <= 3
+            = Just (x, y, shift_bits)
+          get_shift _ _
+            = Nothing
     add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
       where format = intFormat rep
     -- TODO: There are other interesting patterns we want to replace
@@ -1066,6 +1085,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
     sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
 
     -- our three-operand add instruction:
+    add_int :: (Width -> CmmExpr -> Integer -> NatM Register)
     add_int width x y = do
         (x_reg, x_code) <- getSomeReg x
         let
@@ -1079,6 +1099,22 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
         --
         return (Any format code)
 
+    -- x + (y << shift_bits) using LEA
+    add_shiftL :: (Width -> CmmExpr -> CmmExpr -> Int -> NatM Register)
+    add_shiftL width x y shift_bits = do
+        (x_reg, x_code) <- getSomeReg x
+        (y_reg, y_code) <- getSomeReg y
+        let
+            format = intFormat width
+            imm = ImmInt 0
+            code dst
+               = (x_code `appOL` y_code) `snocOL`
+                 LEA format
+                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg (2 ^ shift_bits)) imm))
+                        (OpReg dst)
+        --
+        return (Any format code)
+
     ----------------------
 
     -- See Note [DIV/IDIV for bytes]
diff --git a/testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
new file mode 100644
index 0000000000..d11e6809fc
--- /dev/null
+++ b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
@@ -0,0 +1,46 @@
+.section .text
+.align 8
+.align 8
+    .quad 8589934604
+    .quad 0
+    .long 14
+    .long 0
+.globl AddMulX86_f_info
+.type AddMulX86_f_info, @function
+AddMulX86_f_info:
+.LcAx:
+    leaq (%r14,%rsi,8),%rbx
+    jmp *(%rbp)
+    .size AddMulX86_f_info, .-AddMulX86_f_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_f_closure
+.type AddMulX86_f_closure, @object
+AddMulX86_f_closure:
+    .quad AddMulX86_f_info
+.section .text
+.align 8
+.align 8
+    .quad 8589934604
+    .quad 0
+    .long 14
+    .long 0
+.globl AddMulX86_g_info
+.type AddMulX86_g_info, @function
+AddMulX86_g_info:
+.LcAL:
+    leaq (%r14,%rsi,8),%rbx
+    jmp *(%rbp)
+    .size AddMulX86_g_info, .-AddMulX86_g_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_g_closure
+.type AddMulX86_g_closure, @object
+AddMulX86_g_closure:
+    .quad AddMulX86_g_info
+.section .note.GNU-stack,"",@progbits
+.ident "GHC 9.3.20220228"
+
+
diff --git a/testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
new file mode 100644
index 0000000000..552940956e
--- /dev/null
+++ b/testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
@@ -0,0 +1,12 @@
+{-# LANGUAGE MagicHash #-}
+
+module AddMulX86 where
+
+import GHC.Exts
+
+f :: Int# -> Int# -> Int#
+f x y =
+    x +# (y *# 8#) -- Should result in a lea instruction, which we grep the assembly output for.
+
+g x y =
+    (y *# 8#) +# x -- Should result in a lea instruction, which we grep the assembly output for.
diff --git a/testsuite/tests/codeGen/should_gen_asm/all.T b/testsuite/tests/codeGen/should_gen_asm/all.T
index fa3ed1ccf5..1faa7d62ab 100644
--- a/testsuite/tests/codeGen/should_gen_asm/all.T
+++ b/testsuite/tests/codeGen/should_gen_asm/all.T
@@ -10,3 +10,4 @@ test('memset-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
 test('bytearray-memset-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
 test('bytearray-memcpy-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
 test('T18137', [when(opsys('darwin'), skip), only_ways(llvm_ways)], compile_grep_asm, ['hs', False, '-fllvm -split-sections'])
+test('AddMulX86', is_amd64_codegen, compile_cmp_asm, ['hs', '-dno-typeable-binds']) |