diff options
author | Michal Terepeta <michal.terepeta@gmail.com> | 2018-10-04 13:56:59 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-10-07 18:36:07 -0400 |
commit | 5d5307f943d7581d7013ffe20af22233273fba06 (patch) | |
tree | ec9ae993cfa44d2cfe797e0422eb388933277100 /compiler/nativeGen | |
parent | e4bec29cb475b7e1431dad41fb8d4438814641c9 (diff) | |
download | haskell-5d5307f943d7581d7013ffe20af22233273fba06.tar.gz |
Add Int8# and Word8#
This is the first step of implementing:
https://github.com/ghc-proposals/ghc-proposals/pull/74
The main highlights/changes:
- `primops.txt.pp` gets two new sections for two new primitive types
for signed and unsigned 8-bit integers (`Int8#` and `Word8#`
respectively) along with basic arithmetic and comparison
operations. `PrimRep`/`RuntimeRep` get two new constructors for
them. All of the primops translate into the existing `MachOp`s.
- For `CmmCall`s the codegen will now zero-extend the values at call
site (so that they can be moved to the right register) and then
truncate them back to their original width.
- x86 native codegen needed some updates, since it wasn't able to deal
with the new widths, but all the changes are quite localized. LLVM
backend seems to just work.
Bumps binary submodule.
Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
Test Plan: ./validate with new tests
Reviewers: hvr, goldfire, bgamari, simonmar
Subscribers: Abhiroop, dfeuer, rwbarton, thomie, carter
Differential Revision: https://phabricator.haskell.org/D4475
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 90 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 8 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 10 |
3 files changed, 97 insertions, 11 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index a2e26bd68b..66f959a86b 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -644,20 +644,27 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps -- Nop conversions MO_UU_Conv W32 W8 -> toI8Reg W32 x MO_SS_Conv W32 W8 -> toI8Reg W32 x + MO_XX_Conv W32 W8 -> toI8Reg W32 x MO_UU_Conv W16 W8 -> toI8Reg W16 x MO_SS_Conv W16 W8 -> toI8Reg W16 x + MO_XX_Conv W16 W8 -> toI8Reg W16 x MO_UU_Conv W32 W16 -> toI16Reg W32 x MO_SS_Conv W32 W16 -> toI16Reg W32 x + MO_XX_Conv W32 W16 -> toI16Reg W32 x MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x + MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x + MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x + MO_XX_Conv W64 W8 | not is32Bit -> toI8Reg W64 x MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x + MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x -- widenings MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x @@ -668,16 +675,26 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x + -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. 
+ MO_XX_Conv W8 W32 -> integerExtend W8 W32 MOV x + MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x + MO_XX_Conv W8 W16 -> integerExtend W8 W16 MOV x + MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x - -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. - -- However, we don't want the register allocator to throw it - -- away as an unnecessary reg-to-reg move, so we keep it in - -- the form of a movzl and print it as a movl later. + -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl. + -- However, we don't want the register allocator to throw it + -- away as an unnecessary reg-to-reg move, so we keep it in + -- the form of a movzl and print it as a movl later. + -- This doesn't apply to MO_XX_Conv since in this case we don't care about + -- the upper bits. So we can just use MOV. 
+ MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x + MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x + MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x MO_FF_Conv W32 W64 | sse2 -> coerceFP2FP W64 x @@ -787,6 +804,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_S_MulMayOflo rep -> imulMayOflo rep x y + MO_Mul W8 -> imulW8 x y MO_Mul rep -> triv_op rep IMUL MO_And rep -> triv_op rep AND MO_Or rep -> triv_op rep OR @@ -822,6 +840,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps triv_op width instr = trivialCode width op (Just op) x y where op = instr (intFormat width) + -- Special case for IMUL for bytes, since the result of IMULB will be in + -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider + -- values. + imulW8 :: CmmExpr -> CmmExpr -> NatM Register + imulW8 arg_a arg_b = do + (a_reg, a_code) <- getNonClobberedReg arg_a + b_code <- getAnyReg arg_b + + let code = a_code `appOL` b_code eax `appOL` + toOL [ IMUL2 format (OpReg a_reg) ] + format = intFormat W8 + + return (Fixed format eax code) + + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register imulMayOflo rep a b = do (a_reg, a_code) <- getNonClobberedReg a @@ -916,6 +949,18 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps return (Any format code) ---------------------- + + -- See Note [DIV/IDIV for bytes] + div_code W8 signed quotient x y = do + let widen | signed = MO_SS_Conv W8 W16 + | otherwise = MO_UU_Conv W8 W16 + div_code + W16 + signed + quotient + (CmmMachOp widen [x]) + (CmmMachOp widen [y]) + div_code width signed quotient x y = do (y_op, y_code) <- getRegOrMem y -- cannot be clobbered x_code <- getAnyReg x @@ -2277,6 +2322,18 @@ genCCall _ is32Bit target dest_regs args = do = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y divOp2 _ _ _ _ _ = panic "genCCall: Wrong number of arguments for divOp2" + + -- See Note [DIV/IDIV for 
bytes] + divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y = + let widen | signed = MO_SS_Conv W8 W16 + | otherwise = MO_UU_Conv W8 W16 + arg_x_low_16 = CmmMachOp widen [arg_x_low] + arg_y_16 = CmmMachOp widen [arg_y] + m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high + in divOp + platform signed W16 [res_q, res_r] + m_arg_x_high_16 arg_x_low_16 arg_y_16 + divOp platform signed width [res_q, res_r] m_arg_x_high arg_x_low arg_y = do let format = intFormat width @@ -2318,6 +2375,22 @@ genCCall _ is32Bit target dest_regs args = do addSubIntC _ _ _ _ _ _ _ _ = panic "genCCall: Wrong number of arguments/results for addSubIntC" +-- Note [DIV/IDIV for bytes] +-- +-- IDIV reminder: +-- Size Dividend Divisor Quotient Remainder +-- byte %ax r/m8 %al %ah +-- word %dx:%ax r/m16 %ax %dx +-- dword %edx:%eax r/m32 %eax %edx +-- qword %rdx:%rax r/m64 %rax %rdx +-- +-- We do a special case for the byte division because the current +-- codegen doesn't deal well with accessing %ah register (also, +-- accessing %ah in 64-bit mode is complicated because it cannot be an +-- operand of many instructions). So we just widen operands to 16 bits +-- and get the results from %al, %dl. This is not optimal, but a few +-- register moves are probably not a huge deal when doing division. + genCCall32' :: DynFlags -> ForeignTarget -- function to call -> [CmmFormal] -- where to put the result @@ -2461,6 +2534,10 @@ genCCall32' dflags target dest_regs args = do ) | otherwise = do + -- Arguments can be smaller than 32-bit, but we still use @PUSH + -- II32@ - the usual calling conventions expect integers to be + -- 4-byte aligned. 
+ ASSERT((typeWidth arg_ty) <= W32) return () (operand, code) <- getOperand arg delta <- getDeltaNat setDeltaNat (delta-size) @@ -2700,7 +2777,10 @@ genCCall64' dflags target dest_regs args = do push_args rest code' | otherwise = do - ASSERT(width == W64) return () + -- Arguments can be smaller than 64-bit, but we still use @PUSH + -- II64@ - the usual calling conventions expect integers to be + -- 8-byte aligned. + ASSERT(width <= W64) return () (arg_op, arg_code) <- getOperand arg delta <- getDeltaNat setDeltaNat (delta-arg_size) diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index c7000c9f4b..8cc61ed789 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -383,7 +383,13 @@ x86_regUsageOfInstr platform instr SUB _ src dst -> usageRM src dst SBB _ src dst -> usageRM src dst IMUL _ src dst -> usageRM src dst - IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx] + + -- Result of IMULB will be in just in %ax + IMUL2 II8 src -> mkRU (eax:use_R src []) [eax] + -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and + -- %ax/%eax/%rax. 
+ IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx] + MUL _ src dst -> usageRM src dst MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx] DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx] diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 03d4fce794..d4c92df753 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -327,7 +327,7 @@ pprReg f r (case i of { 0 -> sLit "%al"; 1 -> sLit "%bl"; 2 -> sLit "%cl"; 3 -> sLit "%dl"; - _ -> sLit "very naughty I386 byte register" + _ -> sLit $ "very naughty I386 byte register: " ++ show i }) ppr32_reg_word i = ptext @@ -364,7 +364,7 @@ pprReg f r 10 -> sLit "%r10b"; 11 -> sLit "%r11b"; 12 -> sLit "%r12b"; 13 -> sLit "%r13b"; 14 -> sLit "%r14b"; 15 -> sLit "%r15b"; - _ -> sLit "very naughty x86_64 byte register" + _ -> sLit $ "very naughty x86_64 byte register: " ++ show i }) ppr64_reg_word i = ptext @@ -789,8 +789,11 @@ pprInstr (POP format op) = pprFormatOp (sLit "pop") format op -- pprInstr POPA = text "\tpopal" pprInstr NOP = text "\tnop" +pprInstr (CLTD II8) = text "\tcbtw" +pprInstr (CLTD II16) = text "\tcwtd" pprInstr (CLTD II32) = text "\tcltd" pprInstr (CLTD II64) = text "\tcqto" +pprInstr (CLTD x) = panic $ "pprInstr: " ++ show x pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op) @@ -1076,9 +1079,6 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst pprInstr (CMPXCHG format src dst) = pprFormatOpOp (sLit "cmpxchg") format src dst -pprInstr _ - = panic "X86.Ppr.pprInstr: no match" - pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Format -> SDoc |