diff options
Diffstat (limited to 'compiler/GHC/CmmToAsm')
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/CodeGen.hs | 41 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/Instr.hs | 19 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/AArch64/Ppr.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/PPC/CodeGen.hs | 35 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/PPC/Instr.hs | 11 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/PPC/Ppr.hs | 18 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Wasm/FromCmm.hs | 4 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/CodeGen.hs | 92 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/Instr.hs | 22 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/Ppr.hs | 23 |
10 files changed, 254 insertions, 18 deletions
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index 8ebccaf093..c0e9a7e8d5 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -783,7 +783,7 @@ getRegister' config plat expr where w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) r' = getRegisterReg plat reg - -- Generic case. + -- Generic binary case. CmmMachOp op [x, y] -> do -- alright, so we have an operation, and two expressions. And we want to essentially do -- ensure we get float regs (TODO(Ben): What?) @@ -956,7 +956,44 @@ getRegister' config plat expr -- TODO - op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ (pprMachOp op) <+> text "in" <+> (pdoc plat expr) + op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ + (pprMachOp op) <+> text "in" <+> (pdoc plat expr) + + -- Generic ternary case. + CmmMachOp op [x, y, z] -> + + case op of + + -- Floating-point fused multiply-add operations + + -- x86 fmadd x * y + z <=> AArch64 fmadd : d = r1 * r2 + r3 + -- x86 fmsub x * y - z <=> AArch64 fnmsub: d = r1 * r2 - r3 + -- x86 fnmadd - x * y + z <=> AArch64 fmsub : d = - r1 * r2 + r3 + -- x86 fnmsub - x * y - z <=> AArch64 fnmadd: d = - r1 * r2 - r3 + + MO_FMA var w -> case var of + FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a) + FMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a) + FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a) + FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a) + + _ -> pprPanic "getRegister' (unhandled ternary CmmMachOp): " $ + (pprMachOp op) <+> text "in" <+> (pdoc plat expr) + + where + float3Op w op = do + (reg_fx, format_x, code_fx) <- getFloatReg x + (reg_fy, format_y, code_fy) <- getFloatReg y + (reg_fz, format_z, code_fz) <- getFloatReg z + massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z) $ + text "float3Op: non-float" + return $ + Any (floatFormat w) $ \ dst -> + code_fx 
`appOL` + code_fy `appOL` + code_fz `appOL` + op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy) (OpReg w reg_fz) + CmmMachOp _op _xs -> pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr) diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs index 7bf78becb6..166ab2ca17 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Instr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs @@ -142,6 +142,8 @@ regUsageOfInstr platform instr = case instr of SCVTF dst src -> usage (regOp src, regOp dst) FCVTZS dst src -> usage (regOp src, regOp dst) FABS dst src -> usage (regOp src, regOp dst) + FMA _ dst src1 src2 src3 -> + usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst) _ -> panic $ "regUsageOfInstr: " ++ instrCon instr @@ -280,6 +282,9 @@ patchRegsOfInstr instr env = case instr of SCVTF o1 o2 -> SCVTF (patchOp o1) (patchOp o2) FCVTZS o1 o2 -> FCVTZS (patchOp o1) (patchOp o2) FABS o1 o2 -> FABS (patchOp o1) (patchOp o2) + FMA s o1 o2 o3 o4 -> + FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4) + _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr where patchOp :: Operand -> Operand @@ -650,6 +655,14 @@ data Instr -- Float ABSolute value | FABS Operand Operand + -- | Floating-point fused multiply-add instructions + -- + -- - fmadd : d = r1 * r2 + r3 + -- - fnmsub: d = r1 * r2 - r3 + -- - fmsub : d = - r1 * r2 + r3 + -- - fnmadd: d = - r1 * r2 - r3 + | FMA FMASign Operand Operand Operand Operand + instrCon :: Instr -> String instrCon i = case i of @@ -715,6 +728,12 @@ instrCon i = SCVTF{} -> "SCVTF" FCVTZS{} -> "FCVTZS" FABS{} -> "FABS" + FMA variant _ _ _ _ -> + case variant of + FMAdd -> "FMADD" + FMSub -> "FMSUB" + FNMAdd -> "FNMADD" + FNMSub -> "FNMSUB" data Target = TBlock BlockId diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs index 475324afce..646f914c8d 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs @@ -546,6 +546,13 @@ pprInstr 
platform instr = case instr of SCVTF o1 o2 -> op2 (text "\tscvtf") o1 o2 FCVTZS o1 o2 -> op2 (text "\tfcvtzs") o1 o2 FABS o1 o2 -> op2 (text "\tfabs") o1 o2 + FMA variant d r1 r2 r3 -> + let fma = case variant of + FMAdd -> text "\tfmadd" + FMSub -> text "\tfmsub" + FNMAdd -> text "\tfnmadd" + FNMSub -> text "\tfnmsub" + in op4 fma d r1 r2 r3 where op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4 diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs index 7dac4f221b..f8a726da6c 100644 --- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs @@ -649,6 +649,21 @@ getRegister' _ _ (CmmMachOp mop [x, y]) -- dyadic PrimOps code <- remainderCode rep sgn tmp x y return (Any fmt code) +getRegister' _ _ (CmmMachOp mop [x, y, z]) -- ternary PrimOps + = case mop of + + -- x86 fmadd x * y + z <> PPC fmadd rt = ra * rc + rb + -- x86 fmsub x * y - z <> PPC fmsub rt = ra * rc - rb + -- x86 fnmadd - x * y + z ~~ PPC fnmsub rt = -(ra * rc - rb) + -- x86 fnmsub - x * y - z ~~ PPC fnmadd rt = -(ra * rc + rb) + + MO_FMA variant w -> + case variant of + FMAdd -> fma_code w (FMADD FMAdd) x y z + FMSub -> fma_code w (FMADD FMSub) x y z + FNMAdd -> fma_code w (FMADD FNMAdd) x y z + FNMSub -> fma_code w (FMADD FNMSub) x y z + _ -> panic "PPC.CodeGen.getRegister: no match" getRegister' _ _ (CmmLit (CmmInt i rep)) | Just imm <- makeImmediate rep True i @@ -2358,10 +2373,28 @@ trivialUCode rep instr x = do let code' dst = code `snocOL` instr dst src return (Any rep code') +-- | Generate code for a 4-register FMA instruction, +-- e.g. @fmadd rt ra rc rb := rt <- ra * rc + rb@. 
+fma_code :: Width + -> (Format -> Reg -> Reg -> Reg -> Reg -> Instr) + -> CmmExpr + -> CmmExpr + -> CmmExpr + -> NatM Register +fma_code w instr ra rc rb = do + let rep = floatFormat w + (src1, code1) <- getSomeReg ra + (src2, code2) <- getSomeReg rc + (src3, code3) <- getSomeReg rb + let instrCode rt = + code1 `appOL` + code2 `appOL` + code3 `snocOL` instr rep rt src1 src2 src3 + return $ Any rep instrCode + -- There is no "remainder" instruction on the PPC, so we have to do -- it the hard way. -- The "sgn" parameter is the signedness for the division instruction - remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr -> NatM (Reg -> InstrBlock) remainderCode rep sgn reg_q arg_x arg_y = do diff --git a/compiler/GHC/CmmToAsm/PPC/Instr.hs b/compiler/GHC/CmmToAsm/PPC/Instr.hs index 639ae979f8..3fedcc1fc4 100644 --- a/compiler/GHC/CmmToAsm/PPC/Instr.hs +++ b/compiler/GHC/CmmToAsm/PPC/Instr.hs @@ -280,6 +280,14 @@ data Instr | FABS Reg Reg -- abs is the same for single and double | FNEG Reg Reg -- negate is the same for single and double prec. + -- | Fused multiply-add instructions. 
+ -- + -- - FMADD: @rt = ra * rc + rb@ + -- - FMSUB: @rt = ra * rc - rb@ + -- - FNMADD: @rt = -(ra * rc + rb)@ + -- - FNMSUB: @rt = -(ra * rc - rb)@ + | FMADD FMASign Format Reg Reg Reg Reg + | FCMP Reg Reg | FCTIWZ Reg Reg -- convert to integer word @@ -380,6 +388,7 @@ regUsageOfInstr platform instr MFCR reg -> usage ([], [reg]) MFLR reg -> usage ([], [reg]) FETCHPC reg -> usage ([], [reg]) + FMADD _ _ rt ra rc rb -> usage ([ra, rc, rb], [rt]) _ -> noUsage where usage (src, dst) = RU (filter (interesting platform) src) @@ -467,6 +476,8 @@ patchRegsOfInstr instr env FDIV fmt r1 r2 r3 -> FDIV fmt (env r1) (env r2) (env r3) FABS r1 r2 -> FABS (env r1) (env r2) FNEG r1 r2 -> FNEG (env r1) (env r2) + FMADD sgn fmt r1 r2 r3 r4 + -> FMADD sgn fmt (env r1) (env r2) (env r3) (env r4) FCMP r1 r2 -> FCMP (env r1) (env r2) FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2) FCTIDZ r1 r2 -> FCTIDZ (env r1) (env r2) diff --git a/compiler/GHC/CmmToAsm/PPC/Ppr.hs b/compiler/GHC/CmmToAsm/PPC/Ppr.hs index ba364df1b0..f1d6733327 100644 --- a/compiler/GHC/CmmToAsm/PPC/Ppr.hs +++ b/compiler/GHC/CmmToAsm/PPC/Ppr.hs @@ -934,6 +934,9 @@ pprInstr platform instr = case instr of FNEG reg1 reg2 -> pprUnary (text "fneg") reg1 reg2 + FMADD signs fmt dst ra rc rb + -> pprTernaryF (pprFMASign signs) fmt dst ra rc rb + FCMP reg1 reg2 -> line $ hcat [ char '\t', @@ -1083,6 +1086,21 @@ pprBinaryF op fmt reg1 reg2 reg3 = line $ hcat [ pprReg reg3 ] +pprTernaryF :: IsDoc doc => Line doc -> Format -> Reg -> Reg -> Reg -> Reg -> doc +pprTernaryF op fmt rt ra rc rb = line $ hcat [ + char '\t', + op, + pprFFormat fmt, + char '\t', + pprReg rt, + text ", ", + pprReg ra, + text ", ", + pprReg rc, + text ", ", + pprReg rb + ] + pprRI :: IsLine doc => Platform -> RI -> doc pprRI _ (RIReg r) = pprReg r pprRI platform (RIImm r) = pprImm platform r diff --git a/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs b/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs index 7ca323d72d..9a4c3f34c2 100644 --- a/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs +++
b/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs @@ -816,7 +816,9 @@ lower_CmmMachOp lbl (MO_SS_Conv w0 w1) xs = lower_MO_SS_Conv lbl w0 w1 xs lower_CmmMachOp lbl (MO_UU_Conv w0 w1) xs = lower_MO_UU_Conv lbl w0 w1 xs lower_CmmMachOp lbl (MO_XX_Conv w0 w1) xs = lower_MO_UU_Conv lbl w0 w1 xs lower_CmmMachOp lbl (MO_FF_Conv w0 w1) xs = lower_MO_FF_Conv lbl w0 w1 xs -lower_CmmMachOp _ _ _ = panic "lower_CmmMachOp: unreachable" +lower_CmmMachOp _ mop _ = + pprPanic "lower_CmmMachOp: unreachable" $ + vcat [ text "offending MachOp:" <+> pprMachOp mop ] -- | Lower a 'CmmLit'. Note that we don't emit 'f32.const' or -- 'f64.const' for the time being, and instead emit their relative bit diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index d6ef821c9f..859b27e248 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ -901,14 +901,10 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps MO_U_Lt _ -> condIntReg LU x y MO_U_Le _ -> condIntReg LEU x y - MO_F_Add w -> trivialFCode_sse2 w ADD x y - - MO_F_Sub w -> trivialFCode_sse2 w SUB x y - - MO_F_Quot w -> trivialFCode_sse2 w FDIV x y - - MO_F_Mul w -> trivialFCode_sse2 w MUL x y - + MO_F_Add w -> trivialFCode_sse2 w ADD x y + MO_F_Sub w -> trivialFCode_sse2 w SUB x y + MO_F_Quot w -> trivialFCode_sse2 w FDIV x y + MO_F_Mul w -> trivialFCode_sse2 w MUL x y MO_Add rep -> add_code rep x y MO_Sub rep -> sub_code rep x y @@ -1113,6 +1109,13 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps return (Fixed format result code) +getRegister' _plat _is32Bit (CmmMachOp mop [x, y, z]) = -- ternary MachOps + case mop of + -- Floating point fused multiply-add operations @ ± x*y ± z@ + MO_FMA var w -> genFMA3Code w var x y z + + _other -> pprPanic "getRegister(x86) - ternary CmmMachOp (1)" + (pprMachOp mop) getRegister' _ _ (CmmLoad mem pk _) | isFloatType pk @@ -3151,12 +3154,12 @@ genTrivialCode rep instr a b = do a_code <- getAnyReg a 
tmp <- getNewRegNat rep let - -- We want the value of b to stay alive across the computation of a. - -- But, we want to calculate a straight into the destination register, + -- We want the value of 'b' to stay alive across the computation of 'a'. + -- But, we want to calculate 'a' straight into the destination register, -- because the instruction only has two operands (dst := dst `op` src). - -- The troublesome case is when the result of b is in the same register - -- as the destination reg. In this case, we have to save b in a - -- new temporary across the computation of a. + -- The troublesome case is when the result of 'b' is in the same register + -- as the destination 'reg'. In this case, we have to save 'b' in a + -- new temporary across the computation of 'a'. code dst | dst `regClashesWithOp` b_op = b_code `appOL` @@ -3174,6 +3177,69 @@ reg `regClashesWithOp` OpReg reg2 = reg == reg2 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) _ `regClashesWithOp` _ = False +-- | Generate code for a fused multiply-add operation, of the form @± x * y ± z@, +-- with 3 operands (FMA3 instruction set). +genFMA3Code :: Width + -> FMASign + -> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register +genFMA3Code w signs x y z = do + + -- For the FMA instruction, we want to compute x * y + z + -- + -- There are three possible instructions we could emit: + -- + -- - fmadd213 z y x, result in x, z can be a memory address + -- - fmadd132 x z y, result in y, x can be a memory address + -- - fmadd231 y x z, result in z, y can be a memory address + -- + -- This suggests two possible optimisations: + -- + -- - OPTIMISATION 1 + -- If one argument is an address, use the instruction that allows + -- a memory address in that position. + -- + -- - OPTIMISATION 2 + -- If one argument is in a fixed register, use the instruction that puts + -- the result in that same register.
+ -- + -- Currently we follow neither of these optimisations, + -- opting to always use fmadd213 for simplicity. + let rep = floatFormat w + (y_reg, y_code) <- getNonClobberedReg y + (z_reg, z_code) <- getNonClobberedReg z + x_code <- getAnyReg x + y_tmp <- getNewRegNat rep + z_tmp <- getNewRegNat rep + let + fma213 = FMA3 rep signs FMA213 + code dst + | dst == y_reg + , dst == z_reg + = y_code `appOL` + unitOL (MOV rep (OpReg y_reg) (OpReg y_tmp)) `appOL` + z_code `appOL` + unitOL (MOV rep (OpReg z_reg) (OpReg z_tmp)) `appOL` + x_code dst `snocOL` + fma213 (OpReg z_tmp) y_tmp dst + | dst == y_reg + = y_code `appOL` + unitOL (MOV rep (OpReg y_reg) (OpReg y_tmp)) `appOL` + z_code `appOL` + x_code dst `snocOL` + fma213 (OpReg z_reg) y_tmp dst + | dst == z_reg + = y_code `appOL` + z_code `appOL` + unitOL (MOV rep (OpReg z_reg) (OpReg z_tmp)) `appOL` + x_code dst `snocOL` + fma213 (OpReg z_tmp) y_reg dst + | otherwise + = y_code `appOL` + z_code `appOL` + x_code dst `snocOL` + fma213 (OpReg z_reg) y_reg dst + return (Any rep code) + ----------- trivialUCode :: Format -> (Operand -> Instr) diff --git a/compiler/GHC/CmmToAsm/X86/Instr.hs b/compiler/GHC/CmmToAsm/X86/Instr.hs index ccb3ce09ba..b4e93a1c5d 100644 --- a/compiler/GHC/CmmToAsm/X86/Instr.hs +++ b/compiler/GHC/CmmToAsm/X86/Instr.hs @@ -12,6 +12,7 @@ module GHC.CmmToAsm.X86.Instr ( Instr(..) , Operand(..) , PrefetchVariant(..) + , FMAPermutation(..) , JumpDest(..) , getJumpDestBlockId , canShortcut @@ -272,6 +273,10 @@ data Instr | CVTSI2SS Format Operand Reg -- I32/I64 to F32 | CVTSI2SD Format Operand Reg -- I32/I64 to F64 + -- | FMA3 fused multiply-add operations. + | FMA3 Format FMASign FMAPermutation Operand Reg Reg + -- src1 (r/m), src2 (r), dst (r) + -- use ADD, SUB, and SQRT for arithmetic. In both cases, operands -- are Operand Reg.
@@ -351,7 +356,7 @@ data Operand | OpImm Imm -- immediate value | OpAddr AddrMode -- memory reference - +data FMAPermutation = FMA132 | FMA213 | FMA231 -- | Returns which registers are read and written as a (read, written) -- pair. @@ -438,6 +443,8 @@ regUsageOfInstr platform instr PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst] PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst] + FMA3 _ _ _ src1 src2 dst -> usageFMA src1 src2 dst + -- note: might be a better way to do this PREFETCH _ _ src -> mkRU (use_R src []) [] LOCK i -> regUsageOfInstr platform i @@ -482,6 +489,15 @@ regUsageOfInstr platform instr usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg] usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match" + -- 3 operand form of FMA instructions. + usageFMA :: Operand -> Reg -> Reg -> RegUsage + usageFMA (OpReg src1) src2 dst + = mkRU [src1, src2, dst] [dst] + usageFMA (OpAddr ea1) src2 dst + = mkRU (use_EA ea1 [src2, dst]) [dst] + usageFMA _ _ _ + = panic "X86.RegInfo.usageFMA: no match" + -- 1 operand form; operand Modified usageM :: Operand -> RegUsage usageM (OpReg reg) = mkRU [reg] [reg] @@ -561,6 +577,8 @@ patchRegsOfInstr instr env JMP op regs -> JMP (patchOp op) regs JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl + FMA3 fmt perm var x1 x2 x3 -> patch3 (FMA3 fmt perm var) x1 x2 x3 + -- literally only support storing the top x87 stack value st(0) X87Store fmt dst -> X87Store fmt (lookupAddr dst) @@ -612,6 +630,8 @@ patchRegsOfInstr instr env patch1 insn op = insn $! patchOp op patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a patch2 insn src dst = (insn $! patchOp src) $! patchOp dst + patch3 :: (Operand -> Reg -> Reg -> a) -> Operand -> Reg -> Reg -> a + patch3 insn src1 src2 dst = ((insn $! patchOp src1) $! env src2) $! env dst patchOp (OpReg reg) = OpReg $! 
env reg patchOp (OpImm imm) = OpImm imm diff --git a/compiler/GHC/CmmToAsm/X86/Ppr.hs b/compiler/GHC/CmmToAsm/X86/Ppr.hs index 4a8f55fdf0..0d649f2efb 100644 --- a/compiler/GHC/CmmToAsm/X86/Ppr.hs +++ b/compiler/GHC/CmmToAsm/X86/Ppr.hs @@ -838,6 +838,14 @@ pprInstr platform i = case i of FDIV format op1 op2 -> pprFormatOpOp (text "div") format op1 op2 + FMA3 format var perm op1 op2 op3 + -> let mnemo = case var of + FMAdd -> text "vfmadd" + FMSub -> text "vfmsub" + FNMAdd -> text "vfnmadd" + FNMSub -> text "vfnmsub" + in pprFormatOpRegReg (mnemo <> pprFMAPermutation perm) format op1 op2 op3 + SQRT format op1 op2 -> pprFormatOpReg (text "sqrt") format op1 op2 @@ -968,6 +976,21 @@ pprInstr platform i = case i of pprOperand platform format op2 ] + pprFormatOpRegReg :: Line doc -> Format -> Operand -> Reg -> Reg -> doc + pprFormatOpRegReg name format op1 op2 op3 + = line $ hcat [ + pprMnemonic name format, + pprOperand platform format op1, + comma, + pprReg platform format op2, + comma, + pprReg platform format op3 + ] + + pprFMAPermutation :: FMAPermutation -> Line doc + pprFMAPermutation FMA132 = text "132" + pprFMAPermutation FMA213 = text "213" + pprFMAPermutation FMA231 = text "231" pprOpOp :: Line doc -> Format -> Operand -> Operand -> doc pprOpOp name format op1 op2 |