summary | refs | log | tree | commit | diff
path: root/compiler/GHC/Builtin/primops.txt.pp
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/Builtin/primops.txt.pp')
-rw-r--r-- compiler/GHC/Builtin/primops.txt.pp 69
1 files changed, 69 insertions, 0 deletions
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index 5b730c1943..b2b3f1d8f5 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -1370,6 +1370,75 @@ primop FloatDecode_IntOp "decodeFloat_Int#" GenPrimOp
with out_of_line = True
------------------------------------------------------------------------
+section "Fused multiply-add operations"
+ { #fma#
+
+ The fused multiply-add primops 'fmaddFloat#' and 'fmaddDouble#'
+ implement the operation
+
+ \[
+ \lambda\ x\ y\ z \rightarrow x * y + z
+ \]
+
+ with a single floating-point rounding operation at the end, as opposed to
+ rounding after both the multiplication and the addition (which can accumulate rounding errors).
+
+ These primops can be compiled directly to a single machine instruction on
+ architectures that support them. Currently, these are:
+
+ 1. x86 CPUs that support the FMA3 instruction set extension (which
+ includes most processors released since 2013).
+ 2. PowerPC.
+ 3. AArch64.
+
+ Doing so requires that users pass the '-mfma' flag to GHC. Otherwise, the
+ primops are implemented by falling back to the C standard library, which
+ might perform software emulation (this may yield results that are not IEEE
+ compliant on some platforms).
+
+ The additional operations 'fmsubFloat#'/'fmsubDouble#',
+ 'fnmaddFloat#'/'fnmaddDouble#' and 'fnmsubFloat#'/'fnmsubDouble#' provide
+ variants on 'fmaddFloat#'/'fmaddDouble#' in which some signs are changed:
+
+ \[
+ \begin{aligned}
+ \mathrm{fmadd}\ x\ y\ z &= \phantom{+} x * y + z \\[8pt]
+ \mathrm{fmsub}\ x\ y\ z &= \phantom{+} x * y - z \\[8pt]
+ \mathrm{fnmadd}\ x\ y\ z &= - x * y + z \\[8pt]
+ \mathrm{fnmsub}\ x\ y\ z &= - x * y - z
+ \end{aligned}
+ \]
+
+ }
+------------------------------------------------------------------------
+
+primop FloatFMAdd "fmaddFloat#" GenPrimOp
+ Float# -> Float# -> Float# -> Float#
+ {Fused multiply-add operation: @'fmaddFloat#' x y z = x*y+z@. See "GHC.Prim#fma".}
+primop FloatFMSub "fmsubFloat#" GenPrimOp
+ Float# -> Float# -> Float# -> Float#
+ {Fused multiply-subtract operation: @'fmsubFloat#' x y z = x*y-z@. See "GHC.Prim#fma".}
+primop FloatFNMAdd "fnmaddFloat#" GenPrimOp
+ Float# -> Float# -> Float# -> Float#
+ {Fused negate-multiply-add operation: @'fnmaddFloat#' x y z = -x*y+z@. See "GHC.Prim#fma".}
+primop FloatFNMSub "fnmsubFloat#" GenPrimOp
+ Float# -> Float# -> Float# -> Float#
+ {Fused negate-multiply-subtract operation: @'fnmsubFloat#' x y z = -x*y-z@. See "GHC.Prim#fma".}
+
+primop DoubleFMAdd "fmaddDouble#" GenPrimOp
+ Double# -> Double# -> Double# -> Double#
+ {Fused multiply-add operation: @'fmaddDouble#' x y z = x*y+z@. See "GHC.Prim#fma".}
+primop DoubleFMSub "fmsubDouble#" GenPrimOp
+ Double# -> Double# -> Double# -> Double#
+ {Fused multiply-subtract operation: @'fmsubDouble#' x y z = x*y-z@. See "GHC.Prim#fma".}
+primop DoubleFNMAdd "fnmaddDouble#" GenPrimOp
+ Double# -> Double# -> Double# -> Double#
+ {Fused negate-multiply-add operation: @'fnmaddDouble#' x y z = -x*y+z@. See "GHC.Prim#fma".}
+primop DoubleFNMSub "fnmsubDouble#" GenPrimOp
+ Double# -> Double# -> Double# -> Double#
+ {Fused negate-multiply-subtract operation: @'fnmsubDouble#' x y z = -x*y-z@. See "GHC.Prim#fma".}
+
+------------------------------------------------------------------------
section "Arrays"
{Operations on 'Array#'.}
------------------------------------------------------------------------