diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-10-14 16:06:17 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-10-14 16:06:22 +0200 |
commit | e3e0e3d0c913a55fd616d3c431859564dc0e9b79 (patch) | |
tree | 07e6759f4bf1a3472bc7258e6cf1192cce4292fb /libavutil | |
parent | 9ac124c889a6b3a8472f3bdfda2c379cb4ddaefa (diff) | |
parent | c6908d6b4b377a04a5d055ba874bdbcf06c80497 (diff) | |
download | ffmpeg-e3e0e3d0c913a55fd616d3c431859564dc0e9b79.tar.gz |
Merge commit 'c6908d6b4b377a04a5d055ba874bdbcf06c80497'
* commit 'c6908d6b4b377a04a5d055ba874bdbcf06c80497':
x86inc: FMA3/4 Support
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/x86/x86inc.asm | 42 | ||||
-rw-r--r-- | libavutil/x86/x86util.asm | 4 |
2 files changed, 45 insertions, 1 deletions
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 4c794def0c..07ed08f8be 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -1425,3 +1425,45 @@ FMA_INSTR pmadcswd, pmaddwd, paddd ; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf. ; This lets us use tzcnt without bumping the yasm version requirement yet. %define tzcnt rep bsf + +; convert FMA4 to FMA3 if possible +%macro FMA4_INSTR 4 + %macro %1 4-8 %1, %2, %3, %4 + %if cpuflag(fma4) + v%5 %1, %2, %3, %4 + %elifidn %1, %2 + v%6 %1, %4, %3 ; %1 = %1 * %3 + %4 + %elifidn %1, %3 + v%7 %1, %2, %4 ; %1 = %2 * %1 + %4 + %elifidn %1, %4 + v%8 %1, %2, %3 ; %1 = %2 * %3 + %1 + %else + %error fma3 emulation of ``%5 %1, %2, %3, %4'' is not supported + %endif + %endmacro +%endmacro + +FMA4_INSTR fmaddpd, fmadd132pd, fmadd213pd, fmadd231pd +FMA4_INSTR fmaddps, fmadd132ps, fmadd213ps, fmadd231ps +FMA4_INSTR fmaddsd, fmadd132sd, fmadd213sd, fmadd231sd +FMA4_INSTR fmaddss, fmadd132ss, fmadd213ss, fmadd231ss + +FMA4_INSTR fmaddsubpd, fmaddsub132pd, fmaddsub213pd, fmaddsub231pd +FMA4_INSTR fmaddsubps, fmaddsub132ps, fmaddsub213ps, fmaddsub231ps +FMA4_INSTR fmsubaddpd, fmsubadd132pd, fmsubadd213pd, fmsubadd231pd +FMA4_INSTR fmsubaddps, fmsubadd132ps, fmsubadd213ps, fmsubadd231ps + +FMA4_INSTR fmsubpd, fmsub132pd, fmsub213pd, fmsub231pd +FMA4_INSTR fmsubps, fmsub132ps, fmsub213ps, fmsub231ps +FMA4_INSTR fmsubsd, fmsub132sd, fmsub213sd, fmsub231sd +FMA4_INSTR fmsubss, fmsub132ss, fmsub213ss, fmsub231ss + +FMA4_INSTR fnmaddpd, fnmadd132pd, fnmadd213pd, fnmadd231pd +FMA4_INSTR fnmaddps, fnmadd132ps, fnmadd213ps, fnmadd231ps +FMA4_INSTR fnmaddsd, fnmadd132sd, fnmadd213sd, fnmadd231sd +FMA4_INSTR fnmaddss, fnmadd132ss, fnmadd213ss, fnmadd231ss + +FMA4_INSTR fnmsubpd, fnmsub132pd, fnmsub213pd, fnmsub231pd +FMA4_INSTR fnmsubps, fnmsub132ps, fnmsub213ps, fnmsub231ps +FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd +FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 01f306831c..59e5df248e 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -668,7 +668,9 @@ ; Wrapper for non-FMA version of fmaddps %macro FMULADD_PS 5 - %ifidn %1, %4 + %if cpuflag(fma3) || cpuflag(fma4) + fmaddps %1, %2, %3, %4 + %elifidn %1, %4 mulps %5, %2, %3 addps %1, %4, %5 %else |