summary | refs | log | tree | commit | diff
path: root/libavfilter/x86
diff options
context:
space:
mode:
author: Paul B Mahol <onemda@gmail.com> 2020-01-30 22:01:23 +0100
committer: Paul B Mahol <onemda@gmail.com> 2020-02-04 18:28:04 +0100
commit: fcc0424c933742c8fc852371e985d16b6eb4bfe9 (patch)
tree: e6b1d088bd47d4a3c38c8a4b1c4a402090597b6b /libavfilter/x86
parent: c35382aaf471d5ba88648f22cc182b2b09b7d7fa (diff)
download: ffmpeg-fcc0424c933742c8fc852371e985d16b6eb4bfe9.tar.gz
avfilter/vf_ssim: improve precision
Use doubles for accumulating floats.
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/vf_ssim.asm 37
-rw-r--r-- libavfilter/x86/vf_ssim_init.c 2
2 files changed, 26 insertions, 13 deletions
diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm
index 3293e66701..1e682fe452 100644
--- a/libavfilter/x86/vf_ssim.asm
+++ b/libavfilter/x86/vf_ssim.asm
@@ -169,8 +169,9 @@ SSIM_4X4_LINE 8
%endif
INIT_XMM sse4
-cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
+cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w
pxor m0, m0
+ pxor m6, m6
.loop:
mova m1, [sum0q+mmsize*0]
mova m2, [sum0q+mmsize*1]
@@ -214,34 +215,46 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
mulps m4, m5
mulps m3, m1
divps m4, m3 ; ssim_endl
- addps m0, m4 ; ssim
+ mova m5, m4
+ cvtps2pd m3, m5
+ movhlps m5, m5
+ cvtps2pd m5, m5
+ addpd m0, m3 ; ssim
+ addpd m6, m5 ; ssim
add sum0q, mmsize*4
add sum1q, mmsize*4
sub wd, 4
jg .loop
- ; subps the ones we added too much
+ ; subpd the ones we added too much
test wd, wd
jz .end
add wd, 4
+ test wd, 3
+ jz .skip3
test wd, 2
jz .skip2
- psrldq m4, 8
-.skip2:
test wd, 1
jz .skip1
- psrldq m4, 4
+.skip3:
+ psrldq m5, 8
+ subpd m6, m5
+ jmp .end
+.skip2:
+ psrldq m5, 8
+ subpd m6, m5
+ subpd m0, m3
+ jmp .end
.skip1:
- subps m0, m4
+ psrldq m3, 16
+ subpd m6, m5
.end:
+ addpd m0, m6
movhlps m4, m0
- addps m0, m4
- movss m4, m0
- shufps m0, m0, 1
- addss m0, m4
+ addpd m0, m4
%if ARCH_X86_32
- movss r0m, m0
+ movsd r0m, m0
fld r0mp
%endif
RET
diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c
index 599c928403..cbaa20ef16 100644
--- a/libavfilter/x86/vf_ssim_init.c
+++ b/libavfilter/x86/vf_ssim_init.c
@@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
int (*sums)[4], int w);
-float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
+double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
void ff_ssim_init_x86(SSIMDSPContext *dsp)
{