diff options
author | Paul B Mahol <onemda@gmail.com> | 2020-01-30 22:01:23 +0100 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2020-02-04 18:28:04 +0100 |
commit | fcc0424c933742c8fc852371e985d16b6eb4bfe9 (patch) | |
tree | e6b1d088bd47d4a3c38c8a4b1c4a402090597b6b /libavfilter/x86 | |
parent | c35382aaf471d5ba88648f22cc182b2b09b7d7fa (diff) | |
download | ffmpeg-fcc0424c933742c8fc852371e985d16b6eb4bfe9.tar.gz |
avfilter/vf_ssim: improve precision
Use doubles for accumulating floats.
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- | libavfilter/x86/vf_ssim.asm | 37 | ||||
-rw-r--r-- | libavfilter/x86/vf_ssim_init.c | 2 |
2 files changed, 26 insertions, 13 deletions
diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm
index 3293e66701..1e682fe452 100644
--- a/libavfilter/x86/vf_ssim.asm
+++ b/libavfilter/x86/vf_ssim.asm
@@ -169,8 +169,9 @@ SSIM_4X4_LINE 8
 %endif

 INIT_XMM sse4
-cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
+cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w
     pxor m0, m0
+    pxor m6, m6
 .loop:
     mova m1, [sum0q+mmsize*0]
     mova m2, [sum0q+mmsize*1]
@@ -214,34 +215,46 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
     mulps m4, m5
     mulps m3, m1
     divps m4, m3 ; ssim_endl
-    addps m0, m4 ; ssim
+    mova m5, m4
+    cvtps2pd m3, m5
+    movhlps m5, m5
+    cvtps2pd m5, m5
+    addpd m0, m3 ; ssim
+    addpd m6, m5 ; ssim
     add sum0q, mmsize*4
     add sum1q, mmsize*4
     sub wd, 4
     jg .loop

-    ; subps the ones we added too much
+    ; subpd the ones we added too much
     test wd, wd
     jz .end
     add wd, 4
+    test wd, 3
+    jz .skip3
     test wd, 2
     jz .skip2
-    psrldq m4, 8
-.skip2:
     test wd, 1
     jz .skip1
-    psrldq m4, 4
+.skip3:
+    psrldq m5, 8
+    subpd m6, m5
+    jmp .end
+.skip2:
+    psrldq m5, 8
+    subpd m6, m5
+    subpd m0, m3
+    jmp .end
 .skip1:
-    subps m0, m4
+    psrldq m3, 16
+    subpd m6, m5

 .end:
+    addpd m0, m6
     movhlps m4, m0
-    addps m0, m4
-    movss m4, m0
-    shufps m0, m0, 1
-    addss m0, m4
+    addpd m0, m4
 %if ARCH_X86_32
-    movss r0m, m0
+    movsd r0m, m0
     fld r0mp
 %endif
     RET
diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c
index 599c928403..cbaa20ef16 100644
--- a/libavfilter/x86/vf_ssim_init.c
+++ b/libavfilter/x86/vf_ssim_init.c
@@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
 void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride,
                            const uint8_t *ref, ptrdiff_t ref_stride,
                            int (*sums)[4], int w);
-float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
+double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);

 void ff_ssim_init_x86(SSIMDSPContext *dsp)
 {