summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorPaul B Mahol <onemda@gmail.com>2022-09-03 23:41:38 +0200
committerPaul B Mahol <onemda@gmail.com>2022-09-05 12:27:49 +0200
commit1e202d89c95d99d69412854f4ee9d5e569c3da69 (patch)
tree96a97097bc8fdb053f809088a0dd7bdaa4334256 /libavcodec/x86
parentea5b375e0e95b05c88ec728d5953c916b4925bef (diff)
downloadffmpeg-1e202d89c95d99d69412854f4ee9d5e569c3da69.tar.gz
avcodec/x86/flacdsp: fix bug in decorrelation
Fixes #9297
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/flacdsp.asm23
-rw-r--r--libavcodec/x86/flacdsp_init.c41
2 files changed, 44 insertions, 20 deletions
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 7138611526..6d755f4972 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -23,6 +23,10 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA
+
+vector: db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,4,5,8,9,12,13,
+
SECTION .text
%macro PMACSDQL 5
@@ -89,6 +93,9 @@ LPC_32 sse4
;----------------------------------------------------------------------------------
%macro FLAC_DECORRELATE_16 3-4
cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
+%ifidn %1, indep2
+ VBROADCASTI128 m2, [vector]
+%endif
%if ARCH_X86_32
mov lend, lenm
%endif
@@ -112,11 +119,17 @@ align 16
%endif
%ifnidn %1, indep2
p%4d m2, m0, m1
+ packssdw m%2, m%2
+ packssdw m%3, m%3
+ punpcklwd m%2, m%3
+ psllw m%2, m3
+%else
+ pslld m%2, m3
+ pslld m%3, m3
+ pshufb m%2, m%2, m2
+ pshufb m%3, m%3, m2
+ punpcklwd m%2, m%3
%endif
- packssdw m%2, m%2
- packssdw m%3, m%3
- punpcklwd m%2, m%3
- psllw m%2, m3
mova [outq + lenq], m%2
add lenq, 16
jl .loop
@@ -292,7 +305,7 @@ align 16
REP_RET
%endmacro
-INIT_XMM sse2
+INIT_XMM ssse3
FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
FLAC_DECORRELATE_INDEP 32, 2, 3, d
FLAC_DECORRELATE_INDEP 16, 4, 3, w
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 2deaf3117f..48e3e7c55c 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -34,7 +34,9 @@ void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int chann
void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
- int len, int shift); \
+ int len, int shift);
+
+#define DECORRELATE_IFUNCS(fmt, opt) \
void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
@@ -48,6 +50,10 @@ DECORRELATE_FUNCS(16, sse2);
DECORRELATE_FUNCS(16, avx);
DECORRELATE_FUNCS(32, sse2);
DECORRELATE_FUNCS(32, avx);
+DECORRELATE_IFUNCS(16, ssse3);
+DECORRELATE_IFUNCS(16, avx);
+DECORRELATE_IFUNCS(32, ssse3);
+DECORRELATE_IFUNCS(32, avx);
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
{
@@ -56,29 +62,34 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
if (EXTERNAL_SSE2(cpu_flags)) {
if (fmt == AV_SAMPLE_FMT_S16) {
+ c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
+ c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
+ c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
+ } else if (fmt == AV_SAMPLE_FMT_S32) {
+ c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
+ c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
+ c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
+ }
+ }
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ if (fmt == AV_SAMPLE_FMT_S16) {
if (channels == 2)
- c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3;
else if (channels == 4)
- c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3;
else if (channels == 6)
- c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3;
else if (ARCH_X86_64 && channels == 8)
- c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2;
- c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
- c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
- c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3;
} else if (fmt == AV_SAMPLE_FMT_S32) {
if (channels == 2)
- c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3;
else if (channels == 4)
- c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3;
else if (channels == 6)
- c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3;
else if (ARCH_X86_64 && channels == 8)
- c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2;
- c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
- c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
- c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3;
}
}
if (EXTERNAL_SSE4(cpu_flags)) {