diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-06-12 16:27:00 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-06-12 16:35:13 +0200 |
commit | cbeeaf25932aac5923e075ae241fcc5287f7396a (patch) | |
tree | 73cca1c32b4e7ed3d6e31f871cb93fb7dbf284a6 /libswresample | |
parent | 52afa43691116bf35b6398ef3087f3a1508ef9a6 (diff) | |
download | ffmpeg-cbeeaf25932aac5923e075ae241fcc5287f7396a.tar.gz |
swr: mix_1_1 int16 MMX
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswresample')
-rw-r--r-- | libswresample/rematrix.c | 2 | ||||
-rw-r--r-- | libswresample/x86/rematrix.asm | 63 | ||||
-rw-r--r-- | libswresample/x86/swresample_x86.c | 15 |
3 files changed, 79 insertions, 1 deletion
diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c index 18e89c96ba..b9c5a4cf50 100644 --- a/libswresample/rematrix.c +++ b/libswresample/rematrix.c @@ -380,7 +380,7 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus in_i= s->matrix_ch[out_i][1]; if(s->matrix[out_i][in_i]!=1.0){ if(s->mix_1_1_simd && len1) - s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_matrix, in->ch_count*out_i + in_i, len1); + s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_simd_matrix, in->ch_count*out_i + in_i, len1); if(len != len1) s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1); }else if(mustcopy){ diff --git a/libswresample/x86/rematrix.asm b/libswresample/x86/rematrix.asm index e6f0b2fab6..c96ce49d9e 100644 --- a/libswresample/x86/rematrix.asm +++ b/libswresample/x86/rematrix.asm @@ -21,6 +21,12 @@ %include "libavutil/x86/x86inc.asm" %include "libavutil/x86/x86util.asm" + +SECTION_RODATA +align 32 +dw1: times 8 dd 1 +w1 : times 16 dw 1 + SECTION .text %macro MIX2_FLT 1 @@ -99,6 +105,63 @@ mix_1_1_float_u_int %+ SUFFIX REP_RET %endmacro +%macro MIX1_INT16 1 +cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len +%ifidn %1, a + test inq, mmsize-1 + jne mix_1_1_int16_u_int %+ SUFFIX + test outq, mmsize-1 + jne mix_1_1_int16_u_int %+ SUFFIX +%else +mix_1_1_int16_u_int %+ SUFFIX +%endif + movd m4, [coeffpq + 4*indexq] + SPLATW m5, m4 + psllq m4, 32 + psrlq m4, 48 + mova m0, [w1] + psllw m0, m4 + psrlw m0, 1 + punpcklwd m5, m0 + add lenq , lenq + add inq , lenq + add outq , lenq + neg lenq +.next: + mov%1 m0, [inq + lenq ] + mov%1 m2, [inq + lenq + mmsize] + mova m1, m0 + mova m3, m2 + punpcklwd m0, [w1] + punpckhwd m1, [w1] + punpcklwd m2, [w1] + punpckhwd m3, [w1] + pmaddwd m0, m5 + pmaddwd m1, m5 + pmaddwd m2, m5 + pmaddwd m3, m5 + psrad m0, m4 + psrad m1, m4 + psrad m2, m4 + psrad m3, m4 + packssdw m0, m1 + packssdw m2, m3 + mov%1 [outq + lenq ], m0 + 
mov%1 [outq + lenq + mmsize], m2 + add lenq, mmsize*2 + jl .next +%if mmsize == 8 + emms + RET +%else + REP_RET +%endif +%endmacro + +INIT_MMX mmx +MIX1_INT16 u +MIX1_INT16 a + INIT_XMM sse MIX2_FLT u MIX2_FLT a diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c index 18c601f72e..ba0f1f131a 100644 --- a/libswresample/x86/swresample_x86.c +++ b/libswresample/x86/swresample_x86.c @@ -163,6 +163,21 @@ void swri_rematrix_init_x86(struct SwrContext *s){ s->mix_2_1_simd = NULL; if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ + if(mm_flags & AV_CPU_FLAG_MMX) { + s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx; + } + s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t)); + for(i=0; i<nb_out; i++){ + int sh = 0; + for(j=0; j<nb_in; j++) + sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j])); + sh = FFMAX(av_log2(sh) - 14, 0); + for(j=0; j<nb_in; j++) { + ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh; + ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] = + ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh; + } + } } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ if(mm_flags & AV_CPU_FLAG_SSE) { s->mix_1_1_simd = ff_mix_1_1_a_float_sse; |