diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-10-31 15:40:12 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2013-01-08 14:52:43 -0500 |
commit | a6a3164b1399372dcf779643d7d605d7438c91b7 (patch) | |
tree | bcc065d4117f621a432cd9cc359fb07c96c8e8d4 /libavresample | |
parent | 1fb8f6a44f06e48386450fe0363aefc02583d24a (diff) | |
download | ffmpeg-a6a3164b1399372dcf779643d7d605d7438c91b7.tar.gz |
x86: lavr: add SSE2/AVX dither_int_to_float()
Diffstat (limited to 'libavresample')
-rw-r--r-- | libavresample/x86/dither.asm | 64 | ||||
-rw-r--r-- | libavresample/x86/dither_init.c | 22 |
2 files changed, 86 insertions, 0 deletions
diff --git a/libavresample/x86/dither.asm b/libavresample/x86/dither.asm
index 34e7924291..2192e98eb4 100644
--- a/libavresample/x86/dither.asm
+++ b/libavresample/x86/dither.asm
@@ -23,6 +23,9 @@
 
 SECTION_RODATA 32
 
+; 1.0f / (2.0f * INT32_MAX)
+pf_dither_scale: times 8 dd 2.32830643762e-10
+
 pf_s16_scale: times 4 dd 32753.0
 
 SECTION_TEXT
@@ -51,3 +54,64 @@ cglobal quantize, 4,4,3, dst, src, dither, len
     add      lenq, mmsize
     jl .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len)
+;------------------------------------------------------------------------------
+
+%macro DITHER_INT_TO_FLOAT_RECTANGULAR 0
+cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len
+    lea      lenq, [4*lend]
+    add      srcq, lenq
+    add      dstq, lenq
+    neg      lenq
+    mova     m0, [pf_dither_scale]
+.loop:
+    cvtdq2ps m1, [srcq+lenq]
+    cvtdq2ps m2, [srcq+lenq+mmsize]
+    mulps    m1, m1, m0
+    mulps    m2, m2, m0
+    mova     [dstq+lenq], m1
+    mova     [dstq+lenq+mmsize], m2
+    add      lenq, 2*mmsize
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+DITHER_INT_TO_FLOAT_RECTANGULAR
+INIT_YMM avx
+DITHER_INT_TO_FLOAT_RECTANGULAR
+
+;------------------------------------------------------------------------------
+; void ff_dither_int_to_float_triangular(float *dst, int *src0, int len)
+;------------------------------------------------------------------------------
+
+%macro DITHER_INT_TO_FLOAT_TRIANGULAR 0
+cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1
+    lea      lenq, [4*lend]
+    lea      src1q, [src0q+2*lenq]
+    add      src0q, lenq
+    add      dstq, lenq
+    neg      lenq
+    mova     m0, [pf_dither_scale]
+.loop:
+    cvtdq2ps m1, [src0q+lenq]
+    cvtdq2ps m2, [src0q+lenq+mmsize]
+    cvtdq2ps m3, [src1q+lenq]
+    cvtdq2ps m4, [src1q+lenq+mmsize]
+    addps    m1, m1, m3
+    addps    m2, m2, m4
+    mulps    m1, m1, m0
+    mulps    m2, m2, m0
+    mova     [dstq+lenq], m1
+    mova     [dstq+lenq+mmsize], m2
+    add      lenq, 2*mmsize
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+DITHER_INT_TO_FLOAT_TRIANGULAR
+INIT_YMM avx
+DITHER_INT_TO_FLOAT_TRIANGULAR
diff --git a/libavresample/x86/dither_init.c b/libavresample/x86/dither_init.c
index 1e20c1194a..de38398891 100644
--- a/libavresample/x86/dither_init.c
+++ b/libavresample/x86/dither_init.c
@@ -26,6 +26,12 @@
 extern void ff_quantize_sse2(int16_t *dst, const float *src, float *dither,
                              int len);
 
+extern void ff_dither_int_to_float_rectangular_sse2(float *dst, int *src, int len);
+extern void ff_dither_int_to_float_rectangular_avx(float *dst, int *src, int len);
+
+extern void ff_dither_int_to_float_triangular_sse2(float *dst, int *src0, int len);
+extern void ff_dither_int_to_float_triangular_avx(float *dst, int *src0, int len);
+
 av_cold void ff_dither_init_x86(DitherDSPContext *ddsp,
                                 enum AVResampleDitherMethod method)
 {
@@ -36,4 +42,20 @@ av_cold void ff_dither_init_x86(DitherDSPContext *ddsp,
         ddsp->ptr_align     = 16;
         ddsp->samples_align = 8;
     }
+
+    if (method == AV_RESAMPLE_DITHER_RECTANGULAR) {
+        if (EXTERNAL_SSE2(mm_flags)) {
+            ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_sse2;
+        }
+        if (EXTERNAL_AVX(mm_flags)) {
+            ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_avx;
+        }
+    } else {
+        if (EXTERNAL_SSE2(mm_flags)) {
+            ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_sse2;
+        }
+        if (EXTERNAL_AVX(mm_flags)) {
+            ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_avx;
+        }
+    }
 } |