diff options
-rw-r--r-- | libavcodec/aacdec.c | 12 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_neon.c | 3 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_neon.S | 38 | ||||
-rw-r--r-- | libavcodec/dsputil.c | 9 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 10 | ||||
-rw-r--r-- | libavcodec/libmp3lame.c | 14 | ||||
-rw-r--r-- | libavcodec/wmaenc.c | 2 | ||||
-rw-r--r-- | libavcodec/wmaprodec.c | 22 | ||||
-rw-r--r-- | libavutil/arm/float_dsp_init_neon.c | 4 | ||||
-rw-r--r-- | libavutil/arm/float_dsp_neon.S | 38 | ||||
-rw-r--r-- | libavutil/float_dsp.c | 9 | ||||
-rw-r--r-- | libavutil/float_dsp.h | 15 |
12 files changed, 93 insertions, 83 deletions
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index af17acfb75..a69f055859 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -1360,7 +1360,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len); scale = sf[idx] / sqrtf(band_energy); - ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len); + ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); } } else { const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; @@ -1506,7 +1506,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], } } while (len -= 2); - ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); + ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); } } @@ -1730,10 +1730,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p c *= 1 - 2 * cpe->ms_mask[idx]; scale = c * sce1->sf[idx]; for (group = 0; group < ics->group_len[g]; group++) - ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], - coef0 + group * 128 + offsets[i], - scale, - offsets[i + 1] - offsets[i]); + ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], + coef0 + group * 128 + offsets[i], + scale, + offsets[i + 1] - offsets[i]); } } else { int bt_run_end = sce1->band_type_run_end[idx]; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index a132f6f993..b2e7204a60 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_vector_fmul_window_neon(float *dst, const float *src0, const float *src1, const float *win, int len); -void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, - int len); void ff_butterflies_float_neon(float *v1, float *v2, int len); float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); void ff_vector_fmul_reverse_neon(float *dst, const float *src0, @@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) } c->vector_fmul_window = ff_vector_fmul_window_neon; - c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; c->butterflies_float = ff_butterflies_float_neon; c->scalarproduct_float = ff_scalarproduct_float_neon; c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index ca1d2dee3f..cf9ad9e583 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1 endfunc #endif -function ff_vector_fmul_scalar_neon, export=1 -VFP len .req r2 -NOVFP len .req r3 -VFP vdup.32 q8, d0[0] -NOVFP vdup.32 q8, r2 - bics r12, len, #15 - beq 3f - vld1.32 {q0},[r1,:128]! - vld1.32 {q1},[r1,:128]! -1: vmul.f32 q0, q0, q8 - vld1.32 {q2},[r1,:128]! - vmul.f32 q1, q1, q8 - vld1.32 {q3},[r1,:128]! - vmul.f32 q2, q2, q8 - vst1.32 {q0},[r0,:128]! - vmul.f32 q3, q3, q8 - vst1.32 {q1},[r0,:128]! - subs r12, r12, #16 - beq 2f - vld1.32 {q0},[r1,:128]! - vst1.32 {q2},[r0,:128]! - vld1.32 {q1},[r1,:128]! - vst1.32 {q3},[r0,:128]! - b 1b -2: vst1.32 {q2},[r0,:128]! - vst1.32 {q3},[r0,:128]! - ands len, len, #15 - it eq - bxeq lr -3: vld1.32 {q0},[r1,:128]! - vmul.f32 q0, q0, q8 - vst1.32 {q0},[r0,:128]! - subs len, len, #4 - bgt 3b - bx lr - .unreq len -endfunc - function ff_butterflies_float_neon, export=1 1: vld1.32 {q0},[r0,:128] vld1.32 {q1},[r1,:128] diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 7a3fdba299..d4471dc24c 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2392,14 +2392,6 @@ static void vector_fmul_window_c(float *dst, const float *src0, } } -static void vector_fmul_scalar_c(float *dst, const float *src, float mul, - int len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] = src[i] * mul; -} - static void butterflies_float_c(float *restrict v1, float *restrict v2, int len) { @@ -2869,7 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->scalarproduct_float = ff_scalarproduct_float_c; c->butterflies_float = butterflies_float_c; c->butterflies_float_interleave = butterflies_float_interleave_c; - c->vector_fmul_scalar = vector_fmul_scalar_c; c->shrink[0]= av_image_copy_plane; c->shrink[1]= ff_shrink22; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index f48aa96017..5640f3abea 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -383,16 +383,6 @@ typedef struct DSPContext { /* assume len is a multiple of 8, and arrays are 16-byte aligned */ void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); /** - * Multiply a vector of floats by a scalar float. Source and - * destination vectors must overlap exactly or not at all. - * @param dst result vector, 16-byte aligned - * @param src input vector, 16-byte aligned - * @param mul scalar value - * @param len length of vector, multiple of 4 - */ - void (*vector_fmul_scalar)(float *dst, const float *src, float mul, - int len); - /** * Calculate the scalar product of two vectors of floats. * @param v1 first vector, 16-byte aligned * @param v2 second vector, 16-byte aligned diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c index 600f6fd9dd..264a0e2236 100644 --- a/libavcodec/libmp3lame.c +++ b/libavcodec/libmp3lame.c @@ -28,12 +28,12 @@ #include "libavutil/channel_layout.h" #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "libavutil/intreadwrite.h" #include "libavutil/log.h" #include "libavutil/opt.h" #include "avcodec.h" #include "audio_frame_queue.h" -#include "dsputil.h" #include "internal.h" #include "mpegaudio.h" #include "mpegaudiodecheader.h" @@ -50,7 +50,7 @@ typedef struct LAMEContext { int reservoir; float *samples_flt[2]; AudioFrameQueue afq; - DSPContext dsp; + AVFloatDSPContext fdsp; } LAMEContext; @@ -167,7 +167,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx) if (ret < 0) goto error; - ff_dsputil_init(&s->dsp, avctx); + avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); return 0; error: @@ -205,10 +205,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, return AVERROR(EINVAL); } for (ch = 0; ch < avctx->channels; ch++) { - s->dsp.vector_fmul_scalar(s->samples_flt[ch], - (const float *)frame->data[ch], - 32768.0f, - FFALIGN(frame->nb_samples, 8)); + s->fdsp.vector_fmul_scalar(s->samples_flt[ch], + (const float *)frame->data[ch], + 32768.0f, + FFALIGN(frame->nb_samples, 8)); } ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt); break; diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 13d8a1cfbf..044114b516 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -111,7 +111,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame) for (ch = 0; ch < avctx->channels; ch++) { memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); - s->dsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); + s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len); s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); mdct->mdct_calc(mdct, s->coefs[ch], s->output); diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index 43fdbc068d..ac0cce16bd 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -86,6 +86,7 @@ * subframe in order to reconstruct the output samples. */ +#include "libavutil/float_dsp.h" #include "libavutil/intfloat.h" #include "libavutil/intreadwrite.h" #include "avcodec.h" @@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx { AVCodecContext* avctx; ///< codec context for av_log AVFrame frame; ///< AVFrame for decoded output DSPContext dsp; ///< accelerated DSP functions + AVFloatDSPContext fdsp; uint8_t frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data PutBitContext pb; ///< context for filling the frame_data buffer @@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx) s->avctx = avctx; ff_dsputil_init(&s->dsp, avctx); + avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); + init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; @@ -1008,12 +1012,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s) } } else if (s->avctx->channels == 2) { int len = FFMIN(sfb[1], s->subframe_len) - sfb[0]; - s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0], - ch_data[0] + sfb[0], - 181.0 / 128, len); - s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0], - ch_data[1] + sfb[0], - 181.0 / 128, len); + s->fdsp.vector_fmul_scalar(ch_data[0] + sfb[0], + ch_data[0] + sfb[0], + 181.0 / 128, len); + s->fdsp.vector_fmul_scalar(ch_data[1] + sfb[0], + ch_data[1] + sfb[0], + 181.0 / 128, len); } } } @@ -1259,9 +1263,9 @@ static int decode_subframe(WMAProDecodeCtx *s) s->channel[c].scale_factor_step; const float quant = pow(10.0, exp / 20.0); int start = s->cur_sfb_offsets[b]; - s->dsp.vector_fmul_scalar(s->tmp + start, - s->channel[c].coeffs + start, - quant, end - start); + s->fdsp.vector_fmul_scalar(s->tmp + start, + s->channel[c].coeffs + start, + quant, end - start); } /** apply imdct (imdct_half == DCTIV with reverse) */ diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index 3ca0288b31..88eb4b3d2a 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, int len); +void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, + int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; + fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index 4aa6f838dd..6d7bd5236e 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2 bx lr .unreq len endfunc + +function ff_vector_fmul_scalar_neon, export=1 +VFP len .req r2 +NOVFP len .req r3 +VFP vdup.32 q8, d0[0] +NOVFP vdup.32 q8, r2 + bics r12, len, #15 + beq 3f + vld1.32 {q0},[r1,:128]! + vld1.32 {q1},[r1,:128]! +1: vmul.f32 q0, q0, q8 + vld1.32 {q2},[r1,:128]! + vmul.f32 q1, q1, q8 + vld1.32 {q3},[r1,:128]! + vmul.f32 q2, q2, q8 + vst1.32 {q0},[r0,:128]! + vmul.f32 q3, q3, q8 + vst1.32 {q1},[r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0},[r1,:128]! + vst1.32 {q2},[r0,:128]! + vld1.32 {q1},[r1,:128]! + vst1.32 {q3},[r0,:128]! + b 1b +2: vst1.32 {q2},[r0,:128]! + vst1.32 {q3},[r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0},[r1,:128]! + vmul.f32 q0, q0, q8 + vst1.32 {q0},[r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 2e90939090..b6b11818b5 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul, dst[i] += src[i] * mul; } +static void vector_fmul_scalar_c(float *dst, const float *src, float mul, + int len) +{ + int i; + for (i = 0; i < len; i++) + dst[i] = src[i] * mul; +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; fdsp->vector_fmac_scalar = vector_fmac_scalar_c; + fdsp->vector_fmul_scalar = vector_fmul_scalar_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 95cef62f29..cb4b28f0e2 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext { */ void (*vector_fmac_scalar)(float *dst, const float *src, float mul, int len); + + /** + * Multiply a vector of floats by a scalar float. Source and + * destination vectors must overlap exactly or not at all. + * + * @param dst result vector + * constraints: 16-byte aligned + * @param src input vector + * constraints: 16-byte aligned + * @param mul scalar value + * @param len length of vector + * constraints: multiple of 4 + */ + void (*vector_fmul_scalar)(float *dst, const float *src, float mul, + int len); } AVFloatDSPContext; /** |