diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2008-08-12 00:33:34 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2008-08-12 00:33:34 +0000 |
commit | 46803f4f67e821f6e62793d876be164fc04622b2 (patch) | |
tree | cc86c780395e6a14bcd92b37da29ac8194715ce3 /libavcodec/mdct.c | |
parent | 49c0dd754c85e2cbb7dfe8d63dbb7141cddaeb61 (diff) | |
download | ffmpeg-46803f4f67e821f6e62793d876be164fc04622b2.tar.gz |
optimize imdct_half:
remove tmp buffer.
skip fft reinterleave pass, leaving data in a format more convenient for simd.
merge post-rotate with post-reorder.
Originally committed as revision 14700 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/mdct.c')
-rw-r--r-- | libavcodec/mdct.c | 93 |
1 files changed, 36 insertions, 57 deletions
diff --git a/libavcodec/mdct.c b/libavcodec/mdct.c index 6a3b69a014..86d447b910 100644 --- a/libavcodec/mdct.c +++ b/libavcodec/mdct.c @@ -100,18 +100,25 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse) (pim) = _are * _bim + _aim * _bre;\ } -static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp) +/** + * Compute the middle half of the inverse MDCT of size N = 2^nbits, + * thus excluding the parts that can be derived by symmetry + * @param output N/2 samples + * @param input N/2 samples + */ +void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input) { - int k, n4, n2, n, j; + int k, n8, n4, n2, n, j; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; const FFTSample *in1, *in2; - FFTComplex *z = (FFTComplex *)tmp; + FFTComplex *z = (FFTComplex *)output; n = 1 << s->nbits; n2 = n >> 1; n4 = n >> 2; + n8 = n >> 3; /* pre rotation */ in1 = input; @@ -125,9 +132,15 @@ static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp) ff_fft_calc(&s->fft, z); /* post rotation + reordering */ - /* XXX: optimize */ - for(k = 0; k < n4; k++) { - CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); + output += n4; + for(k = 0; k < n8; k++) { + FFTSample r0, i0, r1, i1; + CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); + CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); + z[n8-k-1].re = r0; + z[n8-k-1].im = i0; + z[n8+k ].re = r1; + z[n8+k ].im = i1; } } @@ -140,52 +153,16 @@ static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp) void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input, FFTSample *tmp) { - int k, n8, n2, n; - FFTComplex *z = (FFTComplex *)tmp; - n = 1 << s->nbits; - n2 = n >> 1; - n8 = n >> 3; - - imdct_c(s, input, tmp); - - for(k = 0; k < n8; k++) { - output[2*k] = -z[n8 + k].im; - output[n2-1-2*k] = z[n8 + k].im; + int k; + int n = 1 << s->nbits; + int n2 = n >> 1; + int n4 = n >> 2; - output[2*k+1] = z[n8-1-k].re; - output[n2-1-2*k-1] = -z[n8-1-k].re; - - output[n2 + 2*k]=-z[k+n8].re; - output[n-1- 2*k]=-z[k+n8].re; - - output[n2 + 2*k+1]=z[n8-k-1].im; - output[n-2 - 2 * k] = z[n8-k-1].im; - } -} + ff_imdct_half(s, output+n4, input); -/** - * Compute the middle half of the inverse MDCT of size N = 2^nbits, - * thus excluding the parts that can be derived by symmetry - * @param output N/2 samples - * @param input N/2 samples - * @param tmp N/2 samples - */ -void ff_imdct_half(MDCTContext *s, FFTSample *output, - const FFTSample *input, FFTSample *tmp) -{ - int k, n8, n4, n; - FFTComplex *z = (FFTComplex *)tmp; - n = 1 << s->nbits; - n4 = n >> 2; - n8 = n >> 3; - - imdct_c(s, input, tmp); - - for(k = 0; k < n8; k++) { - output[n4-1-2*k] = z[n8+k].im; - output[n4-1-2*k-1] = -z[n8-k-1].re; - output[n4 + 2*k] = -z[n8+k].re; - output[n4 + 2*k+1] = z[n8-k-1].im; + for(k = 0; k < n4; k++) { + output[k] = -output[n2-k-1]; + output[n-k-1] = output[n2+k]; } } @@ -203,7 +180,7 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out, const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; - FFTComplex *x = (FFTComplex *)tmp; + FFTComplex *x = (FFTComplex *)out; n = 1 << s->nbits; n2 = n >> 1; @@ -227,12 +204,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out, ff_fft_calc(&s->fft, x); /* post rotation */ - for(i=0;i<n4;i++) { - re = x[i].re; - im = x[i].im; - CMUL(re1, im1, re, im, -tsin[i], -tcos[i]); - out[2*i] = im1; - out[n2-1-2*i] = re1; + for(i=0;i<n8;i++) { + FFTSample r0, i0, r1, i1; + CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]); + CMUL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]); + x[n8-i-1].re = r0; + x[n8-i-1].im = i0; + x[n8+i ].re = r1; + x[n8+i ].im = i1; } } |