diff options
author | Linfeng Zhang <linfengz@google.com> | 2016-06-16 16:21:02 -0700 |
---|---|---|
committer | Felicia Lim <flim@google.com> | 2017-01-17 14:04:37 -0800 |
commit | 783ad76766e1f6b6aaca5d6eb415ac8a8269e1f2 (patch) | |
tree | 120458893d45586c9c1f3ee07135a71b4d22fc01 /celt/celt_lpc.c | |
parent | c9ba55208c842a1681d82e7d7ff44fafedd2a853 (diff) | |
download | opus-783ad76766e1f6b6aaca5d6eb415ac8a8269e1f2.tar.gz |
Revise celt_fir_c() to not pass in argument "mem"
The "mem" in celt_fir_c() is either already contained in the head of input "x"
in reverse order, or can easily be attached to the head of "x"
before calling the function. Removing the "mem" argument eliminates the
redundant buffer copies inside the function.
Update celt_fir_sse4_1() accordingly.
Diffstat (limited to 'celt/celt_lpc.c')
-rw-r--r-- | celt/celt_lpc.c | 33 |
1 files changed, 12 insertions, 21 deletions
diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c index bc9eb2c8..0aabd192 100644 --- a/celt/celt_lpc.c +++ b/celt/celt_lpc.c @@ -89,56 +89,47 @@ int p void celt_fir_c( - const opus_val16 *_x, + const opus_val16 *x, const opus_val16 *num, - opus_val16 *_y, + opus_val16 *y, int N, int ord, - opus_val16 *mem, int arch) { int i,j; VARDECL(opus_val16, rnum); - VARDECL(opus_val16, x); SAVE_STACK; ALLOC(rnum, ord, opus_val16); - ALLOC(x, N+ord, opus_val16); for(i=0;i<ord;i++) rnum[i] = num[ord-i-1]; - for(i=0;i<ord;i++) - x[i] = mem[ord-i-1]; - for (i=0;i<N;i++) - x[i+ord]=_x[i]; - for(i=0;i<ord;i++) - mem[i] = _x[N-i-1]; #ifdef SMALL_FOOTPRINT (void)arch; for (i=0;i<N;i++) { - opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); + opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); for (j=0;j<ord;j++) { - sum = MAC16_16(sum,rnum[j],x[i+j]); + sum = MAC16_16(sum,rnum[j],x[i+j-ord]); } - _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); + y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); } #else for (i=0;i<N-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; - xcorr_kernel(rnum, x+i, sum, ord, arch); - _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); - _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); - _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); - _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); + xcorr_kernel(rnum, x+i-ord, sum, ord, arch); + y[i ] = SATURATE16(ADD32(EXTEND32(x[i ]), PSHR32(sum[0], SIG_SHIFT))); + y[i+1] = SATURATE16(ADD32(EXTEND32(x[i+1]), PSHR32(sum[1], SIG_SHIFT))); + y[i+2] = SATURATE16(ADD32(EXTEND32(x[i+2]), PSHR32(sum[2], SIG_SHIFT))); + y[i+3] = SATURATE16(ADD32(EXTEND32(x[i+3]), PSHR32(sum[3], SIG_SHIFT))); } for (;i<N;i++) { opus_val32 sum = 0; for (j=0;j<ord;j++) - sum = MAC16_16(sum,rnum[j],x[i+j]); - _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); + sum = MAC16_16(sum,rnum[j],x[i+j-ord]); + y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, 
SIG_SHIFT))); } #endif RESTORE_STACK; |