diff options
author | Martijn van Beurden <mvanb1@gmail.com> | 2022-05-08 20:14:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-08 20:14:36 +0200 |
commit | b433bbc5cfb4b8169b6a1864e5df3ba16f787665 (patch) | |
tree | 9ef63cc40499d9da9e17d4f15db4a96bbf7084b9 /src | |
parent | b64ea1171bfa7424d2e3b2ffba50c58fa15769a7 (diff) | |
download | flac-b433bbc5cfb4b8169b6a1864e5df3ba16f787665.tar.gz |
Rewrite of fixed_compute_best_predictor functions
The code of fixed_compute_best_predictor was presumable once optimized,
but it appears a much more readable form is now equally fast or even faster,
see https://github.com/xiph/flac/pull/337 for details.
Diffstat (limited to 'src')
-rw-r--r-- | src/libFLAC/fixed.c | 51 |
1 files changed, 30 insertions, 21 deletions
diff --git a/src/libFLAC/fixed.c b/src/libFLAC/fixed.c index f6dc9ac7..9e53f299 100644 --- a/src/libFLAC/fixed.c +++ b/src/libFLAC/fixed.c @@ -219,21 +219,39 @@ uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t d uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) #endif { + FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0; + uint32_t order; +#if 0 + /* This code has been around a long time, and was written when compilers weren't able + * to vectorize code. These days, compilers are better in optimizing the next block + * which is also much more readable + */ FLAC__int32 last_error_0 = data[-1]; FLAC__int32 last_error_1 = data[-1] - data[-2]; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]); FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]); FLAC__int32 error, save; - FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0; - uint32_t i, order; - - for(i = 0; i < data_len; i++) { + /* total_error_* are 64-bits to avoid overflow when encoding + * erratic signals when the bits-per-sample and blocksize are + * large. + */ + for(uint32_t i = 0; i < data_len; i++) { error = data[i] ; total_error_0 += local_abs(error); save = error; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save; } +#else + for(int i = 0; i < (int)data_len; i++) { + total_error_0 += local_abs(data[i]); + total_error_1 += local_abs(data[i] - data[i-1]); + total_error_2 += local_abs(data[i] - 2 * data[i-1] + data[i-2]); + total_error_3 += local_abs(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]); + total_error_4 += local_abs(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]); + } +#endif + /* prefer lower order */ if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4)) @@ -278,24 +296,15 @@ uint32_t FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], uint3 uint32_t FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) #endif { - FLAC__int32 last_error_0 = data[-1]; - FLAC__int32 last_error_1 = data[-1] - data[-2]; - FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]); - FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]); - FLAC__int32 error, save; - /* total_error_* are 64-bits to avoid overflow when encoding - * erratic signals when the bits-per-sample and blocksize are - * large. - */ FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0; - uint32_t i, order; - - for(i = 0; i < data_len; i++) { - error = data[i] ; total_error_0 += local_abs(error); save = error; - error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error; - error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error; - error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error; - error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save; + uint32_t order; + + for(int i = 0; i < (int)data_len; i++) { + total_error_0 += local_abs(data[i]); + total_error_1 += local_abs(data[i] - data[i-1]); + total_error_2 += local_abs(data[i] - 2 * data[i-1] + data[i-2]); + total_error_3 += local_abs(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]); + total_error_4 += local_abs(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]); } /* prefer lower order */ |