summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMartijn van Beurden <mvanb1@gmail.com>2022-05-08 20:14:36 +0200
committerGitHub <noreply@github.com>2022-05-08 20:14:36 +0200
commitb433bbc5cfb4b8169b6a1864e5df3ba16f787665 (patch)
tree9ef63cc40499d9da9e17d4f15db4a96bbf7084b9 /src
parentb64ea1171bfa7424d2e3b2ffba50c58fa15769a7 (diff)
downloadflac-b433bbc5cfb4b8169b6a1864e5df3ba16f787665.tar.gz
Rewrite of fixed_compute_best_predictor functions
The code of fixed_compute_best_predictor was presumably once optimized, but it appears that a much more readable form is now equally fast or even faster; see https://github.com/xiph/flac/pull/337 for details.
Diffstat (limited to 'src')
-rw-r--r--src/libFLAC/fixed.c51
1 files changed, 30 insertions, 21 deletions
diff --git a/src/libFLAC/fixed.c b/src/libFLAC/fixed.c
index f6dc9ac7..9e53f299 100644
--- a/src/libFLAC/fixed.c
+++ b/src/libFLAC/fixed.c
@@ -219,21 +219,39 @@ uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t d
uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
#endif
{
+ FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
+ uint32_t order;
+#if 0
+ /* This code has been around a long time, and was written when compilers weren't able
+ * to vectorize code. These days, compilers are better in optimizing the next block
+ * which is also much more readable
+ */
FLAC__int32 last_error_0 = data[-1];
FLAC__int32 last_error_1 = data[-1] - data[-2];
FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
FLAC__int32 error, save;
- FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
- uint32_t i, order;
-
- for(i = 0; i < data_len; i++) {
+ /* total_error_* are 64-bits to avoid overflow when encoding
+ * erratic signals when the bits-per-sample and blocksize are
+ * large.
+ */
+ for(uint32_t i = 0; i < data_len; i++) {
error = data[i] ; total_error_0 += local_abs(error); save = error;
error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
}
+#else
+ for(int i = 0; i < (int)data_len; i++) {
+ total_error_0 += local_abs(data[i]);
+ total_error_1 += local_abs(data[i] - data[i-1]);
+ total_error_2 += local_abs(data[i] - 2 * data[i-1] + data[i-2]);
+ total_error_3 += local_abs(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]);
+ total_error_4 += local_abs(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]);
+ }
+#endif
+
/* prefer lower order */
if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
@@ -278,24 +296,15 @@ uint32_t FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], uint3
uint32_t FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
#endif
{
- FLAC__int32 last_error_0 = data[-1];
- FLAC__int32 last_error_1 = data[-1] - data[-2];
- FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
- FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
- FLAC__int32 error, save;
- /* total_error_* are 64-bits to avoid overflow when encoding
- * erratic signals when the bits-per-sample and blocksize are
- * large.
- */
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
- uint32_t i, order;
-
- for(i = 0; i < data_len; i++) {
- error = data[i] ; total_error_0 += local_abs(error); save = error;
- error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
- error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
- error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
- error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
+ uint32_t order;
+
+ for(int i = 0; i < (int)data_len; i++) {
+ total_error_0 += local_abs(data[i]);
+ total_error_1 += local_abs(data[i] - data[i-1]);
+ total_error_2 += local_abs(data[i] - 2 * data[i-1] + data[i-2]);
+ total_error_3 += local_abs(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]);
+ total_error_4 += local_abs(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]);
}
/* prefer lower order */