summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Martijn van Beurden <mvanb1@gmail.com>, 2022-10-10 08:37:46 +0200
committer: Martijn van Beurden <mvanb1@gmail.com>, 2022-10-12 20:37:18 +0200
commit: 20ce6f9bf294f54e09a422bf78757cf821bb2f13 (patch)
tree: 18849730da5108340869e857c21690c313098b3f
parent: 039586e909747815e33641f421fb5613e851c5c0 (diff)
download: flac-20ce6f9bf294f54e09a422bf78757cf821bb2f13.tar.gz
Do not let small blocksizes be handled by the intrinsics autocorrelation calculation
Also, fix a bug in which apodization windows were not recalculated when the blocksize was shrunk.
-rw-r--r-- src/libFLAC/lpc.c | 34
-rw-r--r-- src/libFLAC/stream_encoder.c | 145
2 files changed, 95 insertions, 84 deletions
diff --git a/src/libFLAC/lpc.c b/src/libFLAC/lpc.c
index 1814fef5..a760b121 100644
--- a/src/libFLAC/lpc.c
+++ b/src/libFLAC/lpc.c
@@ -130,23 +130,7 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], uint32_t data_le
autoc[lag] = d;
}
#endif
- if(lag <= 8) {
- #undef MAX_LAG
- #define MAX_LAG 8
- #include "deduplication/lpc_compute_autocorrelation_intrin.c"
- }
- else if(lag <= 12) {
- #undef MAX_LAG
- #define MAX_LAG 12
- #include "deduplication/lpc_compute_autocorrelation_intrin.c"
- }
- else if(lag <= 16) {
- #undef MAX_LAG
- #define MAX_LAG 16
- #include "deduplication/lpc_compute_autocorrelation_intrin.c"
- }
- else {
-
+ if (data_len < FLAC__MAX_LPC_ORDER || lag > 16) {
/*
* this version tends to run faster because of better data locality
* ('data_len' is usually much larger than 'lag')
@@ -171,6 +155,22 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], uint32_t data_le
autoc[coeff] += d * data[sample+coeff];
}
}
+ else if(lag <= 8) {
+ #undef MAX_LAG
+ #define MAX_LAG 8
+ #include "deduplication/lpc_compute_autocorrelation_intrin.c"
+ }
+ else if(lag <= 12) {
+ #undef MAX_LAG
+ #define MAX_LAG 12
+ #include "deduplication/lpc_compute_autocorrelation_intrin.c"
+ }
+ else if(lag <= 16) {
+ #undef MAX_LAG
+ #define MAX_LAG 16
+ #include "deduplication/lpc_compute_autocorrelation_intrin.c"
+ }
+
}
void FLAC__lpc_compute_lp_coefficients(const double autoc[], uint32_t *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], double error[])
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 2879c51c..b33847dc 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -1486,6 +1486,10 @@ FLAC_API FLAC__bool FLAC__stream_encoder_finish(FLAC__StreamEncoder *encoder)
if(encoder->protected_->state == FLAC__STREAM_ENCODER_OK && !encoder->private_->is_being_deleted) {
if(encoder->private_->current_sample_number != 0) {
encoder->protected_->blocksize = encoder->private_->current_sample_number;
+ if(!resize_buffers_(encoder, encoder->protected_->blocksize)) {
+ /* the above function sets the state for us in case of an error */
+ return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
+ }
if(!process_frame_(encoder, /*is_last_block=*/true))
error = true;
}
@@ -2562,81 +2566,88 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, uint32_t new_blocksize)
FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
FLAC__ASSERT(encoder->private_->current_sample_number == 0);
- /* To avoid excessive malloc'ing, we only grow the buffer; no shrinking. */
- if(new_blocksize <= encoder->private_->input_capacity)
- return true;
-
ok = true;
- /* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx() and ..._intrin_sse2()
- * require that the input arrays (in our case the integer signals)
- * have a buffer of up to 3 zeroes in front (at negative indices) for
- * alignment purposes; we use 4 in front to keep the data well-aligned.
- */
+ /* To avoid excessive malloc'ing, we only grow the buffer; no shrinking. */
+ if(new_blocksize > encoder->private_->input_capacity) {
+
+ /* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx() and ..._intrin_sse2()
+ * require that the input arrays (in our case the integer signals)
+ * have a buffer of up to 3 zeroes in front (at negative indices) for
+ * alignment purposes; we use 4 in front to keep the data well-aligned.
+ */
- for(i = 0; ok && i < encoder->protected_->channels; i++) {
- ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_unaligned[i], &encoder->private_->integer_signal[i]);
- if(ok) {
- memset(encoder->private_->integer_signal[i], 0, sizeof(FLAC__int32)*4);
- encoder->private_->integer_signal[i] += 4;
+ for(i = 0; ok && i < encoder->protected_->channels; i++) {
+ ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_unaligned[i], &encoder->private_->integer_signal[i]);
+ if(ok) {
+ memset(encoder->private_->integer_signal[i], 0, sizeof(FLAC__int32)*4);
+ encoder->private_->integer_signal[i] += 4;
+ }
}
- }
- for(i = 0; ok && i < 2; i++) {
- ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_mid_side_unaligned[i], &encoder->private_->integer_signal_mid_side[i]);
- if(ok) {
- memset(encoder->private_->integer_signal_mid_side[i], 0, sizeof(FLAC__int32)*4);
- encoder->private_->integer_signal_mid_side[i] += 4;
+ for(i = 0; ok && i < 2; i++) {
+ ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_mid_side_unaligned[i], &encoder->private_->integer_signal_mid_side[i]);
+ if(ok) {
+ memset(encoder->private_->integer_signal_mid_side[i], 0, sizeof(FLAC__int32)*4);
+ encoder->private_->integer_signal_mid_side[i] += 4;
+ }
}
- }
- ok = ok && FLAC__memory_alloc_aligned_int64_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_33bit_side_unaligned, &encoder->private_->integer_signal_33bit_side);
+ ok = ok && FLAC__memory_alloc_aligned_int64_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_33bit_side_unaligned, &encoder->private_->integer_signal_33bit_side);
#ifndef FLAC__INTEGER_ONLY_LIBRARY
- if(ok && encoder->protected_->max_lpc_order > 0) {
- for(i = 0; ok && i < encoder->protected_->num_apodizations; i++)
- ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->window_unaligned[i], &encoder->private_->window[i]);
- ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->windowed_signal_unaligned, &encoder->private_->windowed_signal);
- }
+ if(ok && encoder->protected_->max_lpc_order > 0) {
+ for(i = 0; ok && i < encoder->protected_->num_apodizations; i++)
+ ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->window_unaligned[i], &encoder->private_->window[i]);
+ ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->windowed_signal_unaligned, &encoder->private_->windowed_signal);
+ }
#endif
- for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
- for(i = 0; ok && i < 2; i++) {
- ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_unaligned[channel][i], &encoder->private_->residual_workspace[channel][i]);
+ for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
+ for(i = 0; ok && i < 2; i++) {
+ ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_unaligned[channel][i], &encoder->private_->residual_workspace[channel][i]);
+ }
}
- }
- for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
- for(i = 0; ok && i < 2; i++) {
- ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace[channel][i], encoder->protected_->max_residual_partition_order);
- ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace[channel][i], encoder->protected_->max_residual_partition_order);
+ for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
+ for(i = 0; ok && i < 2; i++) {
+ ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace[channel][i], encoder->protected_->max_residual_partition_order);
+ ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace[channel][i], encoder->protected_->max_residual_partition_order);
+ }
}
- }
- for(channel = 0; ok && channel < 2; channel++) {
- for(i = 0; ok && i < 2; i++) {
- ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_mid_side_unaligned[channel][i], &encoder->private_->residual_workspace_mid_side[channel][i]);
+ for(channel = 0; ok && channel < 2; channel++) {
+ for(i = 0; ok && i < 2; i++) {
+ ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_mid_side_unaligned[channel][i], &encoder->private_->residual_workspace_mid_side[channel][i]);
+ }
+ }
+
+ for(channel = 0; ok && channel < 2; channel++) {
+ for(i = 0; ok && i < 2; i++) {
+ ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace_mid_side[channel][i], encoder->protected_->max_residual_partition_order);
+ }
}
- }
- for(channel = 0; ok && channel < 2; channel++) {
for(i = 0; ok && i < 2; i++) {
- ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace_mid_side[channel][i], encoder->protected_->max_residual_partition_order);
+ ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_extra[i], encoder->protected_->max_residual_partition_order);
}
- }
- for(i = 0; ok && i < 2; i++) {
- ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_extra[i], encoder->protected_->max_residual_partition_order);
- }
+ /* the *2 is an approximation to the series 1 + 1/2 + 1/4 + ... that sums tree occupies in a flat array */
+ /*@@@ new_blocksize*2 is too pessimistic, but to fix, we need smarter logic because a smaller new_blocksize can actually increase the # of partitions; would require moving this out into a separate function, then checking its capacity against the need of the current blocksize&min/max_partition_order (and maybe predictor order) */
+ ok = ok && FLAC__memory_alloc_aligned_uint64_array(new_blocksize * 2, &encoder->private_->abs_residual_partition_sums_unaligned, &encoder->private_->abs_residual_partition_sums);
+ if(encoder->protected_->do_escape_coding)
+ ok = ok && FLAC__memory_alloc_aligned_unsigned_array(new_blocksize * 2, &encoder->private_->raw_bits_per_partition_unaligned, &encoder->private_->raw_bits_per_partition);
+}
+ if(ok)
+ encoder->private_->input_capacity = new_blocksize;
+ else {
+ encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
+ return ok;
+ }
- /* the *2 is an approximation to the series 1 + 1/2 + 1/4 + ... that sums tree occupies in a flat array */
- /*@@@ new_blocksize*2 is too pessimistic, but to fix, we need smarter logic because a smaller new_blocksize can actually increase the # of partitions; would require moving this out into a separate function, then checking its capacity against the need of the current blocksize&min/max_partition_order (and maybe predictor order) */
- ok = ok && FLAC__memory_alloc_aligned_uint64_array(new_blocksize * 2, &encoder->private_->abs_residual_partition_sums_unaligned, &encoder->private_->abs_residual_partition_sums);
- if(encoder->protected_->do_escape_coding)
- ok = ok && FLAC__memory_alloc_aligned_unsigned_array(new_blocksize * 2, &encoder->private_->raw_bits_per_partition_unaligned, &encoder->private_->raw_bits_per_partition);
/* now adjust the windows if the blocksize has changed */
#ifndef FLAC__INTEGER_ONLY_LIBRARY
- if(ok && new_blocksize != encoder->private_->input_capacity && encoder->protected_->max_lpc_order > 0) {
- for(i = 0; ok && i < encoder->protected_->num_apodizations; i++) {
+ if(encoder->protected_->max_lpc_order > 0) {
+ for(i = 0; i < encoder->protected_->num_apodizations; i++) {
switch(encoder->protected_->apodizations[i].type) {
case FLAC__APODIZATION_BARTLETT:
FLAC__window_bartlett(encoder->private_->window[i], new_blocksize);
@@ -2700,14 +2711,14 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, uint32_t new_blocksize)
}
}
}
+ if (new_blocksize < FLAC__MAX_LPC_ORDER) {
+ /* intrinsics autocorrelation routines do not all handle cases in which lag might be
+ * larger than data_len */
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+ }
#endif
- if(ok)
- encoder->private_->input_capacity = new_blocksize;
- else
- encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
-
- return ok;
+ return true;
}
FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, uint32_t samples, FLAC__bool is_last_block)
@@ -3734,14 +3745,14 @@ FLAC__bool process_subframe_(
}
else {
/* window part of subblock */
- if(max_lpc_order_this_apodization >= frame_header->blocksize/b) {
- max_lpc_order_this_apodization = frame_header->blocksize/b - 1;
- if(frame_header->blocksize/b > 0)
- max_lpc_order_this_apodization = frame_header->blocksize/b - 1;
- else {
- set_next_subdivide_tukey(encoder->protected_->apodizations[a].parameters.subdivide_tukey.parts, &a, &b, &c);
- continue;
- }
+ if(frame_header->blocksize/b < FLAC__MAX_LPC_ORDER) {
+ /* intrinsics autocorrelation routines do not all handle cases in which lag might be
+ * larger than data_len, and some routines round lag up to the nearest multiple of 4.
+ * As little gain is expected from using LPC on part of a signal as small as 32 samples
+ * and to enable widening this rounding up to larger values in the future, windowing
+ * parts smaller than FLAC__MAX_LPC_ORDER (which is 32) samples is not supported */
+ set_next_subdivide_tukey(encoder->protected_->apodizations[a].parameters.subdivide_tukey.parts, &a, &b, &c);
+ continue;
}
if(!(c % 2)){
/* on even c, evaluate the (c/2)th partial window of size blocksize/b */