summaryrefslogtreecommitdiff
path: root/webrtc/modules/audio_processing/three_band_filter_bank.cc
diff options
context:
space:
mode:
Diffstat (limited to 'webrtc/modules/audio_processing/three_band_filter_bank.cc')
-rw-r--r--webrtc/modules/audio_processing/three_band_filter_bank.cc308
1 files changed, 186 insertions, 122 deletions
diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.cc b/webrtc/modules/audio_processing/three_band_filter_bank.cc
index 91e58df..2a7d272 100644
--- a/webrtc/modules/audio_processing/three_band_filter_bank.cc
+++ b/webrtc/modules/audio_processing/three_band_filter_bank.cc
@@ -30,37 +30,33 @@
//
// A similar logic can be applied to the synthesis stage.
-// MSVC++ requires this to be set before any other includes to get M_PI.
-#define _USE_MATH_DEFINES
+#include "modules/audio_processing/three_band_filter_bank.h"
-#include "webrtc/modules/audio_processing/three_band_filter_bank.h"
+#include <array>
-#include <cmath>
-
-#include "webrtc/base/checks.h"
+#include "rtc_base/checks.h"
namespace webrtc {
namespace {
-const size_t kNumBands = 3;
-const size_t kSparsity = 4;
-
-// Factors to take into account when choosing |kNumCoeffs|:
-// 1. Higher |kNumCoeffs|, means faster transition, which ensures less
+// Factors to take into account when choosing |kFilterSize|:
+// 1. A higher |kFilterSize| means a faster transition, which ensures less
// aliasing. This is especially important when there is non-linear
// processing between the splitting and merging.
// 2. The delay that this filter bank introduces is
-// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly
-// with |kNumCoeffs|.
-// 3. The computation complexity also increases linearly with |kNumCoeffs|.
-const size_t kNumCoeffs = 4;
+// |kNumBands| * |kSparsity| * |kFilterSize| / 2, so it increases linearly
+// with |kFilterSize|.
+// 3. The computational complexity also increases linearly with |kFilterSize|.
-// The Matlab code to generate these |kLowpassCoeffs| is:
+// The Matlab code to generate these |kFilterCoeffs| is:
//
-// N = kNumBands * kSparsity * kNumCoeffs - 1;
+// N = kNumBands * kSparsity * kFilterSize - 1;
// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5));
-// reshape(h, kNumBands * kSparsity, kNumCoeffs);
+// reshape(h, kNumBands * kSparsity, kFilterSize);
//
+// The code below uses the values of kFilterSize, kNumBands and kSparsity
+// specified in the header.
+
// Because the total bandwidth of the lower and higher band is double the middle
// one (because of the spectrum parity), the low-pass prototype is half the
// bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation
@@ -68,39 +64,84 @@ const size_t kNumCoeffs = 4;
// A Kaiser window is used because of its flexibility and the alpha is set to
// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast
// transition.
-const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] =
- {{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
- {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
- {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
- {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f},
- {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
- {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
- {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
- {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
- {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
- {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f},
- {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
- {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
-
-// Downsamples |in| into |out|, taking one every |kNumbands| starting from
-// |offset|. |split_length| is the |out| length. |in| has to be at least
-// |kNumBands| * |split_length| long.
-void Downsample(const float* in,
- size_t split_length,
- size_t offset,
- float* out) {
- for (size_t i = 0; i < split_length; ++i) {
- out[i] = in[kNumBands * i + offset];
+
+constexpr int kSubSampling = ThreeBandFilterBank::kNumBands;  // Step used when de-interleaving the full-band input (Analysis) and interleaving the output (Synthesis).
+constexpr int kDctSize = ThreeBandFilterBank::kNumBands;  // Number of modulation weights per filter row: one per band.
+static_assert(ThreeBandFilterBank::kNumBands *  // Compile-time check that kNumBands sub-bands of kSplitBandSize samples exactly cover kFullBandSize.
+ ThreeBandFilterBank::kSplitBandSize ==
+ ThreeBandFilterBank::kFullBandSize,
+ "The full band must be split in equally sized subbands");
+
+const float  // Low-pass prototype taps, one row per retained filter. Rows 3 and 9 of the previous 12-row table are omitted (see kZeroFilterIndex1/2).
+ kFilterCoeffs[ThreeBandFilterBank::kNumNonZeroFilters][kFilterSize] = {
+ {-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
+ {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
+ {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
+ {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
+ {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
+ {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
+ {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
+ {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
+ {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
+ {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
+
+constexpr int kZeroFilterIndex1 = 3;  // First filter index that Analysis() and Synthesis() skip; indices above it are shifted down by one when indexing the tables.
+constexpr int kZeroFilterIndex2 = 9;  // Second skipped filter index; indices above it are shifted down by two when indexing the tables.
+
+const float kDctModulation[ThreeBandFilterBank::kNumNonZeroFilters][kDctSize] =  // Cosine modulation weights: one row per retained filter, one column per band.
+ {{2.f, 2.f, 2.f},  // Rows equal the previously computed 2 * cos(2 * pi * i * (2 * j + 1) / 12); the all-zero rows i = 3 and i = 9 are left out.
+ {1.73205077f, 0.f, -1.73205077f},  // 1.73205077 is sqrt(3), i.e. 2 * cos(pi / 6).
+ {1.f, -2.f, 1.f},
+ {-1.f, 2.f, -1.f},
+ {-1.73205077f, 0.f, 1.73205077f},
+ {-2.f, -2.f, -2.f},
+ {-1.73205077f, 0.f, 1.73205077f},
+ {-1.f, 2.f, -1.f},
+ {1.f, -2.f, 1.f},
+ {1.73205077f, 0.f, -1.73205077f}};
+
+// Computes |out| as |in| delayed by |in_shift| samples and filtered with the sparse FIR whose taps |filter| are spaced |kStride| samples apart.
+// Input samples that precede |in| are read from |state|; before returning, |state| is overwritten with the last |kMemorySize| samples of |in|.
+void FilterCore(
+ rtc::ArrayView<const float, kFilterSize> filter,
+ rtc::ArrayView<const float, ThreeBandFilterBank::kSplitBandSize> in,
+ const int in_shift,
+ rtc::ArrayView<float, ThreeBandFilterBank::kSplitBandSize> out,
+ rtc::ArrayView<float, kMemorySize> state) {
+ constexpr int kMaxInShift = (kStride - 1);
+ RTC_DCHECK_GE(in_shift, 0);
+ RTC_DCHECK_LE(in_shift, kMaxInShift);
+ std::fill(out.begin(), out.end(), 0.f);  // |out| is accumulated into below, so it must start at zero.
+
+ for (int k = 0; k < in_shift; ++k) {  // Region 1: the first |in_shift| outputs read only samples stored in |state|.
+ for (int i = 0, j = kMemorySize + k - in_shift; i < kFilterSize;
+ ++i, j -= kStride) {
+ out[k] += state[j] * filter[i];
+ }
 }
-}
-// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every
-// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out|
-// has to be at least |kNumBands| * |split_length| long.
-void Upsample(const float* in, size_t split_length, size_t offset, float* out) {
- for (size_t i = 0; i < split_length; ++i) {
- out[kNumBands * i + offset] += kNumBands * in[i];
+ for (int k = in_shift, shift = 0; k < kFilterSize * kStride; ++k, ++shift) {  // Region 2: taps may straddle |in| (newest samples) and |state| (older samples).
+ RTC_DCHECK_GE(shift, 0);
+ const int loop_limit = std::min(kFilterSize, 1 + (shift >> kStrideLog2));  // Number of taps whose sample index falls inside |in|; presumably kStride == 1 << kStrideLog2 (TODO confirm in the header).
+ for (int i = 0, j = shift; i < loop_limit; ++i, j -= kStride) {
+ out[k] += in[j] * filter[i];
+ }
+ for (int i = loop_limit, j = kMemorySize + shift - loop_limit * kStride;  // Remaining taps continue backwards into |state|.
+ i < kFilterSize; ++i, j -= kStride) {
+ out[k] += state[j] * filter[i];
+ }
 }
+
+ for (int k = kFilterSize * kStride, shift = kFilterSize * kStride - in_shift;  // Region 3: every tap reads from |in|.
+ k < ThreeBandFilterBank::kSplitBandSize; ++k, ++shift) {
+ for (int i = 0, j = shift; i < kFilterSize; ++i, j -= kStride) {
+ out[k] += in[j] * filter[i];
+ }
+ }
+
+ // Update current state.
+ std::copy(in.begin() + ThreeBandFilterBank::kSplitBandSize - kMemorySize,  // Keep the last |kMemorySize| input samples for the next call.
+ in.end(), state.begin());
}
} // namespace
@@ -108,48 +149,72 @@ void Upsample(const float* in, size_t split_length, size_t offset, float* out) {
// Because the low-pass filter prototype has half bandwidth it is possible to
// use a DCT to shift it in both directions at the same time, to the center
// frequencies [1 / 12, 3 / 12, 5 / 12].
-ThreeBandFilterBank::ThreeBandFilterBank(size_t length)
- : in_buffer_(rtc::CheckedDivExact(length, kNumBands)),
- out_buffer_(in_buffer_.size()) {
- for (size_t i = 0; i < kSparsity; ++i) {
- for (size_t j = 0; j < kNumBands; ++j) {
- analysis_filters_.push_back(new SparseFIRFilter(
- kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i));
- synthesis_filters_.push_back(new SparseFIRFilter(
- kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i));
- }
- }
- dct_modulation_.resize(kNumBands * kSparsity);
- for (size_t i = 0; i < dct_modulation_.size(); ++i) {
- dct_modulation_[i].resize(kNumBands);
- for (size_t j = 0; j < kNumBands; ++j) {
- dct_modulation_[i][j] =
- 2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size());
- }
+ThreeBandFilterBank::ThreeBandFilterBank() {  // Zeroes every analysis and synthesis filter state so the first frame is filtered against silence.
+ RTC_DCHECK_EQ(state_analysis_.size(), kNumNonZeroFilters);  // Debug-only sanity checks on the state container dimensions.
+ RTC_DCHECK_EQ(state_synthesis_.size(), kNumNonZeroFilters);
+ for (int k = 0; k < kNumNonZeroFilters; ++k) {
+ RTC_DCHECK_EQ(state_analysis_[k].size(), kMemorySize);
+ RTC_DCHECK_EQ(state_synthesis_[k].size(), kMemorySize);
+
+ state_analysis_[k].fill(0.f);
+ state_synthesis_[k].fill(0.f);
 }
}
+ThreeBandFilterBank::~ThreeBandFilterBank() = default;  // Nothing to release explicitly; defaulted destructor.
+
// The analysis can be separated in these steps:
// 1. Serial to parallel downsampling by a factor of |kNumBands|.
// 2. Filtering of |kSparsity| different delayed signals with polyphase
// decomposition of the low-pass prototype filter and upsampled by a factor
// of |kSparsity|.
// 3. Modulating with cosines and accumulating to get the desired band.
-void ThreeBandFilterBank::Analysis(const float* in,
- size_t length,
- float* const* out) {
- RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands));
- for (size_t i = 0; i < kNumBands; ++i) {
- memset(out[i], 0, in_buffer_.size() * sizeof(*out[i]));
+void ThreeBandFilterBank::Analysis(  // Splits the full-band frame |in| into kNumBands band signals written to |out|; updates the analysis filter states.
+ rtc::ArrayView<const float, kFullBandSize> in,
+ rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
+ out) {
+ // Initialize the output to zero, since every filter's contribution is accumulated into it below.
+ for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+ RTC_DCHECK_EQ(out[band].size(), kSplitBandSize);
+ std::fill(out[band].begin(), out[band].end(), 0);
 }
- for (size_t i = 0; i < kNumBands; ++i) {
- Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
- for (size_t j = 0; j < kSparsity; ++j) {
- const size_t offset = i + j * kNumBands;
- analysis_filters_[offset]->Filter(&in_buffer_[0],
- in_buffer_.size(),
- &out_buffer_[0]);
- DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
+
+ for (int downsampling_index = 0; downsampling_index < kSubSampling;
+ ++downsampling_index) {
+ // Downsample to form the filter input: take every |kSubSampling|-th sample, starting at offset (kSubSampling - 1) - downsampling_index.
+ std::array<float, kSplitBandSize> in_subsampled;
+ for (int k = 0; k < kSplitBandSize; ++k) {
+ in_subsampled[k] =
+ in[(kSubSampling - 1) - downsampling_index + kSubSampling * k];
+ }
+
+ for (int in_shift = 0; in_shift < kStride; ++in_shift) {
+ // Choose the filter; the two indices that have no row in the tables are skipped.
+ const int index = downsampling_index + in_shift * kSubSampling;
+ if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
+ continue;
+ }
+ const int filter_index =  // Row in the compacted tables: subtract one for each skipped index below |index|.
+ index < kZeroFilterIndex1
+ ? index
+ : (index < kZeroFilterIndex2 ? index - 1 : index - 2);
+
+ rtc::ArrayView<const float, kFilterSize> filter(
+ kFilterCoeffs[filter_index]);
+ rtc::ArrayView<const float, kDctSize> dct_modulation(
+ kDctModulation[filter_index]);
+ rtc::ArrayView<float, kMemorySize> state(state_analysis_[filter_index]);  // Each filter keeps its own history across calls.
+
+ // Filter the downsampled input with the selected filter and shift.
+ std::array<float, kSplitBandSize> out_subsampled;
+ FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
+
+ // Modulate the filtered signal and accumulate it into each band.
+ for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+ for (int n = 0; n < kSplitBandSize; ++n) {
+ out[band][n] += dct_modulation[band] * out_subsampled[n];
+ }
+ }
 }
 }
}
@@ -160,51 +225,50 @@ void ThreeBandFilterBank::Analysis(const float* in,
// prototype filter upsampled by a factor of |kSparsity| and accumulating
// |kSparsity| signals with different delays.
// 3. Parallel to serial upsampling by a factor of |kNumBands|.
-void ThreeBandFilterBank::Synthesis(const float* const* in,
- size_t split_length,
- float* out) {
- RTC_CHECK_EQ(in_buffer_.size(), split_length);
- memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
- for (size_t i = 0; i < kNumBands; ++i) {
- for (size_t j = 0; j < kSparsity; ++j) {
- const size_t offset = i + j * kNumBands;
- UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
- synthesis_filters_[offset]->Filter(&in_buffer_[0],
- in_buffer_.size(),
- &out_buffer_[0]);
- Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
- }
- }
-}
+void ThreeBandFilterBank::Synthesis(  // Merges the kNumBands band signals in |in| into the full-band frame |out|; updates the synthesis filter states.
+ rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
+ in,
+ rtc::ArrayView<float, kFullBandSize> out) {
+ std::fill(out.begin(), out.end(), 0);  // |out| is accumulated into below, so it must start at zero.
+ for (int upsampling_index = 0; upsampling_index < kSubSampling;
+ ++upsampling_index) {
+ for (int in_shift = 0; in_shift < kStride; ++in_shift) {
+ // Choose the filter; the two indices that have no row in the tables are skipped.
+ const int index = upsampling_index + in_shift * kSubSampling;
+ if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
+ continue;
+ }
+ const int filter_index =  // Row in the compacted tables: subtract one for each skipped index below |index|.
+ index < kZeroFilterIndex1
+ ? index
+ : (index < kZeroFilterIndex2 ? index - 1 : index - 2);
+ rtc::ArrayView<const float, kFilterSize> filter(
+ kFilterCoeffs[filter_index]);
+ rtc::ArrayView<const float, kDctSize> dct_modulation(
+ kDctModulation[filter_index]);
+ rtc::ArrayView<float, kMemorySize> state(state_synthesis_[filter_index]);  // Each filter keeps its own history across calls.
-// Modulates |in| by |dct_modulation_| and accumulates it in each of the
-// |kNumBands| bands of |out|. |offset| is the index in the period of the
-// cosines used for modulation. |split_length| is the length of |in| and each
-// band of |out|.
-void ThreeBandFilterBank::DownModulate(const float* in,
- size_t split_length,
- size_t offset,
- float* const* out) {
- for (size_t i = 0; i < kNumBands; ++i) {
- for (size_t j = 0; j < split_length; ++j) {
- out[i][j] += dct_modulation_[offset][i] * in[j];
- }
- }
-}
+ // Prepare the filter input: the sum over bands of each band signal weighted by this filter's modulation value.
+ std::array<float, kSplitBandSize> in_subsampled;
+ std::fill(in_subsampled.begin(), in_subsampled.end(), 0.f);
+ for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+ RTC_DCHECK_EQ(in[band].size(), kSplitBandSize);
+ for (int n = 0; n < kSplitBandSize; ++n) {
+ in_subsampled[n] += dct_modulation[band] * in[band][n];
+ }
+ }
+
+ // Filter the modulated signal with the selected filter and shift.
+ std::array<float, kSplitBandSize> out_subsampled;
+ FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
-// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
-// accumulates them in |out|. |out| is cleared before starting to accumulate.
-// |offset| is the index in the period of the cosines used for modulation.
-// |split_length| is the length of each band of |in| and |out|.
-void ThreeBandFilterBank::UpModulate(const float* const* in,
- size_t split_length,
- size_t offset,
- float* out) {
- memset(out, 0, split_length * sizeof(*out));
- for (size_t i = 0; i < kNumBands; ++i) {
- for (size_t j = 0; j < split_length; ++j) {
- out[j] += dct_modulation_[offset][i] * in[i][j];
+ // Upsample: accumulate into every |kSubSampling|-th output sample, starting at |upsampling_index|, scaled by |kSubSampling|.
+ constexpr float kUpsamplingScaling = kSubSampling;
+ for (int k = 0; k < kSplitBandSize; ++k) {
+ out[upsampling_index + kSubSampling * k] +=
+ kUpsamplingScaling * out_subsampled[k];
+ }
 }
 }
}