summaryrefslogtreecommitdiff
path: root/webrtc/modules/audio_processing/aec3/stationarity_estimator.cc
diff options
context:
space:
mode:
Diffstat (limited to 'webrtc/modules/audio_processing/aec3/stationarity_estimator.cc')
-rw-r--r--webrtc/modules/audio_processing/aec3/stationarity_estimator.cc243
1 files changed, 243 insertions, 0 deletions
diff --git a/webrtc/modules/audio_processing/aec3/stationarity_estimator.cc b/webrtc/modules/audio_processing/aec3/stationarity_estimator.cc
new file mode 100644
index 0000000..01628f3
--- /dev/null
+++ b/webrtc/modules/audio_processing/aec3/stationarity_estimator.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/stationarity_estimator.h"
+
+#include <algorithm>
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/spectrum_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/atomic_ops.h"
+
+namespace webrtc {
+
+namespace {
+constexpr float kMinNoisePower = 10.f;
+constexpr int kHangoverBlocks = kNumBlocksPerSecond / 20;
+constexpr int kNBlocksAverageInitPhase = 20;
+constexpr int kNBlocksInitialPhase = kNumBlocksPerSecond * 2.;
+} // namespace
+
+StationarityEstimator::StationarityEstimator()
+ : data_dumper_(
+ new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))) {
+ Reset();
+}
+
+StationarityEstimator::~StationarityEstimator() = default;
+
+void StationarityEstimator::Reset() {
+ noise_.Reset();
+ hangovers_.fill(0);
+ stationarity_flags_.fill(false);
+}
+
+// Update just the noise estimator. Usefull until the delay is known
+void StationarityEstimator::UpdateNoiseEstimator(
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> spectrum) {
+ noise_.Update(spectrum);
+ data_dumper_->DumpRaw("aec3_stationarity_noise_spectrum", noise_.Spectrum());
+ data_dumper_->DumpRaw("aec3_stationarity_is_block_stationary",
+ IsBlockStationary());
+}
+
+void StationarityEstimator::UpdateStationarityFlags(
+ const SpectrumBuffer& spectrum_buffer,
+ rtc::ArrayView<const float> render_reverb_contribution_spectrum,
+ int idx_current,
+ int num_lookahead) {
+ std::array<int, kWindowLength> indexes;
+ int num_lookahead_bounded = std::min(num_lookahead, kWindowLength - 1);
+ int idx = idx_current;
+
+ if (num_lookahead_bounded < kWindowLength - 1) {
+ int num_lookback = (kWindowLength - 1) - num_lookahead_bounded;
+ idx = spectrum_buffer.OffsetIndex(idx_current, num_lookback);
+ }
+ // For estimating the stationarity properties of the current frame, the
+ // power for each band is accumulated for several consecutive spectra in the
+ // method EstimateBandStationarity.
+ // In order to avoid getting the indexes of the spectra for every band with
+ // its associated overhead, those indexes are stored in an array and then use
+ // when the estimation is done.
+ indexes[0] = idx;
+ for (size_t k = 1; k < indexes.size(); ++k) {
+ indexes[k] = spectrum_buffer.DecIndex(indexes[k - 1]);
+ }
+ RTC_DCHECK_EQ(
+ spectrum_buffer.DecIndex(indexes[kWindowLength - 1]),
+ spectrum_buffer.OffsetIndex(idx_current, -(num_lookahead_bounded + 1)));
+
+ for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
+ stationarity_flags_[k] = EstimateBandStationarity(
+ spectrum_buffer, render_reverb_contribution_spectrum, indexes, k);
+ }
+ UpdateHangover();
+ SmoothStationaryPerFreq();
+}
+
+bool StationarityEstimator::IsBlockStationary() const {
+ float acum_stationarity = 0.f;
+ RTC_DCHECK_EQ(stationarity_flags_.size(), kFftLengthBy2Plus1);
+ for (size_t band = 0; band < stationarity_flags_.size(); ++band) {
+ bool st = IsBandStationary(band);
+ acum_stationarity += static_cast<float>(st);
+ }
+ return ((acum_stationarity * (1.f / kFftLengthBy2Plus1)) > 0.75f);
+}
+
+bool StationarityEstimator::EstimateBandStationarity(
+ const SpectrumBuffer& spectrum_buffer,
+ rtc::ArrayView<const float> average_reverb,
+ const std::array<int, kWindowLength>& indexes,
+ size_t band) const {
+ constexpr float kThrStationarity = 10.f;
+ float acum_power = 0.f;
+ const int num_render_channels =
+ static_cast<int>(spectrum_buffer.buffer[0].size());
+ const float one_by_num_channels = 1.f / num_render_channels;
+ for (auto idx : indexes) {
+ for (int ch = 0; ch < num_render_channels; ++ch) {
+ acum_power += spectrum_buffer.buffer[idx][ch][band] * one_by_num_channels;
+ }
+ }
+ acum_power += average_reverb[band];
+ float noise = kWindowLength * GetStationarityPowerBand(band);
+ RTC_CHECK_LT(0.f, noise);
+ bool stationary = acum_power < kThrStationarity * noise;
+ data_dumper_->DumpRaw("aec3_stationarity_long_ratio", acum_power / noise);
+ return stationary;
+}
+
+bool StationarityEstimator::AreAllBandsStationary() {
+ for (auto b : stationarity_flags_) {
+ if (!b)
+ return false;
+ }
+ return true;
+}
+
+void StationarityEstimator::UpdateHangover() {
+ bool reduce_hangover = AreAllBandsStationary();
+ for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
+ if (!stationarity_flags_[k]) {
+ hangovers_[k] = kHangoverBlocks;
+ } else if (reduce_hangover) {
+ hangovers_[k] = std::max(hangovers_[k] - 1, 0);
+ }
+ }
+}
+
+void StationarityEstimator::SmoothStationaryPerFreq() {
+ std::array<bool, kFftLengthBy2Plus1> all_ahead_stationary_smooth;
+ for (size_t k = 1; k < kFftLengthBy2Plus1 - 1; ++k) {
+ all_ahead_stationary_smooth[k] = stationarity_flags_[k - 1] &&
+ stationarity_flags_[k] &&
+ stationarity_flags_[k + 1];
+ }
+
+ all_ahead_stationary_smooth[0] = all_ahead_stationary_smooth[1];
+ all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 1] =
+ all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 2];
+
+ stationarity_flags_ = all_ahead_stationary_smooth;
+}
+
+int StationarityEstimator::instance_count_ = 0;
+
+StationarityEstimator::NoiseSpectrum::NoiseSpectrum() {
+ Reset();
+}
+
+StationarityEstimator::NoiseSpectrum::~NoiseSpectrum() = default;
+
+void StationarityEstimator::NoiseSpectrum::Reset() {
+ block_counter_ = 0;
+ noise_spectrum_.fill(kMinNoisePower);
+}
+
+void StationarityEstimator::NoiseSpectrum::Update(
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> spectrum) {
+ RTC_DCHECK_LE(1, spectrum[0].size());
+ const int num_render_channels = static_cast<int>(spectrum.size());
+
+ std::array<float, kFftLengthBy2Plus1> avg_spectrum_data;
+ rtc::ArrayView<const float> avg_spectrum;
+ if (num_render_channels == 1) {
+ avg_spectrum = spectrum[0];
+ } else {
+ // For multiple channels, average the channel spectra before passing to the
+ // noise spectrum estimator.
+ avg_spectrum = avg_spectrum_data;
+ std::copy(spectrum[0].begin(), spectrum[0].end(),
+ avg_spectrum_data.begin());
+ for (int ch = 1; ch < num_render_channels; ++ch) {
+ for (size_t k = 1; k < kFftLengthBy2Plus1; ++k) {
+ avg_spectrum_data[k] += spectrum[ch][k];
+ }
+ }
+
+ const float one_by_num_channels = 1.f / num_render_channels;
+ for (size_t k = 1; k < kFftLengthBy2Plus1; ++k) {
+ avg_spectrum_data[k] *= one_by_num_channels;
+ }
+ }
+
+ ++block_counter_;
+ float alpha = GetAlpha();
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ if (block_counter_ <= kNBlocksAverageInitPhase) {
+ noise_spectrum_[k] += (1.f / kNBlocksAverageInitPhase) * avg_spectrum[k];
+ } else {
+ noise_spectrum_[k] =
+ UpdateBandBySmoothing(avg_spectrum[k], noise_spectrum_[k], alpha);
+ }
+ }
+}
+
+float StationarityEstimator::NoiseSpectrum::GetAlpha() const {
+ constexpr float kAlpha = 0.004f;
+ constexpr float kAlphaInit = 0.04f;
+ constexpr float kTiltAlpha = (kAlphaInit - kAlpha) / kNBlocksInitialPhase;
+
+ if (block_counter_ > (kNBlocksInitialPhase + kNBlocksAverageInitPhase)) {
+ return kAlpha;
+ } else {
+ return kAlphaInit -
+ kTiltAlpha * (block_counter_ - kNBlocksAverageInitPhase);
+ }
+}
+
+float StationarityEstimator::NoiseSpectrum::UpdateBandBySmoothing(
+ float power_band,
+ float power_band_noise,
+ float alpha) const {
+ float power_band_noise_updated = power_band_noise;
+ if (power_band_noise < power_band) {
+ RTC_DCHECK_GT(power_band, 0.f);
+ float alpha_inc = alpha * (power_band_noise / power_band);
+ if (block_counter_ > kNBlocksInitialPhase) {
+ if (10.f * power_band_noise < power_band) {
+ alpha_inc *= 0.1f;
+ }
+ }
+ power_band_noise_updated += alpha_inc * (power_band - power_band_noise);
+ } else {
+ power_band_noise_updated += alpha * (power_band - power_band_noise);
+ power_band_noise_updated =
+ std::max(power_band_noise_updated, kMinNoisePower);
+ }
+ return power_band_noise_updated;
+}
+
+} // namespace webrtc