// webrtc/modules/audio_processing/ns/ns_core.h

/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_

#include "webrtc/modules/audio_processing/ns/defines.h"

// Parameters controlling histogram-based feature extraction: histogram bin
// sizes, peak selection limits, and clamps applied to the resulting feature
// thresholds. Embedded in NoiseSuppressionC as |featureExtractionParams|.
typedef struct NSParaExtract_ {
  // Bin size of histogram, one per feature (LRT, spectral flatness, spectral
  // difference).
  float binSizeLrt;
  float binSizeSpecFlat;
  float binSizeSpecDiff;
  // Range of histogram over which LRT threshold is computed.
  float rangeAvgHistLrt;
  // Scale parameters: multiply dominant peaks of the histograms by scale factor
  // to obtain thresholds for prior model.
  float factor1ModelPars;  // For LRT and spectral difference.
  float factor2ModelPars;  // For spectral_flatness: used when noise is flatter
                           // than speech.
  // Peak limit for spectral flatness (varies between 0 and 1).
  float thresPosSpecFlat;
  // Limit on spacing of two highest peaks in histogram: spacing determined by
  // bin size.
  float limitPeakSpacingSpecFlat;
  float limitPeakSpacingSpecDiff;
  // Limit on relevance of second peak.
  float limitPeakWeightsSpecFlat;
  float limitPeakWeightsSpecDiff;
  // Limit on fluctuation of LRT feature.
  float thresFluctLrt;
  // Limit on the max and min values for the feature thresholds (one max/min
  // pair per feature).
  float maxLrt;
  float minLrt;
  float maxSpecFlat;
  float minSpecFlat;
  float maxSpecDiff;
  float minSpecDiff;
  // Criteria of weight of histogram peak to accept/reject feature.
  int thresWeightSpecFlat;
  int thresWeightSpecDiff;

} NSParaExtract;

// Complete state of one noise suppression instance. A pointer to this struct
// is passed as |self| to every WebRtcNs_*Core function declared in this header.
// All buffers are fixed-size arrays embedded in the struct; array bounds come
// from the macros in ns/defines.h.
//
// NOTE(review): this struct uses uint32_t, int32_t and size_t, but the only
// include visible in this header is ns/defines.h. Confirm that defines.h (or
// every includer) provides <stdint.h>/<stddef.h>.
typedef struct NoiseSuppressionC_ {
  // Sampling frequency; presumably the value given to WebRtcNs_InitCore.
  uint32_t fs;
  // Frame/window geometry. NOTE(review): the exact meaning of each length is
  // set by the implementation and is not visible in this header.
  size_t blockLen;
  size_t windShift;
  size_t anaLen;
  size_t magnLen;
  // NOTE(review): presumably the mode selected via WebRtcNs_set_policy_core.
  int aggrMode;
  // Read-only float table; not owned storage inside this struct.
  const float* window;
  // Working buffers, each ANAL_BLOCKL_MAX floats.
  float analyzeBuf[ANAL_BLOCKL_MAX];
  float dataBuf[ANAL_BLOCKL_MAX];
  float syntBuf[ANAL_BLOCKL_MAX];

  // NOTE(review): presumably marks whether the instance has been initialized;
  // confirm against the implementation.
  int initFlag;
  // Parameters for quantile noise estimation.
  float density[SIMULT * HALF_ANAL_BLOCKL];
  float lquantile[SIMULT * HALF_ANAL_BLOCKL];
  float quantile[HALF_ANAL_BLOCKL];
  int counter[SIMULT];
  int updates;
  // Parameters for Wiener filter.
  float smooth[HALF_ANAL_BLOCKL];
  float overdrive;
  float denoiseBound;
  int gainmap;
  // FFT work arrays.
  size_t ip[IP_LENGTH];
  float wfft[W_LENGTH];

  // Parameters for new method: some not needed, will reduce/cleanup later.
  int32_t blockInd;  // Frame index counter.
  int modelUpdatePars[4];  // Parameters for updating or estimating.
  // Thresholds/weights for prior model.
  float priorModelPars[7];  // Parameters for prior model.
  float noise[HALF_ANAL_BLOCKL];  // Noise spectrum from current frame.
  float noisePrev[HALF_ANAL_BLOCKL];  // Noise spectrum from previous frame.
  // Magnitude spectrum of previous analyze frame.
  float magnPrevAnalyze[HALF_ANAL_BLOCKL];
  // Magnitude spectrum of previous process frame.
  float magnPrevProcess[HALF_ANAL_BLOCKL];
  float logLrtTimeAvg[HALF_ANAL_BLOCKL];  // Log LRT factor with time-smoothing.
  float priorSpeechProb;  // Prior speech/noise probability.
  // Seven feature values. NOTE(review): the meaning of each index is not
  // documented in this header.
  float featureData[7];
  // Conservative noise spectrum estimate.
  float magnAvgPause[HALF_ANAL_BLOCKL];
  float signalEnergy;  // Energy of |magn|.
  float sumMagn;
  float whiteNoiseLevel;  // Initial noise estimate.
  float initMagnEst[HALF_ANAL_BLOCKL];  // Initial magnitude spectrum estimate.
  float pinkNoiseNumerator;  // Pink noise parameter: numerator.
  float pinkNoiseExp;  // Pink noise parameter: power of frequencies.
  float parametricNoise[HALF_ANAL_BLOCKL];
  // Parameters for feature extraction.
  NSParaExtract featureExtractionParams;
  // Histograms for parameter estimation, one per feature, HIST_PAR_EST bins
  // each.
  int histLrt[HIST_PAR_EST];
  int histSpecFlat[HIST_PAR_EST];
  int histSpecDiff[HIST_PAR_EST];
  // Quantities for high band estimate.
  float speechProb[HALF_ANAL_BLOCKL];  // Final speech/noise prob: prior + LRT.
  // Buffering data for HB: one ANAL_BLOCKL_MAX-float buffer per high band, up
  // to NUM_HIGH_BANDS_MAX bands.
  float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];

} NoiseSuppressionC;

#ifdef __cplusplus
extern "C" {
#endif

/****************************************************************************
 * WebRtcNs_InitCore(...)
 *
 * This function initializes a noise suppression instance.
 *
 * Input:
 *      - self          : Instance that should be initialized
 *      - fs            : Sampling frequency
 *                        NOTE(review): the set of accepted values is decided
 *                        by the implementation and is not visible here.
 *
 * Output:
 *      - self          : Initialized instance
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);

/****************************************************************************
 * WebRtcNs_set_policy_core(...)
 *
 * This changes the aggressiveness of the noise suppression method.
 *
 * Input:
 *      - self          : Instance whose policy should be changed
 *      - mode          : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
 *                        NOTE(review): confirm against the implementation
 *                        whether any other mode values are accepted.
 *
 * Output:
 *      - self          : Updated instance
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);

/****************************************************************************
 * WebRtcNs_AnalyzeCore
 *
 * Estimate the background noise.
 *
 * Input:
 *      - self          : Instance whose noise estimate should be updated
 *      - speechFrame   : Input speech frame for lower band (read-only)
 *
 * Output:
 *      - self          : Updated instance
 *
 * No return value; errors, if any, are not reported to the caller.
 */
void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);

/****************************************************************************
 * WebRtcNs_ProcessCore
 *
 * Do noise suppression.
 *
 * Input:
 *      - self          : Instance to use for processing
 *      - inFrame       : Input speech frame for each band (array of per-band
 *                        pointers to read-only samples)
 *      - num_bands     : Number of bands
 *
 * Output:
 *      - self          : Updated instance
 *      - outFrame      : Output speech frame for each band (array of per-band
 *                        pointers to writable sample buffers)
 *
 * No return value; errors, if any, are not reported to the caller.
 */
void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
                          const float* const* inFrame,
                          size_t num_bands,
                          float* const* outFrame);

#ifdef __cplusplus
}
#endif
#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_