summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Jesús de Vicente Peña <devicentepena@webrtc.org> 2020-02-06 15:55:17 +0100
committer: Felicia Lim <flim@google.com> 2020-02-10 11:39:33 -0800
commit: ea3b30f946d0e3a8d5b88d1b71cac56fb87955fd (patch)
tree: e27523fb715b6e715599f0723aeb963a5ce750c8 /src
parent: cf58efea0bf978dc53290f3a6898323f4e8b663a (diff)
download: opus-ea3b30f946d0e3a8d5b88d1b71cac56fb87955fd.tar.gz
Fixes to the activity flag that is passed to Silk so it represents the final activity flag used in the DTX decision
This flag was modified after calling the Silk encoder function. This commit corrects that behavior by introducing those modifications before calling the Silk encoder.

Slightly modified comments by Felicia Lim

Signed-off-by: Felicia Lim <flim@google.com>
Diffstat (limited to 'src')
-rw-r--r-- src/opus_encoder.c 54
1 files changed, 19 insertions, 35 deletions
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 844b08dd..1ae5598a 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -892,34 +892,15 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in
#endif
/* Decides if DTX should be turned on (=1) or off (=0) */
-static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */
- int *nb_no_activity_frames, /* number of consecutive frames with no activity */
- opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */
- const opus_val16 *pcm, /* input pcm signal */
- int frame_size, /* frame size */
- int channels,
- int is_silence, /* only digital silence detected in this frame */
- int arch
- )
-{
- opus_val32 noise_energy;
+static int decide_dtx_mode(opus_int activity, /* indicates if this frame contains speech/music */
+ int *nb_no_activity_frames /* number of consecutive frames with no activity */
+ )
- if (!is_silence)
- {
- if (activity_probability < DTX_ACTIVITY_THRESHOLD) /* is noise */
- {
- noise_energy = compute_frame_energy(pcm, frame_size, channels, arch);
-
- /* but is sufficiently quiet */
- is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy);
- }
- }
-
- if (is_silence)
+{
+ if (!activity)
{
/* The number of consecutive DTX frames should be within the allowed bounds */
(*nb_no_activity_frames)++;
-
if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
{
if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
@@ -1102,6 +1083,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
int analysis_read_subframe_bak=-1;
int is_silence = 0;
#endif
+ opus_int activity = VAD_NO_DECISION;
+
VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK;
@@ -1169,6 +1152,17 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (!is_silence)
st->voice_ratio = -1;
+ if (analysis_info.valid) {
+ activity = !is_silence && analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD;
+ if (!activity) {
+ /* Mark as active if this noise frame is sufficiently loud */
+ opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
+ activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
+ }
+ } else {
+ activity = !is_silence;
+ }
+
st->detected_bandwidth = 0;
if (analysis_info.valid)
{
@@ -1668,7 +1662,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_CELT_ONLY)
{
opus_int32 total_bitRate, celt_rate;
- opus_int activity;
#ifdef FIXED_POINT
const opus_int16 *pcm_silk;
#else
@@ -1676,14 +1669,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
#endif
- activity = VAD_NO_DECISION;
-#ifndef DISABLE_FLOAT_API
- if( analysis_info.valid ) {
- /* Inform SILK about the Opus VAD decision */
- activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD );
- }
-#endif
-
/* Distribute bits between SILK and CELT */
total_bitRate = 8 * bytes_target * frame_rate;
if( st->mode == MODE_HYBRID ) {
@@ -2144,8 +2129,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
#ifndef DISABLE_FLOAT_API
if (st->use_dtx && (analysis_info.valid || is_silence))
{
- if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames,
- st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch))
+ if (decide_dtx_mode(activity, &st->nb_no_activity_frames))
{
st->rangeFinal = 0;
data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);