1 files changed, 35 insertions, 19 deletions
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 1ae5598a..844b08dd 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -892,15 +892,34 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in
 #endif
 
 /* Decides if DTX should be turned on (=1) or off (=0) */
-static int decide_dtx_mode(opus_int activity,          /* indicates if this frame contains speech/music */
-                           int *nb_no_activity_frames  /* number of consecutive frames with no activity */
-                           )
-
+static int decide_dtx_mode(float activity_probability,    /* probability that current frame contains speech/music */
+                           int *nb_no_activity_frames,    /* number of consecutive frames with no activity */
+                           opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */
+                           const opus_val16 *pcm,         /* input pcm signal */
+                           int frame_size,                /* frame size */
+                           int channels,
+                           int is_silence,                 /* only digital silence detected in this frame */
+                           int arch
+                          )
 {
-   if (!activity)
+   opus_val32 noise_energy;
+
+   if (!is_silence)
+   {
+      if (activity_probability < DTX_ACTIVITY_THRESHOLD)  /* is noise */
+      {
+         noise_energy = compute_frame_energy(pcm, frame_size, channels, arch);
+
+         /* Treat the noise frame as silence only if it is quiet enough relative to the peak signal energy */
+         is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy);
+      }
+   }
+
+   if (is_silence)
    {
       /* The number of consecutive DTX frames should be within the allowed bounds */
       (*nb_no_activity_frames)++;
+
       if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
       {
          if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
@@ -1083,8 +1102,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     int analysis_read_subframe_bak=-1;
     int is_silence = 0;
 #endif
-    opus_int activity = VAD_NO_DECISION;
-
     VARDECL(opus_val16, tmp_prefill);
 
     ALLOC_STACK;
@@ -1152,17 +1169,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     if (!is_silence)
       st->voice_ratio = -1;
 
-    if (analysis_info.valid) {
-       activity = !is_silence && analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD;
-       if (!activity) {
-          /* Mark as active if this noise frame is sufficiently loud */
-          opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
-          activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
-       }
-    } else {
-       activity = !is_silence;
-    }
-
     st->detected_bandwidth = 0;
     if (analysis_info.valid)
     {
@@ -1662,6 +1668,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     if (st->mode != MODE_CELT_ONLY)
     {
         opus_int32 total_bitRate, celt_rate;
+        opus_int activity;
 #ifdef FIXED_POINT
        const opus_int16 *pcm_silk;
 #else
@@ -1669,6 +1676,14 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
 #endif
 
+        activity = VAD_NO_DECISION;
+#ifndef DISABLE_FLOAT_API
+        if( analysis_info.valid ) {
+            /* Inform SILK about the Opus VAD decision */
+            activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD );
+        }
+#endif
+
         /* Distribute bits between SILK and CELT */
         total_bitRate = 8 * bytes_target * frame_rate;
         if( st->mode == MODE_HYBRID ) {
@@ -2129,7 +2144,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
 #ifndef DISABLE_FLOAT_API
     if (st->use_dtx && (analysis_info.valid || is_silence))
     {
-       if (decide_dtx_mode(activity, &st->nb_no_activity_frames))
+       if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames,
+             st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch))
        {
           st->rangeFinal = 0;
           data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);