New DTX that works in all modes (SILK/CELT/HYBRID)

A frame is marked as valid for DTX if it contains noise or only digital silence. As before, there is an overhang period of 200 ms and a maximum consecutive DTX period of 400 ms. If the new DTX cannot be used because of the complexity setting and sampling frequency chosen, the SILK DTX will be used instead.

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
author: Felicia Lim <flim@google.com> 2016-05-16 15:29:53 +0200
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2016-05-23 11:43:22 -0400
commit: 1aafc2b6f59a95b3a25d603923fe959cc9d53a94 (patch)
tree: 5fdea375565b2a771a6c75ce147c0e188d3f6b7f
parent: 150cf7c14e3d1179ab87f39efef23d2f2c6f4ee6 (diff)
download: opus-1aafc2b6f59a95b3a25d603923fe959cc9d53a94.tar.gz
4 files changed, 60 insertions, 4 deletions
diff --git a/celt/celt.h b/celt/celt.h
index 1b8b86f9..52dfb8d6 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -57,7 +57,8 @@ typedef struct {
    float noisiness;
    float activity;
    float music_prob;
-   int        bandwidth;
+   int   bandwidth;
+   float activity_probability;
 } AnalysisInfo;
 
 typedef struct {
diff --git a/silk/define.h b/silk/define.h
index 520cdbfc..dd2e29b6 100644
--- a/silk/define.h
+++ b/silk/define.h
@@ -56,6 +56,7 @@ extern "C"
 /* DTX settings */
 #define NB_SPEECH_FRAMES_BEFORE_DTX             10      /* eq 200 ms */
 #define MAX_CONSECUTIVE_DTX                     20      /* eq 400 ms */
+#define DTX_ACTIVITY_THRESHOLD                  0.1f    /* activity probability below this is treated as noise */
 
 /* Maximum sampling frequency */
 #define MAX_FS_KHZ                              16
diff --git a/src/analysis.c b/src/analysis.c
index 95e79964..076deb2f 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -524,6 +524,9 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
     /* Consider that silence has a 50-50 probability. */
     frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
 
+    /* Probability of speech or music vs noise */
+    info->activity_probability = frame_probs[1];
+
     /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
     {
        /* Probability of state transition */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 1450e569..d6ee624a 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -84,6 +84,7 @@ struct OpusEncoder {
     int          arch;
 #ifndef DISABLE_FLOAT_API
     TonalityAnalysisState analysis;
+    int          use_dtx;                 /* general DTX for both SILK and CELT */
 #endif
 
 #define OPUS_ENCODER_RESET_START stream_channels
@@ -105,6 +106,7 @@ struct OpusEncoder {
     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
 #ifndef DISABLE_FLOAT_API
     int          detected_bandwidth;
+    int          nb_no_activity_frames;
 #endif
     opus_uint32  rangeFinal;
 };
@@ -1027,6 +1029,38 @@ static int is_digital_silence(const opus_val16* pcm, int frame_size, int lsb_dep
    return silence;
 }
 
+/* Returns 1 if the current frame is valid for DTX, 0 otherwise; updates *nb_no_activity_frames as a side effect */
+static int decide_dtx_mode(float activity_probability,    /* probability that current frame contains speech/music */
+                           int *nb_no_activity_frames,    /* in/out: running count of consecutive frames with no activity */
+                           int is_silence                 /* only digital silence detected in this frame */
+                          )
+{
+   int is_noise = 0;
+
+   if (!is_silence)
+   {
+      is_noise = activity_probability < DTX_ACTIVITY_THRESHOLD;   /* low speech/music probability: classify as noise */
+   }
+
+   if (is_silence || is_noise)
+   {
+      /* Count this inactive frame; DTX applies only after the overhang and for at most MAX_CONSECUTIVE_DTX frames in a row */
+      (*nb_no_activity_frames)++;
+
+      if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
+      {
+         if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
+            /* Valid frame for DTX! */
+            return 1;
+         else
+            (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX;   /* max DTX run reached: return 0 for this frame, overhang stays satisfied so the next inactive frame can use DTX again */
+      }
+   } else
+      (*nb_no_activity_frames) = 0;   /* activity detected: restart the count */
+
+   return 0;
+}
+
 #endif
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
@@ -1280,7 +1314,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        /* When FEC is enabled and there's enough packet loss, use SILK */
        if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
           st->mode = MODE_SILK_ONLY;
-       /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */
+       /* When encoding voice and DTX is enabled but the generalized DTX cannot be used,
+          because of complexity and sampling frequency settings,
+          set the encoder to SILK mode so that the SILK DTX can be used */
+       st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence);
        if (st->silk_mode.useDTX && voice_est > 100)
           st->mode = MODE_SILK_ONLY;
 #endif
@@ -2045,6 +2082,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
 
     st->first = 0;
 
+    /* DTX decision */
+#ifndef DISABLE_FLOAT_API
+    if (st->use_dtx && (analysis_info.valid || is_silence))
+    {
+       if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, is_silence))
+       {
+          st->rangeFinal = 0;
+          data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+          RESTORE_STACK;
+          return 1;
+       }
+    }
+#endif
+
     /* In the unlikely case that the SILK encoder busted its target, tell
        the decoder to call the PLC */
     if (ec_tell(&enc) > (max_data_bytes-1)*8)
@@ -2321,7 +2372,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
             {
                goto bad_arg;
             }
-            st->silk_mode.useDTX = value;
+            st->use_dtx = value;
         }
         break;
         case OPUS_GET_DTX_REQUEST:
@@ -2331,7 +2382,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
             {
                goto bad_arg;
             }
-            *value = st->silk_mode.useDTX;
+            *value = st->use_dtx;
         }
         break;
         case OPUS_SET_COMPLEXITY_REQUEST:
author	Felicia Lim <flim@google.com>	2016-05-16 15:29:53 +0200
committer	Jean-Marc Valin <jmvalin@jmvalin.ca>	2016-05-23 11:43:22 -0400
commit	1aafc2b6f59a95b3a25d603923fe959cc9d53a94 (patch)
tree	5fdea375565b2a771a6c75ce147c0e188d3f6b7f
parent	150cf7c14e3d1179ab87f39efef23d2f2c6f4ee6 (diff)
download	opus-1aafc2b6f59a95b3a25d603923fe959cc9d53a94.tar.gz