summary | refs | log | tree | commit | diff
path: root/silk
diff options
context:
space:
mode:
author: Gustaf Ullberg <gustaf.ullberg@gmail.com> 2018-04-10 13:37:49 +0200
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2018-05-14 13:55:39 -0400
commit: 42f43db7e470dec8c7a40c86180d6a07241c3577 (patch)
tree: 39db3088e1693763e3e7d57fecff3df1aff8778a /silk
parent: 1b5844678cde315bd2d67f0e8233d15cc83b001d (diff)
download: opus-42f43db7e470dec8c7a40c86180d6a07241c3577.tar.gz
Silk makes use of Opus VAD
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Diffstat (limited to 'silk')
-rw-r--r-- silk/API.h | 3
-rw-r--r-- silk/define.h | 5
-rw-r--r-- silk/enc_API.c | 7
-rw-r--r-- silk/fixed/encode_frame_FIX.c | 11
-rw-r--r-- silk/fixed/main_FIX.h | 3
-rw-r--r-- silk/float/encode_frame_FLP.c | 11
-rw-r--r-- silk/float/main_FLP.h | 3
7 files changed, 33 insertions, 10 deletions
diff --git a/silk/API.h b/silk/API.h
index 0131acbb..4d90ff9a 100644
--- a/silk/API.h
+++ b/silk/API.h
@@ -80,7 +80,8 @@ opus_int silk_Encode( /* O Returns error co
opus_int nSamplesIn, /* I Number of samples in input vector */
ec_enc *psRangeEnc, /* I/O Compressor data structure */
opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
- const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
+ const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
+ int activity /* I Decision of Opus voice activity detector */
);
/****************************************/
diff --git a/silk/define.h b/silk/define.h
index 1286048e..22fd720b 100644
--- a/silk/define.h
+++ b/silk/define.h
@@ -58,6 +58,11 @@ extern "C"
#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */
#define DTX_ACTIVITY_THRESHOLD 0.1f
+/* VAD decision */
+#define VAD_NO_DECISION -1
+#define VAD_NO_ACTIVITY 0
+#define VAD_ACTIVITY 1
+
/* Maximum sampling frequency */
#define MAX_FS_KHZ 16
#define MAX_API_FS_KHZ 48
diff --git a/silk/enc_API.c b/silk/enc_API.c
index 10adf2c3..7ae31a9e 100644
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -144,7 +144,8 @@ opus_int silk_Encode( /* O Returns error co
opus_int nSamplesIn, /* I Number of samples in input vector */
ec_enc *psRangeEnc, /* I/O Compressor data structure */
opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
- const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
+ const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
+ opus_int activity /* I Decision of Opus voice activity detector */
)
{
opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
@@ -425,7 +426,7 @@ opus_int silk_Encode( /* O Returns error co
psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
}
- silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity );
} else {
psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
}
@@ -440,7 +441,7 @@ opus_int silk_Encode( /* O Returns error co
silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
}
- silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity );
/* Encode */
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
diff --git a/silk/fixed/encode_frame_FIX.c b/silk/fixed/encode_frame_FIX.c
index d49a0fe3..a02bf87d 100644
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -43,18 +43,25 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
);
void silk_encode_do_VAD_FIX(
- silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
+ silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ opus_int activity /* I Decision of Opus voice activity detector */
)
{
+ const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );
+
/****************************/
/* Voice Activity Detection */
/****************************/
silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
+ /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
+ if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
+ psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
+ }
/**************************************************/
/* Convert speech activity into VAD and DTX flags */
/**************************************************/
- if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+ if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
psEnc->sCmn.noSpeechCounter++;
if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {
diff --git a/silk/fixed/main_FIX.h b/silk/fixed/main_FIX.h
index 780afa39..6d2112e5 100644
--- a/silk/fixed/main_FIX.h
+++ b/silk/fixed/main_FIX.h
@@ -66,7 +66,8 @@ void silk_HP_variable_cutoff(
/* Encoder main function */
void silk_encode_do_VAD_FIX(
- silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
+ silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ opus_int activity /* I Decision of Opus voice activity detector */
);
/* Encoder main function */
diff --git a/silk/float/encode_frame_FLP.c b/silk/float/encode_frame_FLP.c
index 1d4d8a77..b029c3f5 100644
--- a/silk/float/encode_frame_FLP.c
+++ b/silk/float/encode_frame_FLP.c
@@ -42,18 +42,25 @@ static OPUS_INLINE void silk_LBRR_encode_FLP(
);
void silk_encode_do_VAD_FLP(
- silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ opus_int activity /* I Decision of Opus voice activity detector */
)
{
+ const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );
+
/****************************/
/* Voice Activity Detection */
/****************************/
silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
+ /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
+ if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
+ psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
+ }
/**************************************************/
/* Convert speech activity into VAD and DTX flags */
/**************************************************/
- if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+ if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
psEnc->sCmn.noSpeechCounter++;
if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {
diff --git a/silk/float/main_FLP.h b/silk/float/main_FLP.h
index f47fc93b..5dc0ccf4 100644
--- a/silk/float/main_FLP.h
+++ b/silk/float/main_FLP.h
@@ -56,7 +56,8 @@ void silk_HP_variable_cutoff(
/* Encoder main function */
void silk_encode_do_VAD_FLP(
- silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ opus_int activity /* I Decision of Opus voice activity detector */
);
/* Encoder main function */