summary refs log tree commit diff
diff options
context:
space:
mode:
author Timothy B. Terriberry <territim@amazon.com> 2022-07-06 15:21:16 -0700
committer Timothy B. Terriberry <territim@amazon.com> 2022-07-09 21:18:52 -0700
commit 71fb707875b95672f0cd1cb153c890eff4219720 (patch)
tree 6941d8cfd78c208462b00be70b9f1369efdc4905
parent affb551e47052d5b9a0e37c681c816a6cf4159a7 (diff)
download opus-71fb707875b95672f0cd1cb153c890eff4219720.tar.gz
Don't compile x86 cpu detection without RTCD.
Also #error if RTCD is enabled without a detection method, as the Arm code does. A number of SILK functions also still used the lookup tables, even when RTCD was disabled. Fix those, too.
-rw-r--r-- celt/cpu_support.h 5
-rw-r--r-- celt/x86/x86cpu.c 9
-rw-r--r-- silk/SigProc_FIX.h 4
-rw-r--r-- silk/x86/SigProc_FIX_sse.h 9
-rw-r--r-- silk/x86/main_sse.h 31
-rw-r--r-- silk/x86/x86_silk_map.c 2
6 files changed, 36 insertions, 24 deletions
diff --git a/celt/cpu_support.h b/celt/cpu_support.h
index 68fc6067..7b5c56ca 100644
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -43,10 +43,11 @@
*/
#define OPUS_ARCHMASK 3
-#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+#elif defined(OPUS_HAVE_RTCD) && \
+ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+ (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
diff --git a/celt/x86/x86cpu.c b/celt/x86/x86cpu.c
index 7cfc8db5..6a1914de 100644
--- a/celt/x86/x86cpu.c
+++ b/celt/x86/x86cpu.c
@@ -35,11 +35,11 @@
#include "pitch.h"
#include "x86cpu.h"
-#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+#if defined(OPUS_HAVE_RTCD) && \
+ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
-
+ (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#if defined(_MSC_VER)
@@ -91,6 +91,9 @@ static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
what we want on CPUs that don't support CPUID. */
CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0;
}
+#else
+# error "Configured to use x86 RTCD, but no CPU detection method available. " \
+ "Reconfigure with --disable-rtcd (or send patches)."
#endif
}
diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index 1d9bf2f1..fbdfa82e 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -609,10 +609,12 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* the following seems faster on x86 */
#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
-#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#if !defined(OVERRIDE_silk_burg_modified)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
+#endif
+#if !defined(OVERRIDE_silk_inner_prod16)
#define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
#endif
diff --git a/silk/x86/SigProc_FIX_sse.h b/silk/x86/SigProc_FIX_sse.h
index 9bcaa805..89a5ec88 100644
--- a/silk/x86/SigProc_FIX_sse.h
+++ b/silk/x86/SigProc_FIX_sse.h
@@ -46,10 +46,12 @@ void silk_burg_modified_sse4_1(
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
+
+# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
opus_int32 *res_nrg, /* O Residual energy */
@@ -62,6 +64,7 @@ extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
const opus_int D, /* I Order */
int arch /* I Run-time architecture */);
+# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
@@ -76,16 +79,18 @@ opus_int64 silk_inner_prod16_sse4_1(
# if defined(OPUS_X86_PRESUME_SSE4_1)
+# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *inVec1,
const opus_int16 *inVec2,
const opus_int len);
+# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))
diff --git a/silk/x86/main_sse.h b/silk/x86/main_sse.h
index 9ed436bb..a01d7f6c 100644
--- a/silk/x86/main_sse.h
+++ b/silk/x86/main_sse.h
@@ -34,8 +34,6 @@
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
-# define OVERRIDE_silk_VQ_WMat_EC
-
void silk_VQ_WMat_EC_sse4_1(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *res_nrg_Q15, /* O best residual energy */
@@ -53,12 +51,13 @@ void silk_VQ_WMat_EC_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
+# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
opus_int8 *ind, /* O index of best codebook vector */
@@ -75,6 +74,7 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
const opus_int L /* I number of vectors in codebook */
);
+# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
@@ -82,8 +82,6 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
# endif
-# define OVERRIDE_silk_NSQ
-
void silk_NSQ_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -104,12 +102,13 @@ void silk_NSQ_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
+# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
@@ -129,6 +128,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
+# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
@@ -136,8 +136,6 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
# endif
-# define OVERRIDE_silk_NSQ_del_dec
-
void silk_NSQ_del_dec_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -158,12 +156,13 @@ void silk_NSQ_del_dec_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
+# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
@@ -183,6 +182,7 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
+# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
@@ -221,25 +221,26 @@ void silk_VAD_GetNoiseLevels(
silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
);
-# define OVERRIDE_silk_VAD_GetSA_Q8
-
opus_int silk_VAD_GetSA_Q8_sse4_1(
silk_encoder_state *psEnC,
const opus_int16 pIn[]
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
+
+# define OVERRIDE_silk_VAD_GetSA_Q8
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
-# else
-
-# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
- ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
+# elif defined(OPUS_HAVE_RTCD)
extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
silk_encoder_state *psEnC,
const opus_int16 pIn[]);
+# define OVERRIDE_silk_VAD_GetSA_Q8
+# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
+ ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
+
# endif
# endif
diff --git a/silk/x86/x86_silk_map.c b/silk/x86/x86_silk_map.c
index ca13cde9..70f60078 100644
--- a/silk/x86/x86_silk_map.c
+++ b/silk/x86/x86_silk_map.c
@@ -35,7 +35,7 @@
#include "pitch.h"
#include "main.h"
-#if !defined(OPUS_X86_PRESUME_SSE4_1)
+#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1)
#if defined(FIXED_POINT)