summary | refs | log | tree | commit | diff
path: root/celt/arm
diff options
context:
space:
mode:
author: Viswanath Puttagunta <viswanath.puttagunta@linaro.org> 2015-05-15 12:42:25 -0500
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2015-10-07 18:09:20 -0400
commit: ba8713ccb5fd1ee893630d2d1bcd86aacfc3aeea (patch)
tree: 03882eef35e3832f77b70cf41ac2144d08021995 /celt/arm
parent: 3807af3b3b6109d38dce236abf8c96b8724da863 (diff)
download: opus-ba8713ccb5fd1ee893630d2d1bcd86aacfc3aeea.tar.gz
armv7: Optimize fixed point FFT using NE10 library
Uses the NEON-optimized fixed-point FFT routines in the NE10 library.
Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
Diffstat (limited to 'celt/arm')
-rw-r--r-- celt/arm/arm_celt_map.c 24
-rw-r--r-- celt/arm/celt_ne10_fft.c 95
-rw-r--r-- celt/arm/fft_arm.h 26
3 files changed, 87 insertions, 58 deletions
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index 4c2d28ce..b5bd44dc 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -52,23 +52,26 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
+# endif
+# endif /* FIXED_POINT */
-# if defined(HAVE_ARM_NE10)
-# if defined(CUSTOM_MODES)
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# if defined(HAVE_ARM_NE10)
+# if defined(CUSTOM_MODES)
int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
- opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon /* Neon with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
- opus_fft_free_arm_float_neon /* Neon with NE10 */
+ opus_fft_free_arm_neon /* Neon with NE10 */
};
-# endif /* CUSTOM_MODES */
+# endif /* CUSTOM_MODES */
void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
@@ -76,7 +79,7 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
- opus_fft_float_neon /* Neon with NE10 */
+ opus_fft_neon /* Neon with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@@ -85,9 +88,10 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
- opus_ifft_float_neon /* Neon with NE10 */
+ opus_ifft_neon /* Neon with NE10 */
};
+# if !defined(FIXED_POINT)
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
@@ -112,8 +116,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_backward_float_neon /* Neon with NE10 */
};
-# endif /* HAVE_ARM_NE10 */
-# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
-# endif /* FIXED_POINT */
+# endif /* !FIXED_POINT */
+# endif /* HAVE_ARM_NE10 */
+# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
#endif /* OPUS_HAVE_RTCD */
diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
index 2ba8c559..42d96a71 100644
--- a/celt/arm/celt_ne10_fft.c
+++ b/celt/arm/celt_ne10_fft.c
@@ -43,15 +43,31 @@
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
-# if defined(CUSTOM_MODES)
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
+#else
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
+#endif
+
+#if defined(CUSTOM_MODES)
/* nfft lengths in NE10 that support scaled fft */
-#define NE10_FFTSCALED_SUPPORT_MAX 4
+# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
480, 240, 120, 60
};
-int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
+int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
int i;
size_t memneeded = sizeof(struct arch_fft_state);
@@ -71,7 +87,7 @@ int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
}
else {
st->arch_fft->is_supported = 1;
- st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
+ st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
if (st->arch_fft->priv == NULL) {
return -1;
}
@@ -79,69 +95,80 @@ int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
return 0;
}
-void opus_fft_free_arm_float_neon(kiss_fft_state *st)
+void opus_fft_free_arm_neon(kiss_fft_state *st)
{
- ne10_fft_cfg_float32_t cfg;
+ NE10_FFT_CFG_TYPE_T cfg;
if (!st->arch_fft)
return;
- cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv;
+ cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
if (cfg)
- ne10_fft_destroy_c2c_float32(cfg);
+ NE10_FFT_DESTROY_C2C_TYPE(cfg);
opus_free(st->arch_fft);
}
-# endif
+#endif
-void opus_fft_float_neon(const kiss_fft_state *st,
- const kiss_fft_cpx *fin,
- kiss_fft_cpx *fout)
+void opus_fft_neon(const kiss_fft_state *st,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout)
{
- ne10_fft_state_float32_t state;
- ne10_fft_cfg_float32_t cfg = &state;
- VARDECL(ne10_fft_cpx_float32_t, buffer);
+ NE10_FFT_STATE_TYPE_T state;
+ NE10_FFT_CFG_TYPE_T cfg = &state;
+ VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
- ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
+ ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_fft_c(st, fin, fout);
}
else {
- memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
- state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
+ memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
+ state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
+#if !defined(FIXED_POINT)
state.is_forward_scaled = 1;
- ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
- (ne10_fft_cpx_float32_t *)fin,
- cfg, 0);
+ NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+ (NE10_FFT_CPX_TYPE_T *)fin,
+ cfg, 0);
+#else
+ NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+ (NE10_FFT_CPX_TYPE_T *)fin,
+ cfg, 0, 1);
+#endif
}
RESTORE_STACK;
}
-void opus_ifft_float_neon(const kiss_fft_state *st,
- const kiss_fft_cpx *fin,
- kiss_fft_cpx *fout)
+void opus_ifft_neon(const kiss_fft_state *st,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout)
{
- ne10_fft_state_float32_t state;
- ne10_fft_cfg_float32_t cfg = &state;
- VARDECL(ne10_fft_cpx_float32_t, buffer);
+ NE10_FFT_STATE_TYPE_T state;
+ NE10_FFT_CFG_TYPE_T cfg = &state;
+ VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
- ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
+ ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_ifft_c(st, fin, fout);
}
else {
- memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
- state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
+ memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
+ state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
+#if !defined(FIXED_POINT)
state.is_backward_scaled = 0;
- ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
- (ne10_fft_cpx_float32_t *)fin,
- cfg, 1);
+ NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+ (NE10_FFT_CPX_TYPE_T *)fin,
+ cfg, 1);
+#else
+ NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+ (NE10_FFT_CPX_TYPE_T *)fin,
+ cfg, 1, 0);
+#endif
}
RESTORE_STACK;
}
-#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h
index e57b0aa6..0cb55d8e 100644
--- a/celt/arm/fft_arm.h
+++ b/celt/arm/fft_arm.h
@@ -37,38 +37,36 @@
#include "config.h"
#include "kiss_fft.h"
-#if !defined(FIXED_POINT)
#if defined(HAVE_ARM_NE10)
-int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
-void opus_fft_free_arm_float_neon(kiss_fft_state *st);
+int opus_fft_alloc_arm_neon(kiss_fft_state *st);
+void opus_fft_free_arm_neon(kiss_fft_state *st);
-void opus_fft_float_neon(const kiss_fft_state *st,
- const kiss_fft_cpx *fin,
- kiss_fft_cpx *fout);
+void opus_fft_neon(const kiss_fft_state *st,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout);
-void opus_ifft_float_neon(const kiss_fft_state *st,
- const kiss_fft_cpx *fin,
- kiss_fft_cpx *fout);
+void opus_ifft_neon(const kiss_fft_state *st,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_FFT (1)
#define opus_fft_alloc_arch(_st, arch) \
- ((void)(arch), opus_fft_alloc_arm_float_neon(_st))
+ ((void)(arch), opus_fft_alloc_arm_neon(_st))
#define opus_fft_free_arch(_st, arch) \
- ((void)(arch), opus_fft_free_arm_float_neon(_st))
+ ((void)(arch), opus_fft_free_arm_neon(_st))
#define opus_fft(_st, _fin, _fout, arch) \
- ((void)(arch), opus_fft_float_neon(_st, _fin, _fout))
+ ((void)(arch), opus_fft_neon(_st, _fin, _fout))
#define opus_ifft(_st, _fin, _fout, arch) \
- ((void)(arch), opus_ifft_float_neon(_st, _fin, _fout))
+ ((void)(arch), opus_ifft_neon(_st, _fin, _fout))
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
-#endif /* FIXED_POINT */
#endif