summaryrefslogtreecommitdiff
path: root/celt/mdct.h
diff options
context:
space:
mode:
authorViswanath Puttagunta <viswanath.puttagunta@linaro.org>2015-05-15 12:42:19 -0500
committerJean-Marc Valin <jmvalin@jmvalin.ca>2015-10-07 18:09:20 -0400
commitf48abe8308ba7a67e443ad0911e06d62fd47ba91 (patch)
treea231446a0bc212ae891524881580f9b5ffd90798 /celt/mdct.h
parent0fe514352568530d4bd18a6686e6878417e6cf41 (diff)
downloadopus-f48abe8308ba7a67e443ad0911e06d62fd47ba91.tar.gz
armv7(float): Optimize encode usecase using NE10 library
Optimize the opus encode (float only) use case using the ARM NE10 library. This mainly affects opus_fft, clt_mdct_forward and related functions. This optimization can be used on ARM CPUs that have a NEON VFP unit. This patch only enables the optimizations for ARMv7. The official ARM NE10 library page is available at http://projectne10.github.io/Ne10/ To enable this optimization, use --enable-intrinsics --with-NE10=<install_prefix> or --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir> Compile-time checks are made during the configure process to make sure the optimization option is available only when the compiler supports NEON intrinsics. Run-time checks are made to make sure the optimized functions are only called on appropriate hardware. Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
Diffstat (limited to 'celt/mdct.h')
-rw-r--r--celt/mdct.h37
1 files changed, 32 insertions, 5 deletions
diff --git a/celt/mdct.h b/celt/mdct.h
index d7218213..8aef9087 100644
--- a/celt/mdct.h
+++ b/celt/mdct.h
@@ -53,13 +53,19 @@ typedef struct {
const kiss_twiddle_scalar * OPUS_RESTRICT trig;
} mdct_lookup;
-int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
-void clt_mdct_clear(mdct_lookup *l);
+#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+#include "arm/mdct_arm.h"
+#endif
+/* NOTE(review): judging by the names, these set up and release an mdct_lookup;
+   `arch` is presumably the CPU id used to dispatch clt_mdct_forward -- confirm. */
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
+void clt_mdct_clear(mdct_lookup *l, int arch);
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
- kiss_fft_scalar * OPUS_RESTRICT out,
- const opus_val16 *window, int overlap, int shift, int stride);
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap,
+ int shift, int stride, int arch);
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
(scales implicitly by 1/2) */
@@ -67,4 +73,25 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
+#if !defined(OVERRIDE_OPUS_MDCT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
+/* Table of forward-MDCT implementations, indexed by (arch & OPUS_ARCHMASK). */
+extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
+ const mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
+ int overlap, int shift, int stride, int arch);
+/* Index with the macro's own _arch argument, never a caller-scope `arch`. */
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+ ((*CLT_MDCT_FORWARD_IMPL[(_arch)&OPUS_ARCHMASK])(_l, _in, _out, \
+ _window, _overlap, _shift, \
+ _stride, _arch))
+#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+/* No run-time dispatch table here: forward straight to clt_mdct_forward_c. */
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+ clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
+
#endif