summaryrefslogtreecommitdiff
path: root/vpx_dsp
diff options
context:
space:
mode:
authorSalome Thirot <salome.thirot@arm.com>2023-03-08 12:01:04 +0000
committerSalome Thirot <salome.thirot@arm.com>2023-03-14 09:32:42 +0000
commitbe84aa14dc3d7b1eae3bab9bf060eabadd84196d (patch)
tree755ff4f0eb6106764afa023f363788012cb951aa /vpx_dsp
parentd32a410880c6583d49baaac17c84b3d0fead43ba (diff)
downloadlibvpx-be84aa14dc3d7b1eae3bab9bf060eabadd84196d.tar.gz
Add Neon implementation of vpx_highbd_satd_c
Add Neon implementation of vpx_highbd_satd_c as well as the corresponding tests. Change-Id: I3d50e6abdf168fb13743e7d8da9364f072308b7f
Diffstat (limited to 'vpx_dsp')
-rw-r--r--vpx_dsp/arm/highbd_avg_neon.c40
-rw-r--r--vpx_dsp/vpx_dsp.mk1
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl2
3 files changed, 42 insertions, 1 deletions
diff --git a/vpx_dsp/arm/highbd_avg_neon.c b/vpx_dsp/arm/highbd_avg_neon.c
new file mode 100644
index 000000000..3ba58b800
--- /dev/null
+++ b/vpx_dsp/arm/highbd_avg_neon.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_config.h"
+
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/arm/sum_neon.h"
+
+// Sums vabsq_s32() of `length` coefficients read from coeff. coeff: 32 bits, dynamic range [-2147483648, 2147483647].
+// length: value range {16, 64, 256, 1024}. The loop consumes 8 coefficients per pass and exits only when length reaches exactly 0.
+// satd: 42 bits, dynamic range [-2147483648 * 1024, 2147483647 * 1024]. Accumulated in 64-bit lanes, then narrowed by the final (int) cast.
+int vpx_highbd_satd_neon(const tran_low_t *coeff, int length) {
+ int64x2_t sum_s64[2] = { vdupq_n_s64(0), vdupq_n_s64(0) };  // two separate 64-bit accumulators, both start at 0
+
+ do {  // do/while: the body always runs at least once, there is no up-front length check
+ int32x4_t abs0, abs1;
+ const int32x4_t s0 = load_tran_low_to_s32q(coeff);  // helper from mem_neon.h; presumably loads coeff[0..3] -- confirm
+ const int32x4_t s1 = load_tran_low_to_s32q(coeff + 4);  // presumably coeff[4..7] -- confirm
+
+ abs0 = vabsq_s32(s0);  // per-lane absolute value (non-saturating form)
+ sum_s64[0] = vpadalq_s32(sum_s64[0], abs0);  // add adjacent 32-bit lane pairs, widen to 64 bits, accumulate
+ abs1 = vabsq_s32(s1);
+ sum_s64[1] = vpadalq_s32(sum_s64[1], abs1);  // second half goes to the other accumulator
+
+ length -= 8;  // 8 coefficients handled this pass (s0 + s1)
+ coeff += 8;
+ } while (length != 0);  // relies on length being a positive multiple of 8
+
+ return (int)horizontal_add_int64x2(vaddq_s64(sum_s64[0], sum_s64[1]));  // merge accumulators; helper from sum_neon.h presumably sums both lanes -- confirm
+}
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index ab8e5bd81..207cda631 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -344,6 +344,7 @@ DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_NEON) += arm/highbd_hadamard_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_avg_neon.c
endif
DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
DSP_SRCS-$(HAVE_LSX) += loongarch/avg_lsx.c
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 3baf16cc8..2a01ec1b5 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -821,7 +821,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
specialize qw/vpx_satd avx2 sse2 neon/;
add_proto qw/int vpx_highbd_satd/, "const tran_low_t *coeff, int length";
- specialize qw/vpx_highbd_satd avx2/;
+ specialize qw/vpx_highbd_satd avx2 neon/;
} else {
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx lsx/, "$ssse3_x86_64";