diff options
author | Salome Thirot <salome.thirot@arm.com> | 2023-03-08 12:01:04 +0000 |
---|---|---|
committer | Salome Thirot <salome.thirot@arm.com> | 2023-03-14 09:32:42 +0000 |
commit | be84aa14dc3d7b1eae3bab9bf060eabadd84196d (patch) | |
tree | 755ff4f0eb6106764afa023f363788012cb951aa /vpx_dsp | |
parent | d32a410880c6583d49baaac17c84b3d0fead43ba (diff) | |
download | libvpx-be84aa14dc3d7b1eae3bab9bf060eabadd84196d.tar.gz |
Add Neon implementation of vpx_highbd_satd_c
Add Neon implementation of vpx_highbd_satd_c as well as the
corresponding tests.
Change-Id: I3d50e6abdf168fb13743e7d8da9364f072308b7f
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/arm/highbd_avg_neon.c | 40 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp.mk | 1 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 |
3 files changed, 42 insertions, 1 deletions
diff --git a/vpx_dsp/arm/highbd_avg_neon.c b/vpx_dsp/arm/highbd_avg_neon.c
new file mode 100644
index 000000000..3ba58b800
--- /dev/null
+++ b/vpx_dsp/arm/highbd_avg_neon.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_config.h"
+
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/arm/sum_neon.h"
+
+// coeff: 32 bits, dynamic range [-2147483648, 2147483647].
+// length: value range {16, 64, 256, 1024}.
+// satd: 42 bits, dynamic range [-2147483648 * 1024, 2147483647 * 1024]
+int vpx_highbd_satd_neon(const tran_low_t *coeff, int length) {
+  int64x2_t sum_s64[2] = { vdupq_n_s64(0), vdupq_n_s64(0) };
+
+  do {
+    int32x4_t abs0, abs1;
+    const int32x4_t s0 = load_tran_low_to_s32q(coeff);
+    const int32x4_t s1 = load_tran_low_to_s32q(coeff + 4);
+
+    abs0 = vabsq_s32(s0);
+    sum_s64[0] = vpadalq_s32(sum_s64[0], abs0);
+    abs1 = vabsq_s32(s1);
+    sum_s64[1] = vpadalq_s32(sum_s64[1], abs1);
+
+    length -= 8;
+    coeff += 8;
+  } while (length != 0);
+
+  return (int)horizontal_add_int64x2(vaddq_s64(sum_s64[0], sum_s64[1]));
+}
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index ab8e5bd81..207cda631 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -344,6 +344,7 @@ DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
 DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c
 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 DSP_SRCS-$(HAVE_NEON) += arm/highbd_hadamard_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_avg_neon.c
 endif
 DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
 DSP_SRCS-$(HAVE_LSX) += loongarch/avg_lsx.c
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 3baf16cc8..2a01ec1b5 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -821,7 +821,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
     specialize qw/vpx_satd avx2 sse2 neon/;
 
     add_proto qw/int vpx_highbd_satd/, "const tran_low_t *coeff, int length";
-    specialize qw/vpx_highbd_satd avx2/;
+    specialize qw/vpx_highbd_satd avx2 neon/;
   } else {
     add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
     specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx lsx/, "$ssse3_x86_64";