diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2023-04-12 17:38:24 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2023-04-19 00:57:18 +0100 |
commit | 42c0cbb9cb114af824083f4e6f0e757985b8942f (patch) | |
tree | 25dec91374ba0371d4a6e4da009d2d3b080104a9 /vpx_dsp | |
parent | 05b244af52e87ff7dacce78a6db3eab1765e84c8 (diff) | |
download | libvpx-42c0cbb9cb114af824083f4e6f0e757985b8942f.tar.gz |
Add Neon implementation of vpx_sad_skip_<w>x<h>x4d functions
Add Neon implementations of standard bitdepth downsampling SAD4D
functions for all block sizes.
Also add corresponding unit tests.
Change-Id: Ieb77661ea2bbe357529862a5fb54956e34e8d758
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/arm/sad4d_neon.c | 32 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 24 |
2 files changed, 45 insertions, 11 deletions
diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c
index 6ad6c9621..44cd99028 100644
--- a/vpx_dsp/arm/sad4d_neon.c
+++ b/vpx_dsp/arm/sad4d_neon.c
@@ -308,3 +308,35 @@ SAD_WXH_4D_NEON(64, 32)
 SAD_WXH_4D_NEON(64, 64)
 
 #undef SAD_WXH_4D_NEON
+
+#define SAD_SKIP_WXH_4D_NEON(w, h)                                          \
+  void vpx_sad_skip_##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \
+                                        const uint8_t *const ref[4],        \
+                                        int ref_stride, uint32_t res[4]) {  \
+    sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res,       \
+                       ((h) >> 1));                                         \
+    res[0] <<= 1;                                                           \
+    res[1] <<= 1;                                                           \
+    res[2] <<= 1;                                                           \
+    res[3] <<= 1;                                                           \
+  }
+
+SAD_SKIP_WXH_4D_NEON(4, 4)
+SAD_SKIP_WXH_4D_NEON(4, 8)
+
+SAD_SKIP_WXH_4D_NEON(8, 4)
+SAD_SKIP_WXH_4D_NEON(8, 8)
+SAD_SKIP_WXH_4D_NEON(8, 16)
+
+SAD_SKIP_WXH_4D_NEON(16, 8)
+SAD_SKIP_WXH_4D_NEON(16, 16)
+SAD_SKIP_WXH_4D_NEON(16, 32)
+
+SAD_SKIP_WXH_4D_NEON(32, 16)
+SAD_SKIP_WXH_4D_NEON(32, 32)
+SAD_SKIP_WXH_4D_NEON(32, 64)
+
+SAD_SKIP_WXH_4D_NEON(64, 32)
+SAD_SKIP_WXH_4D_NEON(64, 64)
+
+#undef SAD_SKIP_WXH_4D_NEON
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 7bea73895..4c5fab318 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -968,41 +968,43 @@ add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const
 specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
 
 add_proto qw/void vpx_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_64x64x4d avx2 sse2/;
+specialize qw/vpx_sad_skip_64x64x4d neon avx2 sse2/;
 
 add_proto qw/void vpx_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_64x32x4d avx2 sse2/;
+specialize qw/vpx_sad_skip_64x32x4d neon avx2 sse2/;
 
 add_proto qw/void vpx_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_32x64x4d avx2 sse2/;
+specialize qw/vpx_sad_skip_32x64x4d neon avx2 sse2/;
 
 add_proto qw/void vpx_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_32x32x4d avx2 sse2/;
+specialize qw/vpx_sad_skip_32x32x4d neon avx2 sse2/;
 
 add_proto qw/void vpx_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_32x16x4d avx2 sse2/;
+specialize qw/vpx_sad_skip_32x16x4d neon avx2 sse2/;
 
 add_proto qw/void vpx_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_16x32x4d sse2/;
+specialize qw/vpx_sad_skip_16x32x4d neon sse2/;
 
 add_proto qw/void vpx_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_16x16x4d sse2/;
+specialize qw/vpx_sad_skip_16x16x4d neon sse2/;
 
 add_proto qw/void vpx_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_16x8x4d sse2/;
+specialize qw/vpx_sad_skip_16x8x4d neon sse2/;
 
 add_proto qw/void vpx_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_8x16x4d sse2/;
+specialize qw/vpx_sad_skip_8x16x4d neon sse2/;
 
 add_proto qw/void vpx_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_8x8x4d sse2/;
+specialize qw/vpx_sad_skip_8x8x4d neon sse2/;
 
 add_proto qw/void vpx_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_8x4x4d neon/;
 
 add_proto qw/void vpx_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad_skip_4x8x4d sse2/;
+specialize qw/vpx_sad_skip_4x8x4d neon sse2/;
 
 add_proto qw/void vpx_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_4x4x4d neon/;
 
 add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
 specialize qw/vpx_sum_squares_2d_i16 neon sse2 msa/;