summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chi Yo Tsai <chiyotsai@google.com> 2023-05-04 17:04:17 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2023-05-04 17:04:17 +0000
commit 4379041094108d20ce099ce202ab43af32195f8c (patch)
tree 4f2409af97ebc2b6548a8aad4e70bbcb3c00f65b
parent 4dd3afc00eb0c9d74dfa18bb8e727dbfb98e92ff (diff)
parent 2c03388231cf545e1245746a6ee4edfe0322e71e (diff)
download libvpx-4379041094108d20ce099ce202ab43af32195f8c.tar.gz
Merge changes I226215a2,Ia4918eb0,If6219446,Ibf00a6e1,I900a0a48 into main
* changes:
  Fix mismatched param names in vpx_dsp/x86/sad4d_avx2.c
  Fix mismatched param names in vpx_dsp/arm/highbd_sad4d_neon.c
  Fix mismatched param names in vpx_dsp/arm/sad4d_neon.c
  Fix mismatched param names in vpx_dsp/arm/highbd_avg_neon.c
  Fix clang warning on const-qualification of parameters
-rw-r--r-- test/vp9_quantize_test.cc 4
-rw-r--r-- vpx_dsp/arm/highbd_avg_neon.c 59
-rw-r--r-- vpx_dsp/arm/highbd_sad4d_neon.c 29
-rw-r--r-- vpx_dsp/arm/quantize_neon.c 4
-rw-r--r-- vpx_dsp/arm/sad4d_neon.c 32
-rw-r--r-- vpx_dsp/quantize.c 4
-rw-r--r-- vpx_dsp/vpx_dsp_rtcd_defs.pl 4
-rw-r--r-- vpx_dsp/x86/quantize_avx.c 4
-rw-r--r-- vpx_dsp/x86/quantize_avx2.c 4
-rw-r--r-- vpx_dsp/x86/quantize_ssse3.c 4
-rw-r--r-- vpx_dsp/x86/sad4d_avx2.c 60
11 files changed, 105 insertions, 103 deletions
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index 84a5a58e4..5e3a7c270 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -39,10 +39,10 @@ namespace {
const int number_of_iterations = 100;
typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
- const macroblock_plane *const mb_plane,
+ const macroblock_plane *mb_plane,
tran_low_t *qcoeff, tran_low_t *dqcoeff,
const int16_t *dequant, uint16_t *eob,
- const struct ScanOrder *const scan_order);
+ const struct ScanOrder *scan_order);
typedef std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
int /*max_size*/, bool /*is_fp*/>
QuantizeParam;
diff --git a/vpx_dsp/arm/highbd_avg_neon.c b/vpx_dsp/arm/highbd_avg_neon.c
index 8939ee131..4265596c8 100644
--- a/vpx_dsp/arm/highbd_avg_neon.c
+++ b/vpx_dsp/arm/highbd_avg_neon.c
@@ -16,18 +16,18 @@
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/sum_neon.h"
-uint32_t vpx_highbd_avg_4x4_neon(const uint8_t *a, int a_stride) {
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a);
- const uint16x8_t a0 = load_unaligned_u16q(a_ptr + 0 * a_stride, a_stride);
- const uint16x8_t a1 = load_unaligned_u16q(a_ptr + 2 * a_stride, a_stride);
+uint32_t vpx_highbd_avg_4x4_neon(const uint8_t *s8, int p) {
+ const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(s8);
+ const uint16x8_t a0 = load_unaligned_u16q(a_ptr + 0 * p, p);
+ const uint16x8_t a1 = load_unaligned_u16q(a_ptr + 2 * p, p);
return (horizontal_add_uint16x8(vaddq_u16(a0, a1)) + (1 << 3)) >> 4;
}
-uint32_t vpx_highbd_avg_8x8_neon(const uint8_t *a, int a_stride) {
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a);
+uint32_t vpx_highbd_avg_8x8_neon(const uint8_t *s8, int p) {
+ const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(s8);
uint16x8_t sum, a0, a1, a2, a3, a4, a5, a6, a7;
- load_u16_8x8(a_ptr, a_stride, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
+ load_u16_8x8(a_ptr, p, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
sum = vaddq_u16(a0, a1);
sum = vaddq_u16(sum, a2);
@@ -63,29 +63,28 @@ int vpx_highbd_satd_neon(const tran_low_t *coeff, int length) {
return (int)horizontal_add_int64x2(vaddq_s64(sum_s64[0], sum_s64[1]));
}
-void vpx_highbd_minmax_8x8_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride, int *min,
- int *max) {
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a);
- const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b);
-
- const uint16x8_t a0 = vld1q_u16(a_ptr + 0 * a_stride);
- const uint16x8_t a1 = vld1q_u16(a_ptr + 1 * a_stride);
- const uint16x8_t a2 = vld1q_u16(a_ptr + 2 * a_stride);
- const uint16x8_t a3 = vld1q_u16(a_ptr + 3 * a_stride);
- const uint16x8_t a4 = vld1q_u16(a_ptr + 4 * a_stride);
- const uint16x8_t a5 = vld1q_u16(a_ptr + 5 * a_stride);
- const uint16x8_t a6 = vld1q_u16(a_ptr + 6 * a_stride);
- const uint16x8_t a7 = vld1q_u16(a_ptr + 7 * a_stride);
-
- const uint16x8_t b0 = vld1q_u16(b_ptr + 0 * b_stride);
- const uint16x8_t b1 = vld1q_u16(b_ptr + 1 * b_stride);
- const uint16x8_t b2 = vld1q_u16(b_ptr + 2 * b_stride);
- const uint16x8_t b3 = vld1q_u16(b_ptr + 3 * b_stride);
- const uint16x8_t b4 = vld1q_u16(b_ptr + 4 * b_stride);
- const uint16x8_t b5 = vld1q_u16(b_ptr + 5 * b_stride);
- const uint16x8_t b6 = vld1q_u16(b_ptr + 6 * b_stride);
- const uint16x8_t b7 = vld1q_u16(b_ptr + 7 * b_stride);
+void vpx_highbd_minmax_8x8_neon(const uint8_t *s8, int p, const uint8_t *d8,
+ int dp, int *min, int *max) {
+ const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(s8);
+ const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(d8);
+
+ const uint16x8_t a0 = vld1q_u16(a_ptr + 0 * p);
+ const uint16x8_t a1 = vld1q_u16(a_ptr + 1 * p);
+ const uint16x8_t a2 = vld1q_u16(a_ptr + 2 * p);
+ const uint16x8_t a3 = vld1q_u16(a_ptr + 3 * p);
+ const uint16x8_t a4 = vld1q_u16(a_ptr + 4 * p);
+ const uint16x8_t a5 = vld1q_u16(a_ptr + 5 * p);
+ const uint16x8_t a6 = vld1q_u16(a_ptr + 6 * p);
+ const uint16x8_t a7 = vld1q_u16(a_ptr + 7 * p);
+
+ const uint16x8_t b0 = vld1q_u16(b_ptr + 0 * dp);
+ const uint16x8_t b1 = vld1q_u16(b_ptr + 1 * dp);
+ const uint16x8_t b2 = vld1q_u16(b_ptr + 2 * dp);
+ const uint16x8_t b3 = vld1q_u16(b_ptr + 3 * dp);
+ const uint16x8_t b4 = vld1q_u16(b_ptr + 4 * dp);
+ const uint16x8_t b5 = vld1q_u16(b_ptr + 5 * dp);
+ const uint16x8_t b6 = vld1q_u16(b_ptr + 6 * dp);
+ const uint16x8_t b7 = vld1q_u16(b_ptr + 7 * dp);
const uint16x8_t abs_diff0 = vabdq_u16(a0, b0);
const uint16x8_t abs_diff1 = vabdq_u16(a1, b1);
diff --git a/vpx_dsp/arm/highbd_sad4d_neon.c b/vpx_dsp/arm/highbd_sad4d_neon.c
index 62c4685a7..a6684b053 100644
--- a/vpx_dsp/arm/highbd_sad4d_neon.c
+++ b/vpx_dsp/arm/highbd_sad4d_neon.c
@@ -213,10 +213,11 @@ static INLINE void highbd_sad32xhx4d_neon(const uint8_t *src_ptr,
}
#define HBD_SAD_WXH_4D_NEON(w, h) \
- void vpx_highbd_sad##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], \
- int ref_stride, uint32_t res[4]) { \
- highbd_sad##w##xhx4d_neon(src, src_stride, ref, ref_stride, res, (h)); \
+ void vpx_highbd_sad##w##x##h##x4d_neon( \
+ const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ highbd_sad##w##xhx4d_neon(src, src_stride, ref_array, ref_stride, \
+ sad_array, (h)); \
}
HBD_SAD_WXH_4D_NEON(4, 4)
@@ -239,16 +240,16 @@ HBD_SAD_WXH_4D_NEON(64, 64)
#undef HBD_SAD_WXH_4D_NEON
-#define HBD_SAD_SKIP_WXH_4D_NEON(w, h) \
- void vpx_highbd_sad_skip_##w##x##h##x4d_neon( \
- const uint8_t *src, int src_stride, const uint8_t *const ref[4], \
- int ref_stride, uint32_t res[4]) { \
- highbd_sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res, \
- ((h) >> 1)); \
- res[0] <<= 1; \
- res[1] <<= 1; \
- res[2] <<= 1; \
- res[3] <<= 1; \
+#define HBD_SAD_SKIP_WXH_4D_NEON(w, h) \
+ void vpx_highbd_sad_skip_##w##x##h##x4d_neon( \
+ const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ highbd_sad##w##xhx4d_neon(src, 2 * src_stride, ref_array, 2 * ref_stride, \
+ sad_array, ((h) >> 1)); \
+ sad_array[0] <<= 1; \
+ sad_array[1] <<= 1; \
+ sad_array[2] <<= 1; \
+ sad_array[3] <<= 1; \
}
HBD_SAD_SKIP_WXH_4D_NEON(4, 4)
diff --git a/vpx_dsp/arm/quantize_neon.c b/vpx_dsp/arm/quantize_neon.c
index 35c67f607..5a7606554 100644
--- a/vpx_dsp/arm/quantize_neon.c
+++ b/vpx_dsp/arm/quantize_neon.c
@@ -216,10 +216,10 @@ quantize_b_32x32_neon(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr,
// Main difference is that zbin values are halved before comparison and dqcoeff
// values are divided by 2. zbin is rounded but dqcoeff is not.
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr,
- const struct macroblock_plane *const mb_plane,
+ const struct macroblock_plane *mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const struct ScanOrder *const scan_order) {
+ const struct ScanOrder *scan_order) {
const int16x8_t neg_one = vdupq_n_s16(-1);
uint16x8_t eob_max;
int i;
diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c
index 44cd99028..3a548d0f9 100644
--- a/vpx_dsp/arm/sad4d_neon.c
+++ b/vpx_dsp/arm/sad4d_neon.c
@@ -282,11 +282,12 @@ static INLINE void sad4xhx4d_neon(const uint8_t *src, int src_stride,
vst1q_u32(res, horizontal_add_4d_uint16x8(sum));
}
-#define SAD_WXH_4D_NEON(w, h) \
- void vpx_sad##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], int ref_stride, \
- uint32_t res[4]) { \
- sad##w##xhx4d_neon(src, src_stride, ref, ref_stride, res, (h)); \
+#define SAD_WXH_4D_NEON(w, h) \
+ void vpx_sad##w##x##h##x4d_neon(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ sad##w##xhx4d_neon(src_ptr, src_stride, ref_array, ref_stride, sad_array, \
+ (h)); \
}
SAD_WXH_4D_NEON(4, 4)
@@ -309,16 +310,17 @@ SAD_WXH_4D_NEON(64, 64)
#undef SAD_WXH_4D_NEON
-#define SAD_SKIP_WXH_4D_NEON(w, h) \
- void vpx_sad_skip_##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], \
- int ref_stride, uint32_t res[4]) { \
- sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res, \
- ((h) >> 1)); \
- res[0] <<= 1; \
- res[1] <<= 1; \
- res[2] <<= 1; \
- res[3] <<= 1; \
+#define SAD_SKIP_WXH_4D_NEON(w, h) \
+ void vpx_sad_skip_##w##x##h##x4d_neon( \
+ const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *const ref_array[4], int ref_stride, \
+ uint32_t sad_array[4]) { \
+ sad##w##xhx4d_neon(src_ptr, 2 * src_stride, ref_array, 2 * ref_stride, \
+ sad_array, ((h) >> 1)); \
+ sad_array[0] <<= 1; \
+ sad_array[1] <<= 1; \
+ sad_array[2] <<= 1; \
+ sad_array[3] <<= 1; \
}
SAD_SKIP_WXH_4D_NEON(4, 4)
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c
index d44ced20d..7dff8c7a8 100644
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -211,10 +211,10 @@ void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
#endif
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
- const struct macroblock_plane *const mb_plane,
+ const struct macroblock_plane *mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const struct ScanOrder *const scan_order) {
+ const struct ScanOrder *scan_order) {
const int n_coeffs = 32 * 32;
const int zbins[2] = { ROUND_POWER_OF_TWO(mb_plane->zbin[0], 1),
ROUND_POWER_OF_TWO(mb_plane->zbin[1], 1) };
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index bde011529..494d7ba5e 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -725,14 +725,14 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b neon sse2 ssse3 avx avx2 vsx lsx/;
- add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
+ add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order";
specialize qw/vpx_quantize_b_32x32 neon ssse3 avx avx2 vsx lsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b neon sse2 avx2/;
- add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
+ add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order";
specialize qw/vpx_highbd_quantize_b_32x32 neon sse2 avx2/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER
diff --git a/vpx_dsp/x86/quantize_avx.c b/vpx_dsp/x86/quantize_avx.c
index d289bf6eb..6837a5cf2 100644
--- a/vpx_dsp/x86/quantize_avx.c
+++ b/vpx_dsp/x86/quantize_avx.c
@@ -143,10 +143,10 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr,
- const struct macroblock_plane *const mb_plane,
+ const struct macroblock_plane *mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const struct ScanOrder *const scan_order) {
+ const struct ScanOrder *scan_order) {
const __m128i zero = _mm_setzero_si128();
const __m256i big_zero = _mm256_setzero_si256();
int index;
diff --git a/vpx_dsp/x86/quantize_avx2.c b/vpx_dsp/x86/quantize_avx2.c
index 5421dcf0b..3d97b3fda 100644
--- a/vpx_dsp/x86/quantize_avx2.c
+++ b/vpx_dsp/x86/quantize_avx2.c
@@ -253,10 +253,10 @@ static VPX_FORCE_INLINE __m256i quantize_b_32x32_16(
}
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr,
- const struct macroblock_plane *const mb_plane,
+ const struct macroblock_plane *mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const struct ScanOrder *const scan_order) {
+ const struct ScanOrder *scan_order) {
__m256i v_zbin, v_round, v_quant, v_dequant, v_quant_shift;
__m256i v_eobmax = _mm256_setzero_si256();
intptr_t count;
diff --git a/vpx_dsp/x86/quantize_ssse3.c b/vpx_dsp/x86/quantize_ssse3.c
index 556f4ca61..641f23298 100644
--- a/vpx_dsp/x86/quantize_ssse3.c
+++ b/vpx_dsp/x86/quantize_ssse3.c
@@ -110,10 +110,10 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr,
- const struct macroblock_plane *const mb_plane,
+ const struct macroblock_plane *mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const struct ScanOrder *const scan_order) {
+ const struct ScanOrder *scan_order) {
const __m128i zero = _mm_setzero_si128();
int index;
const int16_t *iscan = scan_order->iscan;
diff --git a/vpx_dsp/x86/sad4d_avx2.c b/vpx_dsp/x86/sad4d_avx2.c
index c87fd3cd2..cf7111983 100644
--- a/vpx_dsp/x86/sad4d_avx2.c
+++ b/vpx_dsp/x86/sad4d_avx2.c
@@ -135,45 +135,45 @@ static INLINE void sad64xhx4d_avx2(const uint8_t *src_ptr, int src_stride,
calc_final_4(sums, sad_array);
}
-#define SAD64_H(h) \
- void vpx_sad64x##h##x4d_avx2(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], int ref_stride, \
- uint32_t res[4]) { \
- sad64xhx4d_avx2(src, src_stride, ref, ref_stride, h, res); \
+#define SAD64_H(h) \
+ void vpx_sad64x##h##x4d_avx2(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ sad64xhx4d_avx2(src, src_stride, ref_array, ref_stride, h, sad_array); \
}
-#define SAD32_H(h) \
- void vpx_sad32x##h##x4d_avx2(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], int ref_stride, \
- uint32_t res[4]) { \
- sad32xhx4d_avx2(src, src_stride, ref, ref_stride, h, res); \
+#define SAD32_H(h) \
+ void vpx_sad32x##h##x4d_avx2(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ sad32xhx4d_avx2(src, src_stride, ref_array, ref_stride, h, sad_array); \
}
SAD64_H(64)
SAD32_H(32)
-#define SADS64_H(h) \
- void vpx_sad_skip_64x##h##x4d_avx2(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], \
- int ref_stride, uint32_t res[4]) { \
- sad64xhx4d_avx2(src, 2 * src_stride, ref, 2 * ref_stride, ((h) >> 1), \
- res); \
- res[0] <<= 1; \
- res[1] <<= 1; \
- res[2] <<= 1; \
- res[3] <<= 1; \
+#define SADS64_H(h) \
+ void vpx_sad_skip_64x##h##x4d_avx2(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ sad64xhx4d_avx2(src, 2 * src_stride, ref_array, 2 * ref_stride, \
+ ((h) >> 1), sad_array); \
+ sad_array[0] <<= 1; \
+ sad_array[1] <<= 1; \
+ sad_array[2] <<= 1; \
+ sad_array[3] <<= 1; \
}
-#define SADS32_H(h) \
- void vpx_sad_skip_32x##h##x4d_avx2(const uint8_t *src, int src_stride, \
- const uint8_t *const ref[4], \
- int ref_stride, uint32_t res[4]) { \
- sad32xhx4d_avx2(src, 2 * src_stride, ref, 2 * ref_stride, ((h) >> 1), \
- res); \
- res[0] <<= 1; \
- res[1] <<= 1; \
- res[2] <<= 1; \
- res[3] <<= 1; \
+#define SADS32_H(h) \
+ void vpx_sad_skip_32x##h##x4d_avx2(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref_array[4], \
+ int ref_stride, uint32_t sad_array[4]) { \
+ sad32xhx4d_avx2(src, 2 * src_stride, ref_array, 2 * ref_stride, \
+ ((h) >> 1), sad_array); \
+ sad_array[0] <<= 1; \
+ sad_array[1] <<= 1; \
+ sad_array[2] <<= 1; \
+ sad_array[3] <<= 1; \
}
SADS64_H(64)