diff options
author | Chi Yo Tsai <chiyotsai@google.com> | 2023-05-04 17:04:17 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2023-05-04 17:04:17 +0000 |
commit | 4379041094108d20ce099ce202ab43af32195f8c (patch) | |
tree | 4f2409af97ebc2b6548a8aad4e70bbcb3c00f65b | |
parent | 4dd3afc00eb0c9d74dfa18bb8e727dbfb98e92ff (diff) | |
parent | 2c03388231cf545e1245746a6ee4edfe0322e71e (diff) | |
download | libvpx-4379041094108d20ce099ce202ab43af32195f8c.tar.gz |
Merge changes I226215a2,Ia4918eb0,If6219446,Ibf00a6e1,I900a0a48 into main

* changes:
  Fix mismatched param names in vpx_dsp/x86/sad4d_avx2.c
  Fix mismatched param names in vpx_dsp/arm/highbd_sad4d_neon.c
  Fix mismatched param names in vpx_dsp/arm/sad4d_neon.c
  Fix mismatched param names in vpx_dsp/arm/highbd_avg_neon.c
  Fix clang warning on const-qualification of parameters
-rw-r--r-- | test/vp9_quantize_test.cc | 4 | ||||
-rw-r--r-- | vpx_dsp/arm/highbd_avg_neon.c | 59 | ||||
-rw-r--r-- | vpx_dsp/arm/highbd_sad4d_neon.c | 29 | ||||
-rw-r--r-- | vpx_dsp/arm/quantize_neon.c | 4 | ||||
-rw-r--r-- | vpx_dsp/arm/sad4d_neon.c | 32 | ||||
-rw-r--r-- | vpx_dsp/quantize.c | 4 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 4 | ||||
-rw-r--r-- | vpx_dsp/x86/quantize_avx.c | 4 | ||||
-rw-r--r-- | vpx_dsp/x86/quantize_avx2.c | 4 | ||||
-rw-r--r-- | vpx_dsp/x86/quantize_ssse3.c | 4 | ||||
-rw-r--r-- | vpx_dsp/x86/sad4d_avx2.c | 60 |
11 files changed, 105 insertions, 103 deletions
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc index 84a5a58e4..5e3a7c270 100644 --- a/test/vp9_quantize_test.cc +++ b/test/vp9_quantize_test.cc @@ -39,10 +39,10 @@ namespace { const int number_of_iterations = 100; typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count, - const macroblock_plane *const mb_plane, + const macroblock_plane *mb_plane, tran_low_t *qcoeff, tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob, - const struct ScanOrder *const scan_order); + const struct ScanOrder *scan_order); typedef std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t, int /*max_size*/, bool /*is_fp*/> QuantizeParam; diff --git a/vpx_dsp/arm/highbd_avg_neon.c b/vpx_dsp/arm/highbd_avg_neon.c index 8939ee131..4265596c8 100644 --- a/vpx_dsp/arm/highbd_avg_neon.c +++ b/vpx_dsp/arm/highbd_avg_neon.c @@ -16,18 +16,18 @@ #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h" -uint32_t vpx_highbd_avg_4x4_neon(const uint8_t *a, int a_stride) { - const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a); - const uint16x8_t a0 = load_unaligned_u16q(a_ptr + 0 * a_stride, a_stride); - const uint16x8_t a1 = load_unaligned_u16q(a_ptr + 2 * a_stride, a_stride); +uint32_t vpx_highbd_avg_4x4_neon(const uint8_t *s8, int p) { + const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(s8); + const uint16x8_t a0 = load_unaligned_u16q(a_ptr + 0 * p, p); + const uint16x8_t a1 = load_unaligned_u16q(a_ptr + 2 * p, p); return (horizontal_add_uint16x8(vaddq_u16(a0, a1)) + (1 << 3)) >> 4; } -uint32_t vpx_highbd_avg_8x8_neon(const uint8_t *a, int a_stride) { - const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a); +uint32_t vpx_highbd_avg_8x8_neon(const uint8_t *s8, int p) { + const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(s8); uint16x8_t sum, a0, a1, a2, a3, a4, a5, a6, a7; - load_u16_8x8(a_ptr, a_stride, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); + load_u16_8x8(a_ptr, p, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); sum = vaddq_u16(a0, a1); sum = vaddq_u16(sum, a2); @@ -63,29 +63,28 @@ 
int vpx_highbd_satd_neon(const tran_low_t *coeff, int length) { return (int)horizontal_add_int64x2(vaddq_s64(sum_s64[0], sum_s64[1])); } -void vpx_highbd_minmax_8x8_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, int *min, - int *max) { - const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a); - const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b); - - const uint16x8_t a0 = vld1q_u16(a_ptr + 0 * a_stride); - const uint16x8_t a1 = vld1q_u16(a_ptr + 1 * a_stride); - const uint16x8_t a2 = vld1q_u16(a_ptr + 2 * a_stride); - const uint16x8_t a3 = vld1q_u16(a_ptr + 3 * a_stride); - const uint16x8_t a4 = vld1q_u16(a_ptr + 4 * a_stride); - const uint16x8_t a5 = vld1q_u16(a_ptr + 5 * a_stride); - const uint16x8_t a6 = vld1q_u16(a_ptr + 6 * a_stride); - const uint16x8_t a7 = vld1q_u16(a_ptr + 7 * a_stride); - - const uint16x8_t b0 = vld1q_u16(b_ptr + 0 * b_stride); - const uint16x8_t b1 = vld1q_u16(b_ptr + 1 * b_stride); - const uint16x8_t b2 = vld1q_u16(b_ptr + 2 * b_stride); - const uint16x8_t b3 = vld1q_u16(b_ptr + 3 * b_stride); - const uint16x8_t b4 = vld1q_u16(b_ptr + 4 * b_stride); - const uint16x8_t b5 = vld1q_u16(b_ptr + 5 * b_stride); - const uint16x8_t b6 = vld1q_u16(b_ptr + 6 * b_stride); - const uint16x8_t b7 = vld1q_u16(b_ptr + 7 * b_stride); +void vpx_highbd_minmax_8x8_neon(const uint8_t *s8, int p, const uint8_t *d8, + int dp, int *min, int *max) { + const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(s8); + const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(d8); + + const uint16x8_t a0 = vld1q_u16(a_ptr + 0 * p); + const uint16x8_t a1 = vld1q_u16(a_ptr + 1 * p); + const uint16x8_t a2 = vld1q_u16(a_ptr + 2 * p); + const uint16x8_t a3 = vld1q_u16(a_ptr + 3 * p); + const uint16x8_t a4 = vld1q_u16(a_ptr + 4 * p); + const uint16x8_t a5 = vld1q_u16(a_ptr + 5 * p); + const uint16x8_t a6 = vld1q_u16(a_ptr + 6 * p); + const uint16x8_t a7 = vld1q_u16(a_ptr + 7 * p); + + const uint16x8_t b0 = vld1q_u16(b_ptr + 0 * dp); + const uint16x8_t b1 = vld1q_u16(b_ptr + 1 * dp); + 
const uint16x8_t b2 = vld1q_u16(b_ptr + 2 * dp); + const uint16x8_t b3 = vld1q_u16(b_ptr + 3 * dp); + const uint16x8_t b4 = vld1q_u16(b_ptr + 4 * dp); + const uint16x8_t b5 = vld1q_u16(b_ptr + 5 * dp); + const uint16x8_t b6 = vld1q_u16(b_ptr + 6 * dp); + const uint16x8_t b7 = vld1q_u16(b_ptr + 7 * dp); const uint16x8_t abs_diff0 = vabdq_u16(a0, b0); const uint16x8_t abs_diff1 = vabdq_u16(a1, b1); diff --git a/vpx_dsp/arm/highbd_sad4d_neon.c b/vpx_dsp/arm/highbd_sad4d_neon.c index 62c4685a7..a6684b053 100644 --- a/vpx_dsp/arm/highbd_sad4d_neon.c +++ b/vpx_dsp/arm/highbd_sad4d_neon.c @@ -213,10 +213,11 @@ static INLINE void highbd_sad32xhx4d_neon(const uint8_t *src_ptr, } #define HBD_SAD_WXH_4D_NEON(w, h) \ - void vpx_highbd_sad##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \ - const uint8_t *const ref[4], \ - int ref_stride, uint32_t res[4]) { \ - highbd_sad##w##xhx4d_neon(src, src_stride, ref, ref_stride, res, (h)); \ + void vpx_highbd_sad##w##x##h##x4d_neon( \ + const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + highbd_sad##w##xhx4d_neon(src, src_stride, ref_array, ref_stride, \ + sad_array, (h)); \ } HBD_SAD_WXH_4D_NEON(4, 4) @@ -239,16 +240,16 @@ HBD_SAD_WXH_4D_NEON(64, 64) #undef HBD_SAD_WXH_4D_NEON -#define HBD_SAD_SKIP_WXH_4D_NEON(w, h) \ - void vpx_highbd_sad_skip_##w##x##h##x4d_neon( \ - const uint8_t *src, int src_stride, const uint8_t *const ref[4], \ - int ref_stride, uint32_t res[4]) { \ - highbd_sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res, \ - ((h) >> 1)); \ - res[0] <<= 1; \ - res[1] <<= 1; \ - res[2] <<= 1; \ - res[3] <<= 1; \ +#define HBD_SAD_SKIP_WXH_4D_NEON(w, h) \ + void vpx_highbd_sad_skip_##w##x##h##x4d_neon( \ + const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + highbd_sad##w##xhx4d_neon(src, 2 * src_stride, ref_array, 2 * ref_stride, \ + sad_array, ((h) >> 1)); \ + sad_array[0] 
<<= 1; \ + sad_array[1] <<= 1; \ + sad_array[2] <<= 1; \ + sad_array[3] <<= 1; \ } HBD_SAD_SKIP_WXH_4D_NEON(4, 4) diff --git a/vpx_dsp/arm/quantize_neon.c b/vpx_dsp/arm/quantize_neon.c index 35c67f607..5a7606554 100644 --- a/vpx_dsp/arm/quantize_neon.c +++ b/vpx_dsp/arm/quantize_neon.c @@ -216,10 +216,10 @@ quantize_b_32x32_neon(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, // Main difference is that zbin values are halved before comparison and dqcoeff // values are divided by 2. zbin is rounded but dqcoeff is not. void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, - const struct macroblock_plane *const mb_plane, + const struct macroblock_plane *mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const struct ScanOrder *const scan_order) { + const struct ScanOrder *scan_order) { const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; int i; diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c index 44cd99028..3a548d0f9 100644 --- a/vpx_dsp/arm/sad4d_neon.c +++ b/vpx_dsp/arm/sad4d_neon.c @@ -282,11 +282,12 @@ static INLINE void sad4xhx4d_neon(const uint8_t *src, int src_stride, vst1q_u32(res, horizontal_add_4d_uint16x8(sum)); } -#define SAD_WXH_4D_NEON(w, h) \ - void vpx_sad##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \ - const uint8_t *const ref[4], int ref_stride, \ - uint32_t res[4]) { \ - sad##w##xhx4d_neon(src, src_stride, ref, ref_stride, res, (h)); \ +#define SAD_WXH_4D_NEON(w, h) \ + void vpx_sad##w##x##h##x4d_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + sad##w##xhx4d_neon(src_ptr, src_stride, ref_array, ref_stride, sad_array, \ + (h)); \ } SAD_WXH_4D_NEON(4, 4) @@ -309,16 +310,17 @@ SAD_WXH_4D_NEON(64, 64) #undef SAD_WXH_4D_NEON -#define SAD_SKIP_WXH_4D_NEON(w, h) \ - void vpx_sad_skip_##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \ - const uint8_t *const 
ref[4], \ - int ref_stride, uint32_t res[4]) { \ - sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res, \ - ((h) >> 1)); \ - res[0] <<= 1; \ - res[1] <<= 1; \ - res[2] <<= 1; \ - res[3] <<= 1; \ +#define SAD_SKIP_WXH_4D_NEON(w, h) \ + void vpx_sad_skip_##w##x##h##x4d_neon( \ + const uint8_t *src_ptr, int src_stride, \ + const uint8_t *const ref_array[4], int ref_stride, \ + uint32_t sad_array[4]) { \ + sad##w##xhx4d_neon(src_ptr, 2 * src_stride, ref_array, 2 * ref_stride, \ + sad_array, ((h) >> 1)); \ + sad_array[0] <<= 1; \ + sad_array[1] <<= 1; \ + sad_array[2] <<= 1; \ + sad_array[3] <<= 1; \ } SAD_SKIP_WXH_4D_NEON(4, 4) diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c index d44ced20d..7dff8c7a8 100644 --- a/vpx_dsp/quantize.c +++ b/vpx_dsp/quantize.c @@ -211,10 +211,10 @@ void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, #endif void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, - const struct macroblock_plane *const mb_plane, + const struct macroblock_plane *mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const struct ScanOrder *const scan_order) { + const struct ScanOrder *scan_order) { const int n_coeffs = 32 * 32; const int zbins[2] = { ROUND_POWER_OF_TWO(mb_plane->zbin[0], 1), ROUND_POWER_OF_TWO(mb_plane->zbin[1], 1) }; diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index bde011529..494d7ba5e 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -725,14 +725,14 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vpx_quantize_b neon sse2 ssse3 avx avx2 vsx 
lsx/; - add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order"; + add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order"; specialize qw/vpx_quantize_b_32x32 neon ssse3 avx avx2 vsx lsx/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vpx_highbd_quantize_b neon sse2 avx2/; - add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order"; + add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order"; specialize qw/vpx_highbd_quantize_b_32x32 neon sse2 avx2/; } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9_ENCODER diff --git a/vpx_dsp/x86/quantize_avx.c b/vpx_dsp/x86/quantize_avx.c index d289bf6eb..6837a5cf2 100644 --- a/vpx_dsp/x86/quantize_avx.c +++ b/vpx_dsp/x86/quantize_avx.c @@ -143,10 +143,10 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, - const struct 
macroblock_plane *const mb_plane, + const struct macroblock_plane *mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const struct ScanOrder *const scan_order) { + const struct ScanOrder *scan_order) { const __m128i zero = _mm_setzero_si128(); const __m256i big_zero = _mm256_setzero_si256(); int index; diff --git a/vpx_dsp/x86/quantize_avx2.c b/vpx_dsp/x86/quantize_avx2.c index 5421dcf0b..3d97b3fda 100644 --- a/vpx_dsp/x86/quantize_avx2.c +++ b/vpx_dsp/x86/quantize_avx2.c @@ -253,10 +253,10 @@ static VPX_FORCE_INLINE __m256i quantize_b_32x32_16( } void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, - const struct macroblock_plane *const mb_plane, + const struct macroblock_plane *mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const struct ScanOrder *const scan_order) { + const struct ScanOrder *scan_order) { __m256i v_zbin, v_round, v_quant, v_dequant, v_quant_shift; __m256i v_eobmax = _mm256_setzero_si256(); intptr_t count; diff --git a/vpx_dsp/x86/quantize_ssse3.c b/vpx_dsp/x86/quantize_ssse3.c index 556f4ca61..641f23298 100644 --- a/vpx_dsp/x86/quantize_ssse3.c +++ b/vpx_dsp/x86/quantize_ssse3.c @@ -110,10 +110,10 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, - const struct macroblock_plane *const mb_plane, + const struct macroblock_plane *mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const struct ScanOrder *const scan_order) { + const struct ScanOrder *scan_order) { const __m128i zero = _mm_setzero_si128(); int index; const int16_t *iscan = scan_order->iscan; diff --git a/vpx_dsp/x86/sad4d_avx2.c b/vpx_dsp/x86/sad4d_avx2.c index c87fd3cd2..cf7111983 100644 --- a/vpx_dsp/x86/sad4d_avx2.c +++ b/vpx_dsp/x86/sad4d_avx2.c @@ -135,45 +135,45 @@ static INLINE void sad64xhx4d_avx2(const 
uint8_t *src_ptr, int src_stride, calc_final_4(sums, sad_array); } -#define SAD64_H(h) \ - void vpx_sad64x##h##x4d_avx2(const uint8_t *src, int src_stride, \ - const uint8_t *const ref[4], int ref_stride, \ - uint32_t res[4]) { \ - sad64xhx4d_avx2(src, src_stride, ref, ref_stride, h, res); \ +#define SAD64_H(h) \ + void vpx_sad64x##h##x4d_avx2(const uint8_t *src, int src_stride, \ + const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + sad64xhx4d_avx2(src, src_stride, ref_array, ref_stride, h, sad_array); \ } -#define SAD32_H(h) \ - void vpx_sad32x##h##x4d_avx2(const uint8_t *src, int src_stride, \ - const uint8_t *const ref[4], int ref_stride, \ - uint32_t res[4]) { \ - sad32xhx4d_avx2(src, src_stride, ref, ref_stride, h, res); \ +#define SAD32_H(h) \ + void vpx_sad32x##h##x4d_avx2(const uint8_t *src, int src_stride, \ + const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + sad32xhx4d_avx2(src, src_stride, ref_array, ref_stride, h, sad_array); \ } SAD64_H(64) SAD32_H(32) -#define SADS64_H(h) \ - void vpx_sad_skip_64x##h##x4d_avx2(const uint8_t *src, int src_stride, \ - const uint8_t *const ref[4], \ - int ref_stride, uint32_t res[4]) { \ - sad64xhx4d_avx2(src, 2 * src_stride, ref, 2 * ref_stride, ((h) >> 1), \ - res); \ - res[0] <<= 1; \ - res[1] <<= 1; \ - res[2] <<= 1; \ - res[3] <<= 1; \ +#define SADS64_H(h) \ + void vpx_sad_skip_64x##h##x4d_avx2(const uint8_t *src, int src_stride, \ + const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + sad64xhx4d_avx2(src, 2 * src_stride, ref_array, 2 * ref_stride, \ + ((h) >> 1), sad_array); \ + sad_array[0] <<= 1; \ + sad_array[1] <<= 1; \ + sad_array[2] <<= 1; \ + sad_array[3] <<= 1; \ } -#define SADS32_H(h) \ - void vpx_sad_skip_32x##h##x4d_avx2(const uint8_t *src, int src_stride, \ - const uint8_t *const ref[4], \ - int ref_stride, uint32_t res[4]) { \ - sad32xhx4d_avx2(src, 2 * src_stride, ref, 2 * ref_stride, ((h) >> 1), \ - 
res); \ - res[0] <<= 1; \ - res[1] <<= 1; \ - res[2] <<= 1; \ - res[3] <<= 1; \ +#define SADS32_H(h) \ + void vpx_sad_skip_32x##h##x4d_avx2(const uint8_t *src, int src_stride, \ + const uint8_t *const ref_array[4], \ + int ref_stride, uint32_t sad_array[4]) { \ + sad32xhx4d_avx2(src, 2 * src_stride, ref_array, 2 * ref_stride, \ + ((h) >> 1), sad_array); \ + sad_array[0] <<= 1; \ + sad_array[1] <<= 1; \ + sad_array[2] <<= 1; \ + sad_array[3] <<= 1; \ } SADS64_H(64) |