author     James Zern <jzern@google.com>                              2023-05-04 02:16:12 +0000
committer  Gerrit Code Review <noreply-gerritcodereview@google.com>  2023-05-04 02:16:12 +0000
commit     4dd3afc00eb0c9d74dfa18bb8e727dbfb98e92ff (patch)
tree       5481e8458ee8887ea59a144f62a65396ce54d2c9
parent     69d5d16552d8b1b1e43f0ff4f1e6f79ebaa2a373 (diff)
parent     57b9afa58f849a8165ce3132c21087ae451d862c (diff)
Merge changes I4d26f5f8,I12e25710 into main
* changes:
  s/__aarch64__/VPX_ARCH_AARCH64/
  configure: add aarch64 to ARCH_LIST
-rw-r--r--  build/make/configure.sh                             |  4
-rwxr-xr-x  configure                                           |  1
-rw-r--r--  vp8/encoder/arm/neon/fastquantizeb_neon.c           |  8
-rw-r--r--  vp9/encoder/arm/neon/vp9_denoiser_neon.c            |  2
-rw-r--r--  vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c  | 14
-rw-r--r--  vp9/encoder/arm/neon/vp9_quantize_neon.c            |  6
-rw-r--r--  vpx_dsp/arm/avg_neon.c                              |  2
-rw-r--r--  vpx_dsp/arm/highbd_avg_neon.c                       |  2
-rw-r--r--  vpx_dsp/arm/highbd_quantize_neon.c                  |  8
-rw-r--r--  vpx_dsp/arm/quantize_neon.c                         |  8
-rw-r--r--  vpx_dsp/arm/sum_neon.h                              | 34
-rw-r--r--  vpx_dsp/arm/transpose_neon.h                        | 10
-rw-r--r--  vpx_dsp/arm/vpx_convolve8_neon.c                    |  6
-rw-r--r--  vpx_dsp/arm/vpx_convolve8_neon.h                    |  8
14 files changed, 59 insertions, 54 deletions
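Both merged changes follow one pattern: checks on the compiler-provided `__aarch64__` macro are replaced by the build-system macro `VPX_ARCH_AARCH64`, which `configure` now emits for arm64/armv8 targets. Because the generated `vpx_config.h` always defines `VPX_ARCH_AARCH64` (to 0 or 1), the guards switch from `#ifdef`/`#if defined(...)` to a plain `#if`. A minimal sketch of the resulting shape, using the `horizontal_add_uint32x2` helper from `vpx_dsp/arm/sum_neon.h` (the standalone `static inline` and bare includes here are simplifications of the library's `INLINE` and header setup):

```c
#include <arm_neon.h>
#include "./vpx_config.h"  /* generated by configure; defines VPX_ARCH_AARCH64 as 0 or 1 */

/* Before: #if defined(__aarch64__)  -- depends on a compiler-defined macro.
 * After:  #if VPX_ARCH_AARCH64      -- depends on the configure-generated macro,
 * which is always defined, so a plain #if is the appropriate test. */
static inline uint32_t horizontal_add_uint32x2(const uint32x2_t a) {
#if VPX_ARCH_AARCH64
  return vaddv_u32(a);  /* AArch64-only across-vector add */
#else
  return vget_lane_u32(a, 0) + vget_lane_u32(a, 1);  /* 32-bit Arm fallback */
#endif
}
```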
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 32105651f..ec9af5e63 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -842,6 +842,10 @@ process_common_toolchain() {
# Enable the architecture family
case ${tgt_isa} in
+ arm64 | armv8)
+ enable_feature arm
+ enable_feature aarch64
+ ;;
arm*)
enable_feature arm
;;
diff --git a/configure b/configure
index 890ad3968..20707727e 100755
--- a/configure
+++ b/configure
@@ -243,6 +243,7 @@ CODEC_FAMILIES="
ARCH_LIST="
arm
+ aarch64
mips
x86
x86_64
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.c b/vp8/encoder/arm/neon/fastquantizeb_neon.c
index 6fc60805f..950c94334 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.c
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.c
@@ -28,11 +28,11 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
zig_zag1 = vld1q_u16(inv_zig_zag + 8);
int16x8_t x0, x1, sz0, sz1, y0, y1;
uint16x8_t eob0, eob1;
-#ifndef __aarch64__
+#if !VPX_ARCH_AARCH64
uint16x4_t eob_d16;
uint32x2_t eob_d32;
uint32x4_t eob_q32;
-#endif // __arch64__
+#endif // !VPX_ARCH_AARCH64
/* sign of z: z >> 15 */
sz0 = vshrq_n_s16(z0, 15);
@@ -70,7 +70,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
/* select the largest value */
eob0 = vmaxq_u16(eob0, eob1);
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
*d->eob = (int8_t)vmaxvq_u16(eob0);
#else
eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0));
@@ -79,7 +79,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
eob_d32 = vpmax_u32(eob_d32, eob_d32);
vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
-#endif // __aarch64__
+#endif // VPX_ARCH_AARCH64
/* qcoeff = x */
vst1q_s16(d->qcoeff, x0);
diff --git a/vp9/encoder/arm/neon/vp9_denoiser_neon.c b/vp9/encoder/arm/neon/vp9_denoiser_neon.c
index 53e8c7e49..d631cd437 100644
--- a/vp9/encoder/arm/neon/vp9_denoiser_neon.c
+++ b/vp9/encoder/arm/neon/vp9_denoiser_neon.c
@@ -21,7 +21,7 @@
// Compute the sum of all pixel differences of this MB.
static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlvq_s8(v_sum_diff_total);
#else
const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
diff --git a/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c b/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
index 255e6fbc4..b82b3f9db 100644
--- a/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
+++ b/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
@@ -94,7 +94,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
// Work out the start point for the search
const uint8_t *best_address = in_what;
const uint8_t *new_best_address = best_address;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
int64x2_t v_ba_q = vdupq_n_s64((intptr_t)best_address);
#else
int32x4_t v_ba_d = vdupq_n_s32((intptr_t)best_address);
@@ -117,7 +117,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
int8x16_t v_inside_d;
uint32x4_t v_outside_d;
int32x4_t v_cost_d, v_sad_d;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
int64x2_t v_blocka[2];
#else
int32x4_t v_blocka[1];
@@ -138,7 +138,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
vreinterpretq_s32_s16(v_these_mv_w)));
// If none of them are inside, then move on
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
horiz_max = vmaxvq_u32(vreinterpretq_u32_s8(v_inside_d));
#else
horiz_max_0 = vmax_u32(vget_low_u32(vreinterpretq_u32_s8(v_inside_d)),
@@ -167,7 +167,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
// Compute the SIMD pointer offsets.
{
-#if defined(__aarch64__) // sizeof(intptr_t) == 8
+#if VPX_ARCH_AARCH64 // sizeof(intptr_t) == 8
// Load the offsets
int64x2_t v_bo10_q = vld1q_s64((const int64_t *)&ss_os[i + 0]);
int64x2_t v_bo32_q = vld1q_s64((const int64_t *)&ss_os[i + 2]);
@@ -234,7 +234,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
// Find the minimum value and index horizontally in v_sad_d
{
uint32_t local_best_sad;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
local_best_sad = vminvq_u32(vreinterpretq_u32_s32(v_sad_d));
#else
uint32x2_t horiz_min_0 =
@@ -256,7 +256,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
uint32x4_t v_mask_d = vandq_u32(v_sel_d, v_idx_d);
v_mask_d = vbslq_u32(v_sel_d, v_mask_d, vdupq_n_u32(0xffffffff));
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
local_best_idx = vminvq_u32(v_mask_d);
#else
horiz_min_0 =
@@ -280,7 +280,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
best_address = new_best_address;
v_bmv_w = vreinterpretq_s16_s32(vdupq_n_s32(bmv.as_int));
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
v_ba_q = vdupq_n_s64((intptr_t)best_address);
#else
v_ba_d = vdupq_n_s32((intptr_t)best_address);
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c
index c2b55fcba..97ab13628 100644
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -50,7 +50,7 @@ static VPX_FORCE_INLINE int16x8_t get_max_lane_eob(const int16_t *iscan_ptr,
}
static VPX_FORCE_INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
return (uint16_t)vmaxvq_s16(v_eobmax);
#else
const int16x4_t v_eobmax_3210 =
@@ -65,7 +65,7 @@ static VPX_FORCE_INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
return (uint16_t)vget_lane_s16(v_eobmax_final, 0);
-#endif // __aarch64__
+#endif // VPX_ARCH_AARCH64
}
static VPX_FORCE_INLINE void load_fp_values(const int16_t *round_ptr,
@@ -81,7 +81,7 @@ static VPX_FORCE_INLINE void load_fp_values(const int16_t *round_ptr,
static VPX_FORCE_INLINE void update_fp_values(int16x8_t *v_round,
int16x8_t *v_quant,
int16x8_t *v_dequant) {
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
*v_round = vdupq_laneq_s16(*v_round, 1);
*v_quant = vdupq_laneq_s16(*v_quant, 1);
*v_dequant = vdupq_laneq_s16(*v_dequant, 1);
diff --git a/vpx_dsp/arm/avg_neon.c b/vpx_dsp/arm/avg_neon.c
index d48115dd0..8c61fc26f 100644
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -210,7 +210,7 @@ void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride, const uint8_t *b,
const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
*min = *max = 0; // Clear high bits
*((uint8_t *)max) = vmaxvq_u8(ab07_max);
*((uint8_t *)min) = vminvq_u8(ab07_min);
diff --git a/vpx_dsp/arm/highbd_avg_neon.c b/vpx_dsp/arm/highbd_avg_neon.c
index fc10197d7..8939ee131 100644
--- a/vpx_dsp/arm/highbd_avg_neon.c
+++ b/vpx_dsp/arm/highbd_avg_neon.c
@@ -114,7 +114,7 @@ void vpx_highbd_minmax_8x8_neon(const uint8_t *a, int a_stride,
const uint16x8_t min4567 = vminq_u16(min45, min67);
const uint16x8_t min07 = vminq_u16(min0123, min4567);
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
*min = *max = 0; // Clear high bits
*((uint16_t *)max) = vmaxvq_u16(max07);
*((uint16_t *)min) = vminvq_u16(min07);
diff --git a/vpx_dsp/arm/highbd_quantize_neon.c b/vpx_dsp/arm/highbd_quantize_neon.c
index 526447acf..d2a7add60 100644
--- a/vpx_dsp/arm/highbd_quantize_neon.c
+++ b/vpx_dsp/arm/highbd_quantize_neon.c
@@ -166,7 +166,7 @@ void vpx_highbd_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
} while (n_coeffs > 0);
}
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
*eob_ptr = vmaxvq_u16(eob_max);
#else
{
@@ -176,7 +176,7 @@ void vpx_highbd_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
-#endif // __aarch64__
+#endif // VPX_ARCH_AARCH64
// Need these here, else the compiler complains about mixing declarations and
// code in C90
(void)n_coeffs;
@@ -291,7 +291,7 @@ void vpx_highbd_quantize_b_32x32_neon(
}
}
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
*eob_ptr = vmaxvq_u16(eob_max);
#else
{
@@ -301,5 +301,5 @@ void vpx_highbd_quantize_b_32x32_neon(
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
-#endif // __aarch64__
+#endif // VPX_ARCH_AARCH64
}
diff --git a/vpx_dsp/arm/quantize_neon.c b/vpx_dsp/arm/quantize_neon.c
index cc8f62374..35c67f607 100644
--- a/vpx_dsp/arm/quantize_neon.c
+++ b/vpx_dsp/arm/quantize_neon.c
@@ -134,7 +134,7 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
} while (n_coeffs > 0);
}
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
*eob_ptr = vmaxvq_u16(eob_max);
#else
{
@@ -144,7 +144,7 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
-#endif // __aarch64__
+#endif // VPX_ARCH_AARCH64
// Need these here, else the compiler complains about mixing declarations and
// code in C90
(void)scan;
@@ -276,7 +276,7 @@ void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr,
}
}
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
*eob_ptr = vmaxvq_u16(eob_max);
#else
{
@@ -286,5 +286,5 @@ void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr,
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
-#endif // __aarch64__
+#endif // VPX_ARCH_AARCH64
}
diff --git a/vpx_dsp/arm/sum_neon.h b/vpx_dsp/arm/sum_neon.h
index a0c72f92c..48a2fc05c 100644
--- a/vpx_dsp/arm/sum_neon.h
+++ b/vpx_dsp/arm/sum_neon.h
@@ -17,7 +17,7 @@
#include "vpx/vpx_integer.h"
static INLINE uint16_t horizontal_add_uint8x4(const uint8x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlv_u8(a);
#else
const uint16x4_t b = vpaddl_u8(a);
@@ -27,7 +27,7 @@ static INLINE uint16_t horizontal_add_uint8x4(const uint8x8_t a) {
}
static INLINE uint16_t horizontal_add_uint8x8(const uint8x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlv_u8(a);
#else
const uint16x4_t b = vpaddl_u8(a);
@@ -38,7 +38,7 @@ static INLINE uint16_t horizontal_add_uint8x8(const uint8x8_t a) {
}
static INLINE uint16_t horizontal_add_uint8x16(const uint8x16_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlvq_u8(a);
#else
const uint16x8_t b = vpaddlq_u8(a);
@@ -50,7 +50,7 @@ static INLINE uint16_t horizontal_add_uint8x16(const uint8x16_t a) {
}
static INLINE uint16_t horizontal_add_uint16x4(const uint16x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddv_u16(a);
#else
const uint16x4_t b = vpadd_u16(a, a);
@@ -60,7 +60,7 @@ static INLINE uint16_t horizontal_add_uint16x4(const uint16x4_t a) {
}
static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlvq_s16(a);
#else
const int32x4_t b = vpaddlq_s16(a);
@@ -72,7 +72,7 @@ static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) {
}
static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlvq_u16(a);
#else
const uint32x4_t b = vpaddlq_u16(a);
@@ -84,7 +84,7 @@ static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) {
}
static INLINE uint32x4_t horizontal_add_4d_uint16x8(const uint16x8_t sum[4]) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
const uint16x8_t b0 = vpaddq_u16(a0, a1);
@@ -102,7 +102,7 @@ static INLINE uint32x4_t horizontal_add_4d_uint16x8(const uint16x8_t sum[4]) {
static INLINE uint32_t horizontal_long_add_uint16x8(const uint16x8_t vec_lo,
const uint16x8_t vec_hi) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlvq_u16(vec_lo) + vaddlvq_u16(vec_hi);
#else
const uint32x4_t vec_l_lo =
@@ -127,7 +127,7 @@ static INLINE uint32x4_t horizontal_long_add_4d_uint16x8(
const uint32x4_t b1 = vpadalq_u16(a1, sum_hi[1]);
const uint32x4_t b2 = vpadalq_u16(a2, sum_hi[2]);
const uint32x4_t b3 = vpadalq_u16(a3, sum_hi[3]);
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
const uint32x4_t c0 = vpaddq_u32(b0, b1);
const uint32x4_t c1 = vpaddq_u32(b2, b3);
return vpaddq_u32(c0, c1);
@@ -143,7 +143,7 @@ static INLINE uint32x4_t horizontal_long_add_4d_uint16x8(
}
static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddv_s32(a);
#else
return vget_lane_s32(a, 0) + vget_lane_s32(a, 1);
@@ -151,7 +151,7 @@ static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) {
}
static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddv_u32(a);
#else
return vget_lane_u32(a, 0) + vget_lane_u32(a, 1);
@@ -159,7 +159,7 @@ static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) {
}
static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddvq_s32(a);
#else
const int64x2_t b = vpaddlq_s32(a);
@@ -170,7 +170,7 @@ static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) {
}
static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddvq_u32(a);
#else
const uint64x2_t b = vpaddlq_u32(a);
@@ -181,7 +181,7 @@ static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
}
static INLINE uint32x4_t horizontal_add_4d_uint32x4(const uint32x4_t sum[4]) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
uint32x4_t res01 = vpaddq_u32(sum[0], sum[1]);
uint32x4_t res23 = vpaddq_u32(sum[2], sum[3]);
return vpaddq_u32(res01, res23);
@@ -196,7 +196,7 @@ static INLINE uint32x4_t horizontal_add_4d_uint32x4(const uint32x4_t sum[4]) {
}
static INLINE uint64_t horizontal_long_add_uint32x4(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddlvq_u32(a);
#else
const uint64x2_t b = vpaddlq_u32(a);
@@ -205,7 +205,7 @@ static INLINE uint64_t horizontal_long_add_uint32x4(const uint32x4_t a) {
}
static INLINE int64_t horizontal_add_int64x2(const int64x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddvq_s64(a);
#else
return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1);
@@ -213,7 +213,7 @@ static INLINE int64_t horizontal_add_int64x2(const int64x2_t a) {
}
static INLINE uint64_t horizontal_add_uint64x2(const uint64x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
return vaddvq_u64(a);
#else
return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1);
diff --git a/vpx_dsp/arm/transpose_neon.h b/vpx_dsp/arm/transpose_neon.h
index 518278f30..74f85a6bb 100644
--- a/vpx_dsp/arm/transpose_neon.h
+++ b/vpx_dsp/arm/transpose_neon.h
@@ -23,7 +23,7 @@
// b0.val[1]: 04 05 06 07 20 21 22 23
static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
int16x8x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
b0.val[0] = vreinterpretq_s16_s64(
vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1)));
b0.val[1] = vreinterpretq_s16_s64(
@@ -39,7 +39,7 @@ static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
static INLINE int32x4x2_t vpx_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
int32x4x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
b0.val[0] = vreinterpretq_s32_s64(
vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1)));
b0.val[1] = vreinterpretq_s32_s64(
@@ -53,7 +53,7 @@ static INLINE int32x4x2_t vpx_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
static INLINE int64x2x2_t vpx_vtrnq_s64(int32x4_t a0, int32x4_t a1) {
int64x2x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
b0.val[0] = vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1));
b0.val[1] = vtrn2q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1));
#else
@@ -67,7 +67,7 @@ static INLINE int64x2x2_t vpx_vtrnq_s64(int32x4_t a0, int32x4_t a1) {
static INLINE uint8x16x2_t vpx_vtrnq_u64_to_u8(uint32x4_t a0, uint32x4_t a1) {
uint8x16x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
b0.val[0] = vreinterpretq_u8_u64(
vtrn1q_u64(vreinterpretq_u64_u32(a0), vreinterpretq_u64_u32(a1)));
b0.val[1] = vreinterpretq_u8_u64(
@@ -83,7 +83,7 @@ static INLINE uint8x16x2_t vpx_vtrnq_u64_to_u8(uint32x4_t a0, uint32x4_t a1) {
static INLINE uint16x8x2_t vpx_vtrnq_u64_to_u16(uint32x4_t a0, uint32x4_t a1) {
uint16x8x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
b0.val[0] = vreinterpretq_u16_u64(
vtrn1q_u64(vreinterpretq_u64_u32(a0), vreinterpretq_u64_u32(a1)));
b0.val[1] = vreinterpretq_u16_u64(
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c
index b4cdd58c7..b312cc747 100644
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -31,7 +31,7 @@
// instructions. This optimization is much faster in speed unit test, but slowed
// down the whole decoder by 5%.
-#if defined(__aarch64__) && \
+#if VPX_ARCH_AARCH64 && \
(defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8))
DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = {
@@ -1261,7 +1261,7 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
#endif // defined(__ARM_FEATURE_MATMUL_INT8)
-#else // !(defined(__aarch64__) &&
+#else // !(VPX_ARCH_AARCH64 &&
// (defined(__ARM_FEATURE_DOTPROD) ||
// defined(__ARM_FEATURE_MATMUL_INT8)))
@@ -2105,6 +2105,6 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
}
}
-#endif // #if defined(__aarch64__) &&
+#endif // #if VPX_ARCH_AARCH64 &&
// (defined(__ARM_FEATURE_DOTPROD) ||
// defined(__ARM_FEATURE_MATMUL_INT8))
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.h b/vpx_dsp/arm/vpx_convolve8_neon.h
index ed7f18053..07cf8242d 100644
--- a/vpx_dsp/arm/vpx_convolve8_neon.h
+++ b/vpx_dsp/arm/vpx_convolve8_neon.h
@@ -16,7 +16,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE int32x4_t convolve8_4_sdot_partial(const int8x16_t samples_lo,
const int8x16_t samples_hi,
@@ -114,9 +114,9 @@ static INLINE uint8x8_t convolve8_8_sdot(uint8x16_t samples,
return vqrshrun_n_s16(sum, 7);
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE int32x4_t convolve8_4_usdot_partial(const uint8x16_t samples_lo,
const uint8x16_t samples_hi,
@@ -199,7 +199,7 @@ static INLINE uint8x8_t convolve8_8_usdot(uint8x16_t samples,
return vqrshrun_n_s16(sum, 7);
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#endif // VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,