diff options
author | Anupam Pandey <anupam.pandey@ittiam.com> | 2023-05-08 12:10:09 +0530 |
---|---|---|
committer | Anupam Pandey <anupam.pandey@ittiam.com> | 2023-05-09 16:33:59 +0530 |
commit | 457b7f59860955415a23c20c535fc13fde51936f (patch) | |
tree | 1da1088a694cff95d93647a9a70e5eb7030ec196 /vp9 | |
parent | 4818f997fe43c96a96bbda2d3b9aac714dbcd920 (diff) | |
download | libvpx-457b7f59860955415a23c20c535fc13fde51936f.tar.gz |
Add AVX2 intrinsic for vpx_comp_avg_pred() function
The module-level scaling (speed-up relative to the C function, timer based)
for the existing SSE2 and the new AVX2 intrinsics:
If ref_padding = 0
Block size | SSE2 scaling | AVX2 scaling
8x4 3.24x 3.24x
8x8 4.22x 4.90x
8x16 5.91x 5.93x
16x8 1.63x 3.52x
16x16 1.53x 4.19x
16x32 1.38x 4.82x
32x16 1.28x 3.08x
32x32 1.45x 3.13x
32x64 1.38x 3.04x
64x32 1.39x 2.12x
64x64 1.46x 2.24x
If ref_padding = 8
Block size | SSE2 scaling | AVX2 scaling
8x4 3.20x 3.21x
8x8 4.61x 4.83x
8x16 5.50x 6.45x
16x8 1.56x 3.35x
16x16 1.53x 4.19x
16x32 1.37x 4.83x
32x16 1.28x 3.07x
32x32 1.46x 3.29x
32x64 1.38x 3.22x
64x32 1.38x 2.14x
64x64 1.38x 2.12x
This is a bit-exact change.
Change-Id: I72c5d155f64d0c630bc8c3aef21dc8bbd045d9e6
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 4 |
2 files changed, 6 insertions, 6 deletions
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 64e9ef0f9..0ea0f85e4 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -297,7 +297,7 @@ static unsigned int setup_center_error( besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { - DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } @@ -312,7 +312,7 @@ static unsigned int setup_center_error( uint32_t besterr; (void)xd; if (second_pred != NULL) { - DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } else { @@ -635,7 +635,7 @@ static int accurate_sub_pel_search( vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, 0, kernel, MV_PRECISION_Q3, 0, 0); if (second_pred != NULL) { - DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); } else { @@ -654,7 +654,7 @@ static int accurate_sub_pel_search( vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, 0, kernel, MV_PRECISION_Q3, 0, 0); if (second_pred != NULL) { - DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); } else { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f051c6279..464705a67 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1937,10 +1937,10 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 
BLOCK_SIZE bsize, // Prediction buffer from second frame. #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); + DECLARE_ALIGNED(32, uint16_t, second_pred_alloc_16[64 * 64]); uint8_t *second_pred; #else - DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); + DECLARE_ALIGNED(32, uint8_t, second_pred[64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH // Check number of iterations do not exceed the max |