Add AVX2 intrinsic for vpx_comp_avg_pred() function

The module level scaling w.r.t C function (timer based) for existing (SSE2) and new AVX2 intrinsics: If ref_padding = 0 Block Scaling size SSE2 AVX2 8x4 3.24x 3.24x 8x8 4.22x 4.90x 8x16 5.91x 5.93x 16x8 1.63x 3.52x 16x16 1.53x 4.19x 16x32 1.38x 4.82x 32x16 1.28x 3.08x 32x32 1.45x 3.13x 32x64 1.38x 3.04x 64x32 1.39x 2.12x 64x64 1.46x 2.24x If ref_padding = 8 Block Scaling size SSE2 AVX2 8x4 3.20x 3.21x 8x8 4.61x 4.83x 8x16 5.50x 6.45x 16x8 1.56x 3.35x 16x16 1.53x 4.19x 16x32 1.37x 4.83x 32x16 1.28x 3.07x 32x32 1.46x 3.29x 32x64 1.38x 3.22x 64x32 1.38x 2.14x 64x64 1.38x 2.12x This is a bit-exact change. Change-Id: I72c5d155f64d0c630bc8c3aef21dc8bbd045d9e6
author: Anupam Pandey <anupam.pandey@ittiam.com> 2023-05-08 12:10:09 +0530
committer: Anupam Pandey <anupam.pandey@ittiam.com> 2023-05-09 16:33:59 +0530
commit: 457b7f59860955415a23c20c535fc13fde51936f (patch)
tree: 1da1088a694cff95d93647a9a70e5eb7030ec196 /vp9
parent: 4818f997fe43c96a96bbda2d3b9aac714dbcd920 (diff)
download: libvpx-457b7f59860955415a23c20c535fc13fde51936f.tar.gz
2 files changed, 6 insertions, 6 deletions
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 64e9ef0f9..0ea0f85e4 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -297,7 +297,7 @@ static unsigned int setup_center_error(
       besterr =
           vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
     } else {
-      DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+      DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
       vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
     }
@@ -312,7 +312,7 @@ static unsigned int setup_center_error(
   uint32_t besterr;
   (void)xd;
   if (second_pred != NULL) {
-    DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+    DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
     vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
     besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
   } else {
@@ -635,7 +635,7 @@ static int accurate_sub_pel_search(
     vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
                               0, kernel, MV_PRECISION_Q3, 0, 0);
     if (second_pred != NULL) {
-      DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+      DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
       vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
       besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
     } else {
@@ -654,7 +654,7 @@ static int accurate_sub_pel_search(
   vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
                             0, kernel, MV_PRECISION_Q3, 0, 0);
   if (second_pred != NULL) {
-    DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+    DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
     vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
     besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
   } else {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f051c6279..464705a67 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1937,10 +1937,10 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
 
 // Prediction buffer from second frame.
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
+  DECLARE_ALIGNED(32, uint16_t, second_pred_alloc_16[64 * 64]);
   uint8_t *second_pred;
 #else
-  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
+  DECLARE_ALIGNED(32, uint8_t, second_pred[64 * 64]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
   // Check number of iterations do not exceed the max
author	Anupam Pandey <anupam.pandey@ittiam.com>	2023-05-08 12:10:09 +0530
committer	Anupam Pandey <anupam.pandey@ittiam.com>	2023-05-09 16:33:59 +0530
commit	457b7f59860955415a23c20c535fc13fde51936f (patch)
tree	1da1088a694cff95d93647a9a70e5eb7030ec196 /vp9
parent	4818f997fe43c96a96bbda2d3b9aac714dbcd920 (diff)
download	libvpx-457b7f59860955415a23c20c535fc13fde51936f.tar.gz