summaryrefslogtreecommitdiff
path: root/libavfilter/vf_gblur.c
diff options
context:
space:
mode:
author: Wu Jianhua <jianhua.wu@intel.com> 2021-08-04 10:06:15 +0800
committer: Paul B Mahol <onemda@gmail.com> 2021-08-29 19:58:33 +0200
commit: 4041c1029b93162faacda9e3f3cd083d1fbca7ce (patch)
tree: 0c7693b822eb51ccbef214df0be4e91a56f734f5 /libavfilter/vf_gblur.c
parent: 0c54ab20c254bf26c33a5cceb83862d3a59b3db7 (diff)
download: ffmpeg-4041c1029b93162faacda9e3f3cd083d1fbca7ce.tar.gz
libavfilter/x86/vf_gblur: add localbuf and ff_horiz_slice_avx2/512()
We introduced ff_horiz_slice_avx2/512(), implemented with a new algorithm. In a nutshell, the new algorithm does three things: it gathers data from 8/16 rows, blurs the data, and scatters the data back to the image buffer. Here we use a customized 8x8/16x16 transpose to avoid the huge overhead of gather and scatter instructions; the transpose depends on a newly added temporary buffer called localbuf.
Performance data:
ff_horiz_slice_avx2(old): 109.89
ff_horiz_slice_avx2(new): 666.67
ff_horiz_slice_avx512: 1000
Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
Co-authored-by: Jin Jun <jun.i.jin@intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
Diffstat (limited to 'libavfilter/vf_gblur.c')
-rw-r--r-- libavfilter/vf_gblur.c 11
1 files changed, 9 insertions, 2 deletions
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index a2c410c07b..ad48eae672 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -64,7 +64,7 @@ static void postscale_c(float *buffer, int length,
}
static void horiz_slice_c(float *buffer, int width, int height, int steps,
- float nu, float bscale)
+ float nu, float bscale, float *localbuf)
{
int step, x, y;
float *ptr;
@@ -97,9 +97,13 @@ static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int n
const int steps = s->steps;
const float nu = s->nu;
float *buffer = s->buffer;
+ float *localbuf = NULL;
+
+ if (s->localbuf)
+ localbuf = s->localbuf + s->stride * width * slice_start;
s->horiz_slice(buffer + width * slice_start, width, slice_end - slice_start,
- steps, nu, boundaryscale);
+ steps, nu, boundaryscale, localbuf);
emms_c();
return 0;
}
@@ -242,6 +246,7 @@ static int query_formats(AVFilterContext *ctx)
void ff_gblur_init(GBlurContext *s)
{
+ s->localbuf = NULL;
s->horiz_slice = horiz_slice_c;
s->verti_slice = verti_slice_c;
s->postscale_slice = postscale_c;
@@ -384,6 +389,8 @@ static av_cold void uninit(AVFilterContext *ctx)
GBlurContext *s = ctx->priv;
av_freep(&s->buffer);
+ if (s->localbuf)
+ av_free(s->localbuf);
}
static const AVFilterPad gblur_inputs[] = {