summary | refs | log | tree | commit | diff
path: root/libavfilter/vf_gblur.c
diff options
context:
space:
mode:
author: Wu Jianhua <jianhua.wu@intel.com> 2021-08-04 10:06:13 +0800
committer: Paul B Mahol <onemda@gmail.com> 2021-08-29 19:58:33 +0200
commit: 68a2722aee2868084ad3ba1a7a5431735eab049e (patch)
tree: 647893d1ea8883e2b9a15e707b12b6aeb1be6618 /libavfilter/vf_gblur.c
parent: 4a5e24721c2bd1839aec57730061884fe2c5dd3b (diff)
download: ffmpeg-68a2722aee2868084ad3ba1a7a5431735eab049e.tar.gz
libavfilter/x86/vf_gblur: add ff_verti_slice_avx2/512()
The new vertical slice with AVX2/512 acceleration can significantly improve the performance of Gaussian Filter 2D.

Performance data:
ff_verti_slice_c: 32.57
ff_verti_slice_avx2: 476.19
ff_verti_slice_avx512: 833.33

Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
Co-authored-by: Jin Jun <jun.i.jin@intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
Diffstat (limited to 'libavfilter/vf_gblur.c')
-rw-r--r-- libavfilter/vf_gblur.c 24
1 files changed, 16 insertions, 8 deletions
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 4780bb6204..a2c410c07b 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -138,6 +138,19 @@ static void do_vertical_columns(float *buffer, int width, int height,
}
}
+static void verti_slice_c(float *buffer, int width, int height,
+ int slice_start, int slice_end, int steps,
+ float nu, float boundaryscale)
+{
+ int aligned_end = slice_start + (((slice_end - slice_start) >> 3) << 3);
+ /* Filter vertically along columns (process 8 columns in each step) */
+ do_vertical_columns(buffer, width, height, slice_start, aligned_end,
+ steps, nu, boundaryscale, 8);
+ /* Filter un-aligned columns one by one */
+ do_vertical_columns(buffer, width, height, aligned_end, slice_end,
+ steps, nu, boundaryscale, 1);
+}
+
static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
GBlurContext *s = ctx->priv;
@@ -150,16 +163,10 @@ static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_
const int steps = s->steps;
const float nu = s->nuV;
float *buffer = s->buffer;
- int aligned_end;
- aligned_end = slice_start + (((slice_end - slice_start) >> 3) << 3);
- /* Filter vertically along columns (process 8 columns in each step) */
- do_vertical_columns(buffer, width, height, slice_start, aligned_end,
- steps, nu, boundaryscale, 8);
+ s->verti_slice(buffer, width, height, slice_start, slice_end,
+ steps, nu, boundaryscale);
- /* Filter un-aligned columns one by one */
- do_vertical_columns(buffer, width, height, aligned_end, slice_end,
- steps, nu, boundaryscale, 1);
return 0;
}
@@ -236,6 +243,7 @@ static int query_formats(AVFilterContext *ctx)
void ff_gblur_init(GBlurContext *s)
{
s->horiz_slice = horiz_slice_c;
+ s->verti_slice = verti_slice_c;
s->postscale_slice = postscale_c;
if (ARCH_X86)
ff_gblur_init_x86(s);