summaryrefslogtreecommitdiff
path: root/libavfilter/vf_vpp_qsv.c
diff options
context:
space:
mode:
authorFei Wang <fei.w.wang@intel.com>2021-03-31 10:07:44 +0800
committerZhong Li <zhongli_dev@126.com>2021-04-11 23:18:20 +0800
commit89ffcd1bbe1150aa07ae52a4e1545668a4e83a3e (patch)
treeeee074d12f0ce73aca687a5f38c65060b88452eb /libavfilter/vf_vpp_qsv.c
parent309e3cc15c87234861fe127614e09023f3867523 (diff)
downloadffmpeg-89ffcd1bbe1150aa07ae52a4e1545668a4e83a3e.tar.gz
lavfi/qsvvpp: support async depth
Async depth will allow qsv filter cache few frames, and avoid force switch and end filter task frame by frame. This change will improve performance for some multi-task case, for example 1:N transcode( decode + vpp + encode) with all QSV plugins. Performance data test on my Coffee Lake Desktop(i7-8700K) by using the following 1:8 transcode test case improvement: 1. Fps improved from 55 to 130. 2. Render/Video usage improved from ~61%/~38% to ~100%/~70%.(Data get from intel_gpu_top) test CMD: ffmpeg -v verbose -init_hw_device qsv=hw:/dev/dri/renderD128 -filter_hw_device \ hw -hwaccel qsv -hwaccel_output_format qsv -c:v h264_qsv -i 1920x1080.264 \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - \ -vf 'vpp_qsv=w=1280:h=720:async_depth=4' -c:v h264_qsv -r:v 30 -preset 7 -g 33 -refs 2 -bf 3 -q 24 -f null - Signed-off-by: Fei Wang <fei.w.wang@intel.com> Reviewed-by: Linjie Fu <linjie.justin.fu@gmail.com> Signed-off-by: Zhong Li <zhongli_dev@126.com>
Diffstat (limited to 'libavfilter/vf_vpp_qsv.c')
-rw-r--r--libavfilter/vf_vpp_qsv.c75
1 files changed, 61 insertions, 14 deletions
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 5d57707455..d9c27ce43e 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -32,6 +32,7 @@
#include "formats.h"
#include "internal.h"
#include "avfilter.h"
+#include "filters.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
@@ -93,6 +94,9 @@ typedef struct VPPContext{
char *cx, *cy, *cw, *ch;
char *ow, *oh;
char *output_format_str;
+
+ int async_depth;
+ int eof;
} VPPContext;
static const AVOption options[] = {
@@ -128,6 +132,7 @@ static const AVOption options[] = {
{ "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
{ "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
{ "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
+ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
{ NULL }
};
@@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
param.filter_frame = NULL;
param.num_ext_buf = 0;
param.ext_buf = ext_buf;
+ param.async_depth = vpp->async_depth;
if (inlink->format == AV_PIX_FMT_QSV) {
if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
@@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink)
return 0;
}
-static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
+static int activate(AVFilterContext *ctx)
{
- int ret = 0;
- AVFilterContext *ctx = inlink->dst;
- VPPContext *vpp = inlink->dst->priv;
- AVFilterLink *outlink = ctx->outputs[0];
-
- if (vpp->qsv) {
- ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
- av_frame_free(&picref);
+ AVFilterLink *inlink = ctx->inputs[0];
+ AVFilterLink *outlink = ctx->outputs[0];
+ VPPContext *s =ctx->priv;
+ QSVVPPContext *qsv = s->qsv;
+ AVFrame *in = NULL;
+ int ret, status;
+ int64_t pts;
+
+ FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+ if (!s->eof) {
+ ret = ff_inlink_consume_frame(inlink, &in);
+ if (ret < 0)
+ return ret;
+
+ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+ if (status == AVERROR_EOF) {
+ s->eof = 1;
+ }
+ }
+ }
+
+ if (qsv) {
+ if (in || s->eof) {
+ qsv->eof = s->eof;
+ ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
+ av_frame_free(&in);
+
+ if (s->eof) {
+ ff_outlink_set_status(outlink, status, pts);
+ return 0;
+ }
+
+ if (qsv->got_frame) {
+ qsv->got_frame = 0;
+ return ret;
+ }
+ }
} else {
- if (picref->pts != AV_NOPTS_VALUE)
- picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base);
- ret = ff_filter_frame(outlink, picref);
+ if (in) {
+ if (in->pts != AV_NOPTS_VALUE)
+ in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base);
+
+ ret = ff_filter_frame(outlink, in);
+ return ret;
+ }
}
- return ret;
+ if (s->eof) {
+ ff_outlink_set_status(outlink, status, pts);
+ return 0;
+ } else {
+ FF_FILTER_FORWARD_WANTED(outlink, inlink);
+ }
+
+ return FFERROR_NOT_READY;
}
static int query_formats(AVFilterContext *ctx)
@@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
.config_props = config_input,
- .filter_frame = filter_frame,
},
{ NULL }
};
@@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
.uninit = vpp_uninit,
.inputs = vpp_inputs,
.outputs = vpp_outputs,
+ .activate = activate,
.priv_class = &vpp_class,
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};