/* * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include "libavutil/avstring.h" #include "libavutil/common.h" #include "libavutil/cuda_check.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "avfilter.h" #include "formats.h" #include "internal.h" #include "scale_eval.h" #include "video.h" #include "cuda/load_helper.h" static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, }; #define DIV_UP(a, b) (((a) + (b)-1) / (b)) #define BLOCKX 32 #define BLOCKY 16 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) typedef struct CUDAColorspaceContext { const AVClass* class; AVCUDADeviceContext* hwctx; AVBufferRef* frames_ctx; AVFrame* own_frame; AVFrame* tmp_frame; CUcontext cu_ctx; CUstream cu_stream; CUmodule cu_module; CUfunction cu_convert[AVCOL_RANGE_NB]; enum AVPixelFormat pix_fmt; enum AVColorRange range; int num_planes; } CUDAColorspaceContext; static av_cold int cudacolorspace_init(AVFilterContext* ctx) { CUDAColorspaceContext* s = ctx->priv; s->own_frame = av_frame_alloc(); if (!s->own_frame) return AVERROR(ENOMEM); s->tmp_frame = av_frame_alloc(); if (!s->tmp_frame) return AVERROR(ENOMEM); return 0; } static av_cold void cudacolorspace_uninit(AVFilterContext* ctx) { CUDAColorspaceContext* s = ctx->priv; if (s->hwctx && s->cu_module) { CudaFunctions* cu = s->hwctx->internal->cuda_dl; CUcontext dummy; CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); CHECK_CU(cu->cuModuleUnload(s->cu_module)); s->cu_module = NULL; CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } av_frame_free(&s->own_frame); av_buffer_unref(&s->frames_ctx); av_frame_free(&s->tmp_frame); } static av_cold int init_hwframe_ctx(CUDAColorspaceContext* s, AVBufferRef* device_ctx, int width, int height) { AVBufferRef* out_ref = NULL; AVHWFramesContext* out_ctx; int ret; out_ref = av_hwframe_ctx_alloc(device_ctx); if (!out_ref) return AVERROR(ENOMEM); out_ctx = (AVHWFramesContext*)out_ref->data; out_ctx->format = AV_PIX_FMT_CUDA; out_ctx->sw_format = s->pix_fmt; out_ctx->width = FFALIGN(width, 32); out_ctx->height = FFALIGN(height, 32); ret = av_hwframe_ctx_init(out_ref); if (ret < 0) goto fail; av_frame_unref(s->own_frame); ret = av_hwframe_get_buffer(out_ref, s->own_frame, 0); if (ret < 0) goto fail; s->own_frame->width = width; s->own_frame->height = height; av_buffer_unref(&s->frames_ctx); s->frames_ctx = out_ref; return 0; fail: av_buffer_unref(&out_ref); return ret; } static int format_is_supported(enum AVPixelFormat fmt) { for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) if (fmt == supported_formats[i]) return 1; return 0; } static av_cold int init_processing_chain(AVFilterContext* ctx, int width, int height) { CUDAColorspaceContext* s = ctx->priv; AVHWFramesContext* in_frames_ctx; int ret; if (!ctx->inputs[0]->hw_frames_ctx) { av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); return AVERROR(EINVAL); } in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; s->pix_fmt = in_frames_ctx->sw_format; if (!format_is_supported(s->pix_fmt)) { av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", av_get_pix_fmt_name(s->pix_fmt)); return AVERROR(EINVAL); } if ((AVCOL_RANGE_MPEG != s->range) && (AVCOL_RANGE_JPEG != s->range)) { av_log(ctx, AV_LOG_ERROR, "Unsupported color range\n"); return AVERROR(EINVAL); } s->num_planes = av_pix_fmt_count_planes(s->pix_fmt); ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, width, height); if (ret < 0) return ret; ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); if (!ctx->outputs[0]->hw_frames_ctx) return AVERROR(ENOMEM); return 0; } static av_cold int cudacolorspace_load_functions(AVFilterContext* ctx) { CUDAColorspaceContext* s = ctx->priv; CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; CudaFunctions* cu = s->hwctx->internal->cuda_dl; int ret; extern const unsigned char ff_vf_colorspace_cuda_ptx_data[]; extern const unsigned int ff_vf_colorspace_cuda_ptx_len; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, ff_vf_colorspace_cuda_ptx_data, ff_vf_colorspace_cuda_ptx_len); if (ret < 0) goto fail; ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_convert[AVCOL_RANGE_MPEG], s->cu_module, "to_mpeg_cuda")); if (ret < 0) goto fail; ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_convert[AVCOL_RANGE_JPEG], s->cu_module, "to_jpeg_cuda")); if (ret < 0) goto fail; fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return ret; } static av_cold int cudacolorspace_config_props(AVFilterLink* outlink) { AVFilterContext* ctx = outlink->src; AVFilterLink* inlink = outlink->src->inputs[0]; CUDAColorspaceContext* s = ctx->priv; AVHWFramesContext* frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext* device_hwctx = frames_ctx->device_ctx->hwctx; int ret; s->hwctx = device_hwctx; s->cu_stream = s->hwctx->stream; outlink->w = inlink->w; outlink->h = inlink->h; ret = init_processing_chain(ctx, inlink->w, inlink->h); if (ret < 0) return ret; if (inlink->sample_aspect_ratio.num) { outlink->sample_aspect_ratio = av_mul_q( (AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); } else { outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; } ret = cudacolorspace_load_functions(ctx); if (ret < 0) return ret; return ret; } static int conv_cuda_convert(AVFilterContext* ctx, AVFrame* out, AVFrame* in) { CUDAColorspaceContext* s = ctx->priv; CudaFunctions* cu = s->hwctx->internal->cuda_dl; CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; int ret; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; out->color_range = s->range; for (int i = 0; i < s->num_planes; i++) { int width = in->width, height = in->height, comp_id = (i > 0); switch (s->pix_fmt) { case AV_PIX_FMT_YUV444P: break; case AV_PIX_FMT_YUV420P: width = comp_id ? in->width / 2 : in->width; /* fall-through */ case AV_PIX_FMT_NV12: height = comp_id ? in->height / 2 : in->height; break; default: av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", av_get_pix_fmt_name(s->pix_fmt)); return AVERROR(EINVAL); } if (!s->cu_convert[out->color_range]) { av_log(ctx, AV_LOG_ERROR, "Unsupported color range\n"); return AVERROR(EINVAL); } if (in->color_range != out->color_range) { void* args[] = {&in->data[i], &out->data[i], &in->linesize[i], &comp_id}; ret = CHECK_CU(cu->cuLaunchKernel( s->cu_convert[out->color_range], DIV_UP(width, BLOCKX), DIV_UP(height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL)); } else { ret = av_hwframe_transfer_data(out, in, 0); if (ret < 0) return ret; } } CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return ret; } static int cudacolorspace_conv(AVFilterContext* ctx, AVFrame* out, AVFrame* in) { CUDAColorspaceContext* s = ctx->priv; AVFilterLink* outlink = ctx->outputs[0]; AVFrame* src = in; int ret; ret = conv_cuda_convert(ctx, s->own_frame, src); if (ret < 0) return ret; src = s->own_frame; ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); if (ret < 0) return ret; av_frame_move_ref(out, s->own_frame); av_frame_move_ref(s->own_frame, s->tmp_frame); s->own_frame->width = outlink->w; s->own_frame->height = outlink->h; ret = av_frame_copy_props(out, in); if (ret < 0) return ret; return 0; } static int cudacolorspace_filter_frame(AVFilterLink* link, AVFrame* in) { AVFilterContext* ctx = link->dst; CUDAColorspaceContext* s = ctx->priv; AVFilterLink* outlink = ctx->outputs[0]; CudaFunctions* cu = s->hwctx->internal->cuda_dl; AVFrame* out = NULL; CUcontext dummy; int ret = 0; out = av_frame_alloc(); if (!out) { ret = AVERROR(ENOMEM); goto fail; } ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); if (ret < 0) goto fail; ret = cudacolorspace_conv(ctx, out, in); CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, INT_MAX); av_frame_free(&in); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); av_frame_free(&out); return ret; } #define OFFSET(x) offsetof(CUDAColorspaceContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption options[] = { {"range", "Output video range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_NB - 1, FLAGS, "range"}, {"tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range"}, {"mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range"}, {"pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range"}, {"jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range"}, {NULL}, }; static const AVClass cudacolorspace_class = { .class_name = "colorspace_cuda", .item_name = av_default_item_name, .option = options, .version = LIBAVUTIL_VERSION_INT, }; static const AVFilterPad cudacolorspace_inputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = cudacolorspace_filter_frame, }, }; static const AVFilterPad cudacolorspace_outputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .config_props = cudacolorspace_config_props, }, }; const AVFilter ff_vf_colorspace_cuda = { .name = "colorspace_cuda", .description = NULL_IF_CONFIG_SMALL("CUDA accelerated video color converter"), .init = cudacolorspace_init, .uninit = cudacolorspace_uninit, .priv_size = sizeof(CUDAColorspaceContext), .priv_class = &cudacolorspace_class, FILTER_INPUTS(cudacolorspace_inputs), FILTER_OUTPUTS(cudacolorspace_outputs), FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA), .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, };