diff options
author | Seungha Yang <seungha@centricular.com> | 2020-10-25 02:27:52 +0900 |
---|---|---|
committer | GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org> | 2020-10-27 18:31:42 +0000 |
commit | 8f96361c81ec6fd17347605ae746019cc4e6d308 (patch) | |
tree | 43bc5e077a55eee15bda30f520d871492b3209f3 /sys/nvcodec | |
parent | fab234f2f1d0a620d341856211ec1b75f45c1192 (diff) | |
download | gstreamer-plugins-bad-8f96361c81ec6fd17347605ae746019cc4e6d308.tar.gz |
nvcodec: nvsldec: Add support for CUDA memory
Add CUDA memory support. Note that zero-copy is not supported yet.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1720>
Diffstat (limited to 'sys/nvcodec')
-rw-r--r-- | sys/nvcodec/gstnvdecoder.c | 206 | ||||
-rw-r--r-- | sys/nvcodec/gstnvdecoder.h | 6 | ||||
-rw-r--r-- | sys/nvcodec/gstnvh264dec.c | 5 | ||||
-rw-r--r-- | sys/nvcodec/gstnvh265dec.c | 5 |
4 files changed, 191 insertions, 31 deletions
diff --git a/sys/nvcodec/gstnvdecoder.c b/sys/nvcodec/gstnvdecoder.c index ba18b3db5..902db4d07 100644 --- a/sys/nvcodec/gstnvdecoder.c +++ b/sys/nvcodec/gstnvdecoder.c @@ -52,6 +52,7 @@ #include "gstcudamemory.h" #include "gstnvdecoder.h" +#include "gstcudabufferpool.h" #include <string.h> GST_DEBUG_CATEGORY_EXTERN (gst_nv_decoder_debug); @@ -673,12 +674,80 @@ done: return ret; } +static gboolean +gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, + GstNvDecoderFrame * frame, GstBuffer * buffer) +{ + CUDA_MEMCPY2D copy_params = { 0, }; + GstMemory *mem; + GstCudaMemory *cuda_mem = NULL; + gint i; + gboolean ret = FALSE; + + mem = gst_buffer_peek_memory (buffer, 0); + if (!gst_is_cuda_memory (mem)) { + GST_WARNING_OBJECT (decoder, "Not a CUDA memory"); + return FALSE; + } else { + GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); + + if (cmem->context == decoder->context || + gst_cuda_context_get_handle (cmem->context) == + gst_cuda_context_get_handle (decoder->context) || + (gst_cuda_context_can_access_peer (cmem->context, decoder->context) && + gst_cuda_context_can_access_peer (decoder->context, + cmem->context))) { + cuda_mem = cmem; + } + } + + if (!cuda_mem) { + GST_WARNING_OBJECT (decoder, "Access to CUDA memory is not allowed"); + return FALSE; + } + + if (!gst_cuda_context_push (decoder->context)) { + GST_ERROR_OBJECT (decoder, "Failed to pust CUDA context"); + return FALSE; + } + + copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE; + copy_params.srcPitch = frame->pitch; + copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) { + copy_params.srcDevice = frame->devptr + + (i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); + copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i]; + copy_params.dstPitch = cuda_mem->stride; + copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0) + * GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0); + copy_params.Height = 
GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i); + + if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, NULL))) { + GST_ERROR_OBJECT (decoder, "failed to copy %dth plane", i); + goto done; + } + } + + gst_cuda_result (CuStreamSynchronize (NULL)); + + ret = TRUE; + +done: + gst_cuda_context_pop (NULL); + + GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret); + + return ret; +} + gboolean gst_nv_decoder_finish_frame (GstNvDecoder * decoder, GstNvDecoderOutputType output_type, GstObject * graphics_context, GstNvDecoderFrame * frame, GstBuffer * buffer) { - gboolean ret; + gboolean ret = FALSE; g_return_val_if_fail (GST_IS_NV_DECODER (decoder), FALSE); g_return_val_if_fail (frame != NULL, FALSE); @@ -712,7 +781,9 @@ gst_nv_decoder_finish_frame (GstNvDecoder * decoder, GST_GL_CONTEXT (graphics_context), frame, buffer); } else #endif - { + if (output_type == GST_NV_DECOCER_OUTPUT_TYPE_CUDA) { + ret = gst_nv_decoder_copy_frame_to_cuda (decoder, frame, buffer); + } else { ret = gst_nv_decoder_copy_frame_to_system (decoder, frame, buffer); } @@ -1321,7 +1392,6 @@ gst_nv_decoder_negotiate (GstVideoDecoder * decoder, *output_type = GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM; -#ifdef HAVE_NVCODEC_GST_GL { GstCaps *caps; caps = gst_pad_get_allowed_caps (GST_VIDEO_DECODER_SRC_PAD (decoder)); @@ -1330,24 +1400,40 @@ gst_nv_decoder_negotiate (GstVideoDecoder * decoder, if (!caps || gst_caps_is_any (caps)) { GST_DEBUG_OBJECT (decoder, "cannot determine output format, using system memory"); - } else if (gl_display) { + } else { GstCapsFeatures *features; guint size = gst_caps_get_size (caps); guint i; + gboolean have_cuda = FALSE; + gboolean have_gl = FALSE; for (i = 0; i < size; i++) { features = gst_caps_get_features (caps, i); if (features && gst_caps_features_contains (features, - GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) { - GST_DEBUG_OBJECT (decoder, "found GL memory feature, using gl"); - *output_type = GST_NV_DECOCER_OUTPUT_TYPE_GL; + GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) { +
GST_DEBUG_OBJECT (decoder, "found CUDA memory feature"); + have_cuda = TRUE; break; } +#ifdef HAVE_NVCODEC_GST_GL + if (gl_display && + features && gst_caps_features_contains (features, + GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) { + GST_DEBUG_OBJECT (decoder, "found GL memory feature"); + have_gl = TRUE; + } +#endif } + + if (have_cuda) + *output_type = GST_NV_DECOCER_OUTPUT_TYPE_CUDA; + else if (have_gl) + *output_type = GST_NV_DECOCER_OUTPUT_TYPE_GL; } gst_clear_caps (&caps); } +#ifdef HAVE_NVCODEC_GST_GL if (*output_type == GST_NV_DECOCER_OUTPUT_TYPE_GL && !gst_nv_decoder_ensure_gl_context (GST_ELEMENT (decoder), gl_display, other_gl_context, gl_context)) { @@ -1355,36 +1441,85 @@ gst_nv_decoder_negotiate (GstVideoDecoder * decoder, "OpenGL context is not CUDA-compatible, fallback to system memory"); *output_type = GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM; } +#endif - if (*output_type == GST_NV_DECOCER_OUTPUT_TYPE_GL) { - gst_caps_set_features (state->caps, 0, - gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, NULL)); - gst_caps_set_simple (state->caps, "texture-target", G_TYPE_STRING, - "2D", NULL); - } else { - GST_DEBUG_OBJECT (decoder, "using system memory"); - } + switch (*output_type) { + case GST_NV_DECOCER_OUTPUT_TYPE_CUDA: + GST_DEBUG_OBJECT (decoder, "using CUDA memory"); + gst_caps_set_features (state->caps, 0, + gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, NULL)); + break; +#ifdef HAVE_NVCODEC_GST_GL + case GST_NV_DECOCER_OUTPUT_TYPE_GL: + GST_DEBUG_OBJECT (decoder, "using GL memory"); + gst_caps_set_features (state->caps, 0, + gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, NULL)); + gst_caps_set_simple (state->caps, "texture-target", G_TYPE_STRING, + "2D", NULL); + break; #endif + default: + GST_DEBUG_OBJECT (decoder, "using system memory"); + break; + } return TRUE; } -gboolean -gst_nv_decoder_decide_allocation (GstVideoDecoder * decocer, GstQuery * query, - GstObject * gl_context, GstNvDecoderOutputType output_type) +static 
gboolean +gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query) { -#ifdef HAVE_NVCODEC_GST_GL GstCaps *outcaps; GstBufferPool *pool = NULL; guint n, size, min, max; GstVideoInfo vinfo = { 0, }; GstStructure *config; - GST_DEBUG_OBJECT (decocer, "decide allocation"); + gst_query_parse_allocation (query, &outcaps, NULL); + n = gst_query_get_n_allocation_pools (query); + if (n > 0) { + gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); + if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) { + gst_object_unref (pool); + pool = NULL; + } + } - /* GstVideoDecoder will take care this case */ - if (output_type == GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM) - return TRUE; + if (!pool) { + GST_DEBUG_OBJECT (decoder, "no downstream pool, create our pool"); + pool = gst_cuda_buffer_pool_new (decoder->context); + + if (outcaps) + gst_video_info_from_caps (&vinfo, outcaps); + size = (guint) vinfo.size; + min = max = 0; + } + + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_set_params (config, outcaps, size, min, max); + gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); + gst_buffer_pool_set_config (pool, config); + if (n > 0) + gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); + else + gst_query_add_allocation_pool (query, pool, size, min, max); + gst_object_unref (pool); + + return TRUE; +} + +#ifdef HAVE_NVCODEC_GST_GL +static gboolean +gst_nv_decoder_ensure_gl_pool (GstNvDecoder * decoder, GstQuery * query, + GstObject * gl_context) +{ + GstCaps *outcaps; + GstBufferPool *pool = NULL; + guint n, size, min, max; + GstVideoInfo vinfo = { 0, }; + GstStructure *config; + + GST_DEBUG_OBJECT (decoder, "decide allocation"); gst_query_parse_allocation (query, &outcaps, NULL); n = gst_query_get_n_allocation_pools (query); @@ -1397,6 +1532,7 @@ gst_nv_decoder_decide_allocation (GstVideoDecoder * decocer, GstQuery * query, } if (!pool) { + GST_DEBUG_OBJECT (decoder, "no downstream pool, 
create our pool"); pool = gst_gl_buffer_pool_new (GST_GL_CONTEXT (gl_context)); if (outcaps) @@ -1414,7 +1550,31 @@ gst_nv_decoder_decide_allocation (GstVideoDecoder * decocer, GstQuery * query, else gst_query_add_allocation_pool (query, pool, size, min, max); gst_object_unref (pool); + + return TRUE; +} #endif +gboolean +gst_nv_decoder_decide_allocation (GstNvDecoder * nvdec, + GstVideoDecoder * decocer, GstQuery * query, GstObject * gl_context, + GstNvDecoderOutputType output_type) +{ + GST_DEBUG_OBJECT (decocer, "decide allocation"); + + /* GstVideoDecoder will take care this case */ + if (output_type == GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM) + return TRUE; + +#ifdef HAVE_NVCODEC_GST_GL + if (output_type == GST_NV_DECOCER_OUTPUT_TYPE_GL) { + if (!gst_nv_decoder_ensure_gl_pool (nvdec, query, gl_context)) + return FALSE; + } else +#endif + if (!gst_nv_decoder_ensure_cuda_pool (nvdec, query)) { + return FALSE; + } + return TRUE; } diff --git a/sys/nvcodec/gstnvdecoder.h b/sys/nvcodec/gstnvdecoder.h index d3fc0859d..d40b1e60b 100644 --- a/sys/nvcodec/gstnvdecoder.h +++ b/sys/nvcodec/gstnvdecoder.h @@ -48,7 +48,8 @@ typedef enum { GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM = 0, GST_NV_DECOCER_OUTPUT_TYPE_GL, - /* FIXME: add support CUDA, D3D11 memory */ + GST_NV_DECOCER_OUTPUT_TYPE_CUDA, + /* FIXME: add support D3D11 memory */ } GstNvDecoderOutputType; G_GNUC_INTERNAL @@ -122,7 +123,8 @@ gboolean gst_nv_decoder_negotiate (GstVideoDecoder * decoder, GstNvDecoderOutputType * output_type); G_GNUC_INTERNAL -gboolean gst_nv_decoder_decide_allocation (GstVideoDecoder * decocer, +gboolean gst_nv_decoder_decide_allocation (GstNvDecoder * nvdec, + GstVideoDecoder * decocer, GstQuery * query, GstObject * gl_context, GstNvDecoderOutputType output_type); diff --git a/sys/nvcodec/gstnvh264dec.c b/sys/nvcodec/gstnvh264dec.c index a8851c0cc..8712b2499 100644 --- a/sys/nvcodec/gstnvh264dec.c +++ b/sys/nvcodec/gstnvh264dec.c @@ -294,7 +294,7 @@ gst_nv_h264_dec_decide_allocation (GstVideoDecoder * 
decoder, GstQuery * query) { GstNvH264Dec *self = GST_NV_H264_DEC (decoder); - gst_nv_decoder_decide_allocation (decoder, query, + gst_nv_decoder_decide_allocation (self->decoder, decoder, query, self->gl_context, self->output_type); return GST_VIDEO_DECODER_CLASS (parent_class)->decide_allocation @@ -480,8 +480,7 @@ gst_nv_h264_dec_output_picture (GstH264Decoder * decoder, if (!ret) { if (!gst_nv_decoder_finish_frame (self->decoder, - GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM, NULL, decoder_frame, - frame->output_buffer)) { + self->output_type, NULL, decoder_frame, frame->output_buffer)) { GST_ERROR_OBJECT (self, "Failed to finish frame"); goto error; } diff --git a/sys/nvcodec/gstnvh265dec.c b/sys/nvcodec/gstnvh265dec.c index d9780f871..1440cd26f 100644 --- a/sys/nvcodec/gstnvh265dec.c +++ b/sys/nvcodec/gstnvh265dec.c @@ -294,7 +294,7 @@ gst_nv_h265_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query) { GstNvH265Dec *self = GST_NV_H265_DEC (decoder); - gst_nv_decoder_decide_allocation (decoder, query, + gst_nv_decoder_decide_allocation (self->decoder, decoder, query, self->gl_context, self->output_type); return GST_VIDEO_DECODER_CLASS (parent_class)->decide_allocation @@ -475,8 +475,7 @@ gst_nv_h265_dec_output_picture (GstH265Decoder * decoder, if (!ret) { if (!gst_nv_decoder_finish_frame (self->decoder, - GST_NV_DECOCER_OUTPUT_TYPE_SYSTEM, NULL, decoder_frame, - frame->output_buffer)) { + self->output_type, NULL, decoder_frame, frame->output_buffer)) { GST_ERROR_OBJECT (self, "Failed to finish frame"); goto error; } |