summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorSeungha Yang <seungha@centricular.com>2021-03-23 16:26:13 +0900
committerGStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org>2021-03-23 12:41:43 +0000
commite006366206e6ed6f52c31d8b40f5ceac92a801c9 (patch)
tree8f238a4ed378835f23e7967191e49c84951e3dbe /sys
parent176a00985a3929aad8c7ae3d53db7fe6feebb9b5 (diff)
downloadgstreamer-plugins-bad-e006366206e6ed6f52c31d8b40f5ceac92a801c9.tar.gz
d3d11decoder: Enable high precision clock if needed
We have been retrying with a 1ms sleep whenever DecoderBeginFrame() returned E_PENDING, which means the application should call DecoderBeginFrame() again because the GPU is busy. In reality, each 1ms sleep during the retry usually takes about 15ms because of the poor clock precision on Windows. To improve throughput, this commit enables the high precision clock, but only on NVIDIA platforms, since DecoderBeginFrame() calls on other GPU vendors seem to succeed without retry. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/2099>
Diffstat (limited to 'sys')
-rw-r--r--sys/d3d11/gstd3d11decoder.cpp71
-rw-r--r--sys/d3d11/meson.build8
2 files changed, 72 insertions, 7 deletions
diff --git a/sys/d3d11/gstd3d11decoder.cpp b/sys/d3d11/gstd3d11decoder.cpp
index 6292f7ace..d70d7e4b3 100644
--- a/sys/d3d11/gstd3d11decoder.cpp
+++ b/sys/d3d11/gstd3d11decoder.cpp
@@ -56,6 +56,10 @@
#include "gstd3d11pluginutils.h"
#include <string.h>
+#ifdef HAVE_WINMM
+#include <timeapi.h>
+#endif
+
GST_DEBUG_CATEGORY (d3d11_decoder_debug);
#define GST_CAT_DEFAULT d3d11_decoder_debug
@@ -158,6 +162,9 @@ struct _GstD3D11Decoder
/* For device specific workaround */
gboolean can_direct_rendering;
+
+ /* For high precision clock */
+ guint timer_resolution;
};
static void gst_d3d11_decoder_constructed (GObject * object);
@@ -166,6 +173,7 @@ static void gst_d3d11_decoder_set_property (GObject * object, guint prop_id,
static void gst_d3d11_decoder_get_property (GObject * object, guint prop_id,
GValue * value, GParamSpec * pspec);
static void gst_d3d11_decoder_dispose (GObject * obj);
+static void gst_d3d11_decoder_finalize (GObject * obj);
#define parent_class gst_d3d11_decoder_parent_class
G_DEFINE_TYPE (GstD3D11Decoder, gst_d3d11_decoder, GST_TYPE_OBJECT);
@@ -179,6 +187,7 @@ gst_d3d11_decoder_class_init (GstD3D11DecoderClass * klass)
gobject_class->set_property = gst_d3d11_decoder_set_property;
gobject_class->get_property = gst_d3d11_decoder_get_property;
gobject_class->dispose = gst_d3d11_decoder_dispose;
+ gobject_class->finalize = gst_d3d11_decoder_finalize;
g_object_class_install_property (gobject_class, PROP_DEVICE,
g_param_spec_object ("device", "Device",
@@ -304,6 +313,20 @@ gst_d3d11_decoder_dispose (GObject * obj)
G_OBJECT_CLASS (parent_class)->dispose (obj);
}
+/* GObject finalize handler for GstD3D11Decoder.
+ *
+ * When built with HAVE_WINMM, releases the timer resolution request that
+ * was recorded in timer_resolution (if any), then chains up to the parent
+ * class finalize. */
+static void
+gst_d3d11_decoder_finalize (GObject * obj)
+{
+#ifdef HAVE_WINMM
+  GstD3D11Decoder *self = GST_D3D11_DECODER (obj);
+
+  /* Restore clock precision. A zero timer_resolution means no request was
+   * made, so there is nothing to undo. timeEndPeriod() is given the same
+   * value that was stored after the successful timeBeginPeriod() call */
+  if (self->timer_resolution)
+    timeEndPeriod (self->timer_resolution);
+#endif
+
+  G_OBJECT_CLASS (parent_class)->finalize (obj);
+}
+
GstD3D11Decoder *
gst_d3d11_decoder_new (GstD3D11Device * device)
{
@@ -634,6 +657,36 @@ gst_d3d11_decoder_ensure_staging_texture (GstD3D11Decoder * self)
return TRUE;
}
+/* Requests a finer system timer resolution for this decoder.
+ *
+ * Only done when built with HAVE_WINMM, only for NVIDIA devices, and only
+ * once per decoder instance. On success the requested period is stored in
+ * self->timer_resolution; a non-zero value there marks the request as
+ * active. Any failure is ignored and leaves timer_resolution at zero. */
+static void
+gst_d3d11_decoder_enable_high_precision_timer (GstD3D11Decoder * self)
+{
+#ifdef HAVE_WINMM
+  GstD3D11DeviceVendor vendor;
+  TIMECAPS time_caps;
+  guint resolution;
+
+  /* Already requested for this decoder */
+  if (self->timer_resolution)
+    return;
+
+  vendor = gst_d3d11_get_device_vendor (self->device);
+  /* Do this only for NVIDIA at the moment, other vendors don't seem to
+   * require a retry for BeginFrame() */
+  if (vendor != GST_D3D11_DEVICE_VENDOR_NVIDIA)
+    return;
+
+  if (timeGetDevCaps (&time_caps, sizeof (TIMECAPS)) != TIMERR_NOERROR)
+    return;
+
+  /* Aim for 1ms, clamped into the range reported by timeGetDevCaps() */
+  resolution = MIN (MAX (time_caps.wPeriodMin, 1), time_caps.wPeriodMax);
+
+  if (timeBeginPeriod (resolution) != TIMERR_NOERROR)
+    return;
+
+  self->timer_resolution = resolution;
+  /* resolution is unsigned, so format it with %u */
+  GST_INFO_OBJECT (self, "Updated timer resolution to %u", resolution);
+#endif
+}
+
static gboolean
gst_d3d11_decoder_open (GstD3D11Decoder * self)
{
@@ -826,6 +879,8 @@ gst_d3d11_decoder_open (GstD3D11Decoder * self)
self->opened = TRUE;
gst_d3d11_device_unlock (self->device);
+ gst_d3d11_decoder_enable_high_precision_timer (self);
+
return TRUE;
error:
@@ -843,26 +898,27 @@ gst_d3d11_decoder_begin_frame (GstD3D11Decoder * decoder,
ID3D11VideoContext *video_context;
guint retry_count = 0;
HRESULT hr;
+ guint retry_threshold = 100;
+
+ /* if we have high resolution timer, do more retry */
+ if (decoder->timer_resolution)
+ retry_threshold = 500;
g_return_val_if_fail (GST_IS_D3D11_DECODER (decoder), FALSE);
g_return_val_if_fail (output_view != NULL, FALSE);
video_context = decoder->video_context;
+ gst_d3d11_device_lock (decoder->device);
do {
GST_LOG_OBJECT (decoder, "Try begin frame, retry count %d", retry_count);
- gst_d3d11_device_lock (decoder->device);
hr = video_context->DecoderBeginFrame (decoder->decoder_handle,
output_view, content_key_size, content_key);
- gst_d3d11_device_unlock (decoder->device);
- /* HACK: Do 100 times retry with 1ms sleep per failure, since DXVA/D3D11
+ /* HACK: Do retry with 1ms sleep per failure, since DXVA/D3D11
* doesn't provide API for "GPU-IS-READY-TO-DECODE" like signal.
- * In the worst case, we will error out after 100ms.
- * Note that Windows' clock precision is known to be incorrect,
- * so it would be longer than 100ms in reality.
*/
- if (hr == E_PENDING && retry_count < 100) {
+ if (hr == E_PENDING && retry_count < retry_threshold) {
GST_LOG_OBJECT (decoder, "GPU is busy, try again. Retry count %d",
retry_count);
g_usleep (1000);
@@ -874,6 +930,7 @@ gst_d3d11_decoder_begin_frame (GstD3D11Decoder * decoder,
retry_count++;
} while (TRUE);
+ gst_d3d11_device_unlock (decoder->device);
if (!gst_d3d11_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Failed to begin frame, hr: 0x%x", (guint) hr);
diff --git a/sys/d3d11/meson.build b/sys/d3d11/meson.build
index 5f3989c6a..a75fbb9ea 100644
--- a/sys/d3d11/meson.build
+++ b/sys/d3d11/meson.build
@@ -44,6 +44,8 @@ endif
d3dcompiler_lib = cc.find_library('d3dcompiler', required: d3d11_option)
runtimeobject_lib = cc.find_library('runtimeobject', required : false)
+winmm_lib = cc.find_library('winmm', required: false)
+has_decoder = false
have_d3d11 = cc.has_header('d3dcompiler.h')
if not have_d3d11
@@ -58,6 +60,7 @@ if cc.has_header('dxva.h') and cc.has_header('d3d9.h')
d3d11_sources += d3d11_dec_sources
extra_c_args += ['-DHAVE_DXVA_H']
extra_dep += [gstcodecs_dep]
+ has_decoder = true
endif
if d3d11_winapi_only_app and (not d3dcompiler_lib.found() or not runtimeobject_lib.found())
@@ -84,6 +87,11 @@ if d3d11_winapi_desktop
d3d11_sources += ['gstd3d11desktopdup.cpp', 'gstd3d11desktopdupsrc.cpp']
message('Enable D3D11 Desktop Duplication API')
endif
+ # multimedia clock is desktop only API
+ if has_decoder and winmm_lib.found() and cc.has_header('timeapi.h')
+ extra_cpp_args += ['-DHAVE_WINMM']
+ extra_dep += [winmm_lib]
+ endif
endif
# need dxgi1_5.h for HDR10 processing and d3d11_4.h for ID3D11VideoContext2 interface