summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c11
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c65
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c19
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c8
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/README46
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm119
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm117
-rw-r--r--drivers/gpu/drm/i915/gvt/debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c24
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.h6
-rw-r--r--drivers/gpu/drm/i915/gvt/reg.h5
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h7
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c1
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c15
18 files changed, 381 insertions, 89 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 9ea1a397d1b5..26996e1839e2 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3822,6 +3822,17 @@ skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state,
return true;
}
+unsigned int
+intel_plane_fence_y_offset(const struct intel_plane_state *plane_state)
+{
+ int x = 0, y = 0;
+
+ intel_plane_adjust_aligned_offset(&x, &y, plane_state, 0,
+ plane_state->color_plane[0].offset, 0);
+
+ return y;
+}
+
static int skl_check_main_surface(struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev);
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index efb4da205ea2..3a06f72c9859 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -608,6 +608,7 @@ unsigned int i9xx_plane_max_stride(struct intel_plane *plane,
u32 pixel_format, u64 modifier,
unsigned int rotation);
int bdw_get_pipemisc_bpp(struct intel_crtc *crtc);
+unsigned int intel_plane_fence_y_offset(const struct intel_plane_state *plane_state);
struct intel_display_error_state *
intel_display_capture_error_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 1c26673acb2d..412572f88b67 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -48,19 +48,6 @@
#include "intel_frontbuffer.h"
/*
- * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the
- * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's
- * origin so the x and y offsets can actually fit the registers. As a
- * consequence, the fence doesn't really start exactly at the display plane
- * address we program because it starts at the real start of the buffer, so we
- * have to take this into consideration here.
- */
-static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc)
-{
- return fbc->state_cache.plane.y - fbc->state_cache.plane.adjusted_y;
-}
-
-/*
* For SKL+, the plane source size used by the hardware is based on the value we
* write to the PLANE_SIZE register. For BDW-, the hardware looks at the value
* we wrote to PIPESRC.
@@ -141,7 +128,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv)
fbc_ctl2 |= FBC_CTL_CPU_FENCE;
intel_de_write(dev_priv, FBC_CONTROL2, fbc_ctl2);
intel_de_write(dev_priv, FBC_FENCE_OFF,
- params->crtc.fence_y_offset);
+ params->fence_y_offset);
}
/* enable it... */
@@ -175,7 +162,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
if (params->fence_id >= 0) {
dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id;
intel_de_write(dev_priv, DPFC_FENCE_YOFF,
- params->crtc.fence_y_offset);
+ params->fence_y_offset);
} else {
intel_de_write(dev_priv, DPFC_FENCE_YOFF, 0);
}
@@ -243,7 +230,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
intel_de_write(dev_priv, SNB_DPFC_CTL_SA,
SNB_CPU_FENCE_ENABLE | params->fence_id);
intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET,
- params->crtc.fence_y_offset);
+ params->fence_y_offset);
}
} else {
if (IS_GEN(dev_priv, 6)) {
@@ -253,7 +240,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
}
intel_de_write(dev_priv, ILK_DPFC_FENCE_YOFF,
- params->crtc.fence_y_offset);
+ params->fence_y_offset);
/* enable it... */
intel_de_write(dev_priv, ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
@@ -320,7 +307,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
intel_de_write(dev_priv, SNB_DPFC_CTL_SA,
SNB_CPU_FENCE_ENABLE | params->fence_id);
intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET,
- params->crtc.fence_y_offset);
+ params->fence_y_offset);
} else if (dev_priv->ggtt.num_fences) {
intel_de_write(dev_priv, SNB_DPFC_CTL_SA, 0);
intel_de_write(dev_priv, DPFC_CPU_FENCE_OFFSET, 0);
@@ -631,8 +618,8 @@ static bool rotation_is_valid(struct drm_i915_private *dev_priv,
/*
* For some reason, the hardware tracking starts looking at whatever we
* programmed as the display plane base address register. It does not look at
- * the X and Y offset registers. That's why we look at the crtc->adjusted{x,y}
- * variables instead of just looking at the pipe/plane size.
+ * the X and Y offset registers. That's why we include the src x/y offsets
+ * instead of just looking at the plane size.
*/
static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
{
@@ -705,7 +692,6 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
cache->plane.src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
cache->plane.adjusted_x = plane_state->color_plane[0].x;
cache->plane.adjusted_y = plane_state->color_plane[0].y;
- cache->plane.y = plane_state->uapi.src.y1 >> 16;
cache->plane.pixel_blend_mode = plane_state->hw.pixel_blend_mode;
@@ -713,6 +699,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
cache->fb.stride = fb->pitches[0];
cache->fb.modifier = fb->modifier;
+ cache->fence_y_offset = intel_plane_fence_y_offset(plane_state);
+
drm_WARN_ON(&dev_priv->drm, plane_state->flags & PLANE_HAS_FENCE &&
!plane_state->vma->fence);
@@ -731,6 +719,25 @@ static bool intel_fbc_cfb_size_changed(struct drm_i915_private *dev_priv)
fbc->compressed_fb.size * fbc->threshold;
}
+static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv)
+{
+ struct intel_fbc *fbc = &dev_priv->fbc;
+ struct intel_fbc_state_cache *cache = &fbc->state_cache;
+
+ if ((IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) &&
+ cache->fb.modifier != I915_FORMAT_MOD_X_TILED)
+ return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8;
+ else
+ return 0;
+}
+
+static bool intel_fbc_gen9_wa_cfb_stride_changed(struct drm_i915_private *dev_priv)
+{
+ struct intel_fbc *fbc = &dev_priv->fbc;
+
+ return fbc->params.gen9_wa_cfb_stride != intel_fbc_gen9_wa_cfb_stride(dev_priv);
+}
+
static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv)
{
struct intel_fbc *fbc = &dev_priv->fbc;
@@ -883,12 +890,13 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
memset(params, 0, sizeof(*params));
params->fence_id = cache->fence_id;
+ params->fence_y_offset = cache->fence_y_offset;
params->crtc.pipe = crtc->pipe;
params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane;
- params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc);
params->fb.format = cache->fb.format;
+ params->fb.modifier = cache->fb.modifier;
params->fb.stride = cache->fb.stride;
params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
@@ -918,6 +926,9 @@ static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state)
if (params->fb.format != cache->fb.format)
return false;
+ if (params->fb.modifier != cache->fb.modifier)
+ return false;
+
if (params->fb.stride != cache->fb.stride)
return false;
@@ -1197,7 +1208,8 @@ void intel_fbc_enable(struct intel_atomic_state *state,
if (fbc->crtc) {
if (fbc->crtc != crtc ||
- !intel_fbc_cfb_size_changed(dev_priv))
+ (!intel_fbc_cfb_size_changed(dev_priv) &&
+ !intel_fbc_gen9_wa_cfb_stride_changed(dev_priv)))
goto out;
__intel_fbc_disable(dev_priv);
@@ -1219,12 +1231,7 @@ void intel_fbc_enable(struct intel_atomic_state *state,
goto out;
}
- if ((IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) &&
- plane_state->hw.fb->modifier != I915_FORMAT_MOD_X_TILED)
- cache->gen9_wa_cfb_stride =
- DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8;
- else
- cache->gen9_wa_cfb_stride = 0;
+ cache->gen9_wa_cfb_stride = intel_fbc_gen9_wa_cfb_stride(dev_priv);
drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
pipe_name(crtc->pipe));
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 010f37240710..95b6d9457910 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -2867,19 +2867,13 @@ intel_hdmi_connector_register(struct drm_connector *connector)
return ret;
}
-static void intel_hdmi_destroy(struct drm_connector *connector)
+static void intel_hdmi_connector_unregister(struct drm_connector *connector)
{
struct cec_notifier *n = intel_attached_hdmi(to_intel_connector(connector))->cec_notifier;
cec_notifier_conn_unregister(n);
- intel_connector_destroy(connector);
-}
-
-static void intel_hdmi_connector_unregister(struct drm_connector *connector)
-{
intel_hdmi_remove_i2c_symlink(connector);
-
intel_connector_unregister(connector);
}
@@ -2891,7 +2885,7 @@ static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
.atomic_set_property = intel_digital_connector_atomic_set_property,
.late_register = intel_hdmi_connector_register,
.early_unregister = intel_hdmi_connector_unregister,
- .destroy = intel_hdmi_destroy,
+ .destroy = intel_connector_destroy,
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
.atomic_duplicate_state = intel_digital_connector_duplicate_state,
};
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index e4aece20bc80..52db2bde44a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -204,25 +204,25 @@ static int __ring_active(struct intel_ring *ring)
{
int err;
- err = i915_active_acquire(&ring->vma->active);
+ err = intel_ring_pin(ring);
if (err)
return err;
- err = intel_ring_pin(ring);
+ err = i915_active_acquire(&ring->vma->active);
if (err)
- goto err_active;
+ goto err_pin;
return 0;
-err_active:
- i915_active_release(&ring->vma->active);
+err_pin:
+ intel_ring_unpin(ring);
return err;
}
static void __ring_retire(struct intel_ring *ring)
{
- intel_ring_unpin(ring);
i915_active_release(&ring->vma->active);
+ intel_ring_unpin(ring);
}
__i915_active_call
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7c3d8ef4a47c..cb07e1d2a353 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -5396,13 +5396,8 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
* typically be the first we inspect for submission.
*/
swp = prandom_u32_max(ve->num_siblings);
- if (!swp)
- return;
-
- swap(ve->siblings[swp], ve->siblings[0]);
- if (!intel_engine_has_relative_mmio(ve->siblings[0]))
- virtual_update_register_offsets(ve->context.lrc_reg_state,
- ve->siblings[0]);
+ if (swp)
+ swap(ve->siblings[swp], ve->siblings[0]);
}
static int virtual_context_alloc(struct intel_context *ce)
@@ -5415,15 +5410,9 @@ static int virtual_context_alloc(struct intel_context *ce)
static int virtual_context_pin(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
- int err;
/* Note: we must use a real engine class for setting up reg state */
- err = __execlists_context_pin(ce, ve->siblings[0]);
- if (err)
- return err;
-
- virtual_engine_initial_hint(ve);
- return 0;
+ return __execlists_context_pin(ce, ve->siblings[0]);
}
static void virtual_context_enter(struct intel_context *ce)
@@ -5688,6 +5677,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
+ ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
@@ -5769,6 +5759,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+ virtual_engine_initial_hint(ve);
return &ve->context;
err_put:
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 5049c3dd08a6..c91981e75ebf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -44,9 +44,9 @@ static int cmp_u64(const void *A, const void *B)
{
const u64 *a = A, *b = B;
- if (a < b)
+ if (*a < *b)
return -1;
- else if (a > b)
+ else if (*a > *b)
return 1;
else
return 0;
@@ -56,9 +56,9 @@ static int cmp_u32(const void *A, const void *B)
{
const u32 *a = A, *b = B;
- if (a < b)
+ if (*a < *b)
return -1;
- else if (a > b)
+ else if (*a > *b)
return 1;
else
return 0;
diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README
new file mode 100644
index 000000000000..e7e96d7073c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/README
@@ -0,0 +1,46 @@
+ASM sources for auto generated shaders
+======================================
+
+The i915/gt/hsw_clear_kernel.c and i915/gt/ivb_clear_kernel.c files contain
+pre-compiled batch chunks that will clear any residual render cache during
+context switch.
+
+They are generated from their respective platform ASM files present on
+i915/gt/shaders/clear_kernel directory.
+
+The generated .c files should never be modified directly. Instead, any modification
+needs to be done on the on their respective ASM files and build instructions below
+needes to be followed.
+
+Building
+========
+
+Environment
+-----------
+
+IGT GPU tool scripts and the Mesa's i965 instruction assembler tool are used
+on building.
+
+Please make sure your Mesa tool is compiled with "-Dtools=intel" and
+"-Ddri-drivers=i965", and run this script from IGT source root directory"
+
+The instructions bellow assume:
+ * IGT gpu tools source code is located on your home directory (~) as ~/igt
+ * Mesa source code is located on your home directory (~) as ~/mesa
+ and built under the ~/mesa/build directory
+ * Linux kernel source code is under your home directory (~) as ~/linux
+
+Instructions
+------------
+
+~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm \
+ ~/igt/lib/i915/shaders/clear_kernel/ivb.asm
+~ $ cd ~/igt
+igt $ ./scripts/generate_clear_kernel.sh -g ivb \
+ -m ~/mesa/build/src/intel/tools/i965_asm
+
+~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm \
+ ~/igt/lib/i915/shaders/clear_kernel/hsw.asm
+~ $ cd ~/igt
+igt $ ./scripts/generate_clear_kernel.sh -g hsw \
+ -m ~/mesa/build/src/intel/tools/i965_asm \ No newline at end of file
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
new file mode 100644
index 000000000000..5fdf384bb621
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Kernel for PAVP buffer clear.
+ *
+ * 1. Clear all 64 GRF registers assigned to the kernel with designated value;
+ * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears
+ * 512 bytes of Render Cache.
+ */
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/**
+ * Curbe Format
+ *
+ * DW 1.0 - Block Offset to write Render Cache
+ * DW 1.1 [15:0] - Clear Word
+ * DW 1.2 - Delay iterations
+ * DW 1.3 - Enable Instrumentation (only for debug)
+ * DW 1.4 - Rsvd (intended for context ID)
+ * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
+ * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ *
+ * Binding Table
+ *
+ * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
+ * BTI 1: Wait/Instrumentation Buffer
+ * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
+ * Expected to be initialized to 0 by driver/another kernel
+ * Layout:
+ * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
+ * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
+ */
+add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
+cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N };
+(+f0.0) jmpi(1) 352D { align1 WE_all 1N };
+
+/**
+ * State Register has info on where this thread is running
+ * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ */
+mov(8) g3<1>UD 0x00000000UD { align1 1Q };
+shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N };
+and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */
+shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N };
+and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */
+mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N };
+add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
+shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N };
+and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */
+mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N };
+add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */
+
+mov(8) g5<1>UD 0x00000000UD { align1 1Q };
+and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N };
+mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N };
+
+mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */
+mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */
+and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N };
+
+/* Media block read to fetch current value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x02190001
+
+ render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
+add(1) g5<1>D g5<0,1,0>D 1D { align1 1N };
+
+/* Media block write for updated value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
+
+/* Delay thread for specified parameter */
+add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N };
+(+f0.0) jmpi(1) -32D { align1 WE_all 1N };
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/* Initialize looping parameters */
+mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */
+mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */
+
+/* Write 32x16 all "0" block */
+mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q };
+mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q };
+mov(2) g2<1>UD g1<2,2,1>UW { align1 1N };
+mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */
+and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N };
+mov(16) g3<1>UD 0x00000000UD { align1 1H };
+mov(16) g4<1>UD 0x00000000UD { align1 1H };
+mov(16) g5<1>UD 0x00000000UD { align1 1H };
+mov(16) g6<1>UD 0x00000000UD { align1 1H };
+mov(16) g7<1>UD 0x00000000UD { align1 1H };
+mov(16) g8<1>UD 0x00000000UD { align1 1H };
+mov(16) g9<1>UD 0x00000000UD { align1 1H };
+mov(16) g10<1>UD 0x00000000UD { align1 1H };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+
+/* Now, clear all GRF registers */
+add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N };
+mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H };
+add(1) a0<1>D a0<0,1,0>D 32D { align1 1N };
+(+f0.0) jmpi(1) -64D { align1 WE_all 1N };
+
+/* Terminante the thread */
+sendc(8) null<1>UD g127<8,8,1>F 0x82000010
+ thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT };
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
new file mode 100644
index 000000000000..97c7ac9e3854
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Kernel for PAVP buffer clear.
+ *
+ * 1. Clear all 64 GRF registers assigned to the kernel with designated value;
+ * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears
+ * 512 bytes of Render Cache.
+ */
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/**
+ * Curbe Format
+ *
+ * DW 1.0 - Block Offset to write Render Cache
+ * DW 1.1 [15:0] - Clear Word
+ * DW 1.2 - Delay iterations
+ * DW 1.3 - Enable Instrumentation (only for debug)
+ * DW 1.4 - Rsvd (intended for context ID)
+ * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
+ * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ *
+ * Binding Table
+ *
+ * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
+ * BTI 1: Wait/Instrumentation Buffer
+ * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
+ * Expected to be initialized to 0 by driver/another kernel
+ * Layout :
+ * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
+ * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
+ */
+add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
+cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N };
+(+f0.0) jmpi(1) 44D { align1 WE_all 1N };
+
+/**
+ * State Register has info on where this thread is running
+ * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ */
+mov(8) g3<1>UD 0x00000000UD { align1 1Q };
+shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N };
+and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */
+shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N };
+and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */
+mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N };
+add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
+shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N };
+and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */
+mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N };
+add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */
+
+mov(8) g5<1>UD 0x00000000UD { align1 1Q };
+and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N };
+mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N };
+
+mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */
+mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */
+and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N };
+
+/* Media block read to fetch current value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x02190001
+ render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
+add(1) g5<1>D g5<0,1,0>D 1D { align1 1N };
+
+/* Media block write for updated value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
+/* Delay thread for specified parameter */
+add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N };
+(+f0.0) jmpi(1) -4D { align1 WE_all 1N };
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/* Initialize looping parameters */
+mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */
+mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */
+
+/* Write 32x16 all "0" block */
+mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q };
+mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q };
+mov(2) g2<1>UD g1<2,2,1>UW { align1 1N };
+mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */
+and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N };
+mov(16) g3<1>UD 0x00000000UD { align1 1H };
+mov(16) g4<1>UD 0x00000000UD { align1 1H };
+mov(16) g5<1>UD 0x00000000UD { align1 1H };
+mov(16) g6<1>UD 0x00000000UD { align1 1H };
+mov(16) g7<1>UD 0x00000000UD { align1 1H };
+mov(16) g8<1>UD 0x00000000UD { align1 1H };
+mov(16) g9<1>UD 0x00000000UD { align1 1H };
+mov(16) g10<1>UD 0x00000000UD { align1 1H };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+
+/* Now, clear all GRF registers */
+add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N };
+mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H };
+add(1) a0<1>D a0<0,1,0>D 32D { align1 1N };
+(+f0.0) jmpi(1) -8D { align1 WE_all 1N };
+
+/* Terminante the thread */
+sendc(8) null<1>UD g127<8,8,1>F 0x82000010
+ thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT };
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index ec47d4114554..62e6a14ad58e 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -66,7 +66,7 @@ static inline int mmio_diff_handler(struct intel_gvt *gvt,
vreg = vgpu_vreg(param->vgpu, offset);
if (preg != vreg) {
- node = kmalloc(sizeof(*node), GFP_KERNEL);
+ node = kmalloc(sizeof(*node), GFP_ATOMIC);
if (!node)
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 3e88e3b5c43a..fadd2adb8030 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1726,13 +1726,13 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(2);
write_vreg(vgpu, offset, p_data, bytes);
- if (data & _MASKED_BIT_ENABLE(1)) {
+ if (IS_MASKED_BITS_ENABLED(data, 1)) {
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
}
if (IS_COFFEELAKE(vgpu->gvt->gt->i915) &&
- data & _MASKED_BIT_ENABLE(2)) {
+ IS_MASKED_BITS_ENABLED(data, 2)) {
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
}
@@ -1741,14 +1741,14 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
* pvinfo, if not, we will treat this guest as non-gvtg-aware
* guest, and stop emulating its cfg space, mmio, gtt, etc.
*/
- if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) ||
- (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)))
- && !vgpu->pv_notified) {
+ if ((IS_MASKED_BITS_ENABLED(data, GFX_PPGTT_ENABLE) ||
+ IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE)) &&
+ !vgpu->pv_notified) {
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
}
- if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))
- || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) {
+ if (IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE) ||
+ IS_MASKED_BITS_DISABLED(data, GFX_RUN_LIST_ENABLE)) {
enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
gvt_dbg_core("EXECLIST %s on ring %s\n",
@@ -1809,7 +1809,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
write_vreg(vgpu, offset, p_data, bytes);
data = vgpu_vreg(vgpu, offset);
- if (data & _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET))
+ if (IS_MASKED_BITS_ENABLED(data, RESET_CTL_REQUEST_RESET))
data |= RESET_CTL_READY_TO_RESET;
else if (data & _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET))
data &= ~RESET_CTL_READY_TO_RESET;
@@ -1827,7 +1827,8 @@ static int csfe_chicken1_mmio_write(struct intel_vgpu *vgpu,
(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(0x18);
write_vreg(vgpu, offset, p_data, bytes);
- if (data & _MASKED_BIT_ENABLE(0x10) || data & _MASKED_BIT_ENABLE(0x8))
+ if (IS_MASKED_BITS_ENABLED(data, 0x10) ||
+ IS_MASKED_BITS_ENABLED(data, 0x8))
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
@@ -3055,6 +3056,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_D(_MMIO(0x72380), D_SKL_PLUS);
MMIO_D(_MMIO(0x7239c), D_SKL_PLUS);
MMIO_D(_MMIO(_PLANE_SURF_3_A), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_SURF_3_B), D_SKL_PLUS);
MMIO_D(CSR_SSP_BASE, D_SKL_PLUS);
MMIO_D(CSR_HTP_SKL, D_SKL_PLUS);
@@ -3131,8 +3133,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
- MMIO_D(GAMT_CHKN_BIT_REG, D_KBL);
- MMIO_D(GEN9_CTX_PREEMPT_REG, D_KBL | D_SKL);
+ MMIO_D(GAMT_CHKN_BIT_REG, D_KBL | D_CFL);
+ MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
index 970704b18f23..3b25e7fe32f6 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.h
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
@@ -54,8 +54,8 @@ bool is_inhibit_context(struct intel_context *ce);
int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
struct i915_request *req);
-#define IS_RESTORE_INHIBIT(a) \
- (_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) == \
- ((a) & _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT)))
+
+#define IS_RESTORE_INHIBIT(a) \
+ IS_MASKED_BITS_ENABLED(a, CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT)
#endif
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 5b66e14c5b7b..b88e033cbed4 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -94,6 +94,11 @@
#define GFX_MODE_BIT_SET_IN_MASK(val, bit) \
((((bit) & 0xffff0000) == 0) && !!((val) & (((bit) << 16))))
+#define IS_MASKED_BITS_ENABLED(_val, _b) \
+ (((_val) & _MASKED_BIT_ENABLE(_b)) == _MASKED_BIT_ENABLE(_b))
+#define IS_MASKED_BITS_DISABLED(_val, _b) \
+ ((_val) & _MASKED_BIT_DISABLE(_b))
+
#define FORCEWAKE_RENDER_GEN9_REG 0xa278
#define FORCEWAKE_ACK_RENDER_GEN9_REG 0x0D84
#define FORCEWAKE_BLITTER_GEN9_REG 0xa188
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bca036ac6621..e7532e7d74e9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -230,7 +230,7 @@ static int per_file_stats(int id, void *ptr, void *data)
struct file_stats *stats = data;
struct i915_vma *vma;
- if (!kref_get_unless_zero(&obj->base.refcount))
+ if (IS_ERR_OR_NULL(obj) || !kref_get_unless_zero(&obj->base.refcount))
return 0;
stats->count++;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index adb9bf34cf97..ae99a9190200 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -410,8 +410,6 @@ struct intel_fbc {
int adjusted_x;
int adjusted_y;
- int y;
-
u16 pixel_blend_mode;
} plane;
@@ -420,6 +418,8 @@ struct intel_fbc {
unsigned int stride;
u64 modifier;
} fb;
+
+ unsigned int fence_y_offset;
u16 gen9_wa_cfb_stride;
s8 fence_id;
} state_cache;
@@ -435,15 +435,16 @@ struct intel_fbc {
struct {
enum pipe pipe;
enum i9xx_plane_id i9xx_plane;
- unsigned int fence_y_offset;
} crtc;
struct {
const struct drm_format_info *format;
unsigned int stride;
+ u64 modifier;
} fb;
int cfb_size;
+ unsigned int fence_y_offset;
u16 gen9_wa_cfb_stride;
s8 fence_id;
bool plane_visible;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 25329b7600c9..014f34c047d5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1592,6 +1592,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
u32 d;
cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
+ cmd |= MI_SRM_LRM_GLOBAL_GTT;
if (INTEL_GEN(stream->perf->i915) >= 8)
cmd++;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fc14ebf9a0b7..1f9cd33b35cb 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -104,6 +104,7 @@ vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
+ struct i915_vma *pos = ERR_PTR(-E2BIG);
struct i915_vma *vma;
struct rb_node *rb, **p;
@@ -184,7 +185,6 @@ vma_create(struct drm_i915_gem_object *obj,
rb = NULL;
p = &obj->vma.tree.rb_node;
while (*p) {
- struct i915_vma *pos;
long cmp;
rb = *p;
@@ -196,16 +196,12 @@ vma_create(struct drm_i915_gem_object *obj,
* and dispose of ours.
*/
cmp = i915_vma_compare(pos, vm, view);
- if (cmp == 0) {
- spin_unlock(&obj->vma.lock);
- i915_vma_free(vma);
- return pos;
- }
-
if (cmp < 0)
p = &rb->rb_right;
- else
+ else if (cmp > 0)
p = &rb->rb_left;
+ else
+ goto err_unlock;
}
rb_link_node(&vma->obj_node, rb, p);
rb_insert_color(&vma->obj_node, &obj->vma.tree);
@@ -228,8 +224,9 @@ vma_create(struct drm_i915_gem_object *obj,
err_unlock:
spin_unlock(&obj->vma.lock);
err_vma:
+ i915_vm_put(vm);
i915_vma_free(vma);
- return ERR_PTR(-E2BIG);
+ return pos;
}
static struct i915_vma *