diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
106 files changed, 2486 insertions, 1757 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 25cd9788a4d5..72a38f28393f 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -19,6 +19,8 @@ config DRM_I915_WERROR config DRM_I915_DEBUG bool "Enable additional driver debugging" depends on DRM_I915 + depends on EXPERT # only for developers + depends on !COMPILE_TEST # never built by robots select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV @@ -31,10 +33,13 @@ config DRM_I915_DEBUG select DRM_DEBUG_SELFTEST select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) + select DRM_I915_WERROR + select DRM_I915_DEBUG_GEM + select DRM_I915_DEBUG_GEM_ONCE + select DRM_I915_DEBUG_MMIO + select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST - select DRM_I915_DEBUG_RUNTIME_PM - select DRM_I915_DEBUG_MMIO default n help Choose this option to turn on extra driver debugging that may affect @@ -69,6 +74,21 @@ config DRM_I915_DEBUG_GEM If in doubt, say "N". +config DRM_I915_DEBUG_GEM_ONCE + bool "Make a GEM debug failure fatal" + default n + depends on DRM_I915_DEBUG_GEM + help + During development, we often only want the very first failure + as that would otherwise be lost in the deluge of subsequent + failures. However, more casual testers may not want to trigger + a hard BUG_ON and hope that the system remains sufficiently usable + to capture a bug report in situ. + + Recommended for driver developers only. + + If in doubt, say "N". 
+ config DRM_I915_ERRLOG_GEM bool "Insert extra logging (very verbose) for common GEM errors" default n diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8af63b2afd26..921db06232c3 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -135,6 +135,7 @@ gem-y += \ gem/i915_gem_clflush.o \ gem/i915_gem_client_blt.o \ gem/i915_gem_context.o \ + gem/i915_gem_create.o \ gem/i915_gem_dmabuf.o \ gem/i915_gem_domain.o \ gem/i915_gem_execbuffer.o \ @@ -299,7 +300,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o no-header-test := \ display/intel_vbt_defs.h -extra-$(CONFIG_DRM_I915_WERROR) += \ +always-$(CONFIG_DRM_I915_WERROR) += \ $(patsubst %.h,%.hdrtest, $(filter-out $(no-header-test), \ $(shell cd $(srctree)/$(src) && find * -name '*.h'))) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 05d5709ae537..7f2abc088a66 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -655,6 +655,24 @@ static void gen11_dsi_ungate_clocks(struct intel_encoder *encoder) mutex_unlock(&dev_priv->dpll.lock); } +static bool gen11_dsi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + bool clock_enabled = false; + enum phy phy; + u32 tmp; + + tmp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0); + + for_each_dsi_phy(phy, intel_dsi->phys) { + if (!(tmp & ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy))) + clock_enabled = true; + } + + return clock_enabled; +} + static void gen11_dsi_map_pll(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1490,14 +1508,10 @@ static void gen11_dsi_get_cmd_mode_config(struct intel_dsi *intel_dsi, static void gen11_dsi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct 
intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - /* FIXME: adapt icl_ddi_clock_get() for DSI and use that? */ - pipe_config->port_clock = intel_dpll_get_freq(i915, - pipe_config->shared_dpll, - &pipe_config->dpll_hw_state); + intel_ddi_get_clock(encoder, pipe_config, icl_ddi_combo_get_pll(encoder)); pipe_config->hw.adjusted_mode.crtc_clock = intel_dsi->pclk; if (intel_dsi->dual_link) @@ -1943,6 +1957,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->power_domain = POWER_DOMAIN_PORT_DSI; encoder->get_power_domains = gen11_dsi_get_power_domains; encoder->disable_clock = gen11_dsi_gate_clocks; + encoder->is_clock_enabled = gen11_dsi_is_clock_enabled; /* register DSI connector with DRM subsystem */ drm_connector_init(dev, connector, &gen11_dsi_connector_funcs, diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 91a8a42b4aa2..7f3d11c5ce3e 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -142,7 +142,7 @@ static void hsw_crt_get_config(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - intel_ddi_get_config(encoder, pipe_config); + hsw_ddi_get_config(encoder, pipe_config); pipe_config->hw.adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC | @@ -1078,6 +1078,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.post_disable = hsw_post_disable_crt; crt->base.enable_clock = hsw_ddi_enable_clock; crt->base.disable_clock = hsw_ddi_disable_clock; + crt->base.is_clock_enabled = hsw_ddi_is_clock_enabled; } else { if (HAS_PCH_SPLIT(dev_priv)) { crt->base.compute_config = pch_crt_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 88b44ac50aae..3248f49999bb 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ 
b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -39,6 +39,9 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) struct drm_device *dev = crtc->base.dev; struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; + if (!crtc->active) + return 0; + if (!vblank->max_vblank_count) return (u32)drm_crtc_accurate_vblank_count(&crtc->base); @@ -48,8 +51,6 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - u32 mode_flags = crtc->mode_flags; /* * From Gen 11, In case of dsi cmd mode, frame counter wouldnt @@ -57,7 +58,8 @@ u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) * the hw counter, then we would find it updated in only * the next TE, hence switching to sw counter. */ - if (mode_flags & (I915_MODE_FLAG_DSI_USE_TE0 | I915_MODE_FLAG_DSI_USE_TE1)) + if (crtc_state->mode_flags & (I915_MODE_FLAG_DSI_USE_TE0 | + I915_MODE_FLAG_DSI_USE_TE1)) return 0; /* @@ -84,12 +86,26 @@ void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) drm_crtc_set_max_vblank_count(&crtc->base, intel_crtc_max_vblank_count(crtc_state)); drm_crtc_vblank_on(&crtc->base); + + /* + * Should really happen exactly when we enable the pipe + * but we want the frame counters in the trace, and that + * requires vblank support on some platforms/outputs. + */ + trace_intel_pipe_enable(crtc); } void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + /* + * Should really happen exactly when we disable the pipe + * but we want the frame counters in the trace, and that + * requires vblank support on some platforms/outputs. 
+ */ + trace_intel_pipe_disable(crtc); + drm_crtc_vblank_off(&crtc->base); assert_vblank_disabled(&crtc->base); } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index eeae78097a20..64a952db8528 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -28,7 +28,6 @@ #include <drm/drm_scdc_helper.h> #include "i915_drv.h" -#include "i915_trace.h" #include "intel_audio.h" #include "intel_combo_phy.h" #include "intel_connector.h" @@ -304,25 +303,6 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config) pipe_config->hw.adjusted_mode.crtc_clock = dotclock; } -static void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - - if (intel_phy_is_tc(dev_priv, phy) && - intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll) == - DPLL_ID_ICL_TBTPLL) - pipe_config->port_clock = icl_calc_tbt_pll_link(dev_priv, - encoder->port); - else - pipe_config->port_clock = - intel_dpll_get_freq(dev_priv, pipe_config->shared_dpll, - &pipe_config->dpll_hw_state); - - ddi_dotclock_get(pipe_config); -} - void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { @@ -1608,6 +1588,23 @@ static void _cnl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg mutex_unlock(&i915->dpll.lock); } +static bool _cnl_ddi_is_clock_enabled(struct drm_i915_private *i915, i915_reg_t reg, + u32 clk_off) +{ + return !(intel_de_read(i915, reg) & clk_off); +} + +static struct intel_shared_dpll * +_cnl_ddi_get_pll(struct drm_i915_private *i915, i915_reg_t reg, + u32 clk_sel_mask, u32 clk_sel_shift) +{ + enum intel_dpll_id id; + + id = (intel_de_read(i915, reg) & clk_sel_mask) >> clk_sel_shift; + + return intel_get_shared_dpll_by_id(i915, id); +} + static void 
adls_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1633,6 +1630,25 @@ static void adls_ddi_disable_clock(struct intel_encoder *encoder) ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } +static bool adls_ddi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_is_clock_enabled(i915, ADLS_DPCLKA_CFGCR(phy), + ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); +} + +static struct intel_shared_dpll *adls_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_get_pll(i915, ADLS_DPCLKA_CFGCR(phy), + ADLS_DPCLKA_CFGCR_DDI_CLK_SEL_MASK(phy), + ADLS_DPCLKA_CFGCR_DDI_SHIFT(phy)); +} + static void rkl_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1658,6 +1674,25 @@ static void rkl_ddi_disable_clock(struct intel_encoder *encoder) RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } +static bool rkl_ddi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, + RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); +} + +static struct intel_shared_dpll *rkl_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, + RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), + RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); +} + static void dg1_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1692,6 +1727,25 @@ static void dg1_ddi_disable_clock(struct intel_encoder *encoder) DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } 
+static bool dg1_ddi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_is_clock_enabled(i915, DG1_DPCLKA_CFGCR0(phy), + DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); +} + +static struct intel_shared_dpll *dg1_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_get_pll(i915, DG1_DPCLKA_CFGCR0(phy), + DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), + DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); +} + static void icl_ddi_combo_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1717,6 +1771,25 @@ static void icl_ddi_combo_disable_clock(struct intel_encoder *encoder) ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } +static bool icl_ddi_combo_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, + ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); +} + +struct intel_shared_dpll *icl_ddi_combo_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + return _cnl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, + ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), + ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); +} + static void jsl_ddi_tc_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1746,6 +1819,20 @@ static void jsl_ddi_tc_disable_clock(struct intel_encoder *encoder) intel_de_write(i915, DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); } +static bool jsl_ddi_tc_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum port port = encoder->port; + u32 
tmp; + + tmp = intel_de_read(i915, DDI_CLK_SEL(port)); + + if ((tmp & DDI_CLK_SEL_MASK) == DDI_CLK_SEL_NONE) + return false; + + return icl_ddi_combo_is_clock_enabled(encoder); +} + static void icl_ddi_tc_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1784,6 +1871,53 @@ static void icl_ddi_tc_disable_clock(struct intel_encoder *encoder) intel_de_write(i915, DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); } +static bool icl_ddi_tc_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum tc_port tc_port = intel_port_to_tc(i915, encoder->port); + enum port port = encoder->port; + u32 tmp; + + tmp = intel_de_read(i915, DDI_CLK_SEL(port)); + + if ((tmp & DDI_CLK_SEL_MASK) == DDI_CLK_SEL_NONE) + return false; + + tmp = intel_de_read(i915, ICL_DPCLKA_CFGCR0); + + return !(tmp & ICL_DPCLKA_CFGCR0_TC_CLK_OFF(tc_port)); +} + +static struct intel_shared_dpll *icl_ddi_tc_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum tc_port tc_port = intel_port_to_tc(i915, encoder->port); + enum port port = encoder->port; + enum intel_dpll_id id; + u32 tmp; + + tmp = intel_de_read(i915, DDI_CLK_SEL(port)); + + switch (tmp & DDI_CLK_SEL_MASK) { + case DDI_CLK_SEL_TBT_162: + case DDI_CLK_SEL_TBT_270: + case DDI_CLK_SEL_TBT_540: + case DDI_CLK_SEL_TBT_810: + id = DPLL_ID_ICL_TBTPLL; + break; + case DDI_CLK_SEL_MG: + id = icl_tc_port_to_pll_id(tc_port); + break; + default: + MISSING_CASE(tmp); + fallthrough; + case DDI_CLK_SEL_NONE: + return NULL; + } + + return intel_get_shared_dpll_by_id(i915, id); +} + static void cnl_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1809,6 +1943,48 @@ static void cnl_ddi_disable_clock(struct intel_encoder *encoder) DPCLKA_CFGCR0_DDI_CLK_OFF(port)); } +static bool cnl_ddi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 
= to_i915(encoder->base.dev); + enum port port = encoder->port; + + return _cnl_ddi_is_clock_enabled(i915, DPCLKA_CFGCR0, + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); +} + +static struct intel_shared_dpll *cnl_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum port port = encoder->port; + + return _cnl_ddi_get_pll(i915, DPCLKA_CFGCR0, + DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port), + DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)); +} + +static struct intel_shared_dpll *bxt_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum intel_dpll_id id; + + switch (encoder->port) { + case PORT_A: + id = DPLL_ID_SKL_DPLL0; + break; + case PORT_B: + id = DPLL_ID_SKL_DPLL1; + break; + case PORT_C: + id = DPLL_ID_SKL_DPLL2; + break; + default: + MISSING_CASE(encoder->port); + return NULL; + } + + return intel_get_shared_dpll_by_id(i915, id); +} + static void skl_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1843,6 +2019,40 @@ static void skl_ddi_disable_clock(struct intel_encoder *encoder) mutex_unlock(&i915->dpll.lock); } +static bool skl_ddi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum port port = encoder->port; + + /* + * FIXME Not sure if the override affects both + * the PLL selection and the CLK_OFF bit. + */ + return !(intel_de_read(i915, DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_OFF(port)); +} + +static struct intel_shared_dpll *skl_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum port port = encoder->port; + enum intel_dpll_id id; + u32 tmp; + + tmp = intel_de_read(i915, DPLL_CTRL2); + + /* + * FIXME Not sure if the override affects both + * the PLL selection and the CLK_OFF bit. 
+ */ + if ((tmp & DPLL_CTRL2_DDI_SEL_OVERRIDE(port)) == 0) + return NULL; + + id = (tmp & DPLL_CTRL2_DDI_CLK_SEL_MASK(port)) >> + DPLL_CTRL2_DDI_CLK_SEL_SHIFT(port); + + return intel_get_shared_dpll_by_id(i915, id); +} + void hsw_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1864,6 +2074,52 @@ void hsw_ddi_disable_clock(struct intel_encoder *encoder) intel_de_write(i915, PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); } +bool hsw_ddi_is_clock_enabled(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum port port = encoder->port; + + return intel_de_read(i915, PORT_CLK_SEL(port)) != PORT_CLK_SEL_NONE; +} + +static struct intel_shared_dpll *hsw_ddi_get_pll(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum port port = encoder->port; + enum intel_dpll_id id; + u32 tmp; + + tmp = intel_de_read(i915, PORT_CLK_SEL(port)); + + switch (tmp & PORT_CLK_SEL_MASK) { + case PORT_CLK_SEL_WRPLL1: + id = DPLL_ID_WRPLL1; + break; + case PORT_CLK_SEL_WRPLL2: + id = DPLL_ID_WRPLL2; + break; + case PORT_CLK_SEL_SPLL: + id = DPLL_ID_SPLL; + break; + case PORT_CLK_SEL_LCPLL_810: + id = DPLL_ID_LCPLL_810; + break; + case PORT_CLK_SEL_LCPLL_1350: + id = DPLL_ID_LCPLL_1350; + break; + case PORT_CLK_SEL_LCPLL_2700: + id = DPLL_ID_LCPLL_2700; + break; + default: + MISSING_CASE(tmp); + fallthrough; + case PORT_CLK_SEL_NONE: + return NULL; + } + + return intel_get_shared_dpll_by_id(i915, id); +} + void intel_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -1877,7 +2133,7 @@ static void intel_ddi_disable_clock(struct intel_encoder *encoder) encoder->disable_clock(encoder); } -void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) +void intel_ddi_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); u32 port_mask; @@ -1929,8 
+2185,15 @@ void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) ddi_clk_needed = false; } - if (!ddi_clk_needed && encoder->disable_clock) - encoder->disable_clock(encoder); + if (ddi_clk_needed || !encoder->disable_clock || + !encoder->is_clock_enabled(encoder)) + return; + + drm_notice(&i915->drm, + "[ENCODER:%d:%s] is disabled/in DSI mode with an ungated DDI clock, gate it\n", + encoder->base.base.id, encoder->base.name); + + encoder->disable_clock(encoder); } static void @@ -2136,6 +2399,73 @@ static void intel_ddi_power_up_lanes(struct intel_encoder *encoder, } } +static void intel_ddi_mso_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dss1; + + if (!HAS_MSO(i915)) + return; + + dss1 = intel_de_read(i915, ICL_PIPE_DSS_CTL1(pipe)); + + pipe_config->splitter.enable = dss1 & SPLITTER_ENABLE; + if (!pipe_config->splitter.enable) + return; + + /* Splitter enable is supported for pipe A only. 
*/ + if (drm_WARN_ON(&i915->drm, pipe != PIPE_A)) { + pipe_config->splitter.enable = false; + return; + } + + switch (dss1 & SPLITTER_CONFIGURATION_MASK) { + default: + drm_WARN(&i915->drm, true, + "Invalid splitter configuration, dss1=0x%08x\n", dss1); + fallthrough; + case SPLITTER_CONFIGURATION_2_SEGMENT: + pipe_config->splitter.link_count = 2; + break; + case SPLITTER_CONFIGURATION_4_SEGMENT: + pipe_config->splitter.link_count = 4; + break; + } + + pipe_config->splitter.pixel_overlap = REG_FIELD_GET(OVERLAP_PIXELS_MASK, dss1); +} + +static void intel_ddi_mso_configure(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dss1 = 0; + + if (!HAS_MSO(i915)) + return; + + if (crtc_state->splitter.enable) { + /* Splitter enable is supported for pipe A only. */ + if (drm_WARN_ON(&i915->drm, pipe != PIPE_A)) + return; + + dss1 |= SPLITTER_ENABLE; + dss1 |= OVERLAP_PIXELS(crtc_state->splitter.pixel_overlap); + if (crtc_state->splitter.link_count == 2) + dss1 |= SPLITTER_CONFIGURATION_2_SEGMENT; + else + dss1 |= SPLITTER_CONFIGURATION_4_SEGMENT; + } + + intel_de_rmw(i915, ICL_PIPE_DSS_CTL1(pipe), + SPLITTER_ENABLE | SPLITTER_CONFIGURATION_MASK | + OVERLAP_PIXELS_MASK, dss1); +} + static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -2230,6 +2560,11 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_power_up_lanes(encoder, crtc_state); /* + * 7.g Program CoG/MSO configuration bits in DSS_CTL1 if selected. + */ + intel_ddi_mso_configure(crtc_state); + + /* * 7.g Configure and enable DDI_BUF_CTL * 7.h Wait for DDI_BUF_CTL DDI Idle Status = 0b (Not Idle), timeout * after 500 us. 
@@ -2584,7 +2919,6 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state, intel_atomic_get_old_crtc_state(state, slave); intel_crtc_vblank_off(old_slave_crtc_state); - trace_intel_pipe_disable(slave); intel_dsc_disable(old_slave_crtc_state); skl_scaler_disable(old_slave_crtc_state); @@ -3293,8 +3627,8 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, } } -void intel_ddi_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) +static void intel_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; @@ -3316,6 +3650,8 @@ void intel_ddi_get_config(struct intel_encoder *encoder, intel_ddi_read_func_ctl(encoder, pipe_config); } + intel_ddi_mso_get_config(encoder, pipe_config); + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); @@ -3341,7 +3677,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, } if (!pipe_config->bigjoiner_slave) - intel_ddi_clock_get(encoder, pipe_config); + ddi_dotclock_get(pipe_config); if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = @@ -3371,6 +3707,123 @@ void intel_ddi_get_config(struct intel_encoder *encoder, intel_read_dp_sdp(encoder, pipe_config, DP_SDP_VSC); } +void intel_ddi_get_clock(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct intel_shared_dpll *pll) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum icl_port_dpll_id port_dpll_id = ICL_PORT_DPLL_DEFAULT; + struct icl_port_dpll *port_dpll = &crtc_state->icl_port_dplls[port_dpll_id]; + bool pll_active; + + if (drm_WARN_ON(&i915->drm, !pll)) + return; + + port_dpll->pll = pll; + pll_active = intel_dpll_get_hw_state(i915, pll, &port_dpll->hw_state); + drm_WARN_ON(&i915->drm, !pll_active); + + icl_set_active_port_dpll(crtc_state, port_dpll_id); + + crtc_state->port_clock 
= intel_dpll_get_freq(i915, crtc_state->shared_dpll, + &crtc_state->dpll_hw_state); +} + +static void adls_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, adls_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void rkl_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, rkl_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void dg1_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, dg1_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void icl_ddi_combo_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, icl_ddi_combo_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void icl_ddi_tc_get_clock(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct intel_shared_dpll *pll) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum icl_port_dpll_id port_dpll_id; + struct icl_port_dpll *port_dpll; + bool pll_active; + + if (drm_WARN_ON(&i915->drm, !pll)) + return; + + if (intel_get_shared_dpll_id(i915, pll) == DPLL_ID_ICL_TBTPLL) + port_dpll_id = ICL_PORT_DPLL_DEFAULT; + else + port_dpll_id = ICL_PORT_DPLL_MG_PHY; + + port_dpll = &crtc_state->icl_port_dplls[port_dpll_id]; + + port_dpll->pll = pll; + pll_active = intel_dpll_get_hw_state(i915, pll, &port_dpll->hw_state); + drm_WARN_ON(&i915->drm, !pll_active); + + icl_set_active_port_dpll(crtc_state, port_dpll_id); + + if (intel_get_shared_dpll_id(i915, crtc_state->shared_dpll) == DPLL_ID_ICL_TBTPLL) + crtc_state->port_clock = icl_calc_tbt_pll_link(i915, encoder->port); + else + crtc_state->port_clock = intel_dpll_get_freq(i915, crtc_state->shared_dpll, + 
&crtc_state->dpll_hw_state); +} + +static void icl_ddi_tc_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + icl_ddi_tc_get_clock(encoder, crtc_state, icl_ddi_tc_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void cnl_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, cnl_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void bxt_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, bxt_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +static void skl_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, skl_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + +void hsw_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_ddi_get_clock(encoder, crtc_state, hsw_ddi_get_pll(encoder)); + intel_ddi_get_config(encoder, crtc_state); +} + static void intel_ddi_sync_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -4057,7 +4510,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->post_disable = intel_ddi_post_disable; encoder->update_pipe = intel_ddi_update_pipe; encoder->get_hw_state = intel_ddi_get_hw_state; - encoder->get_config = intel_ddi_get_config; encoder->sync_state = intel_ddi_sync_state; encoder->initial_fastset_check = intel_ddi_initial_fastset_check; encoder->suspend = intel_dp_encoder_suspend; @@ -4073,37 +4525,60 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) if (IS_ALDERLAKE_S(dev_priv)) { encoder->enable_clock = adls_ddi_enable_clock; encoder->disable_clock = adls_ddi_disable_clock; + encoder->is_clock_enabled = adls_ddi_is_clock_enabled; + 
encoder->get_config = adls_ddi_get_config; } else if (IS_ROCKETLAKE(dev_priv)) { encoder->enable_clock = rkl_ddi_enable_clock; encoder->disable_clock = rkl_ddi_disable_clock; + encoder->is_clock_enabled = rkl_ddi_is_clock_enabled; + encoder->get_config = rkl_ddi_get_config; } else if (IS_DG1(dev_priv)) { encoder->enable_clock = dg1_ddi_enable_clock; encoder->disable_clock = dg1_ddi_disable_clock; + encoder->is_clock_enabled = dg1_ddi_is_clock_enabled; + encoder->get_config = dg1_ddi_get_config; } else if (IS_JSL_EHL(dev_priv)) { if (intel_ddi_is_tc(dev_priv, port)) { encoder->enable_clock = jsl_ddi_tc_enable_clock; encoder->disable_clock = jsl_ddi_tc_disable_clock; + encoder->is_clock_enabled = jsl_ddi_tc_is_clock_enabled; + encoder->get_config = icl_ddi_combo_get_config; } else { encoder->enable_clock = icl_ddi_combo_enable_clock; encoder->disable_clock = icl_ddi_combo_disable_clock; + encoder->is_clock_enabled = icl_ddi_combo_is_clock_enabled; + encoder->get_config = icl_ddi_combo_get_config; } } else if (INTEL_GEN(dev_priv) >= 11) { if (intel_ddi_is_tc(dev_priv, port)) { encoder->enable_clock = icl_ddi_tc_enable_clock; encoder->disable_clock = icl_ddi_tc_disable_clock; + encoder->is_clock_enabled = icl_ddi_tc_is_clock_enabled; + encoder->get_config = icl_ddi_tc_get_config; } else { encoder->enable_clock = icl_ddi_combo_enable_clock; encoder->disable_clock = icl_ddi_combo_disable_clock; + encoder->is_clock_enabled = icl_ddi_combo_is_clock_enabled; + encoder->get_config = icl_ddi_combo_get_config; } } else if (IS_CANNONLAKE(dev_priv)) { encoder->enable_clock = cnl_ddi_enable_clock; encoder->disable_clock = cnl_ddi_disable_clock; + encoder->is_clock_enabled = cnl_ddi_is_clock_enabled; + encoder->get_config = cnl_ddi_get_config; + } else if (IS_GEN9_LP(dev_priv)) { + /* BXT/GLK have fixed PLL->port mapping */ + encoder->get_config = bxt_ddi_get_config; } else if (IS_GEN9_BC(dev_priv)) { encoder->enable_clock = skl_ddi_enable_clock; encoder->disable_clock = 
skl_ddi_disable_clock; + encoder->is_clock_enabled = skl_ddi_is_clock_enabled; + encoder->get_config = skl_ddi_get_config; } else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { encoder->enable_clock = hsw_ddi_enable_clock; encoder->disable_clock = hsw_ddi_disable_clock; + encoder->is_clock_enabled = hsw_ddi_is_clock_enabled; + encoder->get_config = hsw_ddi_get_config; } if (IS_DG1(dev_priv)) @@ -4159,6 +4634,10 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) goto err; dig_port->hpd_pulse = intel_dp_hpd_pulse; + + /* Splitter enable for eDP MSO is supported for pipe A only. */ + if (dig_port->dp.mso_link_count) + encoder->pipe_mask = BIT(PIPE_A); } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 4a0c1d5c85e7..59c6b01d4199 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -30,9 +30,16 @@ void intel_ddi_fdi_post_disable(struct intel_atomic_state *state, const struct drm_connector_state *old_conn_state); void intel_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); +void intel_ddi_get_clock(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct intel_shared_dpll *pll); void hsw_ddi_enable_clock(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void hsw_ddi_disable_clock(struct intel_encoder *encoder); +bool hsw_ddi_is_clock_enabled(struct intel_encoder *encoder); +void hsw_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state); +struct intel_shared_dpll *icl_ddi_combo_get_pll(struct intel_encoder *encoder); void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, @@ -48,8 +55,6 @@ void intel_ddi_disable_pipe_clock(const struct 
intel_crtc_state *crtc_state); void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); -void intel_ddi_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, bool state); void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, @@ -61,6 +66,6 @@ u32 ddi_signal_levels(struct intel_dp *intel_dp, int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder, enum transcoder cpu_transcoder, bool enable, u32 hdcp_mask); -void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder); +void intel_ddi_sanitize_encoder_pll_mapping(struct intel_encoder *encoder); #endif /* __INTEL_DDI_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e1060076ac83..3957f7497461 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -67,7 +67,6 @@ #include "gt/intel_rps.h" #include "i915_drv.h" -#include "i915_trace.h" #include "intel_acpi.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" @@ -794,8 +793,6 @@ void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) /* FIXME: assert CPU port conditions for SNB+ */ } - trace_intel_pipe_enable(crtc); - reg = PIPECONF(cpu_transcoder); val = intel_de_read(dev_priv, reg); if (val & PIPECONF_ENABLE) { @@ -835,8 +832,6 @@ void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) */ assert_planes_disabled(crtc); - trace_intel_pipe_disable(crtc); - reg = PIPECONF(cpu_transcoder); val = intel_de_read(dev_priv, reg); if ((val & PIPECONF_ENABLE) == 0) @@ -1173,7 +1168,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ ret = i915_vma_pin_fence(vma); if (ret != 0 && INTEL_GEN(dev_priv) < 4) { - 
i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); vma = ERR_PTR(ret); goto err; } @@ -1191,12 +1186,9 @@ err: void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - i915_gem_object_lock(vma->obj, NULL); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); - i915_gem_object_unpin_from_display_plane(vma); - i915_gem_object_unlock(vma->obj); - + i915_vma_unpin(vma); i915_vma_put(vma); } @@ -4026,10 +4018,8 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, if (INTEL_GEN(dev_priv) >= 11) icl_pipe_mbus_enable(crtc); - if (new_crtc_state->bigjoiner_slave) { - trace_intel_pipe_enable(crtc); + if (new_crtc_state->bigjoiner_slave) intel_crtc_vblank_on(new_crtc_state); - } intel_encoders_enable(state, crtc); @@ -4862,8 +4852,30 @@ static void intel_crtc_readout_derived_state(struct intel_crtc_state *crtc_state pipe_mode->crtc_clock /= 2; } - intel_mode_from_crtc_timings(pipe_mode, pipe_mode); - intel_mode_from_crtc_timings(adjusted_mode, adjusted_mode); + if (crtc_state->splitter.enable) { + int n = crtc_state->splitter.link_count; + int overlap = crtc_state->splitter.pixel_overlap; + + /* + * eDP MSO uses segment timings from EDID for transcoder + * timings, but full mode for everything else. 
+ * + * h_full = (h_segment - pixel_overlap) * link_count + */ + pipe_mode->crtc_hdisplay = (pipe_mode->crtc_hdisplay - overlap) * n; + pipe_mode->crtc_hblank_start = (pipe_mode->crtc_hblank_start - overlap) * n; + pipe_mode->crtc_hblank_end = (pipe_mode->crtc_hblank_end - overlap) * n; + pipe_mode->crtc_hsync_start = (pipe_mode->crtc_hsync_start - overlap) * n; + pipe_mode->crtc_hsync_end = (pipe_mode->crtc_hsync_end - overlap) * n; + pipe_mode->crtc_htotal = (pipe_mode->crtc_htotal - overlap) * n; + pipe_mode->crtc_clock *= n; + + intel_mode_from_crtc_timings(pipe_mode, pipe_mode); + intel_mode_from_crtc_timings(adjusted_mode, pipe_mode); + } else { + intel_mode_from_crtc_timings(pipe_mode, pipe_mode); + intel_mode_from_crtc_timings(adjusted_mode, adjusted_mode); + } intel_crtc_compute_pixel_rate(crtc_state); @@ -4901,6 +4913,19 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, pipe_config->pipe_src_w /= 2; } + if (pipe_config->splitter.enable) { + int n = pipe_config->splitter.link_count; + int overlap = pipe_config->splitter.pixel_overlap; + + pipe_mode->crtc_hdisplay = (pipe_mode->crtc_hdisplay - overlap) * n; + pipe_mode->crtc_hblank_start = (pipe_mode->crtc_hblank_start - overlap) * n; + pipe_mode->crtc_hblank_end = (pipe_mode->crtc_hblank_end - overlap) * n; + pipe_mode->crtc_hsync_start = (pipe_mode->crtc_hsync_start - overlap) * n; + pipe_mode->crtc_hsync_end = (pipe_mode->crtc_hsync_end - overlap) * n; + pipe_mode->crtc_htotal = (pipe_mode->crtc_htotal - overlap) * n; + pipe_mode->crtc_clock *= n; + } + intel_mode_from_crtc_timings(pipe_mode, pipe_mode); if (INTEL_GEN(dev_priv) < 4) { @@ -6526,212 +6551,6 @@ out: return ret; } -static void dg1_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, - struct intel_crtc_state *pipe_config) -{ - enum icl_port_dpll_id port_dpll_id = ICL_PORT_DPLL_DEFAULT; - enum phy phy = intel_port_to_phy(dev_priv, port); - struct icl_port_dpll *port_dpll; - struct intel_shared_dpll *pll; - enum 
intel_dpll_id id; - bool pll_active; - u32 clk_sel; - - clk_sel = intel_de_read(dev_priv, DG1_DPCLKA_CFGCR0(phy)) & DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - id = DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_DPLL_MAP(clk_sel, phy); - - if (WARN_ON(id > DPLL_ID_DG1_DPLL3)) - return; - - pll = intel_get_shared_dpll_by_id(dev_priv, id); - port_dpll = &pipe_config->icl_port_dplls[port_dpll_id]; - - port_dpll->pll = pll; - pll_active = intel_dpll_get_hw_state(dev_priv, pll, - &port_dpll->hw_state); - drm_WARN_ON(&dev_priv->drm, !pll_active); - - icl_set_active_port_dpll(pipe_config, port_dpll_id); -} - -static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, - struct intel_crtc_state *pipe_config) -{ - enum phy phy = intel_port_to_phy(dev_priv, port); - enum icl_port_dpll_id port_dpll_id; - struct icl_port_dpll *port_dpll; - struct intel_shared_dpll *pll; - enum intel_dpll_id id; - bool pll_active; - i915_reg_t reg; - u32 temp; - - if (intel_phy_is_combo(dev_priv, phy)) { - u32 mask, shift; - - if (IS_ALDERLAKE_S(dev_priv)) { - reg = ADLS_DPCLKA_CFGCR(phy); - mask = ADLS_DPCLKA_CFGCR_DDI_CLK_SEL_MASK(phy); - shift = ADLS_DPCLKA_CFGCR_DDI_SHIFT(phy); - } else if (IS_ROCKETLAKE(dev_priv)) { - reg = ICL_DPCLKA_CFGCR0; - mask = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - shift = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); - } else { - reg = ICL_DPCLKA_CFGCR0; - mask = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - shift = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); - } - - temp = intel_de_read(dev_priv, reg) & mask; - id = temp >> shift; - port_dpll_id = ICL_PORT_DPLL_DEFAULT; - } else if (intel_phy_is_tc(dev_priv, phy)) { - u32 clk_sel = intel_de_read(dev_priv, DDI_CLK_SEL(port)) & DDI_CLK_SEL_MASK; - - if (clk_sel == DDI_CLK_SEL_MG) { - id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, - port)); - port_dpll_id = ICL_PORT_DPLL_MG_PHY; - } else { - drm_WARN_ON(&dev_priv->drm, - clk_sel < DDI_CLK_SEL_TBT_162); - id = DPLL_ID_ICL_TBTPLL; - port_dpll_id = 
ICL_PORT_DPLL_DEFAULT; - } - } else { - drm_WARN(&dev_priv->drm, 1, "Invalid port %x\n", port); - return; - } - - pll = intel_get_shared_dpll_by_id(dev_priv, id); - port_dpll = &pipe_config->icl_port_dplls[port_dpll_id]; - - port_dpll->pll = pll; - pll_active = intel_dpll_get_hw_state(dev_priv, pll, - &port_dpll->hw_state); - drm_WARN_ON(&dev_priv->drm, !pll_active); - - icl_set_active_port_dpll(pipe_config, port_dpll_id); -} - -static void cnl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, - struct intel_crtc_state *pipe_config) -{ - struct intel_shared_dpll *pll; - enum intel_dpll_id id; - bool pll_active; - u32 temp; - - temp = intel_de_read(dev_priv, DPCLKA_CFGCR0) & DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - id = temp >> DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port); - - if (drm_WARN_ON(&dev_priv->drm, id < SKL_DPLL0 || id > SKL_DPLL2)) - return; - - pll = intel_get_shared_dpll_by_id(dev_priv, id); - - pipe_config->shared_dpll = pll; - pll_active = intel_dpll_get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state); - drm_WARN_ON(&dev_priv->drm, !pll_active); -} - -static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv, - enum port port, - struct intel_crtc_state *pipe_config) -{ - struct intel_shared_dpll *pll; - enum intel_dpll_id id; - bool pll_active; - - switch (port) { - case PORT_A: - id = DPLL_ID_SKL_DPLL0; - break; - case PORT_B: - id = DPLL_ID_SKL_DPLL1; - break; - case PORT_C: - id = DPLL_ID_SKL_DPLL2; - break; - default: - drm_err(&dev_priv->drm, "Incorrect port type\n"); - return; - } - - pll = intel_get_shared_dpll_by_id(dev_priv, id); - - pipe_config->shared_dpll = pll; - pll_active = intel_dpll_get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state); - drm_WARN_ON(&dev_priv->drm, !pll_active); -} - -static void skl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, - struct intel_crtc_state *pipe_config) -{ - struct intel_shared_dpll *pll; - enum intel_dpll_id id; - bool pll_active; - u32 temp; - - temp = 
intel_de_read(dev_priv, DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_SEL_MASK(port); - id = temp >> (port * 3 + 1); - - if (drm_WARN_ON(&dev_priv->drm, id < SKL_DPLL0 || id > SKL_DPLL3)) - return; - - pll = intel_get_shared_dpll_by_id(dev_priv, id); - - pipe_config->shared_dpll = pll; - pll_active = intel_dpll_get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state); - drm_WARN_ON(&dev_priv->drm, !pll_active); -} - -static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, - struct intel_crtc_state *pipe_config) -{ - struct intel_shared_dpll *pll; - enum intel_dpll_id id; - u32 ddi_pll_sel = intel_de_read(dev_priv, PORT_CLK_SEL(port)); - bool pll_active; - - switch (ddi_pll_sel) { - case PORT_CLK_SEL_WRPLL1: - id = DPLL_ID_WRPLL1; - break; - case PORT_CLK_SEL_WRPLL2: - id = DPLL_ID_WRPLL2; - break; - case PORT_CLK_SEL_SPLL: - id = DPLL_ID_SPLL; - break; - case PORT_CLK_SEL_LCPLL_810: - id = DPLL_ID_LCPLL_810; - break; - case PORT_CLK_SEL_LCPLL_1350: - id = DPLL_ID_LCPLL_1350; - break; - case PORT_CLK_SEL_LCPLL_2700: - id = DPLL_ID_LCPLL_2700; - break; - default: - MISSING_CASE(ddi_pll_sel); - fallthrough; - case PORT_CLK_SEL_NONE: - return; - } - - pll = intel_get_shared_dpll_by_id(dev_priv, id); - - pipe_config->shared_dpll = pll; - pll_active = intel_dpll_get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state); - drm_WARN_ON(&dev_priv->drm, !pll_active); -} - static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, struct intel_display_power_domain_set *power_domain_set) @@ -6888,19 +6707,6 @@ static void hsw_get_ddi_port_state(struct intel_crtc *crtc, port = TRANS_DDI_FUNC_CTL_VAL_TO_PORT(tmp); } - if (IS_DG1(dev_priv)) - dg1_get_ddi_pll(dev_priv, port, pipe_config); - else if (INTEL_GEN(dev_priv) >= 11) - icl_get_ddi_pll(dev_priv, port, pipe_config); - else if (IS_CANNONLAKE(dev_priv)) - cnl_get_ddi_pll(dev_priv, port, pipe_config); - else if (IS_GEN9_LP(dev_priv)) - bxt_get_ddi_pll(dev_priv, port, 
pipe_config); - else if (IS_GEN9_BC(dev_priv)) - skl_get_ddi_pll(dev_priv, port, pipe_config); - else - hsw_get_ddi_pll(dev_priv, port, pipe_config); - /* * Haswell has only FDI/PCH transcoder A. It is which is connected to * DDI E. So just check whether this pipe is wired to DDI E and whether @@ -7991,19 +7797,27 @@ static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { - if (connector->base.state->crtc) + struct drm_connector_state *conn_state = connector->base.state; + struct intel_encoder *encoder = + to_intel_encoder(connector->base.encoder); + + if (conn_state->crtc) drm_connector_put(&connector->base); - if (connector->base.encoder) { - connector->base.state->best_encoder = - connector->base.encoder; - connector->base.state->crtc = - connector->base.encoder->crtc; + if (encoder) { + struct intel_crtc *crtc = + to_intel_crtc(encoder->base.crtc); + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + conn_state->best_encoder = &encoder->base; + conn_state->crtc = &crtc->base; + conn_state->max_bpc = (crtc_state->pipe_bpp ?: 24) / 3; drm_connector_get(&connector->base); } else { - connector->base.state->best_encoder = NULL; - connector->base.state->crtc = NULL; + conn_state->best_encoder = NULL; + conn_state->crtc = NULL; } } drm_connector_list_iter_end(&conn_iter); @@ -8264,6 +8078,11 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, pipe_config->bigjoiner_slave ? "slave" : pipe_config->bigjoiner ? 
"master" : "no"); + drm_dbg_kms(&dev_priv->drm, "splitter: %s, link count %d, overlap %d\n", + enableddisabled(pipe_config->splitter.enable), + pipe_config->splitter.link_count, + pipe_config->splitter.pixel_overlap); + if (pipe_config->has_pch_encoder) intel_dump_m_n_config(pipe_config, "fdi", pipe_config->fdi_lanes, @@ -9304,6 +9123,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(dsc.dsc_split); PIPE_CONF_CHECK_I(dsc.compressed_bpp); + PIPE_CONF_CHECK_BOOL(splitter.enable); + PIPE_CONF_CHECK_I(splitter.link_count); + PIPE_CONF_CHECK_I(splitter.pixel_overlap); + PIPE_CONF_CHECK_I(mst_master_transcoder); PIPE_CONF_CHECK_BOOL(vrr.enable); @@ -9353,11 +9176,10 @@ static void verify_wm_state(struct intel_crtc *crtc, struct skl_ddb_entry ddb_uv[I915_MAX_PLANES]; struct skl_pipe_wm wm; } *hw; - struct skl_pipe_wm *sw_wm; - struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry; + const struct skl_pipe_wm *sw_wm = &new_crtc_state->wm.skl.optimal; + int level, max_level = ilk_wm_max_level(dev_priv); + struct intel_plane *plane; u8 hw_enabled_slices; - const enum pipe pipe = crtc->pipe; - int plane, level, max_level = ilk_wm_max_level(dev_priv); if (INTEL_GEN(dev_priv) < 9 || !new_crtc_state->hw.active) return; @@ -9367,7 +9189,6 @@ static void verify_wm_state(struct intel_crtc *crtc, return; skl_pipe_wm_get_hw_state(crtc, &hw->wm); - sw_wm = &new_crtc_state->wm.skl.optimal; skl_pipe_ddb_get_hw_state(crtc, hw->ddb_y, hw->ddb_uv); @@ -9380,110 +9201,52 @@ static void verify_wm_state(struct intel_crtc *crtc, dev_priv->dbuf.enabled_slices, hw_enabled_slices); - /* planes */ - for_each_universal_plane(dev_priv, pipe, plane) { - struct skl_plane_wm *hw_plane_wm, *sw_plane_wm; - - hw_plane_wm = &hw->wm.planes[plane]; - sw_plane_wm = &sw_wm->planes[plane]; + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + const struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry; + const struct skl_wm_level *hw_wm_level, *sw_wm_level; /* 
Watermarks */ for (level = 0; level <= max_level; level++) { - if (skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->wm[level]) || - (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->sagv_wm0))) - continue; + hw_wm_level = &hw->wm.planes[plane->id].wm[level]; + sw_wm_level = skl_plane_wm_level(sw_wm, plane->id, level); - drm_err(&dev_priv->drm, - "mismatch in WM pipe %c plane %d level %d (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - pipe_name(pipe), plane + 1, level, - sw_plane_wm->wm[level].plane_en, - sw_plane_wm->wm[level].plane_res_b, - sw_plane_wm->wm[level].plane_res_l, - hw_plane_wm->wm[level].plane_en, - hw_plane_wm->wm[level].plane_res_b, - hw_plane_wm->wm[level].plane_res_l); - } - - if (!skl_wm_level_equals(&hw_plane_wm->trans_wm, - &sw_plane_wm->trans_wm)) { - drm_err(&dev_priv->drm, - "mismatch in trans WM pipe %c plane %d (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - pipe_name(pipe), plane + 1, - sw_plane_wm->trans_wm.plane_en, - sw_plane_wm->trans_wm.plane_res_b, - sw_plane_wm->trans_wm.plane_res_l, - hw_plane_wm->trans_wm.plane_en, - hw_plane_wm->trans_wm.plane_res_b, - hw_plane_wm->trans_wm.plane_res_l); - } - - /* DDB */ - hw_ddb_entry = &hw->ddb_y[plane]; - sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb_y[plane]; - - if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) { - drm_err(&dev_priv->drm, - "mismatch in DDB state pipe %c plane %d (expected (%u,%u), found (%u,%u))\n", - pipe_name(pipe), plane + 1, - sw_ddb_entry->start, sw_ddb_entry->end, - hw_ddb_entry->start, hw_ddb_entry->end); - } - } - - /* - * cursor - * If the cursor plane isn't active, we may not have updated it's ddb - * allocation. 
In that case since the ddb allocation will be updated - * once the plane becomes visible, we can skip this check - */ - if (1) { - struct skl_plane_wm *hw_plane_wm, *sw_plane_wm; - - hw_plane_wm = &hw->wm.planes[PLANE_CURSOR]; - sw_plane_wm = &sw_wm->planes[PLANE_CURSOR]; - - /* Watermarks */ - for (level = 0; level <= max_level; level++) { - if (skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->wm[level]) || - (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->sagv_wm0))) + if (skl_wm_level_equals(hw_wm_level, sw_wm_level)) continue; drm_err(&dev_priv->drm, - "mismatch in WM pipe %c cursor level %d (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - pipe_name(pipe), level, - sw_plane_wm->wm[level].plane_en, - sw_plane_wm->wm[level].plane_res_b, - sw_plane_wm->wm[level].plane_res_l, - hw_plane_wm->wm[level].plane_en, - hw_plane_wm->wm[level].plane_res_b, - hw_plane_wm->wm[level].plane_res_l); + "[PLANE:%d:%s] mismatch in WM%d (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", + plane->base.base.id, plane->base.name, level, + sw_wm_level->enable, + sw_wm_level->blocks, + sw_wm_level->lines, + hw_wm_level->enable, + hw_wm_level->blocks, + hw_wm_level->lines); } - if (!skl_wm_level_equals(&hw_plane_wm->trans_wm, - &sw_plane_wm->trans_wm)) { + hw_wm_level = &hw->wm.planes[plane->id].trans_wm; + sw_wm_level = skl_plane_trans_wm(sw_wm, plane->id); + + if (!skl_wm_level_equals(hw_wm_level, sw_wm_level)) { drm_err(&dev_priv->drm, - "mismatch in trans WM pipe %c cursor (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", - pipe_name(pipe), - sw_plane_wm->trans_wm.plane_en, - sw_plane_wm->trans_wm.plane_res_b, - sw_plane_wm->trans_wm.plane_res_l, - hw_plane_wm->trans_wm.plane_en, - hw_plane_wm->trans_wm.plane_res_b, - hw_plane_wm->trans_wm.plane_res_l); + "[PLANE:%d:%s] mismatch in trans WM (expected e=%d b=%u l=%u, got e=%d b=%u l=%u)\n", + plane->base.base.id, plane->base.name, + sw_wm_level->enable, + sw_wm_level->blocks, + 
sw_wm_level->lines, + hw_wm_level->enable, + hw_wm_level->blocks, + hw_wm_level->lines); } /* DDB */ - hw_ddb_entry = &hw->ddb_y[PLANE_CURSOR]; - sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR]; + hw_ddb_entry = &hw->ddb_y[plane->id]; + sw_ddb_entry = &new_crtc_state->wm.skl.plane_ddb_y[plane->id]; if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) { drm_err(&dev_priv->drm, - "mismatch in DDB state pipe %c cursor (expected (%u,%u), found (%u,%u))\n", - pipe_name(pipe), + "[PLANE:%d:%s] mismatch in DDB (expected (%u,%u), found (%u,%u))\n", + plane->base.base.id, plane->base.name, sw_ddb_entry->start, sw_ddb_entry->end, hw_ddb_entry->start, hw_ddb_entry->end); } @@ -9658,7 +9421,7 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv, struct intel_crtc_state *new_crtc_state) { struct intel_dpll_hw_state dpll_hw_state; - unsigned int crtc_mask; + u8 pipe_mask; bool active; memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); @@ -9671,34 +9434,34 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv, I915_STATE_WARN(!pll->on && pll->active_mask, "pll in active use but not on in sw tracking\n"); I915_STATE_WARN(pll->on && !pll->active_mask, - "pll is on but not used by any active crtc\n"); + "pll is on but not used by any active pipe\n"); I915_STATE_WARN(pll->on != active, "pll on state mismatch (expected %i, found %i)\n", pll->on, active); } if (!crtc) { - I915_STATE_WARN(pll->active_mask & ~pll->state.crtc_mask, - "more active pll users than references: %x vs %x\n", - pll->active_mask, pll->state.crtc_mask); + I915_STATE_WARN(pll->active_mask & ~pll->state.pipe_mask, + "more active pll users than references: 0x%x vs 0x%x\n", + pll->active_mask, pll->state.pipe_mask); return; } - crtc_mask = drm_crtc_mask(&crtc->base); + pipe_mask = BIT(crtc->pipe); if (new_crtc_state->hw.active) - I915_STATE_WARN(!(pll->active_mask & crtc_mask), - "pll active mismatch (expected pipe %c in active mask 0x%02x)\n", + I915_STATE_WARN(!(pll->active_mask & 
pipe_mask), + "pll active mismatch (expected pipe %c in active mask 0x%x)\n", pipe_name(crtc->pipe), pll->active_mask); else - I915_STATE_WARN(pll->active_mask & crtc_mask, - "pll active mismatch (didn't expect pipe %c in active mask 0x%02x)\n", + I915_STATE_WARN(pll->active_mask & pipe_mask, + "pll active mismatch (didn't expect pipe %c in active mask 0x%x)\n", pipe_name(crtc->pipe), pll->active_mask); - I915_STATE_WARN(!(pll->state.crtc_mask & crtc_mask), - "pll enabled crtcs mismatch (expected 0x%x in 0x%02x)\n", - crtc_mask, pll->state.crtc_mask); + I915_STATE_WARN(!(pll->state.pipe_mask & pipe_mask), + "pll enabled crtcs mismatch (expected 0x%x in 0x%x)\n", + pipe_mask, pll->state.pipe_mask); I915_STATE_WARN(pll->on && memcmp(&pll->state.hw_state, &dpll_hw_state, @@ -9718,15 +9481,15 @@ verify_shared_dpll_state(struct intel_crtc *crtc, if (old_crtc_state->shared_dpll && old_crtc_state->shared_dpll != new_crtc_state->shared_dpll) { - unsigned int crtc_mask = drm_crtc_mask(&crtc->base); + u8 pipe_mask = BIT(crtc->pipe); struct intel_shared_dpll *pll = old_crtc_state->shared_dpll; - I915_STATE_WARN(pll->active_mask & crtc_mask, - "pll active mismatch (didn't expect pipe %c in active mask)\n", - pipe_name(crtc->pipe)); - I915_STATE_WARN(pll->state.crtc_mask & crtc_mask, - "pll enabled crtcs mismatch (found %x in enabled mask)\n", - pipe_name(crtc->pipe)); + I915_STATE_WARN(pll->active_mask & pipe_mask, + "pll active mismatch (didn't expect pipe %c in active mask (0x%x))\n", + pipe_name(crtc->pipe), pll->active_mask); + I915_STATE_WARN(pll->state.pipe_mask & pipe_mask, + "pll enabled crtcs mismatch (found %x in enabled mask (0x%x))\n", + pipe_name(crtc->pipe), pll->state.pipe_mask); } } @@ -11605,15 +11368,6 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } -static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) -{ - struct i915_sched_attr attr = { - .priority = 
I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), - }; - - i915_gem_object_wait_priority(obj, 0, &attr); -} - /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @_plane: drm plane to prepare for @@ -11630,6 +11384,9 @@ int intel_prepare_plane_fb(struct drm_plane *_plane, struct drm_plane_state *_new_plane_state) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), + }; struct intel_plane *plane = to_intel_plane(_plane); struct intel_plane_state *new_plane_state = to_intel_plane_state(_new_plane_state); @@ -11669,6 +11426,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, } if (new_plane_state->uapi.fence) { /* explicit fencing */ + i915_gem_fence_wait_priority(new_plane_state->uapi.fence, + &attr); ret = i915_sw_fence_await_dma_fence(&state->commit_ready, new_plane_state->uapi.fence, i915_fence_timeout(dev_priv), @@ -11690,7 +11449,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret) return ret; - fb_obj_bump_render_priority(obj); + i915_gem_object_wait_priority(obj, 0, &attr); i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB); if (!new_plane_state->uapi.fence) { /* implicit fencing */ @@ -12924,6 +12683,7 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915) intel_update_czclk(i915); intel_modeset_init_hw(i915); + intel_dpll_update_ref_clks(i915); intel_hdcp_component_init(i915); @@ -13379,8 +13139,8 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* notify opregion of the sanitized encoder state */ intel_opregion_notify_encoder(encoder, connector && has_active_crtc); - if (INTEL_GEN(dev_priv) >= 11) - icl_sanitize_encoder_pll_mapping(encoder); + if (HAS_DDI(dev_priv)) + intel_ddi_sanitize_encoder_pll_mapping(encoder); } /* FIXME read out full plane state for all planes */ @@ -13460,8 +13220,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) readout_plane_state(dev_priv); - intel_dpll_readout_hw_state(dev_priv); - for_each_intel_encoder(dev, encoder) { 
pipe = 0; @@ -13496,6 +13254,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pipe_name(pipe)); } + intel_dpll_readout_hw_state(dev_priv); + drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->get_hw_state(connector)) { diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 0e4c1481fa00..431770eeadb4 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -353,11 +353,6 @@ enum phy_fia { for_each_cpu_transcoder(__dev_priv, __t) \ for_each_if ((__mask) & BIT(__t)) -#define for_each_universal_plane(__dev_priv, __pipe, __p) \ - for ((__p) = 0; \ - (__p) < RUNTIME_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ - (__p)++) - #define for_each_sprite(__dev_priv, __p, __s) \ for ((__s) = 0; \ (__s) < RUNTIME_INFO(__dev_priv)->num_sprites[(__p)]; \ diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 449470ef9f65..0c5b7600d847 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1098,8 +1098,8 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->info->name, pll->info->id); - seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n", - pll->state.crtc_mask, pll->active_mask, yesno(pll->on)); + seq_printf(m, " pipe_mask: 0x%x, active: 0x%x, on: %s\n", + pll->state.pipe_mask, pll->active_mask, yesno(pll->on)); seq_printf(m, " tracked hardware state:\n"); seq_printf(m, " dpll: 0x%08x\n", pll->state.hw_state.dpll); seq_printf(m, " dpll_md: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 1a76e1d9de7a..eaebba5889d2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ 
b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -226,6 +226,10 @@ struct intel_encoder { void (*enable_clock)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void (*disable_clock)(struct intel_encoder *encoder); + /* + * Returns whether the port clock is enabled or not. + */ + bool (*is_clock_enabled)(struct intel_encoder *encoder); enum hpd_pin hpd_pin; enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ @@ -721,9 +725,9 @@ struct intel_pipe_wm { struct skl_wm_level { u16 min_ddb_alloc; - u16 plane_res_b; - u8 plane_res_l; - bool plane_en; + u16 blocks; + u8 lines; + bool enable; bool ignore_lines; bool can_sagv; }; @@ -732,7 +736,10 @@ struct skl_plane_wm { struct skl_wm_level wm[8]; struct skl_wm_level uv_wm[8]; struct skl_wm_level trans_wm; - struct skl_wm_level sagv_wm0; + struct { + struct skl_wm_level wm0; + struct skl_wm_level trans_wm; + } sagv; bool is_planar; }; @@ -1166,6 +1173,13 @@ struct intel_crtc_state { u8 pipeline_full; u16 flipline, vmin, vmax; } vrr; + + /* Stream Splitter for eDP MSO */ + struct { + bool enable; + u8 link_count; + u8 pixel_overlap; + } splitter; }; enum intel_pipe_crc_source { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2ec82a5c9f24..b6b5776f5a66 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1723,6 +1723,7 @@ intel_dp_drrs_compute_config(struct intel_dp *intel_dp, { struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + int pixel_clock; if (pipe_config->vrr.enable) return; @@ -1741,10 +1742,18 @@ intel_dp_drrs_compute_config(struct intel_dp *intel_dp, return; pipe_config->has_drrs = true; - intel_link_compute_m_n(output_bpp, pipe_config->lane_count, - intel_connector->panel.downclock_mode->clock, + + pixel_clock = 
intel_connector->panel.downclock_mode->clock; + if (pipe_config->splitter.enable) + pixel_clock /= pipe_config->splitter.link_count; + + intel_link_compute_m_n(output_bpp, pipe_config->lane_count, pixel_clock, pipe_config->port_clock, &pipe_config->dp_m2_n2, constant_n, pipe_config->fec_enable); + + /* FIXME: abstract this better */ + if (pipe_config->splitter.enable) + pipe_config->dp_m2_n2.gmch_m *= pipe_config->splitter.link_count; } int @@ -1819,6 +1828,26 @@ intel_dp_compute_config(struct intel_encoder *encoder, output_bpp = intel_dp_output_bpp(pipe_config->output_format, pipe_config->pipe_bpp); + if (intel_dp->mso_link_count) { + int n = intel_dp->mso_link_count; + int overlap = intel_dp->mso_pixel_overlap; + + pipe_config->splitter.enable = true; + pipe_config->splitter.link_count = n; + pipe_config->splitter.pixel_overlap = overlap; + + drm_dbg_kms(&dev_priv->drm, "MSO link count %d, pixel overlap %d\n", + n, overlap); + + adjusted_mode->crtc_hdisplay = adjusted_mode->crtc_hdisplay / n + overlap; + adjusted_mode->crtc_hblank_start = adjusted_mode->crtc_hblank_start / n + overlap; + adjusted_mode->crtc_hblank_end = adjusted_mode->crtc_hblank_end / n + overlap; + adjusted_mode->crtc_hsync_start = adjusted_mode->crtc_hsync_start / n + overlap; + adjusted_mode->crtc_hsync_end = adjusted_mode->crtc_hsync_end / n + overlap; + adjusted_mode->crtc_htotal = adjusted_mode->crtc_htotal / n + overlap; + adjusted_mode->crtc_clock /= n; + } + intel_link_compute_m_n(output_bpp, pipe_config->lane_count, adjusted_mode->crtc_clock, @@ -1826,6 +1855,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, &pipe_config->dp_m_n, constant_n, pipe_config->fec_enable); + /* FIXME: abstract this better */ + if (pipe_config->splitter.enable) + pipe_config->dp_m_n.gmch_m *= pipe_config->splitter.link_count; + if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); @@ -3516,6 +3549,31 @@ static void intel_dp_get_dsc_sink_cap(struct intel_dp *intel_dp) } } +static void 
intel_edp_mso_mode_fixup(struct intel_connector *connector, + struct drm_display_mode *mode) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); + int n = intel_dp->mso_link_count; + int overlap = intel_dp->mso_pixel_overlap; + + if (!mode || !n) + return; + + mode->hdisplay = (mode->hdisplay - overlap) * n; + mode->hsync_start = (mode->hsync_start - overlap) * n; + mode->hsync_end = (mode->hsync_end - overlap) * n; + mode->htotal = (mode->htotal - overlap) * n; + mode->clock *= n; + + drm_mode_set_name(mode); + + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] using generated MSO mode: ", + connector->base.base.id, connector->base.name); + drm_mode_debug_printmodeline(mode); +} + static void intel_edp_mso_init(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); @@ -3539,8 +3597,10 @@ static void intel_edp_mso_init(struct intel_dp *intel_dp) if (mso) { drm_dbg_kms(&i915->drm, "Sink MSO %ux%u configuration\n", mso, drm_dp_max_lane_count(intel_dp->dpcd) / mso); - drm_err(&i915->drm, "No source MSO support, disabling\n"); - mso = 0; + if (!HAS_MSO(i915)) { + drm_err(&i915->drm, "No source MSO support, disabling\n"); + mso = 0; + } } intel_dp->mso_link_count = mso; @@ -6493,6 +6553,10 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, if (fixed_mode) downclock_mode = intel_dp_drrs_init(intel_connector, fixed_mode); + /* multiply the mode clock and horizontal timings for MSO */ + intel_edp_mso_mode_fixup(intel_connector, fixed_mode); + intel_edp_mso_mode_fixup(intel_connector, downclock_mode); + /* fallback to VBT if available for eDP */ if (!fixed_mode) fixed_mode = intel_panel_vbt_fixed_mode(intel_connector); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 892d7db7d94f..19ba7c7cbaab 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ 
b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -26,12 +26,13 @@ #include "intel_dp_link_training.h" static void -intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE]) +intel_dp_dump_link_status(struct drm_device *drm, + const u8 link_status[DP_LINK_STATUS_SIZE]) { - - DRM_DEBUG_KMS("ln0_1:0x%x ln2_3:0x%x align:0x%x sink:0x%x adj_req0_1:0x%x adj_req2_3:0x%x", - link_status[0], link_status[1], link_status[2], - link_status[3], link_status[4], link_status[5]); + drm_dbg_kms(drm, + "ln0_1:0x%x ln2_3:0x%x align:0x%x sink:0x%x adj_req0_1:0x%x adj_req2_3:0x%x\n", + link_status[0], link_status[1], link_status[2], + link_status[3], link_status[4], link_status[5]); } static void intel_dp_reset_lttpr_count(struct intel_dp *intel_dp) @@ -642,7 +643,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp, /* Make sure clock is still ok */ if (!drm_dp_clock_recovery_ok(link_status, crtc_state->lane_count)) { - intel_dp_dump_link_status(link_status); + intel_dp_dump_link_status(&i915->drm, link_status); drm_dbg_kms(&i915->drm, "Clock recovery check failed, cannot " "continue channel equalization\n"); @@ -669,7 +670,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp, /* Try 5 times, else fail and try at lower BW */ if (tries == 5) { - intel_dp_dump_link_status(link_status); + intel_dp_dump_link_status(&i915->drm, link_status); drm_dbg_kms(&i915->drm, "Channel equalization failed 5 times\n"); } @@ -731,7 +732,7 @@ intel_dp_link_train_phy(struct intel_dp *intel_dp, out: drm_dbg_kms(&dp_to_i915(intel_dp)->drm, - "[CONNECTOR:%d:%s] Link Training %s at link rate = %d, lane count = %d, at %s", + "[CONNECTOR:%d:%s] Link Training %s at link rate = %d, lane count = %d, at %s\n", intel_connector->base.base.id, intel_connector->base.name, ret ? 
"passed" : "failed", diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 8e316146b6d1..906860ad8eb8 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -591,7 +591,7 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_digital_port *dig_port = intel_mst->primary; - intel_ddi_get_config(&dig_port->base, pipe_config); + dig_port->base.get_config(&dig_port->base, pipe_config); } static bool intel_dp_mst_initial_fastset_check(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 529b1d569af2..22ee8e13b518 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -176,7 +176,7 @@ void intel_prepare_shared_dpll(const struct intel_crtc_state *crtc_state) return; mutex_lock(&dev_priv->dpll.lock); - drm_WARN_ON(&dev_priv->drm, !pll->state.crtc_mask); + drm_WARN_ON(&dev_priv->drm, !pll->state.pipe_mask); if (!pll->active_mask) { drm_dbg(&dev_priv->drm, "setting up %s\n", pll->info->name); drm_WARN_ON(&dev_priv->drm, pll->on); @@ -198,7 +198,7 @@ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll = crtc_state->shared_dpll; - unsigned int crtc_mask = drm_crtc_mask(&crtc->base); + unsigned int pipe_mask = BIT(crtc->pipe); unsigned int old_mask; if (drm_WARN_ON(&dev_priv->drm, pll == NULL)) @@ -207,16 +207,16 @@ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state) mutex_lock(&dev_priv->dpll.lock); old_mask = pll->active_mask; - if (drm_WARN_ON(&dev_priv->drm, !(pll->state.crtc_mask & crtc_mask)) || - drm_WARN_ON(&dev_priv->drm, 
pll->active_mask & crtc_mask)) + if (drm_WARN_ON(&dev_priv->drm, !(pll->state.pipe_mask & pipe_mask)) || + drm_WARN_ON(&dev_priv->drm, pll->active_mask & pipe_mask)) goto out; - pll->active_mask |= crtc_mask; + pll->active_mask |= pipe_mask; drm_dbg_kms(&dev_priv->drm, - "enable %s (active %x, on? %d) for crtc %d\n", + "enable %s (active 0x%x, on? %d) for [CRTC:%d:%s]\n", pll->info->name, pll->active_mask, pll->on, - crtc->base.base.id); + crtc->base.base.id, crtc->base.name); if (old_mask) { drm_WARN_ON(&dev_priv->drm, !pll->on); @@ -244,7 +244,7 @@ void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll = crtc_state->shared_dpll; - unsigned int crtc_mask = drm_crtc_mask(&crtc->base); + unsigned int pipe_mask = BIT(crtc->pipe); /* PCH only available on ILK+ */ if (INTEL_GEN(dev_priv) < 5) @@ -254,18 +254,20 @@ void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state) return; mutex_lock(&dev_priv->dpll.lock); - if (drm_WARN_ON(&dev_priv->drm, !(pll->active_mask & crtc_mask))) + if (drm_WARN(&dev_priv->drm, !(pll->active_mask & pipe_mask), + "%s not used by [CRTC:%d:%s]\n", pll->info->name, + crtc->base.base.id, crtc->base.name)) goto out; drm_dbg_kms(&dev_priv->drm, - "disable %s (active %x, on? %d) for crtc %d\n", + "disable %s (active 0x%x, on? 
%d) for [CRTC:%d:%s]\n", pll->info->name, pll->active_mask, pll->on, - crtc->base.base.id); + crtc->base.base.id, crtc->base.name); assert_shared_dpll_enabled(dev_priv, pll); drm_WARN_ON(&dev_priv->drm, !pll->on); - pll->active_mask &= ~crtc_mask; + pll->active_mask &= ~pipe_mask; if (pll->active_mask) goto out; @@ -296,7 +298,7 @@ intel_find_shared_dpll(struct intel_atomic_state *state, pll = &dev_priv->dpll.shared_dplls[i]; /* Only want to check enabled timings first */ - if (shared_dpll[i].crtc_mask == 0) { + if (shared_dpll[i].pipe_mask == 0) { if (!unused_pll) unused_pll = pll; continue; @@ -306,10 +308,10 @@ intel_find_shared_dpll(struct intel_atomic_state *state, &shared_dpll[i].hw_state, sizeof(*pll_state)) == 0) { drm_dbg_kms(&dev_priv->drm, - "[CRTC:%d:%s] sharing existing %s (crtc mask 0x%08x, active %x)\n", + "[CRTC:%d:%s] sharing existing %s (pipe mask 0x%x, active 0x%x)\n", crtc->base.base.id, crtc->base.name, pll->info->name, - shared_dpll[i].crtc_mask, + shared_dpll[i].pipe_mask, pll->active_mask); return pll; } @@ -338,13 +340,13 @@ intel_reference_shared_dpll(struct intel_atomic_state *state, shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); - if (shared_dpll[id].crtc_mask == 0) + if (shared_dpll[id].pipe_mask == 0) shared_dpll[id].hw_state = *pll_state; drm_dbg(&i915->drm, "using %s for pipe %c\n", pll->info->name, pipe_name(crtc->pipe)); - shared_dpll[id].crtc_mask |= 1 << crtc->pipe; + shared_dpll[id].pipe_mask |= BIT(crtc->pipe); } static void intel_unreference_shared_dpll(struct intel_atomic_state *state, @@ -354,7 +356,7 @@ static void intel_unreference_shared_dpll(struct intel_atomic_state *state, struct intel_shared_dpll_state *shared_dpll; shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); - shared_dpll[pll->info->id].crtc_mask &= ~(1 << crtc->pipe); + shared_dpll[pll->info->id].pipe_mask &= ~BIT(crtc->pipe); } static void intel_put_dpll(struct intel_atomic_state *state, @@ -4597,27 +4599,30 @@ static void 
readout_dpll_hw_state(struct drm_i915_private *i915, POWER_DOMAIN_DPLL_DC_OFF); } - pll->state.crtc_mask = 0; + pll->state.pipe_mask = 0; for_each_intel_crtc(&i915->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); if (crtc_state->hw.active && crtc_state->shared_dpll == pll) - pll->state.crtc_mask |= 1 << crtc->pipe; + pll->state.pipe_mask |= BIT(crtc->pipe); } - pll->active_mask = pll->state.crtc_mask; + pll->active_mask = pll->state.pipe_mask; drm_dbg_kms(&i915->drm, - "%s hw state readout: crtc_mask 0x%08x, on %i\n", - pll->info->name, pll->state.crtc_mask, pll->on); + "%s hw state readout: pipe_mask 0x%x, on %i\n", + pll->info->name, pll->state.pipe_mask, pll->on); } -void intel_dpll_readout_hw_state(struct drm_i915_private *i915) +void intel_dpll_update_ref_clks(struct drm_i915_private *i915) { - int i; - if (i915->dpll.mgr && i915->dpll.mgr->update_ref_clks) i915->dpll.mgr->update_ref_clks(i915); +} + +void intel_dpll_readout_hw_state(struct drm_i915_private *i915) +{ + int i; for (i = 0; i < i915->dpll.num_shared_dpll; i++) readout_dpll_hw_state(i915, &i915->dpll.shared_dplls[i]); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index 2eb7618ef957..7fd031a70cfd 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -241,9 +241,9 @@ struct intel_dpll_hw_state { */ struct intel_shared_dpll_state { /** - * @crtc_mask: mask of CRTC using this DPLL, active or not + * @pipe_mask: mask of pipes using this DPLL, active or not */ - unsigned crtc_mask; + u8 pipe_mask; /** * @hw_state: hardware configuration for the DPLL stored in @@ -351,9 +351,9 @@ struct intel_shared_dpll { struct intel_shared_dpll_state state; /** - * @active_mask: mask of active CRTCs (i.e. DPMS on) using this DPLL + * @active_mask: mask of active pipes (i.e. 
DPMS on) using this DPLL */ - unsigned active_mask; + u8 active_mask; /** * @on: is the PLL actually active? Disabled during modeset @@ -410,6 +410,7 @@ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state); void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state); void intel_shared_dpll_swap_state(struct intel_atomic_state *state); void intel_shared_dpll_init(struct drm_device *dev); +void intel_dpll_update_ref_clks(struct drm_i915_private *dev_priv); void intel_dpll_readout_hw_state(struct drm_i915_private *dev_priv); void intel_dpll_sanitize_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 4ccb462bd497..07db8e83f98e 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -256,7 +256,7 @@ static int intelfb_create(struct drm_fb_helper *helper, * If the object is stolen however, it will be full of whatever * garbage was left in there. */ - if (vma->obj->stolen && !prealloc) + if (!i915_gem_object_is_shmem(vma->obj) && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ @@ -595,7 +595,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous * full of whatever garbage was left in there. 
*/ if (state == FBINFO_STATE_RUNNING && - intel_fb_obj(&ifbdev->fb->base)->stolen) + !i915_gem_object_is_shmem(intel_fb_obj(&ifbdev->fb->base))) memset_io(info->screen_base, 0, info->screen_size); drm_fb_helper_set_suspend(&ifbdev->helper, state); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index b2744c5c3653..6fc6965b6133 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -227,8 +227,10 @@ static void frontbuffer_release(struct kref *ref) drm_WARN_ON(obj->base.dev, atomic_read(&front->bits)); spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) + for_each_ggtt_vma(vma, obj) { + i915_vma_clear_scanout(vma); vma->display_alignment = I915_GTT_MIN_ALIGNMENT; + } spin_unlock(&obj->vma.lock); RCU_INIT_POINTER(obj->frontbuffer, NULL); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 16bc155df9dc..ef8f44f5e751 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -359,7 +359,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) intel_frontbuffer_flip_complete(overlay->i915, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); i915_vma_put(vma); } @@ -860,7 +860,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, return 0; out_unpin: - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); out_pin_section: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 68f58762d5e3..4d2f40cf237b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -408,7 +408,7 @@ __active_engine(struct i915_request *rq, struct intel_engine_cs **active) } if 
(i915_request_is_active(rq)) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) *active = locked; ret = true; } @@ -717,7 +717,8 @@ err_free: } static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx) +__context_engines_await(const struct i915_gem_context *ctx, + bool *user_engines) { struct i915_gem_engines *engines; @@ -726,6 +727,10 @@ __context_engines_await(const struct i915_gem_context *ctx) engines = rcu_dereference(ctx->engines); GEM_BUG_ON(!engines); + if (user_engines) + *user_engines = i915_gem_context_user_engines(ctx); + + /* successful await => strong mb */ if (unlikely(!i915_sw_fence_await(&engines->fence))) continue; @@ -749,7 +754,7 @@ context_apply_all(struct i915_gem_context *ctx, struct intel_context *ce; int err = 0; - e = __context_engines_await(ctx); + e = __context_engines_await(ctx, NULL); for_each_gem_engine(ce, e, it) { err = fn(ce, data); if (err) @@ -1075,7 +1080,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } - e = __context_engines_await(ctx); + e = __context_engines_await(ctx, NULL); if (!e) { i915_active_release(&cb->base); return -ENOENT; @@ -1838,27 +1843,6 @@ replace: return 0; } -static struct i915_gem_engines * -__copy_engines(struct i915_gem_engines *e) -{ - struct i915_gem_engines *copy; - unsigned int n; - - copy = alloc_engines(e->num_engines); - if (!copy) - return ERR_PTR(-ENOMEM); - - for (n = 0; n < e->num_engines; n++) { - if (e->engines[n]) - copy->engines[n] = intel_context_get(e->engines[n]); - else - copy->engines[n] = NULL; - } - copy->num_engines = n; - - return copy; -} - static int get_engines(struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1866,19 +1850,17 @@ get_engines(struct i915_gem_context *ctx, struct i915_context_param_engines __user *user; struct i915_gem_engines *e; size_t n, count, size; + bool user_engines; int err = 0; - err = 
mutex_lock_interruptible(&ctx->engines_mutex); - if (err) - return err; + e = __context_engines_await(ctx, &user_engines); + if (!e) + return -ENOENT; - e = NULL; - if (i915_gem_context_user_engines(ctx)) - e = __copy_engines(i915_gem_context_engines(ctx)); - mutex_unlock(&ctx->engines_mutex); - if (IS_ERR_OR_NULL(e)) { + if (!user_engines) { + i915_sw_fence_complete(&e->fence); args->size = 0; - return PTR_ERR_OR_ZERO(e); + return 0; } count = e->num_engines; @@ -1929,7 +1911,7 @@ get_engines(struct i915_gem_context *ctx, args->size = size; err_free: - free_engines(e); + i915_sw_fence_complete(&e->fence); return err; } @@ -2095,11 +2077,14 @@ static int copy_ring_size(struct intel_context *dst, static int clone_engines(struct i915_gem_context *dst, struct i915_gem_context *src) { - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; + struct i915_gem_engines *clone, *e; bool user_engines; unsigned long n; + e = __context_engines_await(src, &user_engines); + if (!e) + return -ENOENT; + clone = alloc_engines(e->num_engines); if (!clone) goto err_unlock; @@ -2141,9 +2126,7 @@ static int clone_engines(struct i915_gem_context *dst, } } clone->num_engines = n; - - user_engines = i915_gem_context_user_engines(src); - i915_gem_context_unlock_engines(src); + i915_sw_fence_complete(&e->fence); /* Serialised by constructor */ engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); @@ -2154,7 +2137,7 @@ static int clone_engines(struct i915_gem_context *dst, return 0; err_unlock: - i915_gem_context_unlock_engines(src); + i915_sw_fence_complete(&e->fence); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c new file mode 100644 index 000000000000..45d60e3d98e3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include 
"gem/i915_gem_ioctls.h" +#include "gem/i915_gem_region.h" + +#include "i915_drv.h" + +static int +i915_gem_create(struct drm_file *file, + struct intel_memory_region *mr, + u64 *size_p, + u32 *handle_p) +{ + struct drm_i915_gem_object *obj; + u32 handle; + u64 size; + int ret; + + GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); + size = round_up(*size_p, mr->min_page_size); + if (size == 0) + return -EINVAL; + + /* For most of the ABI (e.g. mmap) we think in system pages */ + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + /* Allocate the new object */ + obj = i915_gem_object_create_region(mr, size, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + GEM_BUG_ON(size != obj->base.size); + + ret = drm_gem_handle_create(file, &obj->base, &handle); + /* drop reference from allocate - handle holds it now */ + i915_gem_object_put(obj); + if (ret) + return ret; + + *handle_p = handle; + *size_p = size; + return 0; +} + +int +i915_gem_dumb_create(struct drm_file *file, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + enum intel_memory_type mem_type; + int cpp = DIV_ROUND_UP(args->bpp, 8); + u32 format; + + switch (cpp) { + case 1: + format = DRM_FORMAT_C8; + break; + case 2: + format = DRM_FORMAT_RGB565; + break; + case 4: + format = DRM_FORMAT_XRGB8888; + break; + default: + return -EINVAL; + } + + /* have to work out size/pitch and return them */ + args->pitch = ALIGN(args->width * cpp, 64); + + /* align stride to page size so that we can remap */ + if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, + DRM_FORMAT_MOD_LINEAR)) + args->pitch = ALIGN(args->pitch, 4096); + + if (args->pitch < args->width) + return -EINVAL; + + args->size = mul_u32_u32(args->pitch, args->height); + + mem_type = INTEL_MEMORY_SYSTEM; + if (HAS_LMEM(to_i915(dev))) + mem_type = INTEL_MEMORY_LOCAL; + + return i915_gem_create(file, + intel_memory_region_by_type(to_i915(dev), + mem_type), + &args->size, &args->handle); +} + +/** + * Creates a new mm object and returns a 
handle to it. + * @dev: drm device pointer + * @data: ioctl data blob + * @file: drm file pointer + */ +int +i915_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_create *args = data; + + i915_gem_flush_free_objects(i915); + + return i915_gem_create(file, + intel_memory_region_by_type(i915, + INTEL_MEMORY_SYSTEM), + &args->size, &args->handle); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index fcce6909f201..36f54cedaaeb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -5,6 +5,7 @@ */ #include "display/intel_frontbuffer.h" +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" @@ -15,13 +16,58 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct i915_vma *vma; + + assert_object_held(obj); + + if (!(obj->write_domain & flush_domains)) + return; + + switch (obj->write_domain) { + case I915_GEM_DOMAIN_GTT: + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) { + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); + } + spin_unlock(&obj->vma.lock); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + break; + + case I915_GEM_DOMAIN_WC: + wmb(); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + + case I915_GEM_DOMAIN_RENDER: + if (gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; + break; + } + + obj->write_domain = 0; +} + static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { /* * We manually flush the CPU domain so that we can override 
and * force the flush for the display, and perform it asyncrhonously. */ - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); if (obj->cache_dirty) i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->write_domain = 0; @@ -80,7 +126,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -141,7 +187,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); + flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -370,6 +416,7 @@ retry: } vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + i915_vma_mark_scanout(vma); i915_gem_object_flush_if_display_locked(obj); @@ -387,48 +434,6 @@ err: return vma; } -static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_vma *vma; - - if (list_empty(&obj->vma.list)) - return; - - mutex_lock(&i915->ggtt.vm.mutex); - spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - GEM_BUG_ON(vma->vm != &i915->ggtt.vm); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); - } - spin_unlock(&obj->vma.lock); - mutex_unlock(&i915->ggtt.vm.mutex); - - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - if (obj->mm.madv == I915_MADV_WILLNEED && - !atomic_read(&obj->mm.shrink_pin)) - list_move_tail(&obj->mm.link, 
&i915->mm.shrink_list); - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } -} - -void -i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) -{ - /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(vma->obj); - - i915_vma_unpin(vma); -} - /** * Moves a single object to the CPU read, and possibly write domain. * @obj: object to act on @@ -451,7 +456,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { @@ -569,9 +574,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else err = i915_gem_object_set_to_cpu_domain(obj, write_domain); - /* And bump the LRU for this access */ - i915_gem_object_bump_inactive_ggtt(obj); - i915_gem_object_unlock(obj); if (write_domain) @@ -619,7 +621,7 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -670,7 +672,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b91b32195dcf..d70ca36f74f6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1276,7 +1276,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, int err; if (!pool) { - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, + cache->has_llc ? + I915_MAP_WB : + I915_MAP_WC); if (IS_ERR(pool)) return PTR_ERR(pool); } @@ -1286,10 +1289,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_pool; - cmd = i915_gem_object_pin_map(pool->obj, - cache->has_llc ? - I915_MAP_FORCE_WB : - I915_MAP_FORCE_WC); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto err_pool; @@ -2458,7 +2458,8 @@ static int eb_parse(struct i915_execbuffer *eb) return -EINVAL; if (!pool) { - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); + pool = intel_gt_get_buffer_pool(eb->engine->gt, len, + I915_MAP_WB); if (IS_ERR(pool)) return PTR_ERR(pool); eb->batch_pool = pool; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 932ee21e6609..194f35342710 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -31,18 +31,13 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915, size, flags); } -struct drm_i915_gem_object * -__i915_gem_lmem_object_create(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +int __i915_gem_lmem_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); 
drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); @@ -53,5 +48,5 @@ __i915_gem_lmem_object_create(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem, flags); - return obj; + return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index fc3f15580fe3..036d53c01de9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,9 +21,9 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags); -struct drm_i915_gem_object * -__i915_gem_lmem_object_create(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); +int __i915_gem_lmem_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags); #endif /* !__I915_GEM_LMEM_H */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index acae93199957..70f798405f7f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -25,7 +25,6 @@ #include <linux/sched/mm.h> #include "display/intel_frontbuffer.h" -#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" @@ -314,52 +313,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) queue_work(i915->wq, &i915->mm.free_work); } -static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_level == I915_CACHE_NONE || - obj->cache_level == I915_CACHE_WT); -} - -void -i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, - unsigned int flush_domains) -{ - struct i915_vma *vma; - - assert_object_held(obj); - - if (!(obj->write_domain & flush_domains)) - return; - - switch (obj->write_domain) { - case I915_GEM_DOMAIN_GTT: - spin_lock(&obj->vma.lock); - 
for_each_ggtt_vma(vma, obj) { - if (i915_vma_unset_ggtt_write(vma)) - intel_gt_flush_ggtt_writes(vma->vm->gt); - } - spin_unlock(&obj->vma.lock); - - i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - break; - - case I915_GEM_DOMAIN_WC: - wmb(); - break; - - case I915_GEM_DOMAIN_CPU: - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - break; - - case I915_GEM_DOMAIN_RENDER: - if (gpu_write_needs_clflush(obj)) - obj->cache_dirty = true; - break; - } - - obj->write_domain = 0; -} - void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index ae83737f1d48..d0ae834d787a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -188,6 +188,24 @@ i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_has_tiling_quirk(struct drm_i915_gem_object *obj) +{ + return test_bit(I915_TILING_QUIRK_BIT, &obj->flags); +} + +static inline void +i915_gem_object_set_tiling_quirk(struct drm_i915_gem_object *obj) +{ + set_bit(I915_TILING_QUIRK_BIT, &obj->flags); +} + +static inline void +i915_gem_object_clear_tiling_quirk(struct drm_i915_gem_object *obj) +{ + clear_bit(I915_TILING_QUIRK_BIT, &obj->flags); +} + +static inline bool i915_gem_object_type_has(const struct drm_i915_gem_object *obj, unsigned long flags) { @@ -390,14 +408,6 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); -enum i915_map_type { - I915_MAP_WB = 0, - I915_MAP_WC, -#define I915_MAP_OVERRIDE BIT(31) - I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE, - I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE, -}; - /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj: the object to map into 
kernel address space @@ -441,10 +451,6 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) void __i915_gem_object_release_map(struct drm_i915_gem_object *obj); -void -i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, - unsigned int flush_domains); - int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, unsigned int *needs_clflush); int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, @@ -492,7 +498,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags); -void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); @@ -518,6 +523,9 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj) obj->cache_dirty = true; } +void i915_gem_fence_wait_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr); + int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout); @@ -548,4 +556,6 @@ i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size); +bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 10cac9fac79b..d6dac21fce0b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -35,7 +35,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(round_up(vma->size, block_size), block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size); + pool = 
intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -55,7 +55,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; - cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_unpin; @@ -257,7 +257,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(round_up(dst->size, block_size), block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -277,7 +277,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; - cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_unpin; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index e2d9b7e1e152..0438e00d4ca7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -67,6 +67,14 @@ struct drm_i915_gem_object_ops { const char *name; /* friendly name for debug, e.g. lockdep classes */ }; +enum i915_map_type { + I915_MAP_WB = 0, + I915_MAP_WC, +#define I915_MAP_OVERRIDE BIT(31) + I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE, + I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE, +}; + enum i915_mmap_type { I915_MMAP_TYPE_GTT = 0, I915_MMAP_TYPE_WC, @@ -142,8 +150,6 @@ struct drm_i915_gem_object { */ struct list_head obj_link; - /** Stolen memory for this object, instead of being backed by shmem. 
*/ - struct drm_mm_node *stolen; union { struct rcu_head rcu; struct llist_node freed; @@ -167,6 +173,7 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) #define I915_BO_READONLY BIT(2) +#define I915_TILING_QUIRK_BIT 3 /* unknown swizzling; do not release! */ /* * Is the object to be mapped as read-only to the GPU @@ -275,12 +282,6 @@ struct drm_i915_gem_object { * pages were last acquired. */ bool dirty:1; - - /** - * This is set if the object has been pinned due to unknown - * swizzling. - */ - bool quirked:1; } mm; /** Record of address bit 17 of each page at last unbind. */ @@ -295,6 +296,8 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; + struct drm_mm_node *stolen; + unsigned long scratch; u64 encode; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 3db3c667c486..43028f3539a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -16,6 +16,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); unsigned long supported = INTEL_INFO(i915)->page_sizes; + bool shrinkable; int i; lockdep_assert_held(&obj->mm.lock); @@ -38,13 +39,6 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; - if (i915_gem_object_is_tiled(obj) && - i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - GEM_BUG_ON(!sg_page_sizes); obj->mm.page_sizes.phys = sg_page_sizes; @@ -63,7 +57,16 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); - if (i915_gem_object_is_shrinkable(obj)) { + shrinkable = i915_gem_object_is_shrinkable(obj); + + if (i915_gem_object_is_tiled(obj) && + i915->quirks & 
QUIRK_PIN_SWIZZLED_PAGES) { + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_set_tiling_quirk(obj); + shrinkable = false; + } + + if (shrinkable) { struct list_head *list; unsigned long flags; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index f47dafdda539..01fe89afe8c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -213,7 +213,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) if (obj->ops == &i915_gem_phys_ops) return 0; - if (obj->ops != &i915_gem_shmem_ops) + if (!i915_gem_object_is_shmem(obj)) return -EINVAL; err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); @@ -227,7 +227,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) goto err_unlock; } - if (obj->mm.quirked) { + if (i915_gem_object_has_tiling_quirk(obj)) { err = -EFAULT; goto err_unlock; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 40d3e40500fa..000e1cd8e920 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,6 +11,13 @@ #include "i915_drv.h" +#if defined(CONFIG_X86) +#include <asm/smp.h> +#else +#define wbinvd_on_all_cpus() \ + pr_warn(DRIVER_NAME ": Missing cache flush in %s\n", __func__) +#endif + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("%s\n", dev_name(i915->drm.dev)); @@ -32,13 +39,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); } -static struct drm_i915_gem_object *first_mm_object(struct list_head *list) -{ - return list_first_entry_or_null(list, - struct drm_i915_gem_object, - mm.link); -} - void i915_gem_suspend_late(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -48,6 +48,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) NULL }, **phase; unsigned long flags; + bool flush = false; /* 
* Neither the BIOS, ourselves or any other kernel @@ -73,29 +74,56 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { - LIST_HEAD(keep); + list_for_each_entry(obj, *phase, mm.link) { + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0; + __start_cpu_write(obj); /* presume auto-hibernate */ + } + } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + if (flush) + wbinvd_on_all_cpus(); +} - while ((obj = first_mm_object(*phase))) { - list_move_tail(&obj->mm.link, &keep); +int i915_gem_freeze(struct drm_i915_private *i915) +{ + /* Discard all purgeable objects, let userspace recover those as + * required after resuming. + */ + i915_gem_shrink_all(i915); - /* Beware the background _i915_gem_free_objects */ - if (!kref_get_unless_zero(&obj->base.refcount)) - continue; + return 0; +} - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); +int i915_gem_freeze_late(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; - i915_gem_object_lock(obj, NULL); - drm_WARN_ON(&i915->drm, - i915_gem_object_set_to_gtt_domain(obj, false)); - i915_gem_object_unlock(obj); - i915_gem_object_put(obj); + /* + * Called just before we write the hibernation image. + * + * We need to update the domain tracking to reflect that the CPU + * will be accessing all the pages to create and restore from the + * hibernation, and so upon restoration those pages will be in the + * CPU domain. + * + * To make sure the hibernation image contains the latest state, + * we update that state just before writing out the image. 
+ * + * To try and reduce the hibernation image, we manually shrink + * the objects as well, see i915_gem_freeze() + */ - spin_lock_irqsave(&i915->mm.obj_lock, flags); - } + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + i915_gem_shrink(i915, -1UL, NULL, ~0); + i915_gem_drain_freed_objects(i915); - list_splice_tail(&keep, *phase); - } - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + wbinvd_on_all_cpus(); + list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) + __start_cpu_write(obj); + + return 0; } void i915_gem_resume(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index 26b78dbdc225..c9a66630e92e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -19,4 +19,7 @@ void i915_gem_idle_work_handler(struct work_struct *work); void i915_gem_suspend(struct drm_i915_private *i915); void i915_gem_suspend_late(struct drm_i915_private *i915); +int i915_gem_freeze(struct drm_i915_private *i915); +int i915_gem_freeze_late(struct drm_i915_private *i915); + #endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 835bd01f2e5d..3e3dad22a683 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -143,6 +143,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, unsigned int flags) { struct drm_i915_gem_object *obj; + int err; /* * NB: Our use of resource_size_t for the size stems from using struct @@ -173,9 +174,18 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (overflows_type(size, obj->base.size)) return ERR_PTR(-E2BIG); - obj = mem->ops->create_object(mem, size, flags); - if (!IS_ERR(obj)) - trace_i915_gem_object_create(obj); + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + err = mem->ops->init_object(mem, obj, size, flags); + if (err) + goto 
err_object_free; + + trace_i915_gem_object_create(obj); return obj; + +err_object_free: + i915_gem_object_free(obj); + return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 08c9c25f1109..680b370a8ef3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -464,26 +464,21 @@ static int __create_shmem(struct drm_i915_private *i915, return 0; } -static struct drm_i915_gem_object * -create_shmem(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +static int shmem_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; struct address_space *mapping; unsigned int cache_level; gfp_t mask; int ret; - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - ret = __create_shmem(i915, &obj->base, size); if (ret) - goto fail; + return ret; mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { @@ -522,11 +517,7 @@ create_shmem(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem, 0); - return obj; - -fail: - i915_gem_object_free(obj); - return ERR_PTR(ret); + return 0; } struct drm_i915_gem_object * @@ -611,7 +602,7 @@ static void release_shmem(struct intel_memory_region *mem) static const struct intel_memory_region_ops shmem_region_ops = { .init = init_shmem, .release = release_shmem, - .create_object = create_shmem, + .init_object = shmem_object_init, }; struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) @@ -621,3 +612,8 @@ struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) PAGE_SIZE, 0, &shmem_region_ops); } + +bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj) +{ + return obj->ops == 
&i915_gem_shmem_ops; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 41b9fbf4dbcc..a1e197a6e999 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -621,18 +621,13 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { .release = i915_gem_object_release_stolen, }; -static struct drm_i915_gem_object * -__i915_gem_object_create_stolen(struct intel_memory_region *mem, - struct drm_mm_node *stolen) +static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + struct drm_mm_node *stolen) { static struct lock_class_key lock_class; - struct drm_i915_gem_object *obj; unsigned int cache_level; - int err = -ENOMEM; - - obj = i915_gem_object_alloc(); - if (!obj) - goto err; + int err; drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); @@ -644,55 +639,47 @@ __i915_gem_object_create_stolen(struct intel_memory_region *mem, err = i915_gem_object_pin_pages(obj); if (err) - goto cleanup; + return err; i915_gem_object_init_memory_region(obj, mem, 0); - return obj; - -cleanup: - i915_gem_object_free(obj); -err: - return ERR_PTR(err); + return 0; } -static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; if (!drm_mm_initialized(&i915->mm.stolen)) - return ERR_PTR(-ENODEV); + return -ENODEV; if (size == 0) - return ERR_PTR(-EINVAL); + return -EINVAL; stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return ERR_PTR(-ENOMEM); + return 
-ENOMEM; ret = i915_gem_stolen_insert_node(i915, stolen, size, 4096); - if (ret) { - obj = ERR_PTR(ret); + if (ret) goto err_free; - } - obj = __i915_gem_object_create_stolen(mem, stolen); - if (IS_ERR(obj)) + ret = __i915_gem_object_create_stolen(mem, obj, stolen); + if (ret) goto err_remove; - return obj; + return 0; err_remove: i915_gem_stolen_remove_node(i915, stolen); err_free: kfree(stolen); - return obj; + return ret; } struct drm_i915_gem_object * @@ -722,7 +709,7 @@ static void release_stolen(struct intel_memory_region *mem) static const struct intel_memory_region_ops i915_region_stolen_ops = { .init = init_stolen, .release = release_stolen, - .create_object = _i915_gem_object_create_stolen, + .init_object = _i915_gem_object_stolen_init, }; struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) @@ -766,21 +753,32 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, mutex_lock(&i915->mm.stolen_lock); ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); mutex_unlock(&i915->mm.stolen_lock); - if (ret) { - obj = ERR_PTR(ret); + if (ret) goto err_free; - } - obj = __i915_gem_object_create_stolen(mem, stolen); - if (IS_ERR(obj)) + obj = i915_gem_object_alloc(); + if (!obj) { + ret = -ENOMEM; goto err_stolen; + } + + ret = __i915_gem_object_create_stolen(mem, obj, stolen); + if (ret) + goto err_object_free; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); return obj; +err_object_free: + i915_gem_object_free(obj); err_stolen: i915_gem_stolen_remove_node(i915, stolen); err_free: kfree(stolen); - return obj; + return ERR_PTR(ret); +} + +bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj) +{ + return obj->ops == &i915_gem_object_stolen_ops; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index 61e028063f9f..b03489706796 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -30,6 
+30,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv resource_size_t stolen_offset, resource_size_t size); +bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj); + #define I915_GEM_STOLEN_BIAS SZ_128K #endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ffcaee74a249..d589d3d81085 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -270,14 +270,14 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (tiling == I915_TILING_NONE) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_unpin_pages(obj); - obj->mm.quirked = false; + GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_clear_tiling_quirk(obj); + i915_gem_object_make_shrinkable(obj); } if (!i915_gem_object_is_tiled(obj)) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_make_unshrinkable(obj); + i915_gem_object_set_tiling_quirk(obj); } } mutex_unlock(&obj->mm.lock); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index c1b13ac50d0f..4b9856d5ba14 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -5,6 +5,7 @@ */ #include <linux/dma-fence-array.h> +#include <linux/dma-fence-chain.h> #include <linux/jiffies.h> #include "gt/intel_engine.h" @@ -44,8 +45,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(resv, - &excl, &count, &shared); + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -91,8 +91,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, return timeout; } -static void 
__fence_set_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -103,27 +103,47 @@ static void __fence_set_priority(struct dma_fence *fence, rq = to_request(fence); engine = rq->engine; - local_bh_disable(); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) engine->schedule(rq, attr); rcu_read_unlock(); - local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static inline bool __dma_fence_is_chain(const struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +void i915_gem_fence_wait_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { + if (dma_fence_is_signaled(fence)) + return; + + local_bh_disable(); + /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { struct dma_fence_array *array = to_dma_fence_array(fence); int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], attr); + fence_set_priority(array->fences[i], attr); + } else if (__dma_fence_is_chain(fence)) { + struct dma_fence *iter; + + /* The chain is ordered; if we boost the last, we boost all */ + dma_fence_chain_for_each(iter, fence) { + fence_set_priority(to_dma_fence_chain(iter)->fence, + attr); + break; + } + dma_fence_put(iter); } else { - __fence_set_priority(fence, attr); + fence_set_priority(fence, attr); } + + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } int @@ -139,12 +159,12 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, int ret; ret = dma_resv_get_fences_rcu(obj->base.resv, - &excl, &count, &shared); + &excl, &count, &shared); if (ret) return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], attr); + 
i915_gem_fence_wait_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -154,7 +174,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, attr); + i915_gem_fence_wait_priority(excl, attr); dma_fence_put(excl); } return 0; diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 680bd9442eb0..e08dff376339 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -12,9 +12,9 @@ #include "intel_gt.h" /* Write pde (index) from the page directory @pd to the page table @pt */ -static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, - const unsigned int pde, - const struct i915_page_table *pt) +static void gen6_write_pde(const struct gen6_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]); @@ -27,8 +27,6 @@ void gen7_ppgtt_enable(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - enum intel_engine_id id; u32 ecochk; intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B); @@ -41,13 +39,6 @@ void gen7_ppgtt_enable(struct intel_gt *gt) ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; } intel_uncore_write(uncore, GAM_ECOCHK, ecochk); - - for_each_engine(engine, gt, id) { - /* GFX_MODE is per-ring on gen7+ */ - ENGINE_WRITE(engine, - RING_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } } void gen6_ppgtt_enable(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index 94465374ca2f..de575fdb033f 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -40,7 +40,7 @@ struct batch_vals { u32 size; }; -static inline int num_primitives(const struct batch_vals *bv) +static int num_primitives(const struct batch_vals *bv) { /* * We 
need to saturate the GPU with work in order to dispatch @@ -240,7 +240,7 @@ gen7_emit_state_base_address(struct batch_chunk *batch, /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ - *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY; + *cs++ = (batch_addr(batch) + surface_state_base) | BASE_ADDRESS_MODIFY; /* dynamic */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* indirect */ @@ -353,19 +353,21 @@ static void gen7_emit_pipeline_flush(struct batch_chunk *batch) static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 8); + u32 *cs = batch_alloc_items(batch, 0, 10); /* ivb: Stall before STATE_CACHE_INVALIDATE */ - *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = GFX_OP_PIPE_CONTROL(5); *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + *cs++ = 0; - *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = GFX_OP_PIPE_CONTROL(5); *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; *cs++ = 0; + *cs++ = 0; batch_advance(batch, cs); } @@ -390,6 +392,18 @@ static void emit_batch(struct i915_vma * const vma, &cb_kernel_ivb, desc_count); + /* Reset inherited context registers */ + gen7_emit_pipeline_flush(&cmds); + gen7_emit_pipeline_invalidate(&cmds); + batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); + batch_add(&cmds, 0xffff0000); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); + batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + + /* Switch to the media pipeline and our base address */ gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); @@ -399,9 +413,11 @@ static void emit_batch(struct i915_vma * const vma, gen7_emit_state_base_address(&cmds, descriptors); gen7_emit_pipeline_invalidate(&cmds); + /* Set the clear-residual 
kernel state */ gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); + /* Execute the kernel on all HW threads */ for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 8066b93e6dc4..07ba524da90b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -330,7 +330,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) return 0; } -static inline u32 preempt_address(struct intel_engine_cs *engine) +static u32 preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_PREEMPT_ADDR); @@ -488,6 +488,7 @@ static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs) static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs) { + *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */ *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_POLL | @@ -495,6 +496,7 @@ static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = preempt_address(rq->engine); *cs++ = 0; + *cs++ = MI_NOOP; return cs; } diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index a37c968ef8f7..755522ced60d 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -109,7 +109,7 @@ static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) #define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) -static inline unsigned int +static unsigned int gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) { const int shift = gen8_pd_shift(lvl); @@ -125,7 +125,7 @@ gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) return i915_pde_index(end, shift) - *idx; } -static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) +static bool 
gen8_pd_contains(u64 start, u64 end, int lvl) { const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); @@ -133,7 +133,7 @@ static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) return (start ^ end) & mask && (start & ~mask) == 0; } -static inline unsigned int gen8_pt_count(u64 start, u64 end) +static unsigned int gen8_pt_count(u64 start, u64 end) { GEM_BUG_ON(start >= end); if ((start ^ end) >> gen8_pd_shift(1)) @@ -142,14 +142,13 @@ static inline unsigned int gen8_pt_count(u64 start, u64 end) return end - start; } -static inline unsigned int -gen8_pd_top_count(const struct i915_address_space *vm) +static unsigned int gen8_pd_top_count(const struct i915_address_space *vm) { unsigned int shift = __gen8_pte_shift(vm->top); return (vm->total + (1ull << shift) - 1) >> shift; } -static inline struct i915_page_directory * +static struct i915_page_directory * gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) { struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); @@ -160,7 +159,7 @@ gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); } -static inline struct i915_page_directory * +static struct i915_page_directory * gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) { return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index be2c285a0ac7..34a645d6babd 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -453,16 +453,17 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) { struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; struct intel_context *ce = rq->context; - unsigned long flags; bool release; - if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + spin_lock(&ce->signal_lock); + if 
(!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + spin_unlock(&ce->signal_lock); return; + } - spin_lock_irqsave(&ce->signal_lock, flags); list_del_rcu(&rq->signal_link); release = remove_signaling_context(b, ce); - spin_unlock_irqrestore(&ce->signal_lock, flags); + spin_unlock(&ce->signal_lock); if (release) intel_context_put(ce); @@ -517,8 +518,8 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) list_for_each_entry_rcu(rq, &ce->signals, signal_link) drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? "*" : "", jiffies_to_msecs(jiffies - rq->emitted_jiffies)); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 8eea3da19564..376e82e17061 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -342,7 +342,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->schedule = NULL; ewma__engine_latency_init(&engine->latency); - seqlock_init(&engine->stats.lock); + seqcount_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1676,7 +1676,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, ktime_to_ms(intel_engine_get_busy_time(engine, &dummy))); drm_printf(m, "\tForcewake: %x domains, %d active\n", - engine->fw_domain, atomic_read(&engine->fw_active)); + engine->fw_domain, READ_ONCE(engine->fw_active)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); @@ -1754,7 +1754,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, * add it to the total. 
*/ *now = ktime_get(); - if (atomic_read(&engine->stats.active)) + if (READ_ONCE(engine->stats.active)) total = ktime_add(total, ktime_sub(*now, engine->stats.start)); return total; @@ -1773,9 +1773,9 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) ktime_t total; do { - seq = read_seqbegin(&engine->stats.lock); + seq = read_seqcount_begin(&engine->stats.lock); total = __intel_engine_get_busy_time(engine, now); - } while (read_seqretry(&engine->stats.lock, seq)); + } while (read_seqcount_retry(&engine->stats.lock, seq)); return total; } @@ -1811,7 +1811,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) struct intel_timeline *tl = request->context->timeline; list_for_each_entry_from_reverse(request, &tl->requests, link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) break; active = request; @@ -1822,10 +1822,10 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) return active; list_for_each_entry(request, &engine->active.requests, sched.link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) continue; - if (!i915_request_started(request)) + if (!__i915_request_has_started(request)) continue; /* More than one preemptible request may match! 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2843db731b7d..e67d09259dd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -79,7 +79,7 @@ static int __engine_unpark(struct intel_wakeref *wf) #if IS_ENABLED(CONFIG_LOCKDEP) -static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +static unsigned long __timeline_mark_lock(struct intel_context *ce) { unsigned long flags; @@ -89,8 +89,8 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce) return flags; } -static inline void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) +static void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); local_irq_restore(flags); @@ -98,13 +98,13 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #else -static inline unsigned long __timeline_mark_lock(struct intel_context *ce) +static unsigned long __timeline_mark_lock(struct intel_context *ce) { return 0; } -static inline void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) +static void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h new file mode 100644 index 000000000000..24fbdd94351a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_ENGINE_STATS_H__ +#define __INTEL_ENGINE_STATS_H__ + +#include <linux/atomic.h> +#include <linux/ktime.h> +#include <linux/seqlock.h> + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "intel_engine.h" + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (engine->stats.active) { + 
engine->stats.active++; + return; + } + + /* The writer is serialised; but the pmu reader may be from hardirq */ + local_irq_save(flags); + write_seqcount_begin(&engine->stats.lock); + + engine->stats.start = ktime_get(); + engine->stats.active++; + + write_seqcount_end(&engine->stats.lock); + local_irq_restore(flags); + + GEM_BUG_ON(!engine->stats.active); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + GEM_BUG_ON(!engine->stats.active); + if (engine->stats.active > 1) { + engine->stats.active--; + return; + } + + local_irq_save(flags); + write_seqcount_begin(&engine->stats.lock); + + engine->stats.active--; + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); + + write_seqcount_end(&engine->stats.lock); + local_irq_restore(flags); +} + +#endif /* __INTEL_ENGINE_STATS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index df62e793e747..d2346b425547 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -319,7 +319,7 @@ struct intel_engine_cs { * as possible. */ enum forcewake_domains fw_domain; - atomic_t fw_active; + unsigned int fw_active; unsigned long context_tag; @@ -516,12 +516,12 @@ struct intel_engine_cs { /** * @active: Number of contexts currently scheduled in. */ - atomic_t active; + unsigned int active; /** * @lock: Lock protecting the below fields. */ - seqlock_t lock; + seqcount_t lock; /** * @total: Total time this engine was busy. 
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index d7d5a58990bb..ac1be7a632d3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -115,6 +115,7 @@ #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_engine_stats.h" #include "intel_execlists_submission.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -230,8 +231,7 @@ active_request(const struct intel_timeline * const tl, struct i915_request *rq) return __active_request(tl, rq, 0); } -static inline void -ring_set_paused(const struct intel_engine_cs *engine, int state) +static void ring_set_paused(const struct intel_engine_cs *engine, int state) { /* * We inspect HWS_PREEMPT with a semaphore inside @@ -244,12 +244,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int state) wmb(); } -static inline struct i915_priolist *to_priolist(struct rb_node *rb) +static struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); } -static inline int rq_prio(const struct i915_request *rq) +static int rq_prio(const struct i915_request *rq) { return READ_ONCE(rq->sched.attr.priority); } @@ -299,8 +299,8 @@ static int virtual_prio(const struct intel_engine_execlists *el) return rb ? 
rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; } -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq) +static bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int last_prio; @@ -351,7 +351,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, queue_prio(&engine->execlists)) > last_prio; } -__maybe_unused static inline bool +__maybe_unused static bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { @@ -418,7 +418,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) return __unwind_incomplete_requests(engine); } -static inline void +static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* @@ -432,39 +432,6 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -static void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (atomic_add_unless(&engine->stats.active, 1, 0)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (!atomic_add_unless(&engine->stats.active, 1, 0)) { - engine->stats.start = ktime_get(); - atomic_inc(&engine->stats.active); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - GEM_BUG_ON(!atomic_read(&engine->stats.active)); - - if (atomic_add_unless(&engine->stats.active, -1, 1)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (atomic_dec_and_test(&engine->stats.active)) { - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { @@ -503,7 +470,7 @@ static void reset_active(struct i915_request 
*rq, ce->lrc.lrca = lrc_update_regs(ce, engine, head); } -static inline struct intel_engine_cs * +static struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { struct intel_engine_cs * const engine = rq->engine; @@ -539,7 +506,7 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid |= engine->execlists.ccid; __intel_gt_pm_get(engine->gt); - if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) + if (engine->fw_domain && !engine->fw_active++) intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -549,7 +516,7 @@ __execlists_schedule_in(struct i915_request *rq) return engine; } -static inline void execlists_schedule_in(struct i915_request *rq, int idx) +static void execlists_schedule_in(struct i915_request *rq, int idx) { struct intel_context * const ce = rq->context; struct intel_engine_cs *old; @@ -608,9 +575,9 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_hi_schedule(&ve->base.execlists.tasklet); } -static inline void __execlists_schedule_out(struct i915_request *rq) +static void __execlists_schedule_out(struct i915_request * const rq, + struct intel_context * const ce) { - struct intel_context * const ce = rq->context; struct intel_engine_cs * const engine = rq->engine; unsigned int ccid; @@ -621,6 +588,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq) */ CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); + GEM_BUG_ON(ce->inflight != engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) lrc_check_regs(ce, engine, "after"); @@ -645,7 +613,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq) lrc_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) + if (engine->fw_domain && !--engine->fw_active) 
intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); intel_gt_pm_put_async(engine->gt); @@ -660,10 +628,12 @@ static inline void __execlists_schedule_out(struct i915_request *rq) */ if (ce->engine != engine) kick_siblings(rq, ce); + + WRITE_ONCE(ce->inflight, NULL); + intel_context_put(ce); } -static inline void -execlists_schedule_out(struct i915_request *rq) +static inline void execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; @@ -671,12 +641,8 @@ execlists_schedule_out(struct i915_request *rq) GEM_BUG_ON(!ce->inflight); ce->inflight = ptr_dec(ce->inflight); - if (!__intel_context_inflight_count(ce->inflight)) { - GEM_BUG_ON(ce->inflight != rq->engine); - __execlists_schedule_out(rq); - WRITE_ONCE(ce->inflight, NULL); - intel_context_put(ce); - } + if (!__intel_context_inflight_count(ce->inflight)) + __execlists_schedule_out(rq, ce); i915_request_put(rq); } @@ -728,7 +694,7 @@ static u64 execlists_update_context(struct i915_request *rq) return desc; } -static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) +static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) { if (execlists->ctrl_reg) { writel(lower_32_bits(desc), execlists->submit_reg + port * 2); @@ -757,7 +723,7 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) return buf; } -static __maybe_unused void +static __maybe_unused noinline void trace_ports(const struct intel_engine_execlists *execlists, const char *msg, struct i915_request * const *ports) @@ -774,13 +740,13 @@ trace_ports(const struct intel_engine_execlists *execlists, dump_port(p1, sizeof(p1), ", ", ports[1])); } -static inline bool +static bool reset_in_progress(const struct intel_engine_execlists *execlists) { return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } -static __maybe_unused bool +static __maybe_unused noinline bool assert_pending_valid(const struct intel_engine_execlists 
*execlists, const char *msg) { @@ -1258,12 +1224,20 @@ static void set_preempt_timeout(struct intel_engine_cs *engine, active_preempt_timeout(engine, rq)); } +static bool completed(const struct i915_request *rq) +{ + if (i915_request_has_sentinel(rq)) + return false; + + return __i915_request_is_complete(rq); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; - struct i915_request *last = *execlists->active; + struct i915_request *last, * const *active; struct virtual_engine *ve; struct rb_node *rb; bool submit = false; @@ -1300,21 +1274,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * i.e. we will retrigger preemption following the ack in case * of trouble. * - * In theory we can skip over completed contexts that have not - * yet been processed by events (as those events are in flight): - * - * while ((last = *active) && i915_request_completed(last)) - * active++; - * - * However, the GPU cannot handle this as it will ultimately - * find itself trying to jump back into a context it has just - * completed and barf. */ + active = execlists->active; + while ((last = *active) && completed(last)) + active++; if (last) { - if (__i915_request_is_complete(last)) { - goto check_secondary; - } else if (need_preempt(engine, last)) { + if (need_preempt(engine, last)) { ENGINE_TRACE(engine, "preempting last=%llx:%lld, prio=%d, hint=%d\n", last->fence.context, @@ -1393,9 +1359,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * we hopefully coalesce several updates into a single * submission. */ -check_secondary: - if (!list_is_last(&last->sched.link, - &engine->active.requests)) { + if (active[1]) { /* * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. @@ -1596,7 +1560,7 @@ done: * of ordered contexts. 
*/ if (submit && - memcmp(execlists->active, + memcmp(active, execlists->pending, (port - execlists->pending) * sizeof(*port))) { *port = NULL; @@ -1604,7 +1568,7 @@ done: execlists_schedule_in(*port, port - execlists->pending); WRITE_ONCE(execlists->yield, -1); - set_preempt_timeout(engine, *execlists->active); + set_preempt_timeout(engine, *active); execlists_submit_ports(engine); } else { ring_set_paused(engine, 0); @@ -1621,12 +1585,12 @@ static void execlists_dequeue_irq(struct intel_engine_cs *engine) local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ } -static inline void clear_ports(struct i915_request **ports, int count) +static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); } -static inline void +static void copy_ports(struct i915_request **dst, struct i915_request **src, int count) { /* A memcpy_p() would be very useful here! */ @@ -1660,8 +1624,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists, return inactive; } -static inline void -invalidate_csb_entries(const u64 *first, const u64 *last) +static void invalidate_csb_entries(const u64 *first, const u64 *last) { clflush((void *)first); clflush((void *)last); @@ -1693,7 +1656,7 @@ invalidate_csb_entries(const u64 *first, const u64 *last) * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline bool gen12_csb_parse(const u64 csb) +static bool gen12_csb_parse(const u64 csb) { bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); bool new_queue = @@ -1720,7 +1683,7 @@ static inline bool gen12_csb_parse(const u64 csb) return false; } -static inline bool gen8_csb_parse(const u64 csb) +static bool gen8_csb_parse(const u64 csb) { return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } @@ -1759,8 +1722,7 @@ wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) return entry; } -static inline u64 
-csb_read(const struct intel_engine_cs *engine, u64 * const csb) +static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb) { u64 entry = READ_ONCE(*csb); @@ -2026,6 +1988,9 @@ static void __execlists_hold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; @@ -2124,6 +2089,9 @@ static void __execlists_unhold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Propagate any change in error status */ if (rq->fence.error) i915_request_set_error_once(w, rq->fence.error); @@ -3180,8 +3148,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } } -static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine) +static void logical_ring_default_irqs(struct intel_engine_cs *engine) { unsigned int shift = 0; @@ -3296,7 +3263,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) old = fetch_and_zero(&ve->request); if (old) { - GEM_BUG_ON(!i915_request_completed(old)); + GEM_BUG_ON(!__i915_request_is_complete(old)); __i915_request_submit(old); i915_request_put(old); } @@ -3573,7 +3540,7 @@ static void virtual_submit_request(struct i915_request *rq) } if (ve->request) { /* background completion from preempt-to-busy */ - GEM_BUG_ON(!i915_request_completed(ve->request)); + GEM_BUG_ON(!__i915_request_is_complete(ve->request)); __i915_request_submit(ve->request); i915_request_put(ve->request); } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index fd6c8fa54812..ec2bf963ced9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -535,16 +535,39 @@ static int init_ggtt(struct i915_ggtt *ggtt) mutex_init(&ggtt->error_mutex); if (ggtt->mappable_end) { - /* Reserve a 
mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, - &ggtt->error_capture, - PAGE_SIZE, 0, - I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; + /* + * Reserve a mappable slot for our lockless error capture. + * + * We strongly prefer taking address 0x0 in order to protect + * other critical buffers against accidental overwrites, + * as writing to address 0 is a very common mistake. + * + * Since 0 may already be in use by the system (e.g. the BIOS + * framebuffer), we let the reservation fail quietly and hope + * 0 remains reserved always. + * + * If we fail to reserve 0, and then fail to find any space + * for an error-capture, remain silent. We can afford not + * to reserve an error_capture node as we have fallback + * paths, and we trust that 0 will remain reserved. However, + * the only likely reason for failure to insert is a driver + * bug, which we expect to cause other failures... + */ + ggtt->error_capture.size = I915_GTT_PAGE_SIZE; + ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; + if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture)) + drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + ggtt->error_capture.size, 0, + ggtt->error_capture.color, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); } + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_dbg(&ggtt->vm.i915->drm, + "Reserved GGTT:[%llx, %llx] for use by error capture\n", + ggtt->error_capture.start, + ggtt->error_capture.start + ggtt->error_capture.size); /* * The upper portion of the GuC address space has a sizeable hole @@ -557,9 +580,9 @@ static int init_ggtt(struct i915_ggtt *ggtt) /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - drm_dbg_kms(&ggtt->vm.i915->drm, - "clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); + drm_dbg(&ggtt->vm.i915->drm, + "clearing unused GTT space: [%lx, %lx]\n", + 
hole_start, hole_end); ggtt->vm.clear_range(&ggtt->vm, hole_start, hole_end - hole_start); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 104cb30e8c13..06d84cf09570 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -145,7 +145,8 @@ static void pool_retire(struct i915_active *ref) } static struct intel_gt_buffer_pool_node * -node_create(struct intel_gt_buffer_pool *pool, size_t sz) +node_create(struct intel_gt_buffer_pool *pool, size_t sz, + enum i915_map_type type) { struct intel_gt *gt = to_gt(pool); struct intel_gt_buffer_pool_node *node; @@ -169,12 +170,14 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz) i915_gem_object_set_readonly(obj); + node->type = type; node->obj = obj; return node; } struct intel_gt_buffer_pool_node * -intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size, + enum i915_map_type type) { struct intel_gt_buffer_pool *pool = &gt->buffer_pool; struct intel_gt_buffer_pool_node *node; @@ -191,6 +194,9 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) if (node->obj->base.size < size) continue; + if (node->type != type) + continue; + age = READ_ONCE(node->age); if (!age) continue; @@ -205,7 +211,7 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) rcu_read_unlock(); if (&node->link == list) { - node = node_create(pool, size); + node = node_create(pool, size, type); if (IS_ERR(node)) return node; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h index 42cbac003e8a..6068f8f1762e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h @@ -15,7 +15,8 @@ struct intel_gt; struct i915_request; struct intel_gt_buffer_pool_node * -intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size); 
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size, + enum i915_map_type type); static inline int intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index bcf1658c9633..d8d82c890da8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -11,10 +11,9 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include "gem/i915_gem_object_types.h" #include "i915_active_types.h" -struct drm_i915_gem_object; - struct intel_gt_buffer_pool { spinlock_t lock; struct list_head cache_list[4]; @@ -31,6 +30,7 @@ struct intel_gt_buffer_pool_node { struct rcu_head rcu; }; unsigned long age; + enum i915_map_type type; }; #endif /* INTEL_GT_BUFFER_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a0fc78c89b61..94f485b591af 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1035,7 +1035,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return cs; } -static inline u32 context_wa_bb_offset(const struct intel_context *ce) +static u32 context_wa_bb_offset(const struct intel_context *ce) { return PAGE_SIZE * ce->wa_bb_page; } @@ -1098,7 +1098,7 @@ setup_indirect_ctx_bb(const struct intel_context *ce, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. 
*/ -static inline u32 lrc_descriptor(const struct intel_context *ce) +static u32 lrc_descriptor(const struct intel_context *ce) { u32 desc; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c4512ee4daf2..8acb84960cd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -472,7 +472,7 @@ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, return table->table[I915_MOCS_PTE].l3cc_value; } -static inline u32 l3cc_combine(u16 low, u16 high) +static u32 l3cc_combine(u16 low, u16 high) { return low | (u32)high << 16; } diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 01b7d08532f2..3f940ae27028 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -80,7 +80,7 @@ void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl) kfree(pt); } -static inline void +static void write_dma_entry(struct drm_i915_gem_object * const pdma, const unsigned short idx, const u64 encoded_entry) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index cce53fb9589c..9843e1d4327f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -49,7 +49,7 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) return rc6_to_gt(rc)->i915; } -static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) { intel_uncore_write_fw(uncore, reg, val); } diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 421493eb5788..e326d3c0bc10 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -98,7 +98,7 @@ region_lmem_init(struct intel_memory_region *mem) static const struct intel_memory_region_ops intel_region_lmem_ops = { .init = 
region_lmem_init, .release = region_lmem_release, - .create_object = __i915_gem_lmem_object_create, + .init_object = __i915_gem_lmem_object_init, }; struct intel_memory_region * diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d5614e195b81..afe0342dcd47 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -151,8 +151,7 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); - - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rcu_read_lock(); /* protect the GEM context */ if (guilty) { @@ -1110,7 +1109,7 @@ error: goto finish; } -static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) +static int intel_gt_reset_engine(struct intel_engine_cs *engine) { return __intel_gt_reset(engine->gt, engine->mask); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 06385550450c..78d1360caa0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -42,7 +42,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - if (vma->obj->stolen) + if (i915_gem_object_is_stolen(vma->obj)) flags |= PIN_MAPPABLE; else flags |= PIN_HIGH; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 20f42722be8b..4984ff565424 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -122,31 +122,27 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) hwsp = RING_HWS_PGA(engine->mmio_base); } - intel_uncore_write(engine->uncore, hwsp, offset); - intel_uncore_posting_read(engine->uncore, hwsp); + intel_uncore_write_fw(engine->uncore, hwsp, offset); + intel_uncore_posting_read_fw(engine->uncore, hwsp); } static void flush_cs_tlb(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - if (!IS_GEN_RANGE(dev_priv, 6, 7)) + if (!IS_GEN_RANGE(engine->i915, 6, 7)) return; /* ring should be idle before issuing a sync flush*/ - drm_WARN_ON(&dev_priv->drm, - (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - - ENGINE_WRITE(engine, RING_INSTPM, - _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | - INSTPM_SYNC_FLUSH)); - if (intel_wait_for_register(engine->uncore, - RING_INSTPM(engine->mmio_base), - INSTPM_SYNC_FLUSH, 0, - 1000)) - drm_err(&dev_priv->drm, - "%s: wait for SyncFlush to complete for TLB invalidation timed out\n", - engine->name); + GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + + ENGINE_WRITE_FW(engine, RING_INSTPM, + _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | + INSTPM_SYNC_FLUSH)); + if (__intel_wait_for_register_fw(engine->uncore, + RING_INSTPM(engine->mmio_base), + INSTPM_SYNC_FLUSH, 0, + 2000, 0, NULL)) + ENGINE_TRACE(engine, + "wait for SyncFlush to complete for TLB invalidation timed out\n"); } static void ring_setup_status_page(struct intel_engine_cs *engine) @@ -157,21 +153,6 @@ static void ring_setup_status_page(struct intel_engine_cs 
*engine) flush_cs_tlb(engine); } -static bool stop_ring(struct intel_engine_cs *engine) -{ - intel_engine_stop_cs(engine); - - ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL)); - - ENGINE_WRITE(engine, RING_HEAD, 0); - ENGINE_WRITE(engine, RING_TAIL, 0); - - /* The ring must be empty before it is disabled */ - ENGINE_WRITE(engine, RING_CTL, 0); - - return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; -} - static struct i915_address_space *vm_alias(struct i915_address_space *vm) { if (i915_is_ggtt(vm)) @@ -189,9 +170,16 @@ static void set_pp_dir(struct intel_engine_cs *engine) { struct i915_address_space *vm = vm_alias(engine->gt->vm); - if (vm) { - ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); - ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm)); + if (!vm) + return; + + ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); + ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm)); + + if (INTEL_GEN(engine->i915) >= 7) { + ENGINE_WRITE_FW(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); } } @@ -199,38 +187,10 @@ static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->legacy.ring; - int ret = 0; ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); - intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - - /* WaClearRingBufHeadRegAtInit:ctg,elk */ - if (!stop_ring(engine)) { - /* G45 ring initialization often fails to reset head to zero */ - drm_dbg(&dev_priv->drm, "%s head not reset to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); - - if (!stop_ring(engine)) { - drm_err(&dev_priv->drm, - "failed to set %s head to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, 
RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); - ret = -EIO; - goto out; - } - } - if (HWS_NEEDS_PHYSICAL(dev_priv)) ring_setup_phys_status_page(engine); else @@ -247,7 +207,7 @@ static int xcs_resume(struct intel_engine_cs *engine) * also enforces ordering), otherwise the hw might lose the new ring * register values. */ - ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma)); + ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma)); /* Check that the ring offsets point within the ring! */ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); @@ -257,46 +217,44 @@ static int xcs_resume(struct intel_engine_cs *engine) set_pp_dir(engine); /* First wake the ring up to an empty/idle ring */ - ENGINE_WRITE(engine, RING_HEAD, ring->head); - ENGINE_WRITE(engine, RING_TAIL, ring->head); + ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); + ENGINE_WRITE_FW(engine, RING_TAIL, ring->head); ENGINE_POSTING_READ(engine, RING_TAIL); - ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID); + ENGINE_WRITE_FW(engine, RING_CTL, + RING_CTL_SIZE(ring->size) | RING_VALID); /* If the head is still not zero, the ring is dead */ - if (intel_wait_for_register(engine->uncore, - RING_CTL(engine->mmio_base), - RING_VALID, RING_VALID, - 50)) { - drm_err(&dev_priv->drm, "%s initialization failed " - "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); - ret = -EIO; - goto out; + if (__intel_wait_for_register_fw(engine->uncore, + RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 5000, 0, NULL)) { + drm_err(&dev_priv->drm, + "%s initialization failed; " + "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + return -EIO; } if (INTEL_GEN(dev_priv) > 2) - ENGINE_WRITE(engine, - RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_WRITE_FW(engine, + RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); /* Now awake, let it get started */ if (ring->tail != ring->head) { - ENGINE_WRITE(engine, RING_TAIL, ring->tail); + ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail); ENGINE_POSTING_READ(engine, RING_TAIL); } /* Papering over lost _interrupts_ immediately following the restart */ intel_engine_signal_breadcrumbs(engine); -out: - intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - - return ret; + return 0; } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -332,11 +290,25 @@ static void xcs_sanitize(struct intel_engine_cs *engine) clflush_cache_range(engine->status_page.addr, PAGE_SIZE); } -static void reset_prepare(struct intel_engine_cs *engine) +static bool stop_ring(struct intel_engine_cs *engine) { - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; + /* Empty the ring by skipping to the end */ + ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL)); + ENGINE_POSTING_READ(engine, RING_HEAD); + /* The ring must be empty before it is disabled */ + ENGINE_WRITE_FW(engine, RING_CTL, 0); + ENGINE_POSTING_READ(engine, RING_CTL); + + /* Then reset the disabled ring */ + ENGINE_WRITE_FW(engine, RING_HEAD, 0); + ENGINE_WRITE_FW(engine, RING_TAIL, 0); + + return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0; +} + +static void reset_prepare(struct intel_engine_cs *engine) +{ /* * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). 
Also as modern gpu as kbl can suffer @@ -348,30 +320,35 @@ static void reset_prepare(struct intel_engine_cs *engine) * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) * * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * WaClearRingBufHeadRegAtInit:ctg,elk * * FIXME: Wa for more modern gens needs to be validated */ ENGINE_TRACE(engine, "\n"); + intel_engine_stop_cs(engine); - if (intel_engine_stop_cs(engine)) - ENGINE_TRACE(engine, "timed out on STOP_RING\n"); - - intel_uncore_write_fw(uncore, - RING_HEAD(base), - intel_uncore_read_fw(uncore, RING_TAIL(base))); - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ - - intel_uncore_write_fw(uncore, RING_HEAD(base), 0); - intel_uncore_write_fw(uncore, RING_TAIL(base), 0); - intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - intel_uncore_write_fw(uncore, RING_CTL(base), 0); + if (!stop_ring(engine)) { + /* G45 ring initialization often fails to reset head to zero */ + drm_dbg(&engine->i915->drm, + "%s head not reset to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ_FW(engine, RING_CTL), + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ENGINE_READ_FW(engine, RING_START)); + } - /* Check acts as a post */ - if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - ENGINE_TRACE(engine, "ring head [%x] not parked\n", - intel_uncore_read_fw(uncore, RING_HEAD(base))); + if (!stop_ring(engine)) { + drm_err(&engine->i915->drm, + "failed to set %s head to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ_FW(engine, RING_CTL), + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ENGINE_READ_FW(engine, RING_START)); + } } static void reset_rewind(struct intel_engine_cs *engine, bool stalled) @@ -382,12 +359,14 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&engine->active.lock, 
flags); + rcu_read_lock(); list_for_each_entry(pos, &engine->active.requests, sched.link) { - if (!i915_request_completed(pos)) { + if (!__i915_request_is_complete(pos)) { rq = pos; break; } } + rcu_read_unlock(); /* * The guilty request will get skipped on a hung engine. @@ -663,9 +642,9 @@ static int load_pd_dir(struct i915_request *rq, return rq->engine->emit_flush(rq, EMIT_FLUSH); } -static inline int mi_set_context(struct i915_request *rq, - struct intel_context *ce, - u32 flags) +static int mi_set_context(struct i915_request *rq, + struct intel_context *ce, + u32 flags) { struct intel_engine_cs *engine = rq->engine; struct drm_i915_private *i915 = engine->i915; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 69e1bd46cc46..ee5835c29c03 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -43,7 +43,7 @@ static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) return mask & ~rps->pm_intrmsk_mbz; } -static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) { intel_uncore_write_fw(uncore, reg, val); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7fe05918a76e..037b0e3ccbed 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -582,11 +582,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, rcu_read_lock(); cl = rcu_dereference(from->hwsp_cacheline); - if (i915_request_completed(from)) /* confirm cacheline is valid */ + if (i915_request_signaled(from)) /* confirm cacheline is valid */ goto unlock; if (unlikely(!i915_active_acquire_if_busy(&cl->active))) goto unlock; /* seqno wrapped and completed! 
*/ - if (unlikely(i915_request_completed(from))) + if (unlikely(__i915_request_is_complete(from))) goto release; rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 53f7838bd3c4..8c0c050c4af9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1313,7 +1313,7 @@ bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) } __maybe_unused -static inline bool is_nonpriv_flags_valid(u32 flags) +static bool is_nonpriv_flags_valid(u32 flags) { /* Check only valid flag bits are set */ if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) @@ -1850,6 +1850,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN8_L3SQCREG4, GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* Disable atomics in L3 to prevent unrecoverable hangs */ + wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, + GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); + wa_write_clr_set(wal, GEN8_L3SQCREG4, + GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); + wa_write_clr_set(wal, GEN9_SCRATCH1, + EVICTION_PERF_FIX_ENABLE, 0); } if (IS_HASWELL(i915)) { diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 460c3e9542f4..463bb6a700c8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -704,6 +704,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; + unsigned long count; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -721,6 +722,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); + count = 0; do { if (active) { struct i915_request *rq; @@ -770,9 +772,13 @@ static int __igt_reset_engine(struct intel_gt *gt, bool 
active) err = -EINVAL; break; } + + count++; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); + pr_info("%s: Completed %lu %s resets\n", + engine->name, count, active ? "active" : "idle"); if (err) break; @@ -1623,7 +1629,8 @@ static int igt_reset_queue(void *arg) prev = rq; count++; } while (time_before(jiffies, end_time)); - pr_info("%s: Completed %d resets\n", engine->name, count); + pr_info("%s: Completed %d queued resets\n", + engine->name, count); *h.batch = MI_BATCH_BUFFER_END; intel_gt_chipset_flush(engine->gt); @@ -1720,7 +1727,8 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable(t); + if (t->func) + tasklet_disable(t); if (strcmp(p->name, "softirq")) local_bh_disable(); p->critical_section_begin(); @@ -1730,8 +1738,10 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, p->critical_section_end(); if (strcmp(p->name, "softirq")) local_bh_enable(); - tasklet_enable(t); - tasklet_hi_schedule(t); + if (t->func) { + tasklet_enable(t); + tasklet_hi_schedule(t); + } if (err) pr_err("i915_reset_engine(%s:%s) failed under %s\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index b7befcfbdcde..8784257ec808 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -321,7 +321,10 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - tasklet_disable(&engine->execlists.tasklet); + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (t->func) + tasklet_disable(t); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { @@ -345,8 +348,10 @@ static int igt_atomic_engine_reset(void *arg) } intel_engine_pm_put(engine); - tasklet_enable(&engine->execlists.tasklet); - 
tasklet_hi_schedule(&engine->execlists.tasklet); + if (t->func) { + tasklet_enable(t); + tasklet_hi_schedule(t); + } if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 5982b62f913d..a4d8fc9e2374 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -33,7 +33,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) struct file *file; void *ptr; - if (obj->ops == &i915_gem_shmem_ops) { + if (i915_gem_object_is_shmem(obj)) { file = obj->base.filp; atomic_long_inc(&file->f_count); return file; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6a0452815c41..6abb8f2dc33d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -15,6 +15,29 @@ static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; +static void uc_expand_default_options(struct intel_uc *uc) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + + if (i915->params.enable_guc != -1) + return; + + /* Don't enable GuC/HuC on pre-Gen12 */ + if (INTEL_GEN(i915) < 12) { + i915->params.enable_guc = 0; + return; + } + + /* Don't enable GuC/HuC on older Gen12 platforms */ + if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + i915->params.enable_guc = 0; + return; + } + + /* Default: enable HuC authentication only */ + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; +} + /* Reset GuC providing us with fresh state for both GuC and HuC. 
*/ static int __intel_uc_reset_hw(struct intel_uc *uc) @@ -52,9 +75,6 @@ static void __confirm_options(struct intel_uc *uc) yesno(intel_uc_wants_guc_submission(uc)), yesno(intel_uc_wants_huc(uc))); - if (i915->params.enable_guc == -1) - return; - if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); @@ -79,8 +99,7 @@ static void __confirm_options(struct intel_uc *uc) "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC submission is N/A"); - if (i915->params.enable_guc & ~(ENABLE_GUC_SUBMISSION | - ENABLE_GUC_LOAD_HUC)) + if (i915->params.enable_guc & ~ENABLE_GUC_MASK) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "undocumented flag"); @@ -88,6 +107,8 @@ static void __confirm_options(struct intel_uc *uc) void intel_uc_init_early(struct intel_uc *uc) { + uc_expand_default_options(uc); + intel_guc_init_early(&uc->guc); intel_huc_init_early(&uc->huc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 8b7358106a52..984fa79e0fa7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -154,16 +154,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) uc_fw->path = NULL; } } - - /* We don't want to enable GuC/HuC on pre-Gen11 by default */ - if (i915->params.enable_guc == -1 && p < INTEL_ICELAKE) - uc_fw->path = NULL; } static const char *__override_guc_firmware_path(struct drm_i915_private *i915) { - if (i915->params.enable_guc & (ENABLE_GUC_SUBMISSION | - ENABLE_GUC_LOAD_HUC)) + if (i915->params.enable_guc & ENABLE_GUC_MASK) return i915->params.guc_firmware_path; return ""; } diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 3fea967ee817..fef1e857cefc 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -38,11 +38,18 @@ 
#include "i915_drv.h" #include "gt/intel_gpu_commands.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" +#include "gt/intel_gt_requests.h" +#include "gt/shmem_utils.h" #include "gvt.h" #include "i915_pvinfo.h" #include "trace.h" +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + #define INVALID_OP (~0U) #define OP_LEN_MI 9 @@ -455,6 +462,7 @@ enum { RING_BUFFER_INSTRUCTION, BATCH_BUFFER_INSTRUCTION, BATCH_BUFFER_2ND_LEVEL, + RING_BUFFER_CTX, }; enum { @@ -496,6 +504,7 @@ struct parser_exec_state { */ int saved_buf_addr_type; bool is_ctx_wa; + bool is_init_ctx; const struct cmd_info *info; @@ -709,6 +718,11 @@ static inline u32 cmd_val(struct parser_exec_state *s, int index) return *cmd_ptr(s, index); } +static inline bool is_init_ctx(struct parser_exec_state *s) +{ + return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx); +} + static void parser_exec_state_dump(struct parser_exec_state *s) { int cnt = 0; @@ -722,7 +736,8 @@ static void parser_exec_state_dump(struct parser_exec_state *s) gvt_dbg_cmd(" %s %s ip_gma(%08lx) ", s->buf_type == RING_BUFFER_INSTRUCTION ? - "RING_BUFFER" : "BATCH_BUFFER", + "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ? + "CTX_BUFFER" : "BATCH_BUFFER"), s->buf_addr_type == GTT_BUFFER ? 
"GTT" : "PPGTT", s->ip_gma); @@ -757,7 +772,8 @@ static inline void update_ip_va(struct parser_exec_state *s) if (WARN_ON(s->ring_head == s->ring_tail)) return; - if (s->buf_type == RING_BUFFER_INSTRUCTION) { + if (s->buf_type == RING_BUFFER_INSTRUCTION || + s->buf_type == RING_BUFFER_CTX) { unsigned long ring_top = s->ring_start + s->ring_size; if (s->ring_head > s->ring_tail) { @@ -821,68 +837,12 @@ static inline int cmd_length(struct parser_exec_state *s) *addr = val; \ } while (0) -static bool is_shadowed_mmio(unsigned int offset) -{ - bool ret = false; - - if ((offset == 0x2168) || /*BB current head register UDW */ - (offset == 0x2140) || /*BB current header register */ - (offset == 0x211c) || /*second BB header register UDW */ - (offset == 0x2114)) { /*second BB header register UDW */ - ret = true; - } - return ret; -} - -static inline bool is_force_nonpriv_mmio(unsigned int offset) -{ - return (offset >= 0x24d0 && offset < 0x2500); -} - -static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index, char *cmd) -{ - struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data; - u32 ring_base; - u32 nopid; - - if (!strcmp(cmd, "lri")) - data = cmd_val(s, index + 1); - else { - gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", - offset, cmd); - return -EINVAL; - } - - ring_base = s->engine->mmio_base; - nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); - - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && - data != nopid) { - gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", - offset, data); - patch_value(s, cmd_ptr(s, index), nopid); - return 0; - } - return 0; -} - static inline bool is_mocs_mmio(unsigned int offset) { return ((offset >= 0xc800) && (offset <= 0xcff8)) || ((offset >= 0xb020) && (offset <= 0xb0a0)); } -static int mocs_cmd_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) -{ - if (!is_mocs_mmio(offset)) - return -EINVAL; - 
vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); - return 0; -} - static int is_cmd_update_pdps(unsigned int offset, struct parser_exec_state *s) { @@ -930,6 +890,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, struct intel_vgpu *vgpu = s->vgpu; struct intel_gvt *gvt = vgpu->gvt; u32 ctx_sr_ctl; + u32 *vreg, vreg_old; if (offset + 4 > gvt->device_info.mmio_size) { gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", @@ -937,34 +898,101 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EFAULT; } + if (is_init_ctx(s)) { + struct intel_gvt_mmio_info *mmio_info; + + intel_gvt_mmio_set_cmd_accessible(gvt, offset); + mmio_info = intel_gvt_find_mmio_info(gvt, offset); + if (mmio_info && mmio_info->write) + intel_gvt_mmio_set_cmd_write_patch(gvt, offset); + return 0; + } + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); return -EBADRQC; } - if (is_shadowed_mmio(offset)) { - gvt_vgpu_err("found access of shadowed MMIO %x\n", offset); - return 0; + if (!strncmp(cmd, "srm", 3) || + !strncmp(cmd, "lrm", 3)) { + if (offset != i915_mmio_reg_offset(GEN8_L3SQCREG4) && + offset != 0x21f0) { + gvt_vgpu_err("%s access to register (%x)\n", + cmd, offset); + return -EPERM; + } else + return 0; } - if (is_mocs_mmio(offset) && - mocs_cmd_reg_handler(s, offset, index)) - return -EINVAL; + if (!strncmp(cmd, "lrr-src", 7) || + !strncmp(cmd, "lrr-dst", 7)) { + gvt_vgpu_err("not allowed cmd %s\n", cmd); + return -EPERM; + } + + if (!strncmp(cmd, "pipe_ctrl", 9)) { + /* TODO: add LRI POST logic here */ + return 0; + } - if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index, cmd)) + if (strncmp(cmd, "lri", 3)) return -EPERM; + /* below are all lri handlers */ + vreg = &vgpu_vreg(s->vgpu, offset); + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { + gvt_vgpu_err("%s access to non-render register (%x)\n", + cmd, offset); + return -EBADRQC; + } + + if 
(is_cmd_update_pdps(offset, s) && + cmd_pdp_mmio_update_handler(s, offset, index)) + return -EINVAL; + if (offset == i915_mmio_reg_offset(DERRMR) || offset == i915_mmio_reg_offset(FORCEWAKE_MT)) { /* Writing to HW VGT_PVINFO_PAGE offset will be discarded */ patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE); } - if (is_cmd_update_pdps(offset, s) && - cmd_pdp_mmio_update_handler(s, offset, index)) - return -EINVAL; + if (is_mocs_mmio(offset)) + *vreg = cmd_val(s, index + 1); + + vreg_old = *vreg; + + if (intel_gvt_mmio_is_cmd_write_patch(gvt, offset)) { + u32 cmdval_new, cmdval; + struct intel_gvt_mmio_info *mmio_info; + + cmdval = cmd_val(s, index + 1); + + mmio_info = intel_gvt_find_mmio_info(gvt, offset); + if (!mmio_info) { + cmdval_new = cmdval; + } else { + u64 ro_mask = mmio_info->ro_mask; + int ret; + + if (likely(!ro_mask)) + ret = mmio_info->write(s->vgpu, offset, + &cmdval, 4); + else { + gvt_vgpu_err("try to write RO reg %x\n", + offset); + ret = -EBADRQC; + } + if (ret) + return ret; + cmdval_new = *vreg; + } + if (cmdval_new != cmdval) + patch_value(s, cmd_ptr(s, index+1), cmdval_new); + } + + /* only patch cmd. 
restore vreg value if changed in mmio write handler*/ + *vreg = vreg_old; /* TODO * In order to let workload with inhibit context to generate @@ -1216,6 +1244,8 @@ static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s) s->buf_type = BATCH_BUFFER_INSTRUCTION; ret = ip_gma_set(s, s->ret_ip_gma_bb); s->buf_addr_type = s->saved_buf_addr_type; + } else if (s->buf_type == RING_BUFFER_CTX) { + ret = ip_gma_set(s, s->ring_tail); } else { s->buf_type = RING_BUFFER_INSTRUCTION; s->buf_addr_type = GTT_BUFFER; @@ -2764,7 +2794,8 @@ static int command_scan(struct parser_exec_state *s, gma_bottom = rb_start + rb_len; while (s->ip_gma != gma_tail) { - if (s->buf_type == RING_BUFFER_INSTRUCTION) { + if (s->buf_type == RING_BUFFER_INSTRUCTION || + s->buf_type == RING_BUFFER_CTX) { if (!(s->ip_gma >= rb_start) || !(s->ip_gma < gma_bottom)) { gvt_vgpu_err("ip_gma %lx out of ring scope." @@ -3057,6 +3088,118 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } +/* generate dummy contexts by sending empty requests to HW, and let + * the HW to fill Engine Contexts. 
This dummy contexts are used for + * initialization purpose (update reg whitelist), so referred to as + * init context here + */ +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) +{ + const unsigned long start = LRC_STATE_PN * PAGE_SIZE; + struct intel_gvt *gvt = vgpu->gvt; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (gvt->is_reg_whitelist_updated) + return; + + /* scan init ctx to update cmd accessible list */ + for_each_engine(engine, gvt->gt, id) { + struct parser_exec_state s; + void *vaddr; + int ret; + + if (!engine->default_state) + continue; + + vaddr = shmem_pin_map(engine->default_state); + if (IS_ERR(vaddr)) { + gvt_err("failed to map %s->default state, err:%zd\n", + engine->name, PTR_ERR(vaddr)); + return; + } + + s.buf_type = RING_BUFFER_CTX; + s.buf_addr_type = GTT_BUFFER; + s.vgpu = vgpu; + s.engine = engine; + s.ring_start = 0; + s.ring_size = engine->context_size - start; + s.ring_head = 0; + s.ring_tail = s.ring_size; + s.rb_va = vaddr + start; + s.workload = NULL; + s.is_ctx_wa = false; + s.is_init_ctx = true; + + /* skipping the first RING_CTX_SIZE(0x50) dwords */ + ret = ip_gma_set(&s, RING_CTX_SIZE); + if (ret == 0) { + ret = command_scan(&s, 0, s.ring_size, 0, s.ring_size); + if (ret) + gvt_err("Scan init ctx error\n"); + } + + shmem_unpin_map(engine->default_state, vaddr); + if (ret) + return; + } + + gvt->is_reg_whitelist_updated = true; +} + +int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + unsigned long gma_head, gma_tail, gma_start, ctx_size; + struct parser_exec_state s; + int ring_id = workload->engine->id; + struct intel_context *ce = vgpu->submission.shadow[ring_id]; + int ret; + + GEM_BUG_ON(atomic_read(&ce->pin_count) < 0); + + ctx_size = workload->engine->context_size - PAGE_SIZE; + + /* Only ring contxt is loaded to HW for inhibit context, no need to + * scan engine context + */ + if (is_inhibit_context(ce)) + return 0; + + 
gma_start = i915_ggtt_offset(ce->state) + LRC_STATE_PN*PAGE_SIZE; + gma_head = 0; + gma_tail = ctx_size; + + s.buf_type = RING_BUFFER_CTX; + s.buf_addr_type = GTT_BUFFER; + s.vgpu = workload->vgpu; + s.engine = workload->engine; + s.ring_start = gma_start; + s.ring_size = ctx_size; + s.ring_head = gma_start + gma_head; + s.ring_tail = gma_start + gma_tail; + s.rb_va = ce->lrc_reg_state; + s.workload = workload; + s.is_ctx_wa = false; + s.is_init_ctx = false; + + /* don't scan the first RING_CTX_SIZE(0x50) dwords, as it's ring + * context + */ + ret = ip_gma_set(&s, gma_start + gma_head + RING_CTX_SIZE); + if (ret) + goto out; + + ret = command_scan(&s, gma_head, gma_tail, + gma_start, ctx_size); +out: + if (ret) + gvt_vgpu_err("scan shadow ctx error\n"); + + return ret; +} + static int init_cmd_table(struct intel_gvt *gvt) { unsigned int gen_type = intel_gvt_get_device_type(gvt); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h index ab25d151932a..416d345e2816 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.h +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h @@ -40,6 +40,7 @@ struct intel_gvt; struct intel_shadow_wa_ctx; +struct intel_vgpu; struct intel_vgpu_workload; void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt); @@ -50,4 +51,8 @@ int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload); int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); +void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu); + +int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload); + #endif diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 26bce91437fa..034c060f89d4 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -217,6 +217,15 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) DDI_BUF_CTL_ENABLE); vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE; } + vgpu_vreg_t(vgpu, 
PCH_PORT_HOTPLUG) &= + ~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK); + /* No hpd_invert set in vgpu vbt, need to clear invert mask */ + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK; vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1)); vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &= @@ -273,6 +282,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } @@ -301,6 +312,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } @@ -329,6 +342,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); } @@ -670,44 +685,62 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) PORTD_HOTPLUG_STATUS_MASK; intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); } else if (IS_BROXTON(i915)) { - if (connected) { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (connected) { vgpu_vreg_t(vgpu, 
GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + } else { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTA_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIC_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= - GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); - } - } else { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + SFUSE_STRAP_DDIB_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) &= - ~SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTB_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { + if (connected) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDIC_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= 
~GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDIC_DETECTED; } + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTC_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG); } - vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= - PORTB_HOTPLUG_STATUS_MASK; - intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); } } diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 158873f269b1..c8dcda6d4f0d 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -522,12 +522,11 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, static void clean_execlist(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask) { - struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; - struct intel_engine_cs *engine; struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_engine_cs *engine; intel_engine_mask_t tmp; - for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { + for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) { kfree(s->ring_scan_buffer[engine->id]); s->ring_scan_buffer[engine->id] = NULL; s->ring_scan_buffer_size[engine->id] = 0; @@ -537,11 +536,10 @@ static void clean_execlist(struct intel_vgpu *vgpu, static void reset_execlist(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask) { - struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; struct intel_engine_cs *engine; intel_engine_mask_t tmp; - for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) + for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) init_vgpu_execlist(vgpu, engine); } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 62b6faeb7ec0..8dc8170ba00f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -250,7 
+250,7 @@ struct gvt_mmio_block { #define INTEL_GVT_MMIO_HASH_BITS 11 struct intel_gvt_mmio { - u8 *mmio_attribute; + u16 *mmio_attribute; /* Register contains RO bits */ #define F_RO (1 << 0) /* Register contains graphics address */ @@ -269,6 +269,8 @@ struct intel_gvt_mmio { * logical context image */ #define F_SR_IN_CTX (1 << 7) +/* Value of command write of this reg needs to be patched */ +#define F_CMD_WRITE_PATCH (1 << 8) struct gvt_mmio_block *mmio_block; unsigned int num_mmio_block; @@ -335,6 +337,7 @@ struct intel_gvt { u32 *mocs_mmio_offset_list; u32 mocs_mmio_offset_list_cnt; } engine_mmio_list; + bool is_reg_whitelist_updated; struct dentry *debugfs_root; }; @@ -421,6 +424,9 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); #define vgpu_fence_base(vgpu) (vgpu->fence.base) #define vgpu_fence_sz(vgpu) (vgpu->fence.size) +/* ring context size i.e. the first 0x50 dwords*/ +#define RING_CTX_SIZE 320 + struct intel_vgpu_creation_params { __u64 handle; __u64 low_gm_sz; /* in MB */ @@ -692,6 +698,35 @@ static inline void intel_gvt_mmio_set_sr_in_ctx( } void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); +/** + * intel_gvt_mmio_set_cmd_write_patch - + * mark an MMIO if its cmd write needs to be + * patched + * @gvt: a GVT device + * @offset: register offset + * + */ +static inline void intel_gvt_mmio_set_cmd_write_patch( + struct intel_gvt *gvt, unsigned int offset) +{ + gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_WRITE_PATCH; +} + +/** + * intel_gvt_mmio_is_cmd_write_patch - check if an mmio's cmd access needs to + * be patched + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * True if GPU commmand write to an MMIO should be patched + */ +static inline bool intel_gvt_mmio_is_cmd_write_patch( + struct intel_gvt *gvt, unsigned int offset) +{ + return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_WRITE_PATCH; +} + void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu); void intel_gvt_debugfs_init(struct intel_gvt *gvt); 
void intel_gvt_debugfs_clean(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 30ae4eca8748..477badfcb258 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -84,7 +84,7 @@ static void write_vreg(struct intel_vgpu *vgpu, unsigned int offset, memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes); } -static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt, +struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, unsigned int offset) { struct intel_gvt_mmio_info *e; @@ -97,7 +97,7 @@ static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt, } static int new_mmio_info(struct intel_gvt *gvt, - u32 offset, u8 flags, u32 size, + u32 offset, u16 flags, u32 size, u32 addr_mask, u32 ro_mask, u32 device, gvt_mmio_func read, gvt_mmio_func write) { @@ -119,7 +119,7 @@ static int new_mmio_info(struct intel_gvt *gvt, return -ENOMEM; info->offset = i; - p = find_mmio_info(gvt, info->offset); + p = intel_gvt_find_mmio_info(gvt, info->offset); if (p) { WARN(1, "dup mmio definition offset %x\n", info->offset); @@ -2214,7 +2214,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) /* RING MODE */ #define RING_REG(base) _MMIO((base) + 0x29c) - MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + MMIO_RING_DFH(RING_REG, D_ALL, + F_MODE_MASK | F_CMD_ACCESS | F_CMD_WRITE_PATCH, NULL, ring_mode_mmio_write); #undef RING_REG @@ -3134,8 +3135,8 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(_MMIO(0xb110), D_BDW); - MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, - NULL, force_nonpriv_write); + MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0, + D_BDW_PLUS, NULL, force_nonpriv_write); MMIO_D(_MMIO(0x44484), D_BDW_PLUS); MMIO_D(_MMIO(0x4448c), D_BDW_PLUS); @@ -3875,7 +3876,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu 
*vgpu, unsigned int offset, /* * Normal tracked MMIOs. */ - mmio_info = find_mmio_info(gvt, offset); + mmio_info = intel_gvt_find_mmio_info(gvt, offset); if (!mmio_info) { gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes); goto default_rw; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 551f00024e99..d089770795b8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1703,7 +1703,7 @@ static int kvmgt_page_track_add(unsigned long handle, u64 gfn) return -EINVAL; } - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); if (kvmgt_gfn_is_write_protected(info, gfn)) goto out; @@ -1712,7 +1712,7 @@ static int kvmgt_page_track_add(unsigned long handle, u64 gfn) kvmgt_protect_table_add(info, gfn); out: - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); return 0; } @@ -1737,7 +1737,7 @@ static int kvmgt_page_track_remove(unsigned long handle, u64 gfn) return -EINVAL; } - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); if (!kvmgt_gfn_is_write_protected(info, gfn)) goto out; @@ -1746,7 +1746,7 @@ static int kvmgt_page_track_remove(unsigned long handle, u64 gfn) kvmgt_protect_table_del(info, gfn); out: - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); return 0; } @@ -1772,7 +1772,7 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm, struct kvmgt_guest_info *info = container_of(node, struct kvmgt_guest_info, track_node); - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); for (i = 0; i < slot->npages; i++) { gfn = slot->base_gfn + i; if (kvmgt_gfn_is_write_protected(info, gfn)) { @@ -1781,7 +1781,7 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm, kvmgt_protect_table_del(info, gfn); } } - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); } static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm) diff --git 
a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 9e862dc73579..7c26af39fbfc 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -80,6 +80,9 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, int (*handler)(struct intel_gvt *gvt, u32 offset, void *data), void *data); +struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, + unsigned int offset); + int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr); void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index b58860dee970..244cc7320b54 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -133,4 +133,6 @@ #define RING_GFX_MODE(base) _MMIO((base) + 0x29c) #define VF_GUARDBAND _MMIO(0x83a4) + +#define BCS_TILE_REGISTER_VAL_OFFSET (0x43*4) #endif diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6af5c06caee0..fc735692f21f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -137,6 +137,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) int i; bool skip = false; int ring_id = workload->engine->id; + int ret; GEM_BUG_ON(!intel_context_is_pinned(ctx)); @@ -163,16 +164,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) COPY_REG(bb_per_ctx_ptr); COPY_REG(rcs_indirect_ctx); COPY_REG(rcs_indirect_ctx_offset); - } + } else if (workload->engine->id == BCS0) + intel_gvt_hypervisor_read_gpa(vgpu, + workload->ring_context_gpa + + BCS_TILE_REGISTER_VAL_OFFSET, + (void *)shadow_ring_context + + BCS_TILE_REGISTER_VAL_OFFSET, 4); #undef COPY_REG #undef COPY_REG_MASKED + /* don't copy Ring Context (the first 0x50 dwords), + * only copy the Engine Context part from guest + */ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa + - 
sizeof(*shadow_ring_context), + RING_CTX_SIZE, (void *)shadow_ring_context + - sizeof(*shadow_ring_context), - I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + RING_CTX_SIZE, + I915_GTT_PAGE_SIZE - RING_CTX_SIZE); sr_oa_regs(workload, (u32 *)shadow_ring_context, false); @@ -239,6 +248,11 @@ read: gpa_size = I915_GTT_PAGE_SIZE; dst = context_base + (i << I915_GTT_PAGE_SHIFT); } + ret = intel_gvt_scan_engine_context(workload); + if (ret) { + gvt_vgpu_err("invalid cmd found in guest context pages\n"); + return ret; + } s->last_ctx[ring_id].valid = true; return 0; } @@ -398,7 +412,9 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) if (!wa_ctx->indirect_ctx.obj) return; + i915_gem_object_lock(wa_ctx->indirect_ctx.obj, NULL); i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); + i915_gem_object_unlock(wa_ctx->indirect_ctx.obj); i915_gem_object_put(wa_ctx->indirect_ctx.obj); wa_ctx->indirect_ctx.obj = NULL; @@ -506,6 +522,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) struct intel_gvt *gvt = workload->vgpu->gvt; const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; struct intel_vgpu_shadow_bb *bb; + struct i915_gem_ww_ctx ww; int ret; list_for_each_entry(bb, &workload->shadow_bb, list) { @@ -530,10 +547,19 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) * directly */ if (!bb->ppgtt) { - bb->vma = i915_gem_object_ggtt_pin(bb->obj, - NULL, 0, 0, 0); + i915_gem_ww_ctx_init(&ww, false); +retry: + i915_gem_object_lock(bb->obj, &ww); + + bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww, + NULL, 0, 0, 0); if (IS_ERR(bb->vma)) { ret = PTR_ERR(bb->vma); + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } goto err; } @@ -547,13 +573,15 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) 0); if (ret) goto err; - } - /* No one is going to touch shadow bb from now on. 
*/ - i915_gem_object_flush_map(bb->obj); + /* No one is going to touch shadow bb from now on. */ + i915_gem_object_flush_map(bb->obj); + i915_gem_object_unlock(bb->obj); + } } return 0; err: + i915_gem_ww_ctx_fini(&ww); release_shadow_batch_buffer(workload); return ret; } @@ -580,14 +608,29 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) unsigned char *per_ctx_va = (unsigned char *)wa_ctx->indirect_ctx.shadow_va + wa_ctx->indirect_ctx.size; + struct i915_gem_ww_ctx ww; + int ret; if (wa_ctx->indirect_ctx.size == 0) return 0; - vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, - 0, CACHELINE_BYTES, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); + i915_gem_ww_ctx_init(&ww, false); +retry: + i915_gem_object_lock(wa_ctx->indirect_ctx.obj, &ww); + + vma = i915_gem_object_ggtt_pin_ww(wa_ctx->indirect_ctx.obj, &ww, NULL, + 0, CACHELINE_BYTES, 0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + return ret; + } + + i915_gem_object_unlock(wa_ctx->indirect_ctx.obj); /* FIXME: we are not tracking our pinned VMA leaving it * up to the core to fix up the stray pin_count upon @@ -621,12 +664,14 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { + i915_gem_object_lock(bb->obj, NULL); if (bb->va && !IS_ERR(bb->va)) i915_gem_object_unpin_map(bb->obj); if (bb->vma && !IS_ERR(bb->vma)) i915_vma_unpin(bb->vma); + i915_gem_object_unlock(bb->obj); i915_gem_object_put(bb->obj); } list_del(&bb->list); @@ -1001,13 +1046,12 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; struct intel_engine_cs *engine; struct intel_vgpu_workload *pos, *n; intel_engine_mask_t tmp; /* free the unsubmited workloads in the 
queues. */ - for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { + for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) { list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c6e3f2925c01..9039787f123a 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -435,10 +435,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_BROXTON(dev_priv)) ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); - /* FixMe: Re-enable APL/BXT once vfio_edid enabled */ - else if (!IS_BROXTON(dev_priv)) + else ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; @@ -498,9 +497,11 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, mutex_lock(&gvt->lock); vgpu = __intel_gvt_create_vgpu(gvt, &param); - if (!IS_ERR(vgpu)) + if (!IS_ERR(vgpu)) { /* calculate left instance change for types */ intel_gvt_update_vgpu_types(gvt); + intel_gvt_update_reg_whitelist(vgpu); + } mutex_unlock(&gvt->lock); return vgpu; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index ab4382841c6b..3bc616cc1ad2 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -628,24 +628,26 @@ static int flush_lazy_signals(struct i915_active *ref) int __i915_active_wait(struct i915_active *ref, int state) { - int err; - might_sleep(); - if (!i915_active_acquire_if_busy(ref)) - return 0; - /* Any fence added after the wait begins will not be auto-signaled */ - err = flush_lazy_signals(ref); - i915_active_release(ref); - if (err) - return err; + if (i915_active_acquire_if_busy(ref)) { + int err; - if (!i915_active_is_idle(ref) && - ___wait_var_event(ref, i915_active_is_idle(ref), - state, 0, 0, 
schedule())) - return -EINTR; + err = flush_lazy_signals(ref); + i915_active_release(ref); + if (err) + return err; + if (___wait_var_event(ref, i915_active_is_idle(ref), + state, 0, 0, schedule())) + return -EINTR; + } + + /* + * After the wait is complete, the caller may free the active. + * We have to flush any concurrent retirement before returning. + */ flush_work(&ref->work); return 0; } diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 82d0f19e86df..ced9a96d7c34 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1143,7 +1143,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, void *dst, *src; int ret; - dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB); + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); if (IS_ERR(dst)) return dst; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1bfb740a3d1e..51133b8fabb4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -210,7 +210,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) spin_unlock(&obj->vma.lock); seq_printf(m, " (pinned x %d)", pin_count); - if (obj->stolen) + if (i915_gem_object_is_stolen(obj)) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); if (i915_gem_object_is_framebuffer(obj)) seq_printf(m, " (fb)"); @@ -220,145 +220,6 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%s)", engine->name); } -struct file_stats { - struct i915_address_space *vm; - unsigned long count; - u64 total; - u64 active, inactive; - u64 closed; -}; - -static int per_file_stats(int id, void *ptr, void *data) -{ - struct drm_i915_gem_object *obj = ptr; - struct file_stats *stats = data; - struct i915_vma *vma; - - if (IS_ERR_OR_NULL(obj) || !kref_get_unless_zero(&obj->base.refcount)) - return 0; - - stats->count++; - stats->total += 
obj->base.size; - - spin_lock(&obj->vma.lock); - if (!stats->vm) { - for_each_ggtt_vma(vma, obj) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - if (i915_vma_is_active(vma)) - stats->active += vma->node.size; - else - stats->inactive += vma->node.size; - - if (i915_vma_is_closed(vma)) - stats->closed += vma->node.size; - } - } else { - struct rb_node *p = obj->vma.tree.rb_node; - - while (p) { - long cmp; - - vma = rb_entry(p, typeof(*vma), obj_node); - cmp = i915_vma_compare(vma, stats->vm, NULL); - if (cmp == 0) { - if (drm_mm_node_allocated(&vma->node)) { - if (i915_vma_is_active(vma)) - stats->active += vma->node.size; - else - stats->inactive += vma->node.size; - - if (i915_vma_is_closed(vma)) - stats->closed += vma->node.size; - } - break; - } - if (cmp < 0) - p = p->rb_right; - else - p = p->rb_left; - } - } - spin_unlock(&obj->vma.lock); - - i915_gem_object_put(obj); - return 0; -} - -#define print_file_stats(m, name, stats) do { \ - if (stats.count) \ - seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu closed)\n", \ - name, \ - stats.count, \ - stats.total, \ - stats.active, \ - stats.inactive, \ - stats.closed); \ -} while (0) - -static void print_context_stats(struct seq_file *m, - struct drm_i915_private *i915) -{ - struct file_stats kstats = {}; - struct i915_gem_context *ctx, *cn; - - spin_lock(&i915->gem.contexts.lock); - list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { - struct i915_gem_engines_iter it; - struct intel_context *ce; - - if (!kref_get_unless_zero(&ctx->ref)) - continue; - - spin_unlock(&i915->gem.contexts.lock); - - for_each_gem_engine(ce, - i915_gem_context_lock_engines(ctx), it) { - if (intel_context_pin_if_active(ce)) { - rcu_read_lock(); - if (ce->state) - per_file_stats(0, - ce->state->obj, &kstats); - per_file_stats(0, ce->ring->vma->obj, &kstats); - rcu_read_unlock(); - intel_context_unpin(ce); - } - } - i915_gem_context_unlock_engines(ctx); - - mutex_lock(&ctx->mutex); - 
if (!IS_ERR_OR_NULL(ctx->file_priv)) { - struct file_stats stats = { - .vm = rcu_access_pointer(ctx->vm), - }; - struct drm_file *file = ctx->file_priv->file; - struct task_struct *task; - char name[80]; - - rcu_read_lock(); - idr_for_each(&file->object_idr, per_file_stats, &stats); - rcu_read_unlock(); - - rcu_read_lock(); - task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID); - snprintf(name, sizeof(name), "%s", - task ? task->comm : "<unknown>"); - rcu_read_unlock(); - - print_file_stats(m, name, stats); - } - mutex_unlock(&ctx->mutex); - - spin_lock(&i915->gem.contexts.lock); - list_safe_reset_next(ctx, cn, link); - i915_gem_context_put(ctx); - } - spin_unlock(&i915->gem.contexts.lock); - - print_file_stats(m, "[k]contexts", kstats); -} - static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -372,9 +233,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data) for_each_memory_region(mr, i915, id) seq_printf(m, "%s: total:%pa, available:%pa bytes\n", mr->name, &mr->total, &mr->avail); - seq_putc(m, '\n'); - - print_context_stats(m, i915); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index fb35fcf698f8..43ac73861a4c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -62,6 +62,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_mman.h" +#include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0816c39e51dd..0c43e44d1722 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1298,7 +1298,7 @@ intel_subplatform(const struct intel_runtime_info *info, enum intel_platform p) { const unsigned int pi = __platform_mask_index(info, p); - return info->platform_mask[pi] & INTEL_SUBPLATFORM_BITS; + return 
info->platform_mask[pi] & ((1 << INTEL_SUBPLATFORM_BITS) - 1); } static __always_inline bool @@ -1705,6 +1705,8 @@ tgl_stepping_get(struct drm_i915_private *dev_priv) #define HAS_CSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_csr) +#define HAS_MSO(i915) (INTEL_GEN(i915) >= 12) + #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) @@ -1790,8 +1792,6 @@ int i915_gem_init_userptr(struct drm_i915_private *dev_priv); void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); -int i915_gem_freeze(struct drm_i915_private *dev_priv); -int i915_gem_freeze_late(struct drm_i915_private *dev_priv); struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 17a4636ee542..aa4490934469 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -180,108 +180,6 @@ try_again: } static int -i915_gem_create(struct drm_file *file, - struct intel_memory_region *mr, - u64 *size_p, - u32 *handle_p) -{ - struct drm_i915_gem_object *obj; - u32 handle; - u64 size; - int ret; - - GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); - size = round_up(*size_p, mr->min_page_size); - if (size == 0) - return -EINVAL; - - /* For most of the ABI (e.g. 
mmap) we think in system pages */ - GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); - - /* Allocate the new object */ - obj = i915_gem_object_create_region(mr, size, 0); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - ret = drm_gem_handle_create(file, &obj->base, &handle); - /* drop reference from allocate - handle holds it now */ - i915_gem_object_put(obj); - if (ret) - return ret; - - *handle_p = handle; - *size_p = size; - return 0; -} - -int -i915_gem_dumb_create(struct drm_file *file, - struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - enum intel_memory_type mem_type; - int cpp = DIV_ROUND_UP(args->bpp, 8); - u32 format; - - switch (cpp) { - case 1: - format = DRM_FORMAT_C8; - break; - case 2: - format = DRM_FORMAT_RGB565; - break; - case 4: - format = DRM_FORMAT_XRGB8888; - break; - default: - return -EINVAL; - } - - /* have to work out size/pitch and return them */ - args->pitch = ALIGN(args->width * cpp, 64); - - /* align stride to page size so that we can remap */ - if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, - DRM_FORMAT_MOD_LINEAR)) - args->pitch = ALIGN(args->pitch, 4096); - - if (args->pitch < args->width) - return -EINVAL; - - args->size = mul_u32_u32(args->pitch, args->height); - - mem_type = INTEL_MEMORY_SYSTEM; - if (HAS_LMEM(to_i915(dev))) - mem_type = INTEL_MEMORY_LOCAL; - - return i915_gem_create(file, - intel_memory_region_by_type(to_i915(dev), - mem_type), - &args->size, &args->handle); -} - -/** - * Creates a new mm object and returns a handle to it. 
- * @dev: drm device pointer - * @data: ioctl data blob - * @file: drm file pointer - */ -int -i915_gem_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_create *args = data; - - i915_gem_flush_free_objects(i915); - - return i915_gem_create(file, - intel_memory_region_by_type(i915, - INTEL_MEMORY_SYSTEM), - &args->size, &args->handle); -} - -static int shmem_pread(struct page *page, int offset, int len, char __user *user_data, bool needs_clflush) { @@ -1059,14 +957,14 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, i915_gem_object_is_tiled(obj) && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->mm.madv == I915_MADV_WILLNEED) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_unpin_pages(obj); - obj->mm.quirked = false; + GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_clear_tiling_quirk(obj); + i915_gem_object_make_shrinkable(obj); } if (args->madv == I915_MADV_WILLNEED) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_make_unshrinkable(obj); + i915_gem_object_set_tiling_quirk(obj); } } @@ -1247,53 +1145,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count); } -int i915_gem_freeze(struct drm_i915_private *dev_priv) -{ - /* Discard all purgeable objects, let userspace recover those as - * required after resuming. - */ - i915_gem_shrink_all(dev_priv); - - return 0; -} - -int i915_gem_freeze_late(struct drm_i915_private *i915) -{ - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; - - /* - * Called just before we write the hibernation image. 
- * - * We need to update the domain tracking to reflect that the CPU - * will be accessing all the pages to create and restore from the - * hibernation, and so upon restoration those pages will be in the - * CPU domain. - * - * To make sure the hibernation image contains the latest state, - * we update that state just before writing out the image. - * - * To try and reduce the hibernation image, we manually shrink - * the objects as well, see i915_gem_freeze() - */ - - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - i915_gem_shrink(i915, -1UL, NULL, ~0); - i915_gem_drain_freed_objects(i915); - - list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { - i915_gem_object_lock(obj, NULL); - drm_WARN_ON(&i915->drm, - i915_gem_object_set_to_cpu_domain(obj, true)); - i915_gem_object_unlock(obj); - } - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - - return 0; -} - int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index a4cad3f154ca..e622aee6e4be 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -38,11 +38,18 @@ struct drm_i915_private; #define GEM_SHOW_DEBUG() drm_debug_enabled(DRM_UT_DRIVER) +#ifdef CONFIG_DRM_I915_DEBUG_GEM_ONCE +#define __GEM_BUG(cond) BUG() +#else +#define __GEM_BUG(cond) \ + WARN(1, "%s:%d GEM_BUG_ON(%s)\n", __func__, __LINE__, __stringify(cond)) +#endif + #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \ __func__, __LINE__, __stringify(condition)); \ GEM_TRACE_DUMP(); \ - BUG(); \ + __GEM_BUG(condition); \ } \ } while(0) #define GEM_WARN_ON(expr) WARN_ON(expr) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index e1a66c8245b8..4d2d59a9942b 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -61,6 +61,17 @@ 
mark_free(struct drm_mm_scan *scan, return drm_mm_scan_add_block(scan, &vma->node); } +static bool defer_evict(struct i915_vma *vma) +{ + if (i915_vma_is_active(vma)) + return true; + + if (i915_vma_is_scanout(vma)) + return true; + + return false; +} + /** * i915_gem_evict_something - Evict vmas to make room for binding a new one * @vm: address space to evict from @@ -150,7 +161,7 @@ search_again: * To notice when we complete one full cycle, we record the * first active element seen, before moving it to the tail. */ - if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) { + if (active != ERR_PTR(-EAGAIN) && defer_evict(vma)) { if (!active) active = vma; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e3068ce50b42..bb181fe5d47e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1051,7 +1051,9 @@ i915_vma_coredump_create(const struct intel_gt *gt, for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; - s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); + s = io_mapping_map_wc(&mem->iomap, + dma - mem->region.start, + PAGE_SIZE); ret = compress_page(compress, (void __force *)s, dst, true); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 013794004da4..67c6d71f2675 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -794,7 +794,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) int position, vtotal; if (!crtc->active) - return -1; + return 0; vblank = &crtc->base.dev->vblank[drm_crtc_index(&crtc->base)]; mode = &vblank->hwmode; @@ -3040,6 +3040,24 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) spin_unlock_irq(&dev_priv->irq_lock); } +static void cnp_display_clock_wa(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + + /* + * Wa_14010685332:cnp/cmp,tgp,adp + * TODO: Clarify which platforms this applies to + * 
TODO: Figure out if this workaround can be applied in the s0ix suspend/resume handlers as + * on earlier platforms and whether the workaround is also needed for runtime suspend/resume + */ + if (INTEL_PCH_TYPE(dev_priv) == PCH_CNP || + (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && INTEL_PCH_TYPE(dev_priv) < PCH_DG1)) { + intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, + SBCLK_RUN_REFCLK_DIS); + intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); + } +} + static void gen8_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -3063,6 +3081,8 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_reset(dev_priv); + + cnp_display_clock_wa(dev_priv); } static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) @@ -3104,15 +3124,7 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) GEN3_IRQ_RESET(uncore, SDE); - /* Wa_14010685332:cnp/cmp,tgp,adp */ - if (INTEL_PCH_TYPE(dev_priv) == PCH_CNP || - (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && - INTEL_PCH_TYPE(dev_priv) < PCH_DG1)) { - intel_uncore_rmw(uncore, SOUTH_CHICKEN1, - SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); - intel_uncore_rmw(uncore, SOUTH_CHICKEN1, - SBCLK_RUN_REFCLK_DIS, 0); - } + cnp_display_clock_wa(dev_priv); } static void gen11_irq_reset(struct drm_i915_private *dev_priv) @@ -3764,9 +3776,19 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) } } +static void icp_irq_postinstall(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + u32 mask = SDE_GMBUS_ICP; + + GEN3_IRQ_INIT(uncore, SDE, ~mask, 0xffffffff); +} + static void gen8_irq_postinstall(struct drm_i915_private *dev_priv) { - if (HAS_PCH_SPLIT(dev_priv)) + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + icp_irq_postinstall(dev_priv); + else if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_postinstall(dev_priv); 
gen8_gt_irq_postinstall(&dev_priv->gt); @@ -3775,13 +3797,6 @@ static void gen8_irq_postinstall(struct drm_i915_private *dev_priv) gen8_master_intr_enable(dev_priv->uncore.regs); } -static void icp_irq_postinstall(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 mask = SDE_GMBUS_ICP; - - GEN3_IRQ_INIT(uncore, SDE, ~mask, 0xffffffff); -} static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) { @@ -4304,6 +4319,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; else if (IS_GEN9_LP(dev_priv)) dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup; + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + dev_priv->display.hpd_irq_setup = icp_hpd_irq_setup; else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup; else diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 43039dc8c607..666808cb3a32 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -62,7 +62,7 @@ static int remap_sg(pte_t *pte, unsigned long addr, void *data) { struct remap_pfn *r = data; - if (GEM_WARN_ON(!r->sgt.pfn)) + if (GEM_WARN_ON(!r->sgt.sgp)) return -EINVAL; /* Special PTE are not associated with any struct page */ diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 21a7a5f686ec..48f47e44e848 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -32,6 +32,7 @@ struct drm_printer; #define ENABLE_GUC_SUBMISSION BIT(0) #define ENABLE_GUC_LOAD_HUC BIT(1) +#define ENABLE_GUC_MASK GENMASK(1, 0) /* * Invoke param, a function-like macro, for each i915 param, with arguments: diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 914ae9dc6612..9a481ad5a8f6 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -455,6 +455,7 @@ static const struct 
intel_device_info snb_m_gt2_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_reset_engine = true, \ .has_rps = true, \ .dma_mask_size = 40, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ @@ -513,6 +514,7 @@ static const struct intel_device_info vlv_info = { .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), .has_runtime_pm = 1, .has_rc6 = 1, + .has_reset_engine = true, .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, @@ -571,8 +573,7 @@ static const struct intel_device_info hsw_gt3_info = { .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ - .has_64bit_reloc = 1, \ - .has_reset_engine = 1 + .has_64bit_reloc = 1 #define BDW_PLATFORM \ GEN8_FEATURES, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 736c09891e24..41ad5a66657e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -595,7 +595,6 @@ static int append_oa_sample(struct i915_perf_stream *stream, { int report_size = stream->oa_buffer.format_size; struct drm_i915_perf_record_header header; - u32 sample_flags = stream->sample_flags; header.type = DRM_I915_PERF_RECORD_SAMPLE; header.pad = 0; @@ -609,10 +608,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, return -EFAULT; buf += sizeof(header); - if (sample_flags & SAMPLE_OA_REPORT) { - if (copy_to_user(buf, report, report_size)) - return -EFAULT; - } + if (copy_to_user(buf, report, report_size)) + return -EFAULT; (*offset) += header.size; @@ -2669,7 +2666,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) stream->perf->ops.oa_enable(stream); - if (stream->periodic) + if (stream->sample_flags & SAMPLE_OA_REPORT) hrtimer_start(&stream->poll_check_timer, ns_to_ktime(stream->poll_oa_period), HRTIMER_MODE_REL_PINNED); @@ -2732,7 +2729,7 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { stream->perf->ops.oa_disable(stream); - if (stream->periodic) + if 
(stream->sample_flags & SAMPLE_OA_REPORT) hrtimer_cancel(&stream->poll_check_timer); } @@ -3015,7 +3012,7 @@ static ssize_t i915_perf_read(struct file *file, * disabled stream as an error. In particular it might otherwise lead * to a deadlock for blocking file descriptors... */ - if (!stream->enabled) + if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT)) return -EIO; if (!(file->f_flags & O_NONBLOCK)) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ca549d77657b..765ba64442ab 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3319,7 +3319,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ILK_DISPLAY_CHICKEN1 _MMIO(0x42000) #define ILK_FBCQ_DIS (1 << 22) -#define ILK_PABSTRETCH_DIS (1 << 21) +#define ILK_PABSTRETCH_DIS REG_BIT(21) +#define ILK_SABSTRETCH_DIS REG_BIT(20) +#define IVB_PRI_STRETCH_MAX_MASK REG_GENMASK(21, 20) +#define IVB_PRI_STRETCH_MAX_X8 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 0) +#define IVB_PRI_STRETCH_MAX_X4 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 1) +#define IVB_PRI_STRETCH_MAX_X2 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 2) +#define IVB_PRI_STRETCH_MAX_X1 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 3) +#define IVB_SPR_STRETCH_MAX_MASK REG_GENMASK(19, 18) +#define IVB_SPR_STRETCH_MAX_X8 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 0) +#define IVB_SPR_STRETCH_MAX_X4 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 1) +#define IVB_SPR_STRETCH_MAX_X2 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 2) +#define IVB_SPR_STRETCH_MAX_X1 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 3) /* @@ -8042,6 +8053,16 @@ enum { #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 +#define HSW_PRI_STRETCH_MAX_MASK REG_GENMASK(28, 27) +#define HSW_PRI_STRETCH_MAX_X8 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 0) +#define HSW_PRI_STRETCH_MAX_X4 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 1) +#define HSW_PRI_STRETCH_MAX_X2 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 2) 
+#define HSW_PRI_STRETCH_MAX_X1 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 3) +#define HSW_SPR_STRETCH_MAX_MASK REG_GENMASK(26, 25) +#define HSW_SPR_STRETCH_MAX_X8 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 0) +#define HSW_SPR_STRETCH_MAX_X4 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 1) +#define HSW_SPR_STRETCH_MAX_X2 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 2) +#define HSW_SPR_STRETCH_MAX_X1 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 3) #define HSW_FBCQ_DIS (1 << 22) #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) @@ -8228,6 +8249,7 @@ enum { #define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) #define GEN8_LQSC_RO_PERF_DIS (1 << 27) #define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) +#define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22) /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) @@ -12152,6 +12174,12 @@ enum skl_power_gate { #define __GEN11_VCS2_MOCS0 0x10000 #define GEN11_MFX2_MOCS(i) _MMIO(__GEN11_VCS2_MOCS0 + (i) * 4) +#define GEN9_SCRATCH_LNCF1 _MMIO(0xb008) +#define GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(0) + +#define GEN9_SCRATCH1 _MMIO(0xb11c) +#define EVICTION_PERF_FIX_ENABLE REG_BIT(8) + #define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) #define PMFLUSHDONE_LNICRSDROP (1 << 20) #define PMFLUSH_GAPL3UNBLOCK (1 << 21) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b1a46a0d866..22e39d938f17 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -276,7 +276,7 @@ static void remove_from_engine(struct i915_request *rq) bool i915_request_retire(struct i915_request *rq) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) return false; RQ_TRACE(rq, "\n"); @@ -342,8 +342,7 @@ void i915_request_retire_upto(struct i915_request *rq) struct i915_request *tmp; RQ_TRACE(rq, "\n"); - - GEM_BUG_ON(!i915_request_completed(rq)); + GEM_BUG_ON(!__i915_request_is_complete(rq)); do { 
tmp = list_first_entry(&tl->requests, typeof(*tmp), link); @@ -552,8 +551,10 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) */ - if (i915_request_completed(request)) - goto xfer; + if (__i915_request_is_complete(request)) { + list_del_init(&request->sched.link); + goto active; + } if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); @@ -588,11 +589,11 @@ bool __i915_request_submit(struct i915_request *request) engine->serial++; result = true; -xfer: - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { - list_move_tail(&request->sched.link, &engine->active.requests); - clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); - } + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + list_move_tail(&request->sched.link, &engine->active.requests); +active: + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); /* * XXX Rollback bonded-execution on __i915_request_unsubmit()? @@ -652,7 +653,7 @@ void __i915_request_unsubmit(struct i915_request *request) i915_request_cancel_breadcrumb(request); /* We've already spun, don't charge on resubmitting. 
*/ - if (request->sched.semaphores && i915_request_started(request)) + if (request->sched.semaphores && __i915_request_has_started(request)) request->sched.semaphores = 0; /* @@ -864,7 +865,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -970,15 +971,22 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) if (i915_request_started(signal)) return 0; + /* + * The caller holds a reference on @signal, but we do not serialise + * against it being retired and removed from the lists. + * + * We do not hold a reference to the request before @signal, and + * so must be very careful to ensure that it is not _recycled_ as + * we follow the link backwards. + */ fence = NULL; rcu_read_lock(); - spin_lock_irq(&signal->lock); do { struct list_head *pos = READ_ONCE(signal->link.prev); struct i915_request *prev; /* Confirm signal has not been retired, the link is valid */ - if (unlikely(i915_request_started(signal))) + if (unlikely(__i915_request_has_started(signal))) break; /* Is signal the earliest request on its timeline? */ @@ -1003,7 +1011,6 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) fence = &prev->fence; } while (0); - spin_unlock_irq(&signal->lock); rcu_read_unlock(); if (!fence) return 0; @@ -1520,7 +1527,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) */ prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); - if (prev && !i915_request_completed(prev)) { + if (prev && !__i915_request_is_complete(prev)) { /* * The requests are supposed to be kept in order. 
However, * we need to be wary in case the timeline->last_request @@ -1897,10 +1904,10 @@ static char queue_status(const struct i915_request *rq) static const char *run_status(const struct i915_request *rq) { - if (i915_request_completed(rq)) + if (__i915_request_is_complete(rq)) return "!"; - if (i915_request_started(rq)) + if (__i915_request_has_started(rq)) return "*"; if (!i915_sw_fence_signaled(&rq->semaphore)) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 318e359bf5c3..7144239f08df 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -520,7 +520,7 @@ void i915_request_show_with_schedule(struct drm_printer *m, if (signaler->timeline == rq->timeline) continue; - if (i915_request_completed(signaler)) + if (__i915_request_is_complete(signaler)) continue; i915_request_show(m, signaler, prefix, indent + 2); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 5b3a3c653454..a64adc8c883b 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -363,6 +363,21 @@ i915_vma_unpin_fence(struct i915_vma *vma) void i915_vma_parked(struct intel_gt *gt); +static inline bool i915_vma_is_scanout(const struct i915_vma *vma) +{ + return test_bit(I915_VMA_SCANOUT_BIT, __i915_vma_flags(vma)); +} + +static inline void i915_vma_mark_scanout(struct i915_vma *vma) +{ + set_bit(I915_VMA_SCANOUT_BIT, __i915_vma_flags(vma)); +} + +static inline void i915_vma_clear_scanout(struct i915_vma *vma) +{ + clear_bit(I915_VMA_SCANOUT_BIT, __i915_vma_flags(vma)); +} + #define for_each_until(cond) if (cond) break; else /** diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 9e9082dc8f4b..f5cb848b7a7e 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -249,6 +249,9 @@ struct i915_vma { #define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) #define 
I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) +#define I915_VMA_SCANOUT_BIT 18 +#define I915_VMA_SCANOUT ((int)BIT(I915_VMA_SCANOUT_BIT)) + struct i915_active active; #define I915_VMA_PAGES_BIAS 24 diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 6590d55df6cb..6ffc0673f005 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -57,10 +57,10 @@ struct intel_memory_region_ops { int (*init)(struct intel_memory_region *mem); void (*release)(struct intel_memory_region *mem); - struct drm_i915_gem_object * - (*create_object)(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); + int (*init_object)(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags); }; struct intel_memory_region { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8cc67f9c4e58..4c07745a6fdb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3876,6 +3876,7 @@ static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum plane_id plane_id; + int max_level = INT_MAX; if (!intel_has_sagv(dev_priv)) return false; @@ -3892,20 +3893,31 @@ static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) int level; /* Skip this plane if it's not enabled */ - if (!wm->wm[0].plane_en) + if (!wm->wm[0].enable) continue; /* Find the highest enabled wm level for this plane */ for (level = ilk_wm_max_level(dev_priv); - !wm->wm[level].plane_en; --level) + !wm->wm[level].enable; --level) { } + /* Highest common enabled wm level for all planes */ + max_level = min(level, max_level); + } + + /* No enabled planes? 
*/ + if (max_level == INT_MAX) + return true; + + for_each_plane_id_on_crtc(crtc, plane_id) { + const struct skl_plane_wm *wm = + &crtc_state->wm.skl.optimal.planes[plane_id]; + /* - * If any of the planes on this pipe don't enable wm levels that - * incur memory latencies higher than sagv_block_time_us we - * can't enable SAGV. + * All enabled planes must have enabled a common wm level that + * can tolerate memory latencies higher than sagv_block_time_us */ - if (!wm->wm[level].can_sagv) + if (wm->wm[0].enable && !wm->wm[max_level].can_sagv) return false; } @@ -3921,12 +3933,10 @@ static bool tgl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) return true; for_each_plane_id_on_crtc(crtc, plane_id) { - const struct skl_ddb_entry *plane_alloc = - &crtc_state->wm.skl.plane_ddb_y[plane_id]; const struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; - if (skl_ddb_entry_size(plane_alloc) < wm->sagv_wm0.min_ddb_alloc) + if (wm->wm[0].enable && !wm->sagv.wm0.enable) return false; } @@ -4747,20 +4757,61 @@ icl_get_total_relative_data_rate(struct intel_atomic_state *state, return total_data_rate; } -static const struct skl_wm_level * -skl_plane_wm_level(const struct intel_crtc_state *crtc_state, +const struct skl_wm_level * +skl_plane_wm_level(const struct skl_pipe_wm *pipe_wm, enum plane_id plane_id, int level) { - const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; if (level == 0 && pipe_wm->use_sagv_wm) - return &wm->sagv_wm0; + return &wm->sagv.wm0; return &wm->wm[level]; } +const struct skl_wm_level * +skl_plane_trans_wm(const struct skl_pipe_wm *pipe_wm, + enum plane_id plane_id) +{ + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + + if (pipe_wm->use_sagv_wm) + return &wm->sagv.trans_wm; + + return &wm->trans_wm; +} + +/* + * We only disable the watermarks for each plane if + * they exceed the ddb allocation of said plane. 
This + * is done so that we don't end up touching cursor + * watermarks needlessly when some other plane reduces + * our max possible watermark level. + * + * Bspec has this to say about the PLANE_WM enable bit: + * "All the watermarks at this level for all enabled + * planes must be enabled before the level will be used." + * So this is actually safe to do. + */ +static void +skl_check_wm_level(struct skl_wm_level *wm, u64 total) +{ + if (wm->min_ddb_alloc > total) + memset(wm, 0, sizeof(*wm)); +} + +static void +skl_check_nv12_wm_level(struct skl_wm_level *wm, struct skl_wm_level *uv_wm, + u64 total, u64 uv_total) +{ + if (wm->min_ddb_alloc > total || + uv_wm->min_ddb_alloc > uv_total) { + memset(wm, 0, sizeof(*wm)); + memset(uv_wm, 0, sizeof(*uv_wm)); + } +} + static int skl_allocate_plane_ddb(struct intel_atomic_state *state, struct intel_crtc *crtc) @@ -4928,45 +4979,33 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state, struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; - /* - * We only disable the watermarks for each plane if - * they exceed the ddb allocation of said plane. This - * is done so that we don't end up touching cursor - * watermarks needlessly when some other plane reduces - * our max possible watermark level. - * - * Bspec has this to say about the PLANE_WM enable bit: - * "All the watermarks at this level for all enabled - * planes must be enabled before the level will be used." - * So this is actually safe to do. 
- */ - if (wm->wm[level].min_ddb_alloc > total[plane_id] || - wm->uv_wm[level].min_ddb_alloc > uv_total[plane_id]) - memset(&wm->wm[level], 0, sizeof(wm->wm[level])); + skl_check_nv12_wm_level(&wm->wm[level], &wm->uv_wm[level], + total[plane_id], uv_total[plane_id]); /* * Wa_1408961008:icl, ehl * Underruns with WM1+ disabled */ if (IS_GEN(dev_priv, 11) && - level == 1 && wm->wm[0].plane_en) { - wm->wm[level].plane_res_b = wm->wm[0].plane_res_b; - wm->wm[level].plane_res_l = wm->wm[0].plane_res_l; + level == 1 && wm->wm[0].enable) { + wm->wm[level].blocks = wm->wm[0].blocks; + wm->wm[level].lines = wm->wm[0].lines; wm->wm[level].ignore_lines = wm->wm[0].ignore_lines; } } } /* - * Go back and disable the transition watermark if it turns out we - * don't have enough DDB blocks for it. + * Go back and disable the transition and SAGV watermarks + * if it turns out we don't have enough DDB blocks for them. */ for_each_plane_id_on_crtc(crtc, plane_id) { struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; - if (wm->trans_wm.plane_res_b >= total[plane_id]) - memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); + skl_check_wm_level(&wm->trans_wm, total[plane_id]); + skl_check_wm_level(&wm->sagv.wm0, total[plane_id]); + skl_check_wm_level(&wm->sagv.trans_wm, total[plane_id]); } return 0; @@ -5171,7 +5210,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; - u32 res_blocks, res_lines, min_ddb_alloc = 0; + u32 blocks, lines, min_ddb_alloc = 0; if (latency == 0) { /* reject it */ @@ -5217,24 +5256,22 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, } } - res_blocks = fixed16_to_u32_round_up(selected_result) + 1; - res_lines = div_round_up_fixed16(selected_result, - wp->plane_blocks_per_line); + blocks = fixed16_to_u32_round_up(selected_result) + 1; + lines = 
div_round_up_fixed16(selected_result, + wp->plane_blocks_per_line); if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { /* Display WA #1125: skl,bxt,kbl */ if (level == 0 && wp->rc_surface) - res_blocks += - fixed16_to_u32_round_up(wp->y_tile_minimum); + blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); /* Display WA #1126: skl,bxt,kbl */ if (level >= 1 && level <= 7) { if (wp->y_tiled) { - res_blocks += - fixed16_to_u32_round_up(wp->y_tile_minimum); - res_lines += wp->y_min_scanlines; + blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); + lines += wp->y_min_scanlines; } else { - res_blocks++; + blocks++; } /* @@ -5243,8 +5280,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * Assumption in DDB algorithm optimization for special * cases. Also covers Display WA #1125 for RC. */ - if (result_prev->plane_res_b > res_blocks) - res_blocks = result_prev->plane_res_b; + if (result_prev->blocks > blocks) + blocks = result_prev->blocks; } } @@ -5252,40 +5289,39 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, if (wp->y_tiled) { int extra_lines; - if (res_lines % wp->y_min_scanlines == 0) + if (lines % wp->y_min_scanlines == 0) extra_lines = wp->y_min_scanlines; else extra_lines = wp->y_min_scanlines * 2 - - res_lines % wp->y_min_scanlines; + lines % wp->y_min_scanlines; - min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, + min_ddb_alloc = mul_round_up_u32_fixed16(lines + extra_lines, wp->plane_blocks_per_line); } else { - min_ddb_alloc = res_blocks + - DIV_ROUND_UP(res_blocks, 10); + min_ddb_alloc = blocks + DIV_ROUND_UP(blocks, 10); } } if (!skl_wm_has_lines(dev_priv, level)) - res_lines = 0; + lines = 0; - if (res_lines > 31) { + if (lines > 31) { /* reject it */ result->min_ddb_alloc = U16_MAX; return; } /* - * If res_lines is valid, assume we can use this watermark level + * If lines is valid, assume we can use this watermark level * for now. 
We'll come back and disable it after we calculate the * DDB allocation if it turns out we don't actually have enough * blocks to satisfy it. */ - result->plane_res_b = res_blocks; - result->plane_res_l = res_lines; + result->blocks = blocks; + result->lines = lines; /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ - result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1; - result->plane_en = true; + result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; + result->enable = true; if (INTEL_GEN(dev_priv) < 12) result->can_sagv = latency >= dev_priv->sagv_block_time_us; @@ -5316,7 +5352,7 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, struct skl_plane_wm *plane_wm) { struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - struct skl_wm_level *sagv_wm = &plane_wm->sagv_wm0; + struct skl_wm_level *sagv_wm = &plane_wm->sagv.wm0; struct skl_wm_level *levels = plane_wm->wm; unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; @@ -5325,14 +5361,13 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, sagv_wm); } -static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, - const struct skl_wm_params *wp, - struct skl_plane_wm *wm) +static void skl_compute_transition_wm(struct drm_i915_private *dev_priv, + struct skl_wm_level *trans_wm, + const struct skl_wm_level *wm0, + const struct skl_wm_params *wp) { - struct drm_device *dev = crtc_state->uapi.crtc->dev; - const struct drm_i915_private *dev_priv = to_i915(dev); u16 trans_min, trans_amount, trans_y_tile_min; - u16 wm0_sel_res_b, trans_offset_b, res_blocks; + u16 wm0_blocks, trans_offset, blocks; /* Transition WM don't make any sense if ipc is disabled */ if (!dev_priv->ipc_enabled) @@ -5356,36 +5391,37 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, else trans_amount = 10; /* This is configurable amount */ - trans_offset_b = 
trans_min + trans_amount; + trans_offset = trans_min + trans_amount; /* * The spec asks for Selected Result Blocks for wm0 (the real value), * not Result Blocks (the integer value). Pay attention to the capital - * letters. The value wm_l0->plane_res_b is actually Result Blocks, but + * letters. The value wm_l0->blocks is actually Result Blocks, but * since Result Blocks is the ceiling of Selected Result Blocks plus 1, * and since we later will have to get the ceiling of the sum in the * transition watermarks calculation, we can just pretend Selected * Result Blocks is Result Blocks minus 1 and it should work for the * current platforms. */ - wm0_sel_res_b = wm->wm[0].plane_res_b - 1; + wm0_blocks = wm0->blocks - 1; if (wp->y_tiled) { trans_y_tile_min = (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum); - res_blocks = max(wm0_sel_res_b, trans_y_tile_min) + - trans_offset_b; + blocks = max(wm0_blocks, trans_y_tile_min) + trans_offset; } else { - res_blocks = wm0_sel_res_b + trans_offset_b; + blocks = wm0_blocks + trans_offset; } + blocks++; /* * Just assume we can enable the transition watermark. After * computing the DDB we'll come back and disable it if that * assumption turns out to be false. 
*/ - wm->trans_wm.plane_res_b = res_blocks + 1; - wm->trans_wm.plane_en = true; + trans_wm->blocks = blocks; + trans_wm->min_ddb_alloc = max_t(u16, wm0->min_ddb_alloc, blocks + 1); + trans_wm->enable = true; } static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, @@ -5405,10 +5441,15 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, skl_compute_wm_levels(crtc_state, &wm_params, wm->wm); - if (INTEL_GEN(dev_priv) >= 12) + skl_compute_transition_wm(dev_priv, &wm->trans_wm, + &wm->wm[0], &wm_params); + + if (INTEL_GEN(dev_priv) >= 12) { tgl_compute_sagv_wm(crtc_state, &wm_params, wm); - skl_compute_transition_wm(crtc_state, &wm_params, wm); + skl_compute_transition_wm(dev_priv, &wm->sagv.trans_wm, + &wm->sagv.wm0, &wm_params); + } return 0; } @@ -5555,12 +5596,12 @@ static void skl_write_wm_level(struct drm_i915_private *dev_priv, { u32 val = 0; - if (level->plane_en) + if (level->enable) val |= PLANE_WM_EN; if (level->ignore_lines) val |= PLANE_WM_IGNORE_LINES; - val |= level->plane_res_b; - val |= level->plane_res_l << PLANE_WM_LINES_SHIFT; + val |= level->blocks; + val |= level->lines << PLANE_WM_LINES_SHIFT; intel_de_write_fw(dev_priv, reg, val); } @@ -5572,23 +5613,19 @@ void skl_write_plane_wm(struct intel_plane *plane, int level, max_level = ilk_wm_max_level(dev_priv); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - const struct skl_plane_wm *wm = - &crtc_state->wm.skl.optimal.planes[plane_id]; + const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; const struct skl_ddb_entry *ddb_y = &crtc_state->wm.skl.plane_ddb_y[plane_id]; const struct skl_ddb_entry *ddb_uv = &crtc_state->wm.skl.plane_ddb_uv[plane_id]; - for (level = 0; level <= max_level; level++) { - const struct skl_wm_level *wm_level; - - wm_level = skl_plane_wm_level(crtc_state, plane_id, level); - + for (level = 0; level <= max_level; level++) 
skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level), - wm_level); - } + skl_plane_wm_level(pipe_wm, plane_id, level)); + skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id), - &wm->trans_wm); + skl_plane_trans_wm(pipe_wm, plane_id)); if (INTEL_GEN(dev_priv) >= 11) { skl_ddb_entry_write(dev_priv, @@ -5612,20 +5649,16 @@ void skl_write_cursor_wm(struct intel_plane *plane, int level, max_level = ilk_wm_max_level(dev_priv); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - const struct skl_plane_wm *wm = - &crtc_state->wm.skl.optimal.planes[plane_id]; + const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; const struct skl_ddb_entry *ddb = &crtc_state->wm.skl.plane_ddb_y[plane_id]; - for (level = 0; level <= max_level; level++) { - const struct skl_wm_level *wm_level; - - wm_level = skl_plane_wm_level(crtc_state, plane_id, level); - + for (level = 0; level <= max_level; level++) skl_write_wm_level(dev_priv, CUR_WM(pipe, level), - wm_level); - } - skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); + skl_plane_wm_level(pipe_wm, plane_id, level)); + + skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), + skl_plane_trans_wm(pipe_wm, plane_id)); skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb); } @@ -5633,10 +5666,10 @@ void skl_write_cursor_wm(struct intel_plane *plane, bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2) { - return l1->plane_en == l2->plane_en && + return l1->enable == l2->enable && l1->ignore_lines == l2->ignore_lines && - l1->plane_res_l == l2->plane_res_l && - l1->plane_res_b == l2->plane_res_b; + l1->lines == l2->lines && + l1->blocks == l2->blocks; } static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, @@ -5655,7 +5688,9 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return false; } - return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); + return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm) && + 
skl_wm_level_equals(&wm1->sagv.wm0, &wm2->sagv.wm0) && + skl_wm_level_equals(&wm1->sagv.trans_wm, &wm2->sagv.trans_wm); } static bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, @@ -5885,85 +5920,114 @@ skl_print_wm_changes(struct intel_atomic_state *state) continue; drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm\n", + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm,%cstwm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm,%cstwm\n", plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), - enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), - enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), - enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), - enast(old_wm->trans_wm.plane_en), - enast(old_wm->sagv_wm0.plane_en), - enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), - enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), - enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), - enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), - enast(new_wm->trans_wm.plane_en), - enast(new_wm->sagv_wm0.plane_en)); + enast(old_wm->wm[0].enable), enast(old_wm->wm[1].enable), + enast(old_wm->wm[2].enable), enast(old_wm->wm[3].enable), + enast(old_wm->wm[4].enable), enast(old_wm->wm[5].enable), + enast(old_wm->wm[6].enable), enast(old_wm->wm[7].enable), + enast(old_wm->trans_wm.enable), + enast(old_wm->sagv.wm0.enable), + enast(old_wm->sagv.trans_wm.enable), + enast(new_wm->wm[0].enable), enast(new_wm->wm[1].enable), + enast(new_wm->wm[2].enable), enast(new_wm->wm[3].enable), + enast(new_wm->wm[4].enable), enast(new_wm->wm[5].enable), + enast(new_wm->wm[6].enable), enast(new_wm->wm[7].enable), + enast(new_wm->trans_wm.enable), + enast(new_wm->sagv.wm0.enable), + 
enast(new_wm->sagv.trans_wm.enable)); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" - " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", + "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%4d" + " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%4d\n", plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, - enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, - enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].plane_res_l, - enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].plane_res_l, - enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].plane_res_l, - enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].plane_res_l, - enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, - enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, - enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, - enast(old_wm->sagv_wm0.ignore_lines), old_wm->sagv_wm0.plane_res_l, - - enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, - enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, - enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].plane_res_l, - enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].plane_res_l, - enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].plane_res_l, - enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, - enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, - enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l, - enast(new_wm->sagv_wm0.ignore_lines), new_wm->sagv_wm0.plane_res_l); + enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].lines, + enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].lines, + enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].lines, + enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].lines, + 
enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].lines, + enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].lines, + enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].lines, + enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].lines, + enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.lines, + enast(old_wm->sagv.wm0.ignore_lines), old_wm->sagv.wm0.lines, + enast(old_wm->sagv.trans_wm.ignore_lines), old_wm->sagv.trans_wm.lines, + enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].lines, + enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].lines, + enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].lines, + enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].lines, + enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].lines, + enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].lines, + enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].lines, + enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].lines, + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.lines, + enast(new_wm->sagv.wm0.ignore_lines), new_wm->sagv.wm0.lines, + enast(new_wm->sagv.trans_wm.ignore_lines), new_wm->sagv.trans_wm.lines); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d\n", plane->base.base.id, plane->base.name, - old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, - old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, - old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, - old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, - old_wm->trans_wm.plane_res_b, - old_wm->sagv_wm0.plane_res_b, - new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, - new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, - new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, - new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, - new_wm->trans_wm.plane_res_b, - new_wm->sagv_wm0.plane_res_b); + 
old_wm->wm[0].blocks, old_wm->wm[1].blocks, + old_wm->wm[2].blocks, old_wm->wm[3].blocks, + old_wm->wm[4].blocks, old_wm->wm[5].blocks, + old_wm->wm[6].blocks, old_wm->wm[7].blocks, + old_wm->trans_wm.blocks, + old_wm->sagv.wm0.blocks, + old_wm->sagv.trans_wm.blocks, + new_wm->wm[0].blocks, new_wm->wm[1].blocks, + new_wm->wm[2].blocks, new_wm->wm[3].blocks, + new_wm->wm[4].blocks, new_wm->wm[5].blocks, + new_wm->wm[6].blocks, new_wm->wm[7].blocks, + new_wm->trans_wm.blocks, + new_wm->sagv.wm0.blocks, + new_wm->sagv.trans_wm.blocks); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, old_wm->trans_wm.min_ddb_alloc, - old_wm->sagv_wm0.min_ddb_alloc, + old_wm->sagv.wm0.min_ddb_alloc, + old_wm->sagv.trans_wm.min_ddb_alloc, new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, new_wm->trans_wm.min_ddb_alloc, - new_wm->sagv_wm0.min_ddb_alloc); + new_wm->sagv.wm0.min_ddb_alloc, + new_wm->sagv.trans_wm.min_ddb_alloc); } } } +static bool skl_plane_selected_wm_equals(struct intel_plane *plane, + const struct skl_pipe_wm *old_pipe_wm, + const struct skl_pipe_wm *new_pipe_wm) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + int level, max_level = ilk_wm_max_level(i915); + + for (level = 0; level <= max_level; level++) { + /* + * We don't check uv_wm as the hardware doesn't actually + * use it. 
It only gets used for calculating the required + * ddb allocation. + */ + if (!skl_wm_level_equals(skl_plane_wm_level(old_pipe_wm, plane->id, level), + skl_plane_wm_level(new_pipe_wm, plane->id, level))) + return false; + } + + return skl_wm_level_equals(skl_plane_trans_wm(old_pipe_wm, plane->id), + skl_plane_trans_wm(new_pipe_wm, plane->id)); +} + /* * To make sure the cursor watermark registers are always consistent * with our computed state the following scenario needs special @@ -6009,9 +6073,9 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state, * with the software state. */ if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) && - skl_plane_wm_equals(dev_priv, - &old_crtc_state->wm.skl.optimal.planes[plane_id], - &new_crtc_state->wm.skl.optimal.planes[plane_id])) + skl_plane_selected_wm_equals(plane, + &old_crtc_state->wm.skl.optimal, + &new_crtc_state->wm.skl.optimal)) continue; plane_state = intel_atomic_get_plane_state(state, plane); @@ -6142,10 +6206,10 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state, static void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level) { - level->plane_en = val & PLANE_WM_EN; + level->enable = val & PLANE_WM_EN; level->ignore_lines = val & PLANE_WM_IGNORE_LINES; - level->plane_res_b = val & PLANE_WM_BLOCKS_MASK; - level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) & + level->blocks = val & PLANE_WM_BLOCKS_MASK; + level->lines = (val >> PLANE_WM_LINES_SHIFT) & PLANE_WM_LINES_MASK; } @@ -6172,15 +6236,17 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, skl_wm_level_from_reg_val(val, &wm->wm[level]); } - if (INTEL_GEN(dev_priv) >= 12) - wm->sagv_wm0 = wm->wm[0]; - if (plane_id != PLANE_CURSOR) val = intel_uncore_read(&dev_priv->uncore, PLANE_WM_TRANS(pipe, plane_id)); else val = intel_uncore_read(&dev_priv->uncore, CUR_WM_TRANS(pipe)); skl_wm_level_from_reg_val(val, &wm->trans_wm); + + if (INTEL_GEN(dev_priv) >= 12) { + wm->sagv.wm0 = wm->wm[0]; + 
wm->sagv.trans_wm = wm->trans_wm; + } } } @@ -7243,11 +7309,16 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); - /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ for_each_pipe(dev_priv, pipe) { + /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) | BDW_DPRS_MASK_VBLANK_SRD); + + /* Undocumented but fixes async flip + VT-d corruption */ + if (intel_vtd_active()) + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1); } /* WaVSRefCountFullforceMissDisable:bdw */ @@ -7283,11 +7354,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { + enum pipe pipe; + /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) | HSW_FBCQ_DIS); + for_each_pipe(dev_priv, pipe) { + /* Undocumented but fixes async flip + VT-d corruption */ + if (intel_vtd_active()) + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1); + } + /* This is required by WaCatErrorRejectionIssue:hsw */ intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 97550cf0b6df..669c8d505677 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -52,6 +52,11 @@ bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, const struct intel_bw_state *bw_state); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); void 
intel_sagv_post_plane_update(struct intel_atomic_state *state); +const struct skl_wm_level *skl_plane_wm_level(const struct skl_pipe_wm *pipe_wm, + enum plane_id plane_id, + int level); +const struct skl_wm_level *skl_plane_trans_wm(const struct skl_pipe_wm *pipe_wm, + enum plane_id plane_id); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 412e21604a05..dc394fb7ccfa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -8,6 +8,7 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 3512bb8433cf..f99bb0113726 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -38,8 +38,8 @@ static void quirk_add(struct drm_i915_gem_object *obj, struct list_head *objects) { /* quirk is only for live tiled objects, use it to declare ownership */ - GEM_BUG_ON(obj->mm.quirked); - obj->mm.quirked = true; + GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_set_tiling_quirk(obj); list_add(&obj->st_link, objects); } @@ -85,7 +85,7 @@ static void unpin_ggtt(struct i915_ggtt *ggtt) struct i915_vma *vma; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) - if (vma->obj->mm.quirked) + if (i915_gem_object_has_tiling_quirk(vma->obj)) i915_vma_unpin(vma); } @@ -94,8 +94,8 @@ static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list) struct drm_i915_gem_object *obj, *on; list_for_each_entry_safe(obj, on, list, st_link) { - GEM_BUG_ON(!obj->mm.quirked); - obj->mm.quirked = false; + 
GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj)); + i915_gem_object_set_tiling_quirk(obj); i915_gem_object_put(obj); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 70e07e9b78c2..c1adea8765a9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1881,7 +1881,7 @@ static int igt_cs_tlb(void *arg) vma = i915_vma_instance(out, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto out_put_batch; + goto out_put_out; } err = i915_vma_pin(vma, 0, 0, diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 979d96f27c43..3c6021415274 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -15,21 +15,16 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = { .release = i915_gem_object_release_memory_region, }; -static struct drm_i915_gem_object * -mock_object_create(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +static int mock_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + unsigned int flags) { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; - struct drm_i915_gem_object *obj; if (size > mem->mm.size) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); + return -E2BIG; drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class); @@ -40,13 +35,13 @@ mock_object_create(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem, flags); - return obj; + return 0; } static const struct intel_memory_region_ops mock_region_ops = { .init = intel_memory_region_init_buddy, .release = intel_memory_region_release_buddy, - .create_object = mock_object_create, + .init_object = 
mock_object_init, }; struct intel_memory_region * |