summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/uc
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2023-04-06 14:21:00 +0200
committerDaniel Vetter <daniel.vetter@ffwll.ch>2023-04-06 14:21:00 +0200
commitf86286569e92a260fbf8a1975f9421b4a66581d8 (patch)
tree9f6b99a928d4bd76c0cc0bd840c8251e728ec8cd /drivers/gpu/drm/i915/gt/uc
parent1138398d71e8e583669fcec96784471332e488d4 (diff)
parent4b51210f98c2b89ce37aede5b8dc5105be0572c6 (diff)
downloadlinux-next-f86286569e92a260fbf8a1975f9421b4a66581d8.tar.gz
Merge tag 'drm-intel-gt-next-2023-04-06' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes: - (Build-time only, should not have any impact) drm/i915/uapi: Replace fake flex-array with flexible-array member "Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead." This is on core kernel request moving towards GCC 13. Driver Changes: - Fix context runtime accounting on sysfs fdinfo for heavy workloads (Tvrtko) - Add support for OA media units on MTL (Umesh) - Add new workarounds for Meteorlake (Daniele, Radhakrishna, Haridhar) - Fix sysfs to read actual frequency for MTL and Gen6 and earlier (Ashutosh) - Synchronize i915/BIOS on C6 enabling on MTL (Vinay) - Fix DMAR error noise due to GPU error capture (Andrej) - Fix forcewake during BAR resize on discrete (Andrzej) - Flush lmem contents after construction on discrete (Chris) - Fix GuC loading timeout on systems where IFWI programs low boot frequency (John) - Fix race condition UAF in i915_perf_add_config_ioctl (Min) - Sanity-check MMIO access early in driver load and during forcewake (Matt) - Wakeref fixes for GuC RC error scenario and active VM tracking (Chris) - Cancel HuC delayed load timer on reset (Daniele) - Limit double GT reset to pre-MTL (Daniele) - Use i915 instead of dev_priv inside the file_priv structure (Andi) - Improve GuC load error reporting (John) - Simplify VCS/BSD engine selection logic (Tvrtko) - Perform uc late init after probe error injection (Andrzej) - Fix format for perf_limit_reasons in debugfs (Vinay) - Create per-gt debugfs files (Andi) - Documentation and kerneldoc fixes (Nirmoy, Lee) - Selftest improvements (Fei, Jonathan) Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZC6APj/feB+jBf2d@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c141
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c2
10 files changed, 161 insertions, 32 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 8085fb181274..bcb1129b3610 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -21,6 +21,9 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02,
INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03,
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_START = 0x05,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07,
INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10,
INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20,
INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
@@ -38,4 +41,18 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_READY = 0xF0,
};
+enum intel_bootrom_load_status {
+ INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13,
+ INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A,
+ INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50,
+ INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73,
+ INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74,
+ INTEL_BOOTROM_STATUS_LOADLOC_FAILED = 0x75,
+ INTEL_BOOTROM_STATUS_JUMP_PASSED = 0x76,
+ INTEL_BOOTROM_STATUS_JUMP_FAILED = 0x77,
+ INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED = 0x79,
+ INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7A,
+ INTEL_BOOTROM_STATUS_EXCEPTION = 0x7E,
+};
+
#endif /* _ABI_GUC_ERRORS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
index 4b5dbb44afb4..f4c1106bb2a9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
@@ -9,7 +9,9 @@
#include <linux/types.h>
struct intel_gsc_uc;
+struct intel_uncore;
int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc);
bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index bb4dfe707a7d..e46aac1a41e6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -42,6 +42,8 @@ struct intel_guc {
/** @capture: the error-state-capture module's data and objects */
struct intel_guc_state_capture *capture;
+ struct dentry *dbgfs_node;
+
/** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 69133420c78b..6fda3aec5c66 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -12,6 +12,7 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
#include "intel_guc_fw.h"
#include "intel_guc_print.h"
#include "i915_drv.h"
@@ -88,31 +89,80 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
/*
* Read the GuC status register (GUC_STATUS) and store it in the
* specified location; then return a boolean indicating whether
- * the value matches either of two values representing completion
- * of the GuC boot process.
+ * the value matches either completion or a known failure code.
*
* This is used for polling the GuC status in a wait_for()
* loop below.
*/
-static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
+static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success)
{
u32 val = intel_uncore_read(uncore, GUC_STATUS);
u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+ u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val);
*status = val;
- return uk_val == INTEL_GUC_LOAD_STATUS_READY;
+ switch (uk_val) {
+ case INTEL_GUC_LOAD_STATUS_READY:
+ *success = true;
+ return true;
+
+ case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
+ case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
+ case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
+ case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR:
+ case INTEL_GUC_LOAD_STATUS_DPC_ERROR:
+ case INTEL_GUC_LOAD_STATUS_EXCEPTION:
+ case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+ case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
+ case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ *success = false;
+ return true;
+ }
+
+ switch (br_val) {
+ case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+ case INTEL_BOOTROM_STATUS_RSA_FAILED:
+ case INTEL_BOOTROM_STATUS_PAVPC_FAILED:
+ case INTEL_BOOTROM_STATUS_WOPCM_FAILED:
+ case INTEL_BOOTROM_STATUS_LOADLOC_FAILED:
+ case INTEL_BOOTROM_STATUS_JUMP_FAILED:
+ case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED:
+ case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT:
+ case INTEL_BOOTROM_STATUS_EXCEPTION:
+ *success = false;
+ return true;
+ }
+
+ return false;
}
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed. But a shorter timeout for releases so that users don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define GUC_LOAD_RETRY_LIMIT 20
+#else
+#define GUC_LOAD_RETRY_LIMIT 3
+#endif
+
static int guc_wait_ucode(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ bool success;
u32 status;
- int ret;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
/*
* Wait for the GuC to start up.
- * NB: Docs recommend not using the interrupt for completion.
+ *
* Measurements indicate this should take no more than 20ms
* (assuming the GT clock is at maximum frequency). So, a
* timeout here indicates that the GuC has failed and is unusable.
@@ -126,29 +176,80 @@ static int guc_wait_ucode(struct intel_guc *guc)
* issues to be resolved. In the meantime bump the timeout to
* 200ms. Even at slowest clock, this should be sufficient. And
* in the working case, a larger timeout makes no difference.
+ *
+ * IFWI updates have also been seen to cause sporadic failures due to
+ * the requested frequency not being granted and thus the firmware
+ * load is attempted at minimum frequency. That can lead to load times
+ * in the seconds range. However, there is a limit on how long an
+ * individual wait_for() can wait. So wrap it in a loop.
*/
- ret = wait_for(guc_ready(uncore, &status), 200);
- if (ret) {
- guc_info(guc, "load failed: status = 0x%08X\n", status);
- guc_info(guc, "load failed: status: Reset = %d, "
- "BootROM = 0x%02X, UKernel = 0x%02X, "
- "MIA = 0x%02X, Auth = 0x%02X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status),
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
-
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+ before = ktime_get();
+ for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
+ ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
+ if (!ret || !success)
+ break;
+
+ guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz\n",
+ count, intel_rps_read_actual_frequency(&uncore->gt->rps));
+ }
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+ if (ret || !success) {
+ u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
+ status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
+ guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ bootrom, ukernel,
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ switch (bootrom) {
+ case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+ guc_info(guc, "invalid key requested, header = 0x%08X\n",
+ intel_uncore_read(uncore, GUC_HEADER_INFO));
+ ret = -ENOEXEC;
+ break;
+
+ case INTEL_BOOTROM_STATUS_RSA_FAILED:
guc_info(guc, "firmware signature verification failed\n");
ret = -ENOEXEC;
+ break;
}
- if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) {
+ switch (ukernel) {
+ case INTEL_GUC_LOAD_STATUS_EXCEPTION:
guc_info(guc, "firmware exception. EIP: %#x\n",
intel_uncore_read(uncore, SOFT_SCRATCH(13)));
ret = -ENXIO;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ guc_info(guc, "illegal register in save/restore workaround list\n");
+ ret = -EPERM;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_HWCONFIG_START:
+ guc_info(guc, "still extracting hwconfig table.\n");
+ ret = -ETIMEDOUT;
+ break;
}
+
+ /* Uncommon/unexpected error, see earlier status code print for details */
+ if (ret == 0)
+ ret = -ENXIO;
+ } else if (delta_ms > 200) {
+ guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+ before_freq, status, count, ret);
+ } else {
+ guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+ before_freq, status, count, ret);
}
return ret;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index 4781fccc2687..852bea0208ce 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -102,7 +102,7 @@ static bool has_table(struct drm_i915_private *i915)
return false;
}
-/**
+/*
* intel_guc_hwconfig_init - Initialize the HWConfig
*
* Retrieve the HWConfig table from the GuC and save it locally.
@@ -136,7 +136,7 @@ static int guc_hwconfig_init(struct intel_gt *gt)
return 0;
}
-/**
+/*
* intel_gt_init_hwconfig - Initialize the HWConfig if available
*
* Retrieve the HWConfig table if available on the current platform.
@@ -149,7 +149,7 @@ int intel_gt_init_hwconfig(struct intel_gt *gt)
return guc_hwconfig_init(gt);
}
-/**
+/*
* intel_gt_fini_hwconfig - Finalize the HWConfig
*
* Free up the memory allocation holding the table.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 195db8c9d420..55bc8b55fbc0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -542,8 +542,11 @@ static int guc_log_relay_create(struct intel_guc_log *log)
*/
n_subbufs = 8;
+ if (!guc->dbgfs_node)
+ return -ENOENT;
+
guc_log_relay_chan = relay_open("guc_log",
- i915->drm.primary->debugfs_root,
+ guc->dbgfs_node,
subbuf_size, n_subbufs,
&relay_callbacks, i915);
if (!guc_log_relay_chan) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 9915de32e894..3fd798837502 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -18,8 +18,6 @@
#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT)
#define GS_BOOTROM_SHIFT 1
#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT)
-#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT)
-#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT)
#define GS_UKERNEL_SHIFT 8
#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT)
#define GS_MIA_SHIFT 16
@@ -32,6 +30,8 @@
#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT)
#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT)
+#define GUC_HEADER_INFO _MMIO(0xc014)
+
#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4)
#define SOFT_SCRATCH_COUNT 16
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 72884e21470b..aefdaa62da99 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -241,6 +241,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc)
i915_sw_fence_fini(&huc->delayed_load.fence);
}
+int intel_huc_sanitize(struct intel_huc *huc)
+{
+ delayed_huc_load_complete(huc);
+ intel_uc_fw_sanitize(&huc->fw);
+ return 0;
+}
+
static bool vcs_supported(struct intel_gt *gt)
{
intel_engine_mask_t mask = gt->info.engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 52db03620c60..db555b3c1f56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -41,6 +41,7 @@ struct intel_huc {
} delayed_load;
};
+int intel_huc_sanitize(struct intel_huc *huc);
void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
@@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
-static inline int intel_huc_sanitize(struct intel_huc *huc)
-{
- intel_uc_fw_sanitize(&huc->fw);
- return 0;
-}
-
static inline bool intel_huc_is_supported(struct intel_huc *huc)
{
return intel_uc_fw_is_supported(&huc->fw);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
index 284d6fbc2d08..2f93cc4e408a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -54,6 +54,8 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
if (IS_ERR(root))
return;
+ uc->guc.dbgfs_node = root;
+
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc);
intel_guc_debugfs_register(&uc->guc, root);