diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-04-06 14:21:00 +0200 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-04-06 14:21:00 +0200 |
commit | f86286569e92a260fbf8a1975f9421b4a66581d8 (patch) | |
tree | 9f6b99a928d4bd76c0cc0bd840c8251e728ec8cd /drivers/gpu/drm/i915/gt/uc | |
parent | 1138398d71e8e583669fcec96784471332e488d4 (diff) | |
parent | 4b51210f98c2b89ce37aede5b8dc5105be0572c6 (diff) | |
download | linux-next-f86286569e92a260fbf8a1975f9421b4a66581d8.tar.gz |
Merge tag 'drm-intel-gt-next-2023-04-06' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- (Build-time only, should not have any impact)
drm/i915/uapi: Replace fake flex-array with flexible-array member
"Zero-length arrays as fake flexible arrays are deprecated and we are
moving towards adopting C99 flexible-array members instead."
This is on core kernel request moving towards GCC 13.
Driver Changes:
- Fix context runtime accounting on sysfs fdinfo for heavy workloads (Tvrtko)
- Add support for OA media units on MTL (Umesh)
- Add new workarounds for Meteorlake (Daniele, Radhakrishna, Haridhar)
- Fix sysfs to read actual frequency for MTL and Gen6 and earlier
(Ashutosh)
- Synchronize i915/BIOS on C6 enabling on MTL (Vinay)
- Fix DMAR error noise due to GPU error capture (Andrej)
- Fix forcewake during BAR resize on discrete (Andrzej)
- Flush lmem contents after construction on discrete (Chris)
- Fix GuC loading timeout on systems where IFWI programs low boot
frequency (John)
- Fix race condition UAF in i915_perf_add_config_ioctl (Min)
- Sanitycheck MMIO access early in driver load and during forcewake
(Matt)
- Wakeref fixes for GuC RC error scenario and active VM tracking (Chris)
- Cancel HuC delayed load timer on reset (Daniele)
- Limit double GT reset to pre-MTL (Daniele)
- Use i915 instead of dev_priv insied the file_priv structure (Andi)
- Improve GuC load error reporting (John)
- Simplify VCS/BSD engine selection logic (Tvrtko)
- Perform uc late init after probe error injection (Andrzej)
- Fix format for perf_limit_reasons in debugfs (Vinay)
- Create per-gt debugfs files (Andi)
- Documentation and kerneldoc fixes (Nirmoy, Lee)
- Selftest improvements (Fei, Jonathan)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZC6APj/feB+jBf2d@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc')
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 141 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_huc.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_huc.h | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c | 2 |
10 files changed, 161 insertions, 32 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index 8085fb181274..bcb1129b3610 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -21,6 +21,9 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + INTEL_GUC_LOAD_STATUS_HWCONFIG_START = 0x05, + INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06, + INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07, INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10, INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20, INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, @@ -38,4 +41,18 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_READY = 0xF0, }; +enum intel_bootrom_load_status { + INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13, + INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A, + INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50, + INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73, + INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74, + INTEL_BOOTROM_STATUS_LOADLOC_FAILED = 0x75, + INTEL_BOOTROM_STATUS_JUMP_PASSED = 0x76, + INTEL_BOOTROM_STATUS_JUMP_FAILED = 0x77, + INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED = 0x79, + INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7A, + INTEL_BOOTROM_STATUS_EXCEPTION = 0x7E, +}; + #endif /* _ABI_GUC_ERRORS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h index 4b5dbb44afb4..f4c1106bb2a9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h @@ -9,7 +9,9 @@ #include <linux/types.h> struct intel_gsc_uc; +struct intel_uncore; int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc); bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index bb4dfe707a7d..e46aac1a41e6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -42,6 +42,8 @@ struct intel_guc { /** @capture: the error-state-capture module's data and objects */ struct intel_guc_state_capture *capture; + struct dentry *dbgfs_node; + /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 69133420c78b..6fda3aec5c66 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -12,6 +12,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" #include "intel_guc_fw.h" #include "intel_guc_print.h" #include "i915_drv.h" @@ -88,31 +89,80 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, /* * Read the GuC status register (GUC_STATUS) and store it in the * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. + * the value matches either completion or a known failure code. * * This is used for polling the GuC status in a wait_for() * loop below. */ -static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) +static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success) { u32 val = intel_uncore_read(uncore, GUC_STATUS); u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); + u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val); *status = val; - return uk_val == INTEL_GUC_LOAD_STATUS_READY; + switch (uk_val) { + case INTEL_GUC_LOAD_STATUS_READY: + *success = true; + return true; + + case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH: + case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: + case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: + case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case INTEL_GUC_LOAD_STATUS_DPC_ERROR: + case INTEL_GUC_LOAD_STATUS_EXCEPTION: + case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID: + case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID: + case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + *success = false; + return true; + } + + switch (br_val) { + case INTEL_BOOTROM_STATUS_NO_KEY_FOUND: + case INTEL_BOOTROM_STATUS_RSA_FAILED: + case INTEL_BOOTROM_STATUS_PAVPC_FAILED: + case INTEL_BOOTROM_STATUS_WOPCM_FAILED: + case INTEL_BOOTROM_STATUS_LOADLOC_FAILED: + case INTEL_BOOTROM_STATUS_JUMP_FAILED: + case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED: + case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT: + case INTEL_BOOTROM_STATUS_EXCEPTION: + *success = false; + return true; + } + + return false; } +/* + * Use a longer timeout for debug builds so that problems can be detected + * and analysed. But a shorter timeout for releases so that user's don't + * wait forever to find out there is a problem. Note that the only reason + * an end user should hit the timeout is in case of extreme thermal throttling. + * And a system that is that hot during boot is probably dead anyway! + */ +#if defined(CONFIG_DRM_I915_DEBUG_GEM) +#define GUC_LOAD_RETRY_LIMIT 20 +#else +#define GUC_LOAD_RETRY_LIMIT 3 +#endif + static int guc_wait_ucode(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_uncore *uncore = gt->uncore; + ktime_t before, after, delta; + bool success; u32 status; - int ret; + int ret, count; + u64 delta_ms; + u32 before_freq; /* * Wait for the GuC to start up. - * NB: Docs recommend not using the interrupt for completion. + * * Measurements indicate this should take no more than 20ms * (assuming the GT clock is at maximum frequency). So, a * timeout here indicates that the GuC has failed and is unusable. @@ -126,29 +176,80 @@ static int guc_wait_ucode(struct intel_guc *guc) * issues to be resolved. In the meantime bump the timeout to * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. + * + * IFWI updates have also been seen to cause sporadic failures due to + * the requested frequency not being granted and thus the firmware + * load is attempted at minimum frequency. That can lead to load times + * in the seconds range. However, there is a limit on how long an + * individual wait_for() can wait. So wrap it in a loop. */ - ret = wait_for(guc_ready(uncore, &status), 200); - if (ret) { - guc_info(guc, "load failed: status = 0x%08X\n", status); - guc_info(guc, "load failed: status: Reset = %d, " - "BootROM = 0x%02X, UKernel = 0x%02X, " - "MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps); + before = ktime_get(); + for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) { + ret = wait_for(guc_load_done(uncore, &status, &success), 1000); + if (!ret || !success) + break; + + guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz\n", + count, intel_rps_read_actual_frequency(&uncore->gt->rps)); + } + after = ktime_get(); + delta = ktime_sub(after, before); + delta_ms = ktime_to_ms(delta); + if (ret || !success) { + u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); + u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); + + guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n", + status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret); + guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + bootrom, ukernel, + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + switch (bootrom) { + case INTEL_BOOTROM_STATUS_NO_KEY_FOUND: + guc_info(guc, "invalid key requested, header = 0x%08X\n", + intel_uncore_read(uncore, GUC_HEADER_INFO)); + ret = -ENOEXEC; + break; + + case INTEL_BOOTROM_STATUS_RSA_FAILED: guc_info(guc, "firmware signature verification failed\n"); ret = -ENOEXEC; + break; } - if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) { + switch (ukernel) { + case INTEL_GUC_LOAD_STATUS_EXCEPTION: guc_info(guc, "firmware exception. EIP: %#x\n", intel_uncore_read(uncore, SOFT_SCRATCH(13))); ret = -ENXIO; + break; + + case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + guc_info(guc, "illegal register in save/restore workaround list\n"); + ret = -EPERM; + break; + + case INTEL_GUC_LOAD_STATUS_HWCONFIG_START: + guc_info(guc, "still extracting hwconfig table.\n"); + ret = -ETIMEDOUT; + break; } + + /* Uncommon/unexpected error, see earlier status code print for details */ + if (ret == 0) + ret = -ENXIO; + } else if (delta_ms > 200) { + guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n", + delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), + before_freq, status, count, ret); + } else { + guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), + before_freq, status, count, ret); } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index 4781fccc2687..852bea0208ce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -102,7 +102,7 @@ static bool has_table(struct drm_i915_private *i915) return false; } -/** +/* * intel_guc_hwconfig_init - Initialize the HWConfig * * Retrieve the HWConfig table from the GuC and save it locally. @@ -136,7 +136,7 @@ static int guc_hwconfig_init(struct intel_gt *gt) return 0; } -/** +/* * intel_gt_init_hwconfig - Initialize the HWConfig if available * * Retrieve the HWConfig table if available on the current platform. @@ -149,7 +149,7 @@ int intel_gt_init_hwconfig(struct intel_gt *gt) return guc_hwconfig_init(gt); } -/** +/* * intel_gt_fini_hwconfig - Finalize the HWConfig * * Free up the memory allocation holding the table. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 195db8c9d420..55bc8b55fbc0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -542,8 +542,11 @@ static int guc_log_relay_create(struct intel_guc_log *log) */ n_subbufs = 8; + if (!guc->dbgfs_node) + return -ENOENT; + guc_log_relay_chan = relay_open("guc_log", - i915->drm.primary->debugfs_root, + guc->dbgfs_node, subbuf_size, n_subbufs, &relay_callbacks, i915); if (!guc_log_relay_chan) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 9915de32e894..3fd798837502 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -18,8 +18,6 @@ #define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) #define GS_BOOTROM_SHIFT 1 #define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) #define GS_UKERNEL_SHIFT 8 #define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) #define GS_MIA_SHIFT 16 @@ -32,6 +30,8 @@ #define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) #define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) +#define GUC_HEADER_INFO _MMIO(0xc014) + #define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) #define SOFT_SCRATCH_COUNT 16 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 72884e21470b..aefdaa62da99 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -241,6 +241,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc) i915_sw_fence_fini(&huc->delayed_load.fence); } +int intel_huc_sanitize(struct intel_huc *huc) +{ + delayed_huc_load_complete(huc); + intel_uc_fw_sanitize(&huc->fw); + return 0; +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 52db03620c60..db555b3c1f56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -41,6 +41,7 @@ struct intel_huc { } delayed_load; }; +int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); @@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc); void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); -static inline int intel_huc_sanitize(struct intel_huc *huc) -{ - intel_uc_fw_sanitize(&huc->fw); - return 0; -} - static inline bool intel_huc_is_supported(struct intel_huc *huc) { return intel_uc_fw_is_supported(&huc->fw); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c index 284d6fbc2d08..2f93cc4e408a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -54,6 +54,8 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) if (IS_ERR(root)) return; + uc->guc.dbgfs_node = root; + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc); intel_guc_debugfs_register(&uc->guc, root); |