diff options
author | Dave Airlie <airlied@redhat.com> | 2019-04-03 11:36:52 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2019-04-03 13:26:11 +1000 |
commit | 457109829f4ee4107e8c7108237afba21fabbb5e (patch) | |
tree | b2494fb72af2f0d4572362dcf30fb532a846ef7d /include | |
parent | b4e4538a0ab5079ae5dc401970e11f0ff2ba13a7 (diff) | |
parent | 7a65bdc6903d4cae56855eceabfd8f879a3b8f6e (diff) | |
download | linux-next-457109829f4ee4107e8c7108237afba21fabbb5e.tar.gz |
Merge branch 'drm-next-5.2' of git://people.freedesktop.org/~agd5f/linux into drm-next
amdgpu:
- Switch to HMM for userptr (reverted until HMM fixes land)
- New experimental SMU 11 replacement for powerplay for vega20 (not enabled by default)
- Initial RAS support for vega20
- BACO support for vega12
- BACO fixes for vega20
- Rework IH handling for page fault and retry interrupts
- Cleanly split CPU and GPU paths for GPUVM updates
- Powerplay fixes
- XGMI fixes
- Rework how DC interacts with atomic for planes
- Clean up and simplify DC/Powerplay interfaces
- Misc cleanups and bug fixes
amdkfd:
- Switch to HMM for userptr (reverted until HMM fixes land)
- Add initial RAS support
- MQD fixes
ttm:
- Unify DRM_FILE_PAGE_OFFSET handling
- Account for kernel allocations in kernel zone only
- Misc cleanups
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190402170820.22197-1-alexander.deucher@amd.com
Diffstat (limited to 'include')
-rw-r--r-- | include/drm/drm_vma_manager.h | 12 | ||||
-rw-r--r-- | include/drm/ttm/ttm_bo_driver.h | 2 | ||||
-rw-r--r-- | include/uapi/drm/amdgpu_drm.h | 35 | ||||
-rw-r--r-- | include/uapi/linux/kfd_ioctl.h | 12 |
4 files changed, 59 insertions, 2 deletions
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h index c7987daeaed0..f4f8ff1cdeec 100644 --- a/include/drm/drm_vma_manager.h +++ b/include/drm/drm_vma_manager.h @@ -29,6 +29,18 @@ #include <linux/spinlock.h> #include <linux/types.h> +/* We make up offsets for buffer objects so we can recognize them at + * mmap time. pgoff in mmap is an unsigned long, so we need to make sure + * that the faked up offset will fit + */ +#if BITS_PER_LONG == 64 +#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1) +#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16) +#else +#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1) +#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16) +#endif + struct drm_file; struct drm_vma_offset_file { diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cbf3180cb612..61b80ec78e80 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -597,7 +597,7 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev); int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_bo_driver *driver, struct address_space *mapping, - uint64_t file_page_offset, bool need_dma32); + bool need_dma32); /** * ttm_bo_unmap_virtual diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4a53f6cfa034..e3a97da4add9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -210,6 +210,9 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) /* indicate some job from this context once cause gpu hang */ #define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 @@ -680,6 +683,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 /* Subquery id: Query DMCU firmware version */ #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ @@ -733,6 +737,37 @@ struct drm_amdgpu_cs_chunk_data { /* Number of VRAM page faults on CPU access. */ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E #define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 + +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e622fd1fbd46..dc067ed0b72d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -211,6 +211,11 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_HW_EXCEPTION_GPU_HANG 0 #define KFD_HW_EXCEPTION_ECC 1 +/* For kfd_hsa_memory_exception_data.ErrorType */ +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC 1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG 3 struct kfd_ioctl_create_event_args { __u64 event_page_offset; /* from KFD */ @@ -250,7 +255,12 @@ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; __u32 gpu_id; - __u32 pad; + __u32 ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang (not attributable to a specific cause), + * other values reserved + */ }; /* hw exception data */ |