summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2022-09-28 16:46:20 -0700
committerJordan Justen <jordan.l.justen@intel.com>2022-09-28 17:04:12 -0700
commite0df5fce890c8f70ccd72a3e7260aa8f5dd842d8 (patch)
tree54d24b2ad5e732b6dfb33d0840b12361b3e6d4d1
parente2504b921f12738b9b02acb8aa7ea95596bd9545 (diff)
downloaddrm-e0df5fce890c8f70ccd72a3e7260aa8f5dd842d8.tar.gz
include/drm/i915_drm.h: Update from Linux v6.0-rc7
Generated from the Linux v6.0-rc7 tag with a sha1 of f76349cf41451c5c42a99f18a9163377e4b364ff. Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
-rw-r--r--include/drm/i915_drm.h2069
1 files changed, 1939 insertions, 130 deletions
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 72afd94e..1de0433f 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -55,15 +55,15 @@ extern "C" {
* cause the related events to not be seen.
*
* I915_RESET_UEVENT - Event is generated just before an attempt to reset the
- * the GPU. The value supplied with the event is always 1. NOTE: Disable
+ * GPU. The value supplied with the event is always 1. NOTE: Disable
* reset via module parameter will cause this event to not be seen.
*/
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
#define I915_ERROR_UEVENT "ERROR"
#define I915_RESET_UEVENT "RESET"
-/*
- * i915_user_extension: Base class for defining a chain of extensions
+/**
+ * struct i915_user_extension - Base class for defining a chain of extensions
*
* Many interfaces need to grow over time. In most cases we can simply
* extend the struct and have userspace pass in more data. Another option,
@@ -76,12 +76,58 @@ extern "C" {
* increasing complexity, and for large parts of that interface to be
* entirely optional. The downside is more pointer chasing; chasing across
* the boundary with pointers encapsulated inside u64.
+ *
+ * Example chaining:
+ *
+ * .. code-block:: C
+ *
+ * struct i915_user_extension ext3 {
+ * .next_extension = 0, // end
+ * .name = ...,
+ * };
+ * struct i915_user_extension ext2 {
+ * .next_extension = (uintptr_t)&ext3,
+ * .name = ...,
+ * };
+ * struct i915_user_extension ext1 {
+ * .next_extension = (uintptr_t)&ext2,
+ * .name = ...,
+ * };
+ *
+ * Typically the struct i915_user_extension would be embedded in some uAPI
+ * struct, and in this case we would feed it the head of the chain (i.e. ext1),
+ * which would then apply all of the above extensions.
+ *
*/
struct i915_user_extension {
+ /**
+ * @next_extension:
+ *
+ * Pointer to the next struct i915_user_extension, or zero if the end.
+ */
__u64 next_extension;
+ /**
+ * @name: Name of the extension.
+ *
+ * Note that the name here is just some integer.
+ *
+ * Also note that the name space for this is not global for the whole
+ * driver, but rather its scope/meaning is limited to the specific piece
+ * of uAPI which has embedded the struct i915_user_extension.
+ */
__u32 name;
- __u32 flags; /* All undefined bits must be zero. */
- __u32 rsvd[4]; /* Reserved for future use; must be zero. */
+ /**
+ * @flags: MBZ
+ *
+ * All undefined bits must be zero.
+ */
+ __u32 flags;
+ /**
+ * @rsvd: MBZ
+ *
+ * Reserved for future use; must be zero.
+ */
+ __u32 rsvd[4];
};
/*
@@ -108,25 +154,102 @@ enum i915_mocs_table_index {
I915_MOCS_CACHED,
};
-/*
+/**
+ * enum drm_i915_gem_engine_class - uapi engine type enumeration
+ *
* Different engines serve different roles, and there may be more than one
- * engine serving each role. enum drm_i915_gem_engine_class provides a
- * classification of the role of the engine, which may be used when requesting
- * operations to be performed on a certain subset of engines, or for providing
- * information about that group.
+ * engine serving each role. This enum provides a classification of the role
+ * of the engine, which may be used when requesting operations to be performed
+ * on a certain subset of engines, or for providing information about that
+ * group.
*/
enum drm_i915_gem_engine_class {
+ /**
+ * @I915_ENGINE_CLASS_RENDER:
+ *
+ * Render engines support instructions used for 3D, Compute (GPGPU),
+ * and programmable media workloads. These instructions fetch data and
+ * dispatch individual work items to threads that operate in parallel.
+ * The threads run small programs (called "kernels" or "shaders") on
+ * the GPU's execution units (EUs).
+ */
I915_ENGINE_CLASS_RENDER = 0,
+
+ /**
+ * @I915_ENGINE_CLASS_COPY:
+ *
+ * Copy engines (also referred to as "blitters") support instructions
+ * that move blocks of data from one location in memory to another,
+ * or that fill a specified location of memory with fixed data.
+ * Copy engines can perform pre-defined logical or bitwise operations
+ * on the source, destination, or pattern data.
+ */
I915_ENGINE_CLASS_COPY = 1,
+
+ /**
+ * @I915_ENGINE_CLASS_VIDEO:
+ *
+ * Video engines (also referred to as "bit stream decode" (BSD) or
+ * "vdbox") support instructions that perform fixed-function media
+ * decode and encode.
+ */
I915_ENGINE_CLASS_VIDEO = 2,
+
+ /**
+ * @I915_ENGINE_CLASS_VIDEO_ENHANCE:
+ *
+ * Video enhancement engines (also referred to as "vebox") support
+ * instructions related to image enhancement.
+ */
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
- /* should be kept compact */
+ /**
+ * @I915_ENGINE_CLASS_COMPUTE:
+ *
+ * Compute engines support a subset of the instructions available
+ * on render engines: compute engines support Compute (GPGPU) and
+ * programmable media workloads, but do not support the 3D pipeline.
+ */
+ I915_ENGINE_CLASS_COMPUTE = 4,
+
+ /* Values in this enum should be kept compact. */
+ /**
+ * @I915_ENGINE_CLASS_INVALID:
+ *
+ * Placeholder value to represent an invalid engine class assignment.
+ */
I915_ENGINE_CLASS_INVALID = -1
};
/**
+ * struct i915_engine_class_instance - Engine class/instance identifier
+ *
+ * There may be more than one engine fulfilling any role within the system.
+ * Each engine of a class is given a unique instance number and therefore
+ * any engine can be specified by its class:instance tuple. APIs that allow
+ * access to any engine in the system will use struct i915_engine_class_instance
+ * for this identification.
+ */
+struct i915_engine_class_instance {
+ /**
+ * @engine_class:
+ *
+ * Engine class from enum drm_i915_gem_engine_class
+ */
+ __u16 engine_class;
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
+
+ /**
+ * @engine_instance:
+ *
+ * Engine instance.
+ */
+ __u16 engine_instance;
+};
+
+/**
* DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
*
*/
@@ -163,8 +286,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4)
-#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
@@ -343,6 +467,9 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_PERF_ADD_CONFIG 0x37
#define DRM_I915_PERF_REMOVE_CONFIG 0x38
#define DRM_I915_QUERY 0x39
+#define DRM_I915_GEM_VM_CREATE 0x3a
+#define DRM_I915_GEM_VM_DESTROY 0x3b
+#define DRM_I915_GEM_CREATE_EXT 0x3c
/* Must be kept compact -- no holes */
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -375,10 +502,12 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
+#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext)
#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
+#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset)
#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
@@ -403,6 +532,8 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -503,6 +634,16 @@ typedef struct drm_i915_irq_wait {
#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
+#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
+/*
+ * Indicates the 2k user priority levels are statically mapped into 3 buckets as
+ * follows:
+ *
+ * -1k to -1 Low priority
+ * 0 Normal priority
+ * 1 to 1k Highest priority
+ */
+#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
#define I915_PARAM_HUC_STATUS 42
@@ -520,7 +661,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_FENCE 44
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
- * user specified buffers for post-mortem debugging of GPU hangs. See
+ * user specified buffers for post-mortem debugging of GPU hangs. See
* EXEC_OBJECT_CAPTURE.
*/
#define I915_PARAM_HAS_EXEC_CAPTURE 45
@@ -586,16 +727,51 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_MMAP_GTT_COHERENT 52
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION 54
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
+/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
+#define I915_PARAM_HAS_USERPTR_PROBE 56
+
/* Must be kept compact -- no holes and well documented */
-typedef struct drm_i915_getparam {
+/**
+ * struct drm_i915_getparam - Driver parameter query structure.
+ */
+struct drm_i915_getparam {
+ /** @param: Driver parameter to query. */
__s32 param;
- /*
+
+ /**
+ * @value: Address of memory where queried value should be put.
+ *
* WARNING: Using pointers instead of fixed-size u64 means we need to write
* compat32 code. Don't repeat this mistake.
*/
int *value;
-} drm_i915_getparam_t;
+};
+
+/**
+ * typedef drm_i915_getparam_t - Driver parameter query structure.
+ * See struct drm_i915_getparam.
+ */
+typedef struct drm_i915_getparam drm_i915_getparam_t;
/* Ioctl to set kernel params:
*/
@@ -761,14 +937,113 @@ struct drm_i915_gem_mmap_gtt {
__u64 offset;
};
+/**
+ * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object.
+ *
+ * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl,
+ * and is used to retrieve the fake offset to mmap an object specified by &handle.
+ *
+ * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+.
+ * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave
+ * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`.
+ */
+struct drm_i915_gem_mmap_offset {
+ /** @handle: Handle for the object being mapped. */
+ __u32 handle;
+ /** @pad: Must be zero */
+ __u32 pad;
+ /**
+ * @offset: The fake offset to use for subsequent mmap call
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 offset;
+
+ /**
+ * @flags: Flags for extended behaviour.
+ *
+ * It is mandatory that one of the `MMAP_OFFSET` types
+ * should be included:
+ *
+ * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined)
+ * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching.
+ * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching.
+ * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching.
+ *
+ * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid
+ * type. On devices without local memory, this caching mode is invalid.
+ *
+ * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will
+ * be used, depending on the object placement on creation. WB will be used
+ * when the object can only exist in system memory, WC otherwise.
+ */
+ __u64 flags;
+
+#define I915_MMAP_OFFSET_GTT 0
+#define I915_MMAP_OFFSET_WC 1
+#define I915_MMAP_OFFSET_WB 2
+#define I915_MMAP_OFFSET_UC 3
+#define I915_MMAP_OFFSET_FIXED 4
+
+ /**
+ * @extensions: Zero-terminated chain of extensions.
+ *
+ * No current extensions defined; mbz.
+ */
+ __u64 extensions;
+};
+
+/**
+ * struct drm_i915_gem_set_domain - Adjust the object's write or read domain, in
+ * preparation for accessing the pages via some CPU domain.
+ *
+ * Specifying a new write or read domain will flush the object out of the
+ * previous domain (if required), before then updating the object's domain
+ * tracking with the new domain.
+ *
+ * Note this might involve waiting for the object first if it is still active on
+ * the GPU.
+ *
+ * Supported values for @read_domains and @write_domain:
+ *
+ * - I915_GEM_DOMAIN_WC: Uncached write-combined domain
+ * - I915_GEM_DOMAIN_CPU: CPU cache domain
+ * - I915_GEM_DOMAIN_GTT: Mappable aperture domain
+ *
+ * All other domains are rejected.
+ *
+ * Note that for discrete, starting from DG1, this is no longer supported, and
+ * is instead rejected. On such platforms the CPU domain is effectively static,
+ * where we also only support a single &drm_i915_gem_mmap_offset cache mode,
+ * which can't be set explicitly and instead depends on the object placements,
+ * as per the below.
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ * mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ * guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ */
struct drm_i915_gem_set_domain {
- /** Handle for the object */
+ /** @handle: Handle for the object. */
__u32 handle;
- /** New read domains */
+ /** @read_domains: New read domains. */
__u32 read_domains;
- /** New write domain */
+ /**
+ * @write_domain: New write domain.
+ *
+ * Note that having something in the write domain implies it's in the
+ * read domain, and only that read domain.
+ */
__u32 write_domain;
};
@@ -872,6 +1147,7 @@ struct drm_i915_gem_exec_object {
__u64 offset;
};
+/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */
struct drm_i915_gem_execbuffer {
/**
* List of buffers to be validated with their relocations to be
@@ -918,10 +1194,16 @@ struct drm_i915_gem_exec_object2 {
/**
* When the EXEC_OBJECT_PINNED flag is specified this is populated by
* the user with the GTT offset at which this object will be pinned.
+ *
* When the I915_EXEC_NO_RELOC flag is specified this must contain the
* presumed_offset of the object.
+ *
* During execbuffer2 the kernel populates it with the value of the
* current GTT offset of the object, for future presumed_offset writes.
+ *
+ * See struct drm_i915_gem_create_ext for the rules when dealing with
+ * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with
+ * minimum page sizes, like DG2.
*/
__u64 offset;
@@ -970,38 +1252,119 @@ struct drm_i915_gem_exec_object2 {
__u64 rsvd2;
};
+/**
+ * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf
+ * ioctl.
+ *
+ * The request will wait for input fence to signal before submission.
+ *
+ * The returned output fence will be signaled after the completion of the
+ * request.
+ */
struct drm_i915_gem_exec_fence {
- /**
- * User's handle for a drm_syncobj to wait on or signal.
- */
+ /** @handle: User's handle for a drm_syncobj to wait on or signal. */
__u32 handle;
+ /**
+ * @flags: Supported flags are:
+ *
+ * I915_EXEC_FENCE_WAIT:
+ * Wait for the input fence before request submission.
+ *
+ * I915_EXEC_FENCE_SIGNAL:
+ * Return request completion fence as output
+ */
+ __u32 flags;
#define I915_EXEC_FENCE_WAIT (1<<0)
#define I915_EXEC_FENCE_SIGNAL (1<<1)
#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
- __u32 flags;
};
-struct drm_i915_gem_execbuffer2 {
+/**
+ * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences
+ * for execbuf ioctl.
+ *
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+
/**
- * List of gem_exec_object2 structs
+ * @fence_count: Number of elements in the @handles_ptr & @values_ptr
+ * arrays.
*/
+ __u64 fence_count;
+
+ /**
+ * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence
+ * of length @fence_count.
+ */
+ __u64 handles_ptr;
+
+ /**
+ * @values_ptr: Pointer to an array of u64 values of length
+ * @fence_count.
+ * Values must be 0 for a binary drm_syncobj. A Value of 0 for a
+ * timeline drm_syncobj is invalid as it turns a drm_syncobj into a
+ * binary one.
+ */
+ __u64 values_ptr;
+};
+
+/**
+ * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2
+ * ioctl.
+ */
+struct drm_i915_gem_execbuffer2 {
+ /** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */
__u64 buffers_ptr;
+
+ /** @buffer_count: Number of elements in @buffers_ptr array */
__u32 buffer_count;
- /** Offset in the batchbuffer to start execution from. */
+ /**
+ * @batch_start_offset: Offset in the batchbuffer to start execution
+ * from.
+ */
__u32 batch_start_offset;
- /** Bytes used in batchbuffer from batch_start_offset */
+
+ /**
+ * @batch_len: Length in bytes of the batch buffer, starting from the
+ * @batch_start_offset. If 0, length is assumed to be the batch buffer
+ * object size.
+ */
__u32 batch_len;
+
+ /** @DR1: deprecated */
__u32 DR1;
+
+ /** @DR4: deprecated */
__u32 DR4;
+
+ /** @num_cliprects: See @cliprects_ptr */
__u32 num_cliprects;
+
/**
- * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
- * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
- * struct drm_i915_gem_exec_fence *fences.
+ * @cliprects_ptr: Kernel clipping was a DRI1 misfeature.
+ *
+ * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or
+ * I915_EXEC_USE_EXTENSIONS flags are not set.
+ *
+ * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+ * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the
+ * array.
+ *
+ * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+ * single &i915_user_extension and num_cliprects is 0.
*/
__u64 cliprects_ptr;
+
+ /** @flags: Execbuf flags */
+ __u64 flags;
#define I915_EXEC_RING_MASK (0x3f)
#define I915_EXEC_DEFAULT (0<<0)
#define I915_EXEC_RENDER (1<<0)
@@ -1019,10 +1382,6 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
- __u64 flags;
- __u64 rsvd1; /* now used for context info */
- __u64 rsvd2;
-};
/** Resets the SO write offset registers for transform feedback on gen7. */
#define I915_EXEC_GEN7_SOL_RESET (1<<8)
@@ -1108,7 +1467,39 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_ARRAY (1<<19)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT (1 << 20)
+
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to a linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS (1 << 21)
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1))
+
+ /** @rsvd1: Context id */
+ __u64 rsvd1;
+
+ /**
+ * @rsvd2: in and out sync_file file descriptors.
+ *
+ * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the
+ * lower 32 bits of this field will have the in sync_file fd (input).
+ *
+ * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this
+ * field will have the out sync_file fd (output).
+ */
+ __u64 rsvd2;
+};
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1172,12 +1563,11 @@ struct drm_i915_gem_busy {
* reading from the object simultaneously.
*
* The value of each engine class is the same as specified in the
- * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
+ * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e.
* I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
- * reported as active itself. Some hardware may have parallel
- * execution engines, e.g. multiple media engines, which are
- * mapped to the same class identifier and so are not separately
- * reported for busyness.
+ * Some hardware may have parallel execution engines, e.g. multiple
+ * media engines, which are mapped to the same class identifier and so
+ * are not separately reported for busyness.
*
* Caveat emptor:
* Only the boolean result of this query is reliable; that is whether
@@ -1188,49 +1578,91 @@ struct drm_i915_gem_busy {
};
/**
- * I915_CACHING_NONE
+ * struct drm_i915_gem_caching - Set or get the caching for given object
+ * handle.
*
- * GPU access is not coherent with cpu caches. Default for machines without an
- * LLC.
- */
-#define I915_CACHING_NONE 0
-/**
- * I915_CACHING_CACHED
+ * Allow userspace to control the GTT caching bits for a given object when the
+ * object is later mapped through the ppGTT(or GGTT on older platforms lacking
+ * ppGTT support, or if the object is used for scanout). Note that this might
+ * require unbinding the object from the GTT first, if its current caching value
+ * doesn't match.
*
- * GPU access is coherent with cpu caches and furthermore the data is cached in
- * last-level caches shared between cpu cores and the gpu GT. Default on
- * machines with HAS_LLC.
- */
-#define I915_CACHING_CACHED 1
-/**
- * I915_CACHING_DISPLAY
+ * Note that this all changes on discrete platforms, starting from DG1, the
+ * set/get caching is no longer supported, and is now rejected. Instead the CPU
+ * caching attributes(WB vs WC) will become an immutable creation time property
+ * for the object, along with the GTT caching level. For now we don't expose any
+ * new uAPI for this, instead on DG1 this is all implicit, although this largely
+ * shouldn't matter since DG1 is coherent by default(without any way of
+ * controlling it).
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ * mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ * guarantee that everything is also coherent with the GPU.
*
- * Special GPU caching mode which is coherent with the scanout engines.
- * Transparently falls back to I915_CACHING_NONE on platforms where no special
- * cache mode (like write-through or gfdt flushing) is available. The kernel
- * automatically sets this mode when using a buffer as a scanout target.
- * Userspace can manually set this mode to avoid a costly stall and clflush in
- * the hotpath of drawing the first frame.
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ *
+ * Side note: Part of the reason for this is that changing the at-allocation-time CPU
+ * caching attributes for the pages might be required(and is expensive) if we
+ * need to then CPU map the pages later with different caching attributes. This
+ * inconsistent caching behaviour, while supported on x86, is not universally
+ * supported on other architectures. So for simplicity we opt for setting
+ * everything at creation time, whilst also making it immutable, on discrete
+ * platforms.
*/
-#define I915_CACHING_DISPLAY 2
-
struct drm_i915_gem_caching {
/**
- * Handle of the buffer to set/get the caching level of. */
+ * @handle: Handle of the buffer to set/get the caching level.
+ */
__u32 handle;
/**
- * Caching level to apply or return value
+ * @caching: The GTT caching level to apply or possible return value.
+ *
+ * The supported @caching values:
+ *
+ * I915_CACHING_NONE:
*
- * bits0-15 are for generic caching control (i.e. the above defined
- * values). bits16-31 are reserved for platform-specific variations
- * (e.g. l3$ caching on gen7). */
+ * GPU access is not coherent with CPU caches. Default for machines
+ * without an LLC. This means manual flushing might be needed, if we
+ * want GPU access to be coherent.
+ *
+ * I915_CACHING_CACHED:
+ *
+ * GPU access is coherent with CPU caches and furthermore the data is
+ * cached in last-level caches shared between CPU cores and the GPU GT.
+ *
+ * I915_CACHING_DISPLAY:
+ *
+ * Special GPU caching mode which is coherent with the scanout engines.
+ * Transparently falls back to I915_CACHING_NONE on platforms where no
+ * special cache mode (like write-through or gfdt flushing) is
+ * available. The kernel automatically sets this mode when using a
+ * buffer as a scanout target. Userspace can manually set this mode to
+ * avoid a costly stall and clflush in the hotpath of drawing the first
+ * frame.
+ */
+#define I915_CACHING_NONE 0
+#define I915_CACHING_CACHED 1
+#define I915_CACHING_DISPLAY 2
__u32 caching;
};
#define I915_TILING_NONE 0
#define I915_TILING_X 1
#define I915_TILING_Y 2
+/*
+ * Do not add new tiling types here. The I915_TILING_* values are for
+ * de-tiling fence registers that no longer exist on modern platforms. Although
+ * the hardware may support new types of tiling in general (e.g., Tile4), we
+ * do not need to add them to the uapi that is specific to now-defunct ioctls.
+ */
#define I915_TILING_LAST I915_TILING_Y
#define I915_BIT_6_SWIZZLE_NONE 0
@@ -1448,20 +1880,64 @@ struct drm_i915_gem_context_create {
__u32 pad;
};
+/**
+ * struct drm_i915_gem_context_create_ext - Structure for creating contexts.
+ */
struct drm_i915_gem_context_create_ext {
- __u32 ctx_id; /* output: id of new context*/
+ /** @ctx_id: Id of the created context (output) */
+ __u32 ctx_id;
+
+ /**
+ * @flags: Supported flags are:
+ *
+ * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS:
+ *
+ * Extensions may be appended to this structure and driver must check
+ * for those. See @extensions.
+ *
+ * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE
+ *
+ * Created context will have single timeline.
+ */
__u32 flags;
#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1)
#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
- (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+ (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
+
+ /**
+ * @extensions: Zero-terminated chain of extensions.
+ *
+ * I915_CONTEXT_CREATE_EXT_SETPARAM:
+ * Context parameter to set or query during context creation.
+ * See struct drm_i915_gem_context_create_ext_setparam.
+ *
+ * I915_CONTEXT_CREATE_EXT_CLONE:
+ * This extension has been removed. On the off chance someone somewhere
+ * has attempted to use it, never re-use this extension number.
+ */
__u64 extensions;
+#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
};
+/**
+ * struct drm_i915_gem_context_param - Context parameter to set or query.
+ */
struct drm_i915_gem_context_param {
+ /** @ctx_id: Context id */
__u32 ctx_id;
+
+ /** @size: Size of the parameter @value */
__u32 size;
+
+ /** @param: Parameter to set or query */
__u64 param;
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
+/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance
+ * someone somewhere has attempted to use it, never re-use this context
+ * param number.
+ */
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
@@ -1495,12 +1971,118 @@ struct drm_i915_gem_context_param {
* On creation, all new contexts are marked as recoverable.
*/
#define I915_CONTEXT_PARAM_RECOVERABLE 0x8
+
+ /*
+ * The id of the associated virtual memory address space (ppGTT) of
+ * this context. Can be retrieved and passed to another context
+ * (on the same fd) for both to use the same ppGTT and so share
+ * address layouts, and avoid reloading the page tables on context
+ * switches between themselves.
+ *
+ * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+ */
+#define I915_CONTEXT_PARAM_VM 0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ * engine_class: I915_ENGINE_CLASS_INVALID,
+ * engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
+ * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
+ */
+#define I915_CONTEXT_PARAM_ENGINES 0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is marked as not persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
+
+/* This API has been removed. On the off chance someone somewhere has
+ * attempted to use it, never re-use this context param number.
+ */
+#define I915_CONTEXT_PARAM_RINGSIZE 0xc
+
+/*
+ * I915_CONTEXT_PARAM_PROTECTED_CONTENT:
+ *
+ * Mark that the context makes use of protected content, which will result
+ * in the context being invalidated when the protected content session is.
+ * Given that the protected content session is killed on suspend, the device
+ * is kept awake for the lifetime of a protected context, so the user should
+ * make sure to dispose of them once done.
+ * This flag can only be set at context creation time and, when set to true,
+ * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE
+ * to false. This flag can't be set to true in conjunction with setting the
+ * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_context_create_ext_setparam p_protected = {
+ * .base = {
+ * .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * },
+ * .param = {
+ * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT,
+ * .value = 1,
+ * }
+ * };
+ * struct drm_i915_gem_context_create_ext_setparam p_norecover = {
+ * .base = {
+ * .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * .next_extension = to_user_pointer(&p_protected),
+ * },
+ * .param = {
+ * .param = I915_CONTEXT_PARAM_RECOVERABLE,
+ * .value = 0,
+ * }
+ * };
+ * struct drm_i915_gem_context_create_ext create = {
+ * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * .extensions = to_user_pointer(&p_norecover),
+ * };
+ *
+ * ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * In addition to the normal failure cases, setting this flag during context
+ * creation can result in the following errors:
+ *
+ * -ENODEV: feature not available
+ * -EPERM: trying to mark a recoverable or not bannable context as protected
+ */
+#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
/* Must be kept compact -- no holes and well documented */
+ /** @value: Context parameter value to be set or queried */
__u64 value;
};
-/**
+/*
* Context SSEU programming
*
* It may be necessary for either functional or performance reason to configure
@@ -1525,13 +2107,13 @@ struct drm_i915_gem_context_param_sseu {
/*
* Engine class & instance to be configured or queried.
*/
- __u16 engine_class;
- __u16 engine_instance;
+ struct i915_engine_class_instance engine;
/*
- * Unused for now. Must be cleared to zero.
+ * Unknown flags must be cleared to zero.
*/
__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
/*
* Mask of slices to enable for the context. Valid values are a subset
@@ -1559,9 +2141,354 @@ struct drm_i915_gem_context_param_sseu {
__u32 rsvd;
};
+/**
+ * DOC: Virtual Engine uAPI
+ *
+ * Virtual engine is a concept where userspace is able to configure a set of
+ * physical engines, submit a batch buffer, and let the driver execute it on any
+ * engine from the set as it sees fit.
+ *
+ * This is primarily useful on parts which have multiple instances of the same
+ * engine class, like for example GT3+ Skylake parts with their two VCS engines.
+ *
+ * For instance userspace can enumerate all engines of a certain class using the
+ * previously described `Engine Discovery uAPI`_. After that userspace can
+ * create a GEM context with a placeholder slot for the virtual engine (using
+ * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class
+ * and instance respectively) and finally using the
+ * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in
+ * the same reserved slot.
+ *
+ * Example of creating a virtual engine and submitting a batch buffer to it:
+ *
+ * .. code-block:: C
+ *
+ * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = {
+ * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+ * .engine_index = 0, // Place this virtual engine into engine map slot 0
+ * .num_siblings = 2,
+ * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 },
+ * { I915_ENGINE_CLASS_VIDEO, 1 }, },
+ * };
+ * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
+ * .engines = { { I915_ENGINE_CLASS_INVALID,
+ * I915_ENGINE_CLASS_INVALID_NONE } },
+ * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension
+ * };
+ * struct drm_i915_gem_context_create_ext_setparam p_engines = {
+ * .base = {
+ * .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * },
+ * .param = {
+ * .param = I915_CONTEXT_PARAM_ENGINES,
+ * .value = to_user_pointer(&engines),
+ * .size = sizeof(engines),
+ * },
+ * };
+ * struct drm_i915_gem_context_create_ext create = {
+ * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * .extensions = to_user_pointer(&p_engines),
+ * };
+ *
+ * ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * // Now we have created a GEM context with its engine map containing a
+ * // single virtual engine. Submissions to this slot can go either to
+ * // vcs0 or vcs1, depending on the load balancing algorithm used inside
+ * // the driver. The load balancing is dynamic from one batch buffer to
+ * // another and transparent to userspace.
+ *
+ * ...
+ * execbuf.rsvd1 = ctx_id;
+ * execbuf.flags = 0; // Submits to index 0 which is the virtual engine
+ * gem_execbuf(drm_fd, &execbuf);
+ */
+
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+ struct i915_user_extension base;
+
+ __u16 engine_index;
+ __u16 num_siblings;
+ __u32 flags; /* all undefined flags must be zero */
+
+ __u64 mbz64; /* reserved for future use; must be zero */
+
+ struct i915_engine_class_instance engines[];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+ struct i915_user_extension base; \
+ __u16 engine_index; \
+ __u16 num_siblings; \
+ __u32 flags; \
+ __u64 mbz64; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/*
+ * i915_context_engines_bond:
+ *
+ * Construct bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs, for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with a I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+ struct i915_user_extension base;
+
+ struct i915_engine_class_instance master;
+
+ __u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+ __u16 num_bonds;
+
+ __u64 flags; /* all undefined flags must be zero */
+ __u64 mbz64[4]; /* reserved for future use; must be zero */
+
+ struct i915_engine_class_instance engines[];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
+ struct i915_user_extension base; \
+ struct i915_engine_class_instance master; \
+ __u16 virtual_index; \
+ __u16 num_bonds; \
+ __u64 flags; \
+ __u64 mbz64[4]; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/**
+ * struct i915_context_engines_parallel_submit - Configure engine for
+ * parallel submission.
+ *
+ * Setup a slot in the context engine map to allow multiple BBs to be submitted
+ * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
+ * in parallel. Multiple hardware contexts are created internally in the i915 to
+ * run these BBs. Once a slot is configured for N BBs only N BBs can be
+ * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
+ * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
+ * many BBs there are based on the slot's configuration. The N BBs are the last
+ * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
+ *
+ * The default placement behavior is to create implicit bonds between each
+ * context if each context maps to more than 1 physical engine (e.g. context is
+ * a virtual engine). Also we only allow contexts of same engine class and these
+ * contexts must be in logically contiguous order. Examples of the placement
+ * behavior are described below. Lastly, the default is to not allow BBs to be
+ * preempted mid-batch. Rather insert coordinated preemption points on all
+ * hardware contexts between each set of BBs. Flags could be added in the future
+ * to change both of these default behaviors.
+ *
+ * Returns -EINVAL if hardware context placement configuration is invalid or if
+ * the placement configuration isn't supported on the platform / submission
+ * interface.
+ * Returns -ENODEV if extension isn't supported on the platform / submission
+ * interface.
+ *
+ * .. code-block:: none
+ *
+ * Examples syntax:
+ * CS[X] = generic engine of same class, logical instance X
+ * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
+ *
+ * Example 1 pseudo code:
+ * set_engines(INVALID)
+ * set_parallel(engine_index=0, width=2, num_siblings=1,
+ * engines=CS[0],CS[1])
+ *
+ * Results in the following valid placement:
+ * CS[0], CS[1]
+ *
+ * Example 2 pseudo code:
+ * set_engines(INVALID)
+ * set_parallel(engine_index=0, width=2, num_siblings=2,
+ * engines=CS[0],CS[2],CS[1],CS[3])
+ *
+ * Results in the following valid placements:
+ * CS[0], CS[1]
+ * CS[2], CS[3]
+ *
+ * This can be thought of as two virtual engines, each containing two
+ * engines thereby making a 2D array. However, there are bonds tying the
+ * entries together and placing restrictions on how they can be scheduled.
+ * Specifically, the scheduler can choose only vertical columns from the 2D
+ * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
+ * scheduler wants to submit to CS[0], it must also choose CS[1] and vice
+ * versa. Same for CS[2] requires also using CS[3].
+ * VE[0] = CS[0], CS[2]
+ * VE[1] = CS[1], CS[3]
+ *
+ * Example 3 pseudo code:
+ * set_engines(INVALID)
+ * set_parallel(engine_index=0, width=2, num_siblings=2,
+ * engines=CS[0],CS[1],CS[1],CS[3])
+ *
+ * Results in the following valid and invalid placements:
+ * CS[0], CS[1]
+ * CS[1], CS[3] - Not logically contiguous, return -EINVAL
+ */
+struct i915_context_engines_parallel_submit {
+ /**
+ * @base: base user extension.
+ */
+ struct i915_user_extension base;
+
+ /**
+ * @engine_index: slot for parallel engine
+ */
+ __u16 engine_index;
+
+ /**
+ * @width: number of contexts per parallel engine or in other words the
+ * number of batches in each submission
+ */
+ __u16 width;
+
+ /**
+ * @num_siblings: number of siblings per context or in other words the
+ * number of possible placements for each submission
+ */
+ __u16 num_siblings;
+
+ /**
+ * @mbz16: reserved for future use; must be zero
+ */
+ __u16 mbz16;
+
+ /**
+ * @flags: all undefined flags must be zero; currently no flags are defined
+ */
+ __u64 flags;
+
+ /**
+ * @mbz64: reserved for future use; must be zero
+ */
+ __u64 mbz64[3];
+
+ /**
+ * @engines: 2-d array of engine instances to configure parallel engine
+ *
+ * length = width (i) * num_siblings (j)
+ * index = j + i * num_siblings
+ */
+ struct i915_engine_class_instance engines[];
+
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
+ struct i915_user_extension base; \
+ __u16 engine_index; \
+ __u16 width; \
+ __u16 num_siblings; \
+ __u16 mbz16; \
+ __u64 flags; \
+ __u64 mbz64[3]; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/**
+ * DOC: Context Engine Map uAPI
+ *
+ * Context engine map is a new way of addressing engines when submitting batch-
+ * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT`
+ * inside the flags field of `struct drm_i915_gem_execbuffer2`.
+ *
+ * To use it, created GEM contexts need to be configured with a list of engines
+ * the user is intending to submit to. This is accomplished using the
+ * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct
+ * i915_context_param_engines`.
+ *
+ * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the
+ * configured map.
+ *
+ * Example of creating such context and submitting against it:
+ *
+ * .. code-block:: C
+ *
+ * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
+ * .engines = { { I915_ENGINE_CLASS_RENDER, 0 },
+ * { I915_ENGINE_CLASS_COPY, 0 } }
+ * };
+ * struct drm_i915_gem_context_create_ext_setparam p_engines = {
+ * .base = {
+ * .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * },
+ * .param = {
+ * .param = I915_CONTEXT_PARAM_ENGINES,
+ * .value = to_user_pointer(&engines),
+ * .size = sizeof(engines),
+ * },
+ * };
+ * struct drm_i915_gem_context_create_ext create = {
+ * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * .extensions = to_user_pointer(&p_engines),
+ * };
+ *
+ * ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * // We have now created a GEM context with two engines in the map:
+ * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines
+ * // will not be accessible from this context.
+ *
+ * ...
+ * execbuf.rsvd1 = ctx_id;
+ * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context
+ * gem_execbuf(drm_fd, &execbuf);
+ *
+ * ...
+ * execbuf.rsvd1 = ctx_id;
+ * execbuf.flags = 1; // Submits to index 1, which is bcs0 for this context
+ * gem_execbuf(drm_fd, &execbuf);
+ */
+
+struct i915_context_param_engines {
+ __u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
+#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
+#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
+ struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+ __u64 extensions; \
+ struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/**
+ * struct drm_i915_gem_context_create_ext_setparam - Context parameter
+ * to set or query during context creation.
+ */
struct drm_i915_gem_context_create_ext_setparam {
-#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+ /** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
+
+ /**
+ * @param: Context parameter to set or query.
+ * See struct drm_i915_gem_context_param.
+ */
struct drm_i915_gem_context_param param;
};
@@ -1570,7 +2497,9 @@ struct drm_i915_gem_context_destroy {
__u32 pad;
};
-/*
+/**
+ * struct drm_i915_gem_vm_control - Structure to create or destroy VM.
+ *
* DRM_I915_GEM_VM_CREATE -
*
* Create a new virtual memory address space (ppGTT) for use within a context
@@ -1580,20 +2509,23 @@ struct drm_i915_gem_context_destroy {
* The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
* returned in the outparam @id.
*
- * No flags are defined, with all bits reserved and must be zero.
- *
* An extension chain may be provided, starting with @extensions, and terminated
* by the @next_extension being 0. Currently, no extensions are defined.
*
* DRM_I915_GEM_VM_DESTROY -
*
- * Destroys a previously created VM id, specified in @id.
+ * Destroys a previously created VM id, specified in @vm_id.
*
* No extensions or flags are allowed currently, and so must be zero.
*/
struct drm_i915_gem_vm_control {
+ /** @extensions: Zero-terminated chain of extensions. */
__u64 extensions;
+
+ /** @flags: reserved for future usage, currently MBZ */
__u32 flags;
+
+ /** @vm_id: Id of the VM created or to be destroyed */
__u32 vm_id;
};
@@ -1635,14 +2567,69 @@ struct drm_i915_reset_stats {
__u32 pad;
};
+/**
+ * struct drm_i915_gem_userptr - Create GEM object from user allocated memory.
+ *
+ * Userptr objects have several restrictions on what ioctls can be used with the
+ * object handle.
+ */
struct drm_i915_gem_userptr {
+ /**
+ * @user_ptr: The pointer to the allocated memory.
+ *
+ * Needs to be aligned to PAGE_SIZE.
+ */
__u64 user_ptr;
+
+ /**
+ * @user_size:
+ *
+ * The size in bytes for the allocated memory. This will also become the
+ * object size.
+ *
+ * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE,
+ * or larger.
+ */
__u64 user_size;
+
+ /**
+ * @flags:
+ *
+ * Supported flags:
+ *
+ * I915_USERPTR_READ_ONLY:
+ *
+ * Mark the object as readonly, this also means GPU access can only be
+ * readonly. This is only supported on HW which supports readonly access
+ * through the GTT. If the HW can't support readonly access, an error is
+ * returned.
+ *
+ * I915_USERPTR_PROBE:
+ *
+ * Probe the provided @user_ptr range and validate that the @user_ptr is
+ * indeed pointing to normal memory and that the range is also valid.
+ * For example if some garbage address is given to the kernel, then this
+ * should complain.
+ *
+ * Returns -EFAULT if the probe failed.
+ *
+ * Note that this doesn't populate the backing pages, and also doesn't
+ * guarantee that the object will remain valid when the object is
+ * eventually used.
+ *
+ * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE
+ * returns a non-zero value.
+ *
+ * I915_USERPTR_UNSYNCHRONIZED:
+ *
+ * NOT USED. Setting this flag will result in an error.
+ */
__u32 flags;
#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_PROBE 0x2
#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
/**
- * Returned handle for the object.
+ * @handle: Returned handle for the object.
*
* Object handles are nonzero.
*/
@@ -1671,23 +2658,31 @@ enum drm_i915_perf_property_id {
* Open the stream for a specific context handle (as used with
* execbuffer2). A stream opened for a specific context this way
* won't typically require root privileges.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_CTX_HANDLE = 1,
/**
* A value of 1 requests the inclusion of raw OA unit reports as
* part of stream samples.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_SAMPLE_OA,
/**
* The value specifies which set of OA unit metrics should be
- * be configured, defining the contents of any OA unit reports.
+ * configured, defining the contents of any OA unit reports.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_METRICS_SET,
/**
* The value specifies the size and layout of OA unit reports.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_FORMAT,
@@ -1697,9 +2692,46 @@ enum drm_i915_perf_property_id {
* from this exponent as follows:
*
* 80ns * 2^(period_exponent + 1)
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_EXPONENT,
+ /**
+ * Specifying this property is only valid when specifying a context to
+ * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
+ * will hold preemption of the particular context we want to gather
+ * performance data about. The execbuf2 submissions must include a
+ * drm_i915_gem_execbuffer_ext_perf parameter for this to apply.
+ *
+ * This property is available in perf revision 3.
+ */
+ DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+
+ /**
+ * Specifying this pins all contexts to the specified SSEU power
+ * configuration for the duration of the recording.
+ *
+ * This parameter's value is a pointer to a struct
+ * drm_i915_gem_context_param_sseu.
+ *
+ * This property is available in perf revision 4.
+ */
+ DRM_I915_PERF_PROP_GLOBAL_SSEU,
+
+ /**
+ * This optional parameter specifies the timer interval in nanoseconds
+ * at which the i915 driver will check the OA buffer for available data.
+ * Minimum allowed value is 100 microseconds. A default value is used by
+ * the driver if this parameter is not specified. Note that larger timer
+ * values will reduce cpu consumption during OA perf captures. However,
+ * excessively large values would potentially result in OA buffer
+ * overwrites as captures reach end of the OA buffer.
+ *
+ * This property is available in perf revision 5.
+ */
+ DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1719,7 +2751,7 @@ struct drm_i915_perf_open_param {
__u64 properties_ptr;
};
-/**
+/*
* Enable data capture for a stream that was either opened in a disabled state
* via I915_PERF_FLAG_DISABLED or was later disabled via
* I915_PERF_IOCTL_DISABLE.
@@ -1728,17 +2760,34 @@ struct drm_i915_perf_open_param {
* to close and re-open a stream with the same configuration.
*
* It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
*/
#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
-/**
+/*
* Disable data capture for a stream.
*
* It is an error to try and read a stream that is disabled.
+ *
+ * This ioctl is available in perf revision 1.
*/
#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1)
-/**
+/*
+ * Change metrics_set captured by a stream.
+ *
+ * If the stream is bound to a specific context, the configuration change
+ * will be performed inline with that context such that it takes effect before
+ * the next execbuf submission.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
+/*
* Common to all i915 perf records
*/
struct drm_i915_perf_record_header {
@@ -1787,127 +2836,887 @@ enum drm_i915_perf_record_type {
};
/**
+ * struct drm_i915_perf_oa_config
+ *
* Structure to upload perf dynamic configuration into the kernel.
*/
struct drm_i915_perf_oa_config {
- /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+ /**
+ * @uuid:
+ *
+ * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x"
+ */
char uuid[36];
+ /**
+ * @n_mux_regs:
+ *
+ * Number of mux regs in &mux_regs_ptr.
+ */
__u32 n_mux_regs;
+
+ /**
+ * @n_boolean_regs:
+ *
+ * Number of boolean regs in &boolean_regs_ptr.
+ */
__u32 n_boolean_regs;
+
+ /**
+ * @n_flex_regs:
+ *
+ * Number of flex regs in &flex_regs_ptr.
+ */
__u32 n_flex_regs;
- /*
- * These fields are pointers to tuples of u32 values (register address,
- * value). For example the expected length of the buffer pointed by
- * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+ /**
+ * @mux_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for mux
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_mux_regs).
*/
__u64 mux_regs_ptr;
+
+ /**
+ * @boolean_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for boolean
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_boolean_regs).
+ */
__u64 boolean_regs_ptr;
+
+ /**
+ * @flex_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for flex
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_flex_regs).
+ */
__u64 flex_regs_ptr;
};
+/**
+ * struct drm_i915_query_item - An individual query for the kernel to process.
+ *
+ * The behaviour is determined by the @query_id. Note that exactly what
+ * @data_ptr is also depends on the specific @query_id.
+ */
struct drm_i915_query_item {
+ /**
+ * @query_id:
+ *
+ * The id for this query. Currently accepted query IDs are:
+ * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info)
+ * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info)
+ * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config)
+ * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
+ * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
+ * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+ */
__u64 query_id;
-#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+#define DRM_I915_QUERY_ENGINE_INFO 2
+#define DRM_I915_QUERY_PERF_CONFIG 3
+#define DRM_I915_QUERY_MEMORY_REGIONS 4
+#define DRM_I915_QUERY_HWCONFIG_BLOB 5
+#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
/* Must be kept compact -- no holes and well documented */
- /*
+ /**
+ * @length:
+ *
* When set to zero by userspace, this is filled with the size of the
- * data to be written at the data_ptr pointer. The kernel sets this
+ * data to be written at the @data_ptr pointer. The kernel sets this
* value to a negative value to signal an error on a particular query
* item.
*/
__s32 length;
- /*
- * Unused for now. Must be cleared to zero.
+ /**
+ * @flags:
+ *
+ * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+ *
+ * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the
+ * following:
+ *
+ * - %DRM_I915_QUERY_PERF_CONFIG_LIST
+ * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+ * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID
+ *
+ * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain
+ * a struct i915_engine_class_instance that references a render engine.
*/
__u32 flags;
+#define DRM_I915_QUERY_PERF_CONFIG_LIST 1
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3
- /*
- * Data will be written at the location pointed by data_ptr when the
- * value of length matches the length of the data to be written by the
+ /**
+ * @data_ptr:
+ *
+ * Data will be written at the location pointed by @data_ptr when the
+ * value of @length matches the length of the data to be written by the
* kernel.
*/
__u64 data_ptr;
};
+/**
+ * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the
+ * kernel to fill out.
+ *
+ * Note that this is generally a two step process for each struct
+ * drm_i915_query_item in the array:
+ *
+ * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct
+ * drm_i915_query_item, with &drm_i915_query_item.length set to zero. The
+ * kernel will then fill in the size, in bytes, which tells userspace how much
+ * memory it needs to allocate for the blob (say for an array of properties).
+ *
+ * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the
+ * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that
+ * the &drm_i915_query_item.length should still be the same as what the
+ * kernel previously set. At this point the kernel can fill in the blob.
+ *
+ * Note that for some query items it can make sense for userspace to just pass
+ * in a buffer/blob equal to or larger than the required size. In this case only
+ * a single ioctl call is needed. For some smaller query items this can work
+ * quite well.
+ *
+ */
struct drm_i915_query {
+ /** @num_items: The number of elements in the @items_ptr array */
__u32 num_items;
- /*
- * Unused for now. Must be cleared to zero.
+ /**
+ * @flags: Unused for now. Must be cleared to zero.
*/
__u32 flags;
- /*
- * This points to an array of num_items drm_i915_query_item structures.
+ /**
+ * @items_ptr:
+ *
+ * Pointer to an array of struct drm_i915_query_item. The number of
+ * array elements is @num_items.
*/
__u64 items_ptr;
};
-/*
- * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
- *
- * data: contains the 3 pieces of information :
- *
- * - the slice mask with one bit per slice telling whether a slice is
- * available. The availability of slice X can be queried with the following
- * formula :
- *
- * (data[X / 8] >> (X % 8)) & 1
- *
- * - the subslice mask for each slice with one bit per subslice telling
- * whether a subslice is available. The availability of subslice Y in slice
- * X can be queried with the following formula :
- *
- * (data[subslice_offset +
- * X * subslice_stride +
- * Y / 8] >> (Y % 8)) & 1
- *
- * - the EU mask for each subslice in each slice with one bit per EU telling
- * whether an EU is available. The availability of EU Z in subslice Y in
- * slice X can be queried with the following formula :
+/**
+ * struct drm_i915_query_topology_info
*
- * (data[eu_offset +
- * (X * max_subslices + Y) * eu_stride +
- * Z / 8] >> (Z % 8)) & 1
+ * Describes slice/subslice/EU information queried by
+ * %DRM_I915_QUERY_TOPOLOGY_INFO
*/
struct drm_i915_query_topology_info {
- /*
+ /**
+ * @flags:
+ *
* Unused for now. Must be cleared to zero.
*/
__u16 flags;
+ /**
+ * @max_slices:
+ *
+ * The number of bits used to express the slice mask.
+ */
__u16 max_slices;
+
+ /**
+ * @max_subslices:
+ *
+ * The number of bits used to express the subslice mask.
+ */
__u16 max_subslices;
+
+ /**
+ * @max_eus_per_subslice:
+ *
+ * The number of bits in the EU mask that correspond to a single
+ * subslice's EUs.
+ */
__u16 max_eus_per_subslice;
- /*
+ /**
+ * @subslice_offset:
+ *
* Offset in data[] at which the subslice masks are stored.
*/
__u16 subslice_offset;
- /*
+ /**
+ * @subslice_stride:
+ *
* Stride at which each of the subslice masks for each slice are
* stored.
*/
__u16 subslice_stride;
- /*
+ /**
+ * @eu_offset:
+ *
* Offset in data[] at which the EU masks are stored.
*/
__u16 eu_offset;
- /*
+ /**
+ * @eu_stride:
+ *
* Stride at which each of the EU masks for each subslice are stored.
*/
__u16 eu_stride;
+ /**
+ * @data:
+ *
+ * Contains 3 pieces of information :
+ *
+ * - The slice mask with one bit per slice telling whether a slice is
+ * available. The availability of slice X can be queried with the
+ * following formula :
+ *
+ * .. code:: c
+ *
+ * (data[X / 8] >> (X % 8)) & 1
+ *
+ * Starting with Xe_HP platforms, Intel hardware no longer has
+ * traditional slices so i915 will always report a single slice
+ * (hardcoded slicemask = 0x1) which contains all of the platform's
+ * subslices. I.e., the mask here does not reflect any of the newer
+ * hardware concepts such as "gslices" or "cslices" since userspace
+ * is capable of inferring those from the subslice mask.
+ *
+ * - The subslice mask for each slice with one bit per subslice telling
+ * whether a subslice is available. Starting with Gen12 we use the
+ * term "subslice" to refer to what the hardware documentation
+ * describes as a "dual-subslice." The availability of subslice Y
+ * in slice X can be queried with the following formula :
+ *
+ * .. code:: c
+ *
+ * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1
+ *
+ * - The EU mask for each subslice in each slice, with one bit per EU
+ * telling whether an EU is available. The availability of EU Z in
+ * subslice Y in slice X can be queried with the following formula :
+ *
+ * .. code:: c
+ *
+ * (data[eu_offset +
+ * (X * max_subslices + Y) * eu_stride +
+ * Z / 8
+ * ] >> (Z % 8)) & 1
+ */
+ __u8 data[];
+};
+
+/**
+ * DOC: Engine Discovery uAPI
+ *
+ * Engine discovery uAPI is a way of enumerating physical engines present in a
+ * GPU associated with an open i915 DRM file descriptor. This supersedes the old
+ * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like
+ * `I915_PARAM_HAS_BLT`.
+ *
+ * The need for this interface came starting with Icelake and newer GPUs, which
+ * started to establish a pattern of having multiple engines of a same class,
+ * where not all instances were always completely functionally equivalent.
+ *
+ * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the
+ * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id.
+ *
+ * Example for getting the list of engines:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_query_engine_info *info;
+ * struct drm_i915_query_item item = {
+ * .query_id = DRM_I915_QUERY_ENGINE_INFO,
+ * };
+ * struct drm_i915_query query = {
+ * .num_items = 1,
+ * .items_ptr = (uintptr_t)&item,
+ * };
+ * int err, i;
+ *
+ * // First query the size of the blob we need, this needs to be large
+ * // enough to hold our array of engines. The kernel will fill out the
+ * // item.length for us, which is the number of bytes we need.
+ * //
+ * // Alternatively a large buffer can be allocated straight away enabling
+ * // querying in one pass, in which case item.length should contain the
+ * // length of the provided buffer.
+ * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * if (err) ...
+ *
+ * info = calloc(1, item.length);
+ * // Now that we allocated the required number of bytes, we call the ioctl
+ * // again, this time with the data_ptr pointing to our newly allocated
+ * // blob, which the kernel can then populate with info on all engines.
+ * item.data_ptr = (uintptr_t)info;
+ *
+ * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * if (err) ...
+ *
+ * // We can now access each engine in the array
+ * for (i = 0; i < info->num_engines; i++) {
+ * struct drm_i915_engine_info einfo = info->engines[i];
+ * u16 class = einfo.engine.engine_class;
+ * u16 instance = einfo.engine.engine_instance;
+ * ....
+ * }
+ *
+ * free(info);
+ *
+ * Each of the enumerated engines, apart from being defined by its class and
+ * instance (see `struct i915_engine_class_instance`), also can have flags and
+ * capabilities defined as documented in i915_drm.h.
+ *
+ * For instance video engines which support HEVC encoding will have the
+ * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set.
+ *
+ * Engine discovery only fully comes into its own when combined with the new way
+ * of addressing engines when submitting batch buffers using contexts with
+ * engine maps configured.
+ */
+
+/**
+ * struct drm_i915_engine_info
+ *
+ * Describes one engine and its capabilities as known to the driver.
+ */
+struct drm_i915_engine_info {
+ /** @engine: Engine class and instance. */
+ struct i915_engine_class_instance engine;
+
+ /** @rsvd0: Reserved field. */
+ __u32 rsvd0;
+
+ /** @flags: Engine flags. */
+ __u64 flags;
+#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0)
+
+ /** @capabilities: Capabilities of this engine. */
+ __u64 capabilities;
+#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
+#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
+
+ /** @logical_instance: Logical instance of engine */
+ __u16 logical_instance;
+
+ /** @rsvd1: Reserved fields. */
+ __u16 rsvd1[3];
+ /** @rsvd2: Reserved fields. */
+ __u64 rsvd2[3];
+};
+
+/**
+ * struct drm_i915_query_engine_info
+ *
+ * Engine info query enumerates all engines known to the driver by filling in
+ * an array of struct drm_i915_engine_info structures.
+ */
+struct drm_i915_query_engine_info {
+ /** @num_engines: Number of struct drm_i915_engine_info structs following. */
+ __u32 num_engines;
+
+ /** @rsvd: MBZ */
+ __u32 rsvd[3];
+
+ /** @engines: Marker for drm_i915_engine_info structures. */
+ struct drm_i915_engine_info engines[];
+};
+
+/**
+ * struct drm_i915_query_perf_config
+ *
+ * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and
+ * %DRM_I915_QUERY_GEOMETRY_SUBSLICES.
+ */
+struct drm_i915_query_perf_config {
+ union {
+ /**
+ * @n_configs:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this field to
+ * the number of configurations available.
+ */
+ __u64 n_configs;
+
+ /**
+ * @config:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the
+ * value in this field as configuration identifier to decide
+ * what data to write into config_ptr.
+ */
+ __u64 config;
+
+ /**
+ * @uuid:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the
+ * value in this field as configuration identifier to decide
+ * what data to write into config_ptr.
+ *
+ * String formatted like "%08x-%04x-%04x-%04x-%012x"
+ */
+ char uuid[36];
+ };
+
+ /**
+ * @flags:
+ *
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /**
+ * @data:
+ *
+ * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST,
+ * i915 will write an array of __u64 of configuration identifiers.
+ *
+ * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA,
+ * i915 will write a struct drm_i915_perf_oa_config. If the following
+ * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will
+ * write into the associated pointers the values submitted when the
+ * configuration was created :
+ *
+ * - &drm_i915_perf_oa_config.n_mux_regs
+ * - &drm_i915_perf_oa_config.n_boolean_regs
+ * - &drm_i915_perf_oa_config.n_flex_regs
+ */
__u8 data[];
};
+/**
+ * enum drm_i915_gem_memory_class - Supported memory classes
+ */
+enum drm_i915_gem_memory_class {
+ /** @I915_MEMORY_CLASS_SYSTEM: System memory */
+ I915_MEMORY_CLASS_SYSTEM = 0,
+ /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */
+ I915_MEMORY_CLASS_DEVICE,
+};
+
+/**
+ * struct drm_i915_gem_memory_class_instance - Identify particular memory region
+ */
+struct drm_i915_gem_memory_class_instance {
+ /** @memory_class: See enum drm_i915_gem_memory_class */
+ __u16 memory_class;
+
+ /** @memory_instance: Which instance */
+ __u16 memory_instance;
+};
+
+/**
+ * struct drm_i915_memory_region_info - Describes one region as known to the
+ * driver.
+ *
+ * Note this is using both struct drm_i915_query_item and struct drm_i915_query.
+ * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
+ * at &drm_i915_query_item.query_id.
+ */
+struct drm_i915_memory_region_info {
+ /** @region: The class:instance pair encoding */
+ struct drm_i915_gem_memory_class_instance region;
+
+ /** @rsvd0: MBZ */
+ __u32 rsvd0;
+
+ /**
+ * @probed_size: Memory probed by the driver
+ *
+ * Note that it should not be possible to ever encounter a zero value
+ * here, also note that no current region type will ever return -1 here.
+ * Although for future region types, this might be a possibility. The
+ * same applies to the other size fields.
+ */
+ __u64 probed_size;
+
+ /**
+ * @unallocated_size: Estimate of memory remaining
+ *
+ * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.
+ * Without this (or if this is an older kernel) the value here will
+ * always equal the @probed_size. Note this is only currently tracked
+ * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here
+ * will always equal the @probed_size).
+ */
+ __u64 unallocated_size;
+
+ union {
+ /** @rsvd1: MBZ */
+ __u64 rsvd1[8];
+ struct {
+ /**
+ * @probed_cpu_visible_size: Memory probed by the driver
+ * that is CPU accessible.
+ *
+ * This will always be <= @probed_size, and the
+ * remainder (if there is any) will not be CPU
+ * accessible.
+ *
+ * On systems without small BAR, the @probed_size will
+ * always equal the @probed_cpu_visible_size, since all
+ * of it will be CPU accessible.
+ *
+ * Note this is only tracked for
+ * I915_MEMORY_CLASS_DEVICE regions (for other types the
+ * value here will always equal the @probed_size).
+ *
+ * Note that if the value returned here is zero, then
+ * this must be an old kernel which lacks the relevant
+ * small-bar uAPI support (including
+ * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on
+ * such systems we should never actually end up with a
+ * small BAR configuration, assuming we are able to load
+ * the kernel module. Hence it should be safe to treat
+ * this the same as when @probed_cpu_visible_size ==
+ * @probed_size.
+ */
+ __u64 probed_cpu_visible_size;
+
+ /**
+ * @unallocated_cpu_visible_size: Estimate of CPU
+ * visible memory remaining.
+ *
+ * Note this is only tracked for
+ * I915_MEMORY_CLASS_DEVICE regions (for other types the
+ * value here will always equal the
+ * @probed_cpu_visible_size).
+ *
+ * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+ * accounting. Without this the value here will always
+ * equal the @probed_cpu_visible_size. Note this is only
+ * currently tracked for I915_MEMORY_CLASS_DEVICE
+ * regions (for other types the value here will also
+ * always equal the @probed_cpu_visible_size).
+ *
+ * If this is an older kernel the value here will be
+ * zero, see also @probed_cpu_visible_size.
+ */
+ __u64 unallocated_cpu_visible_size;
+ };
+ };
+};
+
+/**
+ * struct drm_i915_query_memory_regions
+ *
+ * The region info query enumerates all regions known to the driver by filling
+ * in an array of struct drm_i915_memory_region_info structures.
+ *
+ * Example for getting the list of supported regions:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_query_memory_regions *info;
+ * struct drm_i915_query_item item = {
+ * .query_id = DRM_I915_QUERY_MEMORY_REGIONS,
+ * };
+ * struct drm_i915_query query = {
+ * .num_items = 1,
+ * .items_ptr = (uintptr_t)&item,
+ * };
+ * int err, i;
+ *
+ * // First query the size of the blob we need, this needs to be large
+ * // enough to hold our array of regions. The kernel will fill out the
+ * // item.length for us, which is the number of bytes we need.
+ * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * if (err) ...
+ *
+ * info = calloc(1, item.length);
+ * // Now that we allocated the required number of bytes, we call the ioctl
+ * // again, this time with the data_ptr pointing to our newly allocated
+ * // blob, which the kernel can then populate with all the region info.
+ * item.data_ptr = (uintptr_t)info;
+ *
+ * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * if (err) ...
+ *
+ * // We can now access each region in the array
+ * for (i = 0; i < info->num_regions; i++) {
+ * struct drm_i915_memory_region_info mr = info->regions[i];
+ * u16 class = mr.region.memory_class;
+ * u16 instance = mr.region.memory_instance;
+ *
+ * ....
+ * }
+ *
+ * free(info);
+ */
+struct drm_i915_query_memory_regions {
+ /** @num_regions: Number of supported regions */
+ __u32 num_regions;
+
+ /** @rsvd: MBZ */
+ __u32 rsvd[3];
+
+ /** @regions: Info about each supported region */
+ struct drm_i915_memory_region_info regions[];
+};
+
+/**
+ * DOC: GuC HWCONFIG blob uAPI
+ *
+ * The GuC produces a blob with information about the current device.
+ * i915 reads this blob from GuC and makes it available via this uAPI.
+ *
+ * The format and meaning of the blob content are documented in the
+ * Programmer's Reference Manual.
+ */
+
+/**
+ * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
+ * extension support using struct i915_user_extension.
+ *
+ * Note that new buffer flags should be added here, at least for the stuff that
+ * is immutable. Previously we would have two ioctls, one to create the object
+ * with gem_create, and another to apply various parameters, however this
+ * creates some ambiguity for the params which are considered immutable. Also in
+ * general we're phasing out the various SET/GET ioctls.
+ */
+struct drm_i915_gem_create_ext {
+ /**
+ * @size: Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be returned.
+ *
+ * DG2 64K min page size implications:
+ *
+ * On discrete platforms, starting from DG2, we have to contend with GTT
+ * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE
+ * objects. Specifically the hardware only supports 64K or larger GTT
+ * page sizes for such memory. The kernel will already ensure that all
+ * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page
+ * sizes underneath.
+ *
+ * Note that the returned size here will always reflect any required
+ * rounding up done by the kernel, i.e. 4K will now become 64K on devices
+ * such as DG2. The kernel will always select the largest minimum
+ * page-size for the set of possible placements as the value to use when
+ * rounding up the @size.
+ *
+ * Special DG2 GTT address alignment requirement:
+ *
+ * The GTT alignment will also need to be at least 2M for such objects.
+ *
+ * Note that due to how the hardware implements 64K GTT page support, we
+ * have some further complications:
+ *
+ * 1) The entire PDE (which covers a 2MB virtual address range), must
+ * contain only 64K PTEs, i.e. mixing 4K and 64K PTEs in the same
+ * PDE is forbidden by the hardware.
+ *
+ * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
+ * objects.
+ *
+ * To keep things simple for userland, we mandate that any GTT mappings
+ * must be aligned to and rounded up to 2MB. The kernel will internally
+ * pad them out to the next 2MB boundary. As this only wastes virtual
+ * address space and avoids userland having to copy any needlessly
+ * complicated PDE sharing scheme (coloring) and only affects DG2, this
+ * is deemed to be a good compromise.
+ */
+ __u64 size;
+
+ /**
+ * @handle: Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+ __u32 handle;
+
+ /**
+ * @flags: Optional flags.
+ *
+ * Supported values:
+ *
+ * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+ * the object will need to be accessed via the CPU.
+ *
+ * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only
+ * strictly required on configurations where some subset of the device
+ * memory is directly visible/mappable through the CPU (which we also
+ * call small BAR), like on some DG2+ systems. Note that this is quite
+ * undesirable, but due to various factors like the client CPU, BIOS etc
+ * it's something we can expect to see in the wild. See
+ * &drm_i915_memory_region_info.probed_cpu_visible_size for how to
+ * determine if this system applies.
+ *
+ * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to
+ * ensure the kernel can always spill the allocation to system memory,
+ * if the object can't be allocated in the mappable part of
+ * I915_MEMORY_CLASS_DEVICE.
+ *
+ * Also note that since the kernel only supports flat-CCS on objects
+ * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore
+ * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with
+ * flat-CCS.
+ *
+ * Without this hint, the kernel will assume that non-mappable
+ * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
+ * kernel can still migrate the object to the mappable part, as a last
+ * resort, if userspace ever CPU faults this object, but this might be
+ * expensive, and so ideally should be avoided.
+ *
+ * On older kernels which lack the relevant small-bar uAPI support (see
+ * also &drm_i915_memory_region_info.probed_cpu_visible_size),
+ * usage of the flag will result in an error, but it should NEVER be
+ * possible to end up with a small BAR configuration, assuming we can
+ * also successfully load the i915 kernel module. In such cases the
+ * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as
+ * such there are zero restrictions on where the object can be placed.
+ */
+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
+ __u32 flags;
+
+ /**
+ * @extensions: The chain of extensions to apply to this object.
+ *
+ * This will be useful in the future when we need to support several
+ * different extensions, and we need to apply more than one when
+ * creating the object. See struct i915_user_extension.
+ *
+ * If we don't supply any extensions then we get the same old gem_create
+ * behaviour.
+ *
+ * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
+ * struct drm_i915_gem_create_ext_memory_regions.
+ *
+ * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
+ * struct drm_i915_gem_create_ext_protected_content.
+ */
+#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
+#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+ __u64 extensions;
+};
+
+/**
+ * struct drm_i915_gem_create_ext_memory_regions - The
+ * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension.
+ *
+ * Set the object with the desired set of placements/regions in priority
+ * order. Each entry must be unique and supported by the device.
+ *
+ * This is provided as an array of struct drm_i915_gem_memory_class_instance, or
+ * an equivalent layout of class:instance pair encodings. See struct
+ * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to
+ * query the supported regions for a device.
+ *
+ * As an example, on discrete devices, if we wish to set the placement as
+ * device local-memory we can do something like:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_memory_class_instance region_lmem = {
+ * .memory_class = I915_MEMORY_CLASS_DEVICE,
+ * .memory_instance = 0,
+ * };
+ * struct drm_i915_gem_create_ext_memory_regions regions = {
+ * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
+ * .regions = (uintptr_t)&region_lmem,
+ * .num_regions = 1,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = 16 * PAGE_SIZE,
+ * .extensions = (uintptr_t)&regions,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ *
+ * At which point we get the object handle in &drm_i915_gem_create_ext.handle,
+ * along with the final object size in &drm_i915_gem_create_ext.size, which
+ * should account for any rounding up, if required.
+ *
+ * Note that userspace has no means of knowing the current backing region
+ * for objects where @num_regions is larger than one. The kernel will only
+ * ensure that the priority order of the @regions array is honoured, either
+ * when initially placing the object, or when moving memory around due to
+ * memory pressure.
+ *
+ * On Flat-CCS capable HW, compression is supported for the objects residing
+ * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other
+ * memory class in @regions and migrated (by i915, due to memory
+ * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to
+ * decompress the content. But i915 doesn't have the required information to
+ * decompress the userspace compressed objects.
+ *
+ * So i915 supports Flat-CCS, on the objects which can reside only on
+ * I915_MEMORY_CLASS_DEVICE regions.
+ */
+struct drm_i915_gem_create_ext_memory_regions {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+
+ /** @pad: MBZ */
+ __u32 pad;
+ /** @num_regions: Number of elements in the @regions array. */
+ __u32 num_regions;
+ /**
+ * @regions: The regions/placements array.
+ *
+ * An array of struct drm_i915_gem_memory_class_instance.
+ */
+ __u64 regions;
+};
+
+/**
+ * struct drm_i915_gem_create_ext_protected_content - The
+ * I915_GEM_CREATE_EXT_PROTECTED_CONTENT extension.
+ *
+ * If this extension is provided, buffer contents are expected to be protected
+ * by PXP encryption and require decryption for scan out and processing. This
+ * is only possible on platforms that have PXP enabled, on all other scenarios
+ * using this extension will cause the ioctl to fail and return -ENODEV. The
+ * flags parameter is reserved for future expansion and must currently be set
+ * to zero.
+ *
+ * The buffer contents are considered invalid after a PXP session teardown.
+ *
+ * The encryption is guaranteed to be processed correctly only if the object
+ * is submitted with a context created using the
+ * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks
+ * at submission time on the validity of the objects involved.
+ *
+ * Below is an example on how to create a protected object:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_create_ext_protected_content protected_ext = {
+ * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
+ * .flags = 0,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = PAGE_SIZE,
+ * .extensions = (uintptr_t)&protected_ext,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ */
+struct drm_i915_gem_create_ext_protected_content {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+ /** @flags: reserved for future usage, currently MBZ */
+ __u32 flags;
+};
+
+/* ID of the protected content session managed by i915 when PXP is active */
+#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
+
#if defined(__cplusplus)
}
#endif