Diffstat (limited to 'include')
-rw-r--r-- | include/linux/hisi_acc_qm.h   | 490
-rw-r--r-- | include/linux/mlx5/driver.h   |   3
-rw-r--r-- | include/linux/mlx5/mlx5_ifc.h | 147
-rw-r--r-- | include/linux/pci.h           |  15
-rw-r--r-- | include/linux/pci_ids.h       |   3
-rw-r--r-- | include/linux/vfio.h          |  53
-rw-r--r-- | include/linux/vfio_pci_core.h |  13
-rw-r--r-- | include/uapi/linux/vfio.h     | 406
8 files changed, 917 insertions, 213 deletions
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
new file mode 100644
index 000000000000..177f7b7cd414
--- /dev/null
+++ b/include/linux/hisi_acc_qm.h
@@ -0,0 +1,490 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+#ifndef HISI_ACC_QM_H
+#define HISI_ACC_QM_H
+
+#include <linux/bitfield.h>
+#include <linux/debugfs.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#define QM_QNUM_V1 4096
+#define QM_QNUM_V2 1024
+#define QM_MAX_VFS_NUM_V2 63
+
+/* qm user domain */
+#define QM_ARUSER_M_CFG_1 0x100088
+#define AXUSER_SNOOP_ENABLE BIT(30)
+#define AXUSER_CMD_TYPE GENMASK(14, 12)
+#define AXUSER_CMD_SMMU_NORMAL 1
+#define AXUSER_NS BIT(6)
+#define AXUSER_NO BIT(5)
+#define AXUSER_FP BIT(4)
+#define AXUSER_SSV BIT(0)
+#define AXUSER_BASE (AXUSER_SNOOP_ENABLE | \
+                     FIELD_PREP(AXUSER_CMD_TYPE, \
+                                AXUSER_CMD_SMMU_NORMAL) | \
+                     AXUSER_NS | AXUSER_NO | AXUSER_FP)
+#define QM_ARUSER_M_CFG_ENABLE 0x100090
+#define ARUSER_M_CFG_ENABLE 0xfffffffe
+#define QM_AWUSER_M_CFG_1 0x100098
+#define QM_AWUSER_M_CFG_ENABLE 0x1000a0
+#define AWUSER_M_CFG_ENABLE 0xfffffffe
+#define QM_WUSER_M_CFG_ENABLE 0x1000a8
+#define WUSER_M_CFG_ENABLE 0xffffffff
+
+/* mailbox */
+#define QM_MB_CMD_SQC 0x0
+#define QM_MB_CMD_CQC 0x1
+#define QM_MB_CMD_EQC 0x2
+#define QM_MB_CMD_AEQC 0x3
+#define QM_MB_CMD_SQC_BT 0x4
+#define QM_MB_CMD_CQC_BT 0x5
+#define QM_MB_CMD_SQC_VFT_V2 0x6
+#define QM_MB_CMD_STOP_QP 0x8
+#define QM_MB_CMD_SRC 0xc
+#define QM_MB_CMD_DST 0xd
+
+#define QM_MB_CMD_SEND_BASE 0x300
+#define QM_MB_EVENT_SHIFT 8
+#define QM_MB_BUSY_SHIFT 13
+#define QM_MB_OP_SHIFT 14
+#define QM_MB_CMD_DATA_ADDR_L 0x304
+#define QM_MB_CMD_DATA_ADDR_H 0x308
+#define QM_MB_MAX_WAIT_CNT 6000
+
+/* doorbell */
+#define QM_DOORBELL_CMD_SQ 0
+#define QM_DOORBELL_CMD_CQ 1
+#define QM_DOORBELL_CMD_EQ 2
+#define QM_DOORBELL_CMD_AEQ 3
+
+#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000
+#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000
+#define QM_QP_MAX_NUM_SHIFT 11
+#define QM_DB_CMD_SHIFT_V2 12
+#define QM_DB_RAND_SHIFT_V2 16
+#define QM_DB_INDEX_SHIFT_V2 32
+#define QM_DB_PRIORITY_SHIFT_V2 48
+#define QM_VF_STATE 0x60
+
+/* qm cache */
+#define QM_CACHE_CTL 0x100050
+#define SQC_CACHE_ENABLE BIT(0)
+#define CQC_CACHE_ENABLE BIT(1)
+#define SQC_CACHE_WB_ENABLE BIT(4)
+#define SQC_CACHE_WB_THRD GENMASK(10, 5)
+#define CQC_CACHE_WB_ENABLE BIT(11)
+#define CQC_CACHE_WB_THRD GENMASK(17, 12)
+#define QM_AXI_M_CFG 0x1000ac
+#define AXI_M_CFG 0xffff
+#define QM_AXI_M_CFG_ENABLE 0x1000b0
+#define AM_CFG_SINGLE_PORT_MAX_TRANS 0x300014
+#define AXI_M_CFG_ENABLE 0xffffffff
+#define QM_PEH_AXUSER_CFG 0x1000cc
+#define QM_PEH_AXUSER_CFG_ENABLE 0x1000d0
+#define PEH_AXUSER_CFG 0x401001
+#define PEH_AXUSER_CFG_ENABLE 0xffffffff
+
+#define QM_AXI_RRESP BIT(0)
+#define QM_AXI_BRESP BIT(1)
+#define QM_ECC_MBIT BIT(2)
+#define QM_ECC_1BIT BIT(3)
+#define QM_ACC_GET_TASK_TIMEOUT BIT(4)
+#define QM_ACC_DO_TASK_TIMEOUT BIT(5)
+#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6)
+#define QM_SQ_CQ_VF_INVALID BIT(7)
+#define QM_CQ_VF_INVALID BIT(8)
+#define QM_SQ_VF_INVALID BIT(9)
+#define QM_DB_TIMEOUT BIT(10)
+#define QM_OF_FIFO_OF BIT(11)
+#define QM_DB_RANDOM_INVALID BIT(12)
+#define QM_MAILBOX_TIMEOUT BIT(13)
+#define QM_FLR_TIMEOUT BIT(14)
+
+#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \
+                     QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \
+                     QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \
+                     QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT)
+#define QM_BASE_CE QM_ECC_1BIT
+
+#define QM_Q_DEPTH 1024
+#define QM_MIN_QNUM 2
+#define HISI_ACC_SGL_SGE_NR_MAX 255
+#define QM_SHAPER_CFG 0x100164
+#define QM_SHAPER_ENABLE BIT(30)
+#define QM_SHAPER_TYPE1_OFFSET 10
+
+/* page number for queue file region */
+#define QM_DOORBELL_PAGE_NR 1
+
+/* uacce mode of the driver */
+#define UACCE_MODE_NOUACCE 0 /* don't use uacce */
+#define UACCE_MODE_SVA 1 /* use uacce sva mode */
+#define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce"
+
+enum qm_stop_reason {
+        QM_NORMAL,
+        QM_SOFT_RESET,
+        QM_FLR,
+};
+
+enum qm_state {
+        QM_INIT = 0,
+        QM_START,
+        QM_CLOSE,
+        QM_STOP,
+};
+
+enum qp_state {
+        QP_INIT = 1,
+        QP_START,
+        QP_STOP,
+        QP_CLOSE,
+};
+
+enum qm_hw_ver {
+        QM_HW_UNKNOWN = -1,
+        QM_HW_V1 = 0x20,
+        QM_HW_V2 = 0x21,
+        QM_HW_V3 = 0x30,
+};
+
+enum qm_fun_type {
+        QM_HW_PF,
+        QM_HW_VF,
+};
+
+enum qm_debug_file {
+        CURRENT_QM,
+        CURRENT_Q,
+        CLEAR_ENABLE,
+        DEBUG_FILE_NUM,
+};
+
+enum qm_vf_state {
+        QM_READY = 0,
+        QM_NOT_READY,
+};
+
+struct qm_dfx {
+        atomic64_t err_irq_cnt;
+        atomic64_t aeq_irq_cnt;
+        atomic64_t abnormal_irq_cnt;
+        atomic64_t create_qp_err_cnt;
+        atomic64_t mb_err_cnt;
+};
+
+struct debugfs_file {
+        enum qm_debug_file index;
+        struct mutex lock;
+        struct qm_debug *debug;
+};
+
+struct qm_debug {
+        u32 curr_qm_qp_num;
+        u32 sqe_mask_offset;
+        u32 sqe_mask_len;
+        struct qm_dfx dfx;
+        struct dentry *debug_root;
+        struct dentry *qm_d;
+        struct debugfs_file files[DEBUG_FILE_NUM];
+};
+
+struct qm_shaper_factor {
+        u32 func_qos;
+        u64 cir_b;
+        u64 cir_u;
+        u64 cir_s;
+        u64 cbs_s;
+};
+
+struct qm_dma {
+        void *va;
+        dma_addr_t dma;
+        size_t size;
+};
+
+struct hisi_qm_status {
+        u32 eq_head;
+        bool eqc_phase;
+        u32 aeq_head;
+        bool aeqc_phase;
+        atomic_t flags;
+        int stop_reason;
+};
+
+struct hisi_qm;
+
+struct hisi_qm_err_info {
+        char *acpi_rst;
+        u32 msi_wr_port;
+        u32 ecc_2bits_mask;
+        u32 dev_ce_mask;
+        u32 ce;
+        u32 nfe;
+        u32 fe;
+};
+
+struct hisi_qm_err_status {
+        u32 is_qm_ecc_mbit;
+        u32 is_dev_ecc_mbit;
+};
+
+struct hisi_qm_err_ini {
+        int (*hw_init)(struct hisi_qm *qm);
+        void (*hw_err_enable)(struct hisi_qm *qm);
+        void (*hw_err_disable)(struct hisi_qm *qm);
+        u32 (*get_dev_hw_err_status)(struct hisi_qm *qm);
+        void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
+        void (*open_axi_master_ooo)(struct hisi_qm *qm);
+        void (*close_axi_master_ooo)(struct hisi_qm *qm);
+        void (*open_sva_prefetch)(struct hisi_qm *qm);
+        void (*close_sva_prefetch)(struct hisi_qm *qm);
+        void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
+        void (*err_info_init)(struct hisi_qm *qm);
+};
+
+struct hisi_qm_list {
+        struct mutex lock;
+        struct list_head list;
+        int (*register_to_crypto)(struct hisi_qm *qm);
+        void (*unregister_from_crypto)(struct hisi_qm *qm);
+};
+
+struct hisi_qm {
+        enum qm_hw_ver ver;
+        enum qm_fun_type fun_type;
+        const char *dev_name;
+        struct pci_dev *pdev;
+        void __iomem *io_base;
+        void __iomem *db_io_base;
+        u32 sqe_size;
+        u32 qp_base;
+        u32 qp_num;
+        u32 qp_in_used;
+        u32 ctrl_qp_num;
+        u32 max_qp_num;
+        u32 vfs_num;
+        u32 db_interval;
+        struct list_head list;
+        struct hisi_qm_list *qm_list;
+
+        struct qm_dma qdma;
+        struct qm_sqc *sqc;
+        struct qm_cqc *cqc;
+        struct qm_eqe *eqe;
+        struct qm_aeqe *aeqe;
+        dma_addr_t sqc_dma;
+        dma_addr_t cqc_dma;
+        dma_addr_t eqe_dma;
+        dma_addr_t aeqe_dma;
+
+        struct hisi_qm_status status;
+        const struct hisi_qm_err_ini *err_ini;
+        struct hisi_qm_err_info err_info;
+        struct hisi_qm_err_status err_status;
+        unsigned long misc_ctl; /* driver removing and reset sched */
+
+        struct rw_semaphore qps_lock;
+        struct idr qp_idr;
+        struct hisi_qp *qp_array;
+
+        struct mutex mailbox_lock;
+
+        const struct hisi_qm_hw_ops *ops;
+
+        struct qm_debug debug;
+
+        u32 error_mask;
+
+        struct workqueue_struct *wq;
+        struct work_struct work;
+        struct work_struct rst_work;
+        struct work_struct cmd_process;
+
+        const char *algs;
+        bool use_sva;
+        bool is_frozen;
+
+        /* doorbell isolation enable */
+        bool use_db_isolation;
+        resource_size_t phys_base;
+        resource_size_t db_phys_base;
+        struct uacce_device *uacce;
+        int mode;
+        struct qm_shaper_factor *factor;
+        u32 mb_qos;
+        u32 type_rate;
+};
+
+struct hisi_qp_status {
+        atomic_t used;
+        u16 sq_tail;
+        u16 cq_head;
+        bool cqc_phase;
+        atomic_t flags;
+};
+
+struct hisi_qp_ops {
+        int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm);
+};
+
+struct hisi_qp {
+        u32 qp_id;
+        u8 alg_type;
+        u8 req_type;
+
+        struct qm_dma qdma;
+        void *sqe;
+        struct qm_cqe *cqe;
+        dma_addr_t sqe_dma;
+        dma_addr_t cqe_dma;
+
+        struct hisi_qp_status qp_status;
+        struct hisi_qp_ops *hw_ops;
+        void *qp_ctx;
+        void (*req_cb)(struct hisi_qp *qp, void *data);
+        void (*event_cb)(struct hisi_qp *qp);
+
+        struct hisi_qm *qm;
+        bool is_resetting;
+        bool is_in_kernel;
+        u16 pasid;
+        struct uacce_queue *uacce_q;
+};
+
+static inline int q_num_set(const char *val, const struct kernel_param *kp,
+                            unsigned int device)
+{
+        struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
+                                              device, NULL);
+        u32 n, q_num;
+        int ret;
+
+        if (!val)
+                return -EINVAL;
+
+        if (!pdev) {
+                q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
+                pr_info("No device found currently, suppose queue number is %u\n",
+                        q_num);
+        } else {
+                if (pdev->revision == QM_HW_V1)
+                        q_num = QM_QNUM_V1;
+                else
+                        q_num = QM_QNUM_V2;
+        }
+
+        ret = kstrtou32(val, 10, &n);
+        if (ret || n < QM_MIN_QNUM || n > q_num)
+                return -EINVAL;
+
+        return param_set_int(val, kp);
+}
+
+static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
+{
+        u32 n;
+        int ret;
+
+        if (!val)
+                return -EINVAL;
+
+        ret = kstrtou32(val, 10, &n);
+        if (ret < 0)
+                return ret;
+
+        if (n > QM_MAX_VFS_NUM_V2)
+                return -EINVAL;
+
+        return param_set_int(val, kp);
+}
+
+static inline int mode_set(const char *val, const struct kernel_param *kp)
+{
+        u32 n;
+        int ret;
+
+        if (!val)
+                return -EINVAL;
+
+        ret = kstrtou32(val, 10, &n);
+        if (ret != 0 || (n != UACCE_MODE_SVA &&
+                         n != UACCE_MODE_NOUACCE))
+                return -EINVAL;
+
+        return param_set_int(val, kp);
+}
+
+static inline int uacce_mode_set(const char *val, const struct kernel_param *kp)
+{
+        return mode_set(val, kp);
+}
+
+static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
+{
+        INIT_LIST_HEAD(&qm_list->list);
+        mutex_init(&qm_list->lock);
+}
+
+int hisi_qm_init(struct hisi_qm *qm);
+void hisi_qm_uninit(struct hisi_qm *qm);
+int hisi_qm_start(struct hisi_qm *qm);
+int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
+struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type);
+int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
+int hisi_qm_stop_qp(struct hisi_qp *qp);
+void hisi_qm_release_qp(struct hisi_qp *qp);
+int hisi_qp_send(struct hisi_qp *qp, const void *msg);
+int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
+int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
+void hisi_qm_debug_init(struct hisi_qm *qm);
+enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
+int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs);
+int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
+int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
+void hisi_qm_dev_err_init(struct hisi_qm *qm);
+void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
+pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
+                                          pci_channel_state_t state);
+pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev);
+void hisi_qm_reset_prepare(struct pci_dev *pdev);
+void hisi_qm_reset_done(struct pci_dev *pdev);
+
+int hisi_qm_wait_mb_ready(struct hisi_qm *qm);
+int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+               bool op);
+
+struct hisi_acc_sgl_pool;
+struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
+        struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool,
+        u32 index, dma_addr_t *hw_sgl_dma);
+void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
+                           struct hisi_acc_hw_sgl *hw_sgl);
+struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
+                                                   u32 count, u32 sge_nr);
+void hisi_acc_free_sgl_pool(struct device *dev,
+                            struct hisi_acc_sgl_pool *pool);
+int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
+                           u8 alg_type, int node, struct hisi_qp **qps);
+void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
+void hisi_qm_dev_shutdown(struct pci_dev *pdev);
+void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+int hisi_qm_resume(struct device *dev);
+int hisi_qm_suspend(struct device *dev);
+void hisi_qm_pm_uninit(struct hisi_qm *qm);
+void hisi_qm_pm_init(struct hisi_qm *qm);
+int hisi_qm_get_dfx_access(struct hisi_qm *qm);
+void hisi_qm_put_dfx_access(struct hisi_qm *qm);
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
+
+/* Used by VFIO ACC live migration driver */
+struct pci_driver *hisi_sec_get_pf_driver(void);
+struct pci_driver *hisi_hpre_get_pf_driver(void);
+struct pci_driver *hisi_zip_get_pf_driver(void);
+#endif
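
The q_num_set(), vfs_num_set() and mode_set() helpers above are meant to back module parameters of the individual HiSilicon accelerator drivers. A minimal sketch of that wiring, assuming a hypothetical driver (the my_acc_* names and the 0xa250 PF device ID are illustrative, not part of this patch):

static int my_acc_pf_q_num_set(const char *val, const struct kernel_param *kp)
{
        /* 0xa250 stands in for the driver's PF PCI device ID */
        return q_num_set(val, kp, 0xa250);
}

static const struct kernel_param_ops my_acc_pf_q_num_ops = {
        .set = my_acc_pf_q_num_set,
        .get = param_get_int,
};

static u32 my_acc_pf_q_num = 64;
module_param_cb(pf_q_num, &my_acc_pf_q_num_ops, &my_acc_pf_q_num, 0444);
MODULE_PARM_DESC(pf_q_num, "Number of queues in PF (2 - 1024)");

static const struct kernel_param_ops my_acc_vfs_num_ops = {
        .set = vfs_num_set,
        .get = param_get_int,
};

static u32 my_acc_vfs_num;
module_param_cb(vfs_num, &my_acc_vfs_num_ops, &my_acc_vfs_num, 0444);
MODULE_PARM_DESC(vfs_num, "Number of VFs to enable (0 - 63), 0(default)");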
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 78655d8d13a7..319322a8ff94 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1143,6 +1143,9 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
 int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
                            u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
+struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev);
+void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev);
+
 #ifdef CONFIG_MLX5_CORE_IPOIB
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
                                           struct ib_device *ibdev,
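
mlx5_vf_get_core_dev()/mlx5_vf_put_core_dev() give a driver bound to an mlx5 VF temporary access to the PF's mlx5_core_dev. The get can fail once the PF starts tearing down, so the result must be checked and every successful get paired with a put. A minimal usage sketch (the function name and the -ENOTCONN choice are illustrative; the error code matches what the mlx5 vfio driver uses for this situation):

static int my_vf_do_pf_work(struct pci_dev *vf_pdev)
{
        struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(vf_pdev);

        if (!mdev)
                return -ENOTCONN;       /* PF unbound or tearing down */

        /* ... execute mlx5 commands on behalf of this VF ... */

        mlx5_vf_put_core_dev(mdev);
        return 0;
}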
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 49a48d7709ac..0235054fe55c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -127,6 +127,11 @@ enum {
         MLX5_CMD_OP_QUERY_SF_PARTITION = 0x111,
         MLX5_CMD_OP_ALLOC_SF = 0x113,
         MLX5_CMD_OP_DEALLOC_SF = 0x114,
+        MLX5_CMD_OP_SUSPEND_VHCA = 0x115,
+        MLX5_CMD_OP_RESUME_VHCA = 0x116,
+        MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE = 0x117,
+        MLX5_CMD_OP_SAVE_VHCA_STATE = 0x118,
+        MLX5_CMD_OP_LOAD_VHCA_STATE = 0x119,
         MLX5_CMD_OP_CREATE_MKEY = 0x200,
         MLX5_CMD_OP_QUERY_MKEY = 0x201,
         MLX5_CMD_OP_DESTROY_MKEY = 0x202,
@@ -1757,7 +1762,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
         u8 reserved_at_682[0x1];
         u8 log_max_sf[0x5];
         u8 apu[0x1];
-        u8 reserved_at_689[0x7];
+        u8 reserved_at_689[0x4];
+        u8 migration[0x1];
+        u8 reserved_at_68e[0x2];
         u8 log_min_sf_size[0x8];
         u8 max_num_sf_partitions[0x8];
@@ -11518,4 +11525,142 @@ enum {
         MLX5_MTT_PERM_RW = MLX5_MTT_PERM_READ | MLX5_MTT_PERM_WRITE,
 };
 
+enum {
+        MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR = 0x0,
+        MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER = 0x1,
+};
+
+struct mlx5_ifc_suspend_vhca_in_bits {
+        u8 opcode[0x10];
+        u8 uid[0x10];
+
+        u8 reserved_at_20[0x10];
+        u8 op_mod[0x10];
+
+        u8 reserved_at_40[0x10];
+        u8 vhca_id[0x10];
+
+        u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_suspend_vhca_out_bits {
+        u8 status[0x8];
+        u8 reserved_at_8[0x18];
+
+        u8 syndrome[0x20];
+
+        u8 reserved_at_40[0x40];
+};
+
+enum {
+        MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER = 0x0,
+        MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR = 0x1,
+};
+
+struct mlx5_ifc_resume_vhca_in_bits {
+        u8 opcode[0x10];
+        u8 uid[0x10];
+
+        u8 reserved_at_20[0x10];
+        u8 op_mod[0x10];
+
+        u8 reserved_at_40[0x10];
+        u8 vhca_id[0x10];
+
+        u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_resume_vhca_out_bits {
+        u8 status[0x8];
+        u8 reserved_at_8[0x18];
+
+        u8 syndrome[0x20];
+
+        u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_query_vhca_migration_state_in_bits {
+        u8 opcode[0x10];
+        u8 uid[0x10];
+
+        u8 reserved_at_20[0x10];
+        u8 op_mod[0x10];
+
+        u8 reserved_at_40[0x10];
+        u8 vhca_id[0x10];
+
+        u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_query_vhca_migration_state_out_bits {
+        u8 status[0x8];
+        u8 reserved_at_8[0x18];
+
+        u8 syndrome[0x20];
+
+        u8 reserved_at_40[0x40];
+
+        u8 required_umem_size[0x20];
+
+        u8 reserved_at_a0[0x160];
+};
+
+struct mlx5_ifc_save_vhca_state_in_bits {
+        u8 opcode[0x10];
+        u8 uid[0x10];
+
+        u8 reserved_at_20[0x10];
+        u8 op_mod[0x10];
+
+        u8 reserved_at_40[0x10];
+        u8 vhca_id[0x10];
+
+        u8 reserved_at_60[0x20];
+
+        u8 va[0x40];
+
+        u8 mkey[0x20];
+
+        u8 size[0x20];
+};
+
+struct mlx5_ifc_save_vhca_state_out_bits {
+        u8 status[0x8];
+        u8 reserved_at_8[0x18];
+
+        u8 syndrome[0x20];
+
+        u8 actual_image_size[0x20];
+
+        u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_load_vhca_state_in_bits {
+        u8 opcode[0x10];
+        u8 uid[0x10];
+
+        u8 reserved_at_20[0x10];
+        u8 op_mod[0x10];
+
+        u8 reserved_at_40[0x10];
+        u8 vhca_id[0x10];
+
+        u8 reserved_at_60[0x20];
+
+        u8 va[0x40];
+
+        u8 mkey[0x20];
+
+        u8 size[0x20];
+};
+
+struct mlx5_ifc_load_vhca_state_out_bits {
+        u8 status[0x8];
+        u8 reserved_at_8[0x18];
+
+        u8 syndrome[0x20];
+
+        u8 reserved_at_40[0x40];
+};
+
 #endif /* MLX5_IFC_H */
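
The new VHCA opcodes and *_bits layouts are consumed with the usual mlx5_ifc accessor macros. A sketch of how a caller might issue SUSPEND_VHCA for a given vhca_id (this mirrors the command pattern used throughout the mlx5 core; the function name is illustrative):

static int my_suspend_vhca(struct mlx5_core_dev *mdev, u16 vhca_id, int op_mod)
{
        u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
        u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};

        /* op_mod selects SUSPEND_INITIATOR vs SUSPEND_RESPONDER, see enum */
        MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
        MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
        MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

        return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

The migration capability bit added to mlx5_ifc_cmd_hca_cap_bits is read the same way by name, e.g. MLX5_CAP_GEN(mdev, migration), before attempting any of these commands.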
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8253a5413d7c..60d423d8f0c4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2166,7 +2166,8 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar);
 #ifdef CONFIG_PCI_IOV
 int pci_iov_virtfn_bus(struct pci_dev *dev, int id);
 int pci_iov_virtfn_devfn(struct pci_dev *dev, int id);
-
+int pci_iov_vf_id(struct pci_dev *dev);
+void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver);
 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
 void pci_disable_sriov(struct pci_dev *dev);
@@ -2194,6 +2195,18 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id)
 {
         return -ENOSYS;
 }
+
+static inline int pci_iov_vf_id(struct pci_dev *dev)
+{
+        return -ENOSYS;
+}
+
+static inline void *pci_iov_get_pf_drvdata(struct pci_dev *dev,
+                                           struct pci_driver *pf_driver)
+{
+        return ERR_PTR(-EINVAL);
+}
+
 static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
 { return -ENODEV; }
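
pci_iov_vf_id() returns the zero-based index of a VF under its PF, and pci_iov_get_pf_drvdata() lets a driver bound to the VF reach the PF driver's drvdata, but only when the caller can guarantee the PF driver cannot unbind concurrently. A sketch under those assumptions (my_pf_driver and struct my_pf_priv are hypothetical):

struct my_pf_priv;                      /* PF driver's private state */
extern struct pci_driver my_pf_driver;  /* the PF driver we depend on */

static struct my_pf_priv *my_vf_get_pf_priv(struct pci_dev *vf_pdev, int *vf_id)
{
        struct my_pf_priv *priv;

        *vf_id = pci_iov_vf_id(vf_pdev);
        if (*vf_id < 0)
                return ERR_PTR(*vf_id);

        priv = pci_iov_get_pf_drvdata(vf_pdev, &my_pf_driver);
        if (IS_ERR(priv))
                return priv;

        /* *vf_id can now index per-VF state kept by the PF driver */
        return priv;
}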
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index aad54c666407..31dee2b65a62 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2529,6 +2529,9 @@
 #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff
 
 #define PCI_VENDOR_ID_HUAWEI 0x19e5
+#define PCI_DEVICE_ID_HUAWEI_ZIP_VF 0xa251
+#define PCI_DEVICE_ID_HUAWEI_SEC_VF 0xa256
+#define PCI_DEVICE_ID_HUAWEI_HPRE_VF 0xa259
 
 #define PCI_VENDOR_ID_NETRONOME 0x19ee
 #define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000
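
These VF device IDs exist so that a vfio-pci variant driver can claim the accelerator VFs. A sketch of the match table such a driver might carry (the table name is illustrative):

static const struct pci_device_id my_vfio_pci_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
        { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
        { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
        { }
};
MODULE_DEVICE_TABLE(pci, my_vfio_pci_table);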
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 76191d7abed1..66dda06ec42d 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -33,6 +33,7 @@ struct vfio_device {
         struct vfio_group *group;
         struct vfio_device_set *dev_set;
         struct list_head dev_set_list;
+        unsigned int migration_flags;
 
         /* Members below here are private, not for driver use */
         refcount_t refcount;
@@ -55,6 +56,17 @@ struct vfio_device {
  * @match: Optional device name match callback (return: 0 for no-match, >0 for
  *         match, -errno for abort (ex. match with insufficient or incorrect
  *         additional args)
+ * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
+ * @migration_set_state: Optional callback to change the migration state for
+ *         devices that support migration. It's mandatory for
+ *         VFIO_DEVICE_FEATURE_MIGRATION migration support.
+ *         The returned FD is used for data transfer according to the FSM
+ *         definition. The driver is responsible to ensure that FD reaches end
+ *         of stream or error whenever the migration FSM leaves a data transfer
+ *         state or before close_device() returns.
+ * @migration_get_state: Optional callback to get the migration state for
+ *         devices that support migration. It's mandatory for
+ *         VFIO_DEVICE_FEATURE_MIGRATION migration support.
  */
 struct vfio_device_ops {
         char *name;
@@ -69,8 +81,44 @@ struct vfio_device_ops {
         int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
         void (*request)(struct vfio_device *vdev, unsigned int count);
         int (*match)(struct vfio_device *vdev, char *buf);
+        int (*device_feature)(struct vfio_device *device, u32 flags,
+                              void __user *arg, size_t argsz);
+        struct file *(*migration_set_state)(
+                struct vfio_device *device,
+                enum vfio_device_mig_state new_state);
+        int (*migration_get_state)(struct vfio_device *device,
+                                   enum vfio_device_mig_state *curr_state);
 };
 
+/**
+ * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
+ * @flags: Arg from the device_feature op
+ * @argsz: Arg from the device_feature op
+ * @supported_ops: Combination of VFIO_DEVICE_FEATURE_GET and SET the driver
+ *                 supports
+ * @minsz: Minimum data size the driver accepts
+ *
+ * For use in a driver's device_feature op. Checks that the inputs to the
+ * VFIO_DEVICE_FEATURE ioctl are correct for the driver's feature. Returns 1 if
+ * the driver should execute the get or set, otherwise the relevant
+ * value should be returned.
+ */
+static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops,
+                                     size_t minsz)
+{
+        if ((flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)) &
+            ~supported_ops)
+                return -EINVAL;
+        if (flags & VFIO_DEVICE_FEATURE_PROBE)
+                return 0;
+        /* Without PROBE one of GET or SET must be requested */
+        if (!(flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)))
+                return -EINVAL;
+        if (argsz < minsz)
+                return -EINVAL;
+        return 1;
+}
+
 void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
                          const struct vfio_device_ops *ops);
 void vfio_uninit_group_dev(struct vfio_device *device);
@@ -82,6 +130,11 @@ extern void vfio_device_put(struct vfio_device *device);
 
 int vfio_assign_device_set(struct vfio_device *device, void *set_id);
 
+int vfio_mig_get_next_state(struct vfio_device *device,
+                            enum vfio_device_mig_state cur_fsm,
+                            enum vfio_device_mig_state new_fsm,
+                            enum vfio_device_mig_state *next_fsm);
+
 /*
  * External user API
  */
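
A driver's migration_set_state op is expected to walk the FSM one arc at a time, using vfio_mig_get_next_state() to pick each step toward the requested state. A skeleton of that loop, assuming hypothetical my_* helpers and per-device state tracking (locking elided):

struct my_mig_dev {
        struct vfio_device vdev;
        enum vfio_device_mig_state mig_state;
};

/* Hypothetical: executes exactly one FSM arc, may return a data-transfer FD */
static struct file *my_do_one_arc(struct my_mig_dev *mig,
                                  enum vfio_device_mig_state next);

static struct file *
my_migration_set_state(struct vfio_device *vdev,
                       enum vfio_device_mig_state new_state)
{
        struct my_mig_dev *mig = container_of(vdev, struct my_mig_dev, vdev);
        enum vfio_device_mig_state next_state;
        struct file *res = NULL;
        int ret;

        while (mig->mig_state != new_state) {
                ret = vfio_mig_get_next_state(vdev, mig->mig_state, new_state,
                                              &next_state);
                if (ret)
                        return ERR_PTR(ret);

                res = my_do_one_arc(mig, next_state);
                if (IS_ERR(res))
                        return res;
                mig->mig_state = next_state;
        }
        /* NULL, or the FD opened by a STOP_COPY/RESUMING arc */
        return res;
}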
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index ef9a44b6cf5d..74a4a0f17b28 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -159,8 +159,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev,
 extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
                                size_t count, loff_t *ppos, bool iswrite);
+#ifdef CONFIG_VFIO_PCI_VGA
 extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
                                size_t count, loff_t *ppos, bool iswrite);
+#else
+static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev,
+                                      char __user *buf, size_t count,
+                                      loff_t *ppos, bool iswrite)
+{
+        return -EINVAL;
+}
+#endif
 extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
                                uint64_t data, int count, int fd);
@@ -220,6 +229,8 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn);
 extern const struct pci_error_handlers vfio_pci_core_err_handlers;
 long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
                          unsigned long arg);
+int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
+                                void __user *arg, size_t argsz);
 ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
                            size_t count, loff_t *ppos);
 ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
@@ -230,6 +241,8 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
 int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
 void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
 void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
+pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
+                                                pci_channel_state_t state);
 
 static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
 {
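
A vfio-pci variant driver plugs these core helpers into its vfio_device_ops; typically only open/close and the migration callbacks are driver specific. A sketch (the my_* symbols are hypothetical, e.g. the set_state skeleton shown earlier):

static const struct vfio_device_ops my_vfio_pci_ops = {
        .name = "my-vfio-pci",
        .open_device = my_open_device,          /* hypothetical */
        .close_device = my_close_device,        /* hypothetical */
        .ioctl = vfio_pci_core_ioctl,
        .device_feature = vfio_pci_core_ioctl_feature,
        .read = vfio_pci_core_read,
        .write = vfio_pci_core_write,
        .mmap = vfio_pci_core_mmap,
        .request = vfio_pci_core_request,
        .match = vfio_pci_core_match,
        .migration_set_state = my_migration_set_state,
        .migration_get_state = my_migration_get_state, /* hypothetical */
};

Before registering, such a driver would also set the vfio_device's migration_flags (e.g. to VFIO_MIGRATION_STOP_COPY) so the core can answer the VFIO_DEVICE_FEATURE_MIGRATION probe on its behalf.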
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ef33ea002b0b..fea86061b44e 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -323,7 +323,7 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
 #define VFIO_REGION_TYPE_GFX (1)
 #define VFIO_REGION_TYPE_CCW (2)
-#define VFIO_REGION_TYPE_MIGRATION (3)
+#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3)
 
 /* sub-types for VFIO_REGION_TYPE_PCI_* */
 
@@ -405,225 +405,29 @@ struct vfio_region_gfx_edid {
 #define VFIO_REGION_SUBTYPE_CCW_CRW (3)
 
 /* sub-types for VFIO_REGION_TYPE_MIGRATION */
-#define VFIO_REGION_SUBTYPE_MIGRATION (1)
-
-/*
- * The structure vfio_device_migration_info is placed at the 0th offset of
- * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
- * migration information. Field accesses from this structure are only supported
- * at their native width and alignment. Otherwise, the result is undefined and
- * vendor drivers should return an error.
- *
- * device_state: (read/write)
- *      - The user application writes to this field to inform the vendor driver
- *        about the device state to be transitioned to.
- *      - The vendor driver should take the necessary actions to change the
- *        device state. After successful transition to a given state, the
- *        vendor driver should return success on write(device_state, state)
- *        system call. If the device state transition fails, the vendor driver
- *        should return an appropriate -errno for the fault condition.
- *      - On the user application side, if the device state transition fails,
- *        that is, if write(device_state, state) returns an error, read
- *        device_state again to determine the current state of the device from
- *        the vendor driver.
- *      - The vendor driver should return previous state of the device unless
- *        the vendor driver has encountered an internal error, in which case
- *        the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR.
- *      - The user application must use the device reset ioctl to recover the
- *        device from VFIO_DEVICE_STATE_ERROR state. If the device is
- *        indicated to be in a valid device state by reading device_state, the
- *        user application may attempt to transition the device to any valid
- *        state reachable from the current state or terminate itself.
- *
- * device_state consists of 3 bits:
- *      - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear,
- *        it indicates the _STOP state. When the device state is changed to
- *        _STOP, driver should stop the device before write() returns.
- *      - If bit 1 is set, it indicates the _SAVING state, which means that the
- *        driver should start gathering device state information that will be
- *        provided to the VFIO user application to save the device's state.
- *      - If bit 2 is set, it indicates the _RESUMING state, which means that
- *        the driver should prepare to resume the device. Data provided through
- *        the migration region should be used to resume the device.
- *      Bits 3 - 31 are reserved for future use. To preserve them, the user
- *      application should perform a read-modify-write operation on this
- *      field when modifying the specified bits.
- *
- *  +------- _RESUMING
- *  |+------ _SAVING
- *  ||+----- _RUNNING
- *  |||
- *  000b => Device Stopped, not saving or resuming
- *  001b => Device running, which is the default state
- *  010b => Stop the device & save the device state, stop-and-copy state
- *  011b => Device running and save the device state, pre-copy state
- *  100b => Device stopped and the device state is resuming
- *  101b => Invalid state
- *  110b => Error state
- *  111b => Invalid state
- *
- * State transitions:
- *
- *              _RESUMING  _RUNNING    Pre-copy    Stop-and-copy   _STOP
- *                (100b)     (001b)      (011b)        (010b)       (000b)
- * 0. Running or default state
- *                             |
- *
- * 1. Normal Shutdown (optional)
- *                             |------------------------------------->|
- *
- * 2. Save the state or suspend
- *                             |------------------------->|---------->|
- *
- * 3. Save the state during live migration
- *                             |----------->|------------>|---------->|
- *
- * 4. Resuming
- *                  |<---------|
- *
- * 5. Resumed
- *                  |--------->|
- *
- * 0. Default state of VFIO device is _RUNNING when the user application starts.
- * 1. During normal shutdown of the user application, the user application may
- *    optionally change the VFIO device state from _RUNNING to _STOP. This
- *    transition is optional. The vendor driver must support this transition but
- *    must not require it.
- * 2. When the user application saves state or suspends the application, the
- *    device state transitions from _RUNNING to stop-and-copy and then to _STOP.
- *    On state transition from _RUNNING to stop-and-copy, driver must stop the
- *    device, save the device state and send it to the application through the
- *    migration region. The sequence to be followed for such transition is given
- *    below.
- * 3. In live migration of user application, the state transitions from _RUNNING
- *    to pre-copy, to stop-and-copy, and to _STOP.
- *    On state transition from _RUNNING to pre-copy, the driver should start
- *    gathering the device state while the application is still running and send
- *    the device state data to application through the migration region.
- *    On state transition from pre-copy to stop-and-copy, the driver must stop
- *    the device, save the device state and send it to the user application
- *    through the migration region.
- *    Vendor drivers must support the pre-copy state even for implementations
- *    where no data is provided to the user before the stop-and-copy state. The
- *    user must not be required to consume all migration data before the device
- *    transitions to a new state, including the stop-and-copy state.
- *    The sequence to be followed for above two transitions is given below.
- * 4. To start the resuming phase, the device state should be transitioned from
- *    the _RUNNING to the _RESUMING state.
- *    In the _RESUMING state, the driver should use the device state data
- *    received through the migration region to resume the device.
- * 5. After providing saved device data to the driver, the application should
- *    change the state from _RESUMING to _RUNNING.
- *
- * reserved:
- *      Reads on this field return zero and writes are ignored.
- *
- * pending_bytes: (read only)
- *      The number of pending bytes still to be migrated from the vendor driver.
- *
- * data_offset: (read only)
- *      The user application should read data_offset field from the migration
- *      region. The user application should read the device data from this
- *      offset within the migration region during the _SAVING state or write
- *      the device data during the _RESUMING state. See below for details of
- *      sequence to be followed.
- *
- * data_size: (read/write)
- *      The user application should read data_size to get the size in bytes of
- *      the data copied in the migration region during the _SAVING state and
- *      write the size in bytes of the data copied in the migration region
- *      during the _RESUMING state.
- *
- * The format of the migration region is as follows:
- * ------------------------------------------------------------------
- * |vfio_device_migration_info|    data section                      |
- * |                          |     ///////////////////////////////  |
- * ------------------------------------------------------------------
- *   ^                              ^
- *  offset 0-trapped part        data_offset
- *
- * The structure vfio_device_migration_info is always followed by the data
- * section in the region, so data_offset will always be nonzero. The offset
- * from where the data is copied is decided by the kernel driver. The data
- * section can be trapped, mmapped, or partitioned, depending on how the kernel
- * driver defines the data section. The data section partition can be defined
- * as mapped by the sparse mmap capability. If mmapped, data_offset must be
- * page aligned, whereas initial section which contains the
- * vfio_device_migration_info structure, might not end at the offset, which is
- * page aligned. The user is not required to access through mmap regardless
- * of the capabilities of the region mmap.
- * The vendor driver should determine whether and how to partition the data
- * section. The vendor driver should return data_offset accordingly.
- *
- * The sequence to be followed while in pre-copy state and stop-and-copy state
- * is as follows:
- * a. Read pending_bytes, indicating the start of a new iteration to get device
- *    data. Repeated read on pending_bytes at this stage should have no side
- *    effects.
- *    If pending_bytes == 0, the user application should not iterate to get data
- *    for that device.
- *    If pending_bytes > 0, perform the following steps.
- * b. Read data_offset, indicating that the vendor driver should make data
- *    available through the data section. The vendor driver should return this
- *    read operation only after data is available from (region + data_offset)
- *    to (region + data_offset + data_size).
- * c. Read data_size, which is the amount of data in bytes available through
- *    the migration region.
- *    Read on data_offset and data_size should return the offset and size of
- *    the current buffer if the user application reads data_offset and
- *    data_size more than once here.
- * d. Read data_size bytes of data from (region + data_offset) from the
- *    migration region.
- * e. Process the data.
- * f. Read pending_bytes, which indicates that the data from the previous
- *    iteration has been read. If pending_bytes > 0, go to step b.
- *
- * The user application can transition from the _SAVING|_RUNNING
- * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the
- * number of pending bytes. The user application should iterate in _SAVING
- * (stop-and-copy) until pending_bytes is 0.
- *
- * The sequence to be followed while _RESUMING device state is as follows:
- * While data for this device is available, repeat the following steps:
- * a. Read data_offset from where the user application should write data.
- * b. Write migration data starting at the migration region + data_offset for
- *    the length determined by data_size from the migration source.
- * c. Write data_size, which indicates to the vendor driver that data is
- *    written in the migration region. Vendor driver must return this write
- *    operations on consuming data. Vendor driver should apply the
- *    user-provided migration region data to the device resume state.
- *
- * If an error occurs during the above sequences, the vendor driver can return
- * an error code for next read() or write() operation, which will terminate the
- * loop. The user application should then take the next necessary action, for
- * example, failing migration or terminating the user application.
- *
- * For the user application, data is opaque. The user application should write
- * data in the same order as the data is received and the data should be of
- * same transaction size at the source.
- */
+#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1)
 
 struct vfio_device_migration_info {
         __u32 device_state; /* VFIO device state */
-#define VFIO_DEVICE_STATE_STOP (0)
-#define VFIO_DEVICE_STATE_RUNNING (1 << 0)
-#define VFIO_DEVICE_STATE_SAVING (1 << 1)
-#define VFIO_DEVICE_STATE_RESUMING (1 << 2)
-#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
-                                VFIO_DEVICE_STATE_SAVING | \
-                                VFIO_DEVICE_STATE_RESUMING)
+#define VFIO_DEVICE_STATE_V1_STOP (0)
+#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0)
+#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1)
+#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2)
+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \
+                                VFIO_DEVICE_STATE_V1_SAVING | \
+                                VFIO_DEVICE_STATE_V1_RESUMING)
 
 #define VFIO_DEVICE_STATE_VALID(state) \
-        (state & VFIO_DEVICE_STATE_RESUMING ? \
-        (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)
+        (state & VFIO_DEVICE_STATE_V1_RESUMING ? \
+        (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1)
 
 #define VFIO_DEVICE_STATE_IS_ERROR(state) \
-        ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
-                                              VFIO_DEVICE_STATE_RESUMING))
+        ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \
+                                              VFIO_DEVICE_STATE_V1_RESUMING))
 
 #define VFIO_DEVICE_STATE_SET_ERROR(state) \
-        ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \
-                                             VFIO_DEVICE_STATE_RESUMING)
+        ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \
+                                             VFIO_DEVICE_STATE_V1_RESUMING)
 
         __u32 reserved;
         __u64 pending_bytes;
@@ -1002,6 +806,186 @@ struct vfio_device_feature {
  */
 #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
 
+/*
+ * Indicates the device can support the migration API through
+ * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and
+ * ERROR states are always supported. Support for additional states is
+ * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be
+ * set.
+ *
+ * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and
+ * RESUMING are supported.
+ *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
+ * is supported in addition to the STOP_COPY states.
+ *
+ * Other combinations of flags have behavior to be defined in the future.
+ */
+struct vfio_device_feature_migration {
+        __aligned_u64 flags;
+#define VFIO_MIGRATION_STOP_COPY (1 << 0)
+#define VFIO_MIGRATION_P2P (1 << 1)
+};
+#define VFIO_DEVICE_FEATURE_MIGRATION 1
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO
+ * device. The new state is supplied in device_state, see enum
+ * vfio_device_mig_state for details
+ *
+ * The kernel migration driver must fully transition the device to the new state
+ * value before the operation returns to the user.
+ *
+ * The kernel migration driver must not generate asynchronous device state
+ * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET
+ * ioctl as described above.
+ *
+ * If this function fails then current device_state may be the original
+ * operating state or some other state along the combination transition path.
+ * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt
+ * to return to the original state, or attempt to return to some other state
+ * such as RUNNING or STOP.
+ *
+ * If the new_state starts a new data transfer session then the FD associated
+ * with that session is returned in data_fd. The user is responsible to close
+ * this FD when it is finished. The user must consider the migration data stream
+ * carried over the FD to be opaque and must preserve the byte order of the
+ * stream. The user is not required to preserve buffer segmentation when writing
+ * the data stream during the RESUMING operation.
+ *
+ * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO
+ * device, data_fd will be -1.
+ */
+struct vfio_device_feature_mig_state {
+        __u32 device_state; /* From enum vfio_device_mig_state */
+        __s32 data_fd;
+};
+#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2
+
+/*
+ * The device migration Finite State Machine is described by the enum
+ * vfio_device_mig_state. Some of the FSM arcs will create a migration data
+ * transfer session by returning a FD, in this case the migration data will
+ * flow over the FD using read() and write() as discussed below.
+ *
+ * There are 5 states to support VFIO_MIGRATION_STOP_COPY:
+ *  RUNNING - The device is running normally
+ *  STOP - The device does not change the internal or external state
+ *  STOP_COPY - The device internal state can be read out
+ *  RESUMING - The device is stopped and is loading a new internal state
+ *  ERROR - The device has failed and must be reset
+ *
+ * And 1 optional state to support VFIO_MIGRATION_P2P:
+ *  RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA
+ *
+ * The FSM takes actions on the arcs between FSM states. The driver implements
+ * the following behavior for the FSM arcs:
+ *
+ * RUNNING_P2P -> STOP
+ * STOP_COPY -> STOP
+ *   While in STOP the device must stop the operation of the device. The device
+ *   must not generate interrupts, DMA, or any other change to external state.
+ *   It must not change its internal state. When stopped the device and kernel
+ *   migration driver must accept and respond to interaction to support external
+ *   subsystems in the STOP state, for example PCI MSI-X and PCI config space.
+ *   Failure by the user to restrict device access while in STOP must not result
+ *   in error conditions outside the user context (ex. host system faults).
+ *
+ *   The STOP_COPY arc will terminate a data transfer session.
+ *
+ * RESUMING -> STOP
+ *   Leaving RESUMING terminates a data transfer session and indicates the
+ *   device should complete processing of the data delivered by write(). The
+ *   kernel migration driver should complete the incorporation of data written
+ *   to the data transfer FD into the device internal state and perform
+ *   final validity and consistency checking of the new device state. If the
+ *   user provided data is found to be incomplete, inconsistent, or otherwise
+ *   invalid, the migration driver must fail the SET_STATE ioctl and
+ *   optionally go to the ERROR state as described below.
+ *
+ *   While in STOP the device has the same behavior as other STOP states
+ *   described above.
+ *
+ *   To abort a RESUMING session the device must be reset.
+ *
+ * RUNNING_P2P -> RUNNING
+ *   While in RUNNING the device is fully operational, the device may generate
+ *   interrupts, DMA, respond to MMIO, all vfio device regions are functional,
+ *   and the device may advance its internal state.
+ *
+ * RUNNING -> RUNNING_P2P
+ * STOP -> RUNNING_P2P
+ *   While in RUNNING_P2P the device is partially running in the P2P quiescent
+ *   state defined below.
+ *
+ * STOP -> STOP_COPY
+ *   This arc begins the process of saving the device state and will return a
+ *   new data_fd.
+ *
+ *   While in the STOP_COPY state the device has the same behavior as STOP
+ *   with the addition that the data transfer session continues to stream the
+ *   migration state. End of stream on the FD indicates the entire device
+ *   state has been transferred.
+ *
+ *   The user should take steps to restrict access to vfio device regions while
+ *   the device is in STOP_COPY or risk corruption of the device migration data
+ *   stream.
+ *
+ * STOP -> RESUMING
+ *   Entering the RESUMING state starts a process of restoring the device state
+ *   and will return a new data_fd. The data stream fed into the data_fd should
+ *   be taken from the data transfer output of a single FD during saving from
+ *   a compatible device. The migration driver may alter/reset the internal
+ *   device state for this arc if required to prepare the device to receive the
+ *   migration data.
+ *
+ * any -> ERROR
+ *   ERROR cannot be specified as a device state, however any transition request
+ *   can be failed with an errno return and may then move the device_state into
+ *   ERROR. In this case the device was unable to execute the requested arc and
+ *   was also unable to restore the device to any valid device_state.
+ *   To recover from ERROR VFIO_DEVICE_RESET must be used to return the
+ *   device_state back to RUNNING.
+ *
+ * The optional peer to peer (P2P) quiescent state is intended to be a quiescent
+ * state for the device for the purposes of managing multiple devices within a
+ * user context where peer-to-peer DMA between devices may be active. The
+ * RUNNING_P2P states must prevent the device from initiating
+ * any new P2P DMA transactions. If the device can identify P2P transactions
+ * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration
+ * driver must complete any such outstanding operations prior to completing the
+ * FSM arc into a P2P state. For the purpose of specification the states
+ * behave as though the device was fully running if not supported. Like while in
+ * STOP or STOP_COPY the user must not touch the device, otherwise the state
+ * can be exited.
+ *
+ * The remaining possible transitions are interpreted as combinations of the
+ * above FSM arcs. As there are multiple paths through the FSM arcs the path
+ * should be selected based on the following rules:
+ *  - Select the shortest path.
+ * Refer to vfio_mig_get_next_state() for the result of the algorithm.
+ *
+ * The automatic transit through the FSM arcs that make up the combination
+ * transition is invisible to the user. When working with combination arcs the
+ * user may see any step along the path in the device_state if SET_STATE
+ * fails. When handling these types of errors users should anticipate future
+ * revisions of this protocol using new states and those states becoming
+ * visible in this case.
+ *
+ * The optional states cannot be used with SET_STATE if the device does not
+ * support them. The user can discover if these states are supported by using
+ * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can
+ * avoid knowing about these optional states if the kernel driver supports them.
+ */
+enum vfio_device_mig_state {
+        VFIO_DEVICE_STATE_ERROR = 0,
+        VFIO_DEVICE_STATE_STOP = 1,
+        VFIO_DEVICE_STATE_RUNNING = 2,
+        VFIO_DEVICE_STATE_STOP_COPY = 3,
+        VFIO_DEVICE_STATE_RESUMING = 4,
+        VFIO_DEVICE_STATE_RUNNING_P2P = 5,
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
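
Taken together, the new uAPI is driven from userspace through VFIO_DEVICE_FEATURE. A sketch of a VMM-side helper that moves a device into STOP_COPY and returns the FD whose contents are the saved device state (error handling trimmed; the function name is illustrative):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int my_start_stop_copy(int device_fd)
{
        size_t sz = sizeof(struct vfio_device_feature) +
                    sizeof(struct vfio_device_feature_mig_state);
        struct vfio_device_feature *feature = calloc(1, sz);
        struct vfio_device_feature_mig_state *mig =
                (void *)feature->data;
        int data_fd;

        feature->argsz = sz;
        feature->flags = VFIO_DEVICE_FEATURE_SET |
                         VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
        mig->device_state = VFIO_DEVICE_STATE_STOP_COPY;

        if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0) {
                free(feature);
                return -1;
        }

        /* read() data_fd to EOF to capture the device state */
        data_fd = mig->data_fd;
        free(feature);
        return data_fd;
}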