From 746b5583c1a48a837f4891adaff5e09d61b204a6 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 23 Oct 2013 09:53:14 +0300 Subject: IB/mlx5: Multithreaded create MR Use asynchronous commands to execute up to eight concurrent create MR commands. This is to fill memory caches faster so we keep consuming from there. Also, increase timeout for shrinking caches to five minutes. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6b8c496572c8..513619a75695 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -557,9 +557,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context); struct mlx5_cmd_work_ent { struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; + void *uout; + int uout_size; mlx5_cmd_cbk_t callback; void *context; - int idx; + int idx; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; @@ -570,6 +572,7 @@ struct mlx5_cmd_work_ent { u8 token; struct timespec ts1; struct timespec ts2; + u16 op; }; struct mlx5_pas { @@ -653,6 +656,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); +int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size, mlx5_cmd_cbk_t callback, + void *context); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); @@ -676,7 +682,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, - struct mlx5_create_mkey_mbox_in *in, int inlen); + struct mlx5_create_mkey_mbox_in *in, int inlen, + mlx5_cmd_cbk_t callback, void *context, + struct mlx5_create_mkey_mbox_out *out); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, struct mlx5_query_mkey_mbox_out *out, int outlen); @@ -745,6 +753,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) return mkey_idx << 8; } +static inline u8 mlx5_mkey_variant(u32 mkey) +{ + return mkey & 0xff; +} + enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, -- cgit v1.2.1 From bf0bf77f6519e5dcd57a77b47e1d151c1e81b7ec Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 23 Oct 2013 09:53:19 +0300 Subject: mlx5: Support communicating arbitrary host page size to firmware Connect-IB firmware requires 4K pages to be communicated with the driver. This patch breaks larger pages to 4K units to enable support for architectures utilizing larger page size, such as PowerPC. This patch also fixes several places that referred to PAGE_SHIFT instead of explicit 12 which is the inherent page shift on Connect-IB. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 513619a75695..554548cd3dd4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -483,6 +483,7 @@ struct mlx5_priv { struct rb_root page_root; int fw_pages; int reg_pages; + struct list_head free_list; struct mlx5_core_health health; -- cgit v1.2.1 From 87b8de492da34942fc554f2958a570ce0642296a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 23 Oct 2013 09:53:20 +0300 Subject: mlx5: Clear reserved area in set_hca_cap() Firmware spec requires reserved fields to be cleared when calling set_hca_cap. Current code queries and copy to the set area, possibly resulting in reserved bits not cleared. This patch copies only writable fields to the set area. Fix also typo - msx => max Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5eb4e31af22b..3d789f43e34b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -230,6 +230,11 @@ enum { MLX5_MAX_PAGE_SHIFT = 31 }; +enum { + MLX5_CAP_OFF_DCT = 41, + MLX5_CAP_OFF_CMDIF_CSUM = 46, +}; + struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; @@ -319,9 +324,9 @@ struct mlx5_hca_cap { u8 rsvd25[42]; __be16 log_uar_page_sz; u8 rsvd26[28]; - u8 log_msx_atomic_size_qp; + u8 log_max_atomic_size_qp; u8 rsvd27[2]; - u8 log_msx_atomic_size_dc; + u8 log_max_atomic_size_dc; u8 rsvd28[76]; }; -- cgit v1.2.1 From 1b77d2bd753d119eedcbc08fda58934307676554 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 24 Oct 2013 12:01:03 +0300 Subject: mlx5: Use enum to indicate adapter page size The Connect-IB adapter has an inherent page size which equals 4K. Define an new enum that equals the page shift and use it instead of using the value 12 throughout the code. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 3d789f43e34b..da78875807fc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -230,6 +230,10 @@ enum { MLX5_MAX_PAGE_SHIFT = 31 }; +enum { + MLX5_ADAPTER_PAGE_SHIFT = 12 +}; + enum { MLX5_CAP_OFF_DCT = 41, MLX5_CAP_OFF_CMDIF_CSUM = 46, -- cgit v1.2.1 From 9dd69a600a680fab1c9235a644c886d8d6a2da2a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 26 Oct 2013 14:32:30 +0200 Subject: IB/srp: Keep rport as long as the IB transport layer Keep the rport data structure around after srp_remove_host() has finished until cleanup of the IB transport layer has finished completely. This is necessary because later patches use the rport pointer inside the queuecommand callback. Without this patch accessing the rport from inside a queuecommand callback is racy because srp_remove_host() must be invoked before scsi_remove_host() and because the queuecommand callback could get invoked after srp_remove_host() has finished. In other words, without this patch the queuecommand callback can get invoked after the rport data structure has been freed. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- include/scsi/scsi_transport_srp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index ff0f04ac91aa..5a2d2d1081c1 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -38,6 +38,8 @@ extern struct scsi_transport_template * srp_attach_transport(struct srp_function_template *); extern void srp_release_transport(struct scsi_transport_template *); +extern void srp_rport_get(struct srp_rport *rport); +extern void srp_rport_put(struct srp_rport *rport); extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); -- cgit v1.2.1 From 29c17324803c8a3bb5b2b69309e43571164cc4de Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 26 Oct 2013 14:33:30 +0200 Subject: scsi_transport_srp: Add transport layer error handling Add the necessary functions in the SRP transport module to allow an SRP initiator driver to implement transport layer error handling similar to the functionality already provided by the FC transport layer. This includes: - Support for implementing fast_io_fail_tmo, the time that should elapse after having detected a transport layer problem and before failing I/O. - Support for implementing dev_loss_tmo, the time that should elapse after having detected a transport layer problem and before removing a remote port. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- include/scsi/scsi_transport_srp.h | 74 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index 5a2d2d1081c1..ee7001677f64 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -13,6 +13,26 @@ struct srp_rport_identifiers { u8 roles; }; +/** + * enum srp_rport_state - SRP transport layer state + * @SRP_RPORT_RUNNING: Transport layer operational. + * @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer + * is running and I/O has been blocked. + * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast. + * @SRP_RPORT_LOST: Device loss timer has expired; port is being removed. + */ +enum srp_rport_state { + SRP_RPORT_RUNNING, + SRP_RPORT_BLOCKED, + SRP_RPORT_FAIL_FAST, + SRP_RPORT_LOST, +}; + +/** + * struct srp_rport + * @lld_data: LLD private data. + * @mutex: Protects against concurrent rport fast_io_fail / dev_loss_tmo. + */ struct srp_rport { /* for initiator and target drivers */ @@ -23,11 +43,38 @@ struct srp_rport { /* for initiator drivers */ - void *lld_data; /* LLD private data */ + void *lld_data; + + struct mutex mutex; + enum srp_rport_state state; + bool deleted; + int fast_io_fail_tmo; + int dev_loss_tmo; + struct delayed_work fast_io_fail_work; + struct delayed_work dev_loss_work; }; +/** + * struct srp_function_template + * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and + * dev_loss_tmo sysfs attribute for an rport. + * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command + * timer if the device on which it has been queued is blocked. + * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value. + * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value. + * @reconnect: Callback function for reconnecting to the target. See also + * srp_reconnect_rport(). + * @terminate_rport_io: Callback function for terminating all outstanding I/O + * requests for an rport. + */ struct srp_function_template { /* for initiator drivers */ + bool has_rport_state; + bool reset_timer_if_blocked; + int *fast_io_fail_tmo; + int *dev_loss_tmo; + int (*reconnect)(struct srp_rport *rport); + void (*terminate_rport_io)(struct srp_rport *rport); void (*rport_delete)(struct srp_rport *rport); /* for target drivers */ int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int); @@ -43,7 +90,30 @@ extern void srp_rport_put(struct srp_rport *rport); extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); - +extern int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo); +extern int srp_reconnect_rport(struct srp_rport *rport); +extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); +/** + * srp_chkready() - evaluate the transport layer state before I/O + * + * Returns a SCSI result code that can be returned by the LLD queuecommand() + * implementation. The role of this function is similar to that of + * fc_remote_port_chkready(). + */ +static inline int srp_chkready(struct srp_rport *rport) +{ + switch (rport->state) { + case SRP_RPORT_RUNNING: + case SRP_RPORT_BLOCKED: + default: + return 0; + case SRP_RPORT_FAIL_FAST: + return DID_TRANSPORT_FAILFAST << 16; + case SRP_RPORT_LOST: + return DID_NO_CONNECT << 16; + } +} + #endif -- cgit v1.2.1 From 8c64e4531c3c3bedf11d723196270d4a7553db45 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 26 Oct 2013 14:35:59 +0200 Subject: scsi_transport_srp: Add periodic reconnect support Add support for periodically reconnecting to an SRP target until the dev_loss timer expires. After the tenth reconnection attempt, gradually slow down subsequent reconnect attempts. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- include/scsi/scsi_transport_srp.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index ee7001677f64..4ebf6913b7b2 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -31,7 +31,8 @@ enum srp_rport_state { /** * struct srp_rport * @lld_data: LLD private data. - * @mutex: Protects against concurrent rport fast_io_fail / dev_loss_tmo. + * @mutex: Protects against concurrent rport reconnect / fast_io_fail / + * dev_loss_tmo activity. */ struct srp_rport { /* for initiator and target drivers */ @@ -48,6 +49,9 @@ struct srp_rport { struct mutex mutex; enum srp_rport_state state; bool deleted; + int reconnect_delay; + int failed_reconnects; + struct delayed_work reconnect_work; int fast_io_fail_tmo; int dev_loss_tmo; struct delayed_work fast_io_fail_work; @@ -60,6 +64,7 @@ struct srp_rport { * dev_loss_tmo sysfs attribute for an rport. * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command * timer if the device on which it has been queued is blocked. + * @reconnect_delay: If not NULL, points to the default reconnect_delay value. * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value. * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value. * @reconnect: Callback function for reconnecting to the target. See also @@ -71,6 +76,7 @@ struct srp_function_template { /* for initiator drivers */ bool has_rport_state; bool reset_timer_if_blocked; + int *reconnect_delay; int *fast_io_fail_tmo; int *dev_loss_tmo; int (*reconnect)(struct srp_rport *rport); @@ -90,7 +96,8 @@ extern void srp_rport_put(struct srp_rport *rport); extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); -extern int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo); +extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, + int dev_loss_tmo); extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); -- cgit v1.2.1 From 180771a3707a4c0577cbf4f830c754dbabfdfccb Mon Sep 17 00:00:00 2001 From: "Upinder Malhi \\(umalhi\\)" Date: Tue, 10 Sep 2013 03:36:59 +0000 Subject: IB/core: Add Cisco usNIC rdma node and transport types This patch adds new rdma node and new rdma transport, and supporting code used by Cisco's low latency driver called usNIC. usNIC uses its own transport, distinct from IB and iWARP. Signed-off-by: Upinder Malhi Signed-off-by: Jeff Squyres Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e393171e2fac..60354d53948e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -67,12 +67,14 @@ enum rdma_node_type { RDMA_NODE_IB_CA = 1, RDMA_NODE_IB_SWITCH, RDMA_NODE_IB_ROUTER, - RDMA_NODE_RNIC + RDMA_NODE_RNIC, + RDMA_NODE_USNIC, }; enum rdma_transport_type { RDMA_TRANSPORT_IB, - RDMA_TRANSPORT_IWARP + RDMA_TRANSPORT_IWARP, + RDMA_TRANSPORT_USNIC }; enum rdma_transport_type -- cgit v1.2.1 From 1c636f801615bdfc9b1d46904e8258c7a025670b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 31 Oct 2013 15:26:32 +0200 Subject: IB/core: Encorce MR access rights rules on kernel consumers Enforce the rule that when requesting remote write or atomic permissions, local write must be indicated as well. See IB spec 11.2.8.2. Spotted by: Hagay Abramovsky Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 60354d53948e..68c053d0d629 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2386,4 +2386,17 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain); int ib_destroy_flow(struct ib_flow *flow_id); +static inline int ib_check_mr_access(int flags) +{ + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + + return 0; +} + #endif /* IB_VERBS_H */ -- cgit v1.2.1 From f88482743872230f5899b8344a057d6e2fd011e2 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 6 Nov 2013 23:21:44 +0100 Subject: IB/core: clarify overflow/underflow checks on ib_create/destroy_flow This patch fixes the following issues: 1. Unneeded checks were removed 2. Removed the fixed size out of flow_attr.size, thus simplifying the checks. 3. Remove a 32bit hole on 64bit systems with strict alignment in struct ib_kern_flow_att by adding a reserved field. Signed-off-by: Matan Barak Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index e3ddd86c90a6..99a58bdc1c64 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -771,6 +771,7 @@ struct ib_kern_flow_attr { struct ib_uverbs_create_flow { __u32 comp_mask; + __u32 reserved; __u64 response; __u32 qp_handle; struct ib_kern_flow_attr flow_attr; -- cgit v1.2.1 From d82693dad09b49232cd727ee9e15bd027710edac Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Wed, 6 Nov 2013 23:21:45 +0100 Subject: IB/core: Rename 'flow' structs to match other uverbs structs Commit 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") added public data structures to support receive flow steering. The new structs are not following the 'uverbs' pattern: they're lacking the common prefix 'ib_uverbs'. This patch replaces ib_kern prefix by ib_uverbs. Signed-off-by: Yann Droneaud Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 99a58bdc1c64..96a660533236 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -701,61 +701,61 @@ struct ib_uverbs_detach_mcast { }; #ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING -struct ib_kern_eth_filter { +struct ib_uverbs_eth_filter { __u8 dst_mac[6]; __u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; }; -struct ib_kern_spec_eth { +struct ib_uverbs_spec_eth { __u32 type; __u16 size; __u16 reserved; - struct ib_kern_eth_filter val; - struct ib_kern_eth_filter mask; + struct ib_uverbs_eth_filter val; + struct ib_uverbs_eth_filter mask; }; -struct ib_kern_ipv4_filter { +struct ib_uverbs_ipv4_filter { __be32 src_ip; __be32 dst_ip; }; -struct ib_kern_spec_ipv4 { +struct ib_uverbs_spec_ipv4 { __u32 type; __u16 size; __u16 reserved; - struct ib_kern_ipv4_filter val; - struct ib_kern_ipv4_filter mask; + struct ib_uverbs_ipv4_filter val; + struct ib_uverbs_ipv4_filter mask; }; -struct ib_kern_tcp_udp_filter { +struct ib_uverbs_tcp_udp_filter { __be16 dst_port; __be16 src_port; }; -struct ib_kern_spec_tcp_udp { +struct ib_uverbs_spec_tcp_udp { __u32 type; __u16 size; __u16 reserved; - struct ib_kern_tcp_udp_filter val; - struct ib_kern_tcp_udp_filter mask; + struct ib_uverbs_tcp_udp_filter val; + struct ib_uverbs_tcp_udp_filter mask; }; -struct ib_kern_spec { +struct ib_uverbs_spec { union { struct { __u32 type; __u16 size; __u16 reserved; }; - struct ib_kern_spec_eth eth; - struct ib_kern_spec_ipv4 ipv4; - struct ib_kern_spec_tcp_udp tcp_udp; + struct ib_uverbs_spec_eth eth; + struct ib_uverbs_spec_ipv4 ipv4; + struct ib_uverbs_spec_tcp_udp tcp_udp; }; }; -struct ib_kern_flow_attr { +struct ib_uverbs_flow_attr { __u32 type; __u16 size; __u16 priority; @@ -774,7 +774,7 @@ struct ib_uverbs_create_flow { __u32 reserved; __u64 response; __u32 qp_handle; - struct ib_kern_flow_attr flow_attr; + struct ib_uverbs_flow_attr flow_attr; }; struct ib_uverbs_create_flow_resp { -- cgit v1.2.1 From b68c956021386eead6b8b28e445f33c8c985d7d2 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Wed, 6 Nov 2013 23:21:46 +0100 Subject: IB/core: Make uverbs flow structure use names like verbs ones This patch adds "flow" prefix to most of data structure added as part of commit 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") to keep those names in sync with the data structures added in commit 319a441d1361 ("IB/core: Add receive flow steering support"). It's just a matter of translating 'ib_flow' to 'ib_uverbs_flow'. Signed-off-by: Yann Droneaud Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 96a660533236..ef2be64bf4c3 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -701,57 +701,57 @@ struct ib_uverbs_detach_mcast { }; #ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING -struct ib_uverbs_eth_filter { +struct ib_uverbs_flow_eth_filter { __u8 dst_mac[6]; __u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; }; -struct ib_uverbs_spec_eth { +struct ib_uverbs_flow_spec_eth { __u32 type; __u16 size; __u16 reserved; - struct ib_uverbs_eth_filter val; - struct ib_uverbs_eth_filter mask; + struct ib_uverbs_flow_eth_filter val; + struct ib_uverbs_flow_eth_filter mask; }; -struct ib_uverbs_ipv4_filter { +struct ib_uverbs_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; }; -struct ib_uverbs_spec_ipv4 { +struct ib_uverbs_flow_spec_ipv4 { __u32 type; __u16 size; __u16 reserved; - struct ib_uverbs_ipv4_filter val; - struct ib_uverbs_ipv4_filter mask; + struct ib_uverbs_flow_ipv4_filter val; + struct ib_uverbs_flow_ipv4_filter mask; }; -struct ib_uverbs_tcp_udp_filter { +struct ib_uverbs_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; }; -struct ib_uverbs_spec_tcp_udp { +struct ib_uverbs_flow_spec_tcp_udp { __u32 type; __u16 size; __u16 reserved; - struct ib_uverbs_tcp_udp_filter val; - struct ib_uverbs_tcp_udp_filter mask; + struct ib_uverbs_flow_tcp_udp_filter val; + struct ib_uverbs_flow_tcp_udp_filter mask; }; -struct ib_uverbs_spec { +struct ib_uverbs_flow_spec { union { struct { __u32 type; __u16 size; __u16 reserved; }; - struct ib_uverbs_spec_eth eth; - struct ib_uverbs_spec_ipv4 ipv4; - struct ib_uverbs_spec_tcp_udp tcp_udp; + struct ib_uverbs_flow_spec_eth eth; + struct ib_uverbs_flow_spec_ipv4 ipv4; + struct ib_uverbs_flow_spec_tcp_udp tcp_udp; }; }; -- cgit v1.2.1 From 58913efba9c3aa7992f2a4d630135ded833d988e Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Wed, 6 Nov 2013 23:21:47 +0100 Subject: IB/core: Use a common header for uverbs flow_specs A common header will allows better checking of flow specs size, while ensuring strict alignment to 64 bits. Signed-off-by: Yann Droneaud Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 53 +++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index ef2be64bf4c3..43014981550a 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -701,6 +701,14 @@ struct ib_uverbs_detach_mcast { }; #ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING +struct ib_uverbs_flow_spec_hdr { + __u32 type; + __u16 size; + __u16 reserved; + /* followed by flow_spec */ + __u64 flow_spec_data[0]; +}; + struct ib_uverbs_flow_eth_filter { __u8 dst_mac[6]; __u8 src_mac[6]; @@ -709,9 +717,14 @@ struct ib_uverbs_flow_eth_filter { }; struct ib_uverbs_flow_spec_eth { - __u32 type; - __u16 size; - __u16 reserved; + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; struct ib_uverbs_flow_eth_filter val; struct ib_uverbs_flow_eth_filter mask; }; @@ -722,9 +735,14 @@ struct ib_uverbs_flow_ipv4_filter { }; struct ib_uverbs_flow_spec_ipv4 { - __u32 type; - __u16 size; - __u16 reserved; + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; struct ib_uverbs_flow_ipv4_filter val; struct ib_uverbs_flow_ipv4_filter mask; }; @@ -735,19 +753,27 @@ struct ib_uverbs_flow_tcp_udp_filter { }; struct ib_uverbs_flow_spec_tcp_udp { - __u32 type; - __u16 size; - __u16 reserved; + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; struct ib_uverbs_flow_tcp_udp_filter val; struct ib_uverbs_flow_tcp_udp_filter mask; }; struct ib_uverbs_flow_spec { union { - struct { - __u32 type; - __u16 size; - __u16 reserved; + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; }; struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; @@ -767,6 +793,7 @@ struct ib_uverbs_flow_attr { * struct ib_flow_spec_xxx * struct ib_flow_spec_yyy */ + struct ib_uverbs_flow_spec_hdr flow_specs[0]; }; struct ib_uverbs_create_flow { -- cgit v1.2.1 From 2490f20be496c2da14ae4632a8c60e0633e97fd0 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Wed, 6 Nov 2013 23:21:48 +0100 Subject: IB/core: Remove ib_uverbs_flow_spec structure from userspace The structure holding any types of flow_spec is of no use to userspace. It would be wrong for userspace to do: struct ib_uverbs_flow_spec flow_spec; flow_spec.type = IB_FLOW_SPEC_TCP; flow_spec.size = sizeof(flow_spec); Instead, userspace should use the dedicated flow_spec structure for - Ethernet : struct ib_uverbs_flow_spec_eth, - IPv4 : struct ib_uverbs_flow_spec_ipv4, - TCP/UDP : struct ib_uverbs_flow_spec_tcp_udp. In other words, struct ib_uverbs_flow_spec is a "virtual" data structure that can only be use by the kernel as an alias to the other. Signed-off-by: Yann Droneaud Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 43014981550a..fc9bbe37cfce 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -765,22 +765,6 @@ struct ib_uverbs_flow_spec_tcp_udp { struct ib_uverbs_flow_tcp_udp_filter mask; }; -struct ib_uverbs_flow_spec { - union { - union { - struct ib_uverbs_flow_spec_hdr hdr; - struct { - __u32 type; - __u16 size; - __u16 reserved; - }; - }; - struct ib_uverbs_flow_spec_eth eth; - struct ib_uverbs_flow_spec_ipv4 ipv4; - struct ib_uverbs_flow_spec_tcp_udp tcp_udp; - }; -}; - struct ib_uverbs_flow_attr { __u32 type; __u16 size; -- cgit v1.2.1 From f21519b23c1b6fa25366be4114ccf7fcf1c190f9 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Wed, 6 Nov 2013 23:21:49 +0100 Subject: IB/core: extended command: an improved infrastructure for uverbs commands Commit 400dbc96583f ("IB/core: Infrastructure for extensible uverbs commands") added an infrastructure for extensible uverbs commands while later commit 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") exported ib_create_flow()/ib_destroy_flow() functions using this new infrastructure. According to the commit 400dbc96583f, the purpose of this infrastructure is to support passing around provider (eg. hardware) specific buffers when userspace issue commands to the kernel, so that it would be possible to extend uverbs (eg. core) buffers independently from the provider buffers. But the new kernel command function prototypes were not modified to take advantage of this extension. This issue was exposed by Roland Dreier in a previous review[1]. So the following patch is an attempt to a revised extensible command infrastructure. This improved extensible command infrastructure distinguish between core (eg. legacy)'s command/response buffers from provider (eg. hardware)'s command/response buffers: each extended command implementing function is given a struct ib_udata to hold core (eg. uverbs) input and output buffers, and another struct ib_udata to hold the hw (eg. provider) input and output buffers. Having those buffers identified separately make it easier to increase one buffer to support extension without having to add some code to guess the exact size of each command/response parts: This should make the extended functions more reliable. Additionally, instead of relying on command identifier being greater than IB_USER_VERBS_CMD_THRESHOLD, the proposed infrastructure rely on unused bits in command field: on the 32 bits provided by command field, only 6 bits are really needed to encode the identifier of commands currently supported by the kernel. (Even using only 6 bits leaves room for about 23 new commands). So this patch makes use of some high order bits in command field to store flags, leaving enough room for more command identifiers than one will ever need (eg. 256). The new flags are used to specify if the command should be processed as an extended one or a legacy one. While designing the new command format, care was taken to make usage of flags itself extensible. Using high order bits of the commands field ensure that newer libibverbs on older kernel will properly fail when trying to call extended commands. On the other hand, older libibverbs on newer kernel will never be able to issue calls to extended commands. The extended command header includes the optional response pointer so that output buffer length and output buffer pointer are located together in the command, allowing proper parameters checking. This should make implementing functions easier and safer. Additionally the extended header ensure 64bits alignment, while making all sizes multiple of 8 bytes, extending the maximum buffer size: legacy extended Maximum command buffer: 256KBytes 1024KBytes (512KBytes + 512KBytes) Maximum response buffer: 256KBytes 1024KBytes (512KBytes + 512KBytes) For the purpose of doing proper buffer size accounting, the headers size are no more taken in account in "in_words". One of the odds of the current extensible infrastructure, reading twice the "legacy" command header, is fixed by removing the "legacy" command header from the extended command header: they are processed as two different parts of the command: memory is read once and information are not duplicated: it's making clear that's an extended command scheme and not a different command scheme. The proposed scheme will format input (command) and output (response) buffers this way: - command: legacy header + extended header + command data (core + hw): +----------------------------------------+ | flags | 00 00 | command | | in_words | out_words | +----------------------------------------+ | response | | response | | provider_in_words | provider_out_words | | padding | +----------------------------------------+ | | . . . (in_words * 8) . | | +----------------------------------------+ | | . . . (provider_in_words * 8) . | | +----------------------------------------+ - response, if present: +----------------------------------------+ | | . . . (out_words * 8) . | | +----------------------------------------+ | | . . . (provider_out_words * 8) . | | +----------------------------------------+ The overall design is to ensure that the extensible infrastructure is itself extensible while begin more reliable with more input and bound checking. Note: The unused field in the extended header would be perfect candidate to hold the command "comp_mask" (eg. bit field used to handle compatibility). This was suggested by Roland Dreier in a previous review[2]. But "comp_mask" field is likely to be present in the uverb input and/or provider input, likewise for the response, as noted by Matan Barak[3], so it doesn't make sense to put "comp_mask" in the header. [1]: http://marc.info/?i=CAL1RGDWxmM17W2o_era24A-TTDeKyoL6u3NRu_=t_dhV_ZA9MA@mail.gmail.com [2]: http://marc.info/?i=CAL1RGDXJtrc849M6_XNZT5xO1+ybKtLWGq6yg6LhoSsKpsmkYA@mail.gmail.com [3]: http://marc.info/?i=525C1149.6000701@mellanox.com Signed-off-by: Yann Droneaud Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com [ Convert "ret ? ret : 0" to the equivalent "ret". - Roland ] Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 1 + include/uapi/rdma/ib_user_verbs.h | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e393171e2fac..a06fc122f803 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1436,6 +1436,7 @@ struct ib_device { int uverbs_abi_ver; u64 uverbs_cmd_mask; + u64 uverbs_ex_cmd_mask; char node_desc[64]; __be64 node_guid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index fc9bbe37cfce..6ace125e1af6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -87,11 +87,14 @@ enum { IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, IB_USER_VERBS_CMD_OPEN_QP, +}; + #ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING - IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, - IB_USER_VERBS_CMD_DESTROY_FLOW -#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ +enum { + IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, + IB_USER_VERBS_EX_CMD_DESTROY_FLOW }; +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ /* * Make sure that all structs defined in this file remain laid out so @@ -122,6 +125,12 @@ struct ib_uverbs_comp_event_desc { * the rest of the command struct based on these value. */ +#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff +#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u +#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24 + +#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80 + struct ib_uverbs_cmd_hdr { __u32 command; __u16 in_words; @@ -129,10 +138,8 @@ struct ib_uverbs_cmd_hdr { }; #ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING -struct ib_uverbs_cmd_hdr_ex { - __u32 command; - __u16 in_words; - __u16 out_words; +struct ib_uverbs_ex_cmd_hdr { + __u64 response; __u16 provider_in_words; __u16 provider_out_words; __u32 cmd_hdr_reserved; @@ -782,8 +789,6 @@ struct ib_uverbs_flow_attr { struct ib_uverbs_create_flow { __u32 comp_mask; - __u32 reserved; - __u64 response; __u32 qp_handle; struct ib_uverbs_flow_attr flow_attr; }; -- cgit v1.2.1 From 69ad5da41b4ed94aef31d4111a3442cfd73ce570 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 6 Nov 2013 23:21:50 +0100 Subject: IB/core: Re-enable create_flow/destroy_flow uverbs This commit reverts commit 7afbddfae993 ("IB/core: Temporarily disable create_flow/destroy_flow uverbs"). Since the uverbs extensions functionality was experimental for v3.12, this patch re-enables the support for them and flow-steering for v3.13. Signed-off-by: Matan Barak Signed-off-by: Roland Dreier --- include/uapi/rdma/ib_user_verbs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 6ace125e1af6..cbfdd4ca9510 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -89,12 +89,10 @@ enum { IB_USER_VERBS_CMD_OPEN_QP, }; -#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING enum { IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW }; -#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ /* * Make sure that all structs defined in this file remain laid out so @@ -137,14 +135,12 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; -#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING struct ib_uverbs_ex_cmd_hdr { __u64 response; __u16 provider_in_words; __u16 provider_out_words; __u32 cmd_hdr_reserved; }; -#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ struct ib_uverbs_get_context { __u64 response; @@ -707,7 +703,6 @@ struct ib_uverbs_detach_mcast { __u64 driver_data[0]; }; -#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING struct ib_uverbs_flow_spec_hdr { __u32 type; __u16 size; @@ -802,7 +797,6 @@ struct ib_uverbs_destroy_flow { __u32 comp_mask; __u32 flow_handle; }; -#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ struct ib_uverbs_create_srq { __u64 response; -- cgit v1.2.1