Diffstat (limited to 'drivers/infiniband/sw')
25 files changed, 342 insertions, 366 deletions
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 49c9541050d4..728795043496 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -381,15 +381,14 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
 	struct rvt_mr *mr;
 	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
+	struct sg_page_iter sg_iter;
+	int n, m;
 	struct ib_mr *ret;
 
 	if (length == 0)
 		return ERR_PTR(-EINVAL);
 
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
+	umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
 	if (IS_ERR(umem))
 		return (void *)umem;
 
@@ -408,23 +407,21 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mr.access_flags = mr_access_flags;
 	mr->umem = umem;
 
-	mr->mr.page_shift = umem->page_shift;
+	mr->mr.page_shift = PAGE_SHIFT;
 	m = 0;
 	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+	for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
 		void *vaddr;
 
-		vaddr = page_address(sg_page(sg));
+		vaddr = page_address(sg_page_iter_page(&sg_iter));
 		if (!vaddr) {
 			ret = ERR_PTR(-EINVAL);
 			goto bail_inval;
 		}
 		mr->mr.map[m]->segs[n].vaddr = vaddr;
-		mr->mr.map[m]->segs[n].length = BIT(umem->page_shift);
-		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr,
-				      BIT(umem->page_shift));
-		n++;
-		if (n == RVT_SEGSZ) {
+		mr->mr.map[m]->segs[n].length = PAGE_SIZE;
+		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE);
+		if (++n == RVT_SEGSZ) {
 			m++;
 			n = 0;
 		}
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index 8a89afff3363..6033054b22fa 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -50,7 +50,7 @@
 
 /**
  * rvt_alloc_pd - allocate a protection domain
- * @ibdev: ib device
+ * @ibpd: PD
  * @context: optional user context
  * @udata: optional user data
  *
@@ -58,19 +58,14 @@
  *
 * Return: 0 on success
  */
-struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
-			   struct ib_ucontext *context,
-			   struct ib_udata *udata)
+int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		 struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibpd->device;
 	struct rvt_dev_info *dev = ib_to_rvt(ibdev);
-	struct rvt_pd *pd;
-	struct ib_pd *ret;
+	struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
+	int ret = 0;
 
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
 	/*
 	 * While we could continue allocating protecetion domains, being
 	 * constrained only by system resources. The IBTA spec defines that
@@ -81,8 +76,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
 	spin_lock(&dev->n_pds_lock);
 	if (dev->n_pds_allocated == dev->dparms.props.max_pd) {
 		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
 		goto bail;
 	}
 
@@ -92,8 +86,6 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
 	/* ib_alloc_pd() will initialize pd->ibpd. */
 	pd->user = !!udata;
 
-	ret = &pd->ibpd;
-
 bail:
 	return ret;
 }
@@ -104,16 +96,11 @@ bail:
  *
 * Return: always 0
  */
-int rvt_dealloc_pd(struct ib_pd *ibpd)
+void rvt_dealloc_pd(struct ib_pd *ibpd)
 {
-	struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
 	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
 
 	spin_lock(&dev->n_pds_lock);
 	dev->n_pds_allocated--;
 	spin_unlock(&dev->n_pds_lock);
-
-	kfree(pd);
-
-	return 0;
 }
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
index 1892ca4a9746..7a887e4a45e7 100644
--- a/drivers/infiniband/sw/rdmavt/pd.h
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -50,9 +50,8 @@
 
 #include <rdma/rdma_vt.h>
 
-struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
-			   struct ib_ucontext *context,
-			   struct ib_udata *udata);
-int rvt_dealloc_pd(struct ib_pd *ibpd);
+int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+		 struct ib_udata *udata);
+void rvt_dealloc_pd(struct ib_pd *ibpd);
 
 #endif          /* DEF_RDMAVTPD_H */
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c6cc3e4ab71d..a34b9a2a32b6 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -53,6 +53,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_hdrs.h>
 #include <rdma/opa_addr.h>
+#include <rdma/uverbs_ioctl.h>
 #include "qp.h"
 #include "vt.h"
 #include "trace.h"
@@ -854,6 +855,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_mig_state = IB_MIG_MIGRATED;
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
+	qp->s_acked_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
 	if (qp->r_rq.wq) {
 		qp->r_rq.wq->head = 0;
@@ -955,6 +957,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	size_t sg_list_sz;
 	struct ib_qp *ret = ERR_PTR(-ENOMEM);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+	struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct rvt_ucontext, ibucontext);
 	void *priv = NULL;
 	size_t sqsize;
 
@@ -1128,7 +1132,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
 
 			qp->ip = rvt_create_mmap_info(rdi, s,
-						      ibpd->uobject->context,
+						      &ucontext->ibucontext,
 						      qp->r_rq.wq);
 			if (!qp->ip) {
 				ret = ERR_PTR(-ENOMEM);
@@ -1642,11 +1646,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
 		kref_put(&qp->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
 	rdi->driver_f.qp_priv_free(rdi, qp);
 	kfree(qp->s_ack_queue);
 	rdma_destroy_ah_attr(&qp->remote_ah_attr);
 	rdma_destroy_ah_attr(&qp->alt_ah_attr);
+	vfree(qp->s_wq);
 	kfree(qp);
 	return 0;
 }
@@ -2393,11 +2397,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth)
 }
 
 /*
- * rvt_add_retry_timer - add/start a retry timer
+ * rvt_add_retry_timer_ext - add/start a retry timer
  * @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
  * add a retry timer on the QP
  */
-void rvt_add_retry_timer(struct rvt_qp *qp)
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
 {
 	struct ib_qp *ibqp = &qp->ibqp;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -2405,11 +2410,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp)
 	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_TIMER;
 	/* 4.096 usec. * (1 << qp->timeout) */
-	qp->s_timer.expires = jiffies + qp->timeout_jiffies +
-			      rdi->busy_jiffies;
+	qp->s_timer.expires = jiffies + rdi->busy_jiffies +
+			      (qp->timeout_jiffies << shift);
 	add_timer(&qp->s_timer);
 }
-EXPORT_SYMBOL(rvt_add_retry_timer);
+EXPORT_SYMBOL(rvt_add_retry_timer_ext);
 
 /**
  * rvt_add_rnr_timer - add/start an rnr timer
@@ -2785,6 +2790,18 @@ again:
 }
 EXPORT_SYMBOL(rvt_copy_sge);
 
+static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp,
+					  struct rvt_qp *sqp)
+{
+	rvp->n_pkt_drops++;
+	/*
+	 * For RC, the requester would timeout and retry so
+	 * shortcut the timeouts and just signal too many retries.
+	 */
+	return sqp->ibqp.qp_type == IB_QPT_RC ?
+		IB_WC_RETRY_EXC_ERR : IB_WC_SUCCESS;
+}
+
 /**
  * ruc_loopback - handle UC and RC loopback requests
  * @sqp: the sending QP
@@ -2857,17 +2874,14 @@ again:
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
+	if (!qp) {
+		send_status = loopback_qp_drop(rvp, sqp);
+		goto serr_no_r_lock;
+	}
+	spin_lock_irqsave(&qp->r_lock, flags);
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		rvp->n_pkt_drops++;
-		/*
-		 * For RC, the requester would timeout and retry so
-		 * shortcut the timeouts and just signal too many retries.
-		 */
-		if (sqp->ibqp.qp_type == IB_QPT_RC)
-			send_status = IB_WC_RETRY_EXC_ERR;
-		else
-			send_status = IB_WC_SUCCESS;
+		send_status = loopback_qp_drop(rvp, sqp);
 		goto serr;
 	}
 
@@ -2893,18 +2907,8 @@ again:
 		goto send_comp;
 
 	case IB_WR_SEND_WITH_INV:
-		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
-			wc.wc_flags = IB_WC_WITH_INVALIDATE;
-			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
-		}
-		goto send;
-
 	case IB_WR_SEND_WITH_IMM:
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		/* FALLTHROUGH */
 	case IB_WR_SEND:
-send:
 		ret = rvt_get_rwqe(qp, false);
 		if (ret < 0)
 			goto op_err;
@@ -2912,6 +2916,22 @@ send:
 			goto rnr_nak;
 		if (wqe->length > qp->r_len)
 			goto inv_err;
+		switch (wqe->wr.opcode) {
+		case IB_WR_SEND_WITH_INV:
+			if (!rvt_invalidate_rkey(qp,
+						 wqe->wr.ex.invalidate_rkey)) {
+				wc.wc_flags = IB_WC_WITH_INVALIDATE;
+				wc.ex.invalidate_rkey =
+					wqe->wr.ex.invalidate_rkey;
+			}
+			break;
+		case IB_WR_SEND_WITH_IMM:
+			wc.wc_flags = IB_WC_WITH_IMM;
+			wc.ex.imm_data = wqe->wr.ex.imm_data;
+			break;
+		default:
+			break;
+		}
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2988,34 +3008,12 @@ do_write:
 	sge = &sqp->s_sge.sge;
 	while (sqp->s_len) {
-		u32 len = sqp->s_len;
+		u32 len = rvt_get_sge_length(sge, sqp->s_len);
 
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
 		rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
 			     len, release, copy_last);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (!release)
-				rvt_put_mr(sge->mr);
-			if (--sqp->s_sge.num_sge)
-				*sge = *sqp->s_sge.sg_list++;
-		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= RVT_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
+		rvt_update_sge(&sqp->s_sge, len, !release);
 		sqp->s_len -= len;
 	}
 	if (release)
@@ -3041,6 +3039,7 @@ do_write:
 		       wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
+	spin_unlock_irqrestore(&qp->r_lock, flags);
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	rvp->n_loop_pkts++;
 flush_send:
@@ -3067,6 +3066,7 @@ rnr_nak:
 	}
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
+	spin_unlock_irqrestore(&qp->r_lock, flags);
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
@@ -3095,6 +3095,8 @@ err:
 	rvt_rc_error(qp, wc.status);
 
 serr:
+	spin_unlock_irqrestore(&qp->r_lock, flags);
+serr_no_r_lock:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	rvt_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 6131cc558bdb..8d71647820a8 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
 	}
 }
 EXPORT_SYMBOL(rvt_get_credit);
+
+/* rvt_restart_sge - rewind the sge state for a wqe */
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
+{
+	ss->sge = wqe->sg_list[0];
+	ss->sg_list = wqe->sg_list + 1;
+	ss->num_sge = wqe->wr.num_sge;
+	ss->total_len = wqe->length;
+	rvt_skip_sge(ss, len, false);
+	return wqe->length - len;
+}
+EXPORT_SYMBOL(rvt_restart_sge);
+
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 78e06fc456c5..895b3fabd0bf 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -48,6 +48,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "srq.h"
 #include "vt.h"
@@ -77,6 +78,8 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
 			      struct ib_udata *udata)
 {
 	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+	struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct rvt_ucontext, ibucontext);
 	struct rvt_srq *srq;
 	u32 sz;
 	struct ib_srq *ret;
@@ -119,7 +122,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
 		u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
 
 		srq->ip =
-		    rvt_create_mmap_info(dev, s, ibpd->uobject->context,
+		    rvt_create_mmap_info(dev, s, &ucontext->ibucontext,
 					 srq->rq.wq);
 		if (!srq->ip) {
 			ret = ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
index df8e1adbef9d..e3c416c6f900 100644
--- a/drivers/infiniband/sw/rdmavt/trace_cq.h
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -105,7 +105,7 @@ DEFINE_EVENT(rvt_cq_template, rvt_create_cq,
 	     TP_ARGS(cq, attr));
 
 #define CQ_PRN \
-"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
+"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x flags %x imm %x"
 
 DECLARE_EVENT_CLASS(
 	rvt_cq_entry_template,
@@ -119,6 +119,8 @@ DECLARE_EVENT_CLASS(
 		__field(u32, qpn)
 		__field(u32, length)
 		__field(u32, idx)
+		__field(u32, flags)
+		__field(u32, imm)
 	),
 	TP_fast_assign(
 		RDI_DEV_ASSIGN(cq->rdi)
@@ -128,6 +130,8 @@ DECLARE_EVENT_CLASS(
 		__entry->length = wc->byte_len;
 		__entry->qpn = wc->qp->qp_num;
 		__entry->idx = idx;
+		__entry->flags = wc->wc_flags;
+		__entry->imm = be32_to_cpu(wc->ex.imm_data);
 	),
 	TP_printk(
 		CQ_PRN,
@@ -137,7 +141,9 @@ DECLARE_EVENT_CLASS(
 		__entry->status,
 		__entry->opcode, show_wc_opcode(__entry->opcode),
 		__entry->length,
-		__entry->qpn
+		__entry->qpn,
+		__entry->flags,
+		__entry->imm
 	)
 );
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index aef3aa3fe667..42c9d35f832d 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -91,7 +91,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
 {
 	struct rvt_dev_info *rdi;
 
-	rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+	rdi = container_of(_ib_alloc_device(size), struct rvt_dev_info, ibdev);
 	if (!rdi)
 		return rdi;
 
@@ -284,10 +284,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
 				       &gid->global.interface_id);
 }
 
-struct rvt_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
 static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
 						*ibucontext)
 {
@@ -296,28 +292,21 @@ static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
 
 /**
  * rvt_alloc_ucontext - Allocate a user context
- * @ibdev: Verbs IB dev
+ * @uctx: Verbs context
  * @udata: User data allocated
  */
-static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
-					      struct ib_udata *udata)
+static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
-	struct rvt_ucontext *context;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-	return &context->ibucontext;
+	return 0;
 }
 
 /**
- *rvt_dealloc_ucontext - Free a user context
- *@context - Free this
+ * rvt_dealloc_ucontext - Free a user context
+ * @context - Free this
  */
-static int rvt_dealloc_ucontext(struct ib_ucontext *context)
+static void rvt_dealloc_ucontext(struct ib_ucontext *context)
 {
-	kfree(to_iucontext(context));
-	return 0;
+	return;
 }
 
 static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -436,6 +425,8 @@ static const struct ib_device_ops rvt_dev_ops = {
 	.req_notify_cq = rvt_req_notify_cq,
 	.resize_cq = rvt_resize_cq,
 	.unmap_fmr = rvt_unmap_fmr,
+	INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext),
 };
 
 static noinline int check_support(struct rvt_dev_info *rdi, int verb)
@@ -446,7 +437,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 		 * These functions are not part of verbs specifically but are
 		 * required for rdmavt to function.
 		 */
-		if ((!rdi->driver_f.port_callback) ||
+		if ((!rdi->ibdev.ops.init_port) ||
 		    (!rdi->driver_f.get_pci_dev))
 			return -EINVAL;
 		break;
@@ -644,8 +635,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
 	rdi->ibdev.driver_id = driver_id;
 	/* We are now good to announce we exist */
-	ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
-				 rdi->driver_f.port_callback);
+	ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
 	if (ret) {
 		rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
 		goto bail_wss;
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 383e65c7bbc0..a8c11b5e1e94 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/rdma_netlink.h>
 #include <net/addrconf.h>
 #include "rxe.h"
 #include "rxe_loc.h"
@@ -50,8 +51,10 @@ static void rxe_cleanup_ports(struct rxe_dev *rxe)
 
 /* free resources for a rxe device all objects created for this device must
  * have been destroyed
  */
-static void rxe_cleanup(struct rxe_dev *rxe)
+void rxe_dealloc(struct ib_device *ib_dev)
 {
+	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
 	rxe_pool_cleanup(&rxe->uc_pool);
 	rxe_pool_cleanup(&rxe->pd_pool);
 	rxe_pool_cleanup(&rxe->ah_pool);
@@ -65,16 +68,8 @@ static void rxe_cleanup(struct rxe_dev *rxe)
 
 	rxe_cleanup_ports(rxe);
 
-	crypto_free_shash(rxe->tfm);
-}
-
-/* called when all references have been dropped */
-void rxe_release(struct kref *kref)
-{
-	struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
-
-	rxe_cleanup(rxe);
-	ib_dealloc_device(&rxe->ib_dev);
+	if (rxe->tfm)
+		crypto_free_shash(rxe->tfm);
 }
 
 /* initialize rxe device parameters */
@@ -279,7 +274,6 @@ static int rxe_init(struct rxe_dev *rxe)
 	spin_lock_init(&rxe->mmap_offset_lock);
 	spin_lock_init(&rxe->pending_lock);
 	INIT_LIST_HEAD(&rxe->pending_mmaps);
-	INIT_LIST_HEAD(&rxe->list);
 
 	mutex_init(&rxe->usdev_lock);
 
@@ -308,37 +302,46 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
 /* called by ifc layer to create new rxe device.
  * The caller should allocate memory for rxe by calling ib_alloc_device.
  */
-int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
 {
 	int err;
 
-	kref_init(&rxe->ref_cnt);
-
 	err = rxe_init(rxe);
 	if (err)
-		goto err1;
+		return err;
 
 	rxe_set_mtu(rxe, mtu);
 
-	err = rxe_register_device(rxe);
-	if (err)
-		goto err1;
-
-	return 0;
-
-err1:
-	rxe_dev_put(rxe);
-	return err;
+	return rxe_register_device(rxe, ibdev_name);
 }
 
-/* called by the ifc layer to remove a device */
-void rxe_remove(struct rxe_dev *rxe)
+static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
 {
-	rxe_unregister_device(rxe);
+	struct rxe_dev *exists;
+	int err = 0;
+
+	exists = rxe_get_dev_from_net(ndev);
+	if (exists) {
+		ib_device_put(&exists->ib_dev);
+		pr_err("already configured on %s\n", ndev->name);
+		err = -EEXIST;
+		goto err;
+	}
 
-	rxe_dev_put(rxe);
+	err = rxe_net_add(ibdev_name, ndev);
+	if (err) {
+		pr_err("failed to add %s\n", ndev->name);
+		goto err;
+	}
+err:
+	return err;
 }
 
+static struct rdma_link_ops rxe_link_ops = {
+	.type = "rxe",
+	.newlink = rxe_newlink,
+};
+
 static int __init rxe_module_init(void)
 {
 	int err;
@@ -354,13 +357,15 @@ static int __init rxe_module_init(void)
 	if (err)
 		return err;
 
+	rdma_link_register(&rxe_link_ops);
 	pr_info("loaded\n");
 	return 0;
 }
 
 static void __exit rxe_module_exit(void)
 {
-	rxe_remove_all();
+	rdma_link_unregister(&rxe_link_ops);
+	ib_unregister_driver(RDMA_DRIVER_RXE);
 	rxe_net_exit();
 	rxe_cache_exit();
 
@@ -369,3 +374,5 @@ static void __exit rxe_module_exit(void)
 
 late_initcall(rxe_module_init);
 module_exit(rxe_module_exit);
+
+MODULE_ALIAS_RDMA_LINK("rxe");
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 5bde2ad964d2..2e2dff478833 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -95,18 +95,20 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
 
 void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
 
-int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
-void rxe_remove(struct rxe_dev *rxe);
-void rxe_remove_all(void);
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
 
 void rxe_rcv(struct sk_buff *skb);
 
-static inline void rxe_dev_put(struct rxe_dev *rxe)
+/* The caller must do a matching ib_device_put(&dev->ib_dev) */
+static inline struct rxe_dev *rxe_get_dev_from_net(struct net_device *ndev)
 {
-	kref_put(&rxe->ref_cnt, rxe_release);
+	struct ib_device *ibdev =
+		ib_device_get_by_netdev(ndev, RDMA_DRIVER_RXE);
+
+	if (!ibdev)
+		return NULL;
+	return container_of(ibdev, struct rxe_dev, ib_dev);
 }
-struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char *name);
 
 void rxe_port_up(struct rxe_dev *rxe);
 void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 26fe8d7dbc55..81ee756c19b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -34,6 +34,13 @@
 #include "rxe.h"
 #include "rxe_loc.h"
 
+void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
+{
+	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
+	rxe_av_fill_ip_info(av, attr);
+	memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
+}
+
 int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
 {
 	struct rxe_port *port;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index e996da67a851..00eb99d3df86 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -146,8 +146,7 @@ void retransmit_timer(struct timer_list *t)
 	}
 }
 
-void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
-			struct sk_buff *skb)
+void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
 {
 	int must_sched;
 
@@ -155,7 +154,8 @@ void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	must_sched = skb_queue_len(&qp->resp_pkts) > 1;
 	if (must_sched != 0)
-		rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED);
+		rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
+
 	rxe_run_task(&qp->comp.task, must_sched);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 01b74597b36a..3d8cef836f0d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -35,6 +35,7 @@
 #define RXE_LOC_H
 
 /* rxe_av.c */
+void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
 
 int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
 
@@ -231,7 +232,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
 		      struct rxe_modify_srq_cmd *ucmd);
 
-void rxe_release(struct kref *kref);
+void rxe_dealloc(struct ib_device *ib_dev);
 
 int rxe_completer(void *arg);
 int rxe_requester(void *arg);
@@ -239,11 +240,9 @@ int rxe_responder(void *arg);
 
 u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);
 
-void rxe_resp_queue_pkt(struct rxe_dev *rxe,
-			struct rxe_qp *qp, struct sk_buff *skb);
+void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
 
-void rxe_comp_queue_pkt(struct rxe_dev *rxe,
-			struct rxe_qp *qp, struct sk_buff *skb);
+void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
 
 static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
 {
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 9d3916b93f23..42f0f25e396c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -162,16 +162,15 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
 		      u64 length, u64 iova, int access, struct ib_udata *udata,
 		      struct rxe_mem *mem)
 {
-	int			entry;
 	struct rxe_map		**map;
 	struct rxe_phys_buf	*buf = NULL;
 	struct ib_umem		*umem;
-	struct scatterlist	*sg;
+	struct sg_page_iter	sg_iter;
 	int			num_buf;
 	void			*vaddr;
 	int err;
 
-	umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
+	umem = ib_umem_get(udata, start, length, access, 0);
 	if (IS_ERR(umem)) {
 		pr_warn("err %d from rxe_umem_get\n",
 			(int)PTR_ERR(umem));
@@ -191,16 +190,16 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
 		goto err1;
 	}
 
-	mem->page_shift		= umem->page_shift;
-	mem->page_mask		= BIT(umem->page_shift) - 1;
+	mem->page_shift		= PAGE_SHIFT;
+	mem->page_mask		= PAGE_SIZE - 1;
 
 	num_buf			= 0;
 	map			= mem->map;
 	if (length > 0) {
 		buf = map[0]->buf;
 
-		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-			vaddr = page_address(sg_page(sg));
+		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+			vaddr = page_address(sg_page_iter_page(&sg_iter));
 			if (!vaddr) {
 				pr_warn("null vaddr\n");
 				err = -ENOMEM;
@@ -208,7 +207,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
 			}
 
 			buf->addr = (uintptr_t)vaddr;
-			buf->size = BIT(umem->page_shift);
+			buf->size = PAGE_SIZE;
 			num_buf++;
 			buf++;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8fd03ae20efc..753cabcd441c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -45,43 +45,6 @@
 #include "rxe_net.h"
 #include "rxe_loc.h"
 
-static LIST_HEAD(rxe_dev_list);
-static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
-
-struct rxe_dev *net_to_rxe(struct net_device *ndev)
-{
-	struct rxe_dev *rxe;
-	struct rxe_dev *found = NULL;
-
-	spin_lock_bh(&dev_list_lock);
-	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (rxe->ndev == ndev) {
-			found = rxe;
-			break;
-		}
-	}
-	spin_unlock_bh(&dev_list_lock);
-
-	return found;
-}
-
-struct rxe_dev *get_rxe_by_name(const char *name)
-{
-	struct rxe_dev *rxe;
-	struct rxe_dev *found = NULL;
-
-	spin_lock_bh(&dev_list_lock);
-	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
-			found = rxe;
-			break;
-		}
-	}
-	spin_unlock_bh(&dev_list_lock);
-	return found;
-}
-
-
 static struct rxe_recv_sockets recv_sockets;
 
 struct device *rxe_dma_device(struct rxe_dev *rxe)
@@ -229,18 +192,19 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct udphdr *udph;
 	struct net_device *ndev = skb->dev;
 	struct net_device *rdev = ndev;
-	struct rxe_dev *rxe = net_to_rxe(ndev);
+	struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 
 	if (!rxe && is_vlan_dev(rdev)) {
 		rdev = vlan_dev_real_dev(ndev);
-		rxe = net_to_rxe(rdev);
+		rxe = rxe_get_dev_from_net(rdev);
 	}
 
 	if (!rxe)
 		goto drop;
 
 	if (skb_linearize(skb)) {
 		pr_err("skb_linearize failed\n");
+		ib_device_put(&rxe->ib_dev);
 		goto drop;
 	}
 
@@ -253,6 +217,12 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	rxe_rcv(skb);
 
+	/*
+	 * FIXME: this is in the wrong place, it needs to be done when pkt is
+	 * destroyed
+	 */
+	ib_device_put(&rxe->ib_dev);
+
 	return 0;
 drop:
 	kfree_skb(skb);
@@ -384,9 +354,6 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
 	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
 			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
@@ -411,9 +378,6 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
 	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
@@ -437,6 +401,9 @@ int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
 
 	*crc = rxe_icrc_hdr(pkt, skb);
 
+	if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
+		pkt->mask |= RXE_LOOPBACK_MASK;
+
 	return err;
 }
 
@@ -550,42 +517,24 @@ enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
 	return IB_LINK_LAYER_ETHERNET;
 }
 
-struct rxe_dev *rxe_net_add(struct net_device *ndev)
+int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
 {
 	int err;
 	struct rxe_dev *rxe = NULL;
 
-	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
+	rxe = ib_alloc_device(rxe_dev, ib_dev);
 	if (!rxe)
-		return NULL;
+		return -ENOMEM;
 
 	rxe->ndev = ndev;
 
-	err = rxe_add(rxe, ndev->mtu);
+	err = rxe_add(rxe, ndev->mtu, ibdev_name);
 	if (err) {
 		ib_dealloc_device(&rxe->ib_dev);
-		return NULL;
+		return err;
 	}
 
-	spin_lock_bh(&dev_list_lock);
-	list_add_tail(&rxe->list, &rxe_dev_list);
-	spin_unlock_bh(&dev_list_lock);
-	return rxe;
-}
-
-void rxe_remove_all(void)
-{
-	spin_lock_bh(&dev_list_lock);
-	while (!list_empty(&rxe_dev_list)) {
-		struct rxe_dev *rxe =
-			list_first_entry(&rxe_dev_list, struct rxe_dev, list);
-
-		list_del(&rxe->list);
-		spin_unlock_bh(&dev_list_lock);
-		rxe_remove(rxe);
-		spin_lock_bh(&dev_list_lock);
-	}
-	spin_unlock_bh(&dev_list_lock);
+	return 0;
 }
 
 static void rxe_port_event(struct rxe_dev *rxe,
@@ -638,15 +587,14 @@ static int rxe_notify(struct notifier_block *not_blk,
 		      void *arg)
 {
 	struct net_device *ndev = netdev_notifier_info_to_dev(arg);
-	struct rxe_dev *rxe = net_to_rxe(ndev);
+	struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
 
 	if (!rxe)
-		goto out;
+		return NOTIFY_OK;
 
 	switch (event) {
 	case NETDEV_UNREGISTER:
-		list_del(&rxe->list);
-		rxe_remove(rxe);
+		ib_unregister_device_queued(&rxe->ib_dev);
 		break;
 	case NETDEV_UP:
 		rxe_port_up(rxe);
@@ -671,7 +619,8 @@ static int rxe_notify(struct notifier_block *not_blk,
 			event, ndev->name);
 		break;
 	}
-out:
+
+	ib_device_put(&rxe->ib_dev);
 	return NOTIFY_OK;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 106c586dbb26..2ca71d3d245c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -43,7 +43,7 @@ struct rxe_recv_sockets {
 	struct socket *sk6;
 };
 
-struct rxe_dev *rxe_net_add(struct net_device *ndev);
+int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
 
 int rxe_net_init(void);
 void rxe_net_exit(void);
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index bdea899a58ac..1abed47ca221 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -78,7 +78,8 @@ enum rxe_device_param {
 					| IB_DEVICE_SYS_IMAGE_GUID
 					| IB_DEVICE_RC_RNR_NAK_GEN
 					| IB_DEVICE_SRQ_RESIZE
-					| IB_DEVICE_MEM_MGT_EXTENSIONS,
+					| IB_DEVICE_MEM_MGT_EXTENSIONS
+					| IB_DEVICE_ALLOW_USER_UNREG,
 	RXE_MAX_SGE			= 32,
 	RXE_MAX_SGE_RD			= 32,
 	RXE_MAX_CQ			= 16384,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b5c91df22047..120fa9005954 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -42,10 +42,12 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_UC] = {
 		.name		= "rxe-uc",
 		.size		= sizeof(struct rxe_ucontext),
+		.flags		= RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_PD] = {
 		.name		= "rxe-pd",
 		.size		= sizeof(struct rxe_pd),
+		.flags		= RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_AH] = {
 		.name		= "rxe-ah",
@@ -119,8 +121,10 @@ static void rxe_cache_clean(size_t cnt)
 
 	for (i = 0; i < cnt; i++) {
 		type = &rxe_type_info[i];
-		kmem_cache_destroy(type->cache);
-		type->cache = NULL;
+		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+			kmem_cache_destroy(type->cache);
+			type->cache = NULL;
+		}
 	}
 }
 
@@ -134,14 +138,17 @@ int rxe_cache_init(void)
 	for (i = 0; i < RXE_NUM_TYPES; i++) {
 		type = &rxe_type_info[i];
 		size = ALIGN(type->size, RXE_POOL_ALIGN);
-		type->cache = kmem_cache_create(type->name, size,
-				RXE_POOL_ALIGN,
-				RXE_POOL_CACHE_FLAGS, NULL);
-		if (!type->cache) {
-			pr_err("Unable to init kmem cache for %s\n",
-			       type->name);
-			err = -ENOMEM;
-			goto err1;
+		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+			type->cache =
+				kmem_cache_create(type->name, size,
+						  RXE_POOL_ALIGN,
+						  RXE_POOL_CACHE_FLAGS, NULL);
+			if (!type->cache) {
+				pr_err("Unable to init kmem cache for %s\n",
+				       type->name);
+				err = -ENOMEM;
+				goto err1;
+			}
 		}
 	}
 
@@ -392,29 +399,64 @@ void *rxe_alloc(struct rxe_pool *pool)
 	kref_get(&pool->ref_cnt);
 	read_unlock_irqrestore(&pool->pool_lock, flags);
 
-	kref_get(&pool->rxe->ref_cnt);
+	if (!ib_device_try_get(&pool->rxe->ib_dev))
+		goto out_put_pool;
 
 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
-		goto out_put_pool;
+		goto out_cnt;
 
 	elem = kmem_cache_zalloc(pool_cache(pool),
 				 (pool->flags & RXE_POOL_ATOMIC) ?
 				 GFP_ATOMIC : GFP_KERNEL);
 	if (!elem)
-		goto out_put_pool;
+		goto out_cnt;
 
 	elem->pool = pool;
 	kref_init(&elem->ref_cnt);
 
 	return elem;
 
-out_put_pool:
+out_cnt:
 	atomic_dec(&pool->num_elem);
-	rxe_dev_put(pool->rxe);
+	ib_device_put(&pool->rxe->ib_dev);
+out_put_pool:
 	rxe_pool_put(pool);
 	return NULL;
 }
 
+int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+{
+	unsigned long flags;
+
+	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+
+	read_lock_irqsave(&pool->pool_lock, flags);
+	if (pool->state != RXE_POOL_STATE_VALID) {
+		read_unlock_irqrestore(&pool->pool_lock, flags);
+		return -EINVAL;
+	}
+	kref_get(&pool->ref_cnt);
+	read_unlock_irqrestore(&pool->pool_lock, flags);
+
+	if (!ib_device_try_get(&pool->rxe->ib_dev))
+		goto out_put_pool;
+
+	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
+		goto out_cnt;
+
+	elem->pool = pool;
+	kref_init(&elem->ref_cnt);
+
+	return 0;
+
+out_cnt:
+	atomic_dec(&pool->num_elem);
+	ib_device_put(&pool->rxe->ib_dev);
+out_put_pool:
+	rxe_pool_put(pool);
+	return -EINVAL;
+}
+
 void rxe_elem_release(struct kref *kref)
 {
 	struct rxe_pool_entry *elem =
@@ -424,9 +466,10 @@ void rxe_elem_release(struct kref *kref)
 	if (pool->cleanup)
 		pool->cleanup(elem);
 
-	kmem_cache_free(pool_cache(pool), elem);
+	if (!(pool->flags & RXE_POOL_NO_ALLOC))
+		kmem_cache_free(pool_cache(pool), elem);
 	atomic_dec(&pool->num_elem);
-	rxe_dev_put(pool->rxe);
+	ib_device_put(&pool->rxe->ib_dev);
 	rxe_pool_put(pool);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 72968c29e01f..2f2cff1cbe43 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -41,6 +41,7 @@ enum rxe_pool_flags {
 	RXE_POOL_ATOMIC		= BIT(0),
 	RXE_POOL_INDEX		= BIT(1),
 	RXE_POOL_KEY		= BIT(2),
+	RXE_POOL_NO_ALLOC	= BIT(4),
 };
 
 enum rxe_elem_type {
@@ -131,6 +132,9 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
 /* allocate an object from pool */
 void *rxe_alloc(struct rxe_pool *pool);
 
+/* connect already allocated object to pool */
+int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+
 /* assign an index to an indexed object and insert object into
  *  pool's rb tree
  */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index fd86fd2fbb26..09ede70dc1e8 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "rxe.h"
 #include "rxe_loc.h"
@@ -343,7 +344,8 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 	struct rxe_cq *rcq = to_rcq(init->recv_cq);
 	struct rxe_cq *scq = to_rcq(init->send_cq);
 	struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
-	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+	struct rxe_ucontext *ucontext =
+		rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc);
 
 	rxe_add_ref(pd);
 	rxe_add_ref(rcq);
@@ -358,11 +360,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 
 	rxe_qp_init_misc(rxe, qp, init);
 
-	err = rxe_qp_init_req(rxe, qp, init, context, uresp);
+	err = rxe_qp_init_req(rxe, qp, init, &ucontext->ibuc, uresp);
 	if (err)
 		goto err1;
 
-	err = rxe_qp_init_resp(rxe, qp, init, context, uresp);
+	err = rxe_qp_init_resp(rxe, qp, init, &ucontext->ibuc, uresp);
 	if (err)
 		goto err2;
 
@@ -631,14 +633,11 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		qp->attr.qkey = attr->qkey;
 
 	if (mask & IB_QP_AV) {
-		rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
-		rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
+		rxe_init_av(&attr->ah_attr, &qp->pri_av);
 	}
 
 	if (mask & IB_QP_ALT_PATH) {
-		rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
-				 &attr->alt_ah_attr);
-		rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
+		rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
 		qp->attr.alt_port_num = attr->alt_port_num;
 		qp->attr.alt_pkey_index = attr->alt_pkey_index;
 		qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 5c29a1bb575a..f9a492ed900b 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -266,14 +266,12 @@ err1:
 	return -EINVAL;
 }
 
-static inline void rxe_rcv_pkt(struct rxe_dev *rxe,
-			       struct rxe_pkt_info *pkt,
-			       struct sk_buff *skb)
+static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
 	if (pkt->mask & RXE_REQ_MASK)
-		rxe_resp_queue_pkt(rxe, pkt->qp, skb);
+		rxe_resp_queue_pkt(pkt->qp, skb);
 	else
-		rxe_comp_queue_pkt(rxe, pkt->qp, skb);
+		rxe_comp_queue_pkt(pkt->qp, skb);
 }
 
 static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
@@ -319,7 +317,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 
 		pkt->qp = qp;
 		rxe_add_ref(qp);
-		rxe_rcv_pkt(rxe, pkt, skb);
+		rxe_rcv_pkt(pkt, skb);
 	}
 
 	spin_unlock_bh(&mcg->mcg_lock);
@@ -411,7 +409,7 @@ void rxe_rcv(struct sk_buff *skb)
 	if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
 		rxe_rcv_mcast_pkt(rxe, skb);
 	else
-		rxe_rcv_pkt(rxe, pkt, skb);
+		rxe_rcv_pkt(pkt, skb);
 
 	return;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 231528188250..aca9f60f9b21 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -104,8 +104,7 @@ static char *resp_state_name[] = {
 };
 
 /* rxe_recv calls here to add a request packet to the input queue */
-void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
-			struct sk_buff *skb)
+void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
 {
 	int must_sched;
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 95a15892f7e6..ccda5f5a3bc0 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -58,41 +58,37 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
 	int len;
 	int err = 0;
 	char intf[32];
-	struct net_device *ndev = NULL;
-	struct rxe_dev *rxe;
+	struct net_device *ndev;
+	struct rxe_dev *exists;
 
 	len = sanitize_arg(val, intf, sizeof(intf));
 	if (!len) {
 		pr_err("add: invalid interface name\n");
-		err = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
 	ndev = dev_get_by_name(&init_net, intf);
 	if (!ndev) {
 		pr_err("interface %s not found\n", intf);
-		err = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
-	if (net_to_rxe(ndev)) {
+	exists = rxe_get_dev_from_net(ndev);
+	if (exists) {
+		ib_device_put(&exists->ib_dev);
 		pr_err("already configured on %s\n", intf);
 		err = -EINVAL;
 		goto err;
 	}
 
-	rxe = rxe_net_add(ndev);
-	if (!rxe) {
+	err = rxe_net_add("rxe%d", ndev);
+	if (err) {
 		pr_err("failed to add %s\n", intf);
-		err = -EINVAL;
 		goto err;
 	}
 
-	rxe_set_port_state(rxe);
-	dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
 err:
-	if (ndev)
-		dev_put(ndev);
+	dev_put(ndev);
 	return err;
 }
 
@@ -100,7 +96,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
 {
 	int len;
 	char intf[32];
-	struct rxe_dev *rxe;
+	struct ib_device *ib_dev;
 
 	len = sanitize_arg(val, intf, sizeof(intf));
 	if (!len) {
@@ -110,19 +106,17 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
 
 	if (strncmp("all", intf, len) == 0) {
 		pr_info("rxe_sys: remove all");
-		rxe_remove_all();
+		ib_unregister_driver(RDMA_DRIVER_RXE);
 		return 0;
 	}
 
-	rxe = get_rxe_by_name(intf);
-
-	if (!rxe) {
+	ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE);
+	if (!ib_dev) {
 		pr_err("not configured on %s\n", intf);
 		return -EINVAL;
 	}
 
-	list_del(&rxe->list);
-	rxe_remove(rxe);
+	ib_unregister_device_and_put(ib_dev);
 
 	return 0;
 }
@@ -136,6 +130,6 @@ static const struct kernel_param_ops rxe_remove_ops = {
 };
 
 module_param_cb(add, &rxe_add_ops, NULL, 0200);
-MODULE_PARM_DESC(add, "Create RXE device over network interface");
+MODULE_PARM_DESC(add, "DEPRECATED. Create RXE device over network interface");
 module_param_cb(remove, &rxe_remove_ops, NULL, 0200);
-MODULE_PARM_DESC(remove, "Remove RXE device over network interface");
+MODULE_PARM_DESC(remove, "DEPRECATED. Remove RXE device over network interface");
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index b20e6e0415f5..6ecf28570ff0 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -33,6 +33,7 @@
 
 #include <linux/dma-mapping.h>
 #include <net/addrconf.h>
+#include <rdma/uverbs_ioctl.h>
 #include "rxe.h"
 #include "rxe_loc.h"
 #include "rxe_queue.h"
@@ -79,19 +80,6 @@ static int rxe_query_port(struct ib_device *dev,
 	return rc;
 }
 
-static struct net_device *rxe_get_netdev(struct ib_device *device,
-					 u8 port_num)
-{
-	struct rxe_dev *rxe = to_rdev(device);
-
-	if (rxe->ndev) {
-		dev_hold(rxe->ndev);
-		return rxe->ndev;
-	}
-
-	return NULL;
-}
-
 static int rxe_query_pkey(struct ib_device *device,
 			  u8 port_num, u16 index, u16 *pkey)
 {
@@ -154,22 +142,19 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
 	return rxe_link_layer(rxe, port_num);
 }
 
-static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
-					      struct ib_udata *udata)
+static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
-	struct rxe_dev *rxe = to_rdev(dev);
-	struct rxe_ucontext *uc;
+	struct rxe_dev *rxe = to_rdev(uctx->device);
+	struct rxe_ucontext *uc = to_ruc(uctx);
 
-	uc = rxe_alloc(&rxe->uc_pool);
-	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
+	return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
 }
 
-static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
+static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 {
 	struct rxe_ucontext *uc = to_ruc(ibuc);
 
 	rxe_drop_ref(uc);
-	return 0;
 }
 
 static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
@@ -191,30 +176,20 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 	return 0;
 }
 
-static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
+static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+			struct ib_udata *udata)
 {
-	struct rxe_dev *rxe = to_rdev(dev);
-	struct rxe_pd *pd;
+	struct rxe_dev *rxe = to_rdev(ibpd->device);
+	struct rxe_pd *pd = to_rpd(ibpd);
 
-	pd = rxe_alloc(&rxe->pd_pool);
-	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
+	return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
 }
 
-static int rxe_dealloc_pd(struct ib_pd *ibpd)
+static void rxe_dealloc_pd(struct ib_pd *ibpd)
 {
 	struct rxe_pd *pd = to_rpd(ibpd);
 
 	rxe_drop_ref(pd);
-	return 0;
-}
-
-static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
-			struct rxe_av *av)
-{
-	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
-	rxe_av_fill_ip_info(av, attr);
 }
 
 static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -239,7 +214,7 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 	rxe_add_ref(pd);
 	ah->pd = pd;
 
-	rxe_init_av(rxe, attr, &ah->av);
+	rxe_init_av(attr, &ah->av);
 	return &ah->ibah;
 }
 
@@ -253,7 +228,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 	if (err)
 		return err;
 
-	rxe_init_av(rxe, attr, &ah->av);
+	rxe_init_av(attr, &ah->av);
 	return 0;
 }
 
@@ -330,8 +305,9 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
 	int err;
 	struct rxe_dev *rxe = to_rdev(ibpd->device);
 	struct rxe_pd *pd = to_rpd(ibpd);
+	struct rxe_ucontext *ucontext =
+		rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc);
 	struct rxe_srq *srq;
-	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
 	struct rxe_create_srq_resp __user *uresp = NULL;
 
 	if (udata) {
@@ -354,7 +330,7 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
 	rxe_add_ref(pd);
 	srq->pd = pd;
 
-	err = rxe_srq_from_init(rxe, srq, init, context, uresp);
+	err = rxe_srq_from_init(rxe, srq, init, &ucontext->ibuc, uresp);
 	if (err)
 		goto err2;
 
@@ -1129,8 +1105,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
 static ssize_t parent_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
-	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
-					   ib_dev.dev);
+	struct rxe_dev *rxe =
+		rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
 
 	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
 }
@@ -1146,6 +1122,15 @@ static const struct attribute_group rxe_attr_group = {
 	.attrs	= rxe_dev_attributes,
 };
 
+static int rxe_enable_driver(struct ib_device *ib_dev)
+{
+	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
+	rxe_set_port_state(rxe);
+	dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
+	return 0;
+}
+
 static const struct ib_device_ops rxe_dev_ops = {
 	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
 	.alloc_mr = rxe_alloc_mr,
@@ -1156,6 +1141,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.create_cq = rxe_create_cq,
 	.create_qp = rxe_create_qp,
 	.create_srq = rxe_create_srq,
+	.dealloc_driver = rxe_dealloc,
 	.dealloc_pd = rxe_dealloc_pd,
 	.dealloc_ucontext = rxe_dealloc_ucontext,
 	.dereg_mr = rxe_dereg_mr,
@@ -1164,10 +1150,10 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.destroy_qp = rxe_destroy_qp,
 	.destroy_srq = rxe_destroy_srq,
 	.detach_mcast = rxe_detach_mcast,
+	.enable_driver = rxe_enable_driver,
 	.get_dma_mr = rxe_get_dma_mr,
 	.get_hw_stats = rxe_ib_get_hw_stats,
 	.get_link_layer = rxe_get_link_layer,
-	.get_netdev = rxe_get_netdev,
 	.get_port_immutable = rxe_port_immutable,
 	.map_mr_sg = rxe_map_mr_sg,
 	.mmap = rxe_mmap,
@@ -1190,9 +1176,11 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.reg_user_mr = rxe_reg_user_mr,
 	.req_notify_cq = rxe_req_notify_cq,
 	.resize_cq = rxe_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
 };
 
-int rxe_register_device(struct rxe_dev *rxe)
+int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 {
 	int err;
 	struct ib_device *dev = &rxe->ib_dev;
@@ -1247,6 +1235,9 @@ int rxe_register_device(struct rxe_dev *rxe)
 		;
 
 	ib_set_device_ops(dev, &rxe_dev_ops);
+	err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
+	if (err)
+		return err;
 
 	tfm = crypto_alloc_shash("crc32", 0, 0);
 	if (IS_ERR(tfm)) {
@@ -1258,23 +1249,13 @@ int rxe_register_device(struct rxe_dev *rxe)
 
 	rdma_set_device_sysfs_group(dev, &rxe_attr_group);
 	dev->driver_id = RDMA_DRIVER_RXE;
-	err = ib_register_device(dev, "rxe%d", NULL);
-	if (err) {
+	err = ib_register_device(dev, ibdev_name);
+	if (err)
 		pr_warn("%s failed with error %d\n", __func__, err);
-		goto err1;
-	}
-
-	return 0;
-
-err1:
-	crypto_free_shash(rxe->tfm);
+	/*
+	 * Note that rxe may be invalid at this point if another thread
+	 * unregistered it.
+	 */
 	return err;
 }
-
-void rxe_unregister_device(struct rxe_dev *rxe)
-{
-	struct ib_device *dev = &rxe->ib_dev;
-
-	ib_unregister_device(dev);
-}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 74e04801d34d..157e51aeb1e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -61,13 +61,13 @@ static inline int psn_compare(u32 psn_a, u32 psn_b)
 }
 
 struct rxe_ucontext {
+	struct ib_ucontext	ibuc;
 	struct rxe_pool_entry	pelem;
-	struct ib_ucontext	ibuc;
 };
 
 struct rxe_pd {
+	struct ib_pd		ibpd;
 	struct rxe_pool_entry	pelem;
-	struct ib_pd		ibpd;
 };
 
 struct rxe_ah {
@@ -385,7 +385,6 @@ struct rxe_dev {
 	struct ib_device_attr	attr;
 	int			max_ucontext;
 	int			max_inline_data;
-	struct kref		ref_cnt;
 	struct mutex	usdev_lock;
 
 	struct net_device	*ndev;
@@ -412,7 +411,6 @@ struct rxe_dev {
 	atomic64_t		stats_counters[RXE_NUM_OF_COUNTERS];
 
 	struct rxe_port		port;
-	struct list_head	list;
 	struct crypto_shash	*tfm;
 };
 
@@ -466,8 +464,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
 	return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
 }
 
-int rxe_register_device(struct rxe_dev *rxe);
-void rxe_unregister_device(struct rxe_dev *rxe);
+int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
 
 void rxe_mc_cleanup(struct rxe_pool_entry *arg);
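The PD and ucontext hunks in both rdmavt and rxe follow the same conversion: the driver declares the size of its wrapper structure with INIT_RDMA_OBJ_SIZE(), the core allocates (and later frees) the memory, and alloc_pd/dealloc_pd shrink to an int initializer and a void teardown. A minimal sketch of that convention, using a hypothetical "mydrv" driver (only the names are invented; the op signatures match the hunks above):

#include <rdma/ib_verbs.h>

/* The embedded core object should be the member named in INIT_RDMA_OBJ_SIZE();
 * note how the rxe_verbs.h hunk moves ibpd/ibuc to the top of the wrappers.
 */
struct mydrv_pd {
	struct ib_pd ibpd;
	/* driver-private PD state follows */
};

static int mydrv_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
			  struct ib_udata *udata)
{
	/* The core has already allocated sizeof(struct mydrv_pd) and will
	 * initialize ibpd; only driver state is set up here.
	 */
	return 0;
}

static void mydrv_dealloc_pd(struct ib_pd *ibpd)
{
	/* No kfree() here: the core frees the containing mydrv_pd. */
}

static const struct ib_device_ops mydrv_dev_ops = {
	.alloc_pd = mydrv_alloc_pd,
	.dealloc_pd = mydrv_dealloc_pd,
	INIT_RDMA_OBJ_SIZE(ib_pd, mydrv_pd, ibpd),
};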
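With ib_pd->uobject->context removed, udata is the only route back to the caller's user context, which is what the rdma_udata_to_drv_context() calls added to rvt_create_qp(), rvt_create_srq(), rxe_qp_from_init() and rxe_create_srq() provide. A sketch under the same hypothetical "mydrv" naming:

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

struct mydrv_ucontext {
	struct ib_ucontext ibucontext;
};

static struct ib_ucontext *mydrv_caller_context(struct ib_udata *udata)
{
	/* NULL when the verb is invoked from the kernel; the issuing
	 * process's context when it arrives through uverbs.
	 */
	struct mydrv_ucontext *uctx = rdma_udata_to_drv_context(
		udata, struct mydrv_ucontext, ibucontext);

	return uctx ? &uctx->ibucontext : NULL;
}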
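With rxe_link_ops registered and MODULE_ALIAS_RDMA_LINK("rxe") in place, devices can be created through the rdma netlink interface rather than the now-DEPRECATED module parameters, e.g. (illustrative device and interface names) `rdma link add rxe0 type rxe netdev eth0`.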