58 files changed, 1725 insertions, 432 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 707163365a93..5e8a9328e3f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3631,7 +3631,7 @@ S: Maintained
F: drivers/net/ethernet/icplus/ipg.*
IPATH DRIVER
-M: Mike Marciniszyn <infinipath@qlogic.com>
+M: Mike Marciniszyn <infinipath@intel.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: drivers/infiniband/hw/ipath/
@@ -5455,7 +5455,7 @@ L: rtc-linux@googlegroups.com
S: Maintained
QIB DRIVER
-M: Mike Marciniszyn <infinipath@qlogic.com>
+M: Mike Marciniszyn <infinipath@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qib/
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e3e470fecaa9..79c7eebb970f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1218,13 +1218,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
if (!conn_id) {
ret = -ENOMEM;
- goto out;
+ goto err1;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_acquire_dev(conn_id);
if (ret)
- goto release_conn_id;
+ goto err2;
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
@@ -1236,31 +1236,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
*/
atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
- if (!ret) {
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
- mutex_unlock(&lock);
- mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
- goto out;
- }
+ if (ret)
+ goto err3;
+
+ /*
+ * Acquire mutex to prevent user executing rdma_destroy_id()
+ * while we're accessing the cm_id.
+ */
+ mutex_lock(&lock);
+ if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ mutex_unlock(&lock);
+ mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
+ return 0;
+err3:
+ cma_deref_id(conn_id);
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
-
-release_conn_id:
+err2:
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(&conn_id->id);
-
-out:
+err1:
mutex_unlock(&listen_id->handler_mutex);
+ if (conn_id)
+ rdma_destroy_id(&conn_id->id);
return ret;
}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 71f0c0f7df94..a84112322071 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -269,7 +269,7 @@ void ib_umem_release(struct ib_umem *umem)
} else
down_write(&mm->mmap_sem);
- current->mm->locked_vm -= diff;
+ current->mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
kfree(umem);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4d27e4c3fe34..f9d0d7c413a2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -41,13 +41,18 @@
#include "uverbs.h"
-static struct lock_class_key pd_lock_key;
-static struct lock_class_key mr_lock_key;
-static struct lock_class_key cq_lock_key;
-static struct lock_class_key qp_lock_key;
-static struct lock_class_key ah_lock_key;
-static struct lock_class_key srq_lock_key;
-static struct lock_class_key xrcd_lock_key;
+struct uverbs_lock_class {
+ struct lock_class_key key;
+ char name[16];
+};
+
+static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
+static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
+static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
+static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
+static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
+static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
+static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
@@ -83,13 +88,13 @@ static struct lock_class_key xrcd_lock_key;
*/
static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct lock_class_key *key)
+ struct ib_ucontext *context, struct uverbs_lock_class *c)
{
uobj->user_handle = user_handle;
uobj->context = context;
kref_init(&uobj->ref);
init_rwsem(&uobj->mutex);
- lockdep_set_class(&uobj->mutex, key);
+ lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
uobj->live = 0;
}
@@ -522,7 +527,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
+ init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
down_write(&uobj->mutex);
pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
@@ -750,7 +755,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+ init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
down_write(&obj->uobject.mutex);
@@ -947,7 +952,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -1115,7 +1120,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
+ init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class);
down_write(&obj->uobject.mutex);
if (cmd.comp_channel >= 0) {
@@ -1399,6
+1404,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + return -EPERM; + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); @@ -1407,7 +1415,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); if (cmd.qp_type == IB_QPT_XRC_TGT) { @@ -1418,13 +1426,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } device = xrcd->device; } else { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - if (!pd || !scq) { - ret = -EINVAL; - goto err_put; - } - if (cmd.qp_type == IB_QPT_XRC_INI) { cmd.max_recv_wr = cmd.max_recv_sge = 0; } else { @@ -1435,13 +1436,24 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_put; } } - rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); - if (!rcq) { - ret = -EINVAL; - goto err_put; + + if (cmd.recv_cq_handle != cmd.send_cq_handle) { + rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } + + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); + rcq = rcq ?: scq; + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } + device = pd->device; } @@ -1585,7 +1597,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); @@ -2272,7 +2284,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -2476,30 +2488,30 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err; - } - if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); - if (!attr.ext.xrc.cq) { - ret = -EINVAL; - goto err_put_pd; - } - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); if (!attr.ext.xrc.xrcd) { ret = -EINVAL; - goto err_put_cq; + goto err; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); + + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_xrcd; + } + } + + pd = idr_read_pd(cmd->pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; @@ -2576,17 
+2588,17 @@ err_destroy: ib_destroy_srq(srq); err_put: - if (cmd->srq_type == IB_SRQT_XRC) { - atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); - } + put_pd_read(pd); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) put_cq_read(attr.ext.xrc.cq); -err_put_pd: - put_pd_read(pd); +err_put_xrcd: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } err: put_uobj_write(&obj->uevent.uobject); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 575b78045aaf..30f199e8579f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -479,6 +479,7 @@ static const struct { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_PORT, [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -1183,23 +1184,33 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { + int ret; + if (!qp->device->attach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) return -EINVAL; - return qp->device->attach_mcast(qp, gid, lid); + ret = qp->device->attach_mcast(qp, gid, lid); + if (!ret) + atomic_inc(&qp->usecnt); + return ret; } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { + int ret; + if (!qp->device->detach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) return -EINVAL; - return qp->device->detach_mcast(qp, gid, lid); + ret = qp->device->detach_mcast(qp, gid, lid); + if (!ret) + atomic_dec(&qp->usecnt); + return ret; } EXPORT_SYMBOL(ib_detach_mcast); diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index 46b878ca2c3b..e11cf7299945 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o -iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 92b4c2b0308b..55ab284e22f2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1362,7 +1362,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - BUG_ON(!ep); + if (!ep) { + printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); + return 0; + } mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: @@ -1410,6 +1413,24 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + /* + * Log interesting failures. 
+ */ + switch (status) { + case CPL_ERR_CONN_RESET: + case CPL_ERR_CONN_TIMEDOUT: + break; + default: + printk(KERN_INFO MOD "Active open failure - " + "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", + atid, status, status2errno(status), + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port), + &ep->com.remote_addr.sin_addr.s_addr, + ntohs(ep->com.remote_addr.sin_port)); + break; + } + connect_reply_upcall(ep, status2errno(status)); state_set(&ep->com, DEAD); @@ -1593,7 +1614,7 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, n, n->dev, 0); if (!ep->l2t) goto out; - ep->mtu = dst_mtu(ep->dst); + ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(n->dev); ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; step = cdev->rdev.lldi.ntxq / @@ -2656,6 +2677,12 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); ep = lookup_tid(t, tid); + if (!ep) { + printk(KERN_WARNING MOD + "Abort on non-existent endpoint, tid %d\n", tid); + kfree_skb(skb); + return 0; + } if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -2667,11 +2694,8 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). - * However, this is not needed if com state is just - * MPA_REQ_SENT */ - if (ep->com.state != MPA_REQ_SENT) - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 6d0df6ec161b..cb4ecd783700 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <rdma/ib_verbs.h> @@ -44,6 +45,12 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct c4iw_dev *dev; +}; + static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); @@ -115,7 +122,7 @@ static int qp_release(struct inode *inode, struct file *file) printk(KERN_INFO "%s null qpd?\n", __func__); return 0; } - kfree(qpd->buf); + vfree(qpd->buf); kfree(qpd); return 0; } @@ -139,7 +146,7 @@ static int qp_open(struct inode *inode, struct file *file) spin_unlock_irq(&qpd->devp->lock); qpd->bufsize = count * 128; - qpd->buf = kmalloc(qpd->bufsize, GFP_KERNEL); + qpd->buf = vmalloc(qpd->bufsize); if (!qpd->buf) { ret = -ENOMEM; goto err1; @@ -240,6 +247,81 @@ static const struct file_operations stag_debugfs_fops = { .llseek = default_llseek, }; +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"}; + +static int stats_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + + seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current", + "Max", "Fail"); + seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur, + dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail); + seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, + dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, + dev->rdev.stats.stag.max, 
dev->rdev.stats.stag.fail); + seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur, + dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail); + seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur, + dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail); + seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur, + dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail); + seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); + seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); + seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); + seq_printf(seq, " DB State: %s Transitions %llu\n", + db_state_str[dev->db_state], + dev->rdev.stats.db_state_transitions); + return 0; +} + +static int stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, stats_show, inode->i_private); +} + +static ssize_t stats_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pd.max = 0; + dev->rdev.stats.pd.fail = 0; + dev->rdev.stats.qid.max = 0; + dev->rdev.stats.qid.fail = 0; + dev->rdev.stats.stag.max = 0; + dev->rdev.stats.stag.fail = 0; + dev->rdev.stats.pbl.max = 0; + dev->rdev.stats.pbl.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.ocqp.max = 0; + dev->rdev.stats.ocqp.fail = 0; + dev->rdev.stats.db_full = 0; + dev->rdev.stats.db_empty = 0; + dev->rdev.stats.db_drop = 0; + dev->rdev.stats.db_state_transitions = 0; + mutex_unlock(&dev->rdev.stats.lock); + return count; +} + +static const struct file_operations stats_debugfs_fops = { + .owner = THIS_MODULE, + .open = stats_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = stats_clear, +}; + static int setup_debugfs(struct c4iw_dev *devp) { struct dentry *de; @@ -256,6 +338,12 @@ static int setup_debugfs(struct c4iw_dev *devp) (void *)devp, &stag_debugfs_fops); if (de && de->d_inode) de->d_inode->i_size = 4096; + + de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root, + (void *)devp, &stats_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; } @@ -269,9 +357,13 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, list_for_each_safe(pos, nxt, &uctx->qpids) { entry = list_entry(pos, struct c4iw_qid_list, entry); list_del_init(&entry->entry); - if (!(entry->qid & rdev->qpmask)) - c4iw_put_resource(&rdev->resource.qid_fifo, entry->qid, - &rdev->resource.qid_fifo_lock); + if (!(entry->qid & rdev->qpmask)) { + c4iw_put_resource(&rdev->resource.qid_table, + entry->qid); + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur -= rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + } kfree(entry); } @@ -332,6 +424,13 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) goto err1; } + rdev->stats.pd.total = T4_MAX_NUM_PD; + rdev->stats.stag.total = rdev->lldi.vr->stag.size; + rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; + rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; + rdev->stats.qid.total = rdev->lldi.vr->qp.size; + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); if (err) { printk(KERN_ERR MOD "error %d initializing resources\n", err); @@ -370,12 +469,6 @@ static void c4iw_rdev_close(struct 
c4iw_rdev *rdev) c4iw_destroy_resource(&rdev->resource); } -struct uld_ctx { - struct list_head entry; - struct cxgb4_lld_info lldi; - struct c4iw_dev *dev; -}; - static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); @@ -440,6 +533,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) idr_init(&devp->qpidr); idr_init(&devp->mmidr); spin_lock_init(&devp->lock); + mutex_init(&devp->rdev.stats.lock); + mutex_init(&devp->db_mutex); if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( @@ -585,11 +680,234 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) return 0; } +static int disable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_disable_wq_db(&qp->wq); + return 0; +} + +static void stop_queues(struct uld_ctx *ctx) +{ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state == NORMAL) { + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = FLOW_CONTROL; + idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); + } + spin_unlock_irq(&ctx->dev->lock); +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_enable_wq_db(&qp->wq); + return 0; +} + +static void resume_queues(struct uld_ctx *ctx) +{ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->qpcnt <= db_fc_threshold && + ctx->dev->db_state == FLOW_CONTROL) { + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + } + spin_unlock_irq(&ctx->dev->lock); +} + +struct qp_list { + unsigned idx; + struct c4iw_qp **qps; +}; + +static int add_and_ref_qp(int id, void *p, void *data) +{ + struct qp_list *qp_listp = data; + struct c4iw_qp *qp = p; + + c4iw_qp_add_ref(&qp->ibqp); + qp_listp->qps[qp_listp->idx++] = qp; + return 0; +} + +static int count_qps(int id, void *p, void *data) +{ + unsigned *countp = data; + (*countp)++; + return 0; +} + +static void deref_qps(struct qp_list qp_list) +{ + int idx; + + for (idx = 0; idx < qp_list.idx; idx++) + c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp); +} + +static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) +{ + int idx; + int ret; + + for (idx = 0; idx < qp_list->idx; idx++) { + struct c4iw_qp *qp = qp_list->qps[idx]; + + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.sq.qid, + t4_sq_host_wq_pidx(&qp->wq), + t4_sq_wq_size(&qp->wq)); + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - " + "DB overflow recovery failed - " + "error syncing SQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + return; + } + + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.rq.qid, + t4_rq_host_wq_pidx(&qp->wq), + t4_rq_wq_size(&qp->wq)); + + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - " + "DB overflow recovery failed - " + "error syncing RQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + return; + } + + /* Wait for the dbfifo to drain */ + while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + } +} + +static void recover_queues(struct uld_ctx *ctx) +{ + int count = 0; + struct qp_list qp_list; + int ret; + + /* lock out kernel db ringers */ + mutex_lock(&ctx->dev->db_mutex); + + /* put all queues in to recovery mode */ + spin_lock_irq(&ctx->dev->lock); + ctx->dev->db_state = RECOVERY; + ctx->dev->rdev.stats.db_state_transitions++; + idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); + 
spin_unlock_irq(&ctx->dev->lock); + + /* slow everybody down */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(1000)); + + /* Wait for the dbfifo to completely drain. */ + while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + + /* flush the SGE contexts */ + ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + goto out; + } + + /* Count active queues so we can build a list of queues to recover */ + spin_lock_irq(&ctx->dev->lock); + idr_for_each(&ctx->dev->qpidr, count_qps, &count); + + qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); + if (!qp_list.qps) { + printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + spin_unlock_irq(&ctx->dev->lock); + goto out; + } + qp_list.idx = 0; + + /* add and ref each qp so it doesn't get freed */ + idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list); + + spin_unlock_irq(&ctx->dev->lock); + + /* now traverse the list in a safe context to recover the db state*/ + recover_lost_dbs(ctx, &qp_list); + + /* we're almost done! deref the qps and clean up */ + deref_qps(qp_list); + kfree(qp_list.qps); + + /* Wait for the dbfifo to completely drain again */ + while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + + /* resume the queues */ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->qpcnt > db_fc_threshold) + ctx->dev->db_state = FLOW_CONTROL; + else { + ctx->dev->db_state = NORMAL; + idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + } + ctx->dev->rdev.stats.db_state_transitions++; + spin_unlock_irq(&ctx->dev->lock); + +out: + /* start up kernel db ringers again */ + mutex_unlock(&ctx->dev->db_mutex); +} + +static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) 
+{ + struct uld_ctx *ctx = handle; + + switch (control) { + case CXGB4_CONTROL_DB_FULL: + stop_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_full++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + case CXGB4_CONTROL_DB_EMPTY: + resume_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_empty++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + case CXGB4_CONTROL_DB_DROP: + recover_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_drop++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + default: + printk(KERN_WARNING MOD "%s: unknown control cmd %u\n", + pci_name(ctx->lldi.pdev), control); + break; + } + return 0; +} + static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, + .control = c4iw_uld_control, }; static int __init c4iw_init_module(void) diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 397cb36cf103..cf2f6b47617a 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -84,7 +84,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) struct c4iw_qp *qhp; u32 cqid; - spin_lock(&dev->lock); + spin_lock_irq(&dev->lock); qhp = get_qhp(dev, CQE_QPID(err_cqe)); if (!qhp) { printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d " @@ -93,7 +93,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock(&dev->lock); + spin_unlock_irq(&dev->lock); goto out; } @@ -109,13 +109,13 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock(&dev->lock); + spin_unlock_irq(&dev->lock); goto out; } c4iw_qp_add_ref(&qhp->ibqp); atomic_inc(&chp->refcnt); - spin_unlock(&dev->lock); + spin_unlock_irq(&dev->lock); /* Bad incoming write */ if (RQ_TYPE(err_cqe) && diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c new file mode 100644 index 000000000000..f95e5df30db2 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2011 Chelsio Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/kernel.h> +#include <linux/random.h> +#include "iw_cxgb4.h" + +#define RANDOM_SKIP 16 + +/* + * Trivial bitmap-based allocator. If the random flag is set, the + * allocator is designed to: + * - pseudo-randomize the id returned such that it is not trivially predictable. + * - avoid reuse of recently used id (at the expense of predictability) + */ +u32 c4iw_id_alloc(struct c4iw_id_table *alloc) +{ + unsigned long flags; + u32 obj; + + spin_lock_irqsave(&alloc->lock, flags); + + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) + obj = find_first_zero_bit(alloc->table, alloc->max); + + if (obj < alloc->max) { + if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last += random32() % RANDOM_SKIP; + else + alloc->last = obj + 1; + if (alloc->last >= alloc->max) + alloc->last = 0; + set_bit(obj, alloc->table); + obj += alloc->start; + } else + obj = -1; + + spin_unlock_irqrestore(&alloc->lock, flags); + return obj; +} + +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj) +{ + unsigned long flags; + + obj -= alloc->start; + BUG_ON((int)obj < 0); + + spin_lock_irqsave(&alloc->lock, flags); + clear_bit(obj, alloc->table); + spin_unlock_irqrestore(&alloc->lock, flags); +} + +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags) +{ + int i; + + alloc->start = start; + alloc->flags = flags; + if (flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last = random32() % RANDOM_SKIP; + else + alloc->last = 0; + alloc->max = num; + spin_lock_init(&alloc->lock); + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long), + GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + bitmap_zero(alloc->table, num); + if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY)) + for (i = 0; i < reserved; ++i) + set_bit(i, alloc->table); + + return 0; +} + +void c4iw_id_table_free(struct c4iw_id_table *alloc) +{ + kfree(alloc->table); +} diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 1357c5bf209b..9beb3a9f0336 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -45,7 +45,6 @@ #include <linux/kref.h> #include <linux/timer.h> #include <linux/io.h> -#include <linux/kfifo.h> #include <asm/byteorder.h> @@ -79,13 +78,22 @@ static inline void *cplhdr(struct sk_buff *skb) return skb->data; } +#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */ +#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */ + +struct c4iw_id_table { + u32 flags; + u32 start; /* logical minimal id */ + u32 last; /* hint for find */ + u32 max; + spinlock_t lock; + unsigned long *table; +}; + struct c4iw_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qid_fifo; - spinlock_t qid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; + struct c4iw_id_table tpt_table; + struct c4iw_id_table qid_table; + struct c4iw_id_table pdid_table; }; struct c4iw_qid_list { @@ -103,6 +111,27 @@ enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), }; +struct c4iw_stat { + u64 total; + u64 cur; + u64 max; + u64 fail; +}; + +struct c4iw_stats { + struct mutex lock; + struct c4iw_stat qid; + struct c4iw_stat pd; + struct c4iw_stat stag; + struct 
c4iw_stat pbl; + struct c4iw_stat rqt; + struct c4iw_stat ocqp; + u64 db_full; + u64 db_empty; + u64 db_drop; + u64 db_state_transitions; +}; + struct c4iw_rdev { struct c4iw_resource resource; unsigned long qpshift; @@ -117,6 +146,7 @@ struct c4iw_rdev { struct cxgb4_lld_info lldi; unsigned long oc_mw_pa; void __iomem *oc_mw_kva; + struct c4iw_stats stats; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -175,6 +205,12 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, return wr_waitp->ret; } +enum db_state { + NORMAL = 0, + FLOW_CONTROL = 1, + RECOVERY = 2 +}; + struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; @@ -183,7 +219,10 @@ struct c4iw_dev { struct idr qpidr; struct idr mmidr; spinlock_t lock; + struct mutex db_mutex; struct dentry *debugfs_root; + enum db_state db_state; + int qpcnt; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -211,29 +250,57 @@ static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) return idr_find(&rhp->mmidr, mmid); } -static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id) +static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id, int lock) { int ret; int newid; do { - if (!idr_pre_get(idr, GFP_KERNEL)) + if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) return -ENOMEM; - spin_lock_irq(&rhp->lock); + if (lock) + spin_lock_irq(&rhp->lock); ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(newid != id); - spin_unlock_irq(&rhp->lock); + BUG_ON(!ret && newid != id); + if (lock) + spin_unlock_irq(&rhp->lock); } while (ret == -EAGAIN); return ret; } -static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) +static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + return _insert_handle(rhp, idr, handle, id, 1); +} + +static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + return _insert_handle(rhp, idr, handle, id, 0); +} + +static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, + u32 id, int lock) { - spin_lock_irq(&rhp->lock); + if (lock) + spin_lock_irq(&rhp->lock); idr_remove(idr, id); - spin_unlock_irq(&rhp->lock); + if (lock) + spin_unlock_irq(&rhp->lock); +} + +static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) +{ + _remove_handle(rhp, idr, id, 1); +} + +static inline void remove_handle_nolock(struct c4iw_dev *rhp, + struct idr *idr, u32 id) +{ + _remove_handle(rhp, idr, id, 0); } struct c4iw_pd { @@ -353,6 +420,8 @@ struct c4iw_qp_attributes { struct c4iw_ep *llp_stream_handle; u8 layer_etype; u8 ecode; + u16 sq_db_inc; + u16 rq_db_inc; }; struct c4iw_qp { @@ -427,6 +496,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext, enum c4iw_qp_attr_mask { C4IW_QP_ATTR_NEXT_STATE = 1 << 0, + C4IW_QP_ATTR_SQ_DB = 1<<1, + C4IW_QP_ATTR_RQ_DB = 1<<2, C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, @@ -480,6 +551,23 @@ static inline int c4iw_convert_state(enum ib_qp_state ib_state) } } +static inline int to_ib_qp_state(int c4iw_qp_state) +{ + switch (c4iw_qp_state) { + case C4IW_QP_STATE_IDLE: + return IB_QPS_INIT; + case C4IW_QP_STATE_RTS: + return IB_QPS_RTS; + case C4IW_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C4IW_QP_STATE_TERMINATE: + return IB_QPS_SQE; + case C4IW_QP_STATE_ERROR: + return IB_QPS_ERR; + } + return IB_QPS_ERR; +} + static 
inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | @@ -693,14 +781,20 @@ static inline int compute_wscale(int win) return wscale; } +u32 c4iw_id_alloc(struct c4iw_id_table *alloc); +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj); +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags); +void c4iw_id_table_free(struct c4iw_id_table *alloc); + typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb); int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t); void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, struct c4iw_dev_ucontext *uctx); -u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock); -void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock); +u32 c4iw_get_resource(struct c4iw_id_table *id_table); +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); @@ -769,6 +863,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_udata *udata); int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn); u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); @@ -797,5 +893,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; extern int c4iw_max_read_depth; +extern int db_fc_threshold; + #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 40c835309e49..57e07c61ace2 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -131,10 +131,14 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, stag_idx = (*stag) >> 8; if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { - stag_idx = c4iw_get_resource(&rdev->resource.tpt_fifo, - &rdev->resource.tpt_fifo_lock); + stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); if (!stag_idx) return -ENOMEM; + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur += 32; + if (rdev->stats.stag.cur > rdev->stats.stag.max) + rdev->stats.stag.max = rdev->stats.stag.cur; + mutex_unlock(&rdev->stats.lock); *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); } PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", @@ -165,9 +169,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, (rdev->lldi.vr->stag.start >> 5), sizeof(tpt), &tpt); - if (reset_tpt_entry) - c4iw_put_resource(&rdev->resource.tpt_fifo, stag_idx, - &rdev->resource.tpt_fifo_lock); + if (reset_tpt_entry) { + c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur -= 32; + mutex_unlock(&rdev->stats.lock); + } return err; } @@ -686,8 +693,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw) mhp = to_c4iw_mw(mw); rhp = mhp->rhp; mmid = (mw->rkey) >> 8; - deallocate_window(&rhp->rdev, mhp->attr.stag); remove_handle(rhp, &rhp->mmidr, mmid); + deallocate_window(&rhp->rdev, mhp->attr.stag); kfree(mhp); PDBG("%s ib_mw %p mmid 0x%x ptr 
%p\n", __func__, mw, mmid, mhp); return 0; @@ -789,12 +796,12 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; + remove_handle(rhp, &rhp->mmidr, mmid); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (mhp->attr.pbl_size) c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); - remove_handle(rhp, &rhp->mmidr, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); if (mhp->umem) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index be1c18f44400..e084fdc6da7f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -188,8 +188,10 @@ static int c4iw_deallocate_pd(struct ib_pd *pd) php = to_c4iw_pd(pd); rhp = php->rhp; PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, php->pdid, - &rhp->rdev.resource.pdid_fifo_lock); + c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid); + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur--; + mutex_unlock(&rhp->rdev.stats.lock); kfree(php); return 0; } @@ -204,14 +206,12 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, PDBG("%s ibdev %p\n", __func__, ibdev); rhp = (struct c4iw_dev *) ibdev; - pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_fifo, - &rhp->rdev.resource.pdid_fifo_lock); + pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); if (!pdid) return ERR_PTR(-EINVAL); php = kzalloc(sizeof(*php), GFP_KERNEL); if (!php) { - c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, pdid, - &rhp->rdev.resource.pdid_fifo_lock); + c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); return ERR_PTR(-ENOMEM); } php->pdid = pdid; @@ -222,6 +222,11 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, return ERR_PTR(-EFAULT); } } + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur++; + if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max) + rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; + mutex_unlock(&rhp->rdev.stats.lock); PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); return &php->ibpd; } @@ -438,6 +443,7 @@ int c4iw_register_device(struct c4iw_dev *dev) (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | @@ -460,6 +466,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.destroy_ah = c4iw_ah_destroy; dev->ibdev.create_qp = c4iw_create_qp; dev->ibdev.modify_qp = c4iw_ib_modify_qp; + dev->ibdev.query_qp = c4iw_ib_query_qp; dev->ibdev.destroy_qp = c4iw_destroy_qp; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5f940aeaab1e..45aedf1d9338 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -34,10 +34,19 @@ #include "iw_cxgb4.h" +static int db_delay_usecs = 1; +module_param(db_delay_usecs, int, 0644); +MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain"); + static int ocqp_support = 1; module_param(ocqp_support, int, 0644); MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); +int db_fc_threshold = 2000; +module_param(db_fc_threshold, int, 0644); +MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold 
that triggers automatic " + "db flow control mode (default = 2000)"); + static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { unsigned long flag; @@ -1128,6 +1137,35 @@ out: return ret; } +/* + * Called by the library when the qp has user dbs disabled due to + * a DB_FULL condition. This function will single-thread all user + * DB rings to avoid overflowing the hw db-fifo. + */ +static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) +{ + int delay = db_delay_usecs; + + mutex_lock(&qhp->rhp->db_mutex); + do { + + /* + * The interrupt threshold is dbfifo_int_thresh << 6. So + * make sure we don't cross that and generate an interrupt. + */ + if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < + (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { + writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db); + break; + } + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(delay)); + delay = min(delay << 1, 2000); + } while (1); + mutex_unlock(&qhp->rhp->db_mutex); + return 0; +} + int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, @@ -1176,6 +1214,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr = newattr; } + if (mask & C4IW_QP_ATTR_SQ_DB) { + ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc); + goto out; + } + if (mask & C4IW_QP_ATTR_RQ_DB) { + ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc); + goto out; + } + if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) goto out; if (qhp->attr.state == attrs->next_state) @@ -1352,6 +1399,14 @@ out: return ret; } +static int enable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_enable_wq_db(&qp->wq); + return 0; +} + int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; @@ -1369,7 +1424,16 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); + spin_lock_irq(&rhp->lock); + remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); + rhp->qpcnt--; + BUG_ON(rhp->qpcnt < 0); + if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { + rhp->rdev.stats.db_state_transitions++; + rhp->db_state = NORMAL; + idr_for_each(&rhp->qpidr, enable_qp_db, NULL); + } + spin_unlock_irq(&rhp->lock); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); @@ -1383,6 +1447,14 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) return 0; } +static int disable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_disable_wq_db(&qp->wq); + return 0; +} + struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { @@ -1469,7 +1541,16 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + spin_lock_irq(&rhp->lock); + if (rhp->db_state != NORMAL) + t4_disable_wq_db(&qhp->wq); + if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { + rhp->rdev.stats.db_state_transitions++; + rhp->db_state = FLOW_CONTROL; + idr_for_each(&rhp->qpidr, disable_qp_db, NULL); + } + ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + spin_unlock_irq(&rhp->lock); if (ret) goto err2; @@ -1613,6 +1694,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, C4IW_QP_ATTR_ENABLE_RDMA_WRITE | 
C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; + /* + * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for + * ringing the queue db when we're in DB_FULL mode. + */ + attrs.sq_db_inc = attr->sq_psn; + attrs.rq_db_inc = attr->rq_psn; + mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; + mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } @@ -1621,3 +1711,14 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } + +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + + memset(attr, 0, sizeof *attr); + memset(init_attr, 0, sizeof *init_attr); + attr->qp_state = to_ib_qp_state(qhp->attr.state); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 407ff3924150..cdef4d7fb6d8 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -30,96 +30,25 @@ * SOFTWARE. */ /* Crude resource management */ -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> #include <linux/spinlock.h> -#include <linux/errno.h> #include <linux/genalloc.h> #include <linux/ratelimit.h> #include "iw_cxgb4.h" -#define RANDOM_SIZE 16 - -static int __c4iw_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = random32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = random32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock)) - break; - return 0; -} - -static int c4iw_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0); -} - -static int c4iw_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1); -} - -static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev) +static int c4iw_init_qid_table(struct c4iw_rdev *rdev) { u32 i; - spin_lock_init(&rdev->resource.qid_fifo_lock); - - if (kfifo_alloc(&rdev->resource.qid_fifo, rdev->lldi.vr->qp.size * - sizeof(u32), GFP_KERNEL)) + if (c4iw_id_table_alloc(&rdev->resource.qid_table, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->qp.size, 0)) return -ENOMEM; for (i = rdev->lldi.vr->qp.start; - i < rdev->lldi.vr->qp.start + 
rdev->lldi.vr->qp.size; i++) + i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) if (!(i & rdev->qpmask)) - kfifo_in(&rdev->resource.qid_fifo, - (unsigned char *) &i, sizeof(u32)); + c4iw_id_free(&rdev->resource.qid_table, i); return 0; } @@ -127,44 +56,42 @@ static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev) int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) { int err = 0; - err = c4iw_init_resource_fifo_random(&rdev->resource.tpt_fifo, - &rdev->resource.tpt_fifo_lock, - nr_tpt, 1, 0); + err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, + C4IW_ID_TABLE_F_RANDOM); if (err) goto tpt_err; - err = c4iw_init_qid_fifo(rdev); + err = c4iw_init_qid_table(rdev); if (err) goto qid_err; - err = c4iw_init_resource_fifo(&rdev->resource.pdid_fifo, - &rdev->resource.pdid_fifo_lock, - nr_pdid, 1, 0); + err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0, + nr_pdid, 1, 0); if (err) goto pdid_err; return 0; -pdid_err: - kfifo_free(&rdev->resource.qid_fifo); -qid_err: - kfifo_free(&rdev->resource.tpt_fifo); -tpt_err: + pdid_err: + c4iw_id_table_free(&rdev->resource.qid_table); + qid_err: + c4iw_id_table_free(&rdev->resource.tpt_table); + tpt_err: return -ENOMEM; } /* * returns 0 if no resource available */ -u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock) +u32 c4iw_get_resource(struct c4iw_id_table *id_table) { u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else + entry = c4iw_id_alloc(id_table); + if (entry == (u32)(-1)) return 0; + return entry; } -void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock) +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry) { PDBG("%s entry 0x%x\n", __func__, entry); - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock); + c4iw_id_free(id_table, entry); } u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) @@ -181,10 +108,12 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) qid = entry->qid; kfree(entry); } else { - qid = c4iw_get_resource(&rdev->resource.qid_fifo, - &rdev->resource.qid_fifo_lock); + qid = c4iw_get_resource(&rdev->resource.qid_table); if (!qid) goto out; + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); for (i = qid+1; i & rdev->qpmask; i++) { entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) @@ -213,6 +142,10 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) out: mutex_unlock(&uctx->lock); PDBG("%s qid 0x%x\n", __func__, qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); return qid; } @@ -245,10 +178,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) qid = entry->qid; kfree(entry); } else { - qid = c4iw_get_resource(&rdev->resource.qid_fifo, - &rdev->resource.qid_fifo_lock); + qid = c4iw_get_resource(&rdev->resource.qid_table); if (!qid) goto out; + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); for (i = qid+1; i & rdev->qpmask; i++) { entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) @@ -277,6 +212,10 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) out: mutex_unlock(&uctx->lock); PDBG("%s qid 0x%x\n", __func__, qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + 
rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); return qid; } @@ -297,9 +236,9 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, void c4iw_destroy_resource(struct c4iw_resource *rscp) { - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->qid_fifo); - kfifo_free(&rscp->pdid_fifo); + c4iw_id_table_free(&rscp->tpt_table); + c4iw_id_table_free(&rscp->qid_table); + c4iw_id_table_free(&rscp->pdid_table); } /* @@ -312,15 +251,23 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - if (!addr) - printk_ratelimited(KERN_WARNING MOD "%s: Out of PBL memory\n", - pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); + if (rdev->stats.pbl.cur > rdev->stats.pbl.max) + rdev->stats.pbl.max = rdev->stats.pbl.cur; + } else + rdev->stats.pbl.fail++; + mutex_unlock(&rdev->stats.lock); return (u32)addr; } void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); + mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); } @@ -377,12 +324,23 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) if (!addr) printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); + if (rdev->stats.rqt.cur > rdev->stats.rqt.max) + rdev->stats.rqt.max = rdev->stats.rqt.cur; + } else + rdev->stats.rqt.fail++; + mutex_unlock(&rdev->stats.lock); return (u32)addr; } void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); + mutex_lock(&rdev->stats.lock); + rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); + mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); } @@ -433,12 +391,22 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + if (addr) { + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT); + if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max) + rdev->stats.ocqp.max = rdev->stats.ocqp.cur; + mutex_unlock(&rdev->stats.lock); + } return (u32)addr; } void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) { PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT); + mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index c0221eec8817..16f26ab29302 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -62,6 +62,10 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; + u8 pad; + u16 host_wq_pidx; + u16 host_cidx; + u16 host_pidx; }; #define T4_EQ_ENTRY_SIZE 64 @@ -375,6 +379,16 @@ static inline void t4_rq_consume(struct t4_wq *wq) wq->rq.cidx = 0; } +static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->rq.queue[wq->rq.size].status.host_wq_pidx; +} + +static 
inline u16 t4_rq_wq_size(struct t4_wq *wq) +{ + return wq->rq.size * T4_RQ_NUM_SLOTS; +} + static inline int t4_sq_onchip(struct t4_sq *sq) { return sq->flags & T4_SQ_ONCHIP; @@ -412,6 +426,16 @@ static inline void t4_sq_consume(struct t4_wq *wq) wq->sq.cidx = 0; } +static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->sq.queue[wq->sq.size].status.host_wq_pidx; +} + +static inline u16 t4_sq_wq_size(struct t4_wq *wq) +{ + return wq->sq.size * T4_SQ_NUM_SLOTS; +} + static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) { wmb(); diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index e6669d54770e..32b754c35ab7 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -32,7 +32,7 @@ #ifndef __C4IW_USER_H__ #define __C4IW_USER_H__ -#define C4IW_UVERBS_ABI_VERSION 1 +#define C4IW_UVERBS_ABI_VERSION 2 /* * Make sure that all structs defined in this file remain laid out so diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 1d7aea132a09..7cc305488a3d 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -596,8 +596,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, ipath_format_hwerrors(hwerrs, ipath_6110_hwerror_msgs, - sizeof(ipath_6110_hwerror_msgs) / - sizeof(ipath_6110_hwerror_msgs[0]), + ARRAY_SIZE(ipath_6110_hwerror_msgs), msg, msgl); if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index c0a03ac03ee7..26dfbc8ee0f1 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -209,8 +209,7 @@ void ipath_format_hwerrors(u64 hwerrs, { int i; const int glen = - sizeof(ipath_generic_hwerror_msgs) / - sizeof(ipath_generic_hwerror_msgs[0]); + ARRAY_SIZE(ipath_generic_hwerror_msgs); for (i=0; i<glen; i++) { if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 77c8cb4c5073..6d4ef71cbcdf 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -50,7 +50,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) struct ib_cq *ibcq; if (type != MLX4_EVENT_TYPE_CQ_ERROR) { - printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + pr_warn("Unexpected event type %d " "on CQ %06x\n", type, cq->cqn); return; } @@ -222,6 +222,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &dev->priv_uar; } + if (dev->eq_table) + vector = dev->eq_table[vector % ibdev->num_comp_vectors]; + err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma, &cq->mcq, vector, 0); if (err) @@ -463,7 +466,7 @@ static void dump_cqe(void *cqe) { __be32 *buf = cqe; - printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); @@ -473,7 +476,7 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, struct ib_wc *wc) { if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { - printk(KERN_DEBUG "local QP operation err " + pr_debug("local QP operation err " "(QPN %06x, WQE index %x, vendor syndrome %02x, " "opcode = 
%02x)\n", be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), @@ -576,7 +579,7 @@ repoll: if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && is_send)) { - printk(KERN_WARNING "Completion for NOP opcode detected!\n"); + pr_warn("Completion for NOP opcode detected!\n"); return -EINVAL; } @@ -606,7 +609,7 @@ repoll: mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->vlan_my_qpn)); if (unlikely(!mqp)) { - printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", + pr_warn("CQ %06x with entry for unknown QPN %06x\n", cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b948b6dd5d55..ee1c577238f7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -789,7 +789,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) list_del(&ge->list); kfree(ge); } else - printk(KERN_WARNING "could not find mgid entry\n"); + pr_warn("could not find mgid entry\n"); mutex_unlock(&mqp->mutex); @@ -902,7 +902,7 @@ static void update_gids_task(struct work_struct *work) mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { - printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox)); + pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); return; } @@ -913,7 +913,7 @@ static void update_gids_task(struct work_struct *work) 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) - printk(KERN_WARNING "set port command failed\n"); + pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); event.device = &gw->dev->ib_dev; @@ -1076,18 +1076,98 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event return NOTIFY_DONE; } +static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) +{ + char name[32]; + int eq_per_port = 0; + int added_eqs = 0; + int total_eqs = 0; + int i, j, eq; + + /* Init eq table */ + ibdev->eq_table = NULL; + ibdev->eq_added = 0; + + /* Legacy mode? 
*/ + if (dev->caps.comp_pool == 0) + return; + + eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ + dev->caps.num_ports); + + /* Init eq table */ + added_eqs = 0; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + added_eqs += eq_per_port; + + total_eqs = dev->caps.num_comp_vectors + added_eqs; + + ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); + if (!ibdev->eq_table) + return; + + ibdev->eq_added = added_eqs; + + eq = 0; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { + for (j = 0; j < eq_per_port; j++) { + sprintf(name, "mlx4-ib-%d-%d@%s", + i, j, dev->pdev->bus->name); + /* Set IRQ for specific name (per ring) */ + if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { + /* Use legacy (same as mlx4_en driver) */ + pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); + ibdev->eq_table[eq] = + (eq % dev->caps.num_comp_vectors); + } + eq++; + } + } + + /* Fill the rest of the vector with legacy EQs */ + for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) + ibdev->eq_table[eq++] = i; + + /* Advertise the new number of EQs to clients */ + ibdev->ib_dev.num_comp_vectors = total_eqs; +} + +static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) +{ + int i; + int total_eqs; + + /* Reset the advertised EQ number */ + ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; + + /* Free only the added eqs */ + for (i = 0; i < ibdev->eq_added; i++) { + /* Don't free legacy eqs if used */ + if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) + continue; + mlx4_release_eq(dev, ibdev->eq_table[i]); + } + + total_eqs = dev->caps.num_comp_vectors + ibdev->eq_added; + memset(ibdev->eq_table, 0, total_eqs * sizeof(int)); + kfree(ibdev->eq_table); + + ibdev->eq_table = NULL; + ibdev->eq_added = 0; +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; - int i; + int i, j; int err; struct mlx4_ib_iboe *iboe; - printk_once(KERN_INFO "%s", mlx4_ib_version); + pr_info_once("%s", mlx4_ib_version); if (mlx4_is_mfunc(dev)) { - printk(KERN_WARNING "IB not yet supported in SRIOV\n"); + pr_warn("IB not yet supported in SRIOV\n"); return NULL; } @@ -1210,6 +1290,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + mlx4_ib_alloc_eqs(dev, ibdev); + spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) @@ -1241,9 +1323,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_reg; } - for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { + for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { if (device_create_file(&ibdev->ib_dev.dev, - mlx4_class_attributes[i])) + mlx4_class_attributes[j])) goto err_notif; } @@ -1253,7 +1335,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) err_notif: if (unregister_netdevice_notifier(&ibdev->iboe.nb)) - printk(KERN_WARNING "failure unregistering notifier\n") + pr_warn("failure unregistering notifier\n"); flush_workqueue(wq); err_reg: @@ -1288,7 +1370,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ib_unregister_device(&ibdev->ib_dev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) - printk(KERN_WARNING "failure unregistering notifier\n"); + pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } iounmap(ibdev->uar_map); @@ -1298,6 +1380,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); + mlx4_ib_free_eqs(dev, ibdev); + 
mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ed80345c99ae..e62297cc77cc 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -202,6 +202,8 @@ struct mlx4_ib_dev { bool ib_active; struct mlx4_ib_iboe iboe; int counters[MLX4_MAX_PORTS]; + int *eq_table; + int eq_added; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index dca55b19a6f1..bbaf6176f207 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -338,7 +338,7 @@ int mlx4_ib_unmap_fmr(struct list_head *fmr_list) err = mlx4_SYNC_TPT(mdev); if (err) - printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when " + pr_warn("SYNC_TPT error %d when " "unmapping FMRs\n", err); return 0; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3a7848966627..ceb33327091a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -84,6 +84,11 @@ enum { MLX4_IB_CACHE_LINE_SIZE = 64, }; +enum { + MLX4_RAW_QP_MTU = 7, + MLX4_RAW_QP_MSGMAX = 31, +}; + static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), @@ -256,7 +261,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) event.event = IB_EVENT_QP_ACCESS_ERR; break; default: - printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + pr_warn("Unexpected event type %d " "on QP %06x\n", type, qp->qpn); return; } @@ -573,7 +578,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (sqpn) { qpn = sqpn; } else { - err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); + /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE + * BlueFlame setup flow wrongly causes VLAN insertion. */ + if (init_attr->qp_type == IB_QPT_RAW_PACKET) + err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn); + else + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); if (err) goto err_wrid; } @@ -715,7 +725,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (qp->state != IB_QPS_RESET) if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) - printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", + pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); get_cqs(qp, &send_cq, &recv_cq); @@ -791,6 +801,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: + case IB_QPT_RAW_PACKET: { qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) @@ -872,7 +883,8 @@ static int to_mlx4_st(enum ib_qp_type type) case IB_QPT_XRC_INI: case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; case IB_QPT_SMI: - case IB_QPT_GSI: return MLX4_QP_ST_MLX; + case IB_QPT_GSI: + case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; default: return -1; } } @@ -946,7 +958,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { - printk(KERN_ERR "sgid_index (%u) too large. max is %d\n", + pr_err("sgid_index (%u) too large. 
max is %d\n", ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); return -1; } @@ -1042,6 +1054,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + else if (ibqp->qp_type == IB_QPT_RAW_PACKET) + context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; else if (ibqp->qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | @@ -1050,7 +1064,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { - printk(KERN_ERR "path MTU (%u) is invalid\n", + pr_err("path MTU (%u) is invalid\n", attr->path_mtu); goto out; } @@ -1200,7 +1214,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD)) { + ibqp->qp_type == IB_QPT_UD || + ibqp->qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (is_qp0(dev, qp)) context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; @@ -1266,7 +1281,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) if (mlx4_INIT_PORT(dev->dev, qp->port)) - printk(KERN_WARNING "INIT_PORT failed for port %d\n", + pr_warn("INIT_PORT failed for port %d\n", qp->port); if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && @@ -1319,6 +1334,11 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) && + (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) != + IB_LINK_LAYER_ETHERNET)) + goto out; + if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) @@ -1424,6 +1444,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (is_eth) { u8 *smac; + u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; + + mlx->sched_prio = cpu_to_be16(pcp); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); /* FIXME: cache smac value? 
*/ @@ -1434,10 +1457,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (!is_vlan) { sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); } else { - u16 pcp; - sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); - pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { @@ -1460,16 +1480,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); if (0) { - printk(KERN_ERR "built UD header of size %d:\n", header_size); + pr_err("built UD header of size %d:\n", header_size); for (i = 0; i < header_size / 4; ++i) { if (i % 8 == 0) - printk(" [%02x] ", i * 4); - printk(" %08x", - be32_to_cpu(((__be32 *) sqp->header_buf)[i])); + pr_err(" [%02x] ", i * 4); + pr_cont(" %08x", + be32_to_cpu(((__be32 *) sqp->header_buf)[i])); if ((i + 1) % 8 == 0) - printk("\n"); + pr_cont("\n"); } - printk("\n"); + pr_err("\n"); } /* diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 39542f3703b8..60c5fb025fc7 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -59,7 +59,7 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) event.event = IB_EVENT_SRQ_ERR; break; default: - printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + pr_warn("Unexpected event type %d " "on SRQ %06x\n", type, srq->srqn); return; } diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 71edfbbcce1c..020e95c4c4b9 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2884,7 +2884,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) ibevent.device = nesqp->ibqp.device; ibevent.event = nesqp->terminate_eventtype; ibevent.element.qp = &nesqp->ibqp; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + if (nesqp->ibqp.event_handler) + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } } @@ -3320,6 +3321,10 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->private_data_len = conn_param->private_data_len; nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + /* space for rdma0 read msg */ + if (conn_param->ord == 0) + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1); + nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 6b811e3e8bd1..7e62f4137148 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -530,8 +530,6 @@ struct qib_pportdata { /* qib_lflags driver is waiting for */ u32 state_wanted; spinlock_t lflags_lock; - /* number of (port-specific) interrupts for this port -- saturates... 
*/ - u32 int_counter; /* ref count for each pkey */ atomic_t pkeyrefs[4]; @@ -543,24 +541,26 @@ struct qib_pportdata { u64 *statusp; /* SendDMA related entries */ - spinlock_t sdma_lock; - struct qib_sdma_state sdma_state; - unsigned long sdma_buf_jiffies; + + /* read mostly */ struct qib_sdma_desc *sdma_descq; + struct qib_sdma_state sdma_state; + dma_addr_t sdma_descq_phys; + volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ + dma_addr_t sdma_head_phys; + u16 sdma_descq_cnt; + + /* read/write using lock */ + spinlock_t sdma_lock ____cacheline_aligned_in_smp; + struct list_head sdma_activelist; u64 sdma_descq_added; u64 sdma_descq_removed; - u16 sdma_descq_cnt; u16 sdma_descq_tail; u16 sdma_descq_head; - u16 sdma_next_intr; - u16 sdma_reset_wait; u8 sdma_generation; - struct tasklet_struct sdma_sw_clean_up_task; - struct list_head sdma_activelist; - dma_addr_t sdma_descq_phys; - volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ - dma_addr_t sdma_head_phys; + struct tasklet_struct sdma_sw_clean_up_task + ____cacheline_aligned_in_smp; wait_queue_head_t state_wait; /* for state_wanted */ @@ -873,7 +873,14 @@ struct qib_devdata { * pio_writing. */ spinlock_t pioavail_lock; - + /* + * index of last buffer to optimize search for next + */ + u32 last_pio; + /* + * min kernel pio buffer to optimize search + */ + u32 min_kernel_pio; /* * Shadow copies of registers; size indicates read access size. * Most of them are readonly, but some are write-only register, diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 6fc9365ba8a6..8895cfec5019 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -38,6 +38,7 @@ #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/prefetch.h> #include "qib.h" @@ -481,8 +482,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) etail = qib_hdrget_index(rhf_addr); updegr = 1; if (tlen > sizeof(*hdr) || - etype >= RCVHQ_RCV_TYPE_NON_KD) + etype >= RCVHQ_RCV_TYPE_NON_KD) { ebuf = qib_get_egrbuf(rcd, etail); + prefetch_range(ebuf, tlen - sizeof(*hdr)); + } } if (!eflags) { u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index d0c64d514813..4d352b90750a 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3132,6 +3132,7 @@ static void get_6120_chip_params(struct qib_devdata *dd) val = qib_read_kreg64(dd, kr_sendpiobufcnt); dd->piobcnt2k = val & ~0U; dd->piobcnt4k = val >> 32; + dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1; /* these may be adjusted in init_chip_wc_pat() */ dd->pio2kbase = (u32 __iomem *) (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 3c722f79d6f6..86a0ba7ca0c2 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4157,6 +4157,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->cspec->sdmabufcnt; dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; dd->pbufsctxt = dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 060b96064469..c881e744c091 100644 --- 
a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6379,6 +6379,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->cspec->sdmabufcnt; dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ? dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0; @@ -7708,7 +7709,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); msleep(20); /* Set Frequency Loop Bandwidth */ - ibsd_wr_allchans(ppd, 2, (7 << 5), BMASK(8, 5)); + ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5)); /* Enable Frequency Loop */ ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4)); /* Set Timing Loop Bandwidth */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index cf0cd30adc8d..dc14e100a7f1 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -102,6 +102,8 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) dd->cfgctxts = qib_cfgctxts; else dd->cfgctxts = dd->ctxtcnt; + dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + dd->cfgctxts - dd->first_user_ctxt; } /* @@ -402,7 +404,6 @@ static void enable_chip(struct qib_devdata *dd) if (rcd) dd->f_rcvctrl(rcd->ppd, rcvmask, i); } - dd->freectxts = dd->cfgctxts - dd->first_user_ctxt; } static void verify_interrupt(unsigned long opaque) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index c4ff788823b5..43390217a026 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -396,6 +396,7 @@ static int get_linkdowndefaultstate(struct qib_pportdata *ppd) static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) { + int valid_mkey = 0; int ret = 0; /* Is the mkey in the process of expiring? */ @@ -406,23 +407,36 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) ibp->mkeyprot = 0; } - /* M_Key checking depends on Portinfo:M_Key_protect_bits */ - if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 && - ibp->mkey != smp->mkey && - (smp->method == IB_MGMT_METHOD_SET || - smp->method == IB_MGMT_METHOD_TRAP_REPRESS || - (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) { - if (ibp->mkey_violations != 0xFFFF) - ++ibp->mkey_violations; - if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) - ibp->mkey_lease_timeout = jiffies + - ibp->mkey_lease_period * HZ; - /* Generate a trap notice. 
*/ - qib_bad_mkey(ibp, smp); - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - } else if (ibp->mkey_lease_timeout) + if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || + ibp->mkey == smp->mkey) + valid_mkey = 1; + + /* Unset lease timeout on any valid Get/Set/TrapRepress */ + if (valid_mkey && ibp->mkey_lease_timeout && + (smp->method == IB_MGMT_METHOD_GET || + smp->method == IB_MGMT_METHOD_SET || + smp->method == IB_MGMT_METHOD_TRAP_REPRESS)) ibp->mkey_lease_timeout = 0; + if (!valid_mkey) { + switch (smp->method) { + case IB_MGMT_METHOD_GET: + /* Bad mkey not a violation below level 2 */ + if (ibp->mkeyprot < 2) + break; + case IB_MGMT_METHOD_SET: + case IB_MGMT_METHOD_TRAP_REPRESS: + if (ibp->mkey_violations != 0xFFFF) + ++ibp->mkey_violations; + if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) + ibp->mkey_lease_timeout = jiffies + + ibp->mkey_lease_period * HZ; + /* Generate a trap notice. */ + qib_bad_mkey(ibp, smp); + ret = 1; + } + } + return ret; } @@ -450,6 +464,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ibp = to_iport(ibdev, port_num); ret = check_mkey(ibp, smp, 0); if (ret) + ret = IB_MAD_RESULT_FAILURE; goto bail; } } @@ -631,7 +646,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, struct qib_devdata *dd; struct qib_pportdata *ppd; struct qib_ibport *ibp; - char clientrereg = 0; + u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80); unsigned long flags; u16 lid, smlid; u8 lwe; @@ -781,12 +796,6 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; - if (pip->clientrereg_resv_subnetto & 0x80) { - clientrereg = 1; - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } - /* * Do the port state change now that the other link parameters * have been set. 
@@ -844,10 +853,15 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, smp->status |= IB_SMP_INVALID_FIELD; } + if (clientrereg) { + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + ret = subn_get_portinfo(smp, ibdev, port); - if (clientrereg) - pip->clientrereg_resv_subnetto |= 0x80; + /* restore re-reg bit per o14-12.2.1 */ + pip->clientrereg_resv_subnetto |= clientrereg; goto get_only; @@ -1835,6 +1849,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, port_num && port_num <= ibdev->phys_port_cnt && port != port_num) (void) check_mkey(to_iport(ibdev, port_num), smp, 0); + ret = IB_MAD_RESULT_FAILURE; goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 7e7e16fbee99..1ce56b51ab1a 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1038,6 +1038,11 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + if (!qp->s_hdr) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); @@ -1159,6 +1164,7 @@ bail_ip: vfree(qp->r_rq.wq); free_qpn(&dev->qpn_table, qp->ibqp.qp_num); bail_qp: + kfree(qp->s_hdr); kfree(qp); bail_swq: vfree(swq); @@ -1214,6 +1220,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) else vfree(qp->r_rq.wq); vfree(qp->s_wq); + kfree(qp->s_hdr); kfree(qp); return 0; } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 765b4cbaa020..b641416148eb 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -244,9 +244,9 @@ int qib_make_rc_req(struct qib_qp *qp) int ret = 0; int delta; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * The lock is needed to synchronize between the sending tasklet, diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b4b37e47321a..c0ee7e095d81 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -688,17 +688,17 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = QIB_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; } lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -758,7 +758,7 @@ void qib_do_send(struct work_struct *work) * If the packet cannot be sent now, return and * the send tasklet will be woken up later. 
*/ - if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) break; /* Record that s_hdr is empty. */ diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index dae51604cfcd..dd9cd49d0979 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -503,8 +503,11 @@ static ssize_t show_nctxts(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts - - dd->first_user_ctxt); + /* The calculation below deals with a special case where + * cfgctxts is set to 1 on a single-port board. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static ssize_t show_nfreectxts(struct device *device, diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 1bf626c40172..31d3561400a4 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -295,6 +295,7 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, nbufs = last - first + 1; /* number in range to check */ if (dd->upd_pio_shadow) { +update_shadow: /* * Minor optimization. If we had no buffers on last call, * start out by doing the update; continue and do scan even @@ -304,37 +305,39 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, updated++; } i = first; -rescan: /* * While test_and_set_bit() is atomic, we do that and then the * change_bit(), and the pair is not. See if this is the cause * of the remaining armlaunch errors. */ spin_lock_irqsave(&dd->pioavail_lock, flags); + if (dd->last_pio >= first && dd->last_pio <= last) + i = dd->last_pio + 1; + if (!first) + /* adjust to min possible */ + nbufs = last - dd->min_kernel_pio + 1; for (j = 0; j < nbufs; j++, i++) { if (i > last) - i = first; + i = !first ? dd->min_kernel_pio : first; if (__test_and_set_bit((2 * i) + 1, shadow)) continue; /* flip generation bit */ __change_bit(2 * i, shadow); /* remember that the buffer can be written to now */ __set_bit(i, dd->pio_writing); + if (!first && first != last) /* first == last on VL15, avoid */ + dd->last_pio = i; break; } spin_unlock_irqrestore(&dd->pioavail_lock, flags); if (j == nbufs) { - if (!updated) { + if (!updated) /* * First time through; shadow exhausted, but may be * buffers available, try an update and then rescan. 
*/ - update_send_bufs(dd); - updated++; - i = first; - goto rescan; - } + goto update_shadow; no_send_bufs(dd); buf = NULL; } else { @@ -422,14 +425,20 @@ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + start, dd->pioavailshadow); __set_bit(start, dd->pioavailkernel); + if ((start >> 1) < dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; } else { __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT, dd->pioavailshadow); __clear_bit(start, dd->pioavailkernel); + if ((start >> 1) > dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; } start += 2; } + if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1) + dd->last_pio = dd->min_kernel_pio - 1; spin_unlock_irqrestore(&dd->pioavail_lock, flags); dd->f_txchk_change(dd, ostart, len, avail, rcd); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 7ce2ac2ed219..ce7387ff5d91 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -72,9 +72,9 @@ int qib_make_uc_req(struct qib_qp *qp) goto done; } - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 828609fa4d28..a468bf2d4465 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -321,11 +321,11 @@ int qib_make_ud_req(struct qib_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &ah_attr->grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -333,7 +333,7 @@ int qib_make_ud_req(struct qib_qp *qp) } else { /* Header size in 32-bit words. */ lrh0 = QIB_LRH_BTH; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -346,15 +346,15 @@ int qib_make_ud_req(struct qib_qp *qp) lrh0 |= 0xF000; /* Set VL (see ch. 
13.5.3.1) */ else lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - qp->s_hdr.lrh[3] = cpu_to_be16(lid); + qp->s_hdr->lrh[3] = cpu_to_be16(lid); } else - qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; + qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 0c19ef0c4123..487606024659 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -367,9 +367,10 @@ struct qib_rwq { struct qib_rq { struct qib_rwq *wq; - spinlock_t lock; /* protect changes in this struct */ u32 size; /* size of RWQE array */ u8 max_sge; + spinlock_t lock /* protect changes in this struct */ + ____cacheline_aligned_in_smp; }; struct qib_srq { @@ -412,31 +413,75 @@ struct qib_ack_entry { */ struct qib_qp { struct ib_qp ibqp; - struct qib_qp *next; /* link list for QPN hash table */ - struct qib_qp *timer_next; /* link list for qib_ib_timer() */ - struct list_head iowait; /* link for wait PIO buf */ - struct list_head rspwait; /* link for waititing to respond */ + /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; - struct qib_ib_header s_hdr; /* next packet header to send */ - atomic_t refcount; - wait_queue_head_t wait; - wait_queue_head_t wait_dma; - struct timer_list s_timer; - struct work_struct s_work; + struct qib_qp *next; /* link list for QPN hash table */ + struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; + struct qib_ib_header *s_hdr; /* next packet header to send */ + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + + u8 state; /* QP state */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + + struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct qib_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len 
processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waititing to respond */ + + struct qib_sge_state r_sge; /* current receive data */ + struct qib_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; struct qib_sge_state *s_cur_sge; + u32 s_flags; struct qib_verbs_txreq *s_tx; - struct qib_mregion *s_rdma_mr; + struct qib_swqe *s_wqe; struct qib_sge_state s_sge; /* current send request data */ - struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]; - struct qib_sge_state s_ack_rdma_sge; - struct qib_sge_state s_rdma_read_sge; - struct qib_sge_state r_sge; /* current receive data */ - spinlock_t r_lock; /* used for APM */ - spinlock_t s_lock; + struct qib_mregion *s_rdma_mr; atomic_t s_dma_busy; - u32 s_flags; u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ @@ -447,60 +492,34 @@ struct qib_qp { u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u64 r_wr_id; /* ID for current receive WQE */ - unsigned long r_aflags; - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; - u8 state; /* QP state */ u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_flags; - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - u8 qp_access_flags; - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - u8 s_srate; - u8 s_draining; - u8 s_mig_state; - u8 timeout; /* Timeout for this QP */ - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 port_num; - enum ib_mtu path_mtu; - u32 pmtu; /* decoded from path_mtu */ - u32 remote_qpn; - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* 
last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - unsigned long timeout_jiffies; /* computed from timeout */ - struct qib_swqe *s_wq; /* send work queue */ - struct qib_swqe *s_wqe; - struct qib_rq r_rq; /* receive work queue */ - struct qib_sge r_sg_list[0]; /* verified SGEs */ + + struct qib_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + struct list_head iowait; /* link for wait PIO buf */ + + struct work_struct s_work; + + wait_queue_head_t wait_dma; + + struct qib_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; }; /* diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index db43b3117168..0ab8c9cc3a78 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -573,10 +573,9 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking); - if (err) { - iscsi_destroy_endpoint(ep); + if (err) return ERR_PTR(err); - } + return ep; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 14224ba44fd8..2dddabd8fcf9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -613,8 +613,9 @@ id_failure: ib_conn->cma_id = NULL; addr_failure: ib_conn->state = ISER_CONN_DOWN; + iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */ connect_failure: - iser_conn_release(ib_conn, 1); + iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ return err; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0fe18850c838..ec2dafe8ae5b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -51,6 +51,8 @@ #define FW_VERSION_MINOR 1 #define FW_VERSION_MICRO 0 +#define CH_WARN(adap, fmt, ...) 
dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ @@ -64,6 +66,15 @@ enum { MEM_MC }; +enum { + MEMWIN0_APERTURE = 65536, + MEMWIN0_BASE = 0x30000, + MEMWIN1_APERTURE = 32768, + MEMWIN1_BASE = 0x28000, + MEMWIN2_APERTURE = 2048, + MEMWIN2_BASE = 0x1b800, +}; + enum dev_master { MASTER_CANT, MASTER_MAY, @@ -403,6 +414,9 @@ struct sge_txq { struct tx_sw_desc *sdesc; /* address of SW Tx descriptor ring */ struct sge_qstat *stat; /* queue status entry */ dma_addr_t phys_addr; /* physical address of the ring */ + spinlock_t db_lock; + int db_disabled; + unsigned short db_pidx; }; struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ @@ -475,6 +489,7 @@ struct adapter { void __iomem *regs; struct pci_dev *pdev; struct device *pdev_dev; + unsigned int mbox; unsigned int fn; unsigned int flags; @@ -504,6 +519,8 @@ struct adapter { void **tid_release_head; spinlock_t tid_release_lock; struct work_struct tid_release_task; + struct work_struct db_full_task; + struct work_struct db_drop_task; bool tid_release_task_busy; struct dentry *debugfs_root; @@ -605,6 +622,7 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie); void t4_sge_init(struct adapter *adap); void t4_sge_start(struct adapter *adap); void t4_sge_stop(struct adapter *adap); +extern int dbfifo_int_thresh; #define for_each_port(adapter, iter) \ for (iter = 0; iter < (adapter)->params.nports; ++iter) @@ -719,4 +737,9 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl); +void t4_db_full(struct adapter *adapter); +void t4_db_dropped(struct adapter *adapter); +int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len); +int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, + u32 addr, u32 val); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index b126b98065a9..e1f96fbb48c1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -149,15 +149,6 @@ static unsigned int pfvfres_pmask(struct adapter *adapter, #endif enum { - MEMWIN0_APERTURE = 65536, - MEMWIN0_BASE = 0x30000, - MEMWIN1_APERTURE = 32768, - MEMWIN1_BASE = 0x28000, - MEMWIN2_APERTURE = 2048, - MEMWIN2_BASE = 0x1b800, -}; - -enum { MAX_TXQ_ENTRIES = 16384, MAX_CTRL_TXQ_ENTRIES = 1024, MAX_RSPQ_ENTRIES = 16384, @@ -371,6 +362,15 @@ static int set_addr_filters(const struct net_device *dev, bool sleep) uhash | mhash, sleep); } +int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */ +module_param(dbfifo_int_thresh, int, 0644); +MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold"); + +int dbfifo_drain_delay = 1000; /* usecs to sleep while draining the dbfifo */ +module_param(dbfifo_drain_delay, int, 0644); +MODULE_PARM_DESC(dbfifo_drain_delay, + "usecs to sleep while draining the dbfifo"); + /* * Set Rx properties of a port, such as promiscruity, address filters, and MTU. * If @mtu is -1 it is left unchanged. 
@@ -389,6 +389,8 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) return ret; } +static struct workqueue_struct *workq; + /** * link_start - enable a port * @dev: the port to enable @@ -2196,7 +2198,7 @@ static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan, adap->tid_release_head = (void **)((uintptr_t)p | chan); if (!adap->tid_release_task_busy) { adap->tid_release_task_busy = true; - schedule_work(&adap->tid_release_task); + queue_work(workq, &adap->tid_release_task); } spin_unlock_bh(&adap->tid_release_lock); } @@ -2366,6 +2368,16 @@ unsigned int cxgb4_port_chan(const struct net_device *dev) } EXPORT_SYMBOL(cxgb4_port_chan); +unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo) +{ + struct adapter *adap = netdev2adap(dev); + u32 v; + + v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS); + return lpfifo ? G_LP_COUNT(v) : G_HP_COUNT(v); +} +EXPORT_SYMBOL(cxgb4_dbfifo_count); + /** * cxgb4_port_viid - get the VI id of a port * @dev: the net device for the port @@ -2413,6 +2425,59 @@ void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask, } EXPORT_SYMBOL(cxgb4_iscsi_init); +int cxgb4_flush_eq_cache(struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + int ret; + + ret = t4_fwaddrspace_write(adap, adap->mbox, + 0xe1000000 + A_SGE_CTXT_CMD, 0x20000000); + return ret; +} +EXPORT_SYMBOL(cxgb4_flush_eq_cache); + +static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx) +{ + u32 addr = t4_read_reg(adap, A_SGE_DBQ_CTXT_BADDR) + 24 * qid + 8; + __be64 indices; + int ret; + + ret = t4_mem_win_read_len(adap, addr, (__be32 *)&indices, 8); + if (!ret) { + indices = be64_to_cpu(indices); + *cidx = (indices >> 25) & 0xffff; + *pidx = (indices >> 9) & 0xffff; + } + return ret; +} + +int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx, + u16 size) +{ + struct adapter *adap = netdev2adap(dev); + u16 hw_pidx, hw_cidx; + int ret; + + ret = read_eq_indices(adap, qid, &hw_pidx, &hw_cidx); + if (ret) + goto out; + + if (pidx != hw_pidx) { + u16 delta; + + if (pidx >= hw_pidx) + delta = pidx - hw_pidx; + else + delta = size - hw_pidx + pidx; + wmb(); + t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), + V_QID(qid) | V_PIDX(delta)); + } +out: + return ret; +} +EXPORT_SYMBOL(cxgb4_sync_txq_pidx); + static struct pci_driver cxgb4_driver; static void check_neigh_update(struct neighbour *neigh) @@ -2446,6 +2511,144 @@ static struct notifier_block cxgb4_netevent_nb = { .notifier_call = netevent_cb }; +static void drain_db_fifo(struct adapter *adap, int usecs) +{ + u32 v; + + do { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(usecs)); + v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS); + if (G_LP_COUNT(v) == 0 && G_HP_COUNT(v) == 0) + break; + } while (1); +} + +static void disable_txq_db(struct sge_txq *q) +{ + spin_lock_irq(&q->db_lock); + q->db_disabled = 1; + spin_unlock_irq(&q->db_lock); +} + +static void enable_txq_db(struct sge_txq *q) +{ + spin_lock_irq(&q->db_lock); + q->db_disabled = 0; + spin_unlock_irq(&q->db_lock); +} + +static void disable_dbs(struct adapter *adap) +{ + int i; + + for_each_ethrxq(&adap->sge, i) + disable_txq_db(&adap->sge.ethtxq[i].q); + for_each_ofldrxq(&adap->sge, i) + disable_txq_db(&adap->sge.ofldtxq[i].q); + for_each_port(adap, i) + disable_txq_db(&adap->sge.ctrlq[i].q); +} + +static void enable_dbs(struct adapter *adap) +{ + int i; + + for_each_ethrxq(&adap->sge, i) + enable_txq_db(&adap->sge.ethtxq[i].q); + 
for_each_ofldrxq(&adap->sge, i) + enable_txq_db(&adap->sge.ofldtxq[i].q); + for_each_port(adap, i) + enable_txq_db(&adap->sge.ctrlq[i].q); +} + +static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q) +{ + u16 hw_pidx, hw_cidx; + int ret; + + spin_lock_bh(&q->db_lock); + ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx); + if (ret) + goto out; + if (q->db_pidx != hw_pidx) { + u16 delta; + + if (q->db_pidx >= hw_pidx) + delta = q->db_pidx - hw_pidx; + else + delta = q->size - hw_pidx + q->db_pidx; + wmb(); + t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), + V_QID(q->cntxt_id) | V_PIDX(delta)); + } +out: + q->db_disabled = 0; + spin_unlock_bh(&q->db_lock); + if (ret) + CH_WARN(adap, "DB drop recovery failed.\n"); +} +static void recover_all_queues(struct adapter *adap) +{ + int i; + + for_each_ethrxq(&adap->sge, i) + sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); + for_each_ofldrxq(&adap->sge, i) + sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); + for_each_port(adap, i) + sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); +} + +static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) +{ + mutex_lock(&uld_mutex); + if (adap->uld_handle[CXGB4_ULD_RDMA]) + ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], + cmd); + mutex_unlock(&uld_mutex); +} + +static void process_db_full(struct work_struct *work) +{ + struct adapter *adap; + + adap = container_of(work, struct adapter, db_full_task); + + notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); + drain_db_fifo(adap, dbfifo_drain_delay); + t4_set_reg_field(adap, A_SGE_INT_ENABLE3, + F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, + F_DBFIFO_HP_INT | F_DBFIFO_LP_INT); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY); +} + +static void process_db_drop(struct work_struct *work) +{ + struct adapter *adap; + + adap = container_of(work, struct adapter, db_drop_task); + + t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_DROPPED_DB, 0); + disable_dbs(adap); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP); + drain_db_fifo(adap, 1); + recover_all_queues(adap); + enable_dbs(adap); +} + +void t4_db_full(struct adapter *adap) +{ + t4_set_reg_field(adap, A_SGE_INT_ENABLE3, + F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, 0); + queue_work(workq, &adap->db_full_task); +} + +void t4_db_dropped(struct adapter *adap) +{ + queue_work(workq, &adap->db_drop_task); +} + static void uld_attach(struct adapter *adap, unsigned int uld) { void *handle; @@ -2479,6 +2682,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS); lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL); lli.fw_vers = adap->params.fw_vers; + lli.dbfifo_int_thresh = dbfifo_int_thresh; handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -2649,6 +2853,8 @@ static void cxgb_down(struct adapter *adapter) { t4_intr_disable(adapter); cancel_work_sync(&adapter->tid_release_task); + cancel_work_sync(&adapter->db_full_task); + cancel_work_sync(&adapter->db_drop_task); adapter->tid_release_task_busy = false; adapter->tid_release_head = NULL; @@ -3593,6 +3799,7 @@ static int __devinit init_one(struct pci_dev *pdev, adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; + adapter->mbox = func; adapter->fn = func; adapter->msg_enable = dflt_msg_enable; memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map)); @@ -3601,6 +3808,8 @@ static int __devinit init_one(struct pci_dev *pdev, spin_lock_init(&adapter->tid_release_lock); INIT_WORK(&adapter->tid_release_task, process_tid_release_list); + INIT_WORK(&adapter->db_full_task, 
process_db_full); + INIT_WORK(&adapter->db_drop_task, process_db_drop); err = t4_prep_adapter(adapter); if (err) @@ -3788,6 +3997,10 @@ static int __init cxgb4_init_module(void) { int ret; + workq = create_singlethread_workqueue("cxgb4"); + if (!workq) + return -ENOMEM; + /* Debugfs support is optional, just warn if this fails */ cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); if (!cxgb4_debugfs_root) @@ -3803,6 +4016,8 @@ static void __exit cxgb4_cleanup_module(void) { pci_unregister_driver(&cxgb4_driver); debugfs_remove(cxgb4_debugfs_root); /* NULL ok */ + flush_workqueue(workq); + destroy_workqueue(workq); } module_init(cxgb4_init_module); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index b1d39b8d141a..d79980c5fc63 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -163,6 +163,12 @@ enum cxgb4_state { CXGB4_STATE_DETACH }; +enum cxgb4_control { + CXGB4_CONTROL_DB_FULL, + CXGB4_CONTROL_DB_EMPTY, + CXGB4_CONTROL_DB_DROP, +}; + struct pci_dev; struct l2t_data; struct net_device; @@ -212,6 +218,7 @@ struct cxgb4_lld_info { unsigned short ucq_density; /* # of user CQs/page */ void __iomem *gts_reg; /* address of GTS register */ void __iomem *db_reg; /* address of kernel doorbell */ + int dbfifo_int_thresh; /* doorbell fifo int threshold */ }; struct cxgb4_uld_info { @@ -220,11 +227,13 @@ struct cxgb4_uld_info { int (*rx_handler)(void *handle, const __be64 *rsp, const struct pkt_gl *gl); int (*state_change)(void *handle, enum cxgb4_state new_state); + int (*control)(void *handle, enum cxgb4_control control, ...); }; int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); +unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); unsigned int cxgb4_port_viid(const struct net_device *dev); unsigned int cxgb4_port_idx(const struct net_device *dev); @@ -236,4 +245,6 @@ void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask, const unsigned int *pgsz_order); struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl, unsigned int skb_len, unsigned int pull_len); +int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx, u16 size); +int cxgb4_flush_eq_cache(struct net_device *dev); #endif /* !__CXGB4_OFLD_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 2dae7959f000..e111d974afd8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -767,8 +767,13 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { wmb(); /* write descriptors before telling HW */ - t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), - QID(q->cntxt_id) | PIDX(n)); + spin_lock(&q->db_lock); + if (!q->db_disabled) { + t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), + V_QID(q->cntxt_id) | V_PIDX(n)); + } + q->db_pidx = q->pidx; + spin_unlock(&q->db_lock); } /** @@ -2081,6 +2086,7 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) q->stops = q->restarts = 0; q->stat = (void *)&q->desc[q->size]; q->cntxt_id = id; + spin_lock_init(&q->db_lock); adap->sge.egr_map[id - adap->sge.egr_start] = q; } @@ -2415,6 +2421,18 @@ void 
t4_sge_init(struct adapter *adap) RXPKTCPLMODE | (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0)); + /* + * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows + * and generate an interrupt when this occurs so we can recover. + */ + t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, + V_HP_INT_THRESH(M_HP_INT_THRESH) | + V_LP_INT_THRESH(M_LP_INT_THRESH), + V_HP_INT_THRESH(dbfifo_int_thresh) | + V_LP_INT_THRESH(dbfifo_int_thresh)); + t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP, + F_ENABLE_DROP); + for (i = v = 0; i < 32; i += 4) v |= (PAGE_SHIFT - 10) << i; t4_write_reg(adap, SGE_HOST_PAGE_SIZE, v); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index d1ec111aebd8..32e1dd566a14 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -868,11 +868,14 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port) return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } +typedef void (*int_handler_t)(struct adapter *adap); + struct intr_info { unsigned int mask; /* bits to check in interrupt status */ const char *msg; /* message to print or NULL */ short stat_idx; /* stat counter to increment or -1 */ unsigned short fatal; /* whether the condition reported is fatal */ + int_handler_t int_handler; /* platform-specific int handler */ }; /** @@ -905,6 +908,8 @@ static int t4_handle_intr_status(struct adapter *adapter, unsigned int reg, } else if (acts->msg && printk_ratelimit()) dev_warn(adapter->pdev_dev, "%s (0x%x)\n", acts->msg, status & acts->mask); + if (acts->int_handler) + acts->int_handler(adapter); mask |= acts->mask; } status &= mask; @@ -1013,7 +1018,9 @@ static void sge_intr_handler(struct adapter *adapter) { ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large", -1, 0 }, { ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 }, - { ERR_DROPPED_DB, "SGE doorbell dropped", -1, 0 }, + { F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full }, + { F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full }, + { F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped }, { ERR_DATA_CPL_ON_HIGH_QID1 | ERR_DATA_CPL_ON_HIGH_QID0, "SGE IQID > 1023 received CPL for FL", -1, 0 }, { ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1, @@ -1034,10 +1041,10 @@ static void sge_intr_handler(struct adapter *adapter) }; v = (u64)t4_read_reg(adapter, SGE_INT_CAUSE1) | - ((u64)t4_read_reg(adapter, SGE_INT_CAUSE2) << 32); + ((u64)t4_read_reg(adapter, SGE_INT_CAUSE2) << 32); if (v) { dev_alert(adapter->pdev_dev, "SGE parity error (%#llx)\n", - (unsigned long long)v); + (unsigned long long)v); t4_write_reg(adapter, SGE_INT_CAUSE1, v); t4_write_reg(adapter, SGE_INT_CAUSE2, v >> 32); } @@ -1513,6 +1520,7 @@ void t4_intr_enable(struct adapter *adapter) ERR_BAD_DB_PIDX2 | ERR_BAD_DB_PIDX1 | ERR_BAD_DB_PIDX0 | ERR_ING_CTXT_PRIO | ERR_EGR_CTXT_PRIO | INGRESS_SIZE_ERR | + F_DBFIFO_HP_INT | F_DBFIFO_LP_INT | EGRESS_SIZE_ERR); t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE), PF_INTR_MASK); t4_set_reg_field(adapter, PL_INT_MAP0, 0, 1 << pf); @@ -1986,6 +1994,54 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, (var).retval_len16 = htonl(FW_LEN16(var)); \ } while (0) +int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, + u32 addr, u32 val) +{ + struct fw_ldst_cmd c; + + memset(&c, 0, sizeof(c)); + c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE | + V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE)); + c.cycles_to_len16 = 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index d1ec111aebd8..32e1dd566a14 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -868,11 +868,14 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port)
 	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
 }
 
+typedef void (*int_handler_t)(struct adapter *adap);
+
 struct intr_info {
 	unsigned int mask;	/* bits to check in interrupt status */
 	const char *msg;	/* message to print or NULL */
 	short stat_idx;		/* stat counter to increment or -1 */
 	unsigned short fatal;	/* whether the condition reported is fatal */
+	int_handler_t int_handler; /* platform-specific int handler */
 };
 
 /**
@@ -905,6 +908,8 @@ static int t4_handle_intr_status(struct adapter *adapter, unsigned int reg,
 		} else if (acts->msg && printk_ratelimit())
 			dev_warn(adapter->pdev_dev, "%s (0x%x)\n", acts->msg, status & acts->mask);
+		if (acts->int_handler)
+			acts->int_handler(adapter);
 		mask |= acts->mask;
 	}
 	status &= mask;
@@ -1013,7 +1018,9 @@ static void sge_intr_handler(struct adapter *adapter)
 		{ ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large", -1, 0 },
 		{ ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 },
-		{ ERR_DROPPED_DB, "SGE doorbell dropped", -1, 0 },
+		{ F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full },
+		{ F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full },
+		{ F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped },
 		{ ERR_DATA_CPL_ON_HIGH_QID1 | ERR_DATA_CPL_ON_HIGH_QID0, "SGE IQID > 1023 received CPL for FL", -1, 0 },
 		{ ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1,
@@ -1034,10 +1041,10 @@ static void sge_intr_handler(struct adapter *adapter)
 	};
 
 	v = (u64)t4_read_reg(adapter, SGE_INT_CAUSE1) |
-	    ((u64)t4_read_reg(adapter, SGE_INT_CAUSE2) << 32);
+		((u64)t4_read_reg(adapter, SGE_INT_CAUSE2) << 32);
 	if (v) {
 		dev_alert(adapter->pdev_dev, "SGE parity error (%#llx)\n",
-			  (unsigned long long)v);
+			(unsigned long long)v);
 		t4_write_reg(adapter, SGE_INT_CAUSE1, v);
 		t4_write_reg(adapter, SGE_INT_CAUSE2, v >> 32);
 	}
@@ -1513,6 +1520,7 @@ void t4_intr_enable(struct adapter *adapter)
 		     ERR_BAD_DB_PIDX2 | ERR_BAD_DB_PIDX1 | ERR_BAD_DB_PIDX0 | ERR_ING_CTXT_PRIO | ERR_EGR_CTXT_PRIO | INGRESS_SIZE_ERR |
+		     F_DBFIFO_HP_INT | F_DBFIFO_LP_INT |
 		     EGRESS_SIZE_ERR);
 	t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE), PF_INTR_MASK);
 	t4_set_reg_field(adapter, PL_INT_MAP0, 0, 1 << pf);
@@ -1986,6 +1994,54 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map,
 	(var).retval_len16 = htonl(FW_LEN16(var)); \
 } while (0)
 
+int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
+			 u32 addr, u32 val)
+{
+	struct fw_ldst_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST |
+				  F_FW_CMD_WRITE |
+				  V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE));
+	c.cycles_to_len16 = htonl(FW_LEN16(c));
+	c.u.addrval.addr = htonl(addr);
+	c.u.addrval.val = htonl(val);
+
+	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+}
+
+/*
+ * t4_mem_win_read_len - read memory through PCIE memory window
+ * @adap: the adapter
+ * @addr: address of first byte requested, aligned on 32b.
+ * @data: len bytes to hold the data read
+ * @len: amount of data to read from window.  Must be <=
+ *       MEMWIN0_APERTURE after adjusting for 16B alignment
+ *       requirements of the memory window.
+ *
+ * Read len bytes of data from MC starting at @addr.
+ */
+int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len)
+{
+	int i;
+	int off;
+
+	/*
+	 * Align on a 16B boundary.
+	 */
+	off = addr & 15;
+	if ((addr & 3) || (len + off) > MEMWIN0_APERTURE)
+		return -EINVAL;
+
+	t4_write_reg(adap, A_PCIE_MEM_ACCESS_OFFSET, addr & ~15);
+	t4_read_reg(adap, A_PCIE_MEM_ACCESS_OFFSET);
+
+	for (i = 0; i < len; i += 4)
+		*data++ = t4_read_reg(adap, (MEMWIN0_BASE + off + i));
+
+	return 0;
+}
+
 /**
  *	t4_mdio_rd - read a PHY register through MDIO
  *	@adap: the adapter
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 0adc5bcec7c4..111fc323f155 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -190,6 +190,59 @@
 #define SGE_DEBUG_DATA_LOW 0x10d4
 #define SGE_INGRESS_QUEUES_PER_PAGE_PF 0x10f4
+#define S_LP_INT_THRESH 12
+#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH)
+#define S_HP_INT_THRESH 28
+#define V_HP_INT_THRESH(x) ((x) << S_HP_INT_THRESH)
+#define A_SGE_DBFIFO_STATUS 0x10a4
+
+#define S_ENABLE_DROP 13
+#define V_ENABLE_DROP(x) ((x) << S_ENABLE_DROP)
+#define F_ENABLE_DROP V_ENABLE_DROP(1U)
+#define A_SGE_DOORBELL_CONTROL 0x10a8
+
+#define A_SGE_CTXT_CMD 0x11fc
+#define A_SGE_DBQ_CTXT_BADDR 0x1084
+
+#define A_SGE_PF_KDOORBELL 0x0
+
+#define S_QID 15
+#define V_QID(x) ((x) << S_QID)
+
+#define S_PIDX 0
+#define V_PIDX(x) ((x) << S_PIDX)
+
+#define M_LP_COUNT 0x7ffU
+#define S_LP_COUNT 0
+#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT)
+
+#define M_HP_COUNT 0x7ffU
+#define S_HP_COUNT 16
+#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT)
+
+#define A_SGE_INT_ENABLE3 0x1040
+
+#define S_DBFIFO_HP_INT 8
+#define V_DBFIFO_HP_INT(x) ((x) << S_DBFIFO_HP_INT)
+#define F_DBFIFO_HP_INT V_DBFIFO_HP_INT(1U)
+
+#define S_DBFIFO_LP_INT 7
+#define V_DBFIFO_LP_INT(x) ((x) << S_DBFIFO_LP_INT)
+#define F_DBFIFO_LP_INT V_DBFIFO_LP_INT(1U)
+
+#define S_DROPPED_DB 0
+#define V_DROPPED_DB(x) ((x) << S_DROPPED_DB)
+#define F_DROPPED_DB V_DROPPED_DB(1U)
+
+#define S_ERR_DROPPED_DB 18
+#define V_ERR_DROPPED_DB(x) ((x) << S_ERR_DROPPED_DB)
+#define F_ERR_DROPPED_DB V_ERR_DROPPED_DB(1U)
+
+#define A_PCIE_MEM_ACCESS_OFFSET 0x306c
+
+#define M_HP_INT_THRESH 0xfU
+#define M_LP_INT_THRESH 0xfU
+
 #define PCIE_PF_CLI 0x44
 #define PCIE_INT_CAUSE 0x3004
 #define UNXSPLCPLERR 0x20000000U
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index edcfd7ec7802..ad53f796b574 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1620,4 +1620,19 @@ struct fw_hdr {
 #define FW_HDR_FW_VER_MINOR_GET(x) (((x) >> 16) & 0xff)
 #define FW_HDR_FW_VER_MICRO_GET(x) (((x) >> 8) & 0xff)
 #define FW_HDR_FW_VER_BUILD_GET(x) (((x) >> 0) & 0xff)
+
+#define S_FW_CMD_OP 24
+#define V_FW_CMD_OP(x) ((x) << S_FW_CMD_OP)
+
+#define S_FW_CMD_REQUEST 23
+#define V_FW_CMD_REQUEST(x) ((x) << S_FW_CMD_REQUEST)
+#define F_FW_CMD_REQUEST V_FW_CMD_REQUEST(1U)
+
+#define S_FW_CMD_WRITE 21
+#define V_FW_CMD_WRITE(x) ((x) << S_FW_CMD_WRITE)
+#define F_FW_CMD_WRITE V_FW_CMD_WRITE(1U)
+
+#define S_FW_LDST_CMD_ADDRSPACE 0
+#define V_FW_LDST_CMD_ADDRSPACE(x) ((x) << S_FW_LDST_CMD_ADDRSPACE)
+
 #endif /* _T4FW_INTERFACE_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 8be20e7ea3d1..06fef5b44f77 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -124,9 +124,6 @@ void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
 	spin_lock(&bitmap->lock);
 	bitmap_clear(bitmap->table, obj, cnt);
-	bitmap->last = min(bitmap->last, obj);
-	bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
-			& bitmap->mask;
 	bitmap->avail += cnt;
 	spin_unlock(&bitmap->lock);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 2a02ba522e60..f7488dfef8eb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -118,6 +118,20 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 			mlx4_dbg(dev, " %s\n", fname[i]);
 }
 
+static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
+{
+	static const char * const fname[] = {
+		[0] = "RSS support",
+		[1] = "RSS Toeplitz Hash Function support",
+		[2] = "RSS XOR Hash Function support"
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(fname); ++i)
+		if (fname[i] && (flags & (1LL << i)))
+			mlx4_dbg(dev, " %s\n", fname[i]);
+}
+
 int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -346,6 +360,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29
 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b
 #define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d
+#define QUERY_DEV_CAP_RSS_OFFSET 0x2e
 #define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f
 #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35
@@ -390,6 +405,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
 
+	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
@@ -439,6 +455,17 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	else
 		dev_cap->max_gso_sz = 1 << field;
 
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET);
+	if (field & 0x20)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR;
+	if (field & 0x10)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP;
+	field &= 0xf;
+	if (field) {
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS;
+		dev_cap->max_rss_tbl_sz = 1 << field;
+	} else
+		dev_cap->max_rss_tbl_sz = 0;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
 	dev_cap->max_rdma_global = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
@@ -632,8 +659,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
 	mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
 	mlx4_dbg(dev, "Max counters: %d\n", dev_cap->max_counters);
+	mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz);
 
 	dump_dev_cap_flags(dev, dev_cap->flags);
+	dump_dev_cap_flags2(dev, dev_cap->flags2);
 
 out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
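The QUERY_DEV_CAP parsing above packs three facts into the single byte read at QUERY_DEV_CAP_RSS_OFFSET: bit 5 advertises the XOR hash, bit 4 the Toeplitz hash, and a non-zero low nibble is log2 of the maximum RSS table size. A stand-alone decoder mirroring that hunk (the FLAG2_* constants are local stand-ins for the MLX4_DEV_CAP_FLAG2_* enum added in device.h below, and the example byte is invented):

#include <stdio.h>

#define FLAG2_RSS     (1ULL << 0)	/* mirrors MLX4_DEV_CAP_FLAG2_RSS */
#define FLAG2_RSS_TOP (1ULL << 1)	/* mirrors MLX4_DEV_CAP_FLAG2_RSS_TOP */
#define FLAG2_RSS_XOR (1ULL << 2)	/* mirrors MLX4_DEV_CAP_FLAG2_RSS_XOR */

/* Decode the RSS capability byte the same way the hunk above does. */
static void decode_rss_byte(unsigned char field,
			    unsigned long long *flags2, int *max_rss_tbl_sz)
{
	*flags2 = 0;
	if (field & 0x20)
		*flags2 |= FLAG2_RSS_XOR;
	if (field & 0x10)
		*flags2 |= FLAG2_RSS_TOP;
	field &= 0xf;
	if (field) {
		*flags2 |= FLAG2_RSS;
		*max_rss_tbl_sz = 1 << field;
	} else {
		*max_rss_tbl_sz = 0;
	}
}

int main(void)
{
	unsigned long long flags2;
	int tbl;

	decode_rss_byte(0x3a, &flags2, &tbl);	/* invented example value */
	printf("flags2=%#llx max_rss_tbl_sz=%d\n", flags2, tbl);	/* 0x7, 1024 */
	return 0;
}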
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index e1a5fa56bcbc..64c0399e4b78 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -79,6 +79,7 @@ struct mlx4_dev_cap {
 	u64 trans_code[MLX4_MAX_PORTS + 1];
 	u16 stat_rate_support;
 	u64 flags;
+	u64 flags2;
 	int reserved_uars;
 	int uar_size;
 	int min_page_sz;
@@ -110,6 +111,7 @@ struct mlx4_dev_cap {
 	u32 reserved_lkey;
 	u64 max_icm_sz;
 	int max_gso_sz;
+	int max_rss_tbl_sz;
 	u8 supported_port_types[MLX4_MAX_PORTS + 1];
 	u8 suggested_type[MLX4_MAX_PORTS + 1];
 	u8 default_sense[MLX4_MAX_PORTS + 1];
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 8bb05b46db86..bb04a8208780 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -272,10 +272,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_msg_sz = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags = dev_cap->flags;
+	dev->caps.flags2 = dev_cap->flags2;
 	dev->caps.bmme_flags = dev_cap->bmme_flags;
 	dev->caps.reserved_lkey = dev_cap->reserved_lkey;
 	dev->caps.stat_rate_support = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz = dev_cap->max_gso_sz;
+	dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
 	/* Sense port always allowed on supported devices for ConnectX1 and 2 */
 	if (dev->pdev->device != 0x1003)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 834c96c5d879..7f5e8d564e8e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -98,6 +98,12 @@ enum {
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55
 };
 
+enum {
+	MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0,
+	MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
+	MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2
+};
+
 #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
 
 enum {
@@ -292,11 +298,13 @@ struct mlx4_caps {
 	u32 max_msg_sz;
 	u32 page_size_cap;
 	u64 flags;
+	u64 flags2;
 	u32 bmme_flags;
 	u32 reserved_lkey;
 	u16 stat_rate_support;
 	u8 port_width_cap[MLX4_MAX_PORTS + 1];
 	int max_gso_sz;
+	int max_rss_tbl_sz;
 	int reserved_qps_cnt[MLX4_NUM_QP_REGION];
 	int reserved_qps;
 	int reserved_qps_base[MLX4_NUM_QP_REGION];
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 091f9e7dc8b9..bb57d5c58df2 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -233,7 +233,8 @@ struct mlx4_wqe_mlx_seg {
 	u8 owner;
 	u8 reserved1[2];
 	u8 opcode;
-	u8 reserved2[3];
+	__be16 sched_prio;
+	u8 reserved2;
 	u8 size;
 	/*
 	 * [17] VL15
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index b513f57e1725..3d81b90cc315 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -160,7 +160,7 @@ struct ib_rmpp_hdr {
 
 typedef u64 __bitwise ib_sa_comp_mask;
 
-#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
+#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
 
 /*
  * ib_sa_hdr and ib_sa_mad structures must be packed because they have
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c3cca5a4dacd..07996af8265a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -605,7 +605,7 @@ enum ib_qp_type {
 	IB_QPT_UD,
 	IB_QPT_RAW_IPV6,
 	IB_QPT_RAW_ETHERTYPE,
-	/* Save 8 for RAW_PACKET */
+	IB_QPT_RAW_PACKET = 8,
 	IB_QPT_XRC_INI = 9,
 	IB_QPT_XRC_TGT,
 	IB_QPT_MAX
@@ -964,7 +964,7 @@ struct ib_qp {
 	struct ib_srq *srq;
 	struct ib_xrcd *xrcd; /* XRC TGT QPs only */
 	struct list_head xrcd_list;
-	atomic_t usecnt; /* count times opened */
+	atomic_t usecnt; /* count times opened, mcast attaches */
 	struct list_head open_list;
 	struct ib_qp *real_qp;
 	struct ib_uobject *uobject;
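With flags2 and max_rss_tbl_sz copied into struct mlx4_caps by the main.c hunk above, consumers can gate RSS setup on the new capability bits just as they already do with caps.flags. A minimal sketch, assuming a caller that holds a struct mlx4_dev (the helper itself is invented for illustration):

#include <linux/mlx4/device.h>

/* Illustrative only: report whether the HCA advertises RSS and, if so,
 * the largest indirection table it supports.  Callers can then look at
 * MLX4_DEV_CAP_FLAG2_RSS_TOP / MLX4_DEV_CAP_FLAG2_RSS_XOR to pick a hash.
 */
static bool mlx4_rss_capable(const struct mlx4_dev *dev, int *max_tbl_sz)
{
	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
		return false;
	if (max_tbl_sz)
		*max_tbl_sz = dev->caps.max_rss_tbl_sz;
	return true;
}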
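The one-character ib_mad.h change is classic macro hygiene: without parentheses around n, any argument built from an operator that binds more loosely than << gets grouped with the shift's result instead of with the shift count. Whether or not an in-tree caller currently passes such an expression, the parenthesized form is the defensive one; a user-space demonstration (cpu_to_be64 and the __bitwise cast are dropped so it compiles on its own):

#include <assert.h>

/* Simplified copies of the old and new macro bodies. */
#define COMP_MASK_OLD(n) (1ull << n)
#define COMP_MASK_NEW(n) (1ull << (n))

int main(void)
{
	int base = 2, flag = 1;

	/* Old form: "1ull << base | flag" parses as "(1ull << base) | flag". */
	assert(COMP_MASK_OLD(base | flag) == ((1ull << base) | flag));	/* 0x5 */

	/* New form shifts by the whole expression, as the caller intended. */
	assert(COMP_MASK_NEW(base | flag) == (1ull << (base | flag)));	/* 0x8 */

	return 0;
}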