From a2268cfbf599e7f55d4ee68193f08b4f44535fac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:32 -0400 Subject: xprtrdma: Add proper SPDX tags for NetApp-contributed source Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fe5eaca2d197..b2c6f525b020 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. -- cgit v1.2.1 From 52d28fe4f61350baf7e84b3f3f3e34f0fc077ec1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:37 -0400 Subject: xprtrdma: Try to fail quickly if proto=rdma rdma_resolve_addr(3) says: > This call is used to map a given destination IP address to a > usable RDMA address. The IP to RDMA address mapping is done > using the local routing tables, or via ARP. If this can't be done, there's no local device that can be used to establish an RDMA-capable network path to the remote. In this case, the RDMA CM very quickly posts an RDMA_CM_EVENT_ADDR_ERROR upcall. Currently rpcrdma_conn_upcall() converts RDMA_CM_EVENT_ADDR_ERROR to EHOSTUNREACH. mount.nfs seems to want to retry EHOSTUNREACH forever, thinking that this is a temporary situation. This makes mount.nfs appear to hang if I try to mount with proto=rdma through, say, a conventional Ethernet device. If the admin has specified proto=rdma along with a server IP address that requires a network path that does not support RDMA, instead let's fail with a permanent error. -EPROTONOSUPPORT is returned when NFSv4 or one of its minor versions is not supported. -EPROTO is not (currently) retried by mount.nfs. There are potentially other similar cases where -EPROTO is an appropriate return code. Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Tested-by: Anna Schumaker Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b2c6f525b020..4ee9704ffe19 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -232,7 +232,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) complete(&ia->ri_done); break; case RDMA_CM_EVENT_ADDR_ERROR: - ia->ri_async_rc = -EHOSTUNREACH; + ia->ri_async_rc = -EPROTO; complete(&ia->ri_done); break; case RDMA_CM_EVENT_ROUTE_ERROR: @@ -263,7 +263,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) connstate = -ENOTCONN; goto connected; case RDMA_CM_EVENT_UNREACHABLE: - connstate = -ENETDOWN; + connstate = -ENETUNREACH; goto connected; case RDMA_CM_EVENT_REJECTED: dprintk("rpcrdma: connection to %s:%s rejected: %s\n", -- cgit v1.2.1 From 107c4beb9bedd07d6e22f7010333dba3dc988292 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:42 -0400 Subject: xprtrdma: Create transport's CM ID in the correct network namespace Set up RPC/RDMA transport in mount.nfs's network namespace. This passes the correct namespace information to the RDMA core, similar to how RPC sockets are created (see xs_create_sock). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4ee9704ffe19..07529ef8e33e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -306,8 +306,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) init_completion(&ia->ri_done); init_completion(&ia->ri_remove_done); - id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, - IB_QPT_RC); + id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall, + xprt, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", -- cgit v1.2.1 From 914fcad9873cbd46e3a4c3c31551b98b15a49079 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:48 -0400 Subject: xprtrdma: Fix max_send_wr computation For FRWR, the computation of max_send_wr is split between frwr_op_open and rpcrdma_ep_create, which makes it difficult to tell that the max_send_wr result is currently incorrect if frwr_op_open has to reduce the credit limit to accommodate a small max_qp_wr. This is a problem now that extra WRs are needed for backchannel operations and a drain CQE. So, refactor the computation so that it is all done in ->ro_open, and fix the FRWR version of this computation so that it accommodates HCAs with small max_qp_wr correctly. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 07529ef8e33e..62baddefced3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -501,8 +501,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; - unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; + unsigned int max_sge; int rc; max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, @@ -513,29 +513,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ia->ri_max_send_sges = max_sge; - if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { - dprintk("RPC: %s: insufficient wqe's available\n", - __func__); - return -ENOMEM; - } - max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1; - - /* check provider's send/recv wr limits */ - if (cdata->max_requests > max_qp_wr) - cdata->max_requests = max_qp_wr; + rc = ia->ri_ops->ro_open(ia, ep, cdata); + if (rc) + return rc; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; - ep->rep_attr.cap.max_send_wr = cdata->max_requests; - ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */ - rc = ia->ri_ops->ro_open(ia, ep, cdata); - if (rc) - return rc; - ep->rep_attr.cap.max_recv_wr = cdata->max_requests; - ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; -- cgit v1.2.1 From 0e0b854cfb3302b1907e9d3a927469b95710238f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:14 -0400 Subject: xprtrdma: Clean up Receive trace points For clarity, report the posting and completion of Receive CQEs. Also, the wc->byte_len field contains garbage if wc->status is non-zero, and the vendor error field contains garbage if wc->status is zero. For readability, don't save those fields in those cases. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 62baddefced3..4b18926de912 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -160,7 +160,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) rr_cqe); /* WARNING: Only wr_id and status are reliable at this point */ - trace_xprtrdma_wc_receive(rep, wc); + trace_xprtrdma_wc_receive(wc); if (wc->status != IB_WC_SUCCESS) goto out_fail; @@ -1570,7 +1570,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) goto out_map; rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); - trace_xprtrdma_post_recv(rep, rc); + trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); if (rc) return -ENOTCONN; return 0; -- cgit v1.2.1 From 7c8d9e7c8863905951d4eaa7a8d277150f3a37f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:20 -0400 Subject: xprtrdma: Move Receive posting to Receive handler Receive completion and Reply handling are done by a BOUND workqueue, meaning they run on only one CPU. Posting receives is currently done in the send_request path, which on large systems is typically done on a different CPU than the one handling Receive completions. This results in movement of Receive-related cachelines between the sending and receiving CPUs. More importantly, it means that currently Receives are posted while the transport's write lock is held, which is unnecessary and costly. Finally, allocation of Receive buffers is performed on-demand in the Receive completion handler. This helps guarantee that they are allocated on the same NUMA node as the CPU that handles Receive completions. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 176 +++++++++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 77 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4b18926de912..2ed2ee4f2c6c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -74,6 +74,7 @@ */ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); +static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); struct workqueue_struct *rpcrdma_receive_wq __read_mostly; @@ -726,7 +727,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - unsigned int extras; int rc; retry: @@ -770,9 +770,8 @@ retry: } dprintk("RPC: %s: connected\n", __func__); - extras = r_xprt->rx_buf.rb_bc_srv_max_requests; - if (extras) - rpcrdma_ep_post_extra_recv(r_xprt, extras); + + rpcrdma_post_recvs(r_xprt, true); out: if (rc) @@ -1082,14 +1081,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) return req; } -/** - * rpcrdma_create_rep - Allocate an rpcrdma_rep object - * @r_xprt: controlling transport - * - * Returns 0 on success or a negative errno on failure. - */ -int -rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) +static int +rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; struct rpcrdma_buffer *buf = &r_xprt->rx_buf; @@ -1117,6 +1110,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.num_sge = 1; + rep->rr_temp = temp; spin_lock(&buf->rb_lock); list_add(&rep->rr_list, &buf->rb_recv_bufs); @@ -1168,12 +1162,8 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) list_add(&req->rl_list, &buf->rb_send_bufs); } + buf->rb_posted_receives = 0; INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i <= buf->rb_max_requests; i++) { - rc = rpcrdma_create_rep(r_xprt); - if (rc) - goto out; - } rc = rpcrdma_sendctxs_create(r_xprt); if (rc) @@ -1263,7 +1253,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rep = rpcrdma_buffer_get_rep_locked(buf); rpcrdma_destroy_rep(rep); } - buf->rb_send_count = 0; spin_lock(&buf->rb_reqslock); while (!list_empty(&buf->rb_allreqs)) { @@ -1278,7 +1267,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); - buf->rb_recv_count = 0; rpcrdma_mrs_destroy(buf); } @@ -1351,27 +1339,11 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) __rpcrdma_mr_put(&r_xprt->rx_buf, mr); } -static struct rpcrdma_rep * -rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) -{ - /* If an RPC previously completed without a reply (say, a - * credential problem or a soft timeout occurs) then hold off - * on supplying more Receive buffers until the number of new - * pending RPCs catches up to the number of posted Receives. - */ - if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) - return NULL; - - if (unlikely(list_empty(&buffers->rb_recv_bufs))) - return NULL; - buffers->rb_recv_count++; - return rpcrdma_buffer_get_rep_locked(buffers); -} - -/* - * Get a set of request/reply buffers. +/** + * rpcrdma_buffer_get - Get a request buffer + * @buffers: Buffer pool from which to obtain a buffer * - * Reply buffer (if available) is attached to send buffer upon return. + * Returns a fresh rpcrdma_req, or NULL if none are available. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) @@ -1379,23 +1351,21 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) struct rpcrdma_req *req; spin_lock(&buffers->rb_lock); - if (list_empty(&buffers->rb_send_bufs)) - goto out_reqbuf; - buffers->rb_send_count++; + if (unlikely(list_empty(&buffers->rb_send_bufs))) + goto out_noreqs; req = rpcrdma_buffer_get_req_locked(buffers); - req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); - return req; -out_reqbuf: +out_noreqs: spin_unlock(&buffers->rb_lock); return NULL; } -/* - * Put request/reply buffers back into pool. - * Pre-decrement counter/array index. +/** + * rpcrdma_buffer_put - Put request/reply buffers back into pool + * @req: object to return + * */ void rpcrdma_buffer_put(struct rpcrdma_req *req) @@ -1406,27 +1376,16 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; spin_lock(&buffers->rb_lock); - buffers->rb_send_count--; - list_add_tail(&req->rl_list, &buffers->rb_send_bufs); + list_add(&req->rl_list, &buffers->rb_send_bufs); if (rep) { - buffers->rb_recv_count--; - list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + if (!rep->rr_temp) { + list_add(&rep->rr_list, &buffers->rb_recv_bufs); + rep = NULL; + } } spin_unlock(&buffers->rb_lock); -} - -/* - * Recover reply buffers from pool. - * This happens when recovering from disconnect. - */ -void -rpcrdma_recv_buffer_get(struct rpcrdma_req *req) -{ - struct rpcrdma_buffer *buffers = req->rl_buffer; - - spin_lock(&buffers->rb_lock); - req->rl_reply = rpcrdma_buffer_get_rep(buffers); - spin_unlock(&buffers->rb_lock); + if (rep) + rpcrdma_destroy_rep(rep); } /* @@ -1438,10 +1397,13 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; - spin_lock(&buffers->rb_lock); - buffers->rb_recv_count--; - list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); - spin_unlock(&buffers->rb_lock); + if (!rep->rr_temp) { + spin_lock(&buffers->rb_lock); + list_add(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock(&buffers->rb_lock); + } else { + rpcrdma_destroy_rep(rep); + } } /** @@ -1537,13 +1499,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; int rc; - if (req->rl_reply) { - rc = rpcrdma_ep_post_recv(ia, req->rl_reply); - if (rc) - return rc; - req->rl_reply = NULL; - } - if (!ep->rep_send_count || test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { send_wr->send_flags |= IB_SEND_SIGNALED; @@ -1618,3 +1573,70 @@ out_rc: rpcrdma_recv_buffer_put(rep); return rc; } + +/** + * rpcrdma_post_recvs - Maybe post some Receive buffers + * @r_xprt: controlling transport + * @temp: when true, allocate temp rpcrdma_rep objects + * + */ +void +rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct ib_recv_wr *wr, *bad_wr; + int needed, count, rc; + + needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); + if (buf->rb_posted_receives > needed) + return; + needed -= buf->rb_posted_receives; + + count = 0; + wr = NULL; + while (needed) { + struct rpcrdma_regbuf *rb; + struct rpcrdma_rep *rep; + + spin_lock(&buf->rb_lock); + rep = list_first_entry_or_null(&buf->rb_recv_bufs, + struct rpcrdma_rep, rr_list); + if (likely(rep)) + list_del(&rep->rr_list); + spin_unlock(&buf->rb_lock); + if (!rep) { + if (rpcrdma_create_rep(r_xprt, temp)) + break; + continue; + } + + rb = rep->rr_rdmabuf; + if (!rpcrdma_regbuf_is_mapped(rb)) { + if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { + rpcrdma_recv_buffer_put(rep); + break; + } + } + + trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); + rep->rr_recv_wr.next = wr; + wr = &rep->rr_recv_wr; + ++count; + --needed; + } + if (!count) + return; + + rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr); + if (rc) { + for (wr = bad_wr; wr; wr = wr->next) { + struct rpcrdma_rep *rep; + + rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); + rpcrdma_recv_buffer_put(rep); + --count; + } + } + buf->rb_posted_receives += count; + trace_xprtrdma_post_recvs(r_xprt, count, rc); +} -- cgit v1.2.1 From a7986f09986ac1befc85bcab30970312c476dbc7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:25 -0400 Subject: xprtrdma: Remove rpcrdma_ep_{post_recv, post_extra_recv} Clean up: These functions are no longer used. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 59 --------------------------------------------- 1 file changed, 59 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2ed2ee4f2c6c..601b78b64483 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1515,65 +1515,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, return 0; } -int -rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, - struct rpcrdma_rep *rep) -{ - struct ib_recv_wr *recv_wr_fail; - int rc; - - if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) - goto out_map; - rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); - trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); - if (rc) - return -ENOTCONN; - return 0; - -out_map: - pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); - return -EIO; -} - -/** - * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests - * @r_xprt: transport associated with these backchannel resources - * @count: minimum number of incoming requests expected - * - * Returns zero if all requested buffers were posted, or a negative errno. - */ -int -rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) -{ - struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_rep *rep; - int rc; - - while (count--) { - spin_lock(&buffers->rb_lock); - if (list_empty(&buffers->rb_recv_bufs)) - goto out_reqbuf; - rep = rpcrdma_buffer_get_rep_locked(buffers); - spin_unlock(&buffers->rb_lock); - - rc = rpcrdma_ep_post_recv(ia, rep); - if (rc) - goto out_rc; - } - - return 0; - -out_reqbuf: - spin_unlock(&buffers->rb_lock); - trace_xprtrdma_noreps(r_xprt); - return -ENOMEM; - -out_rc: - rpcrdma_recv_buffer_put(rep); - return rc; -} - /** * rpcrdma_post_recvs - Maybe post some Receive buffers * @r_xprt: controlling transport -- cgit v1.2.1 From e68699cc1a025d24608cf1533abc2edd7256d82c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:31 -0400 Subject: xprtrdma: Remove rpcrdma_buffer_get_req_locked() Clean up. There is only one call-site for this helper, and it can be simplified by using list_first_entry_or_null(). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 601b78b64483..b9a6e5d8d8c7 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1175,17 +1175,6 @@ out: return rc; } -static struct rpcrdma_req * -rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf) -{ - struct rpcrdma_req *req; - - req = list_first_entry(&buf->rb_send_bufs, - struct rpcrdma_req, rl_list); - list_del_init(&req->rl_list); - return req; -} - static struct rpcrdma_rep * rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) { @@ -1351,15 +1340,12 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) struct rpcrdma_req *req; spin_lock(&buffers->rb_lock); - if (unlikely(list_empty(&buffers->rb_send_bufs))) - goto out_noreqs; - req = rpcrdma_buffer_get_req_locked(buffers); + req = list_first_entry_or_null(&buffers->rb_send_bufs, + struct rpcrdma_req, rl_list); + if (req) + list_del_init(&req->rl_list); spin_unlock(&buffers->rb_lock); return req; - -out_noreqs: - spin_unlock(&buffers->rb_lock); - return NULL; } /** -- cgit v1.2.1 From 9d95cd534ca1ce969ff3f87d6e8ca6a9dfd1ad04 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:36 -0400 Subject: xprtrdma: Remove rpcrdma_buffer_get_rep_locked() Clean up: There is only one remaining call site for this helper. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b9a6e5d8d8c7..db11aa04c612 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1175,17 +1175,6 @@ out: return rc; } -static struct rpcrdma_rep * -rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) -{ - struct rpcrdma_rep *rep; - - rep = list_first_entry(&buf->rb_recv_bufs, - struct rpcrdma_rep, rr_list); - list_del(&rep->rr_list); - return rep; -} - static void rpcrdma_destroy_rep(struct rpcrdma_rep *rep) { @@ -1239,7 +1228,9 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) while (!list_empty(&buf->rb_recv_bufs)) { struct rpcrdma_rep *rep; - rep = rpcrdma_buffer_get_rep_locked(buf); + rep = list_first_entry(&buf->rb_recv_bufs, + struct rpcrdma_rep, rr_list); + list_del(&rep->rr_list); rpcrdma_destroy_rep(rep); } -- cgit v1.2.1 From efd81e90d3f719a2a7fd696c9ed0247e1cae1b7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:41 -0400 Subject: xprtrdma: Make rpcrdma_sendctx_put_locked() a static function Clean up: The only call site is in the same file as the function's definition. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index db11aa04c612..0e0b7d5cb74d 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -72,6 +72,7 @@ /* * internal functions */ +static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); @@ -949,7 +950,8 @@ out_emptyq: * * The caller serializes calls to this function (per rpcrdma_buffer). */ -void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) +static void +rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) { struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; unsigned long next_tail; -- cgit v1.2.1 From 2fad659209d5b1dbaa1f58606372571c61fc8cbd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:57 -0400 Subject: xprtrdma: Wait on empty sendctx queue Currently, when the sendctx queue is exhausted during marshaling, the RPC/RDMA transport places the RPC task on the delayq, which forces a wait for HZ >> 2 before the marshal and send is retried. With this change, the transport now places such an RPC task on the pending queue, and wakes it just as soon as more sendctxs become available. This typically takes less than a millisecond, and the write_space waking mechanism is less deadlock-prone. Moreover, the waiting RPC task is holding the transport's write lock, which blocks the transport from sending RPCs. Therefore faster recovery from sendctx queue exhaustion is desirable. Cf. commit 5804891455d5 ("xprtrdma: ->send_request returns -EAGAIN when there are no free MRs"). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/sunrpc/xprtrdma/verbs.c') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0e0b7d5cb74d..7276e82db3b4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -878,6 +878,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) sc->sc_xprt = r_xprt; buf->rb_sc_ctxs[i] = sc; } + buf->rb_flags = 0; return 0; @@ -935,7 +936,7 @@ out_emptyq: * completions recently. This is a sign the Send Queue is * backing up. Cause the caller to pause and try again. */ - dprintk("RPC: %s: empty sendctx queue\n", __func__); + set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); r_xprt->rx_stats.empty_sendctx_q++; return NULL; @@ -970,6 +971,11 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) /* Paired with READ_ONCE */ smp_store_release(&buf->rb_sc_tail, next_tail); + + if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) { + smp_mb__after_atomic(); + xprt_write_space(&sc->sc_xprt->rx_xprt); + } } static void -- cgit v1.2.1