diff options
-rw-r--r-- | fs/lockd/svclock.c | 4 | ||||
-rw-r--r-- | fs/lockd/xdr.c | 8 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 25 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 50 | ||||
-rw-r--r-- | fs/nfsd/state.h | 2 | ||||
-rw-r--r-- | include/linux/sunrpc/svc.h | 2 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_rdma.h | 13 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 4 | ||||
-rw-r--r-- | net/sunrpc/svc_xprt.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_marshal.c | 16 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 244 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 46 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 47 |
13 files changed, 264 insertions, 200 deletions
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 56598742dde4..5581e020644b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock); static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* - * We can get away with a static buffer because we're only - * called with BKL held. + * We can get away with a static buffer because this is only called + * from lockd, which is single-threaded. */ static char buf[2*NLM_MAXCOOKIELEN+1]; unsigned int i, len = sizeof(buf); diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9340e7e10ef6..5b651daad518 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -95,14 +95,6 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f) return p + XDR_QUADLEN(NFS2_FHSIZE); } -static inline __be32 * -nlm_encode_fh(__be32 *p, struct nfs_fh *f) -{ - *p++ = htonl(NFS2_FHSIZE); - memcpy(p, f->data, NFS2_FHSIZE); - return p + XDR_QUADLEN(NFS2_FHSIZE); -} - /* * Encode and decode owner handle */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 532a60cca2fb..370a53a5da13 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -150,16 +150,6 @@ renew_client_locked(struct nfs4_client *clp) clp->cl_time = get_seconds(); } -static inline void -renew_client(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - renew_client_locked(clp); - spin_unlock(&nn->client_lock); -} - static void put_client_renew_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -688,7 +678,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) struct file *filp = NULL; spin_lock(&fp->fi_lock); - if (fp->fi_deleg_file && atomic_dec_and_test(&fp->fi_delegees)) + if (fp->fi_deleg_file && --fp->fi_delegees == 0) swap(filp, fp->fi_deleg_file); spin_unlock(&fp->fi_lock); @@ -1518,7 +1508,12 @@ unhash_session(struct nfsd4_session *ses) static int STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) { - if (clid->cl_boot == nn->boot_time) + /* + * We're assuming the clid was not given out from a boot + * precisely 2^32 (about 136 years) before this one. That seems + * a safe assumption: + */ + if (clid->cl_boot == (u32)nn->boot_time) return 0; dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", clid->cl_boot, clid->cl_id, nn->boot_time); @@ -3856,12 +3851,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) /* Race breaker */ if (fp->fi_deleg_file) { status = 0; - atomic_inc(&fp->fi_delegees); + ++fp->fi_delegees; hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - atomic_set(&fp->fi_delegees, 1); + fp->fi_delegees = 1; hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -3902,7 +3897,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - atomic_inc(&fp->fi_delegees); + ++fp->fi_delegees; hash_delegation_locked(dp, fp); status = 0; out_unlock: diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 15f7b73e0c0f..974533e5a427 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -234,6 +234,26 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) return ret; } +/* + * We require the high 32 bits of 'seconds' to be 0, and + * we ignore all 32 bits of 'nseconds'. + */ +static __be32 +nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv) +{ + DECODE_HEAD; + u64 sec; + + READ_BUF(12); + p = xdr_decode_hyper(p, &sec); + tv->tv_sec = sec; + tv->tv_nsec = be32_to_cpup(p++); + if (tv->tv_nsec >= (u32)1000000000) + return nfserr_inval; + + DECODE_TAIL; +} + static __be32 nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) { @@ -267,7 +287,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, { int expected_len, len = 0; u32 dummy32; - u64 sec; char *buf; DECODE_HEAD; @@ -358,15 +377,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: - /* We require the high 32 bits of 'seconds' to be 0, and we ignore - all 32 bits of 'nseconds'. */ - READ_BUF(12); len += 12; - p = xdr_decode_hyper(p, &sec); - iattr->ia_atime.tv_sec = (time_t)sec; - iattr->ia_atime.tv_nsec = be32_to_cpup(p++); - if (iattr->ia_atime.tv_nsec >= (u32)1000000000) - return nfserr_inval; + status = nfsd4_decode_time(argp, &iattr->ia_atime); + if (status) + return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); break; case NFS4_SET_TO_SERVER_TIME: @@ -382,15 +396,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: - /* We require the high 32 bits of 'seconds' to be 0, and we ignore - all 32 bits of 'nseconds'. */ - READ_BUF(12); len += 12; - p = xdr_decode_hyper(p, &sec); - iattr->ia_mtime.tv_sec = sec; - iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); - if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) - return nfserr_inval; + status = nfsd4_decode_time(argp, &iattr->ia_mtime); + if (status) + return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); break; case NFS4_SET_TO_SERVER_TIME: @@ -2768,16 +2777,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, if (entry_bytes > cd->rd_maxcount) goto fail; cd->rd_maxcount -= entry_bytes; - if (!cd->rd_dircount) - goto fail; /* * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so * let's always let through the first entry, at least: */ - name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; + if (!cd->rd_dircount) + goto fail; + name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) goto fail; cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9d3be371240a..dab6553ceea1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -493,7 +493,7 @@ struct nfs4_file { atomic_t fi_access[2]; u32 fi_share_deny; struct file *fi_deleg_file; - atomic_t fi_delegees; + int fi_delegees; struct knfsd_fh fi_fhandle; bool fi_had_conflict; }; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6f22cfeef5e3..fae6fb947fc8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -110,7 +110,7 @@ struct svc_serv { * We use sv_nrthreads as a reference count. svc_destroy() drops * this refcount, so we need to bump it up around operations that * change the number of threads. Horrible, but there it is. - * Should be called with the BKL held. + * Should be called with the "service mutex" held. */ static inline void svc_get(struct svc_serv *serv) { diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 975da754c778..c343a94bc791 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -79,6 +79,7 @@ struct svc_rdma_op_ctxt { enum ib_wr_opcode wr_op; enum ib_wc_status wc_status; u32 byte_len; + u32 position; struct svcxprt_rdma *xprt; unsigned long flags; enum dma_data_direction direction; @@ -150,6 +151,10 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; struct ib_mr *sc_phys_mr; /* MR for server memory */ + int (*sc_reader)(struct svcxprt_rdma *, + struct svc_rqst *, + struct svc_rdma_op_ctxt *, + int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; @@ -178,8 +183,6 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ -extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, - int *, int *); extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, @@ -197,6 +200,12 @@ extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); +extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); +extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 91eaef1844c8..78974e4d9ad2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL_GPL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold the BKL or - * the "service mutex", whichever is appropriate for the service. + * Called from a server thread as it's exiting. Caller must hold the "service + * mutex" for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c69358b3cf7f..163ac45c3639 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list); * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. - * BKL protects svc_serv->sv_nrthread. + * The "service mutex" protects svc_serv->sv_nrthread. * svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * @@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. */ - int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 65b146297f5a..b681855cf970 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) } /* - * Determine number of chunks and total bytes in chunk list. The chunk - * list has already been verified to fit within the RPCRDMA header. - */ -void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, - int *ch_count, int *byte_count) -{ - /* compute the number of bytes represented by read chunks */ - *byte_count = 0; - *ch_count = 0; - for (; ch->rc_discrim != 0; ch++) { - *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); - *ch_count = *ch_count + 1; - } -} - -/* * Decodes a write chunk list. The expected format is as follows: * descrim : xdr_one * nchunks : <count> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e0110270d650..f9f13a32ddb8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -43,7 +43,6 @@ #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> #include <linux/spinlock.h> -#include <linux/highmem.h> #include <asm/unaligned.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt, u32 byte_count) { + struct rpcrdma_msg *rmsgp; struct page *page; u32 bc; int sge_no; @@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* If data remains, store it in the pagelist */ rqstp->rq_arg.page_len = bc; rqstp->rq_arg.page_base = 0; - rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) + rqstp->rq_arg.pages = &rqstp->rq_pages[0]; + else + rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + sge_no = 1; while (bc && sge_no < ctxt->count) { page = ctxt->pages[sge_no]; @@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_respages = &rqstp->rq_pages[sge_no]; rqstp->rq_next_page = rqstp->rq_respages + 1; - /* We should never run out of SGE because the limit is defined to - * support the max allowed RPC data length - */ - BUG_ON(bc && (sge_no == ctxt->count)); - BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) - != byte_count); - BUG_ON(rqstp->rq_arg.len != byte_count); - /* If not all pages were used from the SGL, free the remaining ones */ bc = sge_no; while (sge_no < ctxt->count) { @@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) return min_t(int, sge_count, xprt->sc_max_sge); } -typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last); - /* Issue an RDMA_READ using the local lkey to map the data sink */ -static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; @@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, } /* Issue an RDMA_READ using an FRMR to map the data sink */ -static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; struct ib_send_wr inv_wr; @@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, return ret; } +static unsigned int +rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) +{ + unsigned int count; + + for (count = 0; ch->rc_discrim != xdr_zero; ch++) + count++; + return count; +} + +/* If there was additional inline content, append it to the end of arg.pages. + * Tail copy has to be done after the reader function has determined how many + * pages are needed for RDMA READ. + */ +static int +rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, + u32 position, u32 byte_count, u32 page_offset, int page_no) +{ + char *srcp, *destp; + int ret; + + ret = 0; + srcp = head->arg.head[0].iov_base + position; + byte_count = head->arg.head[0].iov_len - position; + if (byte_count > PAGE_SIZE) { + dprintk("svcrdma: large tail unsupported\n"); + return 0; + } + + /* Fit as much of the tail on the current page as possible */ + if (page_offset != PAGE_SIZE) { + destp = page_address(rqstp->rq_arg.pages[page_no]); + destp += page_offset; + while (byte_count--) { + *destp++ = *srcp++; + page_offset++; + if (page_offset == PAGE_SIZE && byte_count) + goto more; + } + goto done; + } + +more: + /* Fit the rest on the next page */ + page_no++; + destp = page_address(rqstp->rq_arg.pages[page_no]); + while (byte_count--) + *destp++ = *srcp++; + + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + +done: + byte_count = head->arg.head[0].iov_len - position; + head->arg.page_len += byte_count; + head->arg.len += byte_count; + head->arg.buflen += byte_count; + return 1; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { - int page_no, ch_count, ret; + int page_no, ret; struct rpcrdma_read_chunk *ch; - u32 page_offset, byte_count; + u32 handle, page_offset, byte_count; + u32 position; u64 rs_offset; - rdma_reader_fn reader; + bool last; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); if (!ch) return 0; - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); - if (ch_count > RPCSVC_MAXPAGES) + if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) return -EINVAL; /* The request is completed when the RDMA_READs complete. The @@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, */ head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; head->hdr_count = head->count; head->arg.page_base = 0; head->arg.page_len = 0; head->arg.len = rqstp->rq_arg.len; head->arg.buflen = rqstp->rq_arg.buflen; - /* Use FRMR if supported */ - if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) - reader = rdma_read_chunk_frmr; - else - reader = rdma_read_chunk_lcl; + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + position = be32_to_cpu(ch->rc_position); + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + if (position == 0) { + head->arg.pages = &head->pages[0]; + page_offset = head->byte_len; + } else { + head->arg.pages = &head->pages[head->count]; + page_offset = 0; + } - page_no = 0; page_offset = 0; - for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++) { + ret = 0; + page_no = 0; + for (; ch->rc_discrim != xdr_zero; ch++) { + if (be32_to_cpu(ch->rc_position) != position) + goto err; + handle = be32_to_cpu(ch->rc_target.rs_handle), + byte_count = be32_to_cpu(ch->rc_target.rs_length); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - byte_count = ntohl(ch->rc_target.rs_length); while (byte_count > 0) { - ret = reader(xprt, rqstp, head, - &page_no, &page_offset, - ntohl(ch->rc_target.rs_handle), - byte_count, rs_offset, - ((ch+1)->rc_discrim == 0) /* last */ - ); + last = (ch + 1)->rc_discrim == xdr_zero; + ret = xprt->sc_reader(xprt, rqstp, head, + &page_no, &page_offset, + handle, byte_count, + rs_offset, last); if (ret < 0) goto err; byte_count -= ret; @@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.buflen += ret; } } + + /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ + if (page_offset & 3) { + u32 pad = 4 - (page_offset & 3); + + head->arg.page_len += pad; + head->arg.len += pad; + head->arg.buflen += pad; + page_offset += pad; + } + ret = 1; + if (position && position < head->arg.head[0].iov_len) + ret = rdma_copy_tail(rqstp, head, position, + byte_count, page_offset, page_no); + head->arg.head[0].iov_len = position; + head->position = position; + err: /* Detach arg pages. svc_recv will replenish them */ for (page_no = 0; @@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, return ret; } -/* - * To avoid a separate RDMA READ just for a handful of zero bytes, - * RFC 5666 section 3.7 allows the client to omit the XDR zero pad - * in chunk lists. - */ -static void -rdma_fix_xdr_pad(struct xdr_buf *buf) -{ - unsigned int page_len = buf->page_len; - unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; - unsigned int offset, pg_no; - char *p; - - if (size == 0) - return; - - pg_no = page_len >> PAGE_SHIFT; - offset = page_len & ~PAGE_MASK; - p = page_address(buf->pages[pg_no]); - memset(p + offset, 0, size); - - buf->page_len += size; - buf->buflen += size; - buf->len += size; -} - static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { int page_no; int ret; - BUG_ON(!head); - /* Copy RPC pages */ for (page_no = 0; page_no < head->count; page_no++) { put_page(rqstp->rq_pages[page_no]); rqstp->rq_pages[page_no] = head->pages[page_no]; } + + /* Adjustments made for RDMA_NOMSG type requests */ + if (head->position == 0) { + if (head->arg.len <= head->sge[0].length) { + head->arg.head[0].iov_len = head->arg.len - + head->byte_len; + head->arg.page_len = 0; + } else { + head->arg.head[0].iov_len = head->sge[0].length - + head->byte_len; + head->arg.page_len = head->arg.len - + head->sge[0].length; + } + } + /* Point rq_arg.pages past header */ - rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; @@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + rqstp->rq_arg.tail[0].iov_len; - dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); @@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", ctxt, rdma_xprt, rqstp, ctxt->wc_status); - BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); atomic_inc(&rdma_stat_recv); /* Build up the XDR from the receive buffers. */ @@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + rqstp->rq_arg.tail[0].iov_len; svc_rdma_put_context(ctxt, 0); out: - dprintk("svcrdma: ret = %d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9f1b50689c0f..7de33d1af9b6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, u32 page_off; int page_no; - BUG_ON(xdr->len != - (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + if (xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { + pr_err("svcrdma: map_xdr: XDR buffer length error\n"); + return -EIO; + } /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, int bc; struct svc_rdma_op_ctxt *ctxt; - BUG_ON(vec->count > RPCSVC_MAXPAGES); + if (vec->count > RPCSVC_MAXPAGES) { + pr_err("svcrdma: Too many pages (%lu)\n", vec->count); + return -EIO; + } + dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, vec->sge=%p, vec->count=%lu\n", rmr, (unsigned long long)to, xdr_off, @@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_off = 0; sge_no++; xdr_sge_no++; - BUG_ON(xdr_sge_no > vec->count); + if (xdr_sge_no > vec->count) { + pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no); + goto err; + } bc -= sge_bytes; if (sge_no == xprt->sc_max_sge) break; @@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } - BUG_ON(byte_count != 0); + if (byte_count != 0) { + pr_err("svcrdma: Could not map %d bytes\n", byte_count); + goto err; + } /* Save all respages in the ctxt and remove them from the * respages array. They are our pages until the I/O @@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; - BUG_ON(sge_no > rdma->sc_max_sge); + if (sge_no > rdma->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err; + } memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; @@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) { } -/* - * Return the start of an xdr buffer. - */ -static void *xdr_start(struct xdr_buf *xdr) -{ - return xdr->head[0].iov_base - - (xdr->len - - xdr->page_len - - xdr->tail[0].iov_len - - xdr->head[0].iov_len); -} - int svc_rdma_sendto(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; @@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); - /* Get the RDMA request header. */ - rdma_argp = xdr_start(&rqstp->rq_arg); + /* Get the RDMA request header. The receive logic always + * places this at the start of page 0. + */ + rdma_argp = page_address(rqstp->rq_pages[0]); /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4e618808bc98..f609c1c2d38d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) struct svcxprt_rdma *xprt; int i; - BUG_ON(!ctxt); xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) @@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: SEND: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 0); break; @@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt, svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); + if (read_hdr) { + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + } else { + pr_err("svcrdma: ctxt->read_hdr == NULL\n"); + } svc_xprt_enqueue(&xprt->sc_xprt); } svc_rdma_put_context(ctxt, 0); break; default: - BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) buflen = 0; ctxt->direction = DMA_FROM_DEVICE; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { - BUG_ON(sge_no >= xprt->sc_max_sge); + if (sge_no >= xprt->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err_put_ctxt; + } page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, @@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, { struct rdma_cm_id *listen_id; struct svcxprt_rdma *cma_xprt; - struct svc_xprt *xprt; int ret; dprintk("svcrdma: Creating RDMA socket\n"); @@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) return ERR_PTR(-ENOMEM); - xprt = &cma_xprt->sc_xprt; listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, IB_QPT_RC); @@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { frmr_unmap_dma(rdma, frmr); spin_lock_bh(&rdma->sc_frmr_q_lock); - BUG_ON(!list_empty(&frmr->frmr_list)); + WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); spin_unlock_bh(&rdma->sc_frmr_q_lock); } @@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * NB: iWARP requires remote write access for the data sink * of an RDMA_READ. IB does not. */ + newxprt->sc_reader = rdma_read_chunk_lcl; if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { newxprt->sc_frmr_pg_list_len = devattr.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + newxprt->sc_reader = rdma_read_chunk_frmr; } /* @@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work) dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); /* We should only be called from kref_put */ - BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); + if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) + pr_err("svcrdma: sc_xprt still in use? (%d)\n", + atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); /* * Destroy queued, but not processed read completions. Note @@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); - WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + if (atomic_read(&rdma->sc_ctxt_used) != 0) + pr_err("svcrdma: ctxt still in use? (%d)\n", + atomic_read(&rdma->sc_ctxt_used)); + if (atomic_read(&rdma->sc_dma_used) != 0) + pr_err("svcrdma: dma still in use? (%d)\n", + atomic_read(&rdma->sc_dma_used)); /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); @@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; - BUG_ON(wr->send_flags != IB_SEND_SIGNALED); wr_count = 1; for (n_wr = wr->next; n_wr; n_wr = n_wr->next) wr_count++; |