summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/lockd/svclock.c4
-rw-r--r--fs/lockd/xdr.c8
-rw-r--r--fs/nfsd/nfs4state.c25
-rw-r--r--fs/nfsd/nfs4xdr.c50
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--include/linux/sunrpc/svc.h2
-rw-r--r--include/linux/sunrpc/svc_rdma.h13
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--net/sunrpc/svc_xprt.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c16
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c244
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c46
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c47
13 files changed, 264 insertions, 200 deletions
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 56598742dde4..5581e020644b 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock);
static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
{
/*
- * We can get away with a static buffer because we're only
- * called with BKL held.
+ * We can get away with a static buffer because this is only called
+ * from lockd, which is single-threaded.
*/
static char buf[2*NLM_MAXCOOKIELEN+1];
unsigned int i, len = sizeof(buf);
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 9340e7e10ef6..5b651daad518 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -95,14 +95,6 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f)
return p + XDR_QUADLEN(NFS2_FHSIZE);
}
-static inline __be32 *
-nlm_encode_fh(__be32 *p, struct nfs_fh *f)
-{
- *p++ = htonl(NFS2_FHSIZE);
- memcpy(p, f->data, NFS2_FHSIZE);
- return p + XDR_QUADLEN(NFS2_FHSIZE);
-}
-
/*
* Encode and decode owner handle
*/
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 532a60cca2fb..370a53a5da13 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -150,16 +150,6 @@ renew_client_locked(struct nfs4_client *clp)
clp->cl_time = get_seconds();
}
-static inline void
-renew_client(struct nfs4_client *clp)
-{
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
-
- spin_lock(&nn->client_lock);
- renew_client_locked(clp);
- spin_unlock(&nn->client_lock);
-}
-
static void put_client_renew_locked(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -688,7 +678,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
struct file *filp = NULL;
spin_lock(&fp->fi_lock);
- if (fp->fi_deleg_file && atomic_dec_and_test(&fp->fi_delegees))
+ if (fp->fi_deleg_file && --fp->fi_delegees == 0)
swap(filp, fp->fi_deleg_file);
spin_unlock(&fp->fi_lock);
@@ -1518,7 +1508,12 @@ unhash_session(struct nfsd4_session *ses)
static int
STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
{
- if (clid->cl_boot == nn->boot_time)
+ /*
+ * We're assuming the clid was not given out from a boot
+ * precisely 2^32 (about 136 years) before this one. That seems
+ * a safe assumption:
+ */
+ if (clid->cl_boot == (u32)nn->boot_time)
return 0;
dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
clid->cl_boot, clid->cl_id, nn->boot_time);
@@ -3856,12 +3851,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
/* Race breaker */
if (fp->fi_deleg_file) {
status = 0;
- atomic_inc(&fp->fi_delegees);
+ ++fp->fi_delegees;
hash_delegation_locked(dp, fp);
goto out_unlock;
}
fp->fi_deleg_file = filp;
- atomic_set(&fp->fi_delegees, 1);
+ fp->fi_delegees = 1;
hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
@@ -3902,7 +3897,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
status = -EAGAIN;
goto out_unlock;
}
- atomic_inc(&fp->fi_delegees);
+ ++fp->fi_delegees;
hash_delegation_locked(dp, fp);
status = 0;
out_unlock:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 15f7b73e0c0f..974533e5a427 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -234,6 +234,26 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
return ret;
}
+/*
+ * We require the high 32 bits of 'seconds' to be 0, and
+ * we ignore all 32 bits of 'nseconds'.
+ */
+static __be32
+nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
+{
+ DECODE_HEAD;
+ u64 sec;
+
+ READ_BUF(12);
+ p = xdr_decode_hyper(p, &sec);
+ tv->tv_sec = sec;
+ tv->tv_nsec = be32_to_cpup(p++);
+ if (tv->tv_nsec >= (u32)1000000000)
+ return nfserr_inval;
+
+ DECODE_TAIL;
+}
+
static __be32
nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
{
@@ -267,7 +287,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
{
int expected_len, len = 0;
u32 dummy32;
- u64 sec;
char *buf;
DECODE_HEAD;
@@ -358,15 +377,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
dummy32 = be32_to_cpup(p++);
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
- /* We require the high 32 bits of 'seconds' to be 0, and we ignore
- all 32 bits of 'nseconds'. */
- READ_BUF(12);
len += 12;
- p = xdr_decode_hyper(p, &sec);
- iattr->ia_atime.tv_sec = (time_t)sec;
- iattr->ia_atime.tv_nsec = be32_to_cpup(p++);
- if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
- return nfserr_inval;
+ status = nfsd4_decode_time(argp, &iattr->ia_atime);
+ if (status)
+ return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
break;
case NFS4_SET_TO_SERVER_TIME:
@@ -382,15 +396,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
dummy32 = be32_to_cpup(p++);
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
- /* We require the high 32 bits of 'seconds' to be 0, and we ignore
- all 32 bits of 'nseconds'. */
- READ_BUF(12);
len += 12;
- p = xdr_decode_hyper(p, &sec);
- iattr->ia_mtime.tv_sec = sec;
- iattr->ia_mtime.tv_nsec = be32_to_cpup(p++);
- if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
- return nfserr_inval;
+ status = nfsd4_decode_time(argp, &iattr->ia_mtime);
+ if (status)
+ return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
break;
case NFS4_SET_TO_SERVER_TIME:
@@ -2768,16 +2777,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
if (entry_bytes > cd->rd_maxcount)
goto fail;
cd->rd_maxcount -= entry_bytes;
- if (!cd->rd_dircount)
- goto fail;
/*
* RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
* let's always let through the first entry, at least:
*/
- name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8;
+ if (!cd->rd_dircount)
+ goto fail;
+ name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
goto fail;
cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+
cd->cookie_offset = cookie_offset;
skip_entry:
cd->common.err = nfs_ok;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9d3be371240a..dab6553ceea1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -493,7 +493,7 @@ struct nfs4_file {
atomic_t fi_access[2];
u32 fi_share_deny;
struct file *fi_deleg_file;
- atomic_t fi_delegees;
+ int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
};
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6f22cfeef5e3..fae6fb947fc8 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -110,7 +110,7 @@ struct svc_serv {
* We use sv_nrthreads as a reference count. svc_destroy() drops
* this refcount, so we need to bump it up around operations that
* change the number of threads. Horrible, but there it is.
- * Should be called with the BKL held.
+ * Should be called with the "service mutex" held.
*/
static inline void svc_get(struct svc_serv *serv)
{
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 975da754c778..c343a94bc791 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -79,6 +79,7 @@ struct svc_rdma_op_ctxt {
enum ib_wr_opcode wr_op;
enum ib_wc_status wc_status;
u32 byte_len;
+ u32 position;
struct svcxprt_rdma *xprt;
unsigned long flags;
enum dma_data_direction direction;
@@ -150,6 +151,10 @@ struct svcxprt_rdma {
struct ib_cq *sc_rq_cq;
struct ib_cq *sc_sq_cq;
struct ib_mr *sc_phys_mr; /* MR for server memory */
+ int (*sc_reader)(struct svcxprt_rdma *,
+ struct svc_rqst *,
+ struct svc_rdma_op_ctxt *,
+ int *, u32 *, u32, u32, u64, bool);
u32 sc_dev_caps; /* distilled device caps */
u32 sc_dma_lkey; /* local dma key */
unsigned int sc_frmr_pg_list_len;
@@ -178,8 +183,6 @@ struct svcxprt_rdma {
#define RPCRDMA_MAX_REQ_SIZE 4096
/* svc_rdma_marshal.c */
-extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *,
- int *, int *);
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -197,6 +200,12 @@ extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
/* svc_rdma_recvfrom.c */
extern int svc_rdma_recvfrom(struct svc_rqst *);
+extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
+ struct svc_rdma_op_ctxt *, int *, u32 *,
+ u32, u32, u64, bool);
+extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
+ struct svc_rdma_op_ctxt *, int *, u32 *,
+ u32, u32, u64, bool);
/* svc_rdma_sendto.c */
extern int svc_rdma_sendto(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 91eaef1844c8..78974e4d9ad2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
EXPORT_SYMBOL_GPL(svc_set_num_threads);
/*
- * Called from a server thread as it's exiting. Caller must hold the BKL or
- * the "service mutex", whichever is appropriate for the service.
+ * Called from a server thread as it's exiting. Caller must hold the "service
+ * mutex" for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c69358b3cf7f..163ac45c3639 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list);
* svc_pool->sp_lock protects most of the fields of that pool.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
- * BKL protects svc_serv->sv_nrthread.
+ * The "service mutex" protects svc_serv->sv_nrthread.
* svc_sock->sk_lock protects the svc_sock->sk_deferred list
* and the ->sk_info_authunix cache.
*
@@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list);
* that no other thread will be using the transport or will
* try to set XPT_DEAD.
*/
-
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
{
struct svc_xprt_class *cl;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 65b146297f5a..b681855cf970 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
}
/*
- * Determine number of chunks and total bytes in chunk list. The chunk
- * list has already been verified to fit within the RPCRDMA header.
- */
-void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
- int *ch_count, int *byte_count)
-{
- /* compute the number of bytes represented by read chunks */
- *byte_count = 0;
- *ch_count = 0;
- for (; ch->rc_discrim != 0; ch++) {
- *byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
- *ch_count = *ch_count + 1;
- }
-}
-
-/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e0110270d650..f9f13a32ddb8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -43,7 +43,6 @@
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
-#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt,
u32 byte_count)
{
+ struct rpcrdma_msg *rmsgp;
struct page *page;
u32 bc;
int sge_no;
@@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc;
rqstp->rq_arg.page_base = 0;
- rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
+ /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+ rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+ if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+ rqstp->rq_arg.pages = &rqstp->rq_pages[0];
+ else
+ rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
sge_no = 1;
while (bc && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
@@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* We should never run out of SGE because the limit is defined to
- * support the max allowed RPC data length
- */
- BUG_ON(bc && (sge_no == ctxt->count));
- BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
- != byte_count);
- BUG_ON(rqstp->rq_arg.len != byte_count);
-
/* If not all pages were used from the SGL, free the remaining ones */
bc = sge_no;
while (sge_no < ctxt->count) {
@@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
return min_t(int, sge_count, xprt->sc_max_sge);
}
-typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- int *page_no,
- u32 *page_offset,
- u32 rs_handle,
- u32 rs_length,
- u64 rs_offset,
- int last);
-
/* Issue an RDMA_READ using the local lkey to map the data sink */
-static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- int *page_no,
- u32 *page_offset,
- u32 rs_handle,
- u32 rs_length,
- u64 rs_offset,
- int last)
+int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ bool last)
{
struct ib_send_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
@@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
}
/* Issue an RDMA_READ using an FRMR to map the data sink */
-static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- int *page_no,
- u32 *page_offset,
- u32 rs_handle,
- u32 rs_length,
- u64 rs_offset,
- int last)
+int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ bool last)
{
struct ib_send_wr read_wr;
struct ib_send_wr inv_wr;
@@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
return ret;
}
+static unsigned int
+rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
+{
+ unsigned int count;
+
+ for (count = 0; ch->rc_discrim != xdr_zero; ch++)
+ count++;
+ return count;
+}
+
+/* If there was additional inline content, append it to the end of arg.pages.
+ * Tail copy has to be done after the reader function has determined how many
+ * pages are needed for RDMA READ.
+ */
+static int
+rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
+ u32 position, u32 byte_count, u32 page_offset, int page_no)
+{
+ char *srcp, *destp;
+ int ret;
+
+ ret = 0;
+ srcp = head->arg.head[0].iov_base + position;
+ byte_count = head->arg.head[0].iov_len - position;
+ if (byte_count > PAGE_SIZE) {
+ dprintk("svcrdma: large tail unsupported\n");
+ return 0;
+ }
+
+ /* Fit as much of the tail on the current page as possible */
+ if (page_offset != PAGE_SIZE) {
+ destp = page_address(rqstp->rq_arg.pages[page_no]);
+ destp += page_offset;
+ while (byte_count--) {
+ *destp++ = *srcp++;
+ page_offset++;
+ if (page_offset == PAGE_SIZE && byte_count)
+ goto more;
+ }
+ goto done;
+ }
+
+more:
+ /* Fit the rest on the next page */
+ page_no++;
+ destp = page_address(rqstp->rq_arg.pages[page_no]);
+ while (byte_count--)
+ *destp++ = *srcp++;
+
+ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+done:
+ byte_count = head->arg.head[0].iov_len - position;
+ head->arg.page_len += byte_count;
+ head->arg.len += byte_count;
+ head->arg.buflen += byte_count;
+ return 1;
+}
+
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
- int page_no, ch_count, ret;
+ int page_no, ret;
struct rpcrdma_read_chunk *ch;
- u32 page_offset, byte_count;
+ u32 handle, page_offset, byte_count;
+ u32 position;
u64 rs_offset;
- rdma_reader_fn reader;
+ bool last;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
- svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
- if (ch_count > RPCSVC_MAXPAGES)
+ if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
return -EINVAL;
/* The request is completed when the RDMA_READs complete. The
@@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
*/
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
head->hdr_count = head->count;
head->arg.page_base = 0;
head->arg.page_len = 0;
head->arg.len = rqstp->rq_arg.len;
head->arg.buflen = rqstp->rq_arg.buflen;
- /* Use FRMR if supported */
- if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
- reader = rdma_read_chunk_frmr;
- else
- reader = rdma_read_chunk_lcl;
+ ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+ position = be32_to_cpu(ch->rc_position);
+
+ /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+ if (position == 0) {
+ head->arg.pages = &head->pages[0];
+ page_offset = head->byte_len;
+ } else {
+ head->arg.pages = &head->pages[head->count];
+ page_offset = 0;
+ }
- page_no = 0; page_offset = 0;
- for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch->rc_discrim != 0; ch++) {
+ ret = 0;
+ page_no = 0;
+ for (; ch->rc_discrim != xdr_zero; ch++) {
+ if (be32_to_cpu(ch->rc_position) != position)
+ goto err;
+ handle = be32_to_cpu(ch->rc_target.rs_handle),
+ byte_count = be32_to_cpu(ch->rc_target.rs_length);
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
- byte_count = ntohl(ch->rc_target.rs_length);
while (byte_count > 0) {
- ret = reader(xprt, rqstp, head,
- &page_no, &page_offset,
- ntohl(ch->rc_target.rs_handle),
- byte_count, rs_offset,
- ((ch+1)->rc_discrim == 0) /* last */
- );
+ last = (ch + 1)->rc_discrim == xdr_zero;
+ ret = xprt->sc_reader(xprt, rqstp, head,
+ &page_no, &page_offset,
+ handle, byte_count,
+ rs_offset, last);
if (ret < 0)
goto err;
byte_count -= ret;
@@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
head->arg.buflen += ret;
}
}
+
+ /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
+ if (page_offset & 3) {
+ u32 pad = 4 - (page_offset & 3);
+
+ head->arg.page_len += pad;
+ head->arg.len += pad;
+ head->arg.buflen += pad;
+ page_offset += pad;
+ }
+
ret = 1;
+ if (position && position < head->arg.head[0].iov_len)
+ ret = rdma_copy_tail(rqstp, head, position,
+ byte_count, page_offset, page_no);
+ head->arg.head[0].iov_len = position;
+ head->position = position;
+
err:
/* Detach arg pages. svc_recv will replenish them */
for (page_no = 0;
@@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
return ret;
}
-/*
- * To avoid a separate RDMA READ just for a handful of zero bytes,
- * RFC 5666 section 3.7 allows the client to omit the XDR zero pad
- * in chunk lists.
- */
-static void
-rdma_fix_xdr_pad(struct xdr_buf *buf)
-{
- unsigned int page_len = buf->page_len;
- unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
- unsigned int offset, pg_no;
- char *p;
-
- if (size == 0)
- return;
-
- pg_no = page_len >> PAGE_SHIFT;
- offset = page_len & ~PAGE_MASK;
- p = page_address(buf->pages[pg_no]);
- memset(p + offset, 0, size);
-
- buf->page_len += size;
- buf->buflen += size;
- buf->len += size;
-}
-
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no;
int ret;
- BUG_ON(!head);
-
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->pages[page_no];
}
+
+ /* Adjustments made for RDMA_NOMSG type requests */
+ if (head->position == 0) {
+ if (head->arg.len <= head->sge[0].length) {
+ head->arg.head[0].iov_len = head->arg.len -
+ head->byte_len;
+ head->arg.page_len = 0;
+ } else {
+ head->arg.head[0].iov_len = head->sge[0].length -
+ head->byte_len;
+ head->arg.page_len = head->arg.len -
+ head->sge[0].length;
+ }
+ }
+
/* Point rq_arg.pages past header */
- rdma_fix_xdr_pad(&head->arg);
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
@@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
- dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
- "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
+ dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, "
+ "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n",
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
@@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
- BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
@@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
+ rqstp->rq_arg.tail[0].iov_len;
svc_rdma_put_context(ctxt, 0);
out:
- dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
- "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
+ dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
+ "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
ret, rqstp->rq_arg.len,
rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9f1b50689c0f..7de33d1af9b6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
u32 page_off;
int page_no;
- BUG_ON(xdr->len !=
- (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
+ if (xdr->len !=
+ (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
+ pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+ return -EIO;
+ }
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
@@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
int bc;
struct svc_rdma_op_ctxt *ctxt;
- BUG_ON(vec->count > RPCSVC_MAXPAGES);
+ if (vec->count > RPCSVC_MAXPAGES) {
+ pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
+ return -EIO;
+ }
+
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off,
@@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_off = 0;
sge_no++;
xdr_sge_no++;
- BUG_ON(xdr_sge_no > vec->count);
+ if (xdr_sge_no > vec->count) {
+ pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
+ goto err;
+ }
bc -= sge_bytes;
if (sge_no == xprt->sc_max_sge)
break;
@@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
- BUG_ON(byte_count != 0);
+ if (byte_count != 0) {
+ pr_err("svcrdma: Could not map %d bytes\n", byte_count);
+ goto err;
+ }
/* Save all respages in the ctxt and remove them from the
* respages array. They are our pages until the I/O
@@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
- BUG_ON(sge_no > rdma->sc_max_sge);
+ if (sge_no > rdma->sc_max_sge) {
+ pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+ goto err;
+ }
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
@@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
-/*
- * Return the start of an xdr buffer.
- */
-static void *xdr_start(struct xdr_buf *xdr)
-{
- return xdr->head[0].iov_base -
- (xdr->len -
- xdr->page_len -
- xdr->tail[0].iov_len -
- xdr->head[0].iov_len);
-}
-
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
@@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
- /* Get the RDMA request header. */
- rdma_argp = xdr_start(&rqstp->rq_arg);
+ /* Get the RDMA request header. The receive logic always
+ * places this at the start of page 0.
+ */
+ rdma_argp = page_address(rqstp->rq_pages[0]);
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4e618808bc98..f609c1c2d38d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
struct svcxprt_rdma *xprt;
int i;
- BUG_ON(!ctxt);
xprt = ctxt->xprt;
if (free_pages)
for (i = 0; i < ctxt->count; i++)
@@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) {
case IB_WR_SEND:
- BUG_ON(ctxt->frmr);
+ if (ctxt->frmr)
+ pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
- BUG_ON(ctxt->frmr);
+ if (ctxt->frmr)
+ pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
svc_rdma_put_context(ctxt, 0);
break;
@@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt,
svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- BUG_ON(!read_hdr);
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ if (read_hdr) {
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ } else {
+ pr_err("svcrdma: ctxt->read_hdr == NULL\n");
+ }
svc_xprt_enqueue(&xprt->sc_xprt);
}
svc_rdma_put_context(ctxt, 0);
break;
default:
- BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n",
ctxt->wr_op);
@@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
buflen = 0;
ctxt->direction = DMA_FROM_DEVICE;
for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
- BUG_ON(sge_no >= xprt->sc_max_sge);
+ if (sge_no >= xprt->sc_max_sge) {
+ pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+ goto err_put_ctxt;
+ }
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
@@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
{
struct rdma_cm_id *listen_id;
struct svcxprt_rdma *cma_xprt;
- struct svc_xprt *xprt;
int ret;
dprintk("svcrdma: Creating RDMA socket\n");
@@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
cma_xprt = rdma_create_xprt(serv, 1);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
- xprt = &cma_xprt->sc_xprt;
listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
IB_QPT_RC);
@@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) {
frmr_unmap_dma(rdma, frmr);
spin_lock_bh(&rdma->sc_frmr_q_lock);
- BUG_ON(!list_empty(&frmr->frmr_list));
+ WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
spin_unlock_bh(&rdma->sc_frmr_q_lock);
}
@@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
* NB: iWARP requires remote write access for the data sink
* of an RDMA_READ. IB does not.
*/
+ newxprt->sc_reader = rdma_read_chunk_lcl;
if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
newxprt->sc_frmr_pg_list_len =
devattr.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+ newxprt->sc_reader = rdma_read_chunk_frmr;
}
/*
@@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work)
dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
/* We should only be called from kref_put */
- BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+ if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+ pr_err("svcrdma: sc_xprt still in use? (%d)\n",
+ atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
/*
* Destroy queued, but not processed read completions. Note
@@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work)
}
/* Warn if we leaked a resource or under-referenced */
- WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
- WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
+ if (atomic_read(&rdma->sc_ctxt_used) != 0)
+ pr_err("svcrdma: ctxt still in use? (%d)\n",
+ atomic_read(&rdma->sc_ctxt_used));
+ if (atomic_read(&rdma->sc_dma_used) != 0)
+ pr_err("svcrdma: dma still in use? (%d)\n",
+ atomic_read(&rdma->sc_dma_used));
/* De-allocate fastreg mr */
rdma_dealloc_frmr_q(rdma);
@@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
return -ENOTCONN;
- BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
wr_count = 1;
for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
wr_count++;