diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2021-04-13 10:45:40 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2021-04-13 10:45:40 +1000 |
commit | be4a98b09aecdd96ea6711bc943d642591a1dea2 (patch) | |
tree | 52bd49f9321fbee08ef396d33dffce989080c904 | |
parent | 03a9909d6c832b8ab13089fa89aa5f814b12e11a (diff) | |
parent | b73ac6808b0f7994a05ebc38571e2e9eaf98a0f4 (diff) | |
download | linux-next-be4a98b09aecdd96ea6711bc943d642591a1dea2.tar.gz |
Merge remote-tracking branch 'cel/for-next'
40 files changed, 1599 insertions, 1185 deletions
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 79c563c1a5e8..5a5bd85d08f8 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -136,6 +136,77 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, } EXPORT_SYMBOL_GPL(nfsacl_encode); +/** + * nfs_stream_encode_acl - Encode an NFSv3 ACL + * + * @xdr: an xdr_stream positioned to receive an encoded ACL + * @inode: inode of file whose ACL this is + * @acl: posix_acl to encode + * @encode_entries: whether to encode ACEs as well + * @typeflag: ACL type: NFS_ACL_DEFAULT or zero + * + * Return values: + * %false: The ACL could not be encoded + * %true: @xdr is advanced to the next available position + */ +bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, + struct posix_acl *acl, int encode_entries, + int typeflag) +{ + const size_t elem_size = XDR_UNIT * 3; + u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = elem_size, + .array_len = encode_entries ? entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + struct nfsacl_simple_acl aclbuf; + unsigned int base; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES) + return false; + if (xdr_stream_encode_u32(xdr, entries) < 0) + return false; + + if (encode_entries && acl && acl->a_count == 3) { + struct posix_acl *acl2 = &aclbuf.acl; + + /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is + * invoked in contexts where a memory allocation failure is + * fatal. Fortunately this fake ACL is small enough to + * construct on the stack. */ + posix_acl_init(acl2, 4); + + /* Insert entries in canonical order: other orders seem + to confuse Solaris VxFS. 
*/ + acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ + acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ + acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ + acl2->a_entries[2].e_tag = ACL_MASK; + acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ + nfsacl_desc.acl = acl2; + } + + base = xdr_stream_pos(xdr); + if (!xdr_reserve_space(xdr, XDR_UNIT + + elem_size * nfsacl_desc.desc.array_len)) + return false; + err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc); + if (err) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(nfs_stream_encode_acl); + + struct nfsacl_decode_desc { struct xdr_array2_desc desc; unsigned int count; diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index d6cff5fbe705..5fa38ad9e7e3 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -99,7 +99,7 @@ config NFSD_BLOCKLAYOUT help This option enables support for the exporting pNFS block layouts in the kernel's NFS server. The pNFS block layout enables NFS - clients to directly perform I/O to block devices accesible to both + clients to directly perform I/O to block devices accessible to both the server and the clients. See RFC 5663 for more details. If unsure, say N. @@ -113,7 +113,7 @@ config NFSD_SCSILAYOUT help This option enables support for the exporting pNFS SCSI layouts in the kernel's NFS server. The pNFS SCSI layout enables NFS - clients to directly perform I/O to SCSI devices accesible to both + clients to directly perform I/O to SCSI devices accessible to both the server and the clients. See draft-ietf-nfsv4-scsi-layout for more details. @@ -127,7 +127,7 @@ config NFSD_FLEXFILELAYOUT This option enables support for the exporting pNFS Flex File layouts in the kernel's NFS server. The pNFS Flex File layout enables NFS clients to directly perform I/O to NFSv3 devices - accesible to both the server and the clients. See + accessible to both the server and the clients. See draft-ietf-nfsv4-flex-files for more details. 
Warning, this server implements the bare minimum functionality diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index c330f5bd0cf3..a75abeb1e698 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -51,9 +51,6 @@ struct nfsd_net { bool grace_ended; time64_t boot_time; - /* internal mount of the "nfsd" pseudofilesystem: */ - struct vfsmount *nfsd_mnt; - struct dentry *nfsd_client_dir; /* @@ -130,6 +127,9 @@ struct nfsd_net { wait_queue_head_t ntf_wq; atomic_t ntf_refcnt; + /* Allow umount to wait for nfsd state cleanup */ + struct completion nfsd_shutdown_complete; + /* * clientid and stateid data for construction of net unique COPY * stateids. diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 855e17772eba..4b43929c1f25 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -242,79 +242,61 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) /* GETACL */ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; - struct kvec *head = rqstp->rq_res.head; - unsigned int base; - int n; int w; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; - /* - * Since this is version 2, the check for nfserr in - * nfsd_dispatch actually ensures the following cannot happen. - * However, it seems fragile to depend on that. 
- */ if (dentry == NULL || d_really_is_negative(dentry)) - return 0; + return 1; inode = d_inode(dentry); - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->mask); - if (!xdr_ressize_check(rqstp, p)) + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) return 0; - base = (char *)p - (char *)head->iov_base; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 0; + return 1; w -= PAGE_SIZE; } - n = nfsacl_encode(&rqstp->rq_res, base, inode, - resp->acl_access, - resp->mask & NFS_ACL, 0); - if (n > 0) - n = nfsacl_encode(&rqstp->rq_res, base + n, inode, - resp->acl_default, - resp->mask & NFS_DFACL, - NFS_ACL_DEFAULT); - return (n > 0); -} - -static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) -{ - struct nfsd_attrstat *resp = rqstp->rq_resp; - - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, + resp->mask & NFS_ACL, 0)) + return 0; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, + resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) + return 0; - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); -out: - return xdr_ressize_check(rqstp, p); + return 1; } /* ACCESS */ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) + return 0; + break; + } - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ 
= htonl(resp->access); -out: - return xdr_ressize_check(rqstp, p); + return 1; } /* @@ -329,13 +311,6 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } -static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp) -{ - struct nfsd_attrstat *resp = rqstp->rq_resp; - - fh_put(&resp->fh); -} - static void nfsaclsvc_release_access(struct svc_rqst *rqstp) { struct nfsd3_accessres *resp = rqstp->rq_resp; @@ -375,8 +350,8 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { [ACLPROC2_SETACL] = { .pc_func = nfsacld_proc_setacl, .pc_decode = nfsaclsvc_decode_setaclargs, - .pc_encode = nfsaclsvc_encode_attrstatres, - .pc_release = nfsaclsvc_release_attrstat, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, @@ -386,8 +361,8 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { [ACLPROC2_GETATTR] = { .pc_func = nfsacld_proc_getattr, .pc_decode = nfssvc_decode_fhandleargs, - .pc_encode = nfsaclsvc_encode_attrstatres, - .pc_release = nfsaclsvc_release_attrstat, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 9a6f18d74d14..a1591feeea22 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -168,22 +168,25 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) /* GETACL */ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; + struct kvec *head = rqstp->rq_res.head; + struct inode *inode = d_inode(dentry); + unsigned int base; + int n; + int w; - *p++ = resp->status; - 
p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0 && dentry && d_really_is_positive(dentry)) { - struct inode *inode = d_inode(dentry); - struct kvec *head = rqstp->rq_res.head; - unsigned int base; - int n; - int w; - - *p++ = htonl(resp->mask); - if (!xdr_ressize_check(rqstp, p)) + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) return 0; - base = (char *)p - (char *)head->iov_base; + + base = (char *)xdr->p - (char *)head->iov_base; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, @@ -204,9 +207,11 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) NFS_ACL_DEFAULT); if (n <= 0) return 0; - } else - if (!xdr_ressize_check(rqstp, p)) + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; + } return 1; } @@ -214,11 +219,11 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) /* SETACL */ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh); } /* diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8675851199f8..17715a6c7a40 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -126,14 +126,15 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_readlinkres *resp = rqstp->rq_resp; - char *buffer = page_address(*(rqstp->rq_next_page++)); dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. 
*/ fh_copy(&resp->fh, &argp->fh); resp->len = NFS3_MAXPATHLEN; - resp->status = nfsd_readlink(rqstp, &resp->fh, buffer, &resp->len); + resp->pages = rqstp->rq_next_page++; + resp->status = nfsd_readlink(rqstp, &resp->fh, + page_address(*resp->pages), &resp->len); return rpc_success; } @@ -158,6 +159,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) v = 0; len = argp->count; + resp->pages = rqstp->rq_next_page; while (len > 0) { struct page *page = *(rqstp->rq_next_page++); @@ -439,17 +441,30 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd3_readdirres *resp, int count) { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + count = min_t(u32, count, svc_max_payload(rqstp)); - /* Convert byte count to number of words (i.e. >> 2), - * and reserve room for the NULL ptr & eof flag (-2 words) */ - resp->buflen = (count >> 2) - 2; + memset(buf, 0, sizeof(*buf)); - resp->buffer = page_address(*rqstp->rq_next_page); + /* Reserve room for the NULL ptr & eof flag (-2 words) */ + buf->buflen = count - XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; while (count > 0) { rqstp->rq_next_page++; count -= PAGE_SIZE; } + + /* This is xdr_init_encode(), but it assumes that + * the head kvec has already been consumed. 
*/ + xdr_set_scratch_buffer(xdr, NULL, 0); + xdr->buf = buf; + xdr->page_ptr = buf->pages; + xdr->iov = NULL; + xdr->p = page_address(*buf->pages); + xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->rqst = NULL; } /* @@ -460,10 +475,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - int count = 0; loff_t offset; - struct page **p; - caddr_t page_addr = NULL; dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -471,39 +483,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) nfsd3_init_dirlist_pages(rqstp, resp, argp->count); - /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - resp->common.err = nfs_ok; + resp->cookie_offset = 0; resp->rqstp = rqstp; offset = argp->cookie; - resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, - &resp->common, nfs3svc_encode_entry); + &resp->common, nfs3svc_encode_entry3); memcpy(resp->verf, argp->verf, 8); - count = 0; - for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { - page_addr = page_address(*p); + nfs3svc_encode_cookie3(resp, offset); - if (((caddr_t)resp->buffer >= page_addr) && - ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { - count += (caddr_t)resp->buffer - page_addr; - break; - } - count += PAGE_SIZE; - } - resp->count = count >> 2; - if (resp->offset) { - if (unlikely(resp->offset1)) { - /* we ended up with offset on a page boundary */ - *resp->offset = htonl(offset >> 32); - *resp->offset1 = htonl(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; - } + /* Recycle only pages that were part of the reply */ + rqstp->rq_next_page = resp->xdr.page_ptr + 1; return rpc_success; } @@ -517,10 +508,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - int count = 0; loff_t offset; - struct 
page **p; - caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -528,10 +516,9 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) nfsd3_init_dirlist_pages(rqstp, resp, argp->count); - /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - resp->common.err = nfs_ok; + resp->cookie_offset = 0; resp->rqstp = rqstp; offset = argp->cookie; @@ -545,30 +532,12 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) } resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, - &resp->common, nfs3svc_encode_entry_plus); + &resp->common, nfs3svc_encode_entryplus3); memcpy(resp->verf, argp->verf, 8); - for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { - page_addr = page_address(*p); + nfs3svc_encode_cookie3(resp, offset); - if (((caddr_t)resp->buffer >= page_addr) && - ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { - count += (caddr_t)resp->buffer - page_addr; - break; - } - count += PAGE_SIZE; - } - resp->count = count >> 2; - if (resp->offset) { - if (unlikely(resp->offset1)) { - /* we ended up with offset on a page boundary */ - *resp->offset = htonl(offset >> 32); - *resp->offset1 = htonl(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; - } + /* Recycle only pages that were part of the reply */ + rqstp->rq_next_page = resp->xdr.page_ptr + 1; out: return rpc_success; @@ -736,7 +705,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_GETATTR] = { .pc_func = nfsd3_proc_getattr, .pc_decode = nfs3svc_decode_fhandleargs, - .pc_encode = nfs3svc_encode_attrstatres, + .pc_encode = nfs3svc_encode_getattrres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_attrstatres), @@ -758,7 +727,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_LOOKUP] = { .pc_func = nfsd3_proc_lookup, .pc_decode = 
nfs3svc_decode_diropargs, - .pc_encode = nfs3svc_encode_diropres, + .pc_encode = nfs3svc_encode_lookupres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_diropres), diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9d9a01ce0b27..0a5ebc52e6a9 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -14,13 +14,26 @@ #include "netns.h" #include "vfs.h" -#define NFSDDBG_FACILITY NFSDDBG_XDR +/* + * Force construction of an empty post-op attr + */ +static const struct svc_fh nfs3svc_null_fh = { + .fh_no_wcc = true, +}; +/* + * time_delta. {1, 0} means the server is accurate only + * to the nearest second. + */ +static const struct timespec64 nfs3svc_time_delta = { + .tv_sec = 1, + .tv_nsec = 0, +}; /* * Mapping of S_IF* types to NFS file types */ -static u32 nfs3_ftypes[] = { +static const u32 nfs3_ftypes[] = { NF3NON, NF3FIFO, NF3CHR, NF3BAD, NF3DIR, NF3BAD, NF3BLK, NF3BAD, NF3REG, NF3BAD, NF3LNK, NF3BAD, @@ -33,9 +46,11 @@ static u32 nfs3_ftypes[] = { */ static __be32 * -encode_time3(__be32 *p, struct timespec64 *time) +encode_nfstime3(__be32 *p, const struct timespec64 *time) { - *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); + *p++ = cpu_to_be32((u32)time->tv_sec); + *p++ = cpu_to_be32(time->tv_nsec); + return p; } @@ -82,14 +97,80 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) return true; } -static __be32 * -encode_fh(__be32 *p, struct svc_fh *fhp) +/** + * svcxdr_encode_nfsstat3 - Encode an NFSv3 status code + * @xdr: XDR stream + * @status: status value to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(status)); + if (!p) + return false; + *p = status; + + return true; +} + +static bool +svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) { - unsigned int 
size = fhp->fh_handle.fh_size; - *p++ = htonl(size); - if (size) p[XDR_QUADLEN(size)-1]=0; + u32 size = fhp->fh_handle.fh_size; + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT + size); + if (!p) + return false; + *p++ = cpu_to_be32(size); + if (size) + p[XDR_QUADLEN(size) - 1] = 0; memcpy(p, &fhp->fh_handle.fh_base, size); - return p + XDR_QUADLEN(size); + + return true; +} + +static bool +svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) +{ + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + if (!svcxdr_encode_nfs_fh3(xdr, fhp)) + return false; + + return true; +} + +static bool +svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_COOKIEVERFSIZE); + if (!p) + return false; + memcpy(p, verf, NFS3_COOKIEVERFSIZE); + + return true; +} + +static bool +svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_WRITEVERFSIZE); + if (!p) + return false; + memcpy(p, verf, NFS3_WRITEVERFSIZE); + + return true; } static bool @@ -253,115 +334,157 @@ svcxdr_decode_devicedata3(struct svc_rqst *rqstp, struct xdr_stream *xdr, svcxdr_decode_specdata3(xdr, args); } -static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat) { - u64 f; + struct user_namespace *userns = nfsd_user_namespace(rqstp); + __be32 *p; + u64 fsid; + + p = xdr_reserve_space(xdr, XDR_UNIT * 21); + if (!p) + return false; + + *p++ = cpu_to_be32(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = cpu_to_be32((u32)(stat->mode & S_IALLUGO)); + *p++ = cpu_to_be32((u32)stat->nlink); + *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); + *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) + p = xdr_encode_hyper(p, 
(u64)NFS3_MAXPATHLEN); + else + p = xdr_encode_hyper(p, (u64)stat->size); + + /* used */ + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + + /* rdev */ + *p++ = cpu_to_be32((u32)MAJOR(stat->rdev)); + *p++ = cpu_to_be32((u32)MINOR(stat->rdev)); + switch(fsid_source(fhp)) { - default: - case FSIDSOURCE_DEV: - p = xdr_encode_hyper(p, (u64)huge_encode_dev - (fhp->fh_dentry->d_sb->s_dev)); - break; case FSIDSOURCE_FSID: - p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); + fsid = (u64)fhp->fh_export->ex_fsid; break; case FSIDSOURCE_UUID: - f = ((u64*)fhp->fh_export->ex_uuid)[0]; - f ^= ((u64*)fhp->fh_export->ex_uuid)[1]; - p = xdr_encode_hyper(p, f); + fsid = ((u64 *)fhp->fh_export->ex_uuid)[0]; + fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1]; break; + default: + fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev); } - return p; -} + p = xdr_encode_hyper(p, fsid); -static __be32 * -encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, - struct kstat *stat) -{ - struct user_namespace *userns = nfsd_user_namespace(rqstp); - *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = htonl((u32) (stat->mode & S_IALLUGO)); - *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); - *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { - p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); - } else { - p = xdr_encode_hyper(p, (u64) stat->size); - } - p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); - *p++ = htonl((u32) MAJOR(stat->rdev)); - *p++ = htonl((u32) MINOR(stat->rdev)); - p = encode_fsid(p, fhp); + /* fileid */ p = xdr_encode_hyper(p, stat->ino); - p = encode_time3(p, &stat->atime); - p = encode_time3(p, &stat->mtime); - p = encode_time3(p, &stat->ctime); - return p; + p = encode_nfstime3(p, &stat->atime); + p = encode_nfstime3(p, &stat->mtime); + encode_nfstime3(p, &stat->ctime); + + return true; } -static __be32 * 
-encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) { - /* Attributes to follow */ - *p++ = xdr_one; - return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 6); + if (!p) + return false; + p = xdr_encode_hyper(p, (u64)fhp->fh_pre_size); + p = encode_nfstime3(p, &fhp->fh_pre_mtime); + encode_nfstime3(p, &fhp->fh_pre_ctime); + + return true; } -/* - * Encode post-operation attributes. - * The inode may be NULL if the call failed because of a stale file - * handle. In this case, no attributes are returned. - */ -static __be32 * -encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) { - struct dentry *dentry = fhp->fh_dentry; - if (!fhp->fh_no_wcc && dentry && d_really_is_positive(dentry)) { - __be32 err; - struct kstat stat; - - err = fh_getattr(fhp, &stat); - if (!err) { - *p++ = xdr_one; /* attributes follow */ - lease_get_mtime(d_inode(dentry), &stat.mtime); - return encode_fattr3(rqstp, p, fhp, &stat); - } + if (!fhp->fh_pre_saved) { + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + return true; } - *p++ = xdr_zero; - return p; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + return svcxdr_encode_wcc_attr(xdr, fhp); } -/* Helper for NFSv3 ACLs */ -__be32 * -nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +/** + * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes + * @rqstp: Context of a completed RPC transaction + * @xdr: XDR stream + * @fhp: File handle to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp) { - return encode_post_op_attr(rqstp, p, fhp); + 
struct dentry *dentry = fhp->fh_dentry; + struct kstat stat; + + /* + * The inode may be NULL if the call failed because of a + * stale file handle. In this case, no attributes are + * returned. + */ + if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry)) + goto no_post_op_attrs; + if (fh_getattr(fhp, &stat) != nfs_ok) + goto no_post_op_attrs; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + lease_get_mtime(d_inode(dentry), &stat.mtime); + if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &stat)) + return false; + + return true; + +no_post_op_attrs: + return xdr_stream_encode_item_absent(xdr) > 0; } /* - * Enocde weak cache consistency data + * Encode weak cache consistency data */ -static __be32 * -encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp) { - struct dentry *dentry = fhp->fh_dentry; - - if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) { - if (fhp->fh_pre_saved) { - *p++ = xdr_one; - p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size); - p = encode_time3(p, &fhp->fh_pre_mtime); - p = encode_time3(p, &fhp->fh_pre_ctime); - } else { - *p++ = xdr_zero; - } - return encode_saved_post_attr(rqstp, p, fhp); - } - /* no pre- or post-attrs */ - *p++ = xdr_zero; - return encode_post_op_attr(rqstp, p, fhp); + struct dentry *dentry = fhp->fh_dentry; + + if (!dentry || !d_really_is_positive(dentry) || !fhp->fh_post_saved) + goto neither; + + /* before */ + if (!svcxdr_encode_pre_op_attr(xdr, fhp)) + return false; + + /* after */ + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &fhp->fh_post_attr)) + return false; + + return true; + +neither: + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp)) + return false; + + return true; } static bool fs_supports_change_attribute(struct super_block 
*sb) @@ -713,210 +836,252 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) /* GETATTR */ int -nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status == 0) { - lease_get_mtime(d_inode(resp->fh.fh_dentry), - &resp->stat.mtime); - p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); + if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + break; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* SETATTR, REMOVE, RMDIR */ int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->fh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh); } /* LOOKUP */ -int -nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status == 0) { - p = encode_fh(p, &resp->fh); - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) + return 0; } - p = 
encode_post_op_attr(rqstp, p, &resp->dirfh); - return xdr_ressize_check(rqstp, p); + + return 1; } /* ACCESS */ int nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) - *p++ = htonl(resp->access); - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } + + return 1; } /* READLINK */ int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->len); - xdr_ressize_check(rqstp, p); - rqstp->rq_res.page_len = resp->len; - if (resp->len & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, resp->len)) + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; - return 1; - } else - return xdr_ressize_check(rqstp, p); + if (xdr_stream_encode_u32(xdr, resp->len) < 0) + return 0; + xdr_write_pages(xdr, resp->pages, 0, resp->len); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } + + return 1; } /* READ */ int 
nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->count); - *p++ = htonl(resp->eof); - *p++ = htonl(resp->count); /* xdr opaque count */ - xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data as well */ - rqstp->rq_res.page_len = resp->count; - if (resp->count & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, - resp->count)) + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) return 0; - return 1; - } else - return xdr_ressize_check(rqstp, p); + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + if (xdr_stream_encode_bool(xdr, resp->eof) < 0) + return 0; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, + resp->count); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } + + return 1; } /* WRITE */ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_writeres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->count); - *p++ = htonl(resp->committed); - *p++ = resp->verf[0]; - *p++ = resp->verf[1]; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; + 
if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return 0; + if (xdr_stream_encode_u32(xdr, resp->committed) < 0) + return 0; + if (!svcxdr_encode_writeverf3(xdr, resp->verf)) + return 0; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* CREATE, MKDIR, SYMLINK, MKNOD */ int nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status == 0) { - *p++ = xdr_one; - p = encode_fh(p, &resp->fh); - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) + return 0; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) + return 0; } - p = encode_wcc_data(rqstp, p, &resp->dirfh); - return xdr_ressize_check(rqstp, p); + + return 1; } /* RENAME */ int nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_renameres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->ffh); - p = encode_wcc_data(rqstp, p, &resp->tfh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->ffh) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); } /* LINK */ int nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_linkres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - p = encode_wcc_data(rqstp, p, &resp->tfh); - return xdr_ressize_check(rqstp, p); + return 
svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); } /* READDIR */ int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readdirres *resp = rqstp->rq_resp; + struct xdr_buf *dirlist = &resp->dirlist; - *p++ = resp->status; - p = encode_post_op_attr(rqstp, p, &resp->fh); - - if (resp->status == 0) { - /* stupid readdir cookie */ - memcpy(p, resp->verf, 8); p += 2; - xdr_ressize_check(rqstp, p); - if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE) - return 1; /*No room for trailer */ - rqstp->rq_res.page_len = (resp->count) << 2; - - /* add the 'tail' to the end of the 'head' page - page 0. */ - rqstp->rq_res.tail[0].iov_base = p; - *p++ = 0; /* no more entries */ - *p++ = htonl(resp->common.err == nfserr_eof); - rqstp->rq_res.tail[0].iov_len = 2<<2; - return 1; - } else - return xdr_ressize_check(rqstp, p); -} - -static __be32 * -encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, - int namlen, u64 ino) -{ - *p++ = xdr_one; /* mark entry present */ - p = xdr_encode_hyper(p, ino); /* file id */ - p = xdr_encode_array(p, name, namlen);/* name length & name */ - - cd->offset = p; /* remember pointer */ - p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */ + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) + return 0; + xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + /* no more entries */ + if (xdr_stream_encode_item_absent(xdr) < 0) + return 0; + if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return 0; + } - return p; + return 1; } static __be32 @@ -957,267 
+1122,327 @@ out: return rv; } -static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino) +/** + * nfs3svc_encode_cookie3 - Encode a directory offset cookie + * @resp: readdir result context + * @offset: offset cookie to encode + * + * The buffer space for the offset cookie has already been reserved + * by svcxdr_encode_entry3_common(). + */ +void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset) { - struct svc_fh *fh = &cd->scratch; - __be32 err; - - fh_init(fh, NFS3_FHSIZE); - err = compose_entry_fh(cd, fh, name, namlen, ino); - if (err) { - *p++ = 0; - *p++ = 0; - goto out; - } - p = encode_post_op_attr(cd->rqstp, p, fh); - *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, fh); -out: - fh_put(fh); - return p; -} + __be64 cookie = cpu_to_be64(offset); -/* - * Encode a directory entry. This one works for both normal readdir - * and readdirplus. - * The normal readdir reply requires 2 (fileid) + 1 (stringlen) - * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen. - * - * The readdirplus baggage is 1+21 words for post_op_attr, plus the - * file handle. 
- */ + if (!resp->cookie_offset) + return; + write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, + sizeof(cookie)); + resp->cookie_offset = 0; +} -#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) -#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) -static int -encode_entry(struct readdir_cd *ccd, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type, int plus) +static bool +svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, + int namlen, loff_t offset, u64 ino) { - struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, - common); - __be32 *p = cd->buffer; - caddr_t curr_page_addr = NULL; - struct page ** page; - int slen; /* string (name) length */ - int elen; /* estimated entry length in words */ - int num_entry_words = 0; /* actual number of words */ - - if (cd->offset) { - u64 offset64 = offset; - - if (unlikely(cd->offset1)) { - /* we ended up with offset on a page boundary */ - *cd->offset = htonl(offset64 >> 32); - *cd->offset1 = htonl(offset64 & 0xffffffff); - cd->offset1 = NULL; - } else { - xdr_encode_hyper(cd->offset, offset64); - } - cd->offset = NULL; - } + struct xdr_buf *dirlist = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; - /* - dprintk("encode_entry(%.*s @%ld%s)\n", - namlen, name, (long) offset, plus? " plus" : ""); - */ - - /* truncate filename if too long */ - namlen = min(namlen, NFS3_MAXNAMLEN); + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + /* fileid */ + if (xdr_stream_encode_u64(xdr, ino) < 0) + return false; + /* name */ + if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS3_MAXNAMLEN)) < 0) + return false; + /* cookie */ + resp->cookie_offset = dirlist->len; + if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0) + return false; - slen = XDR_QUADLEN(namlen); - elen = slen + NFS3_ENTRY_BAGGAGE - + (plus? 
NFS3_ENTRYPLUS_BAGGAGE : 0); + return true; +} - if (cd->buflen < elen) { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } +/** + * nfs3svc_encode_entry3 - encode one NFSv3 READDIR entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs3svc_encode_entry3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd3_readdirres *resp = container_of(ccd, + struct nfsd3_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; - /* determine which page in rq_respages[] we are currently filling */ - for (page = cd->rqstp->rq_respages + 1; - page < cd->rqstp->rq_next_page; page++) { - curr_page_addr = page_address(*page); + /* The offset cookie for the previous entry */ + nfs3svc_encode_cookie3(resp, offset); - if (((caddr_t)cd->buffer >= curr_page_addr) && - ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) - break; - } + if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) + goto out_toosmall; - if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) { - /* encode entry in current page */ + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; + return 0; - p = encode_entry_baggage(cd, p, name, namlen, ino); +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; +} - if (plus) - p = encode_entryplus_baggage(cd, p, name, namlen, ino); - num_entry_words = p - cd->buffer; - } else if (*(page+1) != NULL) { - /* temporarily 
encode entry into next page, then move back to - * current and next page in rq_respages[] */ - __be32 *p1, *tmp; - int len1, len2; +static bool +svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name, + int namlen, u64 ino) +{ + struct xdr_stream *xdr = &resp->xdr; + struct svc_fh *fhp = &resp->scratch; + bool result; - /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(*(page+1)); + result = false; + fh_init(fhp, NFS3_FHSIZE); + if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok) + goto out_noattrs; - p1 = encode_entry_baggage(cd, p1, name, namlen, ino); + if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp)) + goto out; + if (!svcxdr_encode_post_op_fh3(xdr, fhp)) + goto out; + result = true; - if (plus) - p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino); +out: + fh_put(fhp); + return result; - /* determine entry word length and lengths to go in pages */ - num_entry_words = p1 - tmp; - len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer; - if ((num_entry_words << 2) < len1) { - /* the actual number of words in the entry is less - * than elen and can still fit in the current page - */ - memmove(p, tmp, num_entry_words << 2); - p += num_entry_words; - - /* update offset */ - cd->offset = cd->buffer + (cd->offset - tmp); - } else { - unsigned int offset_r = (cd->offset - tmp) << 2; - - /* update pointer to offset location. 
- * This is a 64bit quantity, so we need to - * deal with 3 cases: - * - entirely in first page - * - entirely in second page - * - 4 bytes in each page - */ - if (offset_r + 8 <= len1) { - cd->offset = p + (cd->offset - tmp); - } else if (offset_r >= len1) { - cd->offset -= len1 >> 2; - } else { - /* sitting on the fence */ - BUG_ON(offset_r != len1 - 4); - cd->offset = p + (cd->offset - tmp); - cd->offset1 = tmp; - } - - len2 = (num_entry_words << 2) - len1; - - /* move from temp page to current and next pages */ - memmove(p, tmp, len1); - memmove(tmp, (caddr_t)tmp+len1, len2); - - p = tmp + (len2 >> 2); - } - } - else { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } +out_noattrs: + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + return true; +} - cd->buflen -= num_entry_words; - cd->buffer = p; - cd->common.err = nfs_ok; +/** + * nfs3svc_encode_entryplus3 - encode one NFSv3 READDIRPLUS entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. 
+ * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd3_readdirres *resp = container_of(ccd, + struct nfsd3_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfs3svc_encode_cookie3(resp, offset); + + if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + if (!svcxdr_encode_entry3_plus(resp, name, namlen, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; return 0; +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; } -int -nfs3svc_encode_entry(void *cd, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int d_type) +static bool +svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, + const struct nfsd3_fsstatres *resp) { - return encode_entry(cd, name, namlen, offset, ino, d_type, 0); -} + const struct kstatfs *s = &resp->stats; + u64 bs = s->f_bsize; + __be32 *p; -int -nfs3svc_encode_entry_plus(void *cd, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int d_type) -{ - return encode_entry(cd, name, namlen, offset, ino, d_type, 1); + p = xdr_reserve_space(xdr, XDR_UNIT * 13); + if (!p) + return false; + p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ + p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ + p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ + p = xdr_encode_hyper(p, s->f_files); /* total inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ + *p = 
cpu_to_be32(resp->invarsec); /* mean unchanged time */ + + return true; } /* FSSTAT */ int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsstatres *resp = rqstp->rq_resp; - struct kstatfs *s = &resp->stats; - u64 bs = s->f_bsize; - - *p++ = resp->status; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ - p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ - p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ - p = xdr_encode_hyper(p, s->f_files); /* total inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ - *p++ = htonl(resp->invarsec); /* mean unchanged time */ + + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; + if (!svcxdr_encode_fsstat3resok(xdr, resp)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + + return 1; +} + +static bool +svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, + const struct nfsd3_fsinfores *resp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 12); + if (!p) + return false; + *p++ = cpu_to_be32(resp->f_rtmax); + *p++ = cpu_to_be32(resp->f_rtpref); + *p++ = cpu_to_be32(resp->f_rtmult); + *p++ = cpu_to_be32(resp->f_wtmax); + *p++ = cpu_to_be32(resp->f_wtpref); + *p++ = cpu_to_be32(resp->f_wtmult); + *p++ = cpu_to_be32(resp->f_dtpref); + p = xdr_encode_hyper(p, resp->f_maxfilesize); + p = encode_nfstime3(p, &nfs3svc_time_delta); + *p = cpu_to_be32(resp->f_properties); + + return true; } /* FSINFO */ int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsinfores 
*resp = rqstp->rq_resp; - *p++ = resp->status; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - *p++ = htonl(resp->f_rtmax); - *p++ = htonl(resp->f_rtpref); - *p++ = htonl(resp->f_rtmult); - *p++ = htonl(resp->f_wtmax); - *p++ = htonl(resp->f_wtpref); - *p++ = htonl(resp->f_wtmult); - *p++ = htonl(resp->f_dtpref); - p = xdr_encode_hyper(p, resp->f_maxfilesize); - *p++ = xdr_one; - *p++ = xdr_zero; - *p++ = htonl(resp->f_properties); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; + if (!svcxdr_encode_fsinfo3resok(xdr, resp)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + return 1; +} + +static bool +svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, + const struct nfsd3_pathconfres *resp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 6); + if (!p) + return false; + *p++ = cpu_to_be32(resp->p_link_max); + *p++ = cpu_to_be32(resp->p_name_max); + p = xdr_encode_bool(p, resp->p_no_trunc); + p = xdr_encode_bool(p, resp->p_chown_restricted); + p = xdr_encode_bool(p, resp->p_case_insensitive); + xdr_encode_bool(p, resp->p_case_preserving); + + return true; } /* PATHCONF */ int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_pathconfres *resp = rqstp->rq_resp; - *p++ = resp->status; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - *p++ = htonl(resp->p_link_max); - *p++ = htonl(resp->p_name_max); - *p++ = htonl(resp->p_no_trunc); - *p++ = htonl(resp->p_chown_restricted); - *p++ = htonl(resp->p_case_insensitive); - *p++ = htonl(resp->p_case_preserving); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, 
&nfs3svc_null_fh)) + return 0; + if (!svcxdr_encode_pathconf3resok(xdr, resp)) + return 0; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + return 1; } /* COMMIT */ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_commitres *resp = rqstp->rq_resp; - *p++ = resp->status; - p = encode_wcc_data(rqstp, p, &resp->fh); - /* Write verifier */ - if (resp->status == 0) { - *p++ = resp->verf[0]; - *p++ = resp->verf[1]; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; + if (!svcxdr_encode_writeverf3(xdr, resp->verf)) + return 0; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return 0; } - return xdr_ressize_check(rqstp, p); + + return 1; } /* diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dd9f38d072dd..daf43b980d4b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1383,10 +1383,13 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) { ssize_t bytes_copied = 0; - size_t bytes_total = copy->cp_count; + u64 bytes_total = copy->cp_count; u64 src_pos = copy->cp_src_pos; u64 dst_pos = copy->cp_dst_pos; + /* See RFC 7862 p.67: */ + if (bytes_total == 0) + bytes_total = ULLONG_MAX; do { if (kthread_should_stop()) break; @@ -1538,8 +1541,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!nfs4_init_copy_state(nn, copy)) goto out_err; refcount_set(&async_copy->refcount, 1); - memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid, - sizeof(copy->cp_stateid)); + memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.stid, + sizeof(copy->cp_res.cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, 
"%s", "copy thread"); @@ -2262,25 +2265,6 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } -static void svcxdr_init_encode(struct svc_rqst *rqstp, - struct nfsd4_compoundres *resp) -{ - struct xdr_stream *xdr = &resp->xdr; - struct xdr_buf *buf = &rqstp->rq_res; - struct kvec *head = buf->head; - - xdr->buf = buf; - xdr->iov = head; - xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; - /* Tail and page_len should be zero at this point: */ - buf->len = buf->head[0].iov_len; - xdr_reset_scratch_buffer(xdr); - xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - - rqstp->rq_auth_slack; -} - #ifdef CONFIG_NFSD_V4_2_INTER_SSC static void check_if_stalefh_allowed(struct nfsd4_compoundargs *args) @@ -2335,10 +2319,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; - svcxdr_init_encode(rqstp, resp); - resp->tagp = resp->xdr.p; + resp->xdr = &rqstp->rq_res_stream; + + /* reserve space for: NFS status code */ + xdr_reserve_space(resp->xdr, XDR_UNIT); + + resp->tagp = resp->xdr->p; /* reserve space for: taglen, tag, and opcnt */ - xdr_reserve_space(&resp->xdr, 8 + args->taglen); + xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; resp->rqstp = rqstp; @@ -2444,7 +2432,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) encode_op: if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(&resp->xdr, op); + nfsd4_encode_replay(resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 891395c6c7d3..6fedc49726bf 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -626,7 +626,7 @@ nfsd4_legacy_tracking_init(struct net 
*net) status = nfsd4_load_reboot_recovery_data(net); if (status) goto err; - printk("NFSD: Using legacy client tracking operations.\n"); + pr_info("NFSD: Using legacy client tracking operations.\n"); return 0; err: @@ -1028,7 +1028,7 @@ nfsd4_init_cld_pipe(struct net *net) status = __nfsd4_init_cld_pipe(net); if (!status) - printk("NFSD: Using old nfsdcld client tracking operations.\n"); + pr_info("NFSD: Using old nfsdcld client tracking operations.\n"); return status; } @@ -1605,7 +1605,7 @@ nfsd4_cld_tracking_init(struct net *net) nfs4_release_reclaim(nn); goto err_remove; } else - printk("NFSD: Using nfsdcld client tracking operations.\n"); + pr_info("NFSD: Using nfsdcld client tracking operations.\n"); return 0; err_remove: @@ -1864,7 +1864,7 @@ nfsd4_umh_cltrack_init(struct net *net) ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); if (!ret) - printk("NFSD: Using UMH upcall client tracking operations.\n"); + pr_info("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 97447a64bad0..7698172ac0c7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -43,6 +43,7 @@ #include <linux/sunrpc/addr.h> #include <linux/jhash.h> #include <linux/string_helpers.h> +#include <linux/fsnotify.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -2352,6 +2353,10 @@ static int client_info_show(struct seq_file *m, void *v) memcpy(&clid, &clp->cl_clientid, sizeof(clid)); seq_printf(m, "clientid: 0x%llx\n", clid); seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + seq_puts(m, "status: confirmed\n"); + else + seq_puts(m, "status: unconfirmed\n"); seq_printf(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); @@ -2702,6 +2707,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, int 
ret; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct dentry *dentries[ARRAY_SIZE(client_files)]; clp = alloc_client(name); if (clp == NULL) @@ -2721,9 +2727,11 @@ static struct nfs4_client *create_client(struct xdr_netobj name, memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; - clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs, - clp->cl_clientid.cl_id - nn->clientid_base, - client_files); + clp->cl_nfsd_dentry = nfsd_client_mkdir( + nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files, dentries); + clp->cl_nfsd_info_dentry = dentries[0]; if (!clp->cl_nfsd_dentry) { free_client(clp); return NULL; @@ -2798,7 +2806,10 @@ move_to_confirmed(struct nfs4_client *clp) list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); - set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + if (!test_and_set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags) && + clp->cl_nfsd_dentry && + clp->cl_nfsd_info_dentry) + fsnotify_dentry(clp->cl_nfsd_info_dentry, FS_MODIFY); renew_client_locked(clp); } @@ -2903,7 +2914,7 @@ out_err: static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { - struct xdr_buf *buf = resp->xdr.buf; + struct xdr_buf *buf = resp->xdr->buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -2973,7 +2984,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; __be32 status; @@ -3708,7 +3719,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_sequence *seq = &u->sequence; struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; 
struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; @@ -5338,6 +5349,22 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) return true; } +struct laundry_time { + time64_t cutoff; + time64_t new_timeo; +}; + +static bool state_expired(struct laundry_time *lt, time64_t last_refresh) +{ + time64_t time_remaining; + + if (last_refresh < lt->cutoff) + return true; + time_remaining = last_refresh - lt->cutoff; + lt->new_timeo = min(lt->new_timeo, time_remaining); + return false; +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { @@ -5347,14 +5374,16 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; - time64_t t, new_timeo = nn->nfsd4_lease; + struct laundry_time lt = { + .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease, + .new_timeo = nn->nfsd4_lease + }; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; if (clients_still_reclaiming(nn)) { - new_timeo = 0; + lt.new_timeo = 0; goto out; } nfsd4_end_grace(nn); @@ -5364,7 +5393,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - cps->cpntf_time < cutoff) + state_expired(&lt, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); @@ -5372,11 +5401,8 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_time > cutoff) { - t = clp->cl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(&lt, clp->cl_time)) break; - } if (mark_client_expired_locked(clp)) { trace_nfsd_clid_expired(&clp->cl_clientid); continue; @@ -5393,11 +5419,8 @@ nfs4_laundromat(struct nfsd_net *nn) 
spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (dp->dl_time > cutoff) { - t = dp->dl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(&lt, dp->dl_time)) break; - } WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } @@ -5413,11 +5436,8 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (oo->oo_time > cutoff) { - t = oo->oo_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(&lt, oo->oo_time)) break; - } list_del_init(&oo->oo_close_lru); stp = oo->oo_last_closed_stid; oo->oo_last_closed_stid = NULL; @@ -5443,11 +5463,8 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (nbl->nbl_time > cutoff) { - t = nbl->nbl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(&lt, nbl->nbl_time)) break; - } list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } @@ -5460,8 +5477,7 @@ nfs4_laundromat(struct nfsd_net *nn) free_blocked_lock(nbl); } out: - new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); - return new_timeo; + return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } static struct workqueue_struct *laundry_wq; @@ -7321,14 +7337,9 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - ret = get_nfsdfs(net); - if (ret) - return ret; ret = nfs4_state_create_net(net); - if (ret) { - mntput(nn->nfsd_mnt); + if (ret) return ret; - } locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) @@ -7398,7 +7409,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); 
nfs4_state_destroy_net(net); - mntput(nn->nfsd_mnt); } void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index eaaa1605b5b5..e0f06d3cbd44 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3581,7 +3581,7 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8); @@ -3594,7 +3594,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); @@ -3611,7 +3611,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); } @@ -3620,7 +3620,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); @@ -3634,7 +3634,7 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3649,7 +3649,7 @@ static __be32 nfsd4_encode_getattr(struct 
nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, getattr->ga_bmval, resp->rqstp, 0); @@ -3658,7 +3658,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; @@ -3713,7 +3713,7 @@ again: static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; if (!nfserr) nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); @@ -3726,7 +3726,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); @@ -3736,7 +3736,7 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } @@ -3745,7 +3745,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3759,7 +3759,7 @@ 
nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); @@ -3853,7 +3853,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } @@ -3861,7 +3861,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); } @@ -3871,7 +3871,7 @@ static __be32 nfsd4_encode_splice_read( struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct xdr_buf *buf = xdr->buf; int status, space_left; u32 eof; @@ -3937,7 +3937,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; u32 eof; int starting_len = xdr->buf->len - 8; __be32 nfserr; @@ -3976,7 +3976,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { unsigned long maxcount; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct file *file; int starting_len = xdr->buf->len; __be32 *p; @@ -3990,7 +3990,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, 
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); return nfserr_resource; } - if (resp->xdr.buf->page_len && + if (resp->xdr->buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); return nfserr_resource; @@ -4020,7 +4020,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int maxcount; __be32 wire_count; int zero = 0; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; int status; __be32 *p; @@ -4072,7 +4072,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int bytes_left; loff_t offset; __be64 wire_offset; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; int starting_len = xdr->buf->len; __be32 *p; @@ -4083,8 +4083,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ *p++ = cpu_to_be32(0); *p++ = cpu_to_be32(0); - resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - - (char *)resp->xdr.buf->head[0].iov_base; + xdr->buf->head[0].iov_len = (char *)xdr->p - + (char *)xdr->buf->head[0].iov_base; /* * Number of bytes left for directory entries allowing for the @@ -4159,7 +4159,7 @@ err_no_verf: static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -4172,7 +4172,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 40); @@ -4255,7 +4255,7 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct 
nfsd4_secinfo *secinfo) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } @@ -4264,7 +4264,7 @@ static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } @@ -4276,7 +4276,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -4300,7 +4300,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; if (!nfserr) { @@ -4324,7 +4324,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -4341,7 +4341,7 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -4419,7 +4419,7 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 24); @@ -4472,7 +4472,7 @@ static 
__be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); @@ -4495,7 +4495,7 @@ static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4516,7 +4516,7 @@ static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getdeviceinfo *gdev) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4572,7 +4572,7 @@ static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutget *lgp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4599,7 +4599,7 @@ static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutcommit *lcp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4620,7 +4620,7 @@ static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutreturn *lrp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4638,7 +4638,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write, bool sync) { __be32 *p; - p = xdr_reserve_space(&resp->xdr, 4); + p = xdr_reserve_space(resp->xdr, 4); if (!p) return nfserr_resource; @@ -4647,11 +4647,11 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, else { __be32 nfserr; *p++ = 
cpu_to_be32(1); - nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid); + nfserr = nfsd4_encode_stateid(resp->xdr, &write->cb_stateid); if (nfserr) return nfserr; } - p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -4665,7 +4665,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, static __be32 nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfs42_netaddr *addr; __be32 *p; @@ -4713,7 +4713,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 4); + p = xdr_reserve_space(resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ *p++ = cpu_to_be32(copy->cp_synchronous); return 0; @@ -4723,7 +4723,7 @@ static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_offload_status *os) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8 + 4); @@ -4740,7 +4740,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, unsigned long *maxcount, u32 *eof, loff_t *pos) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct file *file = read->rd_nf->nf_file; int starting_len = xdr->buf->len; loff_t hole_pos; @@ -4799,7 +4799,7 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, count = data_pos - read->rd_offset; /* Content type, offset, byte count */ - p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8); + p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); if (!p) return nfserr_resource; @@ -4817,7 +4817,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { unsigned long maxcount, count; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; 
struct file *file; int starting_len = xdr->buf->len; int last_segment = xdr->buf->len; @@ -4888,7 +4888,7 @@ static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_copy_notify *cn) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; if (nfserr) @@ -4924,7 +4924,7 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - p = xdr_reserve_space(&resp->xdr, 4 + 8); + p = xdr_reserve_space(resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); @@ -4985,7 +4985,7 @@ static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getxattr *getxattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p, err; p = xdr_reserve_space(xdr, 4); @@ -5009,7 +5009,7 @@ static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setxattr *setxattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -5050,7 +5050,7 @@ static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_listxattrs *listxattrs) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; u32 xdrlen, offset; @@ -5161,7 +5161,7 @@ static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_removexattr *removexattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -5301,7 +5301,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfs4_stateowner *so = 
resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; const struct nfsd4_operation *opdesc = op->opdesc; @@ -5430,14 +5430,14 @@ int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = resp->xdr.buf; + struct xdr_buf *buf = resp->xdr->buf; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); *p = resp->cstate.status; - rqstp->rq_next_page = resp->xdr.page_ptr + 1; + rqstp->rq_next_page = resp->xdr->page_ptr + 1; p = resp->tagp; *p++ = htonl(resp->taglen); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ef86ed23af82..853bf50a2a9b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1266,7 +1266,8 @@ static void nfsdfs_remove_files(struct dentry *root) /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. */ static int nfsdfs_create_files(struct dentry *root, - const struct tree_descr *files) + const struct tree_descr *files, + struct dentry **fdentries) { struct inode *dir = d_inode(root); struct inode *inode; @@ -1275,8 +1276,6 @@ static int nfsdfs_create_files(struct dentry *root, inode_lock(dir); for (i = 0; files->name && files->name[0]; i++, files++) { - if (!files->name) - continue; dentry = d_alloc_name(root, files->name); if (!dentry) goto out; @@ -1290,6 +1289,8 @@ static int nfsdfs_create_files(struct dentry *root, inode->i_private = __get_nfsdfs_client(dir); d_add(dentry, inode); fsnotify_create(dir, dentry); + if (fdentries) + fdentries[i] = dentry; } inode_unlock(dir); return 0; @@ -1301,8 +1302,9 @@ out: /* on success, returns positive number unique to that client. 
*/ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, - const struct tree_descr *files) + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files, + struct dentry **fdentries) { struct dentry *dentry; char name[11]; @@ -1313,7 +1315,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? */ return NULL; - ret = nfsdfs_create_files(dentry, files); + ret = nfsdfs_create_files(dentry, files, fdentries); if (ret) { nfsd_client_rmdir(dentry); return NULL; @@ -1416,6 +1418,8 @@ static void nfsd_umount(struct super_block *sb) { struct net *net = sb->s_fs_info; + nfsd_shutdown_threads(net); + kill_litter_super(sb); put_net(net); } @@ -1428,18 +1432,6 @@ static struct file_system_type nfsd_fs_type = { }; MODULE_ALIAS_FS("nfsd"); -int get_nfsdfs(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct vfsmount *mnt; - - mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - nn->nfsd_mnt = mnt; - return 0; -} - #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 8bdc37aa2c2e..14dbfa75059d 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -93,13 +93,12 @@ int nfsd_get_nrthreads(int n, int *, struct net *); int nfsd_set_nrthreads(int n, int *, struct net *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); +void nfsd_shutdown_threads(struct net *net); void nfsd_destroy(struct net *net); bool i_am_nfsd(void); -int get_nfsdfs(struct net *); - struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); @@ -107,7 +106,9 @@ struct nfsdfs_client { struct nfsdfs_client *get_nfsdfs_client(struct inode *); struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, const 
struct tree_descr *); + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *, + struct dentry **fdentries); void nfsd_client_rmdir(struct dentry *dentry); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 10b44421eace..c475d2271f9c 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -711,7 +711,7 @@ char * SVCFH_fmt(struct svc_fh *fhp) return buf; } -enum fsid_source fsid_source(struct svc_fh *fhp) +enum fsid_source fsid_source(const struct svc_fh *fhp) { if (fhp->fh_handle.fh_version != 1) return FSIDSOURCE_DEV; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index f58933519f38..aff2cda5c6c3 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -82,7 +82,7 @@ enum fsid_source { FSIDSOURCE_FSID, FSIDSOURCE_UUID, }; -extern enum fsid_source fsid_source(struct svc_fh *fhp); +extern enum fsid_source fsid_source(const struct svc_fh *fhp); /* diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a8d5449dd0e9..c2cd2984e41d 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -151,13 +151,14 @@ nfsd_proc_readlink(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_readlinkres *resp = rqstp->rq_resp; - char *buffer = page_address(*(rqstp->rq_next_page++)); dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. 
*/ resp->len = NFS_MAXPATHLEN; - resp->status = nfsd_readlink(rqstp, &argp->fh, buffer, &resp->len); + resp->page = *(rqstp->rq_next_page++); + resp->status = nfsd_readlink(rqstp, &argp->fh, + page_address(resp->page), &resp->len); fh_put(&argp->fh); return rpc_success; @@ -184,6 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) v = 0; len = argp->count; + resp->pages = rqstp->rq_next_page; while (len > 0) { struct page *page = *(rqstp->rq_next_page++); @@ -557,14 +559,27 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd_readdirres *resp, int count) { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + count = min_t(u32, count, PAGE_SIZE); - /* Convert byte count to number of words (i.e. >> 2), - * and reserve room for the NULL ptr & eof flag (-2 words) */ - resp->buflen = (count >> 2) - 2; + memset(buf, 0, sizeof(*buf)); - resp->buffer = page_address(*rqstp->rq_next_page); + /* Reserve room for the NULL ptr & eof flag (-2 words) */ + buf->buflen = count - sizeof(__be32) * 2; + buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; + + /* This is xdr_init_encode(), but it assumes that + * the head kvec has already been consumed. 
*/ + xdr_set_scratch_buffer(xdr, NULL, 0); + xdr->buf = buf; + xdr->page_ptr = buf->pages; + xdr->iov = NULL; + xdr->p = page_address(*buf->pages); + xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->rqst = NULL; } /* @@ -576,25 +591,19 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) struct nfsd_readdirargs *argp = rqstp->rq_argp; struct nfsd_readdirres *resp = rqstp->rq_resp; loff_t offset; - __be32 *buffer; dprintk("nfsd: READDIR %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, argp->cookie); nfsd_init_dirlist_pages(rqstp, resp, argp->count); - buffer = resp->buffer; - resp->offset = NULL; resp->common.err = nfs_ok; - /* Read directory and encode entries on the fly */ + resp->cookie_offset = 0; offset = argp->cookie; resp->status = nfsd_readdir(rqstp, &argp->fh, &offset, &resp->common, nfssvc_encode_entry); - - resp->count = resp->buffer - buffer; - if (resp->offset) - *resp->offset = htonl(offset); + nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); return rpc_success; @@ -640,7 +649,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_GETATTR] = { .pc_func = nfsd_proc_getattr, .pc_decode = nfssvc_decode_fhandleargs, - .pc_encode = nfssvc_encode_attrstat, + .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), @@ -651,7 +660,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_SETATTR] = { .pc_func = nfsd_proc_setattr, .pc_decode = nfssvc_decode_sattrargs, - .pc_encode = nfssvc_encode_attrstat, + .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_sattrargs), .pc_ressize = sizeof(struct nfsd_attrstat), @@ -714,7 +723,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_WRITE] = { .pc_func = nfsd_proc_write, .pc_decode = nfssvc_decode_writeargs, - .pc_encode = nfssvc_encode_attrstat, + .pc_encode = nfssvc_encode_attrstatres, 
.pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_writeargs), .pc_ressize = sizeof(struct nfsd_attrstat), @@ -736,7 +745,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_REMOVE] = { .pc_func = nfsd_proc_remove, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -746,7 +755,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_RENAME] = { .pc_func = nfsd_proc_rename, .pc_decode = nfssvc_decode_renameargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_renameargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -756,7 +765,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_LINK] = { .pc_func = nfsd_proc_link, .pc_decode = nfssvc_decode_linkargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_linkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -766,7 +775,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_SYMLINK] = { .pc_func = nfsd_proc_symlink, .pc_decode = nfssvc_decode_symlinkargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_symlinkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, @@ -787,7 +796,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_RMDIR] = { .pc_func = nfsd_proc_rmdir, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_stat, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6de406322106..82ba034fa579 100644 --- 
a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -84,7 +84,7 @@ DEFINE_MUTEX(nfsd_mutex); * version 4.1 DRC caches. * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ -spinlock_t nfsd_drc_lock; +DEFINE_SPINLOCK(nfsd_drc_lock); unsigned long nfsd_drc_max_mem; unsigned long nfsd_drc_mem_used; @@ -563,7 +563,6 @@ static void set_max_drc(void) nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; - spin_lock_init(&nfsd_drc_lock); dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } @@ -596,6 +595,37 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_module = THIS_MODULE, }; +static void nfsd_complete_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + + nn->nfsd_serv = NULL; + complete(&nn->nfsd_shutdown_complete); +} + +void nfsd_shutdown_threads(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; + + mutex_lock(&nfsd_mutex); + serv = nn->nfsd_serv; + if (serv == NULL) { + mutex_unlock(&nfsd_mutex); + return; + } + + svc_get(serv); + /* Kill outstanding nfsd threads */ + serv->sv_ops->svo_setup(serv, NULL, 0); + nfsd_destroy(net); + mutex_unlock(&nfsd_mutex); + /* Wait for shutdown of nfsd_serv to complete */ + wait_for_completion(&nn->nfsd_shutdown_complete); +} + bool i_am_nfsd(void) { return kthread_func(current) == nfsd; @@ -618,11 +648,13 @@ int nfsd_create_serv(struct net *net) &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; + init_completion(&nn->nfsd_shutdown_complete); nn->nfsd_serv->sv_maxconn = nn->max_connections; error = svc_bind(nn->nfsd_serv, net); if (error < 0) { svc_destroy(nn->nfsd_serv); + nfsd_complete_shutdown(net); return error; } @@ -671,7 +703,7 @@ void nfsd_destroy(struct net *net) svc_shutdown_net(nn->nfsd_serv, net); svc_destroy(nn->nfsd_serv); if (destroy) - nn->nfsd_serv = NULL; + 
nfsd_complete_shutdown(net); } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) @@ -997,7 +1029,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * NFSv4 does some encoding while processing */ p = resv->iov_base + resv->iov_len; - resv->iov_len += sizeof(__be32); + svcxdr_init_encode(rqstp); *statp = proc->pc_func(rqstp); if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) @@ -1052,7 +1084,7 @@ int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) */ int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) { - return xdr_ressize_check(rqstp, p); + return 1; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 5d79ef6a0c7f..a06c05fe3b42 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -9,12 +9,10 @@ #include "xdr.h" #include "auth.h" -#define NFSDDBG_FACILITY NFSDDBG_XDR - /* * Mapping of S_IF* types to NFS file types */ -static u32 nfs_ftypes[] = { +static const u32 nfs_ftypes[] = { NFNON, NFCHR, NFCHR, NFBAD, NFDIR, NFBAD, NFBLK, NFBAD, NFREG, NFBAD, NFLNK, NFBAD, @@ -27,6 +25,28 @@ static u32 nfs_ftypes[] = { */ /** + * svcxdr_encode_stat - Encode an NFSv2 status code + * @xdr: XDR stream + * @status: status value to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(status)); + if (!p) + return false; + *p = status; + + return true; +} + +/** * svcxdr_decode_fhandle - Decode an NFSv2 file handle * @xdr: XDR stream positioned at an encoded NFSv2 FH * @fhp: OUT: filled-in server file handle @@ -50,11 +70,28 @@ svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) return true; } -static __be32 * -encode_fh(__be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) { + __be32 *p; + + p = 
xdr_reserve_space(xdr, NFS_FHSIZE); + if (!p) + return false; memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); - return p + (NFS_FHSIZE>> 2); + + return true; +} + +static __be32 * +encode_timeval(__be32 *p, const struct timespec64 *time) +{ + *p++ = cpu_to_be32((u32)time->tv_sec); + if (time->tv_nsec) + *p++ = cpu_to_be32(time->tv_nsec / NSEC_PER_USEC); + else + *p++ = xdr_zero; + return p; } static bool @@ -162,68 +199,73 @@ svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, return true; } -static __be32 * -encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, - struct kstat *stat) +/** + * svcxdr_encode_fattr - Encode NFSv2 file attributes + * @rqstp: Context of a completed RPC transaction + * @xdr: XDR stream + * @fhp: File handle to encode + * @stat: Attributes to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat) { struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct dentry *dentry = fhp->fh_dentry; - int type; + struct dentry *dentry = fhp->fh_dentry; + int type = stat->mode & S_IFMT; struct timespec64 time; - u32 f; + __be32 *p; + u32 fsid; - type = (stat->mode & S_IFMT); + p = xdr_reserve_space(xdr, XDR_UNIT * 17); + if (!p) + return false; - *p++ = htonl(nfs_ftypes[type >> 12]); - *p++ = htonl((u32) stat->mode); - *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); - *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); + *p++ = cpu_to_be32(nfs_ftypes[type >> 12]); + *p++ = cpu_to_be32((u32)stat->mode); + *p++ = cpu_to_be32((u32)stat->nlink); + *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); + *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { - *p++ = htonl(NFS_MAXPATHLEN); - } else { - *p++ = htonl((u32) 
stat->size); - } - *p++ = htonl((u32) stat->blksize); + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) + *p++ = cpu_to_be32(NFS_MAXPATHLEN); + else + *p++ = cpu_to_be32((u32) stat->size); + *p++ = cpu_to_be32((u32) stat->blksize); if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = htonl(new_encode_dev(stat->rdev)); + *p++ = cpu_to_be32(new_encode_dev(stat->rdev)); else - *p++ = htonl(0xffffffff); - *p++ = htonl((u32) stat->blocks); + *p++ = cpu_to_be32(0xffffffff); + *p++ = cpu_to_be32((u32)stat->blocks); + switch (fsid_source(fhp)) { - default: - case FSIDSOURCE_DEV: - *p++ = htonl(new_encode_dev(stat->dev)); - break; case FSIDSOURCE_FSID: - *p++ = htonl((u32) fhp->fh_export->ex_fsid); + fsid = (u32)fhp->fh_export->ex_fsid; break; case FSIDSOURCE_UUID: - f = ((u32*)fhp->fh_export->ex_uuid)[0]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[1]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[2]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[3]; - *p++ = htonl(f); + fsid = ((u32 *)fhp->fh_export->ex_uuid)[0]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[1]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[2]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[3]; + break; + default: + fsid = new_encode_dev(stat->dev); break; } - *p++ = htonl((u32) stat->ino); - *p++ = htonl((u32) stat->atime.tv_sec); - *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); - time = stat->mtime; - lease_get_mtime(d_inode(dentry), &time); - *p++ = htonl((u32) time.tv_sec); - *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); - *p++ = htonl((u32) stat->ctime.tv_sec); - *p++ = htonl(stat->ctime.tv_nsec ? 
stat->ctime.tv_nsec / 1000 : 0); + *p++ = cpu_to_be32(fsid); - return p; -} + *p++ = cpu_to_be32((u32)stat->ino); + p = encode_timeval(p, &stat->atime); + time = stat->mtime; + lease_get_mtime(d_inode(dentry), &time); + p = encode_timeval(p, &time); + encode_timeval(p, &stat->ctime); -/* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) -{ - return encode_fattr(rqstp, p, fhp, stat); + return true; } /* @@ -390,106 +432,118 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) */ int -nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_stat *resp = rqstp->rq_resp; - *p++ = resp->status; - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_stat(xdr, resp->status); } int -nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_attrstat *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); -out: - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + break; + } + + return 1; } int nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_diropres *resp = rqstp->rq_resp; - *p++ = resp->status; - if (resp->status != nfs_ok) - goto out; - p = encode_fh(p, &resp->fh); - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); -out: - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fhandle(xdr, &resp->fh)) + 
return 0; + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + break; + } + + return 1; } int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - *p++ = htonl(resp->len); - xdr_ressize_check(rqstp, p); - rqstp->rq_res.page_len = resp->len; - if (resp->len & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, resp->len)) + if (!svcxdr_encode_stat(xdr, resp->status)) return 0; + switch (resp->status) { + case nfs_ok: + if (xdr_stream_encode_u32(xdr, resp->len) < 0) + return 0; + xdr_write_pages(xdr, &resp->page, 0, resp->len); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) + return 0; + break; + } + return 1; } int nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->count); - xdr_ressize_check(rqstp, p); - - /* now update rqstp->rq_res to reflect data as well */ - rqstp->rq_res.page_len = resp->count; - if (resp->count & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); - } - if (svc_encode_result_payload(rqstp, head->iov_len, resp->count)) + if (!svcxdr_encode_stat(xdr, resp->status)) return 0; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return 0; + if (xdr_stream_encode_u32(xdr, 
resp->count) < 0) + return 0; + xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, + resp->count); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) + return 0; + break; + } + return 1; } int nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readdirres *resp = rqstp->rq_resp; + struct xdr_buf *dirlist = &resp->dirlist; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); - - xdr_ressize_check(rqstp, p); - p = resp->buffer; - *p++ = 0; /* no more entries */ - *p++ = htonl((resp->common.err == nfserr_eof)); - rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1; + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + /* no more entries */ + if (xdr_stream_encode_item_absent(xdr) < 0) + return 0; + if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) + return 0; + break; + } return 1; } @@ -497,64 +551,113 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) int nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) { + struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_statfsres *resp = rqstp->rq_resp; struct kstatfs *stat = &resp->stats; - *p++ = resp->status; - if (resp->status != nfs_ok) - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return 0; + switch (resp->status) { + case nfs_ok: + p = xdr_reserve_space(xdr, XDR_UNIT * 5); + if (!p) + return 0; + *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); + *p++ = cpu_to_be32(stat->f_bsize); + *p++ = cpu_to_be32(stat->f_blocks); + *p++ = cpu_to_be32(stat->f_bfree); + *p = cpu_to_be32(stat->f_bavail); + break; + } - *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ - *p++ = htonl(stat->f_bsize); - *p++ = htonl(stat->f_blocks); - *p++ = htonl(stat->f_bfree); - *p++ = htonl(stat->f_bavail); - 
return xdr_ressize_check(rqstp, p); + return 1; } -int -nfssvc_encode_entry(void *ccdv, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int d_type) +/** + * nfssvc_encode_nfscookie - Encode a directory offset cookie + * @resp: readdir result context + * @offset: offset cookie to encode + * + * The buffer space for the offset cookie has already been reserved + * by svcxdr_encode_entry_common(). + */ +void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset) { - struct readdir_cd *ccd = ccdv; - struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); - __be32 *p = cd->buffer; - int buflen, slen; + __be32 cookie = cpu_to_be32(offset); - /* - dprintk("nfsd: entry(%.*s off %ld ino %ld)\n", - namlen, name, offset, ino); - */ + if (!resp->cookie_offset) + return; - if (offset > ~((u32) 0)) { - cd->common.err = nfserr_fbig; - return -EINVAL; - } - if (cd->offset) - *cd->offset = htonl(offset); + write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, + sizeof(cookie)); + resp->cookie_offset = 0; +} - /* truncate filename */ - namlen = min(namlen, NFS2_MAXNAMLEN); - slen = XDR_QUADLEN(namlen); +static bool +svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name, + int namlen, loff_t offset, u64 ino) +{ + struct xdr_buf *dirlist = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; - if ((buflen = cd->buflen - slen - 4) < 0) { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } - if (ino > ~((u32) 0)) { - cd->common.err = nfserr_fbig; - return -EINVAL; - } - *p++ = xdr_one; /* mark entry present */ - *p++ = htonl((u32) ino); /* file id */ - p = xdr_encode_array(p, name, namlen);/* name length & name */ - cd->offset = p; /* remember pointer */ - *p++ = htonl(~0U); /* offset of next entry */ - - cd->buflen = buflen; - cd->buffer = p; - cd->common.err = nfs_ok; + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + /* fileid */ + if (xdr_stream_encode_u32(xdr, (u32)ino) < 0) 
+ return false; + /* name */ + if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS2_MAXNAMLEN)) < 0) + return false; + /* cookie */ + resp->cookie_offset = dirlist->len; + if (xdr_stream_encode_u32(xdr, ~0U) < 0) + return false; + + return true; +} + +/** + * nfssvc_encode_entry - encode one NFSv2 READDIR entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occurred, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfssvc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd_readdirres *resp = container_of(ccd, + struct nfsd_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfssvc_encode_nfscookie(resp, offset); + + if (!svcxdr_encode_entry_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; return 0; + +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; } /* diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 73deea353169..54cab651ac1d 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -371,6 +371,10 @@ struct nfs4_client { /* debugging info directory under nfsd/clients/ : */ struct dentry *cl_nfsd_dentry; + /* 'info' file within that directory.
Ref is not counted, + * but will remain valid iff cl_nfsd_dentry != NULL + */ + struct dentry *cl_nfsd_info_dentry; /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 92a0973dd671..27a93ebd1d80 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -391,6 +391,30 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name, \ DEFINE_NFSD_ERR_EVENT(read_err); DEFINE_NFSD_ERR_EVENT(write_err); +TRACE_EVENT(nfsd_dirent, + TP_PROTO(struct svc_fh *fhp, + u64 ino, + const char *name, + int namlen), + TP_ARGS(fhp, ino, name, namlen), + TP_STRUCT__entry( + __field(u32, fh_hash) + __field(u64, ino) + __field(int, len) + __dynamic_array(unsigned char, name, namlen) + ), + TP_fast_assign( + __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0; + __entry->ino = ino; + __entry->len = namlen; + memcpy(__get_str(name), name, namlen); + __assign_str(name, name); + ), + TP_printk("fh_hash=0x%08x ino=%llu name=%.*s", + __entry->fh_hash, __entry->ino, + __entry->len, __get_str(name)) +) + #include "state.h" #include "filecache.h" #include "vfs.h" diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fd6be35a1642..15adf1f6ab21 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1968,8 +1968,9 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, return 0; } -static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, - struct readdir_cd *cdp, loff_t *offsetp) +static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, + nfsd_filldir_t func, struct readdir_cd *cdp, + loff_t *offsetp) { struct buffered_dirent *de; int host_err; @@ -2015,6 +2016,8 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, if (cdp->err != nfs_ok) break; + trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen); + reclen = ALIGN(sizeof(*de) + de->namlen, sizeof(u64)); size -= reclen; @@ -2062,7 +2065,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh 
*fhp, loff_t *offsetp, goto out_close; } - err = nfsd_buffered_readdir(file, func, cdp, offsetp); + err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a2442ebe5acf..b21b76e6b9a8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -152,7 +152,7 @@ static inline void fh_drop_write(struct svc_fh *fh) } } -static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 3018b52b6d5e..f45b4bc93f52 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -94,6 +94,7 @@ struct nfsd_diropres { struct nfsd_readlinkres { __be32 status; int len; + struct page *page; }; struct nfsd_readres { @@ -101,17 +102,20 @@ struct nfsd_readres { struct svc_fh fh; unsigned long count; struct kstat stat; + struct page **pages; }; struct nfsd_readdirres { + /* Components of the reply */ __be32 status; int count; + /* Used to encode the reply's entry list */ + struct xdr_stream xdr; + struct xdr_buf dirlist; struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; + unsigned int cookie_offset; }; struct nfsd_statfsres { @@ -147,23 +151,26 @@ int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfssvc_encode_stat(struct svc_rqst *, __be32 *); -int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); +int nfssvc_encode_statres(struct svc_rqst *, __be32 *); +int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); int 
nfssvc_encode_readres(struct svc_rqst *, __be32 *); int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); -int nfssvc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int); +void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); +int nfssvc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); void nfssvc_release_attrstat(struct svc_rqst *rqstp); void nfssvc_release_diropres(struct svc_rqst *rqstp); void nfssvc_release_readres(struct svc_rqst *rqstp); /* Helper functions for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp); +bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status); +bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat); #endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 3e1578953f54..933008382bbe 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -137,6 +137,7 @@ struct nfsd3_readlinkres { __be32 status; struct svc_fh fh; __u32 len; + struct page **pages; }; struct nfsd3_readres { @@ -144,6 +145,7 @@ struct nfsd3_readres { struct svc_fh fh; unsigned long count; __u32 eof; + struct page **pages; }; struct nfsd3_writeres { @@ -167,19 +169,17 @@ struct nfsd3_linkres { }; struct nfsd3_readdirres { + /* Components of the reply */ __be32 status; struct svc_fh fh; - /* Just to save kmalloc on every readdirplus entry (svc_fh is a - * little large for the stack): */ - struct svc_fh scratch; - int count; __be32 verf[2]; + /* Used to encode the reply's entry list */ + struct xdr_stream xdr; + struct xdr_buf dirlist; + struct svc_fh scratch; struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; - __be32 * offset1; + unsigned 
int cookie_offset; struct svc_rqst * rqstp; }; @@ -280,9 +280,9 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); -int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *); +int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *); int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); int nfs3svc_encode_readres(struct svc_rqst *, __be32 *); @@ -298,15 +298,16 @@ int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); -int nfs3svc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); -int nfs3svc_encode_entry_plus(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); + +void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset); +int nfs3svc_encode_entry3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); +int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); /* Helper functions for NFSv3 ACL code */ -__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, - struct svc_fh *fhp); bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp); +bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status); +bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp); #endif /* _LINUX_NFSD_XDR3_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index c300885ae75d..fe540a3415c6 
100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -698,7 +698,7 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - struct xdr_stream xdr; + struct xdr_stream *xdr; struct svc_rqst * rqstp; u32 taglen; diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 0ba99c513649..8e76a79cdc6a 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -41,5 +41,8 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, extern bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, struct posix_acl **pacl); +extern bool +nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag); #endif /* __LINUX_NFSACL_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 31ee3b6047c3..e91d51ea028b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -248,6 +248,7 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; struct xdr_stream rq_arg_stream; + struct xdr_stream rq_res_stream; struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; @@ -574,4 +575,28 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); } +/** + * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_encode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->iov = resv; + xdr->p = resv->iov_base + resv->iov_len; + xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); 
+ buf->buflen -= rqstp->rq_auth_slack; + xdr->rqst = NULL; +} + #endif /* SUNRPC_SVC_H */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1e76ed688044..3184465de3a0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -94,6 +94,8 @@ struct svcxprt_rdma { spinlock_t sc_rw_ctxt_lock; struct list_head sc_rw_ctxts; + u32 sc_pending_recvs; + u32 sc_recv_batch; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; @@ -104,7 +106,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_read_complete_q; struct work_struct sc_work; struct llist_head sc_recv_ctxts; @@ -133,12 +134,10 @@ struct svc_rdma_recv_ctxt { struct rpc_rdma_cid rc_cid; struct ib_sge rc_recv_sge; void *rc_recv_buf; - struct xdr_buf rc_arg; struct xdr_stream rc_stream; bool rc_temp; u32 rc_byte_len; unsigned int rc_page_count; - unsigned int rc_hdr_count; u32 rc_inv_rkey; __be32 rc_msgtype; @@ -148,8 +147,6 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; struct svc_rdma_pcl rc_reply_pcl; - - struct page *rc_pages[RPCSVC_MAXPAGES]; }; struct svc_rdma_send_ctxt { @@ -158,12 +155,12 @@ struct svc_rdma_send_ctxt { struct ib_send_wr sc_send_wr; struct ib_cqe sc_cqe; + struct completion sc_done; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; - int sc_page_count; int sc_cur_sge_no; - struct page *sc_pages[RPCSVC_MAXPAGES]; + struct ib_sge sc_sges[]; }; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 92455e0d5244..571f605bc91e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -130,6 +130,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int, const struct 
cred *); +void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); @@ -143,6 +144,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt); void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *); +void svc_xprt_deferred_close(struct svc_xprt *xprt); static inline void svc_xprt_get(struct svc_xprt *xprt) { diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bc75c167f00..a965cbc136ad 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -395,6 +395,40 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) } /** + * xdr_encode_bool - Encode a boolean item + * @p: address in a buffer into which to encode + * @n: boolean value to encode + * + * Return value: + * Address of item following the encoded boolean + */ +static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) +{ + *p++ = n ?
xdr_one : xdr_zero; + return p; +} + +/** + * xdr_stream_encode_bool - Encode a boolean item + * @xdr: pointer to xdr_stream + * @n: boolean value to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_bool(p, n); + return len; +} + +/** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream * @n: integer to encode diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 036eb1f5c133..bda16e9e6ba7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1781,6 +1781,7 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) +DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index ff0ca88b1c8f..427294dd56a1 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -64,13 +64,24 @@ struct nfs_fhbase_old { * in include/linux/exportfs.h for currently registered values.
*/ struct nfs_fhbase_new { - __u8 fb_version; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type; - __u8 fb_fsid_type; - __u8 fb_fileid_type; - __u32 fb_auth[1]; -/* __u32 fb_fsid[0]; floating */ -/* __u32 fb_fileid[0]; floating */ + union { + struct { + __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ + __u8 fb_auth_type_aux; + __u8 fb_fsid_type_aux; + __u8 fb_fileid_type_aux; + __u32 fb_auth[1]; + /* __u32 fb_fsid[0]; floating */ + /* __u32 fb_fileid[0]; floating */ + }; + struct { + __u8 fb_version; /* == 1, even => nfs_fhbase_old */ + __u8 fb_auth_type; + __u8 fb_fsid_type; + __u8 fb_fileid_type; + __u32 fb_auth_flex[]; /* flexible-array member */ + }; + }; }; struct knfsd_fh { @@ -97,7 +108,7 @@ struct knfsd_fh { #define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_auth_type fh_base.fh_new.fb_auth_type #define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_fsid fh_base.fh_new.fb_auth +#define fh_fsid fh_base.fh_new.fb_auth_flex /* Do not use, provided for userspace compatiblity. */ #define fh_auth fh_base.fh_new.fb_auth diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3cdd71a8df1e..42565f0c7d5a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -139,6 +139,20 @@ int svc_print_xprts(char *buf, int maxlen) return len; } +/** + * svc_xprt_deferred_close - Close a transport + * @xprt: transport instance + * + * Used in contexts that need to defer the work of shutting down + * the transport to an nfsd thread. + */ +void svc_xprt_deferred_close(struct svc_xprt *xprt) +{ + if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags)) + svc_xprt_enqueue(xprt); +} +EXPORT_SYMBOL_GPL(svc_xprt_deferred_close); + static void svc_xprt_free(struct kref *kref) { struct svc_xprt *xprt = @@ -233,21 +247,25 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xprt; } -/* - * svc_xprt_received conditionally queues the transport for processing - * by another thread. 
The caller must hold the XPT_BUSY bit and must +/** + * svc_xprt_received - start next receiver thread + * @xprt: controlling transport + * + * The caller must hold the XPT_BUSY bit and must * not thereafter touch transport data. * * Note: XPT_DATA only gets cleared when a read-attempt finds no (or * insufficient) data. */ -static void svc_xprt_received(struct svc_xprt *xprt) +void svc_xprt_received(struct svc_xprt *xprt) { if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) { WARN_ONCE(1, "xprt=0x%p already busy!", xprt); return; } + trace_svc_xprt_received(xprt); + /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_enqueue_xprt with: */ @@ -257,6 +275,7 @@ static void svc_xprt_received(struct svc_xprt *xprt) xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } +EXPORT_SYMBOL_GPL(svc_xprt_received); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) { @@ -801,8 +820,10 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) newxpt->xpt_cred = get_cred(xprt->xpt_cred); svc_add_new_temp_xprt(serv, newxpt); trace_svc_xprt_accept(newxpt, serv->sv_name); - } else + } else { module_put(xprt->xpt_class->xcl_owner); + } + svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", @@ -817,8 +838,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } - /* clear XPT_BUSY: */ - svc_xprt_received(xprt); out: trace_svc_handle_xprt(xprt, len); return len; @@ -1229,6 +1248,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; + svc_xprt_received(rqstp->rq_xprt); return (dr->argslen<<2) - dr->xprt_hlen; } diff --git 
a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 97c0bddba7a3..35b7966ac3b3 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -303,15 +303,6 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, return NULL; } -static inline struct ip_map *ip_map_lookup(struct net *net, char *class, - struct in6_addr *addr) -{ - struct sunrpc_net *sn; - - sn = net_generic(net, sunrpc_net_id); - return __ip_map_lookup(sn->ip_map_cache, class, addr); -} - static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time64_t expiry) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2e2f007dfc9f..9eb5b6b89077 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -519,6 +519,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->netudpcnt++; + svc_xprt_received(rqstp->rq_xprt); return len; out_recv_err: @@ -527,7 +528,7 @@ out_recv_err: set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); } trace_svcsock_udp_recv_err(&svsk->sk_xprt, err); - return 0; + goto out_clear_busy; out_cmsg_err: net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", cmh->cmsg_level, cmh->cmsg_type); @@ -536,6 +537,8 @@ out_bh_enable: local_bh_enable(); out_free: kfree_skb(skb); +out_clear_busy: + svc_xprt_received(rqstp->rq_xprt); return 0; } @@ -728,10 +731,8 @@ static void svc_tcp_state_change(struct sock *sk) rmb(); svsk->sk_ostate(sk); trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock); - if (sk->sk_state != TCP_ESTABLISHED) { - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - svc_xprt_enqueue(&svsk->sk_xprt); - } + if (sk->sk_state != TCP_ESTABLISHED) + svc_xprt_deferred_close(&svsk->sk_xprt); } } @@ -901,7 +902,7 @@ err_too_large: net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n", __func__, svsk->sk_xprt.xpt_server->sv_name, svc_sock_reclen(svsk)); - set_bit(XPT_CLOSE, 
&svsk->sk_xprt.xpt_flags); + svc_xprt_deferred_close(&svsk->sk_xprt); err_short: return -EAGAIN; } @@ -1035,6 +1036,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; + svc_xprt_received(rqstp->rq_xprt); return rqstp->rq_arg.len; err_incomplete: @@ -1052,13 +1054,14 @@ error: if (len != -EAGAIN) goto err_delete; trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0); - return 0; + goto err_noclose; err_nuts: svsk->sk_datalen = 0; err_delete: trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_deferred_close(&svsk->sk_xprt); err_noclose: + svc_xprt_received(rqstp->rq_xprt); return 0; /* record not complete */ } @@ -1188,8 +1191,7 @@ out_close: xprt->xpt_server->sv_name, (err < 0) ? "got error" : "sent", (err < 0) ? err : sent, xdr->len); - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_deferred_close(xprt); atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; @@ -1268,7 +1270,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) case TCP_ESTABLISHED: break; default: - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_deferred_close(&svsk->sk_xprt); } } } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 9150df35fb6f..16897fcb659c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -93,7 +93,13 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, */ get_page(virt_to_page(rqst->rq_buffer)); sctxt->sc_send_wr.opcode = IB_WR_SEND; - return svc_rdma_send(rdma, sctxt); + ret = svc_rdma_send(rdma, sctxt); + if (ret < 0) + return ret; + + ret = wait_for_completion_killable(&sctxt->sc_done); + svc_rdma_send_ctxt_put(rdma, sctxt); + return ret; } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 
b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7d34290e2ff8..6be23ce7a93d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -89,8 +89,7 @@ * svc_rdma_recvfrom call returns. * * During the second svc_rdma_recvfrom call, RDMA Read sink pages - * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst - * (see rdma_read_complete() below). + * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst. */ #include <linux/slab.h> @@ -107,8 +106,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc); static inline struct svc_rdma_recv_ctxt * @@ -230,11 +227,6 @@ out_empty: void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { - unsigned int i; - - for (i = 0; i < ctxt->rc_page_count; i++) - put_page(ctxt->rc_pages[i]); - pcl_free(&ctxt->rc_call_pcl); pcl_free(&ctxt->rc_read_pcl); pcl_free(&ctxt->rc_write_pcl); @@ -266,33 +258,48 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp) svc_rdma_recv_ctxt_put(rdma, ctxt); } -static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, - struct svc_rdma_recv_ctxt *ctxt) +static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, + unsigned int wanted, bool temp) { + const struct ib_recv_wr *bad_wr = NULL; + struct svc_rdma_recv_ctxt *ctxt; + struct ib_recv_wr *recv_chain; int ret; - trace_svcrdma_post_recv(ctxt); - ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); - if (ret) - goto err_post; - return 0; + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + return false; -err_post: - trace_svcrdma_rq_post_err(rdma, ret); - svc_rdma_recv_ctxt_put(rdma, ctxt); - return ret; -} + recv_chain = NULL; + while (wanted--) { + ctxt = svc_rdma_recv_ctxt_get(rdma); + if (!ctxt) + break; -static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) -{ - struct svc_rdma_recv_ctxt *ctxt; + 
trace_svcrdma_post_recv(ctxt); + ctxt->rc_temp = temp; + ctxt->rc_recv_wr.next = recv_chain; + recv_chain = &ctxt->rc_recv_wr; + rdma->sc_pending_recvs++; + } + if (!recv_chain) + return false; - if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) - return 0; - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - return -ENOMEM; - return __svc_rdma_post_recv(rdma, ctxt); + ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr); + if (ret) + goto err_free; + return true; + +err_free: + trace_svcrdma_rq_post_err(rdma, ret); + while (bad_wr) { + ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt, + rc_recv_wr); + bad_wr = bad_wr->next; + svc_rdma_recv_ctxt_put(rdma, ctxt); + } + /* Since we're destroying the xprt, no need to reset + * sc_pending_recvs. */ + return false; } /** @@ -303,20 +310,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - struct svc_rdma_recv_ctxt *ctxt; - unsigned int i; - int ret; - - for (i = 0; i < rdma->sc_max_requests; i++) { - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - return false; - ctxt->rc_temp = true; - ret = __svc_rdma_post_recv(rdma, ctxt); - if (ret) - return false; - } - return true; + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); } /** @@ -324,8 +318,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) * @cq: Completion Queue context * @wc: Work Completion object * - * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that - * the Receive completion handler could be running. 
*/ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { @@ -333,6 +325,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_recv_ctxt *ctxt; + rdma->sc_pending_recvs--; + /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); @@ -340,8 +334,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) goto flushed; - if (svc_rdma_post_recv(rdma)) - goto post_err; + /* If receive posting fails, the connection is about to be + * lost anyway. The server will not be able to send a reply + * for this RPC, and the client will retransmit this RPC + * anyway when it reconnects. + * + * Therefore we drop the Receive, even if status was SUCCESS + * to reduce the likelihood of replayed requests once the + * client reconnects. + */ + if (rdma->sc_pending_recvs < rdma->sc_max_requests) + if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) + goto flushed; /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; @@ -356,10 +360,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; flushed: -post_err: svc_rdma_recv_ctxt_put(rdma, ctxt); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_xprt_enqueue(&rdma->sc_xprt); + svc_xprt_deferred_close(&rdma->sc_xprt); } /** @@ -371,10 +373,6 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma) { struct svc_rdma_recv_ctxt *ctxt; - while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) { - list_del(&ctxt->rc_list); - svc_rdma_recv_ctxt_put(rdma, ctxt); - } while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) { list_del(&ctxt->rc_list); svc_rdma_recv_ctxt_put(rdma, ctxt); @@ -712,35 +710,6 @@ out_inval: return -EINVAL; } -static void rdma_read_complete(struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head) -{ - int page_no; - - /* Move Read chunk pages to 
rqstp so that they will be released - * when svc_process is done with them. - */ - for (page_no = 0; page_no < head->rc_page_count; page_no++) { - put_page(rqstp->rq_pages[page_no]); - rqstp->rq_pages[page_no] = head->rc_pages[page_no]; - } - head->rc_page_count = 0; - - /* Point rq_arg.pages past header */ - rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count]; - rqstp->rq_arg.page_len = head->rc_arg.page_len; - - /* rq_respages starts after the last arg page */ - rqstp->rq_respages = &rqstp->rq_pages[page_no]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Rebuild rq_arg head and tail. */ - rqstp->rq_arg.head[0] = head->rc_arg.head[0]; - rqstp->rq_arg.tail[0] = head->rc_arg.tail[0]; - rqstp->rq_arg.len = head->rc_arg.len; - rqstp->rq_arg.buflen = head->rc_arg.buflen; -} - static void svc_rdma_send_error(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status) @@ -825,25 +794,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; + ctxt = NULL; spin_lock(&rdma_xprt->sc_rq_dto_lock); - ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); - if (ctxt) { - list_del(&ctxt->rc_list); - spin_unlock(&rdma_xprt->sc_rq_dto_lock); - rdma_read_complete(rqstp, ctxt); - goto complete; - } ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); - if (!ctxt) { + if (ctxt) + list_del(&ctxt->rc_list); + else /* No new incoming requests, terminate the loop */ clear_bit(XPT_DATA, &xprt->xpt_flags); - spin_unlock(&rdma_xprt->sc_rq_dto_lock); - return 0; - } - list_del(&ctxt->rc_list); spin_unlock(&rdma_xprt->sc_rq_dto_lock); - percpu_counter_inc(&svcrdma_stat_recv); + /* Unblock the transport for the next receive */ + svc_xprt_received(xprt); + if (!ctxt) + return 0; + + percpu_counter_inc(&svcrdma_stat_recv); ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device, ctxt->rc_recv_sge.addr, ctxt->rc_byte_len, DMA_FROM_DEVICE); @@ -868,21 +834,17 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) 
svc_rdma_get_inv_rkey(rdma_xprt, ctxt); if (!pcl_is_empty(&ctxt->rc_read_pcl) || - !pcl_is_empty(&ctxt->rc_call_pcl)) - goto out_readlist; + !pcl_is_empty(&ctxt->rc_call_pcl)) { + ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); + if (ret < 0) + goto out_readfail; + } -complete: rqstp->rq_xprt_ctxt = ctxt; rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, xprt); return rqstp->rq_arg.len; -out_readlist: - ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); - if (ret < 0) - goto out_readfail; - return 0; - out_err: svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 693d139a8633..5238bc829235 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -150,6 +150,8 @@ struct svc_rdma_chunk_ctxt { struct svcxprt_rdma *cc_rdma; struct list_head cc_rwctxts; int cc_sqecount; + enum ib_wc_status cc_status; + struct completion cc_done; }; static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma, @@ -250,7 +252,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) wake_up(&rdma->sc_send_wait); if (unlikely(wc->status != IB_WC_SUCCESS)) - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); svc_rdma_write_info_free(info); } @@ -299,29 +301,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); struct svcxprt_rdma *rdma = cc->cc_rdma; - struct svc_rdma_read_info *info = - container_of(cc, struct svc_rdma_read_info, ri_cc); trace_svcrdma_wc_read(wc, &cc->cc_cid); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); - if (unlikely(wc->status != IB_WC_SUCCESS)) { - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt); - } else { - spin_lock(&rdma->sc_rq_dto_lock); - 
list_add_tail(&info->ri_readctxt->rc_list, - &rdma->sc_read_complete_q); - /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */ - set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); - spin_unlock(&rdma->sc_rq_dto_lock); - - svc_xprt_enqueue(&rdma->sc_xprt); - } - - svc_rdma_read_info_free(info); + cc->cc_status = wc->status; + complete(&cc->cc_done); + return; } /* This function sleeps when the transport's Send Queue is congested. @@ -334,7 +322,6 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) { struct svcxprt_rdma *rdma = cc->cc_rdma; - struct svc_xprt *xprt = &rdma->sc_xprt; struct ib_send_wr *first_wr; const struct ib_send_wr *bad_wr; struct list_head *tmp; @@ -373,7 +360,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) } while (1); trace_svcrdma_sq_post_err(rdma, ret); - set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); /* If even one was posted, there will be a completion. 
*/ if (bad_wr != first_wr) @@ -677,8 +664,8 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; struct svc_rqst *rqstp = info->ri_rqst; - struct svc_rdma_rw_ctxt *ctxt; unsigned int sge_no, seg_len, len; + struct svc_rdma_rw_ctxt *ctxt; struct scatterlist *sg; int ret; @@ -694,8 +681,6 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, seg_len = min_t(unsigned int, len, PAGE_SIZE - info->ri_pageoff); - head->rc_arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; if (!info->ri_pageoff) head->rc_page_count++; @@ -789,12 +774,10 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, page_len = min_t(unsigned int, remaining, PAGE_SIZE - info->ri_pageoff); - head->rc_arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; if (!info->ri_pageoff) head->rc_page_count++; - dst = page_address(head->rc_arg.pages[info->ri_pageno]); + dst = page_address(rqstp->rq_pages[info->ri_pageno]); memcpy(dst + info->ri_pageno, src + offset, page_len); info->ri_totalbytes += page_len; @@ -814,7 +797,7 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, * svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks * @info: context for RDMA Reads * - * The chunk data lands in head->rc_arg as a series of contiguous pages, + * The chunk data lands in rqstp->rq_arg as a series of contiguous pages, * like an incoming TCP call. 
* * Return values: @@ -828,8 +811,8 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; + struct xdr_buf *buf = &info->ri_rqst->rq_arg; struct svc_rdma_chunk *chunk, *next; - struct xdr_buf *buf = &head->rc_arg; unsigned int start, length; int ret; @@ -865,9 +848,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf buf->len += info->ri_totalbytes; buf->buflen += info->ri_totalbytes; - head->rc_hdr_count = 1; - buf->head[0].iov_base = page_address(head->rc_pages[0]); + buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); + buf->pages = &info->ri_rqst->rq_pages[1]; buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; return 0; } @@ -876,9 +859,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf * svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks * @info: context for RDMA Reads * - * The chunk data lands in the page list of head->rc_arg.pages. + * The chunk data lands in the page list of rqstp->rq_arg.pages. * - * Currently NFSD does not look at the head->rc_arg.tail[0] iovec. + * Currently NFSD does not look at the rqstp->rq_arg.tail[0] kvec. * Therefore, XDR round-up of the Read chunk and trailing * inline content must both be added at the end of the pagelist. 
* @@ -892,7 +875,7 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; - struct xdr_buf *buf = &head->rc_arg; + struct xdr_buf *buf = &info->ri_rqst->rq_arg; struct svc_rdma_chunk *chunk; unsigned int length; int ret; @@ -902,8 +885,6 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) if (ret < 0) goto out; - head->rc_hdr_count = 0; - /* Split the Receive buffer between the head and tail * buffers at Read chunk's position. XDR roundup of the * chunk is not included in either the pagelist or in @@ -922,7 +903,8 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) * Currently these chunks always start at page offset 0, * thus the rounded-up length never crosses a page boundary. */ - length = XDR_QUADLEN(info->ri_totalbytes) << 2; + buf->pages = &info->ri_rqst->rq_pages[0]; + length = xdr_align_size(chunk->ch_length); buf->page_len = length; buf->len += length; buf->buflen += length; @@ -1034,8 +1016,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) * @info: context for RDMA Reads * * The start of the data lands in the first page just after the - * Transport header, and the rest lands in the page list of - * head->rc_arg.pages. + * Transport header, and the rest lands in rqstp->rq_arg.pages. 
* * Assumptions: * - A PZRC is never sent in an RDMA_MSG message, though it's @@ -1050,8 +1031,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) */ static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; - struct xdr_buf *buf = &head->rc_arg; + struct xdr_buf *buf = &info->ri_rqst->rq_arg; int ret; ret = svc_rdma_read_call_chunk(info); @@ -1061,35 +1041,15 @@ static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) buf->len += info->ri_totalbytes; buf->buflen += info->ri_totalbytes; - head->rc_hdr_count = 1; - buf->head[0].iov_base = page_address(head->rc_pages[0]); + buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); + buf->pages = &info->ri_rqst->rq_pages[1]; buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; out: return ret; } -/* Pages under I/O have been copied to head->rc_pages. Ensure they - * are not released by svc_xprt_release() until the I/O is complete. - * - * This has to be done after all Read WRs are constructed to properly - * handle a page that is part of I/O on behalf of two different RDMA - * segments. - * - * Do this only if I/O has been posted. Otherwise, we do indeed want - * svc_xprt_release() to clean things up properly. - */ -static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, - const unsigned int start, - const unsigned int num_pages) -{ - unsigned int i; - - for (i = start; i < num_pages + start; i++) - rqstp->rq_pages[i] = NULL; -} - /** * svc_rdma_process_read_list - Pull list of Read chunks from the client * @rdma: controlling RDMA transport @@ -1121,18 +1081,6 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc; int ret; - /* The request (with page list) is constructed in - * head->rc_arg. Pages involved with RDMA Read I/O are - * transferred there. 
- */ - head->rc_arg.head[0] = rqstp->rq_arg.head[0]; - head->rc_arg.tail[0] = rqstp->rq_arg.tail[0]; - head->rc_arg.pages = head->rc_pages; - head->rc_arg.page_base = 0; - head->rc_arg.page_len = 0; - head->rc_arg.len = rqstp->rq_arg.len; - head->rc_arg.buflen = rqstp->rq_arg.buflen; - info = svc_rdma_read_info_alloc(rdma); if (!info) return -ENOMEM; @@ -1154,11 +1102,22 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, goto out_err; trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount); + init_completion(&cc->cc_done); ret = svc_rdma_post_chunk_ctxt(cc); if (ret < 0) goto out_err; - svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count); - return 1; + + ret = 1; + wait_for_completion(&cc->cc_done); + if (cc->cc_status != IB_WC_SUCCESS) + ret = -EIO; + + /* rq_respages starts after the last arg page */ + rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */ + head->rc_page_count = 0; out_err: svc_rdma_read_info_free(info); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 52c759a8543e..056452cabc98 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -111,8 +111,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc); static inline struct svc_rdma_send_ctxt * @@ -157,6 +155,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; + init_completion(&ctxt->sc_done); ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, @@ -220,7 +219,6 @@ out: ctxt->sc_send_wr.num_sge = 0; ctxt->sc_cur_sge_no = 0; - ctxt->sc_page_count = 0; return ctxt; 
out_empty: @@ -235,8 +233,6 @@ out_empty: * svc_rdma_send_ctxt_put - Return send_ctxt to free list * @rdma: controlling svcxprt_rdma * @ctxt: object to return to the free list - * - * Pages left in sc_pages are DMA unmapped and released. */ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -257,9 +253,6 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, ctxt->sc_sges[i].length); } - for (i = 0; i < ctxt->sc_page_count; ++i) - put_page(ctxt->sc_pages[i]); - spin_lock(&rdma->sc_send_lock); list_add(&ctxt->sc_list, &rdma->sc_send_ctxts); spin_unlock(&rdma->sc_send_lock); @@ -282,15 +275,13 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + complete(&ctxt->sc_done); + atomic_inc(&rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); - svc_rdma_send_ctxt_put(rdma, ctxt); - - if (unlikely(wc->status != IB_WC_SUCCESS)) { - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_xprt_enqueue(&rdma->sc_xprt); - } + if (unlikely(wc->status != IB_WC_SUCCESS)) + svc_xprt_deferred_close(&rdma->sc_xprt); } /** @@ -298,7 +289,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) * @rdma: transport on which to post the WR * @ctxt: send ctxt with a Send WR ready to post * - * Returns zero the Send WR was posted successfully. Otherwise, a + * Returns zero if the Send WR was posted successfully. Otherwise, a * negative errno is returned. 
*/ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -306,7 +297,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; - might_sleep(); + reinit_completion(&ctxt->sc_done); /* Sync the transport header buffer */ ib_dma_sync_single_for_device(rdma->sc_pd->device, @@ -336,7 +327,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) } trace_svcrdma_sq_post_err(rdma, ret); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); wake_up(&rdma->sc_send_wait); return ret; } @@ -795,25 +786,6 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, svc_rdma_xb_dma_map, &args); } -/* The svc_rqst and all resources it owns are released as soon as - * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt - * so they are released by the Send completion handler. - */ -static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, - struct svc_rdma_send_ctxt *ctxt) -{ - int i, pages = rqstp->rq_next_page - rqstp->rq_respages; - - ctxt->sc_page_count += pages; - for (i = 0; i < pages; i++) { - ctxt->sc_pages[i] = rqstp->rq_respages[i]; - rqstp->rq_respages[i] = NULL; - } - - /* Prevent svc_xprt_release from releasing pages in rq_pages */ - rqstp->rq_next_page = rqstp->rq_respages; -} - /* Prepare the portion of the RPC Reply that will be transmitted * via RDMA Send. The RPC-over-RDMA transport header is prepared * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. 
@@ -843,15 +815,20 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, if (ret < 0) return ret; - svc_rdma_save_io_pages(rqstp, sctxt); - if (rctxt->rc_inv_rkey) { sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; } else { sctxt->sc_send_wr.opcode = IB_WR_SEND; } - return svc_rdma_send(rdma, sctxt); + + ret = svc_rdma_send(rdma, sctxt); + if (ret < 0) + return ret; + + ret = wait_for_completion_killable(&sctxt->sc_done); + svc_rdma_send_ctxt_put(rdma, sctxt); + return ret; } /** @@ -917,7 +894,8 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; if (svc_rdma_send(rdma, sctxt)) goto put_ctxt; - return; + + wait_for_completion_killable(&sctxt->sc_done); put_ctxt: svc_rdma_send_ctxt_put(rdma, sctxt); @@ -979,16 +957,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto err1; + + /* Prevent svc_xprt_release() from releasing the page backing + * rq_res.head[0].iov_base. It's no longer being accessed by + * the I/O device. */ + rqstp->rq_respages++; return 0; err2: if (ret != -E2BIG && ret != -EINVAL) goto err1; - /* Send completion releases payload pages that were part - * of previously posted RDMA Writes. 
- */ - svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret); return 0; @@ -996,7 +975,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) svc_rdma_send_ctxt_put(rdma, sctxt); err0: trace_svcrdma_send_err(rqstp, ret); - set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_deferred_close(&rdma->sc_xprt); return -ENOTCONN; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c895f80df659..d94b7759ada1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -119,8 +119,7 @@ static void qp_event_handler(struct ib_event *event, void *context) case IB_EVENT_QP_ACCESS_ERR: case IB_EVENT_DEVICE_FATAL: default: - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_deferred_close(xprt); break; } } @@ -137,7 +136,6 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); - INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts); init_llist_head(&cma_xprt->sc_recv_ctxts); INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); @@ -279,12 +277,14 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id, switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); + + /* Handle any requests that were received while + * CONN_PENDING was set. 
*/ svc_xprt_enqueue(xprt); break; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + svc_xprt_deferred_close(xprt); break; default: break; @@ -404,11 +404,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = svcrdma_max_requests; newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; - rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; + newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH; + rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests + + newxprt->sc_recv_batch; if (rq_depth > dev->attrs.max_qp_wr) { pr_warn("svcrdma: reducing receive depth to %d\n", dev->attrs.max_qp_wr); rq_depth = dev->attrs.max_qp_wr; + newxprt->sc_recv_batch = 1; newxprt->sc_max_requests = rq_depth - 2; newxprt->sc_max_bc_requests = 2; } |