From 3132e49ecef9dab43d858d8e7066662c6a1efb16 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Aug 2016 15:58:24 -0400 Subject: pnfs: track multiple layout types in fsinfo structure Current NFSv4.1/pNFS client assumes that MDS supports only one layout type. While it's true for most existing servers, nevertheless, this can be change in the near future. For now, this patch just plumbs in the ability to track a list of layouts in the fsinfo structure. The existing behavior of the client is preserved, by having it just select the first entry in the list. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Jeff Layton Reviewed-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 2 +- fs/nfs/nfs4xdr.c | 23 ++++++++++------------- fs/nfs/pnfs.c | 27 ++++++++++++++++----------- fs/nfs/pnfs.h | 4 ++-- 4 files changed, 29 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1e106780a237..0c1b3a002dc2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -785,7 +785,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs } fsinfo.fattr = fattr; - fsinfo.layouttype = 0; + memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype)); error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) goto out_error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7bd3a5c09d31..41a02f994976 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4725,14 +4725,13 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, } /* - * Decode potentially multiple layout types. Currently we only support - * one layout driver per file system. + * Decode potentially multiple layout types. */ -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, +static int decode_pnfs_layout_types(struct xdr_stream *xdr, uint32_t *layouttype) { __be32 *p; - int num; + uint32_t num, i; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) @@ -4741,18 +4740,17 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, /* pNFS is not supported by the underlying file system */ if (num == 0) { - *layouttype = 0; return 0; } - if (num > 1) - printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout " - "drivers per filesystem not supported\n", __func__); + if (num > NFS_MAX_LAYOUT_TYPES) + printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num); /* Decode and set first layout type, move xdr->p past unused types */ p = xdr_inline_decode(xdr, num * 4); if (unlikely(!p)) goto out_overflow; - *layouttype = be32_to_cpup(p); + for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++) + layouttype[i] = be32_to_cpup(p++); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4772,10 +4770,9 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) return -EIO; if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { - status = decode_first_pnfs_layout_type(xdr, layouttype); + status = decode_pnfs_layout_types(xdr, layouttype); bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; - } else - *layouttype = 0; + } return status; } @@ -4856,7 +4853,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; - status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); + status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype); if (status != 0) goto xdr_error; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2c93a85eda51..a6a683fbd230 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -102,32 +102,37 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) * Try to set the server's pnfs module to the pnfs layout type specified by id. * Currently only one pNFS layout driver per filesystem is supported. * - * @id layout type. Zero (illegal layout type) indicates pNFS not in use. + * @ids array of layout types supported by MDS. */ void set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, - u32 id) + u32 *ids) { struct pnfs_layoutdriver_type *ld_type = NULL; + u32 id; - if (id == 0) - goto out_no_driver; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { - printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", - __func__, id, server->nfs_client->cl_exchange_flags); + printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n", + __func__, server->nfs_client->cl_exchange_flags); goto out_no_driver; } + + id = ids[0]; + if (!id) + goto out_no_driver; + ld_type = find_pnfs_driver(id); if (!ld_type) { request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); ld_type = find_pnfs_driver(id); - if (!ld_type) { - dprintk("%s: No pNFS module found for %u.\n", - __func__, id); - goto out_no_driver; - } } + + if (!ld_type) { + dprintk("%s: No pNFS module found for %u.\n", __func__, id); + goto out_no_driver; + } + server->pnfs_curr_ld = ld_type; if (ld_type->set_layoutdriver && ld_type->set_layoutdriver(server, mntfh)) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 31d99b2927b0..be515e6a3823 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); -void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); @@ -657,7 +657,7 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) } static inline void set_pnfs_layoutdriver(struct nfs_server *s, - const struct nfs_fh *mntfh, u32 id) + const struct nfs_fh *mntfh, u32 *ids) { } -- cgit v1.2.1 From f4b52bb0842642a781a23dd36ae364253e03fdb4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 20:03:48 -0400 Subject: NFSv4.x: Set up struct svc_serv_ops for the callback channel In order to manage the threads using svc_set_num_threads, we need to fill in a few extra fields. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 57 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 52a28311e2a4..478bb0a7effe 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -155,13 +155,6 @@ nfs41_callback_up(struct svc_serv *serv) return rqstp; } -static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, - struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) -{ - *rqstpp = nfs41_callback_up(serv); - *callback_svc = nfs41_callback_svc; -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -173,11 +166,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, - struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +static struct svc_rqst * +nfs41_callback_up(struct svc_serv *serv) { - *rqstpp = ERR_PTR(-ENOTSUPP); - *callback_svc = ERR_PTR(-ENOTSUPP); + return ERR_PTR(-ENOTSUPP); } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, @@ -190,7 +182,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { struct svc_rqst *rqstp; - int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; int ret; @@ -203,11 +194,9 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, case 0: /* v4.0 callback setup */ rqstp = nfs4_callback_up(serv); - callback_svc = nfs4_callback_svc; break; default: - nfs_minorversion_callback_svc_setup(serv, - &rqstp, &callback_svc); + rqstp = nfs41_callback_up(serv); } if (IS_ERR(rqstp)) @@ -217,7 +206,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, cb_info->serv = serv; cb_info->rqst = rqstp; - cb_info->task = kthread_create(callback_svc, cb_info->rqst, + cb_info->task = kthread_create(serv->sv_ops->svo_function, + cb_info->rqst, "nfsv4.%u-svc", minorversion); if (IS_ERR(cb_info->task)) { ret = PTR_ERR(cb_info->task); @@ -281,14 +271,34 @@ err_bind: return ret; } -static struct svc_serv_ops nfs_cb_sv_ops = { +static struct svc_serv_ops nfs40_cb_sv_ops = { + .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_module = THIS_MODULE, +}; +#if defined(CONFIG_NFS_V4_1) +static struct svc_serv_ops nfs41_cb_sv_ops = { + .svo_function = nfs41_callback_svc, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_module = THIS_MODULE, }; +struct svc_serv_ops *nfs4_cb_sv_ops[] = { + [0] = &nfs40_cb_sv_ops, + [1] = &nfs41_cb_sv_ops, +}; +#else +struct svc_serv_ops *nfs4_cb_sv_ops[] = { + [0] = &nfs40_cb_sv_ops, + [1] = NULL, +}; +#endif + static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; struct svc_serv *serv; + struct svc_serv_ops *sv_ops; /* * Check whether we're already up and running. @@ -302,6 +312,17 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) return cb_info->serv; } + switch (minorversion) { + case 0: + sv_ops = nfs4_cb_sv_ops[0]; + break; + default: + sv_ops = nfs4_cb_sv_ops[1]; + } + + if (sv_ops == NULL) + return ERR_PTR(-ENOTSUPP); + /* * Sanity check: if there's no task, * we should be the first user ... @@ -310,7 +331,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); -- cgit v1.2.1 From d00252688604edfd07d0e11a05d3a2b7cf05bb3d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 20:03:49 -0400 Subject: SUNRPC: Initialise struct svc_serv backchannel fields during __svc_create() Clean up. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 478bb0a7effe..08e16f9b3333 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -147,9 +147,6 @@ nfs41_callback_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - INIT_LIST_HEAD(&serv->sv_cb_list); - spin_lock_init(&serv->sv_cb_lock); - init_waitqueue_head(&serv->sv_cb_waitq); rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp)); return rqstp; -- cgit v1.2.1 From 3b01c11ee8bfa9d5d14b028e9fbe861a0f22d328 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 20:03:50 -0400 Subject: NFSv4.x: Fix up the global tracking of the callback server Ensure that the nfs_callback_info[] array correctly tracks the struct svc_serv. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 08e16f9b3333..b125f02ddf8b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -201,7 +201,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, svc_sock_update_bufs(serv); - cb_info->serv = serv; cb_info->rqst = rqstp; cb_info->task = kthread_create(serv->sv_ops->svo_function, cb_info->rqst, @@ -300,7 +299,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) /* * Check whether we're already up and running. */ - if (cb_info->task) { + if (cb_info->serv) { /* * Note: increase service usage, because later in case of error * svc_destroy() will be called. @@ -333,6 +332,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); } + cb_info->serv = serv; /* As there is only one thread we need to over-ride the * default maximum of 80 connections */ @@ -375,6 +375,8 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) * thread exits. */ err_net: + if (!cb_info->users) + cb_info->serv = NULL; svc_destroy(serv); err_create: mutex_unlock(&nfs_callback_mutex); @@ -396,9 +398,11 @@ void nfs_callback_down(int minorversion, struct net *net) mutex_lock(&nfs_callback_mutex); nfs_callback_down_net(minorversion, cb_info->serv, net); cb_info->users--; - if (cb_info->users == 0 && cb_info->task != NULL) { - kthread_stop(cb_info->task); - dprintk("nfs_callback_down: service stopped\n"); + if (cb_info->users == 0) { + if (cb_info->task != NULL) { + kthread_stop(cb_info->task); + dprintk("nfs_callback_down: service stopped\n"); + } svc_exit_thread(cb_info->rqst); dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; -- cgit v1.2.1 From bb6aeba736ba9fd4d9569eec4bc3f7aecb42162a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 20:03:51 -0400 Subject: NFSv4.x: Switch to using svc_set_num_threads() to manage the callback threads This will allow us to bump the number of callback threads at will. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 79 +++++++++---------------------------------------------- fs/nfs/callback.h | 2 ++ 2 files changed, 15 insertions(+), 66 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index b125f02ddf8b..b91c49f4670b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -31,8 +31,6 @@ struct nfs_callback_data { unsigned int users; struct svc_serv *serv; - struct svc_rqst *rqst; - struct task_struct *task; }; static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; @@ -89,15 +87,6 @@ nfs4_callback_svc(void *vrqstp) return 0; } -/* - * Prepare to bring up the NFSv4 callback service - */ -static struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv) -{ - return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); -} - #if defined(CONFIG_NFS_V4_1) /* * The callback service for NFSv4.1 callbacks @@ -139,19 +128,6 @@ nfs41_callback_svc(void *vrqstp) return 0; } -/* - * Bring up the NFSv4.1 callback service - */ -static struct svc_rqst * -nfs41_callback_up(struct svc_serv *serv) -{ - struct svc_rqst *rqstp; - - rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp)); - return rqstp; -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -163,12 +139,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static struct svc_rqst * -nfs41_callback_up(struct svc_serv *serv) -{ - return ERR_PTR(-ENOTSUPP); -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -178,42 +148,19 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { - struct svc_rqst *rqstp; - struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + const int nrservs = NFS4_NR_CALLBACK_THREADS; int ret; nfs_callback_bc_serv(minorversion, xprt, serv); - if (cb_info->task) + if (serv->sv_nrthreads-1 == nrservs) return 0; - switch (minorversion) { - case 0: - /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv); - break; - default: - rqstp = nfs41_callback_up(serv); - } - - if (IS_ERR(rqstp)) - return PTR_ERR(rqstp); - - svc_sock_update_bufs(serv); - - cb_info->rqst = rqstp; - cb_info->task = kthread_create(serv->sv_ops->svo_function, - cb_info->rqst, - "nfsv4.%u-svc", minorversion); - if (IS_ERR(cb_info->task)) { - ret = PTR_ERR(cb_info->task); - svc_exit_thread(cb_info->rqst); - cb_info->rqst = NULL; - cb_info->task = NULL; + ret = serv->sv_ops->svo_setup(serv, NULL, nrservs); + if (ret) { + serv->sv_ops->svo_setup(serv, NULL, 0); return ret; } - rqstp->rq_task = cb_info->task; - wake_up_process(cb_info->task); dprintk("nfs_callback_up: service started\n"); return 0; } @@ -270,12 +217,14 @@ err_bind: static struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, .svo_module = THIS_MODULE, }; @@ -394,20 +343,18 @@ err_start: void nfs_callback_down(int minorversion, struct net *net) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + struct svc_serv *serv; mutex_lock(&nfs_callback_mutex); - nfs_callback_down_net(minorversion, cb_info->serv, net); + serv = cb_info->serv; + nfs_callback_down_net(minorversion, serv, net); cb_info->users--; if (cb_info->users == 0) { - if (cb_info->task != NULL) { - kthread_stop(cb_info->task); - dprintk("nfs_callback_down: service stopped\n"); - } - svc_exit_thread(cb_info->rqst); + svc_get(serv); + serv->sv_ops->svo_setup(serv, NULL, 0); + svc_destroy(serv); dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; - cb_info->rqst = NULL; - cb_info->task = NULL; } mutex_unlock(&nfs_callback_mutex); } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 5fe1cecbf9f0..14bd863495db 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -198,6 +198,8 @@ extern void nfs_callback_down(int minorversion, struct net *net); #define NFS41_BC_MIN_CALLBACKS 1 #define NFS41_BC_MAX_CALLBACKS 1 +#define NFS4_NR_CALLBACK_THREADS 1 + extern unsigned int nfs_callback_set_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ -- cgit v1.2.1 From 5405fc44c3377358205ab4095fe9c44716aa0900 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 20:03:52 -0400 Subject: NFSv4.x: Add kernel parameter to control the callback server Add support for the kernel parameter nfs.callback_nr_threads to set the number of threads that will be assigned to the callback channel. Add support for the kernel parameter nfs.nfs.max_session_cb_slots to set the maximum size of the callback channel slot table. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 5 ++++- fs/nfs/callback.h | 3 ++- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 7 +++---- fs/nfs/nfs4session.h | 1 + fs/nfs/super.c | 10 ++++++++++ 6 files changed, 21 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index b91c49f4670b..532d8e242d4d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -148,11 +148,14 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { - const int nrservs = NFS4_NR_CALLBACK_THREADS; + int nrservs = nfs_callback_nr_threads; int ret; nfs_callback_bc_serv(minorversion, xprt, serv); + if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS) + nrservs = NFS4_MIN_NR_CALLBACK_THREADS; + if (serv->sv_nrthreads-1 == nrservs) return 0; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 14bd863495db..41ad65151ec8 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -198,8 +198,9 @@ extern void nfs_callback_down(int minorversion, struct net *net); #define NFS41_BC_MIN_CALLBACKS 1 #define NFS41_BC_MAX_CALLBACKS 1 -#define NFS4_NR_CALLBACK_THREADS 1 +#define NFS4_MIN_NR_CALLBACK_THREADS 1 extern unsigned int nfs_callback_set_tcpport; +extern unsigned short nfs_callback_nr_threads; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9bf64eacba5b..f230aa62ca59 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -471,6 +471,7 @@ extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; +extern unsigned short max_session_cb_slots; extern unsigned short send_implementation_id; extern bool recover_lost_locks; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a9dec32ba9ba..251e48e7ba16 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7463,7 +7463,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, args->bc_attrs.max_resp_sz = max_bc_payload; args->bc_attrs.max_resp_sz_cached = 0; args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; - args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS; + args->bc_attrs.max_reqs = min_t(unsigned short, max_session_cb_slots, 1); dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", @@ -7510,10 +7510,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args return -EINVAL; if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; - /* These would render the backchannel useless: */ - if (rcvd->max_ops != sent->max_ops) + if (rcvd->max_ops > sent->max_ops) return -EINVAL; - if (rcvd->max_reqs != sent->max_reqs) + if (rcvd->max_reqs > sent->max_reqs) return -EINVAL; out: return 0; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index f703b755351b..3bb6af70973c 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -9,6 +9,7 @@ /* maximum number of slots to use */ #define NFS4_DEF_SLOT_TABLE_SIZE (64U) +#define NFS4_DEF_CB_SLOT_TABLE_SIZE (1U) #define NFS4_MAX_SLOT_TABLE (1024U) #define NFS4_NO_SLOT ((u32)-1) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d39601381adf..001796bcd6c8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2848,19 +2848,23 @@ out_invalid_transport_udp: * NFS client for backwards compatibility */ unsigned int nfs_callback_set_tcpport; +unsigned short nfs_callback_nr_threads; /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600; /* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ bool nfs4_disable_idmapping = true; unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; +unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; +EXPORT_SYMBOL_GPL(nfs_callback_nr_threads); EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); +EXPORT_SYMBOL_GPL(max_session_cb_slots); EXPORT_SYMBOL_GPL(send_implementation_id); EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); EXPORT_SYMBOL_GPL(recover_lost_locks); @@ -2887,6 +2891,9 @@ static const struct kernel_param_ops param_ops_portnr = { #define param_check_portnr(name, p) __param_check(name, p, unsigned int); module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +module_param_named(callback_nr_threads, nfs_callback_nr_threads, ushort, 0644); +MODULE_PARM_DESC(callback_nr_threads, "Number of threads that will be " + "assigned to the NFSv4 callback channels."); module_param(nfs_idmap_cache_timeout, int, 0644); module_param(nfs4_disable_idmapping, bool, 0644); module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier, @@ -2896,6 +2903,9 @@ MODULE_PARM_DESC(nfs4_disable_idmapping, module_param(max_session_slots, ushort, 0644); MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " "requests the client will negotiate"); +module_param(max_session_cb_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of parallel NFSv4.1 " + "callbacks the client will process for a given server"); module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); -- cgit v1.2.1 From 8d89bd70bc93914359fca3f7fbe2bde9556bcf76 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:18 -0400 Subject: NFS setup async exchange_id Testing an rpc_xprt for session trunking should not delay application progress over already established transports. Setup exchange_id to be able to be an async call to test an rpc_xprt for session trunking use. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 215 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 134 insertions(+), 81 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 251e48e7ba16..05b90a756fb5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7104,6 +7104,80 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, return 0; } +struct nfs41_exchange_id_data { + struct nfs41_exchange_id_res res; + struct nfs41_exchange_id_args args; + int rpc_status; +}; + +static void nfs4_exchange_id_done(struct rpc_task *task, void *data) +{ + struct nfs41_exchange_id_data *cdata = + (struct nfs41_exchange_id_data *)data; + struct nfs_client *clp = cdata->args.client; + int status = task->tk_status; + + trace_nfs4_exchange_id(clp, status); + + if (status == 0) + status = nfs4_check_cl_exchange_flags(cdata->res.flags); + if (status == 0) + status = nfs4_sp4_select_mode(clp, &cdata->res.state_protect); + + if (status == 0) { + clp->cl_clientid = cdata->res.clientid; + clp->cl_exchange_flags = cdata->res.flags; + /* Client ID is not confirmed */ + if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) { + clear_bit(NFS4_SESSION_ESTABLISHED, + &clp->cl_session->session_state); + clp->cl_seqid = cdata->res.seqid; + } + + kfree(clp->cl_serverowner); + clp->cl_serverowner = cdata->res.server_owner; + cdata->res.server_owner = NULL; + + /* use the most recent implementation id */ + kfree(clp->cl_implid); + clp->cl_implid = cdata->res.impl_id; + cdata->res.impl_id = NULL; + + if (clp->cl_serverscope != NULL && + !nfs41_same_server_scope(clp->cl_serverscope, + cdata->res.server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + kfree(clp->cl_serverscope); + clp->cl_serverscope = NULL; + } + + if (clp->cl_serverscope == NULL) { + clp->cl_serverscope = cdata->res.server_scope; + cdata->res.server_scope = NULL; + } + } + cdata->rpc_status = status; +} + +static void nfs4_exchange_id_release(void *data) +{ + struct nfs41_exchange_id_data *cdata = + (struct nfs41_exchange_id_data *)data; + + nfs_put_client(cdata->args.client); + kfree(cdata->res.impl_id); + kfree(cdata->res.server_scope); + kfree(cdata->res.server_owner); + kfree(cdata); +} + +static const struct rpc_call_ops nfs4_exchange_id_call_ops = { + .rpc_call_done = nfs4_exchange_id_done, + .rpc_release = nfs4_exchange_id_release, +}; + /* * _nfs4_proc_exchange_id() * @@ -7113,66 +7187,60 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how) { nfs4_verifier verifier; - struct nfs41_exchange_id_args args = { - .verifier = &verifier, - .client = clp, -#ifdef CONFIG_NFS_V4_1_MIGRATION - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID | - EXCHGID4_FLAG_SUPP_MOVED_MIGR, -#else - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID, -#endif - }; - struct nfs41_exchange_id_res res = { - 0 - }; - int status; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], - .rpc_argp = &args, - .rpc_resp = &res, .rpc_cred = cred, }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .callback_ops = &nfs4_exchange_id_call_ops, + .rpc_message = &msg, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + }; + struct nfs41_exchange_id_data *calldata; + struct rpc_task *task; + int status = -EIO; + + if (!atomic_inc_not_zero(&clp->cl_count)) + goto out; + + status = -ENOMEM; + calldata = kzalloc(sizeof(*calldata), GFP_NOFS); + if (!calldata) + goto out; nfs4_init_boot_verifier(clp, &verifier); status = nfs4_init_uniform_client_string(clp); if (status) - goto out; + goto out_calldata; dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_owner_id); - res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), - GFP_NOFS); - if (unlikely(res.server_owner == NULL)) { - status = -ENOMEM; - goto out; - } + calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), + GFP_NOFS); + status = -ENOMEM; + if (unlikely(calldata->res.server_owner == NULL)) + goto out_calldata; - res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), + calldata->res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), GFP_NOFS); - if (unlikely(res.server_scope == NULL)) { - status = -ENOMEM; + if (unlikely(calldata->res.server_scope == NULL)) goto out_server_owner; - } - res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); - if (unlikely(res.impl_id == NULL)) { - status = -ENOMEM; + calldata->res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); + if (unlikely(calldata->res.impl_id == NULL)) goto out_server_scope; - } switch (sp4_how) { case SP4_NONE: - args.state_protect.how = SP4_NONE; + calldata->args.state_protect.how = SP4_NONE; break; case SP4_MACH_CRED: - args.state_protect = nfs4_sp4_mach_cred_request; + calldata->args.state_protect = nfs4_sp4_mach_cred_request; break; default: @@ -7182,55 +7250,30 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, goto out_impl_id; } - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - trace_nfs4_exchange_id(clp, status); - if (status == 0) - status = nfs4_check_cl_exchange_flags(res.flags); - - if (status == 0) - status = nfs4_sp4_select_mode(clp, &res.state_protect); - - if (status == 0) { - clp->cl_clientid = res.clientid; - clp->cl_exchange_flags = res.flags; - /* Client ID is not confirmed */ - if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) { - clear_bit(NFS4_SESSION_ESTABLISHED, - &clp->cl_session->session_state); - clp->cl_seqid = res.seqid; - } - - kfree(clp->cl_serverowner); - clp->cl_serverowner = res.server_owner; - res.server_owner = NULL; - - /* use the most recent implementation id */ - kfree(clp->cl_implid); - clp->cl_implid = res.impl_id; - res.impl_id = NULL; - - if (clp->cl_serverscope != NULL && - !nfs41_same_server_scope(clp->cl_serverscope, - res.server_scope)) { - dprintk("%s: server_scope mismatch detected\n", - __func__); - set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); - kfree(clp->cl_serverscope); - clp->cl_serverscope = NULL; - } + calldata->args.verifier = &verifier; + calldata->args.client = clp; +#ifdef CONFIG_NFS_V4_1_MIGRATION + calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID | + EXCHGID4_FLAG_SUPP_MOVED_MIGR, +#else + calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID, +#endif + msg.rpc_argp = &calldata->args; + msg.rpc_resp = &calldata->res; + task_setup_data.callback_data = calldata; - if (clp->cl_serverscope == NULL) { - clp->cl_serverscope = res.server_scope; - res.server_scope = NULL; - } + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out_impl_id; } -out_impl_id: - kfree(res.impl_id); -out_server_scope: - kfree(res.server_scope); -out_server_owner: - kfree(res.server_owner); + status = rpc_wait_for_completion_task(task); + if (!status) + status = calldata->rpc_status; + rpc_put_task(task); out: if (clp->cl_implid != NULL) dprintk("NFS reply exchange_id: Server Implementation ID: " @@ -7240,6 +7283,16 @@ out: clp->cl_implid->date.nseconds); dprintk("NFS reply exchange_id: %d\n", status); return status; + +out_impl_id: + kfree(calldata->res.impl_id); +out_server_scope: + kfree(calldata->res.server_scope); +out_server_owner: + kfree(calldata->res.server_owner); +out_calldata: + kfree(calldata); + goto out; } /* -- cgit v1.2.1 From 8e548edb4006c126e4aafba26a80c924e82fd2b4 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:19 -0400 Subject: NFS refactor nfs4_match_clientids For session trunking, to compare nfs41_exchange_id_res with exiting nfs_client. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cd3b7cfdde16..b3a5d91a1351 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -562,15 +562,15 @@ out: /* * Returns true if the client IDs match */ -static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +static bool nfs4_match_clientids(u64 a, u64 b) { - if (a->cl_clientid != b->cl_clientid) { + if (a != b) { dprintk("NFS: --> %s client ID %llx does not match %llx\n", - __func__, a->cl_clientid, b->cl_clientid); + __func__, a, b); return false; } dprintk("NFS: --> %s client ID %llx matches %llx\n", - __func__, a->cl_clientid, b->cl_clientid); + __func__, a, b); return true; } @@ -650,7 +650,7 @@ int nfs41_walk_client_list(struct nfs_client *new, if (pos->cl_cons_state != NFS_CS_READY) continue; - if (!nfs4_match_clientids(pos, new)) + if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid)) continue; /* -- cgit v1.2.1 From e7b7cbf662deb857539cd4b594451ea89c4d7ee6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:20 -0400 Subject: NFS refactor nfs4_check_serverowner_major_id For session trunking, to compare nfs41_exchange_id_res with existing nfs_client Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index b3a5d91a1351..0077b2eadfae 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -578,17 +578,15 @@ static bool nfs4_match_clientids(u64 a, u64 b) * Returns true if the server major ids match */ static bool -nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b) +nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, + struct nfs41_server_owner *o2) { - struct nfs41_server_owner *o1 = a->cl_serverowner; - struct nfs41_server_owner *o2 = b->cl_serverowner; - if (o1->major_id_sz != o2->major_id_sz) goto out_major_mismatch; if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) goto out_major_mismatch; - dprintk("NFS: --> %s server owners match\n", __func__); + dprintk("NFS: --> %s server owner major IDs match\n", __func__); return true; out_major_mismatch: @@ -658,7 +656,8 @@ int nfs41_walk_client_list(struct nfs_client *new, * client id trunking. In either case, we want to fall back * to using the existing nfs_client. */ - if (!nfs4_check_clientid_trunking(pos, new)) + if (!nfs4_check_serverowner_major_id(pos->cl_serverowner, + new->cl_serverowner)) continue; /* Unlike NFSv4.0, we know that NFSv4.1 always uses the -- cgit v1.2.1 From ba84db96aa07995b853db403a3eba2249f0fa4e3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:21 -0400 Subject: NFS detect session trunking Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4client.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index f230aa62ca59..cf744aa0fa74 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -278,6 +278,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); +extern int nfs4_detect_session_trunking(struct nfs_client *clp, + struct nfs41_exchange_id_res *res, struct rpc_xprt *xprt); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0077b2eadfae..6914d4b176ae 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -595,6 +595,96 @@ out_major_mismatch: return false; } +/* + * Returns true if server minor ids match + */ +static bool +nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1, + struct nfs41_server_owner *o2) +{ + /* Check eir_server_owner so_minor_id */ + if (o1->minor_id != o2->minor_id) + goto out_minor_mismatch; + + dprintk("NFS: --> %s server owner minor IDs match\n", __func__); + return true; + +out_minor_mismatch: + dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__); + return false; +} + +/* + * Returns true if the server scopes match + */ +static bool +nfs4_check_server_scope(struct nfs41_server_scope *s1, + struct nfs41_server_scope *s2) +{ + if (s1->server_scope_sz != s2->server_scope_sz) + goto out_scope_mismatch; + if (memcmp(s1->server_scope, s2->server_scope, + s1->server_scope_sz) != 0) + goto out_scope_mismatch; + + dprintk("NFS: --> %s server scopes match\n", __func__); + return true; + +out_scope_mismatch: + dprintk("NFS: --> %s server scopes do not match\n", + __func__); + return false; +} + +/** + * nfs4_detect_session_trunking - Checks for session trunking called + * after a successful EXCHANGE_ID testing a multi-addr connection to be + * potentially added as a session trunk + * + * @clp: original mount nfs_client + * @res: result structure from an exchange_id using the original mount + * nfs_client with a new multi_addr transport + * + * Returns zero on success, otherwise -EINVAL + * + * Note: since the exchange_id for the new multi_addr transport uses the + * same nfs_client from the original mount, the cl_owner_id is reused, + * so eir_clientowner is the same. + */ +int nfs4_detect_session_trunking(struct nfs_client *clp, + struct nfs41_exchange_id_res *res, + struct rpc_xprt *xprt) +{ + /* Check eir_clientid */ + if (!nfs4_match_clientids(clp->cl_clientid, res->clientid)) + goto out_err; + + /* Check eir_server_owner so_major_id */ + if (!nfs4_check_serverowner_major_id(clp->cl_serverowner, + res->server_owner)) + goto out_err; + + /* Check eir_server_owner so_minor_id */ + if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner, + res->server_owner)) + goto out_err; + + /* Check eir_server_scope */ + if (!nfs4_check_server_scope(clp->cl_serverscope, res->server_scope)) + goto out_err; + + pr_info("NFS: %s: Session trunking succeeded for %s\n", + clp->cl_hostname, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + return 0; +out_err: + pr_info("NFS: %s: Session trunking failed for %s\n", clp->cl_hostname, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + return -EINVAL; +} + /** * nfs41_walk_client_list - Find nfs_client that matches a client/server owner * -- cgit v1.2.1 From 04ea1b3e6d8ed4978bb608c1748530af3de8c274 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:27 -0400 Subject: NFS add xprt switch addrs test to match client Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0c1b3a002dc2..535c2563b701 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -313,7 +313,10 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Match the full socket address */ if (!rpc_cmp_addr_port(sap, clap)) - continue; + /* Match all xprt_switch full socket addresses */ + if (!rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient, + sap)) + continue; atomic_inc(&clp->cl_count); return clp; -- cgit v1.2.1 From ad0849a7efc3bebbb5b9385225085abdae3911b0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:28 -0400 Subject: NFS test session trunking with exchange id Use an async exchange id call to test for session trunking To conform with RFC 5661 section 18.35.4, the Non-Update on Existing Clientid case, save the exchange id verifier in cl_confirm and use it for the session trunking exhange id test. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 10 +++++++--- fs/nfs/nfs4proc.c | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 6914d4b176ae..a2c9654d018f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -637,9 +637,10 @@ out_scope_mismatch: } /** - * nfs4_detect_session_trunking - Checks for session trunking called - * after a successful EXCHANGE_ID testing a multi-addr connection to be - * potentially added as a session trunk + * nfs4_detect_session_trunking - Checks for session trunking. + * + * Called after a successful EXCHANGE_ID on a multi-addr connection. + * Upon success, add the transport. * * @clp: original mount nfs_client * @res: result structure from an exchange_id using the original mount @@ -673,6 +674,9 @@ int nfs4_detect_session_trunking(struct nfs_client *clp, if (!nfs4_check_server_scope(clp->cl_serverscope, res->server_scope)) goto out_err; + /* Session trunking passed, add the xprt */ + rpc_clnt_xprt_switch_add_xprt(clp->cl_rpcclient, xprt); + pr_info("NFS: %s: Session trunking succeeded for %s\n", clp->cl_hostname, xprt->address_strings[RPC_DISPLAY_ADDR]); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05b90a756fb5..a70a972eb06f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7107,6 +7107,7 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, struct nfs41_exchange_id_data { struct nfs41_exchange_id_res res; struct nfs41_exchange_id_args args; + struct rpc_xprt *xprt; int rpc_status; }; @@ -7121,6 +7122,13 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) if (status == 0) status = nfs4_check_cl_exchange_flags(cdata->res.flags); + + if (cdata->xprt && status == 0) { + status = nfs4_detect_session_trunking(clp, &cdata->res, + cdata->xprt); + goto out; + } + if (status == 0) status = nfs4_sp4_select_mode(clp, &cdata->res.state_protect); @@ -7157,8 +7165,13 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) clp->cl_serverscope = cdata->res.server_scope; cdata->res.server_scope = NULL; } + /* Save the EXCHANGE_ID verifier session trunk tests */ + memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + sizeof(clp->cl_confirm.data)); } +out: cdata->rpc_status = status; + return; } static void nfs4_exchange_id_release(void *data) @@ -7167,6 +7180,10 @@ static void nfs4_exchange_id_release(void *data) (struct nfs41_exchange_id_data *)data; nfs_put_client(cdata->args.client); + if (cdata->xprt) { + xprt_put(cdata->xprt); + rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); + } kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); kfree(cdata->res.server_owner); @@ -7184,7 +7201,7 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { * Wrapper for EXCHANGE_ID operation. */ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, - u32 sp4_how) + u32 sp4_how, struct rpc_xprt *xprt) { nfs4_verifier verifier; struct rpc_message msg = { @@ -7209,7 +7226,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (!calldata) goto out; - nfs4_init_boot_verifier(clp, &verifier); + if (!xprt) + nfs4_init_boot_verifier(clp, &verifier); status = nfs4_init_uniform_client_string(clp); if (status) @@ -7249,8 +7267,15 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, status = -EINVAL; goto out_impl_id; } - - calldata->args.verifier = &verifier; + if (xprt) { + calldata->xprt = xprt; + task_setup_data.rpc_xprt = xprt; + task_setup_data.flags = + RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; + calldata->args.verifier = &clp->cl_confirm; + } else { + calldata->args.verifier = &verifier; + } calldata->args.client = clp; #ifdef CONFIG_NFS_V4_1_MIGRATION calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | @@ -7270,9 +7295,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, goto out_impl_id; } - status = rpc_wait_for_completion_task(task); - if (!status) + if (!xprt) { + status = rpc_wait_for_completion_task(task); + if (!status) + status = calldata->rpc_status; + } else /* session trunking test */ status = calldata->rpc_status; + rpc_put_task(task); out: if (clp->cl_implid != NULL) @@ -7315,13 +7344,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) /* try SP4_MACH_CRED if krb5i/p */ if (authflavor == RPC_AUTH_GSS_KRB5I || authflavor == RPC_AUTH_GSS_KRB5P) { - status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED); + status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED, NULL); if (!status) return 0; } /* try SP4_NONE */ - return _nfs4_proc_exchange_id(clp, cred, SP4_NONE); + return _nfs4_proc_exchange_id(clp, cred, SP4_NONE, NULL); } static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, -- cgit v1.2.1 From 04fa2c6bb51b1bf34a49007e0cd824bde39eeac0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:29 -0400 Subject: NFS pnfs data server multipath session trunking Try all multipath addresses for a data server. The first address that successfully connects and creates a session is the DS mount address. All subsequent addresses are tested for session trunking and added as aliases. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 3 +++ fs/nfs/nfs4_fs.h | 6 ++++++ fs/nfs/nfs4proc.c | 33 +++++++++++++++++++++++++++++++ fs/nfs/pnfs_nfs.c | 58 +++++++++++++++++++++++++++++++++++++++++-------------- 4 files changed, 86 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 74935a19e4bf..fd82bed39ed4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -572,6 +572,9 @@ extern int nfs40_walk_client_list(struct nfs_client *clp, extern int nfs41_walk_client_list(struct nfs_client *clp, struct nfs_client **result, struct rpc_cred *cred); +extern int nfs4_test_session_trunk(struct rpc_clnt *, + struct rpc_xprt *, + void *); static inline struct inode *nfs_igrab_and_active(struct inode *inode) { diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cf744aa0fa74..804113421337 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -59,6 +59,7 @@ struct nfs4_minor_version_ops { struct nfs4_lock_state *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); + int (*session_trunk)(struct rpc_clnt *, struct rpc_xprt *, void *); const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; @@ -203,6 +204,11 @@ struct nfs4_state_recovery_ops { struct rpc_cred *); }; +struct nfs4_add_xprt_data { + struct nfs_client *clp; + struct rpc_cred *cred; +}; + struct nfs4_state_maintenance_ops { int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a70a972eb06f..cfdf45a14cc3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7353,6 +7353,37 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) return _nfs4_proc_exchange_id(clp, cred, SP4_NONE, NULL); } +/** + * nfs4_test_session_trunk + * + * This is an add_xprt_test() test function called from + * rpc_clnt_setup_test_and_add_xprt. + * + * The rpc_xprt_switch is referrenced by rpc_clnt_setup_test_and_add_xprt + * and is dereferrenced in nfs4_exchange_id_release + * + * Upon success, add the new transport to the rpc_clnt + * + * @clnt: struct rpc_clnt to get new transport + * @xprt: the rpc_xprt to test + * @data: call data for _nfs4_proc_exchange_id. + */ +int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, + void *data) +{ + struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data; + u32 sp4_how; + + dprintk("--> %s try %s\n", __func__, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); + + /* Test connection for session trunking. Async exchange_id call */ + return _nfs4_proc_exchange_id(adata->clp, adata->cred, sp4_how, xprt); +} +EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); + static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, struct rpc_cred *cred) { @@ -8944,6 +8975,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, .alloc_seqid = nfs_alloc_no_seqid, + .session_trunk = nfs4_test_session_trunk, .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, @@ -8973,6 +9005,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .free_lock_state = nfs41_free_lock_state, .call_sync_ops = &nfs41_call_sync_ops, .alloc_seqid = nfs_alloc_no_seqid, + .session_trunk = nfs4_test_session_trunk, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f3468b57a32a..53b4705abcc7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -690,13 +690,50 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, dprintk("%s: DS %s: trying address %s\n", __func__, ds->ds_remotestr, da->da_remotestr); - clp = nfs4_set_ds_client(mds_srv, - (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - timeo, retrans, minor_version, - au_flavor); - if (!IS_ERR(clp)) - break; + if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) { + struct xprt_create xprt_args = { + .ident = XPRT_TRANSPORT_TCP, + .net = clp->cl_net, + .dstaddr = (struct sockaddr *)&da->da_addr, + .addrlen = da->da_addrlen, + .servername = clp->cl_hostname, + }; + struct nfs4_add_xprt_data xprtdata = { + .clp = clp, + .cred = nfs4_get_clid_cred(clp), + }; + struct rpc_add_xprt_test rpcdata = { + .add_xprt_test = clp->cl_mvops->session_trunk, + .data = &xprtdata, + }; + + /** + * Test this address for session trunking and + * add as an alias + */ + rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, + rpc_clnt_setup_test_and_add_xprt, + &rpcdata); + if (xprtdata.cred) + put_rpccred(xprtdata.cred); + } else { + clp = nfs4_set_ds_client(mds_srv, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP, + timeo, retrans, minor_version, + au_flavor); + if (IS_ERR(clp)) + continue; + + status = nfs4_init_ds_session(clp, + mds_srv->nfs_client->cl_lease_time); + if (status) { + nfs_put_client(clp); + clp = ERR_PTR(-EIO); + continue; + } + + } } if (IS_ERR(clp)) { @@ -704,18 +741,11 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, goto out; } - status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); - if (status) - goto out_put; - smp_wmb(); ds->ds_clp = clp; dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; -out_put: - nfs_put_client(clp); - goto out; } /* -- cgit v1.2.1 From ca440c383a588091cae9fbce610b86a6e9d961ad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Sep 2016 14:40:49 -0400 Subject: pnfs: add a new mechanism to select a layout driver according to an ordered list Currently, the layout driver selection code always chooses the first one from the list. That's not really ideal however, as the server can send the list of layout types in any order that it likes. It's up to the client to select the best one for its needs. This patch adds an ordered list of preferred driver types and has the selection code sort the list of available layout drivers according to it. Any unrecognized layout type is sorted to the end of the list. For now, the order of preference is hardcoded, but it should be possible to make this configurable in the future. Signed-off-by: Jeff Layton Reviewed-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 31 +++++++++++++++++------------- fs/nfs/pnfs.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++-------- fs/nfs/pnfs.h | 5 +++-- 5 files changed, 71 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 535c2563b701..51136b03788d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -788,6 +788,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs } fsinfo.fattr = fattr; + fsinfo.nlayouttypes = 0; memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype)); error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cfdf45a14cc3..acf5a2caa423 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4312,7 +4312,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s if (error == 0) { /* block layout checks this! */ server->pnfs_blksize = fsinfo->blksize; - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + set_pnfs_layoutdriver(server, fhandle, fsinfo); } return error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 41a02f994976..17b4e059c588 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4728,29 +4728,34 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, * Decode potentially multiple layout types. */ static int decode_pnfs_layout_types(struct xdr_stream *xdr, - uint32_t *layouttype) + struct nfs_fsinfo *fsinfo) { __be32 *p; - uint32_t num, i; + uint32_t i; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - num = be32_to_cpup(p); + fsinfo->nlayouttypes = be32_to_cpup(p); /* pNFS is not supported by the underlying file system */ - if (num == 0) { + if (fsinfo->nlayouttypes == 0) return 0; - } - if (num > NFS_MAX_LAYOUT_TYPES) - printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num); /* Decode and set first layout type, move xdr->p past unused types */ - p = xdr_inline_decode(xdr, num * 4); + p = xdr_inline_decode(xdr, fsinfo->nlayouttypes * 4); if (unlikely(!p)) goto out_overflow; - for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++) - layouttype[i] = be32_to_cpup(p++); + + /* If we get too many, then just cap it at the max */ + if (fsinfo->nlayouttypes > NFS_MAX_LAYOUT_TYPES) { + printk(KERN_INFO "NFS: %s: Warning: Too many (%u) pNFS layout types\n", + __func__, fsinfo->nlayouttypes); + fsinfo->nlayouttypes = NFS_MAX_LAYOUT_TYPES; + } + + for(i = 0; i < fsinfo->nlayouttypes; ++i) + fsinfo->layouttype[i] = be32_to_cpup(p++); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4762,7 +4767,7 @@ out_overflow: * Note we must ensure that layouttype is set in any non-error case. */ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, - uint32_t *layouttype) + struct nfs_fsinfo *fsinfo) { int status = 0; @@ -4770,7 +4775,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) return -EIO; if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { - status = decode_pnfs_layout_types(xdr, layouttype); + status = decode_pnfs_layout_types(xdr, fsinfo); bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; } return status; @@ -4853,7 +4858,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; - status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype); + status = decode_attr_pnfstype(xdr, bitmap, fsinfo); if (status != 0) goto xdr_error; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a6a683fbd230..b588ccf05045 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "internal.h" #include "pnfs.h" #include "iostat.h" @@ -98,6 +99,39 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) nfss->pnfs_curr_ld = NULL; } +/* + * When the server sends a list of layout types, we choose one in the order + * given in the list below. + * + * FIXME: should this list be configurable in some fashion? module param? + * mount option? something else? + */ +static const u32 ld_prefs[] = { + LAYOUT_SCSI, + LAYOUT_BLOCK_VOLUME, + LAYOUT_OSD2_OBJECTS, + LAYOUT_FLEX_FILES, + LAYOUT_NFSV4_1_FILES, + 0 +}; + +static int +ld_cmp(const void *e1, const void *e2) +{ + u32 ld1 = *((u32 *)e1); + u32 ld2 = *((u32 *)e2); + int i; + + for (i = 0; ld_prefs[i] != 0; i++) { + if (ld1 == ld_prefs[i]) + return -1; + + if (ld2 == ld_prefs[i]) + return 1; + } + return 0; +} + /* * Try to set the server's pnfs module to the pnfs layout type specified by id. * Currently only one pNFS layout driver per filesystem is supported. @@ -106,10 +140,11 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) */ void set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, - u32 *ids) + struct nfs_fsinfo *fsinfo) { struct pnfs_layoutdriver_type *ld_type = NULL; u32 id; + int i; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { @@ -118,18 +153,23 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, goto out_no_driver; } - id = ids[0]; - if (!id) - goto out_no_driver; + sort(fsinfo->layouttype, fsinfo->nlayouttypes, + sizeof(*fsinfo->layouttype), ld_cmp, NULL); - ld_type = find_pnfs_driver(id); - if (!ld_type) { - request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); + for (i = 0; i < fsinfo->nlayouttypes; i++) { + id = fsinfo->layouttype[i]; ld_type = find_pnfs_driver(id); + if (!ld_type) { + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, + id); + ld_type = find_pnfs_driver(id); + } + if (ld_type) + break; } if (!ld_type) { - dprintk("%s: No pNFS module found for %u.\n", __func__, id); + dprintk("%s: No pNFS module found!\n", __func__); goto out_no_driver; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index be515e6a3823..5c295512c967 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); -void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *); +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); @@ -657,7 +657,8 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) } static inline void set_pnfs_layoutdriver(struct nfs_server *s, - const struct nfs_fh *mntfh, u32 *ids) + const struct nfs_fh *mntfh, + struct nfs_fsinfo *fsinfo) { } -- cgit v1.2.1 From f844cd0d76378fa898890d2448222d407f3f8ecf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Sep 2016 13:59:07 +0800 Subject: nfs: cover ->migratepage with CONFIG_MIGRATION It will be more clean to use CONFIG_MIGRATION to cover nfs' private .migratepage in nfs_file_aops like we do in other part of nfs operations. Signed-off-by: Chao Yu Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 2 ++ fs/nfs/internal.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ca699ddc11c1..580d1c588228 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -543,7 +543,9 @@ const struct address_space_operations nfs_file_aops = { .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, .direct_IO = nfs_direct_IO, +#ifdef CONFIG_MIGRATION .migratepage = nfs_migrate_page, +#endif .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fd82bed39ed4..4f0b2db8327f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -535,12 +535,9 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) } #endif - #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); -#else -#define nfs_migrate_page NULL #endif static inline int -- cgit v1.2.1 From b60475c9401bf89a2b14918fca0a220a15867ace Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:31 -0400 Subject: nfs: the length argument to read_buf should be unsigned Since it gets passed through to xdr_inline_decode, we might as well have read_buf expect what it expects -- a size_t. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 656f68f7fe53..a710825f3d61 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -72,7 +72,7 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) return xdr_ressize_check(rqstp, p); } -static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) +static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; -- cgit v1.2.1 From 75575ddf29cbbf8e3c96fb02ae80a33f22054aad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:32 -0400 Subject: nfs: eliminate pointless and confusing do_vfs_lock wrappers Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 9 ++------- fs/nfs/nfs4proc.c | 15 +++++---------- 2 files changed, 7 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 580d1c588228..3f1c623eee39 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -710,11 +710,6 @@ out_noconflict: goto out; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) -{ - return locks_lock_file_wait(file, fl); -} - static int do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -747,7 +742,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = do_vfs_lock(filp, fl); + status = locks_lock_file_wait(filp, fl); return status; } @@ -772,7 +767,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = do_vfs_lock(filp, fl); + status = locks_lock_file_wait(filp, fl); if (status < 0) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index acf5a2caa423..a84612e5979e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5600,11 +5600,6 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct inode *inode, struct file_lock *fl) -{ - return locks_lock_inode_wait(inode, fl); -} - struct nfs4_unlockdata { struct nfs_locku_args arg; struct nfs_locku_res res; @@ -5657,7 +5652,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); + locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; @@ -5765,7 +5760,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(inode, request) == -ENOENT) { + if (locks_lock_inode_wait(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5906,7 +5901,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { + if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -6148,7 +6143,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(state->inode, request); + status = locks_lock_inode_wait(state->inode, request); if (status < 0) goto out; mutex_lock(&sp->so_delegreturn_mutex); @@ -6157,7 +6152,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(state->inode, request); + status = locks_lock_inode_wait(state->inode, request); up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; -- cgit v1.2.1 From 66f570ab7307ad613b0d800fff27aba316014eaf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:33 -0400 Subject: nfs: use safe, interruptible sleeps when waiting to retry LOCK We actually want to use TASK_INTERRUPTIBLE sleeps when we're in the process of polling for a NFSv4 lock. If there is a signal pending when the task wakes up, then we'll be returning an error anyway. So, we might as well wake up immediately for non-fatal signals as well. That allows us to return to userland more quickly in that case, but won't change the error that userland sees. Also, there is no need to use the *_unsafe sleep variants here, as no vfs-layer locks should be held at this point. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a84612e5979e..0aac0fbf0b58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5537,7 +5537,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - freezable_schedule_timeout_killable_unsafe(timeout); + freezable_schedule_timeout_interruptible(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; -- cgit v1.2.1 From a8ce377a5db8d32b08c3bcc1c5d70cad64a0b21d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:35 -0400 Subject: nfs: track whether server sets MAY_NOTIFY_LOCK flag We want to handle the two cases differently, such that we poll more aggressively when we don't expect a callback. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 804113421337..55cfeadedb6e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -157,6 +157,7 @@ enum { NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ + NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */ }; struct nfs4_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0aac0fbf0b58..93cf38e8098a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2537,6 +2537,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, goto out; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) + set_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags); dentry = opendata->dentry; if (d_really_is_negative(dentry)) { -- cgit v1.2.1 From db783688d4a20bae1b77c5c96a0ad3eb3079f9f6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:36 -0400 Subject: nfs: add handling for CB_NOTIFY_LOCK in client For now, the callback doesn't do anything. Support for that will be added in later patches. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/callback.h | 9 +++++++++ fs/nfs/callback_proc.c | 12 ++++++++++++ fs/nfs/callback_xdr.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 41ad65151ec8..c701c308fac5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -179,6 +179,15 @@ extern __be32 nfs4_callback_devicenotify( struct cb_devicenotifyargs *args, void *dummy, struct cb_process_state *cps); +struct cb_notify_lock_args { + struct nfs_fh cbnl_fh; + struct nfs_lowner cbnl_owner; + bool cbnl_valid; +}; + +extern __be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, + void *dummy, + struct cb_process_state *cps); #endif /* CONFIG_NFS_V4_1 */ extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f953ef6b2f2e..974881824414 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -628,4 +628,16 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } + +__be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, void *dummy, + struct cb_process_state *cps) +{ + if (!cps->clp) /* set in cb_sequence */ + return htonl(NFS4ERR_OP_NOT_IN_SESSION); + + dprintk_rcu("NFS: CB_NOTIFY_LOCK request from %s\n", + rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + return htonl(NFS4_OK); +} #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a710825f3d61..eb094c6011d8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -35,6 +35,7 @@ (1 + 3) * 4) // seqid, 3 slotids #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_NOTIFY_LOCK_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -534,6 +535,49 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, return 0; } +static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_args *args) +{ + __be32 *p; + unsigned int len; + + p = read_buf(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + + p = xdr_decode_hyper(p, &args->cbnl_owner.clientid); + len = be32_to_cpu(*p); + + p = read_buf(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + + /* Only try to decode if the length is right */ + if (len == 20) { + p += 2; /* skip "lock id:" */ + args->cbnl_owner.s_dev = be32_to_cpu(*p++); + xdr_decode_hyper(p, &args->cbnl_owner.id); + args->cbnl_valid = true; + } else { + args->cbnl_owner.s_dev = 0; + args->cbnl_owner.id = 0; + args->cbnl_valid = false; + } + return 0; +} + +static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_notify_lock_args *args) +{ + __be32 status; + + status = decode_fh(xdr, &args->cbnl_fh); + if (unlikely(status != 0)) + goto out; + status = decode_lockowner(xdr, args); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + #endif /* CONFIG_NFS_V4_1 */ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) @@ -746,6 +790,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL_SLOT: case OP_CB_LAYOUTRECALL: case OP_CB_NOTIFY_DEVICEID: + case OP_CB_NOTIFY_LOCK: *op = &callback_ops[op_nr]; break; @@ -753,7 +798,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: case OP_CB_WANTS_CANCELLED: - case OP_CB_NOTIFY_LOCK: return htonl(NFS4ERR_NOTSUPP); default: @@ -1006,6 +1050,11 @@ static struct callback_op callback_ops[] = { .decode_args = (callback_decode_arg_t)decode_recallslot_args, .res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ, }, + [OP_CB_NOTIFY_LOCK] = { + .process_op = (callback_process_op_t)nfs4_callback_notify_lock, + .decode_args = (callback_decode_arg_t)decode_notify_lock_args, + .res_maxsize = CB_OP_NOTIFY_LOCK_RES_MAXSZ, + }, #endif /* CONFIG_NFS_V4_1 */ }; -- cgit v1.2.1 From 1ea67dbd9828278188fb846972c3c368b0ef23a1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:37 -0400 Subject: nfs: move nfs4_set_lock_state call into caller We need to have this info set up before adding the waiter to the waitqueue, so move this out of the _nfs4_proc_setlk and into the caller. That's more efficient anyway since we don't need to do this more than once if we end up waiting on the lock. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 93cf38e8098a..ffadf6be3668 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6135,15 +6135,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs4_state_owner *sp = state->owner; unsigned char fl_flags = request->fl_flags; - int status = -ENOLCK; + int status; - if ((fl_flags & FL_POSIX) && - !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) - goto out; - /* Is this a delegated open? */ - status = nfs4_set_lock_state(state, request); - if (status != 0) - goto out; request->fl_flags |= FL_ACCESS; status = locks_lock_inode_wait(state->inode, request); if (status < 0) @@ -6217,6 +6210,11 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (state == NULL) return -ENOLCK; + + if ((request->fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + return -ENOLCK; + /* * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. @@ -6231,6 +6229,10 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return -EBADF; } + status = nfs4_set_lock_state(state, request); + if (status != 0) + return status; + do { status = nfs4_proc_setlk(state, cmd, request); if ((status != -EAGAIN) || IS_SETLK(cmd)) -- cgit v1.2.1 From d2f3a7f9187b0c6a64276d598cd6157437c5a336 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:38 -0400 Subject: nfs: move nfs4 lock retry attempt loop to a separate function This also consolidates the waiting logic into a single function, instead of having it spread across two like it is now. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ffadf6be3668..a74c4167b5b0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5530,22 +5530,6 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 return err; } -#define NFS4_LOCK_MINTIMEOUT (1 * HZ) -#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) - -/* - * sleep, with exponential backoff, and retry the LOCK operation. - */ -static unsigned long -nfs4_set_lock_task_retry(unsigned long timeout) -{ - freezable_schedule_timeout_interruptible(timeout); - timeout <<= 1; - if (timeout > NFS4_LOCK_MAXTIMEOUT) - return NFS4_LOCK_MAXTIMEOUT; - return timeout; -} - static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; @@ -6178,12 +6162,32 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +static int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + int status = -ERESTARTSYS; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + + while(!signalled()) { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + freezable_schedule_timeout_interruptible(timeout); + timeout *= 2; + timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout); + status = -ERESTARTSYS; + } + return status; +} + static int nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) { struct nfs_open_context *ctx; struct nfs4_state *state; - unsigned long timeout = NFS4_LOCK_MINTIMEOUT; int status; /* verify open state */ @@ -6233,16 +6237,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (status != 0) return status; - do { - status = nfs4_proc_setlk(state, cmd, request); - if ((status != -EAGAIN) || IS_SETLK(cmd)) - break; - timeout = nfs4_set_lock_task_retry(timeout); - status = -ERESTARTSYS; - if (signalled()) - break; - } while(status < 0); - return status; + return nfs4_retry_setlk(state, cmd, request); } int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) -- cgit v1.2.1 From a1d617d8f134679741b0b35e8e1436b015ac5538 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:39 -0400 Subject: nfs: allow blocking locks to be awoken by lock callbacks Add a waitqueue head to the client structure. Have clients set a wait on that queue prior to requesting a lock from the server. If the lock is blocked, then we can use that to wait for wakeups. Note that we do need to do this "manually" since we need to set the wait on the waitqueue prior to requesting the lock, but requesting a lock can involve activities that can block. However, only do that for NFSv4.1 locks, either by compiling out all of the waitqueue handling when CONFIG_NFS_V4_1 is disabled, or skipping all of it at runtime if we're dealing with v4.0, or v4.1 servers that don't send lock callbacks. Note too that even when we expect to get a lock callback, RFC5661 section 20.11.4 is pretty clear that we still need to poll for them, so we do still sleep on a timeout. We do however always poll at the longest interval in that case. Signed-off-by: Jeff Layton [Anna: nfs4_retry_setlk() "status" should default to -ERESTARTSYS] Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 4 +++ fs/nfs/nfs4client.c | 3 ++ fs/nfs/nfs4proc.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 100 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 974881824414..e9aa235e9d10 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -638,6 +638,10 @@ __be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, void *dummy, dprintk_rcu("NFS: CB_NOTIFY_LOCK request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + /* Don't wake anybody if the string looked bogus */ + if (args->cbnl_valid) + __wake_up(&cps->clp->cl_lock_waitq, TASK_NORMAL, 0, args); + return htonl(NFS4_OK); } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index a2c9654d018f..074ac7131459 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -199,6 +199,9 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; +#if IS_ENABLED(CONFIG_NFS_V4_1) + init_waitqueue_head(&clp->cl_lock_waitq); +#endif return clp; error: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a74c4167b5b0..5ec333f3a19b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6166,7 +6166,8 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * #define NFS4_LOCK_MAXTIMEOUT (30 * HZ) static int -nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd, + struct file_lock *request) { int status = -ERESTARTSYS; unsigned long timeout = NFS4_LOCK_MINTIMEOUT; @@ -6183,6 +6184,97 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) return status; } +#ifdef CONFIG_NFS_V4_1 +struct nfs4_lock_waiter { + struct task_struct *task; + struct inode *inode; + struct nfs_lowner *owner; + bool notified; +}; + +static int +nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key) +{ + int ret; + struct cb_notify_lock_args *cbnl = key; + struct nfs4_lock_waiter *waiter = wait->private; + struct nfs_lowner *lowner = &cbnl->cbnl_owner, + *wowner = waiter->owner; + + /* Only wake if the callback was for the same owner */ + if (lowner->clientid != wowner->clientid || + lowner->id != wowner->id || + lowner->s_dev != wowner->s_dev) + return 0; + + /* Make sure it's for the right inode */ + if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) + return 0; + + waiter->notified = true; + + /* override "private" so we can use default_wake_function */ + wait->private = waiter->task; + ret = autoremove_wake_function(wait, mode, flags, key); + wait->private = waiter; + return ret; +} + +static int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + int status = -ERESTARTSYS; + unsigned long flags; + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_client *clp = server->nfs_client; + wait_queue_head_t *q = &clp->cl_lock_waitq; + struct nfs_lowner owner = { .clientid = clp->cl_clientid, + .id = lsp->ls_seqid.owner_id, + .s_dev = server->s_dev }; + struct nfs4_lock_waiter waiter = { .task = current, + .inode = state->inode, + .owner = &owner, + .notified = false }; + wait_queue_t wait; + + /* Don't bother with waitqueue if we don't expect a callback */ + if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags)) + return nfs4_retry_setlk_simple(state, cmd, request); + + init_wait(&wait); + wait.private = &waiter; + wait.func = nfs4_wake_lock_waiter; + add_wait_queue(q, &wait); + + while(!signalled()) { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + + status = -ERESTARTSYS; + spin_lock_irqsave(&q->lock, flags); + if (waiter.notified) { + spin_unlock_irqrestore(&q->lock, flags); + continue; + } + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&q->lock, flags); + + freezable_schedule_timeout_interruptible(NFS4_LOCK_MAXTIMEOUT); + } + + finish_wait(q, &wait); + return status; +} +#else /* !CONFIG_NFS_V4_1 */ +static inline int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + return nfs4_retry_setlk_simple(state, cmd, request); +} +#endif + static int nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) { -- cgit v1.2.1 From 78d04af4995a1358039f4406bd31c28faedfa030 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Sep 2016 14:34:24 -0400 Subject: NFS: nfs_prime_dcache must validate the filename Before we try to stash it in the dcache, we need to at least check that the filename passed to us by the server is non-empty and doesn't contain any illegal '\0' or '/' characters. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 177fefb26c18..eb095f77c39d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -496,6 +496,14 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) return; if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID)) return; + if (filename.len == 0) + return; + /* Validate that the name doesn't contain any illegal '\0' */ + if (strnlen(filename.name, filename.len) != filename.len) + return; + /* ...or '/' */ + if (strnchr(filename.name, filename.len, '/')) + return; if (filename.name[0] == '.') { if (filename.len == 1) return; -- cgit v1.2.1 From 024de8f1ade4c513f74172993572e9694ede81a1 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 22 Sep 2016 13:54:28 +0200 Subject: NFS: direct: use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). nfs_file_direct_write() or nfs_file_direct_read() allocated a request object via nfs_direct_req_alloc(), which initializes the completion. The request object then is freed later in the exit path. Between the initialization and the release either nfs_direct_write_schedule_iovec() resp nfs_direct_read_schedule_iovec() are called which will asynchronously process the request. The calling function waits via nfs_direct_wait() till the async work has been done. Thus there is only one waiter on the completion. nfs_direct_pgio_init() and nfs_direct_read_completion() are passed via function pointers to nfs pageio. The first function does a ref counting (get_dreq() and put_dreq()) which ensures that nfs_direct_read_completion() and nfs_direct_read_schedule_iovec() only call the completion path once. The usage pattern of the completion is: waiter context waker context nfs_file_direct_write() dreq = nfs_direct_req_alloc() init_completion() nfs_direct_write_schedule_iovec() nfs_direct_wait() wait_for_completion_killable() nfs_direct_write_schedule_work() nfs_direct_complete() complete() nfs_file_direct_read() dreq = nfs_direct_req_all() init_completion() nfs_direct_read_schedule_iovec() nfs_direct_wait() wait_for_completion_killable() nfs_direct_read_schedule_iovec() nfs_direct_complete() complete() nfs_direct_read_completion() nfs_direct_complete() complete() Signed-off-by: Daniel Wagner Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 72b7d13ee3c6..bd81bcf3ffcf 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -387,7 +387,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) dreq->iocb->ki_complete(dreq->iocb, res, 0); } - complete_all(&dreq->completion); + complete(&dreq->completion); nfs_direct_req_release(dreq); } -- cgit v1.2.1 From 2a446a5d99fe838b8aba2592a4bc88d93d8cebf8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 22 Sep 2016 13:54:29 +0200 Subject: NFS: cache_lib: use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The generic caching code from sunrpc is calling revisit() only once. The usage pattern of the completion is: waiter context waker context do_cache_lookup_wait() nfs_cache_defer_req_alloc() init_completion() do_cache_lookup() nfs_cache_wait_for_upcall() wait_for_completion_timeout() nfs_dns_cache_revisit() complete() nfs_cache_defer_req_put() Signed-off-by: Daniel Wagner Signed-off-by: Anna Schumaker --- fs/nfs/cache_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 5f7b053720ee..6de15709d024 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -76,7 +76,7 @@ static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany) dreq = container_of(d, struct nfs_cache_defer_req, deferred_req); - complete_all(&dreq->completion); + complete(&dreq->completion); nfs_cache_defer_req_put(dreq); } -- cgit v1.2.1 From 0a014a44a50839a8064618e959fae5bbc44c2fd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:51 -0400 Subject: NFSv4.1: Don't deadlock the state manager on the SEQUENCE status flags As described in RFC5661, section 18.46, some of the status flags exist in order to tell the client when it needs to acknowledge the existence of revoked state on the server and/or to recover state. Those flags will then remain set until the recovery procedure is done. In order to avoid looping, the client therefore needs to ignore those particular flags while recovering. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 5 ++++- fs/nfs/nfs4session.h | 1 + fs/nfs/nfs4state.c | 12 +++++++++++- 4 files changed, 17 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 55cfeadedb6e..2d1889de69fb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -448,7 +448,7 @@ extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern int nfs4_schedule_migration_recovery(const struct nfs_server *); extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *); -extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); +extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags, bool); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ec333f3a19b..9369639fef05 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -616,6 +616,7 @@ int nfs40_setup_sequence(struct nfs4_slot_table *tbl, } spin_unlock(&tbl->slot_tbl_lock); + slot->privileged = args->sa_privileged ? 1 : 0; args->sa_slot = slot; res->sr_slot = slot; @@ -728,7 +729,8 @@ static int nfs41_sequence_process(struct rpc_task *task, clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags, + !!slot->privileged); nfs41_update_target_slotid(slot->table, slot, res); break; case 1: @@ -875,6 +877,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, } spin_unlock(&tbl->slot_tbl_lock); + slot->privileged = args->sa_privileged ? 1 : 0; args->sa_slot = slot; dprintk("<-- %s slotid=%u seqid=%u\n", __func__, diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 3bb6af70973c..dae385500005 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -23,6 +23,7 @@ struct nfs4_slot { u32 slot_nr; u32 seq_nr; unsigned int interrupted : 1, + privileged : 1, seq_done : 1; }; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cada00aa5096..9801b5bb5fac 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2227,13 +2227,22 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) +void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags, + bool recovery) { if (!flags) return; dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n", __func__, clp->cl_hostname, clp->cl_clientid, flags); + /* + * If we're called from the state manager thread, then assume we're + * already handling the RECLAIM_NEEDED and/or STATE_REVOKED. + * Those flags are expected to remain set until we're done + * recovering (see RFC5661, section 18.46.3). + */ + if (recovery) + goto out_recovery; if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) nfs41_handle_server_reboot(clp); @@ -2246,6 +2255,7 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) nfs41_handle_recallable_state_revoked(clp); +out_recovery: if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) nfs41_handle_backchannel_fault(clp); else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | -- cgit v1.2.1 From 7dc72d5f7a0ec97a53e126c46e2cbd2560757955 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:52 -0400 Subject: NFS: Fix inode corruption in nfs_prime_dcache() Due to inode number reuse in filesystems, we can end up corrupting the inode on our client if we apply the file attributes without ensuring that the filehandle matches. Typical symptoms include spurious "mode changed" reports in the syslog. We still do want to ensure that we don't invalidate the dentry if the inode number matches, but we don't have a filehandle. Fixes: fa9233699cc1 ("NFS: Don't require a filehandle to refresh...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.0+ Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb095f77c39d..2bade7909dec 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -435,11 +435,11 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) return 0; nfsi = NFS_I(inode); - if (entry->fattr->fileid == nfsi->fileid) - return 1; - if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) - return 1; - return 0; + if (entry->fattr->fileid != nfsi->fileid) + return 0; + if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0) + return 0; + return 1; } static @@ -525,6 +525,8 @@ again: &entry->fattr->fsid)) goto out; if (nfs_same_file(dentry, entry)) { + if (!entry->fh->size) + goto out; nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); status = nfs_refresh_inode(d_inode(dentry), entry->fattr); if (!status) @@ -537,6 +539,10 @@ again: goto again; } } + if (!entry->fh->size) { + d_lookup_done(dentry); + goto out; + } inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); alias = d_splice_alias(inode, dentry); -- cgit v1.2.1 From b3f9e7239074613aa6bdafa4caf7c104fe1e7276 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:53 -0400 Subject: NFSv4: Don't report revoked delegations as valid in nfs_have_delegation() If the delegation is revoked, then it can't be used for caching. Fixes: 869f9dfa4d6d ("NFSv4: Fix races between nfs_remove_bad_delegation()...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.19+ Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 322c2585bc34..86d2c748140b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -51,6 +51,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL && (delegation->type & flags) == flags && + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { if (mark) nfs_mark_delegation_referenced(delegation); -- cgit v1.2.1 From aa05c87f23efe417adc7ff9b4193b7201ec0dd79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:54 -0400 Subject: NFSv4: nfs4_copy_delegation_stateid() must fail if the delegation is invalid We must not allow the use of delegations that have been revoked or are being returned. Signed-off-by: Trond Myklebust Fixes: 869f9dfa4d6d ("NFSv4: Fix races between nfs_remove_bad_delegation()...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.19+ Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 86d2c748140b..b9c65421ed81 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -41,6 +41,17 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } +static bool +nfs4_is_valid_delegation(const struct nfs_delegation *delegation, + fmode_t flags) +{ + if (delegation != NULL && (delegation->type & flags) == flags && + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && + !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + return true; + return false; +} + static int nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { @@ -50,9 +61,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL && (delegation->type & flags) == flags && - !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && - !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + if (nfs4_is_valid_delegation(delegation, flags)) { if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; @@ -894,7 +903,7 @@ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); - ret = (delegation != NULL && (delegation->type & flags) == flags); + ret = nfs4_is_valid_delegation(delegation, flags); if (ret) { nfs4_stateid_copy(dst, &delegation->stateid); nfs_mark_delegation_referenced(delegation); -- cgit v1.2.1 From 4c8e5447465c9cf92ff1c5e36e8f316b69d46aa8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:55 -0400 Subject: NFSv4.1: Don't check delegations that are already marked as revoked If the delegation has been marked as revoked, we don't have to test it, because we should already have called FREE_STATEID on it. Signed-off-by: Trond Myklebust Tested-by: Olek Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9369639fef05..6bbcd2f24e22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2423,6 +2423,11 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) rcu_read_unlock(); return; } + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + rcu_read_unlock(); + nfs_finish_clear_delegation_stateid(state); + return; + } nfs4_stateid_copy(&stateid, &delegation->stateid); cred = get_rpccred(delegation->cred); -- cgit v1.2.1 From 43912bbbae26ed258cc1d5c39b9727cd8ae60783 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:56 -0400 Subject: NFSv4.1: Allow test_stateid to handle session errors without waiting If the server crashes while we're testing stateids for validity, then we want to initiate session recovery. Usually, we will be calling from a state manager thread, though, so we don't really want to wait. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6bbcd2f24e22..7f4d0ad27ddc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8797,6 +8797,23 @@ static int _nfs41_test_stateid(struct nfs_server *server, return -res.status; } +static void nfs4_handle_delay_or_session_error(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + exception->retry = 0; + switch(err) { + case -NFS4ERR_DELAY: + nfs4_handle_exception(server, err, exception); + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + nfs4_do_handle_exception(server, err, exception); + } +} + /** * nfs41_test_stateid - perform a TEST_STATEID operation * @@ -8816,9 +8833,7 @@ static int nfs41_test_stateid(struct nfs_server *server, int err; do { err = _nfs41_test_stateid(server, stateid, cred); - if (err != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, err, &exception); + nfs4_handle_delay_or_session_error(server, err, &exception); } while (exception.retry); return err; } -- cgit v1.2.1 From 4586f6e2832776e070cc5eb88c2b2f0177e928ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:57 -0400 Subject: NFSv4.1: Add a helper function to deal with expired stateids In NFSv4.1 and newer, if the server decides to revoke some or all of the protocol state, the client is required to iterate through all the stateids that it holds and call TEST_STATEID to determine which stateids still correspond to valid state, and then call FREE_STATEID on the others. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f4d0ad27ddc..04baee4327b2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2408,6 +2408,26 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st } #if defined(CONFIG_NFS_V4_1) +static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + int status; + + status = nfs41_test_stateid(server, stateid, cred); + + switch (status) { + case -NFS4ERR_EXPIRED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + /* Ack the revoked state to the server */ + nfs41_free_stateid(server, stateid, cred); + case -NFS4ERR_BAD_STATEID: + return status; + } + return NFS_OK; +} + static void nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); @@ -2432,16 +2452,10 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) nfs4_stateid_copy(&stateid, &delegation->stateid); cred = get_rpccred(delegation->cred); rcu_read_unlock(); - status = nfs41_test_stateid(server, &stateid, cred); + status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); trace_nfs4_test_delegation_stateid(state, NULL, status); - - if (status != NFS_OK) { - /* Free the stateid unless the server explicitly - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, &stateid, cred); + if (status != NFS_OK) nfs_finish_clear_delegation_stateid(state); - } put_rpccred(cred); } @@ -2467,14 +2481,9 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) return -NFS4ERR_BAD_STATEID; - status = nfs41_test_stateid(server, stateid, cred); + status = nfs41_test_and_free_expired_stateid(server, stateid, cred); trace_nfs4_test_open_stateid(state, NULL, status); if (status != NFS_OK) { - /* Free the stateid unless the server explicitly - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -6090,17 +6099,11 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - status = nfs41_test_stateid(server, + status = nfs41_test_and_free_expired_stateid(server, &lsp->ls_stateid, cred); trace_nfs4_test_lock_stateid(state, lsp, status); if (status != NFS_OK) { - /* Free the stateid unless the server - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, - &lsp->ls_stateid, - cred); clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); ret = status; } -- cgit v1.2.1 From 41020b671aa553f31e766fd1e9d38598eba72bd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:58 -0400 Subject: NFSv4.x: Allow callers of nfs_remove_bad_delegation() to specify a stateid Allow the callers of nfs_remove_bad_delegation() to specify the stateid that needs to be marked as bad. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 30 +++++++++++++++++++++++------- fs/nfs/delegation.h | 2 +- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- fs/nfs/nfs4proc.c | 14 ++++++++------ 4 files changed, 33 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b9c65421ed81..e5212e5c73d2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -652,23 +652,39 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } -static void nfs_revoke_delegation(struct inode *inode) +static void nfs_mark_delegation_revoked(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + nfs_mark_return_delegation(server, delegation); +} + +static bool nfs_revoke_delegation(struct inode *inode, + const nfs4_stateid *stateid) { struct nfs_delegation *delegation; + bool ret = false; + rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL) { - set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - nfs_mark_return_delegation(NFS_SERVER(inode), delegation); - } + if (delegation == NULL) + goto out; + if (stateid && !nfs4_stateid_match(stateid, &delegation->stateid)) + goto out; + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + ret = true; +out: rcu_read_unlock(); + return ret; } -void nfs_remove_bad_delegation(struct inode *inode) +void nfs_remove_bad_delegation(struct inode *inode, + const nfs4_stateid *stateid) { struct nfs_delegation *delegation; - nfs_revoke_delegation(inode); + if (!nfs_revoke_delegation(inode, stateid)) + return; delegation = nfs_inode_detach_delegation(inode); if (delegation) { nfs_inode_find_state_and_recover(inode, &delegation->stateid); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 64724d252a79..d40827af5913 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -47,7 +47,7 @@ void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); -void nfs_remove_bad_delegation(struct inode *inode); +void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 51b51369704c..98ace127bf86 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1080,7 +1080,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -NFS4ERR_BAD_STATEID: if (state == NULL) break; - nfs_remove_bad_delegation(state->inode); + nfs_remove_bad_delegation(state->inode, NULL); case -NFS4ERR_OPENMODE: if (state == NULL) break; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 04baee4327b2..4cc86001220a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2385,9 +2385,10 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } -static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) { - nfs_remove_bad_delegation(state->inode); + nfs_remove_bad_delegation(state->inode, stateid); write_seqlock(&state->seqlock); nfs4_stateid_copy(&state->stateid, &state->open_stateid); write_sequnlock(&state->seqlock); @@ -2397,7 +2398,7 @@ static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) static void nfs40_clear_delegation_stateid(struct nfs4_state *state) { if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) - nfs_finish_clear_delegation_stateid(state); + nfs_finish_clear_delegation_stateid(state, NULL); } static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) @@ -2443,19 +2444,20 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) rcu_read_unlock(); return; } + + nfs4_stateid_copy(&stateid, &delegation->stateid); if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { rcu_read_unlock(); - nfs_finish_clear_delegation_stateid(state); + nfs_finish_clear_delegation_stateid(state, &stateid); return; } - nfs4_stateid_copy(&stateid, &delegation->stateid); cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); trace_nfs4_test_delegation_stateid(state, NULL, status); if (status != NFS_OK) - nfs_finish_clear_delegation_stateid(state); + nfs_finish_clear_delegation_stateid(state, &stateid); put_rpccred(cred); } -- cgit v1.2.1 From 45870d6909d5a1f702d2a3781d8fc831301d13c8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:38:59 -0400 Subject: NFSv4.1: Test delegation stateids when server declares "some state revoked" According to RFC5661, if any of the SEQUENCE status bits SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, SEQ4_STATUS_ADMIN_STATE_REVOKED, or SEQ4_STATUS_RECALLABLE_STATE_REVOKED are set, then we need to use TEST_STATEID to figure out which stateids have been revoked, so we can acknowledge the loss of state using FREE_STATEID. While we already do this for open and lock state, we have not been doing so for all the delegations. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/delegation.h | 4 +++ fs/nfs/nfs4_fs.h | 3 ++ fs/nfs/nfs4proc.c | 10 ++++++ fs/nfs/nfs4state.c | 17 +++++----- 5 files changed, 122 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index e5212e5c73d2..dfb300901f7e 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -812,8 +812,15 @@ static void nfs_delegation_mark_reclaim_server(struct nfs_server *server) { struct nfs_delegation *delegation; - list_for_each_entry_rcu(delegation, &server->delegations, super_list) + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + /* + * If the delegation may have been admin revoked, then we + * cannot reclaim it. + */ + if (test_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) + continue; set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + } } /** @@ -877,6 +884,94 @@ restart: rcu_read_unlock(); } +static void nfs_mark_test_expired_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); +} + +static void nfs_delegation_mark_test_expired_server(struct nfs_server *server) +{ + struct nfs_delegation *delegation; + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) + nfs_mark_test_expired_delegation(server, delegation); +} + +/** + * nfs_mark_test_expired_all_delegations - mark all delegations for testing + * @clp: nfs_client to process + * + * Iterates through all the delegations associated with this server and + * marks them as needing to be checked for validity. + */ +void nfs_mark_test_expired_all_delegations(struct nfs_client *clp) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_delegation_mark_test_expired_server(server); + rcu_read_unlock(); +} + +/** + * nfs_reap_expired_delegations - reap expired delegations + * @clp: nfs_client to process + * + * Iterates through all the delegations associated with this server and + * checks if they have may have been revoked. This function is usually + * expected to be called in cases where the server may have lost its + * lease. + */ +void nfs_reap_expired_delegations(struct nfs_client *clp) +{ + const struct nfs4_minor_version_ops *ops = clp->cl_mvops; + struct nfs_delegation *delegation; + struct nfs_server *server; + struct inode *inode; + struct rpc_cred *cred; + nfs4_stateid stateid; + +restart: + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry_rcu(delegation, &server->delegations, + super_list) { + if (test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags)) + continue; + if (test_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags) == 0) + continue; + if (!nfs_sb_active(server->super)) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) { + rcu_read_unlock(); + nfs_sb_deactive(server->super); + goto restart; + } + cred = get_rpccred_rcu(delegation->cred); + nfs4_stateid_copy(&stateid, &delegation->stateid); + clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + rcu_read_unlock(); + if (cred != NULL && + ops->test_and_free_expired(server, &stateid, cred) < 0) { + nfs_revoke_delegation(inode, &stateid); + nfs_inode_find_state_and_recover(inode, &stateid); + } + put_rpccred(cred); + iput(inode); + nfs_sb_deactive(server->super); + goto restart; + } + } + rcu_read_unlock(); +} + /** * nfs_delegations_present - check for existence of delegations * @clp: client state handle diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d40827af5913..1442e3b1521d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -32,6 +32,7 @@ enum { NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, NFS_DELEGATION_REVOKED, + NFS_DELEGATION_TEST_EXPIRED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -52,6 +53,9 @@ void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid) void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); +void nfs_mark_test_expired_all_delegations(struct nfs_client *clp); +void nfs_reap_expired_delegations(struct nfs_client *clp); + /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2d1889de69fb..9b3a82abab07 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -39,6 +39,7 @@ enum nfs4_client_state { NFS4CLNT_BIND_CONN_TO_SESSION, NFS4CLNT_MOVED, NFS4CLNT_LEASE_MOVED, + NFS4CLNT_DELEGATION_EXPIRED, }; #define NFS4_RENEW_TIMEOUT 0x01 @@ -57,6 +58,8 @@ struct nfs4_minor_version_ops { struct nfs_fsinfo *); void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); + int (*test_and_free_expired)(struct nfs_server *, + nfs4_stateid *, struct rpc_cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); int (*session_trunk)(struct rpc_clnt *, struct rpc_xprt *, void *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4cc86001220a..083786aad83c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2408,6 +2408,13 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st return nfs4_open_expired(sp, state); } +static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + return -NFS4ERR_BAD_STATEID; +} + #if defined(CONFIG_NFS_V4_1) static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, nfs4_stateid *stateid, @@ -9061,6 +9068,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, + .test_and_free_expired = nfs40_test_and_free_expired_stateid, .alloc_seqid = nfs_alloc_seqid, .call_sync_ops = &nfs40_call_sync_ops, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, @@ -9088,6 +9096,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .test_and_free_expired = nfs41_test_and_free_expired_stateid, .alloc_seqid = nfs_alloc_no_seqid, .session_trunk = nfs4_test_session_trunk, .call_sync_ops = &nfs41_call_sync_ops, @@ -9118,6 +9127,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, .call_sync_ops = &nfs41_call_sync_ops, + .test_and_free_expired = nfs41_test_and_free_expired_stateid, .alloc_seqid = nfs_alloc_no_seqid, .session_trunk = nfs4_test_session_trunk, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9801b5bb5fac..63da0411e2af 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1656,15 +1656,9 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) put_rpccred(cred); } -static void nfs_delegation_clear_all(struct nfs_client *clp) -{ - nfs_delegation_mark_reclaim(clp); - nfs_delegation_reap_unclaimed(clp); -} - static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) { - nfs_delegation_clear_all(clp); + nfs_mark_test_expired_all_delegations(clp); nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); } @@ -2195,7 +2189,7 @@ static void nfs41_handle_all_state_revoked(struct nfs_client *clp) static void nfs41_handle_some_state_revoked(struct nfs_client *clp) { - nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); + nfs4_state_start_reclaim_nograce(clp); nfs4_schedule_state_manager(clp); dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); @@ -2420,6 +2414,13 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_state_end_reclaim_reboot(clp); } + /* Detect expired delegations... */ + if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) { + section = "detect expired delegations"; + nfs_reap_expired_delegations(clp); + continue; + } + /* Now recover expired state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { section = "reclaim nograce"; -- cgit v1.2.1 From bb3d1a3b24b61d8dd87e2d8c127a92ec8dd5d0d4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:00 -0400 Subject: NFSv4.1: Deal with server reboots during delegation expiration recovery Ensure that if the server reboots while we're testing and recovering from revoked delegations, we exit to allow the state manager to handle matters. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dfb300901f7e..0ffead281555 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -884,6 +884,13 @@ restart: rcu_read_unlock(); } +static inline bool nfs4_server_rebooted(const struct nfs_client *clp) +{ + return (clp->cl_state & (BIT(NFS4CLNT_CHECK_LEASE) | + BIT(NFS4CLNT_LEASE_EXPIRED) | + BIT(NFS4CLNT_SESSION_RESET))) != 0; +} + static void nfs_mark_test_expired_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { @@ -892,6 +899,19 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); } +static void nfs_inode_mark_test_expired_delegation(struct nfs_server *server, + struct inode *inode) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation) + nfs_mark_test_expired_delegation(server, delegation); + rcu_read_unlock(); + +} + static void nfs_delegation_mark_test_expired_server(struct nfs_server *server) { struct nfs_delegation *delegation; @@ -964,6 +984,12 @@ restart: nfs_inode_find_state_and_recover(inode, &stateid); } put_rpccred(cred); + if (nfs4_server_rebooted(clp)) { + nfs_inode_mark_test_expired_delegation(server,inode); + iput(inode); + nfs_sb_deactive(server->super); + return; + } iput(inode); nfs_sb_deactive(server->super); goto restart; -- cgit v1.2.1 From 63d63cbf5e03f47c99baa0a1ba1c345fe426e3bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:01 -0400 Subject: NFSv4.1: Don't recheck delegations that have already been checked Ensure we don't spam the server with test_stateid() calls for delegations that have already been checked. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 083786aad83c..dfa46e49e356 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2459,6 +2459,11 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) return; } + if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { + rcu_read_unlock(); + return; + } + cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); -- cgit v1.2.1 From f7a62adad01cdb2b64c5a17cdd440736b99a5829 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:02 -0400 Subject: NFSv4.1: Allow revoked stateids to skip the call to TEST_STATEID In some cases (e.g. when the SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED sequence flag is set) we may already know that the stateid was revoked and that the only valid operation we can call is FREE_STATEID. In those cases, allow the stateid to carry the information in the type field, so that we skip the redundant call to TEST_STATEID. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dfa46e49e356..02eab9113273 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2422,18 +2422,29 @@ static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, { int status; - status = nfs41_test_stateid(server, stateid, cred); + switch (stateid->type) { + default: + break; + case NFS4_INVALID_STATEID_TYPE: + case NFS4_SPECIAL_STATEID_TYPE: + return -NFS4ERR_BAD_STATEID; + case NFS4_REVOKED_STATEID_TYPE: + goto out_free; + } + status = nfs41_test_stateid(server, stateid, cred); switch (status) { case -NFS4ERR_EXPIRED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: - /* Ack the revoked state to the server */ - nfs41_free_stateid(server, stateid, cred); - case -NFS4ERR_BAD_STATEID: + break; + default: return status; } - return NFS_OK; +out_free: + /* Ack the revoked state to the server */ + nfs41_free_stateid(server, stateid, cred); + return -NFS4ERR_EXPIRED; } static void nfs41_check_delegation_stateid(struct nfs4_state *state) @@ -2468,7 +2479,7 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); trace_nfs4_test_delegation_stateid(state, NULL, status); - if (status != NFS_OK) + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) nfs_finish_clear_delegation_stateid(state, &stateid); put_rpccred(cred); @@ -2497,7 +2508,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) status = nfs41_test_and_free_expired_stateid(server, stateid, cred); trace_nfs4_test_open_stateid(state, NULL, status); - if (status != NFS_OK) { + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -6105,7 +6116,7 @@ out: */ static int nfs41_check_expired_locks(struct nfs4_state *state) { - int status, ret = -NFS4ERR_BAD_STATEID; + int status, ret = NFS_OK; struct nfs4_lock_state *lsp; struct nfs_server *server = NFS_SERVER(state->inode); @@ -6117,9 +6128,12 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) &lsp->ls_stateid, cred); trace_nfs4_test_lock_stateid(state, lsp, status); - if (status != NFS_OK) { + if (status == -NFS4ERR_EXPIRED || + status == -NFS4ERR_BAD_STATEID) clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + else if (status != NFS_OK) { ret = status; + break; } } }; -- cgit v1.2.1 From c5896fc8622d57b31e1e98545d67d7089019e478 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:03 -0400 Subject: NFSv4.1: Ensure we always run TEST/FREE_STATEID on locks Right now, we're only running TEST/FREE_STATEID on the locks if the open stateid recovery succeeds. The protocol requires us to always do so. The fix would be to move the call to TEST/FREE_STATEID and do it before we attempt open recovery. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 92 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 02eab9113273..b5290fd99209 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2485,6 +2485,45 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) put_rpccred(cred); } +/** + * nfs41_check_expired_locks - possibly free a lock stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. + */ +static int nfs41_check_expired_locks(struct nfs4_state *state) +{ + int status, ret = NFS_OK; + struct nfs4_lock_state *lsp; + struct nfs_server *server = NFS_SERVER(state->inode); + + if (!test_bit(LK_STATE_IN_USE, &state->flags)) + goto out; + list_for_each_entry(lsp, &state->lock_states, ls_locks) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { + struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + + status = nfs41_test_and_free_expired_stateid(server, + &lsp->ls_stateid, + cred); + trace_nfs4_test_lock_stateid(state, lsp, status); + if (status == -NFS4ERR_EXPIRED || + status == -NFS4ERR_BAD_STATEID) { + clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + if (!recover_lost_locks) + set_bit(NFS_LOCK_LOST, &lsp->ls_flags); + } else if (status != NFS_OK) { + ret = status; + break; + } + } + }; +out: + return ret; +} + /** * nfs41_check_open_stateid - possibly free an open stateid * @@ -2522,6 +2561,9 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st int status; nfs41_check_delegation_stateid(state); + status = nfs41_check_expired_locks(state); + if (status != NFS_OK) + return status; status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -6106,49 +6148,19 @@ out: } #if defined(CONFIG_NFS_V4_1) -/** - * nfs41_check_expired_locks - possibly free a lock stateid - * - * @state: NFSv4 state for an inode - * - * Returns NFS_OK if recovery for this stateid is now finished. - * Otherwise a negative NFS4ERR value is returned. - */ -static int nfs41_check_expired_locks(struct nfs4_state *state) -{ - int status, ret = NFS_OK; - struct nfs4_lock_state *lsp; - struct nfs_server *server = NFS_SERVER(state->inode); - - list_for_each_entry(lsp, &state->lock_states, ls_locks) { - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - - status = nfs41_test_and_free_expired_stateid(server, - &lsp->ls_stateid, - cred); - trace_nfs4_test_lock_stateid(state, lsp, status); - if (status == -NFS4ERR_EXPIRED || - status == -NFS4ERR_BAD_STATEID) - clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - else if (status != NFS_OK) { - ret = status; - break; - } - } - }; - - return ret; -} - static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) { - int status = NFS_OK; + struct nfs4_lock_state *lsp; + int status; - if (test_bit(LK_STATE_IN_USE, &state->flags)) - status = nfs41_check_expired_locks(state); - if (status != NFS_OK) - status = nfs4_lock_expired(state, request); + status = nfs4_set_lock_state(state, request); + if (status != 0) + return status; + lsp = request->fl_u.nfs4_fl.owner; + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) || + test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) + return 0; + status = nfs4_lock_expired(state, request); return status; } #endif -- cgit v1.2.1 From f0b0bf8826145f2da8b13f375177e907ae44b796 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:04 -0400 Subject: NFSv4.1: FREE_STATEID can be asynchronous Nothing should need to be serialised with FREE_STATEID on the client, so let's make the RPC call always asynchronous. Also constify the stateid argument. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b5290fd99209..1741e6454684 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -99,8 +99,8 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, #ifdef CONFIG_NFS_V4_1 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, struct rpc_cred *); -static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, - struct rpc_cred *); +static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, + struct rpc_cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -2443,7 +2443,7 @@ static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, } out_free: /* Ack the revoked state to the server */ - nfs41_free_stateid(server, stateid, cred); + nfs41_free_stateid(server, stateid, cred, true); return -NFS4ERR_EXPIRED; } @@ -8921,7 +8921,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { }; static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, - nfs4_stateid *stateid, + const nfs4_stateid *stateid, struct rpc_cred *cred, bool privileged) { @@ -8964,38 +8964,31 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, * @server: server / transport on which to perform the operation * @stateid: state ID to release * @cred: credential + * @is_recovery: set to true if this call needs to be privileged * - * Returns NFS_OK if the server freed "stateid". Otherwise a - * negative NFS4ERR value is returned. + * Note: this function is always asynchronous. */ static int nfs41_free_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - struct rpc_cred *cred) + const nfs4_stateid *stateid, + struct rpc_cred *cred, + bool is_recovery) { struct rpc_task *task; - int ret; - task = _nfs41_free_stateid(server, stateid, cred, true); + task = _nfs41_free_stateid(server, stateid, cred, is_recovery); if (IS_ERR(task)) return PTR_ERR(task); - ret = rpc_wait_for_completion_task(task); - if (!ret) - ret = task->tk_status; rpc_put_task(task); - return ret; + return 0; } static void nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct rpc_task *task; struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); + nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); nfs4_free_lock_state(server, lsp); - if (IS_ERR(task)) - return; - rpc_put_task(task); } static bool nfs41_match_stateid(const nfs4_stateid *s1, -- cgit v1.2.1 From 26d36301bd653df6481fd38f3e1435a1f15e56d1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:05 -0400 Subject: NFSv4.1: Ensure we call FREE_STATEID if needed on close/delegreturn/locku If a server returns NFS4ERR_ADMIN_REVOKED, NFS4ERR_DELEG_REVOKED or NFS4ERR_EXPIRED on a call to close, open_downgrade, delegreturn, or locku, we should call FREE_STATEID before attempting to recover. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1741e6454684..ee1fd4c5e646 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -328,6 +328,33 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } +static void nfs4_test_and_free_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops; + + ops->test_and_free_expired(server, stateid, cred); +} + +static void __nfs4_free_revoked_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + stateid->type = NFS4_REVOKED_STATEID_TYPE; + nfs4_test_and_free_stateid(server, stateid, cred); +} + +static void nfs4_free_revoked_stateid(struct nfs_server *server, + const nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + nfs4_stateid tmp; + + nfs4_stateid_copy(&tmp, stateid); + __nfs4_free_revoked_stateid(server, &tmp, cred); +} + static long nfs4_update_delay(long *timeout) { long ret; @@ -2985,9 +3012,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(server, + &calldata->arg.stateid, + task->tk_msg.rpc_cred); case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_EXPIRED: if (!nfs4_stateid_match(&calldata->arg.stateid, &state->open_stateid)) { rpc_restart_call_prepare(task); @@ -5479,10 +5509,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) renew_lease(data->res.server, data->timestamp); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(data->res.server, + data->args.stateid, + task->tk_msg.rpc_cred); case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: task->tk_status = 0; if (data->roc) pnfs_roc_set_barrier(data->inode, data->roc_barrier); @@ -5726,10 +5759,14 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(calldata->server, + &calldata->arg.stateid, + task->tk_msg.rpc_cred); case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: if (!nfs4_stateid_match(&calldata->arg.stateid, &calldata->lsp->ls_stateid)) rpc_restart_call_prepare(task); -- cgit v1.2.1 From 059b43e9744efe5b6f8e83516d2a0e813bfaee70 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:06 -0400 Subject: NFSv4: Ensure we don't re-test revoked and freed stateids This fixes a potential infinite loop in nfs_reap_expired_delegations. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 0ffead281555..484f14700108 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -656,6 +656,7 @@ static void nfs_mark_delegation_revoked(struct nfs_server *server, struct nfs_delegation *delegation) { set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; nfs_mark_return_delegation(server, delegation); } @@ -894,6 +895,8 @@ static inline bool nfs4_server_rebooted(const struct nfs_client *clp) static void nfs_mark_test_expired_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { + if (delegation->stateid.type == NFS4_INVALID_STATEID_TYPE) + return; clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); -- cgit v1.2.1 From 6c2d8f8d307033a7cdfee3f4cb9dc4679ab69513 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:07 -0400 Subject: NFSv4: nfs_inode_find_state_and_recover() should check all stateids Modify the helper nfs_inode_find_state_and_recover() so that it can check all open/lock/delegation state trackers on that inode for whether or not they need are affected by a revoked stateid error. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 19 +++++++++++++++++++ fs/nfs/delegation.h | 2 ++ fs/nfs/nfs4state.c | 44 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 484f14700108..5de4cfb2ab07 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1001,6 +1001,25 @@ restart: rcu_read_unlock(); } +void nfs_inode_find_delegation_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_delegation *delegation; + bool found = false; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation && + nfs4_stateid_match_other(&delegation->stateid, stateid)) { + nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation); + found = true; + } + rcu_read_unlock(); + if (found) + nfs4_schedule_state_manager(clp); +} + /** * nfs_delegations_present - check for existence of delegations * @clp: client state handle diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1442e3b1521d..e9d555796873 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -66,6 +66,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_check_delegation(struct inode *inode, fmode_t flags); bool nfs4_delegation_flush_on_close(const struct inode *inode); +void nfs_inode_find_delegation_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid); #endif diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 63da0411e2af..4b538bb194f8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1337,6 +1337,35 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); +static struct nfs4_lock_state * +nfs_state_find_lock_state_by_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + struct nfs4_lock_state *pos; + + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags)) + continue; + if (nfs4_stateid_match_other(&pos->ls_stateid, stateid)) + return pos; + } + return NULL; +} + +static bool nfs_state_lock_state_matches_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + bool found = false; + + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + spin_lock(&state->state_lock); + if (nfs_state_find_lock_state_by_stateid(state, stateid)) + found = true; + spin_unlock(&state->state_lock); + } + return found; +} + void nfs_inode_find_state_and_recover(struct inode *inode, const nfs4_stateid *stateid) { @@ -1351,14 +1380,19 @@ void nfs_inode_find_state_and_recover(struct inode *inode, state = ctx->state; if (state == NULL) continue; - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) - continue; - if (!nfs4_stateid_match(&state->stateid, stateid)) + if (nfs4_stateid_match_other(&state->stateid, stateid)) { + nfs4_state_mark_reclaim_nograce(clp, state); + found = true; continue; - nfs4_state_mark_reclaim_nograce(clp, state); - found = true; + } + if (nfs_state_lock_state_matches_stateid(state, stateid)) { + nfs4_state_mark_reclaim_nograce(clp, state); + found = true; + } } spin_unlock(&inode->i_lock); + + nfs_inode_find_delegation_state_and_recover(inode, stateid); if (found) nfs4_schedule_state_manager(clp); } -- cgit v1.2.1 From 404ea3569af1e1d288db6ff0e7230104a3687f3c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:08 -0400 Subject: NFSv4: nfs4_handle_delegation_recall_error() handle expiration as revoke case If the server tells us our stateid has expired, then handle that as if it was revoked. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ee1fd4c5e646..89887f15946e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1919,7 +1919,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ nfs4_schedule_lease_recovery(server->nfs_client); return -EAGAIN; @@ -1931,6 +1930,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct return -EAGAIN; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: nfs_inode_find_state_and_recover(state->inode, -- cgit v1.2.1 From d7f3e4bfe78847aa792c86a348f0473ae3a8800f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:09 -0400 Subject: NFSv4: nfs4_handle_setlk_error() handle expiration as revoke case If the server tells us our stateid has expired, then handle that as if it was revoked. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 89887f15946e..27004f5262e6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6067,6 +6067,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ { switch (error) { case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; if (new_lock_owner != 0 || @@ -6075,7 +6076,6 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ break; case -NFS4ERR_STALE_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(server->nfs_client); }; } -- cgit v1.2.1 From 26f474432a7b4be336ed40d94f5a8245781cfc67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:10 -0400 Subject: NFSv4.1: nfs4_layoutget_handle_exception handle revoked state Handle revoked open/lock/delegation stateids when LAYOUTGET tells us the state was revoked. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 27004f5262e6..446de949f960 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8268,6 +8268,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, case -NFS4ERR_RECALLCONFLICT: status = -ERECALLCONFLICT; break; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: exception->timeout = 0; @@ -8279,6 +8281,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, &lgp->args.ctx->state->stateid)) { spin_unlock(&inode->i_lock); exception->state = lgp->args.ctx->state; + exception->stateid = &lgp->args.stateid; break; } -- cgit v1.2.1 From 9c27869d3f0680fa01d15616891a666d9e30fd83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:11 -0400 Subject: NFSv4: Pass the stateid to the exception handler in nfs4_read/write_done_cb The actual stateid used in the READ or WRITE can represent a delegation, a lock or a stateid, so it is useful to pass it as an argument to the exception handler when an expired/revoked response is received from the server. It also ensures that we don't re-label the state as needing recovery if that has already occurred. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 446de949f960..d1e60c915a07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4525,11 +4525,18 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) struct nfs_server *server = NFS_SERVER(hdr->inode); trace_nfs4_read(hdr, task->tk_status); - if (nfs4_async_handle_error(task, server, - hdr->args.context->state, - NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return -EAGAIN; + if (task->tk_status < 0) { + struct nfs4_exception exception = { + .inode = hdr->inode, + .state = hdr->args.context->state, + .stateid = &hdr->args.stateid, + }; + task->tk_status = nfs4_async_handle_exception(task, + server, task->tk_status, &exception); + if (exception.retry) { + rpc_restart_call_prepare(task); + return -EAGAIN; + } } __nfs4_read_done_cb(hdr); @@ -4598,11 +4605,19 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct inode *inode = hdr->inode; trace_nfs4_write(hdr, task->tk_status); - if (nfs4_async_handle_error(task, NFS_SERVER(inode), - hdr->args.context->state, - NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return -EAGAIN; + if (task->tk_status < 0) { + struct nfs4_exception exception = { + .inode = hdr->inode, + .state = hdr->args.context->state, + .stateid = &hdr->args.stateid, + }; + task->tk_status = nfs4_async_handle_exception(task, + NFS_SERVER(inode), task->tk_status, + &exception); + if (exception.retry) { + rpc_restart_call_prepare(task); + return -EAGAIN; + } } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), hdr->timestamp); -- cgit v1.2.1 From b1a318de9bc4946ad4a20481ab3ce28c2e8cd72c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:12 -0400 Subject: NFSv4: Fix a race in nfs_inode_reclaim_delegation() If we race with a delegreturn before taking the spin lock, we currently end up dropping the delegation stateid. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5de4cfb2ab07..094e0efe6a82 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -195,15 +195,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, rcu_read_unlock(); put_rpccred(oldcred); trace_nfs4_reclaim_delegation(inode, res->delegation_type); - } else { - /* We appear to have raced with a delegation return. */ - spin_unlock(&delegation->lock); - rcu_read_unlock(); - nfs_inode_set_delegation(inode, cred, res); + return; } - } else { - rcu_read_unlock(); + /* We appear to have raced with a delegation return. */ + spin_unlock(&delegation->lock); } + rcu_read_unlock(); + nfs_inode_set_delegation(inode, cred, res); } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) -- cgit v1.2.1 From 1393d9612ba02d8bb8dae1dab319807e92354fe3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:13 -0400 Subject: NFSv4: Fix a race when updating an open_stateid If we're replacing an old stateid which has a different 'other' field, then we probably need to free the old stateid. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d1e60c915a07..389089e857f4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1399,11 +1399,12 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) } static bool nfs_need_update_open_stateid(struct nfs4_state *state, - nfs4_stateid *stateid) + const nfs4_stateid *stateid, nfs4_stateid *freeme) { if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) return true; if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs4_stateid_copy(freeme, &state->open_stateid); nfs_test_and_clear_all_open_stateid(state); return true; } @@ -1467,7 +1468,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_schedule_state_manager(state->owner->so_server->nfs_client); } -static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +static void nfs_set_open_stateid_locked(struct nfs4_state *state, + const nfs4_stateid *stateid, fmode_t fmode, + nfs4_stateid *freeme) { switch (fmode) { case FMODE_READ: @@ -1479,14 +1482,18 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * case FMODE_READ|FMODE_WRITE: set_bit(NFS_O_RDWR_STATE, &state->flags); } - if (!nfs_need_update_open_stateid(state, stateid)) + if (!nfs_need_update_open_stateid(state, stateid, freeme)) return; if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); } -static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) +static void __update_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + const nfs4_stateid *deleg_stateid, + fmode_t fmode, + nfs4_stateid *freeme) { /* * Protect the call to nfs4_state_set_mode_locked and @@ -1499,16 +1506,22 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, fmode); + nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme); write_sequnlock(&state->seqlock); update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } -static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode) +static int update_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + const nfs4_stateid *delegation, + fmode_t fmode) { + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *deleg_cur; + nfs4_stateid freeme = {0}; int ret = 0; fmode &= (FMODE_READ|FMODE_WRITE); @@ -1530,7 +1543,8 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode); + __update_open_stateid(state, open_stateid, &deleg_cur->stateid, + fmode, &freeme); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); @@ -1538,11 +1552,14 @@ no_delegation: rcu_read_unlock(); if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, fmode); + __update_open_stateid(state, open_stateid, NULL, fmode, &freeme); ret = 1; } if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) - nfs4_schedule_state_manager(state->owner->so_server->nfs_client); + nfs4_schedule_state_manager(clp); + if (freeme.type != 0) + nfs4_test_and_free_stateid(server, &freeme, + state->owner->so_cred); return ret; } -- cgit v1.2.1 From 7f04883146b7cb1db57c3479820cbed511cafd05 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:14 -0400 Subject: NFS: Always call nfs_inode_find_state_and_recover() when revoking a delegation Don't rely on nfs_inode_detach_delegation() succeeding. That can race... Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 094e0efe6a82..dff600ae0d74 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -662,18 +662,24 @@ static bool nfs_revoke_delegation(struct inode *inode, const nfs4_stateid *stateid) { struct nfs_delegation *delegation; + nfs4_stateid tmp; bool ret = false; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL) goto out; - if (stateid && !nfs4_stateid_match(stateid, &delegation->stateid)) + if (stateid == NULL) { + nfs4_stateid_copy(&tmp, &delegation->stateid); + stateid = &tmp; + } else if (!nfs4_stateid_match(stateid, &delegation->stateid)) goto out; nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); ret = true; out: rcu_read_unlock(); + if (ret) + nfs_inode_find_state_and_recover(inode, stateid); return ret; } @@ -685,10 +691,8 @@ void nfs_remove_bad_delegation(struct inode *inode, if (!nfs_revoke_delegation(inode, stateid)) return; delegation = nfs_inode_detach_delegation(inode); - if (delegation) { - nfs_inode_find_state_and_recover(inode, &delegation->stateid); + if (delegation) nfs_free_delegation(delegation); - } } EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); -- cgit v1.2.1 From 272289a3df7297fd798a8ddf652c6d29f675de28 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:15 -0400 Subject: NFSv4: nfs4_do_handle_exception() handle revoke/expiry of a single stateid If we're not yet sure that all state has expired or been revoked, we should try to do a minimal recovery on just the one stateid. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 389089e857f4..409b9fa2b844 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -397,13 +397,23 @@ static int nfs4_do_handle_exception(struct nfs_server *server, exception->delay = 0; exception->recovering = 0; exception->retry = 0; + + if (stateid == NULL && state != NULL) + stateid = &state->stateid; + switch(errorcode) { case 0: return 0; - case -NFS4ERR_OPENMODE: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: + if (inode != NULL && stateid != NULL) { + nfs_inode_find_state_and_recover(inode, + stateid); + goto wait_on_recovery; + } + case -NFS4ERR_OPENMODE: if (inode) { int err; @@ -422,12 +432,6 @@ static int nfs4_do_handle_exception(struct nfs_server *server, if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); -- cgit v1.2.1 From b134fc4a533300402514154fcb0a661ddf106dd9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:16 -0400 Subject: NFSv4: Don't test open_stateid unless it is set We need to test the NFS_OPEN_STATE flag for whether or not the open_stateid is valid. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 409b9fa2b844..6f0df2c8846c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2587,6 +2587,11 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) struct rpc_cred *cred = state->owner->so_cred; int status; + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) { + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + return NFS_OK; + return -NFS4ERR_BAD_STATEID; + } /* If a state reset has been done, test_stateid is unneeded */ if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && -- cgit v1.2.1 From 67dd483026c64444d7fd8eab83334621b3c47b76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:17 -0400 Subject: NFSv4: Mark the lock and open stateids as invalid after freeing them Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6f0df2c8846c..fdd7faceaf22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2560,6 +2560,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + lsp->ls_stateid.type = NFS4_INVALID_STATEID_TYPE; if (!recover_lost_locks) set_bit(NFS_LOCK_LOST, &lsp->ls_flags); } else if (status != NFS_OK) { @@ -2605,6 +2606,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags); + stateid->type = NFS4_INVALID_STATEID_TYPE; } return status; } -- cgit v1.2.1 From 304020fe48c6c7fff8b5a38f382b54404f0f79d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:18 -0400 Subject: NFSv4: Open state recovery must account for file permission changes If the file permissions change on the server, then we may not be able to recover open state. If so, we need to ensure that we mark the file descriptor appropriately. Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4b538bb194f8..0a25f70a3b0b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1532,6 +1532,9 @@ restart: __func__, status); case -ENOENT: case -ENOMEM: + case -EACCES: + case -EROFS: + case -EIO: case -ESTALE: /* Open state on this file cannot be recovered */ nfs4_state_mark_recovery_failed(state, status); -- cgit v1.2.1 From 76e8a1bd143a84bcc16ee1c19a2cd2715696b572 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:19 -0400 Subject: NFSv4: Fix retry issues with nfs41_test/free_stateid _nfs41_free_stateid() needs to be cached by the session, but nfs41_test_stateid() may return NFS4ERR_RETRY_UNCACHED_REP (in which case we should just retry). Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fdd7faceaf22..79109c84f18d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8929,6 +8929,7 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server, exception->retry = 0; switch(err) { case -NFS4ERR_DELAY: + case -NFS4ERR_RETRY_UNCACHED_REP: nfs4_handle_exception(server, err, exception); break; case -NFS4ERR_BADSESSION: @@ -9034,7 +9035,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); if (privileged) nfs4_set_sequence_privileged(&data->args.seq_args); -- cgit v1.2.1 From 7ebeb7fe74bb855bc76a72154e669f00dc614729 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:20 -0400 Subject: NFSv4: If recovery failed for a specific open stateid, then don't retry Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0a25f70a3b0b..5f4281ec5f72 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -991,6 +991,8 @@ int nfs4_select_rw_stateid(struct nfs4_state *state, { int ret; + if (!nfs4_valid_open_stateid(state)) + return -EIO; if (cred != NULL) *cred = NULL; ret = nfs4_copy_lock_stateid(dst, state, lockowner); @@ -1303,6 +1305,8 @@ void nfs4_schedule_path_down_recovery(struct nfs_client *clp) static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { + if (!nfs4_valid_open_stateid(state)) + return 0; set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); /* Don't recover state that expired before the reboot */ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) { @@ -1316,6 +1320,8 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) { + if (!nfs4_valid_open_stateid(state)) + return 0; set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags); @@ -1327,9 +1333,8 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ { struct nfs_client *clp = server->nfs_client; - if (!nfs4_valid_open_stateid(state)) + if (!nfs4_state_mark_reclaim_nograce(clp, state)) return -EBADF; - nfs4_state_mark_reclaim_nograce(clp, state); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); @@ -1380,15 +1385,14 @@ void nfs_inode_find_state_and_recover(struct inode *inode, state = ctx->state; if (state == NULL) continue; - if (nfs4_stateid_match_other(&state->stateid, stateid)) { - nfs4_state_mark_reclaim_nograce(clp, state); + if (nfs4_stateid_match_other(&state->stateid, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) { found = true; continue; } - if (nfs_state_lock_state_matches_stateid(state, stateid)) { - nfs4_state_mark_reclaim_nograce(clp, state); + if (nfs_state_lock_state_matches_stateid(state, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) found = true; - } } spin_unlock(&inode->i_lock); -- cgit v1.2.1 From 8a64c4ef106d17805691e893642912041a622938 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:21 -0400 Subject: NFSv4.1: Even if the stateid is OK, we may need to recover the open modes TEST_STATEID only tells you that you have a valid open stateid. It doesn't tell the client anything about whether or not it holds the required share locks. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin [Anna: Wrap nfs_open_stateid_recover_openmode in CONFIG_NFS_V4_1 checks] Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 79109c84f18d..27120baf1ff3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1387,6 +1387,19 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) nfs4_state_set_mode_locked(state, state->state | fmode); } +#ifdef CONFIG_NFS_V4_1 +static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state) +{ + if (state->n_rdonly && !test_bit(NFS_O_RDONLY_STATE, &state->flags)) + return true; + if (state->n_wronly && !test_bit(NFS_O_WRONLY_STATE, &state->flags)) + return true; + if (state->n_rdwr && !test_bit(NFS_O_RDWR_STATE, &state->flags)) + return true; + return false; +} +#endif /* CONFIG_NFS_V4_1 */ + static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -2589,16 +2602,13 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) int status; if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) { - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - return NFS_OK; + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) { + if (nfs4_have_delegation(state->inode, state->state)) + return NFS_OK; + return -NFS4ERR_OPENMODE; + } return -NFS4ERR_BAD_STATEID; } - /* If a state reset has been done, test_stateid is unneeded */ - if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && - (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && - (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) - return -NFS4ERR_BAD_STATEID; - status = nfs41_test_and_free_expired_stateid(server, stateid, cred); trace_nfs4_test_open_stateid(state, NULL, status); if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { @@ -2608,7 +2618,11 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) clear_bit(NFS_OPEN_STATE, &state->flags); stateid->type = NFS4_INVALID_STATEID_TYPE; } - return status; + if (status != NFS_OK) + return status; + if (nfs_open_stateid_recover_openmode(state)) + return -NFS4ERR_OPENMODE; + return NFS_OK; } static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) -- cgit v1.2.1 From bfc505ded01e3c57d12c6f939f352200655d8635 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Sep 2016 18:26:05 -0400 Subject: pNFS: Fix atime updates on pNFS clients Fix the code so that we always mark the atime as invalid in nfs4_read_done(). Currently, the expectation appears to be that the pNFS drivers should always do this, with the result that most of them don't. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 1 - fs/nfs/nfs4proc.c | 8 ++------ fs/nfs/pnfs.c | 4 +--- 3 files changed, 3 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4f0b2db8327f..512b776824c9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -560,7 +560,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); extern int nfs40_walk_client_list(struct nfs_client *clp, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 27120baf1ff3..5b2b07bca490 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4557,11 +4557,6 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_pgio_header *hdr) -{ - nfs_invalidate_atime(hdr->inode); -} - static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct nfs_server *server = NFS_SERVER(hdr->inode); @@ -4581,7 +4576,6 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) } } - __nfs4_read_done_cb(hdr); if (task->tk_status > 0) renew_lease(server, hdr->timestamp); return 0; @@ -4610,6 +4604,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; + if (task->tk_status > 0) + nfs_invalidate_atime(hdr->inode); return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : nfs4_read_done_cb(task, hdr); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b588ccf05045..56b2d96f9103 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2230,10 +2230,8 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - if (likely(!hdr->pnfs_error)) { - __nfs4_read_done_cb(hdr); + if (likely(!hdr->pnfs_error)) hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_read_error(hdr); -- cgit v1.2.1 From a865880e20ca4d2df362f61c9ef51f0fc0273131 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 23 Sep 2016 17:24:03 -0400 Subject: Retry operation on EREMOTEIO on an interrupted slot If an operation got interrupted, then since we don't know if the server processed it on not, we keep the seq#. Upon reuse of slot and seq# if we get reply from the cache (ie EREMOTEIO) then we need to retry the operation after bumping the seq# Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b2b07bca490..8b30cdf41848 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -755,6 +755,13 @@ static int nfs41_sequence_process(struct rpc_task *task, /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: + /* If previous op on slot was interrupted and we reused + * the seq# and got a reply from the cache, then retry + */ + if (task->tk_status == -EREMOTEIO && interrupted) { + ++slot->seq_nr; + goto retry_nowait; + } /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; -- cgit v1.2.1 From 2f86e0919a02d61ed2350c07f383e5542b904289 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 1 Oct 2016 16:46:26 -0700 Subject: fs: nfs: Make nfs boot time y2038 safe boot_time is represented as a struct timespec. struct timespec and CURRENT_TIME are not y2038 safe. Overall, the plan is to use timespec64 and ktime_t for all internal kernel representation of timestamps. CURRENT_TIME will also be removed. boot_time is used to construct the nfs client boot verifier. Use ktime_t to represent boot_time and ktime_get_real() for the boot_time value. Following Trond's request https://lkml.org/lkml/2016/6/9/22 , use ktime_t instead of converting to struct timespec64. Use higher and lower 32 bit parts of ktime_t for the boot verifier. Use the lower 32 bit part of ktime_t for the authsys_parms stamp field. Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 2 +- fs/nfs/netns.h | 2 +- fs/nfs/nfs4proc.c | 10 ++++++---- fs/nfs/nfs4xdr.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 51136b03788d..7555ba889d1f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1082,7 +1082,7 @@ void nfs_clients_init(struct net *net) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); - nn->boot_time = CURRENT_TIME; + nn->boot_time = ktime_get_real(); } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index f0e06e4acbef..fbce0d885d4c 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -29,7 +29,7 @@ struct nfs_net { int cb_users[NFS4_MAX_MINOR_VERSION + 1]; #endif spinlock_t nfs_client_lock; - struct timespec boot_time; + ktime_t boot_time; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_nfsfs; #endif diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8b30cdf41848..ed7da820d4bf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5299,12 +5299,14 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { /* An impossible timestamp guarantees this value * will never match a generated boot time. */ - verf[0] = 0; - verf[1] = cpu_to_be32(NSEC_PER_SEC + 1); + verf[0] = cpu_to_be32(U32_MAX); + verf[1] = cpu_to_be32(U32_MAX); } else { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - verf[0] = cpu_to_be32(nn->boot_time.tv_sec); - verf[1] = cpu_to_be32(nn->boot_time.tv_nsec); + u64 ns = ktime_to_ns(nn->boot_time); + + verf[0] = cpu_to_be32(ns >> 32); + verf[1] = cpu_to_be32(ns); } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 17b4e059c588..fc89e5ed07ee 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1850,7 +1850,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ /* authsys_parms rfc1831 */ - *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ + *p++ = cpu_to_be32(ktime_to_ns(nn->boot_time)); /* stamp */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ -- cgit v1.2.1 From 3f807e5ae5597bd65a6fff684083e8eaa21f3fa7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 4 Oct 2016 00:07:43 -0400 Subject: NFSv4.2: Fix a reference leak in nfs42_proc_layoutstats_generic The caller of rpc_run_task also gets a reference that must be put. Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 64b43b4ad9dd..608501971fe0 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -443,6 +443,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); + rpc_put_task(task); return 0; } -- cgit v1.2.1