From 9343919c1495b085a4a1cf4cbada8d7888daf099 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: allow fanotify to be built We disabled the ability to build fanotify in commit 7c5347733dcc4ba0ba. This reverts that commit and allows people to build fanotify. Signed-off-by: Eric Paris --- fs/notify/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index b388443c3a09..22c629eedd82 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -3,4 +3,4 @@ config FSNOTIFY source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" -#source "fs/notify/fanotify/Kconfig" +source "fs/notify/fanotify/Kconfig" -- cgit v1.2.1 From 6ad2d4e3e97ee4bfde0b45e8dfe37911330fc4aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: implement ordering between notifiers fanotify needs to be able to specify that some groups get events before others. They use this idea to make sure that a hierarchical storage manager gets access to files before programs which actually use them. This is purely infrastructure. Everything will have a priority of 0, but the infrastructure will exist for it to be non-zero. Signed-off-by: Eric Paris --- fs/notify/inode_mark.c | 9 +++++++-- fs/notify/vfsmount_mark.c | 6 +++++- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 21ed10660b80..4c29fcf557d1 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, * Attach an initialized mark to a given inode. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group and for which inodes. These - * marks are ordered according to the group's location in memory. + * marks are ordered according to priority, highest number first, and then by + * the group's location in memory. */ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, @@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group < lmark->group) + if (mark->group->priority < lmark->group->priority) + continue; + + if ((mark->group->priority == lmark->group->priority) && + (mark->group < lmark->group)) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 56772b578fbd..85eebff6d0d7 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group < lmark->group) + if (mark->group->priority < lmark->group->priority) + continue; + + if ((mark->group->priority == lmark->group->priority) && + (mark->group < lmark->group)) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); -- cgit v1.2.1 From 4231a23530a30e86eb32fbe869bbef1b3e54d5aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: implement fanotify listener ordering The fanotify listeners needs to be able to specify what types of operations they are going to perform so they can be ordered appropriately between other listeners doing other types of operations. They need this to be able to make sure that things like hierarchichal storage managers will get access to inodes before processes which need the data. This patch defines 3 possible uses which groups must indicate in the fanotify_init() flags. FAN_CLASS_PRE_CONTENT FAN_CLASS_CONTENT FAN_CLASS_NOTIF Groups will receive notification in that order. The order between 2 groups in the same class is undeterministic. FAN_CLASS_PRE_CONTENT is intended to be used by listeners which need access to the inode before they are certain that the inode contains it's final data. A hierarchical storage manager should choose to use this class. FAN_CLASS_CONTENT is intended to be used by listeners which need access to the inode after it contains its intended contents. This would be the appropriate level for an AV solution or document control system. FAN_CLASS_NOTIF is intended for normal async notification about access, much the same as inotify and dnotify. Syncronous permissions events are not permitted at this class. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bbcb98e7fcc6..1c09e6321c5e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -664,6 +664,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); #endif + switch (flags & FAN_ALL_CLASS_BITS) { + case FAN_CLASS_NOTIF: + group->priority = FS_PRIO_0; + break; + case FAN_CLASS_CONTENT: + group->priority = FS_PRIO_1; + break; + case FAN_CLASS_PRE_CONTENT: + group->priority = FS_PRIO_2; + break; + default: + fd = -EINVAL; + goto out_put_group; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) @@ -719,6 +733,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, ret = -EINVAL; if (unlikely(filp->f_op != &fanotify_fops)) goto fput_and_out; + group = filp->private_data; + + /* + * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not + * allowed to set permissions events. + */ + ret = -EINVAL; + if (mask & FAN_ALL_PERM_EVENTS && + group->priority == FS_PRIO_0) + goto fput_and_out; ret = fanotify_find_path(dfd, pathname, &path, flags); if (ret) @@ -729,7 +753,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, inode = path.dentry->d_inode; else mnt = path.mnt; - group = filp->private_data; /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { -- cgit v1.2.1 From ff8bcbd03da881bf1171910c6c07d44bd3c0a234 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: correctly handle return codes from listeners When fsnotify groups return errors they are ignored. For permissions events these should be passed back up the stack, but for most events these should continue to be ignored. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4498a208df94..57ecadd85abf 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -252,20 +252,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_group > vfsmount_group) { /* handle inode */ - send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, - data_is, cookie, file_name, &event); + ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, + data_is, cookie, file_name, &event); /* we didn't use the vfsmount_mark */ vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { - send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, - data_is, cookie, file_name, &event); + ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, + data_is, cookie, file_name, &event); inode_group = NULL; } else { - send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, file_name, - &event); + ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, + mask, data, data_is, cookie, file_name, + &event); } + if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) + goto out; + if (inode_group) inode_node = srcu_dereference(inode_node->next, &fsnotify_mark_srcu); @@ -273,7 +276,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_node = srcu_dereference(vfsmount_node->next, &fsnotify_mark_srcu); } - + ret = 0; +out: srcu_read_unlock(&fsnotify_mark_srcu, idx); /* * fsnotify_create_event() took a reference so the event can't be cleaned -- cgit v1.2.1 From 52420392c81c8712f555e6bcd116d8bd214ce43a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: call fsnotify_parent in perm events fsnotify perm events do not call fsnotify parent. That means you cannot register a perm event on a directory and enforce permissions on all inodes in that directory. This patch fixes that situation. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 57ecadd85abf..20dc218707ca 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -84,16 +84,17 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } /* Notify this dentry's parent about a child's events. */ -void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { struct dentry *parent; struct inode *p_inode; + int ret = 0; if (!dentry) dentry = path->dentry; if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) - return; + return 0; parent = dget_parent(dentry); p_inode = parent->d_inode; @@ -106,14 +107,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) mask |= FS_EVENT_ON_CHILD; if (path) - fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - dentry->d_name.name, 0); + ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, + dentry->d_name.name, 0); else - fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name, 0); + ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); } dput(parent); + + return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); -- cgit v1.2.1 From 5322a59f14e4cae5f878b9c0c5612d403c230d7f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: ignore fanotify ignore marks if open writers fanotify will clear ignore marks if a task changes the contents of an inode. The problem is with the races around when userspace finishes checking a file and when that result is actually attached to the inode. This race was described as such: Consider the following scenario with hostile processes A and B, and victim process C: 1. Process A opens new file for writing. File check request is generated. 2. File check is performed in userspace. Check result is "file has no malware". 3. The "permit" response is delivered to kernel space. 4. File ignored mark set. 5. Process A writes dummy bytes to the file. File ignored flags are cleared. 6. Process B opens the same file for reading. File check request is generated. 7. File check is performed in userspace. Check result is "file has no malware". 8. Process A writes malware bytes to the file. There is no cached response yet. 9. The "permit" response is delivered to kernel space and is cached in fanotify. 10. File ignored mark set. 11. Now any process C will be permitted to open the malware file. There is a race between steps 8 and 10 While fanotify makes no strong guarantees about systems with hostile processes there is no reason we cannot harden against this race. We do that by simply ignoring any ignore marks if the inode has open writers (aka i_writecount > 0). (We actually do not ignore ignore marks if the FAN_MARK_SURV_MODIFY flag is set) Reported-by: Vasily Novikov Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1c09e6321c5e..b265936e92d6 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -610,6 +610,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); + /* + * If some other task has this inode open for write we should not add + * an ignored mark, unless that ignored mark is supposed to survive + * modification changes anyway. + */ + if ((flags & FAN_MARK_IGNORED_MASK) && + !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && + (atomic_read(&inode->i_writecount) > 0)) + return 0; + fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) { int ret; -- cgit v1.2.1 From 2529a0df0f64dab1f60ae08e038b89c53a6b4c02 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fsnotify: implement a default maximum queue depth Currently fanotify has no maximum queue depth. Since fanotify is CAP_SYS_ADMIN only this does not pose a normal user DoS issue, but it certianly is possible that an fanotify listener which can't keep up could OOM the box. This patch implements a default 16k depth. This is the same default depth used by inotify, but given fanotify's better queue merging in many situations this queue will contain many additional useful events by comparison. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b265936e92d6..04f2fe47b66a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -16,6 +16,8 @@ #include +#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 + extern const struct fsnotify_ops fanotify_fsnotify_ops; static struct kmem_cache *fanotify_mark_cache __read_mostly; @@ -689,6 +691,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group; -- cgit v1.2.1 From 5dd03f55fd2f21916ce248bb2e68bbfb39d94fe5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to override max queue depth fanotify has a defualt max queue depth. This patch allows processes which explicitly request it to have an 'unlimited' queue depth. These processes need to be very careful to make sure they cannot fall far enough behind that they OOM the box. Thus this flag is gated on CAP_SYS_ADMIN. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 04f2fe47b66a..43d66d9b2eff 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -691,7 +691,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } - group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + if (flags & FAN_UNLIMITED_QUEUE) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->max_events = UINT_MAX; + } else { + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) -- cgit v1.2.1 From e7099d8a5a34d2876908a9fab4952dabdcfc5909 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: limit the number of marks in a single fanotify group There is currently no limit on the number of marks a given fanotify group can have. Since fanotify is gated on CAP_SYS_ADMIN this was not seen as a serious DoS threat. This patch implements a default of 8192, the same as inotify to work towards removing the CAP_SYS_ADMIN gating and eliminating the default DoS'able status. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 43d66d9b2eff..1d33d7db277a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -17,6 +17,7 @@ #include #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 +#define FANOTIFY_DEFAULT_MAX_MARKS 8192 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -584,6 +585,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, if (!fsn_mark) { int ret; + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return -ENOSPC; + fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fsn_mark) return -ENOMEM; @@ -626,6 +630,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, if (!fsn_mark) { int ret; + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return -ENOSPC; + fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fsn_mark) return -ENOMEM; @@ -700,6 +707,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group; -- cgit v1.2.1 From ac7e22dcfafd04c842a02057afd6541c1d613ef9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: allow userspace to override max marks Some fanotify groups, especially those like AV scanners, will need to place lots of marks, particularly ignore marks. Since ignore marks do not pin inodes in cache and are cleared if the inode is removed from core (usually under memory pressure) we expose an interface for listeners, with CAP_SYS_ADMIN, to override the maximum number of marks and be allowed to set and 'unlimited' number of marks. Programs which make use of this feature will be able to OOM a machine. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1d33d7db277a..f9216102b426 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -707,7 +707,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } - group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + if (flags & FAN_UNLIMITED_MARKS) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->fanotify_data.max_marks = UINT_MAX; + } else { + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) -- cgit v1.2.1 From 4afeff8505cb8a38e36c1ef2bd3447c4b8f87367 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: limit number of listeners per user fanotify currently has no limit on the number of listeners a given user can have open. This patch limits the total number of listeners per user to 128. This is the same as the inotify default limit. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 11 ++++++++++- fs/notify/fanotify/fanotify_user.c | 11 +++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 85366c78cc37..60c11c306fd9 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -200,10 +200,19 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, return false; } +static void fanotify_free_group_priv(struct fsnotify_group *group) +{ + struct user_struct *user; + + user = group->fanotify_data.user; + atomic_dec(&user->fanotify_listeners); + free_uid(user); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .should_send_event = fanotify_should_send_event, - .free_group_priv = NULL, + .free_group_priv = fanotify_free_group_priv, .free_event_priv = NULL, .freeing_mark = NULL, }; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f9216102b426..a7d9369482d5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -18,6 +18,7 @@ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_DEFAULT_MAX_MARKS 8192 +#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -656,6 +657,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct fsnotify_group *group; int f_flags, fd; + struct user_struct *user; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -666,6 +668,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; + user = get_current_user(); + if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { + free_uid(user); + return -EMFILE; + } + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; @@ -677,6 +685,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (IS_ERR(group)) return PTR_ERR(group); + group->fanotify_data.user = user; + atomic_inc(&user->fanotify_listeners); + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS mutex_init(&group->fanotify_data.access_mutex); -- cgit v1.2.1 From e1c048ba786789afdc66f32d8394bb5a0014bbba Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: do not send events for irregular files fanotify_should_send_event has a test to see if an object is a file or directory and does not send an event otherwise. The problem is that the test is actually checking if the object with a mark is a file or directory, not if the object the event happened on is a file or directory. We should check the latter. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 60c11c306fd9..8d98e1f5817b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -160,20 +160,21 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, __u32 event_mask, void *data, int data_type) { __u32 marks_mask, marks_ignored_mask; + struct path *path = data; pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p " "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, inode_mark, vfsmnt_mark, event_mask, data, data_type); - /* sorry, fanotify only gives a damn about files and dirs */ - if (!S_ISREG(to_tell->i_mode) && - !S_ISDIR(to_tell->i_mode)) - return false; - /* if we don't have enough info to send an event to userspace say no */ if (data_type != FSNOTIFY_EVENT_PATH) return false; + /* sorry, fanotify only gives a damn about files and dirs */ + if (!S_ISREG(path->dentry->d_inode->i_mode) && + !S_ISDIR(path->dentry->d_inode->i_mode)) + return false; + if (inode_mark && vfsmnt_mark) { marks_mask = (vfsmnt_mark->mask | inode_mark->mask); marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); -- cgit v1.2.1 From b29866aab8489487f11cc4506590ac31bdbae22a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fsnotify: rename FS_IN_ISDIR to FS_ISDIR The _IN_ in the naming is reserved for flags only used by inotify. Since I am about to use this flag for fanotify rename it to be generic like the rest. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 24edc1185d53..444c305a468c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -862,7 +862,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); - BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); + BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); -- cgit v1.2.1 From 8fcd65280abc4699510f1853ede31f43e8a3783a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: ignore events on directories unless specifically requested fanotify has a very limited number of events it sends on directories. The usefulness of these events is yet to be seen and still we send them. This is particularly painful for mount marks where one might receive many of these useless events. As such this patch will drop events on IS_DIR() inodes unless they were explictly requested with FAN_ON_DIR. This means that a mark on a directory without FAN_EVENT_ON_CHILD or FAN_ON_DIR is meaningless and will result in no events ever (although it will still be allowed since detecting it is hard) Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 5 +++++ fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++ 2 files changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 8d98e1f5817b..b04f88eed09e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); + BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -195,6 +196,10 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, BUG(); } + if (S_ISDIR(path->dentry->d_inode->i_mode) && + (marks_ignored_mask & FS_ISDIR)) + return false; + if (event_mask & marks_mask & ~marks_ignored_mask) return true; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a7d9369482d5..ff1a908c9708 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -570,6 +570,12 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } + + if (!(flags & FAN_MARK_ONDIR)) { + __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + } + spin_unlock(&fsn_mark->lock); return mask & ~oldmask; @@ -766,6 +772,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, default: return -EINVAL; } + + if (mask & FAN_ONDIR) { + flags |= FAN_MARK_ONDIR; + mask &= ~FAN_ONDIR; + } + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) #else -- cgit v1.2.1 From 192ca4d1941228e69c1fbeebab317725407e6e65 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: do not recalculate the mask if the ignored mask changed If fanotify sets a new bit in the ignored mask it will cause the generic fsnotify layer to recalculate the real mask. This is stupid since we didn't change that part. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ff1a908c9708..fa71d5dfd102 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -558,15 +558,15 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mask, unsigned int flags) { - __u32 oldmask; + __u32 oldmask = -1; spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { oldmask = fsn_mark->mask; fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); } else { - oldmask = fsn_mark->ignored_mask; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask)); + __u32 tmask = fsn_mark->ignored_mask | mask; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } -- cgit v1.2.1 From 19ba54f4645f8c5edae4b08919a37a409b8793aa Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fs/notify/fanotify/fanotify_user.c: fix warnings fs/notify/fanotify/fanotify_user.c: In function 'fanotify_release': fs/notify/fanotify/fanotify_user.c:375: warning: unused variable 'lre' fs/notify/fanotify/fanotify_user.c:375: warning: unused variable 're' this is really ugly. Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fa71d5dfd102..fce66dfbf7d5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -376,11 +376,10 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fanotify_response_event *re, *lre; - - pr_debug("%s: file=%p group=%p\n", __func__, file, group); #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + struct fanotify_response_event *re, *lre; + mutex_lock(&group->fanotify_data.access_mutex); group->fanotify_data.bypass_perm = true; -- cgit v1.2.1 From 1a5cea7215f7c6bd3c960d7b44e864f3a73d1ad4 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 29 Oct 2010 12:06:42 +0200 Subject: make fanotify_read() restartable across signals In fanotify_read() return -ERESTARTSYS instead of -EINTR to make read() restartable across signals (BSD semantic). Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fce66dfbf7d5..063224812b7e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -330,7 +330,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) break; - ret = -EINTR; + ret = -ERESTARTSYS; if (signal_pending(current)) break; -- cgit v1.2.1