From 41124db869b7e00e12052555f8987867ac01d70c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 1 Jun 2017 11:08:01 -0700
Subject: fs: warn in case userspace lied about modprobe return

kmod <= v19 was broken -- it could return 0 to modprobe calls,
incorrectly assuming that a kernel module was built-in, whereas in
reality the module was just forming in the kernel. The reason for this
is an incorrect userspace heuristics. A userspace kmod fix is available
for it [0], however should userspace break again we could go on with
an failed get_fs_type() which is hard to debug as the request_module()
is detected as returning 0. The first suspect would be that there is
something worth with the kernel's module loader and obviously in this
case that is not the issue.

Since these issues are painful to debug complain when we know userspace
has outright lied to us.

[0] http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4

Suggested-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jessica Yu <jeyu@redhat.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/filesystems.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/filesystems.c b/fs/filesystems.c
index cac75547d35c..8b99955e3504 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -275,8 +275,10 @@ struct file_system_type *get_fs_type(const char *name)
 	int len = dot ? dot - name : strlen(name);
 
 	fs = __get_fs_type(name, len);
-	if (!fs && (request_module("fs-%.*s", len, name) == 0))
+	if (!fs && (request_module("fs-%.*s", len, name) == 0)) {
 		fs = __get_fs_type(name, len);
+		WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name);
+	}
 
 	if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
 		put_filesystem(fs);
-- 
cgit v1.2.1


From cc658db47d6897a8571fb6227f59d1d18151b0b2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 Jun 2017 09:53:06 -0700
Subject: fs: Reorder inode_owner_or_capable() to avoid needless

Checking for capabilities should be the last operation when performing
access control tests so that PF_SUPERPRIV is set only when it was required
for success (implying that the capability was needed for the operation).

Reported-by: Solar Designer <solar@openwall.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 2 +-
 fs/namei.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..7092debe90cc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2023,7 +2023,7 @@ bool inode_owner_or_capable(const struct inode *inode)
 		return true;
 
 	ns = current_user_ns();
-	if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
+	if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
 		return true;
 	return false;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112e..efe53a5d0737 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1008,7 +1008,7 @@ static int may_linkat(struct path *link)
 	/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
 	 * otherwise, it must be a safe source.
 	 */
-	if (inode_owner_or_capable(inode) || safe_hardlink_source(inode))
+	if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
 		return 0;
 
 	audit_log_link_denied("linkat", link);
-- 
cgit v1.2.1


From 4f2ed694148131f93baffca9e68b0cd8dcc96c38 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Wed, 21 Jun 2017 19:46:47 +0200
Subject: minix: Deinline get_block, save 2691 bytes

This function compiles to 1402 bytes of machine code.

It has 2 callsites, and also a not-inlined copy gets created by compiler
anyway since its address gets passed as a parameter to block_truncate_page().

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/minix/itree_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 4c57c9af6946..2d1ca08870f7 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -142,7 +142,7 @@ changed:
 	return -EAGAIN;
 }
 
-static inline int get_block(struct inode * inode, sector_t block,
+static int get_block(struct inode * inode, sector_t block,
 			struct buffer_head *bh, int create)
 {
 	int err = -EIO;
-- 
cgit v1.2.1


From 6916363f3083837ed5adb3df2dd90d6b97017dff Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 27 Jun 2017 18:19:11 +0200
Subject: fs/dcache: init in_lookup_hashtable

in_lookup_hashtable was introduced in commit 94bdd655caba ("parallel
lookups machinery, part 3") and never initialized but since it is in
the data it is all zeros. But we need this for -RT.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a9f995f6859e..b85da8897ffa 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3608,6 +3608,11 @@ EXPORT_SYMBOL(d_genocide);
 
 void __init vfs_caches_init_early(void)
 {
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
+		INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
+
 	dcache_init_early();
 	inode_init_early();
 }
-- 
cgit v1.2.1


From a8e2b6367794e6cee9eecba6d5ff425f338e0754 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 29 Jun 2017 11:25:40 +0200
Subject: Make statfs properly return read-only state after emergency remount

Emergency remount (sysrq-u) sets MS_RDONLY to the superblock but doesn't set
MNT_READONLY to the mount point.

Once calculate_f_flags() only check for the mount point read only state,
when setting kstatfs flags, after an emergency remount, statfs does not
report the filesystem as read-only, even though it is.

Enable flags_by_sb() to also check for superblock read only state, so the
kstatfs and consequently statfs can properly show the read-only state of
the filesystem.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/statfs.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/statfs.c b/fs/statfs.c
index 4e4623c7a126..c1dfc374e3c1 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -38,6 +38,8 @@ static int flags_by_sb(int s_flags)
 		flags |= ST_SYNCHRONOUS;
 	if (s_flags & MS_MANDLOCK)
 		flags |= ST_MANDLOCK;
+	if (s_flags & MS_RDONLY)
+		flags |= ST_RDONLY;
 	return flags;
 }
 
-- 
cgit v1.2.1


From 49d31c2f389acfe83417083e1208422b4091cd9e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 7 Jul 2017 14:51:19 -0400
Subject: dentry name snapshots

take_dentry_name_snapshot() takes a safe snapshot of dentry name;
if the name is a short one, it gets copied into caller-supplied
structure, otherwise an extra reference to external name is grabbed
(those are never modified).  In either case the pointer to stable
string is stored into the same structure.

dentry must be held by the caller of take_dentry_name_snapshot(),
but may be freely dropped afterwards - the snapshot will stay
until destroyed by release_dentry_name_snapshot().

Intended use:
	struct name_snapshot s;

	take_dentry_name_snapshot(&s, dentry);
	...
	access s.name
	...
	release_dentry_name_snapshot(&s);

Replaces fsnotify_oldname_...(), gets used in fsnotify to obtain the name
to pass down with event.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c          | 27 +++++++++++++++++++++++++++
 fs/debugfs/inode.c   | 10 +++++-----
 fs/namei.c           |  8 ++++----
 fs/notify/fsnotify.c |  8 ++++++--
 4 files changed, 42 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index b85da8897ffa..831f3a9a8f05 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -277,6 +277,33 @@ static inline int dname_external(const struct dentry *dentry)
 	return dentry->d_name.name != dentry->d_iname;
 }
 
+void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	if (unlikely(dname_external(dentry))) {
+		struct external_name *p = external_name(dentry);
+		atomic_inc(&p->u.count);
+		spin_unlock(&dentry->d_lock);
+		name->name = p->name;
+	} else {
+		memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
+		spin_unlock(&dentry->d_lock);
+		name->name = name->inline_name;
+	}
+}
+EXPORT_SYMBOL(take_dentry_name_snapshot);
+
+void release_dentry_name_snapshot(struct name_snapshot *name)
+{
+	if (unlikely(name->name != name->inline_name)) {
+		struct external_name *p;
+		p = container_of(name->name, struct external_name, name[0]);
+		if (unlikely(atomic_dec_and_test(&p->u.count)))
+			kfree_rcu(p, u.head);
+	}
+}
+EXPORT_SYMBOL(release_dentry_name_snapshot);
+
 static inline void __d_set_inode_and_type(struct dentry *dentry,
 					  struct inode *inode,
 					  unsigned type_flags)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e892ae7d89f8..acd3be2cc691 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -766,7 +766,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 {
 	int error;
 	struct dentry *dentry = NULL, *trap;
-	const char *old_name;
+	struct name_snapshot old_name;
 
 	trap = lock_rename(new_dir, old_dir);
 	/* Source or destination directories don't exist? */
@@ -781,19 +781,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 	if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
 		goto exit;
 
-	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+	take_dentry_name_snapshot(&old_name, old_dentry);
 
 	error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
 			      dentry, 0);
 	if (error) {
-		fsnotify_oldname_free(old_name);
+		release_dentry_name_snapshot(&old_name);
 		goto exit;
 	}
 	d_move(old_dentry, dentry);
-	fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name,
+	fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name,
 		d_is_dir(old_dentry),
 		NULL, old_dentry);
-	fsnotify_oldname_free(old_name);
+	release_dentry_name_snapshot(&old_name);
 	unlock_rename(new_dir, old_dir);
 	dput(dentry);
 	return old_dentry;
diff --git a/fs/namei.c b/fs/namei.c
index efe53a5d0737..c5588e837b15 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4362,11 +4362,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	int error;
 	bool is_dir = d_is_dir(old_dentry);
-	const unsigned char *old_name;
 	struct inode *source = old_dentry->d_inode;
 	struct inode *target = new_dentry->d_inode;
 	bool new_is_dir = false;
 	unsigned max_links = new_dir->i_sb->s_max_links;
+	struct name_snapshot old_name;
 
 	if (source == target)
 		return 0;
@@ -4413,7 +4413,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		return error;
 
-	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+	take_dentry_name_snapshot(&old_name, old_dentry);
 	dget(new_dentry);
 	if (!is_dir || (flags & RENAME_EXCHANGE))
 		lock_two_nondirectories(source, target);
@@ -4468,14 +4468,14 @@ out:
 		inode_unlock(target);
 	dput(new_dentry);
 	if (!error) {
-		fsnotify_move(old_dir, new_dir, old_name, is_dir,
+		fsnotify_move(old_dir, new_dir, old_name.name, is_dir,
 			      !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
 		if (flags & RENAME_EXCHANGE) {
 			fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
 				      new_is_dir, NULL, new_dentry);
 		}
 	}
-	fsnotify_oldname_free(old_name);
+	release_dentry_name_snapshot(&old_name);
 
 	return error;
 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 01a9f0f007d4..0c4583b61717 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -161,16 +161,20 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask
 	if (unlikely(!fsnotify_inode_watches_children(p_inode)))
 		__fsnotify_update_child_dentry_flags(p_inode);
 	else if (p_inode->i_fsnotify_mask & mask) {
+		struct name_snapshot name;
+
 		/* we are notifying a parent so come up with the new mask which
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
+		take_dentry_name_snapshot(&name, dentry);
 		if (path)
 			ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
-				       dentry->d_name.name, 0);
+				       name.name, 0);
 		else
 			ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-				       dentry->d_name.name, 0);
+				       name.name, 0);
+		release_dentry_name_snapshot(&name);
 	}
 
 	dput(parent);
-- 
cgit v1.2.1