From 9c12a831d73dd938a22418d70b39aed4feb4bdf2 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 16 Sep 2013 08:24:26 -0400
Subject: ext4: fix performance regression in writeback of random writes

The Linux Kernel Performance project guys have reported that commit
4e7ea81db5 introduces a performance regression for the following fio
workload:

[global]
direct=0
ioengine=mmap
size=1500M
bs=4k
pre_read=1
numjobs=1
overwrite=1
loops=5
runtime=300
group_reporting
invalidate=0
directory=/mnt/
file_service_type=random:36
file_service_type=random:36

[job0]
startdelay=0
rw=randrw
filename=data0/f1:data0/f2

[job1]
startdelay=0
rw=randrw
filename=data0/f2:data0/f1
...

[job7]
startdelay=0
rw=randrw
filename=data0/f2:data0/f1

The culprit of the problem is that after the commit ext4_writepages()
are more aggressive in writing back pages. Thus we have less consecutive
dirty pages resulting in more seeking.

This increased aggressivity is caused by a bug in the condition
terminating ext4_writepages(). We start writing from the beginning of
the file even if we should have terminated ext4_writepages() because
wbc->nr_to_write <= 0.

After fixing the condition the throughput of the fio workload is about 20%
better than before writeback reorganization.

Reported-by: "Yan, Zheng" <zheng.z.yan@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9115f2807515..4cf2619f007c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2559,7 +2559,7 @@ retry:
 			break;
 	}
 	blk_finish_plug(&plug);
-	if (!ret && !cycled) {
+	if (!ret && !cycled && wbc->nr_to_write > 0) {
 		cycled = 1;
 		mpd.last_page = writeback_index - 1;
 		mpd.first_page = 0;
-- 
cgit v1.2.1


From 8660998608cfa1077e560034db81885af8e1e885 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Fri, 6 Sep 2013 21:49:56 -0500
Subject: jfs: fix error path in ialloc

If insert_inode_locked() fails, we shouldn't be calling
unlock_new_inode().

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Tested-by: Michael L. Semon <mlsemon35@gmail.com>
Cc: stable@vger.kernel.org
---
 fs/jfs/jfs_inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index c1a3e603279c..7f464c513ba0 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 
 	if (insert_inode_locked(inode) < 0) {
 		rc = -EINVAL;
-		goto fail_unlock;
+		goto fail_put;
 	}
 
 	inode_init_owner(inode, parent, mode);
@@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 fail_drop:
 	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
-fail_unlock:
 	clear_nlink(inode);
 	unlock_new_inode(inode);
 fail_put:
-- 
cgit v1.2.1


From c31f330719b7331b2112a5525fe5941a99ac223d Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@us.ibm.com>
Date: Sat, 28 Sep 2013 18:24:12 -0500
Subject: do not treat non-symlink reparse points as valid symlinks

Windows 8 and later can create NFS symlinks (within reparse points)
which we were assuming were normal NTFS symlinks and thus reporting
corrupt paths for.  Add check for reparse points to make sure that
they really are normal symlinks before we try to parse the pathname.

We also should not be parsing other types of reparse points (DFS
junctions etc) as if they were a  symlink so return EOPNOTSUPP
on those.  Also fix endian errors (we were not parsing symlink
lengths as little endian).

This fixes commit d244bf2dfbebfded05f494ffd53659fa7b1e32c1
which implemented follow link for non-Unix CIFS mounts

CC: Stable <stable@kernel.org>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifspdu.h  | 31 +++++++++++++++++++++++--------
 fs/cifs/cifssmb.c  | 40 ++++++++++++++++++++++++++++++++++------
 fs/cifs/smbfsctl.h | 14 ++++++++++++++
 3 files changed, 71 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index a630475e421c..08f9dfb1a894 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1491,15 +1491,30 @@ struct file_notify_information {
 	__u8  FileName[0];
 } __attribute__((packed));
 
-struct reparse_data {
-	__u32	ReparseTag;
-	__u16	ReparseDataLength;
+/* For IO_REPARSE_TAG_SYMLINK */
+struct reparse_symlink_data {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
 	__u16	Reserved;
-	__u16	SubstituteNameOffset;
-	__u16	SubstituteNameLength;
-	__u16	PrintNameOffset;
-	__u16	PrintNameLength;
-	__u32	Flags;
+	__le16	SubstituteNameOffset;
+	__le16	SubstituteNameLength;
+	__le16	PrintNameOffset;
+	__le16	PrintNameLength;
+	__le32	Flags;
+	char	PathBuffer[0];
+} __attribute__((packed));
+
+/* For IO_REPARSE_TAG_NFS */
+#define NFS_SPECFILE_LNK	0x00000000014B4E4C
+#define NFS_SPECFILE_CHR	0x0000000000524843
+#define NFS_SPECFILE_BLK	0x00000000004B4C42
+#define NFS_SPECFILE_FIFO	0x000000004F464946
+#define NFS_SPECFILE_SOCK	0x000000004B434F53
+struct reparse_posix_data {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
+	__u16	Reserved;
+	__le64	InodeType; /* LNK, FIFO, CHR etc. */
 	char	PathBuffer[0];
 } __attribute__((packed));
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4baf35949b51..ccd31ab815d4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3088,7 +3088,8 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
 	bool is_unicode;
 	unsigned int sub_len;
 	char *sub_start;
-	struct reparse_data *reparse_buf;
+	struct reparse_symlink_data *reparse_buf;
+	struct reparse_posix_data *posix_buf;
 	__u32 data_offset, data_count;
 	char *end_of_smb;
 
@@ -3137,20 +3138,47 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
 		goto qreparse_out;
 	}
 	end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
-	reparse_buf = (struct reparse_data *)
+	reparse_buf = (struct reparse_symlink_data *)
 				((char *)&pSMBr->hdr.Protocol + data_offset);
 	if ((char *)reparse_buf >= end_of_smb) {
 		rc = -EIO;
 		goto qreparse_out;
 	}
-	if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset +
-				reparse_buf->PrintNameLength) > end_of_smb) {
+	if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) {
+		cifs_dbg(FYI, "NFS style reparse tag\n");
+		posix_buf =  (struct reparse_posix_data *)reparse_buf;
+
+		if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) {
+			cifs_dbg(FYI, "unsupported file type 0x%llx\n",
+				 le64_to_cpu(posix_buf->InodeType));
+			rc = -EOPNOTSUPP;
+			goto qreparse_out;
+		}
+		is_unicode = true;
+		sub_len = le16_to_cpu(reparse_buf->ReparseDataLength);
+		if (posix_buf->PathBuffer + sub_len > end_of_smb) {
+			cifs_dbg(FYI, "reparse buf beyond SMB\n");
+			rc = -EIO;
+			goto qreparse_out;
+		}
+		*symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer,
+				sub_len, is_unicode, nls_codepage);
+		goto qreparse_out;
+	} else if (reparse_buf->ReparseTag !=
+			cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) {
+		rc = -EOPNOTSUPP;
+		goto qreparse_out;
+	}
+
+	/* Reparse tag is NTFS symlink */
+	sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) +
+				reparse_buf->PathBuffer;
+	sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength);
+	if (sub_start + sub_len > end_of_smb) {
 		cifs_dbg(FYI, "reparse buf beyond SMB\n");
 		rc = -EIO;
 		goto qreparse_out;
 	}
-	sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer;
-	sub_len = reparse_buf->SubstituteNameLength;
 	if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
 		is_unicode = true;
 	else
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index d952ee48f4dc..a4b2391fe66e 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -97,9 +97,23 @@
 #define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */
 #define FSCTL_SRV_READ_HASH          0x001441BB /* BB add struct */
 
+/* See FSCC 2.1.2.5 */
 #define IO_REPARSE_TAG_MOUNT_POINT   0xA0000003
 #define IO_REPARSE_TAG_HSM           0xC0000004
 #define IO_REPARSE_TAG_SIS           0x80000007
+#define IO_REPARSE_TAG_HSM2          0x80000006
+#define IO_REPARSE_TAG_DRIVER_EXTENDER 0x80000005
+/* Used by the DFS filter. See MS-DFSC */
+#define IO_REPARSE_TAG_DFS           0x8000000A
+/* Used by the DFS filter See MS-DFSC */
+#define IO_REPARSE_TAG_DFSR          0x80000012
+#define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B
+/* See section MS-FSCC 2.1.2.4 */
+#define IO_REPARSE_TAG_SYMLINK       0xA000000C
+#define IO_REPARSE_TAG_DEDUP         0x80000013
+#define IO_REPARSE_APPXSTREAM	     0xC0000014
+/* NFS symlinks, Win 8/SMB3 and later */
+#define IO_REPARSE_TAG_NFS           0x80000014
 
 /* fsctl flags */
 /* If Flags is set to this value, the request is an FSCTL not ioctl request */
-- 
cgit v1.2.1


From eb4c7df6c20b407ecbf1a985edc33d967371c2e8 Mon Sep 17 00:00:00 2001
From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Date: Thu, 3 Oct 2013 05:44:45 -0500
Subject: cifs: Avoid umount hangs with smb2 when server is unresponsive

Do not send SMB2 Logoff command when reconnecting, the way smb1
code base works.

Also, no need to wait for a credit for an echo command when one is already
in flight.

Without these changes, umount command hangs if the server is unresponsive
e.g. hibernating.

Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@us.ibm.com>
---
 fs/cifs/smb2pdu.c   | 6 ++++++
 fs/cifs/transport.c | 9 +++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index eba0efde66d7..edccb5252462 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -687,6 +687,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
 	else
 		return -EIO;
 
+	/* no need to send SMB logoff if uid already closed due to reconnect */
+	if (ses->need_reconnect)
+		goto smb2_session_already_dead;
+
 	rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req);
 	if (rc)
 		return rc;
@@ -701,6 +705,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
 	 * No tcon so can't do
 	 * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
 	 */
+
+smb2_session_already_dead:
 	return rc;
 }
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 6fdcb1b4a106..800b938e4061 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -410,8 +410,13 @@ static int
 wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
 		      const int optype)
 {
-	return wait_for_free_credits(server, timeout,
-				server->ops->get_credits_field(server, optype));
+	int *val;
+
+	val = server->ops->get_credits_field(server, optype);
+	/* Since an echo is already inflight, no need to wait to send another */
+	if (*val <= 0 && optype == CIFS_ECHO_OP)
+		return -EAGAIN;
+	return wait_for_free_credits(server, timeout, val);
 }
 
 static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
-- 
cgit v1.2.1


From 2f6c9479633780ba4a3484bba7eba5a721a5cf20 Mon Sep 17 00:00:00 2001
From: Jan Klos <honza.klos@gmail.com>
Date: Sun, 6 Oct 2013 21:08:20 +0200
Subject: cifs: Fix inability to write files >2GB to SMB2/3 shares

When connecting to SMB2/3 shares, maximum file size is set to non-LFS maximum in superblock. This is due to cap_large_files bit being different for SMB1 and SMB2/3 (where it is just an internal flag that is not negotiated and the SMB1 one corresponds to multichannel capability, so maybe LFS works correctly if server sends 0x08 flag) while capabilities are checked always for the SMB1 bit in cifs_read_super().

The patch fixes this by checking for the correct bit according to the protocol version.

CC: Stable <stable@kernel.org>
Signed-off-by: Jan Klos <honza.klos@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsfs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a16b4e58bcc6..77fc5e181077 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -120,14 +120,16 @@ cifs_read_super(struct super_block *sb)
 {
 	struct inode *inode;
 	struct cifs_sb_info *cifs_sb;
+	struct cifs_tcon *tcon;
 	int rc = 0;
 
 	cifs_sb = CIFS_SB(sb);
+	tcon = cifs_sb_master_tcon(cifs_sb);
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
 		sb->s_flags |= MS_POSIXACL;
 
-	if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES)
+	if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 	else
 		sb->s_maxbytes = MAX_NON_LFS;
@@ -147,7 +149,7 @@ cifs_read_super(struct super_block *sb)
 		goto out_no_root;
 	}
 
-	if (cifs_sb_master_tcon(cifs_sb)->nocase)
+	if (tcon->nocase)
 		sb->s_d_op = &cifs_ci_dentry_ops;
 	else
 		sb->s_d_op = &cifs_dentry_ops;
-- 
cgit v1.2.1


From dde2356c8466298bd77fa699e0ea296372eed47b Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Fri, 27 Sep 2013 18:35:42 +0100
Subject: cifs: Allow LANMAN auth method for servers supporting unencapsulated
 authentication methods

This allows users to use LANMAN authentication on servers which support
unencapsulated authentication.

The patch fixes a regression where users using plaintext authentication
were no longer able to do so because of changed bought in by patch
3f618223dc0bdcbc8d510350e78ee2195ff93768

https://bugzilla.redhat.com/show_bug.cgi?id=1011621

Reported-by: Panos Kavalagios <Panagiotis.Kavalagios@eurodyn.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/sess.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 352358de1d7e..e87387dbf39f 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -500,9 +500,9 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
 				return NTLMv2;
 			if (global_secflags & CIFSSEC_MAY_NTLM)
 				return NTLM;
-			/* Fallthrough */
 		default:
-			return Unspecified;
+			/* Fallthrough to attempt LANMAN authentication next */
+			break;
 		}
 	case CIFS_NEGFLAVOR_LANMAN:
 		switch (requested) {
-- 
cgit v1.2.1


From 4871c1588f92c6c13f4713a7009f25f217055807 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fusionio.com>
Date: Wed, 9 Oct 2013 12:24:04 -0400
Subject: Btrfs: use right root when checking for hash collision

btrfs_rename was using the root of the old dir instead of the root of the new
dir when checking for a hash collision, so if you tried to move a file into a
subvol it would freak out because it would see the file you are trying to move
in its current root.  This fixes the bug where this would fail

btrfs subvol create test1
btrfs subvol create test2
mv test1 test2.

Thanks to Chris Murphy for catching this,

Cc: stable@vger.kernel.org
Reported-by: Chris Murphy <lists@colorremedies.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8af6c03953ad..3b4ffaf0cd52 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7986,7 +7986,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 
 	/* check for collisions, even if the  name isn't there */
-	ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
 			     new_dentry->d_name.name,
 			     new_dentry->d_name.len);
 
-- 
cgit v1.2.1


From 7bf811a595a895b7a886dcf218d0d34f97df76dc Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fusionio.com>
Date: Mon, 7 Oct 2013 22:11:09 -0400
Subject: Btrfs: limit delalloc pages outside of find_delalloc_range

Liu fixed part of this problem and unfortunately I steered him in slightly the
wrong direction and so didn't completely fix the problem.  The problem is we
limit the size of the delalloc range we are looking for to max bytes and then we
try to lock that range.  If we fail to lock the pages in that range we will
shrink the max bytes to a single page and re loop.  However if our first page is
inside of the delalloc range then we will end up limiting the end of the range
to a period before our first page.  This is illustrated below

[0 -------- delalloc range --------- 256mb]
                                  [page]

So find_delalloc_range will return with delalloc_start as 0 and end as 128mb,
and then we will notice that delalloc_start < *start and adjust it up, but not
adjust delalloc_end up, so things go sideways.  To fix this we need to not limit
the max bytes in find_delalloc_range, but in find_lock_delalloc_range and that
way we don't end up with this confusion.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
---
 fs/btrfs/extent_io.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 43feb4663f5b..d8ea0cb200b4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1482,10 +1482,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
 		cur_start = state->end + 1;
 		node = rb_next(node);
 		total_bytes += state->end - state->start + 1;
-		if (total_bytes >= max_bytes) {
-			*end = *start + max_bytes - 1;
+		if (total_bytes >= max_bytes)
 			break;
-		}
 		if (!node)
 			break;
 	}
@@ -1627,10 +1625,9 @@ again:
 
 	/*
 	 * make sure to limit the number of pages we try to lock down
-	 * if we're looping.
 	 */
-	if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
-		delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	if (delalloc_end + 1 - delalloc_start > max_bytes)
+		delalloc_end = delalloc_start + max_bytes - 1;
 
 	/* step two, lock all the pages after the page that has start */
 	ret = lock_delalloc_pages(inode, locked_page,
@@ -1641,8 +1638,7 @@ again:
 		 */
 		free_extent_state(cached_state);
 		if (!loops) {
-			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
-			max_bytes = PAGE_CACHE_SIZE - offset;
+			max_bytes = PAGE_CACHE_SIZE;
 			loops = 1;
 			goto again;
 		} else {
-- 
cgit v1.2.1


From 14927d95464956ffe8af4278331a6bfea94ab780 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Wed, 25 Sep 2013 21:47:43 +0800
Subject: Btrfs: insert orphan roots into fs radix tree

Now we don't drop all the deleted snapshots/subvolumes before the space
balance. It means we have to relocate the space which is held by the dead
snapshots/subvolumes. So we must into them into fs radix tree, or we would
forget to commit the change of them when doing transaction commit, and it
would corrupt the metadata.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
---
 fs/btrfs/root-tree.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 0b1f4ef8db98..ec71ea44d2b4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
 			continue;
 		}
 
-		if (btrfs_root_refs(&root->root_item) == 0) {
-			btrfs_add_dead_root(root);
-			continue;
-		}
-
 		err = btrfs_init_fs_root(root);
 		if (err) {
 			btrfs_free_fs_root(root);
@@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
 			btrfs_free_fs_root(root);
 			break;
 		}
+
+		if (btrfs_root_refs(&root->root_item) == 0)
+			btrfs_add_dead_root(root);
 	}
 
 	btrfs_free_path(path);
-- 
cgit v1.2.1


From c00869f1ae6a8fa49802d5e60d843b7051a112ec Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Wed, 25 Sep 2013 21:47:44 +0800
Subject: Btrfs: fix oops caused by the space balance and dead roots

When doing space balance and subvolume destroy at the same time, we met
the following oops:

kernel BUG at fs/btrfs/relocation.c:2247!
RIP: 0010: [<ffffffffa04cec16>] prepare_to_merge+0x154/0x1f0 [btrfs]
Call Trace:
 [<ffffffffa04b5ab7>] relocate_block_group+0x466/0x4e6 [btrfs]
 [<ffffffffa04b5c7a>] btrfs_relocate_block_group+0x143/0x275 [btrfs]
 [<ffffffffa0495c56>] btrfs_relocate_chunk.isra.27+0x5c/0x5a2 [btrfs]
 [<ffffffffa0459871>] ? btrfs_item_key_to_cpu+0x15/0x31 [btrfs]
 [<ffffffffa048b46a>] ? btrfs_get_token_64+0x7e/0xcd [btrfs]
 [<ffffffffa04a3467>] ? btrfs_tree_read_unlock_blocking+0xb2/0xb7 [btrfs]
 [<ffffffffa049907d>] btrfs_balance+0x9c7/0xb6f [btrfs]
 [<ffffffffa049ef84>] btrfs_ioctl_balance+0x234/0x2ac [btrfs]
 [<ffffffffa04a1e8e>] btrfs_ioctl+0xd87/0x1ef9 [btrfs]
 [<ffffffff81122f53>] ? path_openat+0x234/0x4db
 [<ffffffff813c3b78>] ? __do_page_fault+0x31d/0x391
 [<ffffffff810f8ab6>] ? vma_link+0x74/0x94
 [<ffffffff811250f5>] vfs_ioctl+0x1d/0x39
 [<ffffffff811258c8>] do_vfs_ioctl+0x32d/0x3e2
 [<ffffffff811259d4>] SyS_ioctl+0x57/0x83
 [<ffffffff813c3bfa>] ? do_page_fault+0xe/0x10
 [<ffffffff813c73c2>] system_call_fastpath+0x16/0x1b

It is because we returned the error number if the reference of the root was 0
when doing space relocation. It was not right here, because though the root
was dead(refs == 0), but the space it held still need be relocated, or we
could not remove the block group. So in this case, we should return the root
no matter it is dead or not.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
---
 fs/btrfs/disk-io.c    |  9 +++++----
 fs/btrfs/disk-io.h    | 13 +++++++++++--
 fs/btrfs/relocation.c |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4ae17ed13b32..62176ad89846 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1561,8 +1561,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
-					      struct btrfs_key *location)
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+				     struct btrfs_key *location,
+				     bool check_ref)
 {
 	struct btrfs_root *root;
 	int ret;
@@ -1586,7 +1587,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 again:
 	root = btrfs_lookup_fs_root(fs_info, location->objectid);
 	if (root) {
-		if (btrfs_root_refs(&root->root_item) == 0)
+		if (check_ref && btrfs_root_refs(&root->root_item) == 0)
 			return ERR_PTR(-ENOENT);
 		return root;
 	}
@@ -1595,7 +1596,7 @@ again:
 	if (IS_ERR(root))
 		return root;
 
-	if (btrfs_root_refs(&root->root_item) == 0) {
+	if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
 		ret = -ENOENT;
 		goto fail;
 	}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b71acd6e1e5b..5ce2a7da8b11 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 int btrfs_init_fs_root(struct btrfs_root *root);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 			 struct btrfs_root *root);
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
-					      struct btrfs_key *location);
+
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+				     struct btrfs_key *key,
+				     bool check_ref);
+static inline struct btrfs_root *
+btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
+			   struct btrfs_key *location)
+{
+	return btrfs_get_fs_root(fs_info, location, true);
+}
+
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty(struct btrfs_root *root);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a5a26320503f..4a355726151e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
 	else
 		key.offset = (u64)-1;
 
-	return btrfs_read_fs_root_no_name(fs_info, &key);
+	return btrfs_get_fs_root(fs_info, &key, false);
 }
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-- 
cgit v1.2.1


From 6e4ea8e33b2057b85d75175dd89b93f5e26de3bc Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 10 Oct 2013 20:05:35 -0400
Subject: ext4: fix memory leak in xattr

If we take the 2nd retry path in ext4_expand_extra_isize_ea, we
potentionally return from the function without having freed these
allocations.  If we don't do the return, we over-write the previous
allocation pointers, so we leak either way.

Spotted with Coverity.

[ Fixed by tytso to set is and bs to NULL after freeing these
  pointers, in case in the retry loop we later end up triggering an
  error causing a jump to cleanup, at which point we could have a double
  free bug. -- Ted ]

Signed-off-by: Dave Jones <davej@fedoraproject.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Cc: stable@vger.kernel.org
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c081e34f717f..03e9bebba198 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1350,6 +1350,8 @@ retry:
 				    s_min_extra_isize) {
 					tried_min_extra_isize++;
 					new_extra_isize = s_min_extra_isize;
+					kfree(is); is = NULL;
+					kfree(bs); bs = NULL;
 					goto retry;
 				}
 				error = -1;
-- 
cgit v1.2.1


From 9d05746e7b16d8565dddbe3200faa1e669d23bbf Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 30 Sep 2013 08:35:10 -0700
Subject: vfs: allow O_PATH file descriptors for fstatfs()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Olga reported that file descriptors opened with O_PATH do not work with
fstatfs(), found during further development of ksh93's thread support.

There is no reason to not allow O_PATH file descriptors here (fstatfs is
very much a path operation), so use "fdget_raw()".  See commit
55815f70147d ("vfs: make O_PATH file descriptors usable for 'fstat()'")
for a very similar issue reported for fstat() by the same team.

Reported-and-tested-by: ольга крыжановская <olga.kryzhanovska@gmail.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@kernel.org	# O_PATH introduced in 3.0+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/statfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/statfs.c b/fs/statfs.c
index c219e733f553..083dc0ac9140 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -94,7 +94,7 @@ retry:
 
 int fd_statfs(int fd, struct kstatfs *st)
 {
-	struct fd f = fdget(fd);
+	struct fd f = fdget_raw(fd);
 	int error = -EBADF;
 	if (f.file) {
 		error = vfs_statfs(&f.file->f_path, st);
-- 
cgit v1.2.1


From 0c26606cbe4937f2228a27bb0c2cad19855be87a Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Sun, 13 Oct 2013 13:29:03 -0600
Subject: cifs: ntstatus_to_dos_map[] is not terminated

Functions that walk the ntstatus_to_dos_map[] array could
run off the end. For example, ntstatus_to_dos() loops
while ntstatus_to_dos_map[].ntstatus is not 0. Granted,
this is mostly theoretical, but could be used as a DOS attack
if the error code in the SMB header is bogus.

[Might consider adding to stable, as this patch is low risk - Steve]

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/netmisc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index af847e1cf1c1..651a5279607b 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -780,7 +780,9 @@ static const struct {
 	ERRDOS, ERRnoaccess, 0xc0000290}, {
 	ERRDOS, ERRbadfunc, 0xc000029c}, {
 	ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, {
-	ERRDOS, ERRinvlevel, 0x007c0001}, };
+	ERRDOS, ERRinvlevel, 0x007c0001}, {
+	0, 0, 0 }
+};
 
 /*****************************************************************************
  Print an error message from the status code
-- 
cgit v1.2.1


From 43ae9e3fc70ca0057ae0a24ef5eedff05e3fae06 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Thu, 10 Oct 2013 16:48:19 +0200
Subject: ext[34]: fix double put in tmpfile

d_tmpfile() already swallowed the inode ref.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/namei.c | 5 ++---
 fs/ext4/namei.c | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1194b1f0f839..f8cde46de9cd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1783,7 +1783,7 @@ retry:
 		d_tmpfile(dentry, inode);
 		err = ext3_orphan_add(handle, inode);
 		if (err)
-			goto err_drop_inode;
+			goto err_unlock_inode;
 		mark_inode_dirty(inode);
 		unlock_new_inode(inode);
 	}
@@ -1791,10 +1791,9 @@ retry:
 	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
-err_drop_inode:
+err_unlock_inode:
 	ext3_journal_stop(handle);
 	unlock_new_inode(inode);
-	iput(inode);
 	return err;
 }
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1bec5a5c1e45..5a0408d7b114 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2319,7 +2319,7 @@ retry:
 		d_tmpfile(dentry, inode);
 		err = ext4_orphan_add(handle, inode);
 		if (err)
-			goto err_drop_inode;
+			goto err_unlock_inode;
 		mark_inode_dirty(inode);
 		unlock_new_inode(inode);
 	}
@@ -2328,10 +2328,9 @@ retry:
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
-err_drop_inode:
+err_unlock_inode:
 	ext4_journal_stop(handle);
 	unlock_new_inode(inode);
-	iput(inode);
 	return err;
 }
 
-- 
cgit v1.2.1


From 3edc8376c06133e3386265a824869cad03a4efd4 Mon Sep 17 00:00:00 2001
From: "Geyslan G. Bem" <geyslan@gmail.com>
Date: Fri, 11 Oct 2013 16:49:16 -0300
Subject: ecryptfs: Fix memory leakage in keystore.c

In 'decrypt_pki_encrypted_session_key' function:

Initializes 'payload' pointer and releases it on exit.

Signed-off-by: Geyslan G. Bem <geyslan@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Cc: stable@vger.kernel.org # v2.6.28+
---
 fs/ecryptfs/keystore.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 7d52806c2119..4725a07f003c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
 	struct ecryptfs_msg_ctx *msg_ctx;
 	struct ecryptfs_message *msg = NULL;
 	char *auth_tok_sig;
-	char *payload;
+	char *payload = NULL;
 	size_t payload_len = 0;
 	int rc;
 
@@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
 	}
 out:
 	kfree(msg);
+	kfree(payload);
 	return rc;
 }
 
-- 
cgit v1.2.1


From e9cdd6e771580e6ff872e5c64e8b766972c7d1bc Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 16 Oct 2013 13:46:53 -0700
Subject: mm: /proc/pid/pagemap: inspect _PAGE_SOFT_DIRTY only on present pages

If a page we are inspecting is in swap we may occasionally report it as
having soft dirty bit (even if it is clean).  The pte_soft_dirty helper
should be called on present pte only.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7366e9d63cee..390bdab01c3c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		frame = pte_pfn(pte);
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
+		if (pte_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		swp_entry_t entry;
 		if (pte_swp_soft_dirty(pte))
@@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
-	if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
+	if ((vma->vm_flags & VM_SOFTDIRTY))
 		flags2 |= __PM_SOFT_DIRTY;
 
 	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
-- 
cgit v1.2.1


From 84235de394d9775bfaa7fa9762a59d91fef0c1fc Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 16 Oct 2013 13:47:00 -0700
Subject: fs: buffer: move allocation failure loop into the allocator

Buffer allocation has a very crude indefinite loop around waking the
flusher threads and performing global NOFS direct reclaim because it can
not handle allocation failures.

The most immediate problem with this is that the allocation may fail due
to a memory cgroup limit, where flushers + direct reclaim might not make
any progress towards resolving the situation at all.  Because unlike the
global case, a memory cgroup may not have any cache at all, only
anonymous pages but no swap.  This situation will lead to a reclaim
livelock with insane IO from waking the flushers and thrashing unrelated
filesystem cache in a tight loop.

Use __GFP_NOFAIL allocations for buffers for now.  This makes sure that
any looping happens in the page allocator, which knows how to
orchestrate kswapd, direct reclaim, and the flushers sensibly.  It also
allows memory cgroups to detect allocations that can't handle failure
and will allow them to ultimately bypass the limit if reclaim can not
make progress.

Reported-by: azurIt <azurit@pobox.sk>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 4d7433534f5c..6024877335ca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	struct buffer_head *bh;
 	sector_t end_block;
 	int ret = 0;		/* Will call free_more_memory() */
+	gfp_t gfp_mask;
 
-	page = find_or_create_page(inode->i_mapping, index,
-		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
+	gfp_mask |= __GFP_MOVABLE;
+	/*
+	 * XXX: __getblk_slow() can not really deal with failure and
+	 * will endlessly loop on improvised global reclaim.  Prefer
+	 * looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp_mask |= __GFP_NOFAIL;
+
+	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
 	if (!page)
 		return ret;
 
-- 
cgit v1.2.1


From 2cbe3b0af82279f14cfb3195f2406651f28ee9b8 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Date: Wed, 16 Oct 2013 13:47:04 -0700
Subject: procfs: fix unintended truncation of returned mapped address

Currently, proc_reg_get_unmapped_area truncates upper 32-bit of the
mapped virtual address returned from get_unmapped_area method in
pde->proc_fops due to the variable rv of signed integer on x86_64.  This
is too small to have vitual address of unsigned long on x86_64 since on
x86_64, signed integer is of 4 bytes while unsigned long is of 8 bytes.
To fix this issue, use unsigned long instead.

Fixes a regression added in commit c4fe24485729 ("sparc: fix PCI device
proc file mmap(2)").

Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9f8ef9b7674d..6c501c4d996d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -288,7 +288,7 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
-	int rv = -EIO;
+	unsigned long rv = -EIO;
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	if (use_pde(pde)) {
 		get_unmapped_area = pde->proc_fops->get_unmapped_area;
-- 
cgit v1.2.1


From fad1a86e25e0a1f85635ed06ef62ddadd5b8fa4c Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Date: Wed, 16 Oct 2013 13:47:05 -0700
Subject: procfs: call default get_unmapped_area on MMU-present architectures

Commit c4fe24485729 ("sparc: fix PCI device proc file mmap(2)") added
proc_reg_get_unmapped_area in proc_reg_file_ops and
proc_reg_file_ops_no_compat, by which now mmap always returns EIO if
get_unmapped_area method is not defined for the target procfs file,
which causes regression of mmap on /proc/vmcore.

To address this issue, like get_unmapped_area(), call default
current->mm->get_unmapped_area on MMU-present architectures if
pde->proc_fops->get_unmapped_area, i.e.  the one in actual file
operation in the procfs file, is not defined.

Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/inode.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6c501c4d996d..8eaa1ba793fc 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -289,9 +289,13 @@ static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	unsigned long rv = -EIO;
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL;
 	if (use_pde(pde)) {
-		get_unmapped_area = pde->proc_fops->get_unmapped_area;
+#ifdef CONFIG_MMU
+		get_unmapped_area = current->mm->get_unmapped_area;
+#endif
+		if (pde->proc_fops->get_unmapped_area)
+			get_unmapped_area = pde->proc_fops->get_unmapped_area;
 		if (get_unmapped_area)
 			rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
 		unuse_pde(pde);
-- 
cgit v1.2.1


From 1bda19eb73d68b304148e67253e47cef049a419d Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fusionio.com>
Date: Fri, 18 Oct 2013 12:10:36 -0400
Subject: Btrfs: release path before starting transaction in can_nocow_extent

We can't be holding tree locks while we try to start a transaction, we will
deadlock.  Thanks,

Reported-by: Sage Weil <sage@inktank.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b4ffaf0cd52..f4a6851e6c88 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6437,6 +6437,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
 
 	if (btrfs_extent_readonly(root, disk_bytenr))
 		goto out;
+	btrfs_release_path(path);
 
 	/*
 	 * look for other files referencing this extent, if we
-- 
cgit v1.2.1


From c7314d74fcb089b127ef5753b5263ac8473f33bc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 20 Oct 2013 08:44:39 -0400
Subject: nfsd regression since delayed fput()

Background: nfsd v[23] had throughput regression since delayed fput
went in; every read or write ends up doing fput() and we get a pair
of extra context switches out of that (plus quite a bit of work
in queue_work itselfi, apparently).  Use of schedule_delayed_work()
gives it a chance to accumulate a bit before we do __fput() on all
of them.  I'm not too happy about that solution, but... on at least
one real-world setup it reverts about 10% throughput loss we got from
switch to delayed fput.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index abdd15ad13c9..e900ca518635 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -297,7 +297,7 @@ void flush_delayed_fput(void)
 	delayed_fput(NULL);
 }
 
-static DECLARE_WORK(delayed_fput_work, delayed_fput);
+static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
 void fput(struct file *file)
 {
@@ -317,7 +317,7 @@ void fput(struct file *file)
 		}
 
 		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
-			schedule_work(&delayed_fput_work);
+			schedule_delayed_work(&delayed_fput_work, 1);
 	}
 }
 
-- 
cgit v1.2.1


From 606d6fe3ffdb5190d4c8e4d6cd23aa6c1f9cb6ad Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 19 Oct 2013 14:56:55 -0700
Subject: fs/namei.c: fix new kernel-doc warning

Add @path parameter to fix kernel-doc warning.
Also fix a spello/typo.

  Warning(fs/namei.c:2304): No description found for parameter 'path'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 645268f23eb6..caa28051e197 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2294,10 +2294,11 @@ out:
  * path_mountpoint - look up a path to be umounted
  * @dfd:	directory file descriptor to start walk from
  * @name:	full pathname to walk
+ * @path:	pointer to container for result
  * @flags:	lookup flags
  *
  * Look up the given name, but don't attempt to revalidate the last component.
- * Returns 0 and "path" will be valid on success; Retuns error otherwise.
+ * Returns 0 and "path" will be valid on success; Returns error otherwise.
  */
 static int
 path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
-- 
cgit v1.2.1


From 69c88dc7d9f1a6c3eceb7058111677c640811c94 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 19 Oct 2013 14:57:07 -0700
Subject: vfs: fix new kernel-doc warnings

Move kernel-doc notation to immediately before its function to eliminate
kernel-doc warnings introduced by commit db14fc3abcd5 ("vfs: add
d_walk()")

  Warning(fs/dcache.c:1343): No description found for parameter 'data'
  Warning(fs/dcache.c:1343): No description found for parameter 'dentry'
  Warning(fs/dcache.c:1343): Excess function parameter 'parent' description in 'check_mount'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 41000305d716..20532cb0b06e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1331,14 +1331,6 @@ rename_retry:
  * list is non-empty and continue searching.
  */
 
-/**
- * have_submounts - check for mounts over a dentry
- * @parent: dentry to check.
- *
- * Return true if the parent or its subdirectories contain
- * a mount point
- */
-
 static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
 {
 	int *ret = data;
@@ -1349,6 +1341,13 @@ static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
 	return D_WALK_CONTINUE;
 }
 
+/**
+ * have_submounts - check for mounts over a dentry
+ * @parent: dentry to check.
+ *
+ * Return true if the parent or its subdirectories contain
+ * a mount point
+ */
 int have_submounts(struct dentry *parent)
 {
 	int ret = 0;
-- 
cgit v1.2.1


From 43b7c6c6a4e3916edd186ceb61be0c67d1e0969e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 24 Oct 2013 14:08:07 +0000
Subject: eCryptfs: fix 32 bit corruption issue

Shifting page->index on 32 bit systems was overflowing, causing
data corruption of > 4GB files. Fix this by casting it first.

https://launchpad.net/bugs/1243636

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reported-by: Lars Duesing <lars.duesing@camelotsweb.de>
Cc: stable@vger.kernel.org # v3.11+
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
---
 fs/ecryptfs/crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c88e355f7635..000eae2782b6 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -408,7 +408,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat,
 				    struct page *page)
 {
 	return ecryptfs_lower_header_size(crypt_stat) +
-	       (page->index << PAGE_CACHE_SHIFT);
+	       ((loff_t)page->index << PAGE_CACHE_SHIFT);
 }
 
 /**
-- 
cgit v1.2.1


From 05e16745c0c471bba313961b605b6da3b21a853d Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Fri, 25 Oct 2013 18:15:06 +0800
Subject: seq_file: always update file->f_pos in seq_lseek()

This issue was first pointed out by Jiaxing Wang several months ago, but no
further comments:
https://lkml.org/lkml/2013/6/29/41

As we know pread() does not change f_pos, so after pread(), file->f_pos
and m->read_pos become different. And seq_lseek() does not update file->f_pos
if offset equals to m->read_pos, so after pread() and seq_lseek()(lseek to
m->read_pos), then a subsequent read may read from a wrong position, the
following program produces the problem:

    char str1[32] = { 0 };
    char str2[32] = { 0 };
    int poffset = 10;
    int count = 20;

    /*open any seq file*/
    int fd = open("/proc/modules", O_RDONLY);

    pread(fd, str1, count, poffset);
    printf("pread:%s\n", str1);

    /*seek to where m->read_pos is*/
    lseek(fd, poffset+count, SEEK_SET);

    /*supposed to read from poffset+count, but this read from position 0*/
    read(fd, str2, count);
    printf("read:%s\n", str2);

out put:
pread:
 ck_netbios_ns 12665
read:
 nf_conntrack_netbios

/proc/modules:
nf_conntrack_netbios_ns 12665 0 - Live 0xffffffffa038b000
nf_conntrack_broadcast 12589 1 nf_conntrack_netbios_ns, Live 0xffffffffa0386000

So we always update file->f_pos to offset in seq_lseek() to fix this issue.

Signed-off-by: Jiaxing Wang <hello.wjx@gmail.com>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/seq_file.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3135c2525c76..a290157265ef 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 				m->read_pos = offset;
 				retval = file->f_pos = offset;
 			}
+		} else {
+			file->f_pos = offset;
 		}
 	}
 	file->f_version = m->version;
-- 
cgit v1.2.1


From c511851de162e8ec03d62e7d7feecbdf590d881d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 29 Oct 2013 13:12:56 +0100
Subject: Revert "epoll: use freezable blocking call"

This reverts commit 1c441e921201 (epoll: use freezable blocking call)
which is reported to cause user space memory corruption to happen
after suspend to RAM.

Since it appears to be extremely difficult to root cause this
problem, it is best to revert the offending commit and try to address
the original issue in a better way later.

References: https://bugzilla.kernel.org/show_bug.cgi?id=61781
Reported-by: Natrio <natrio@list.ru>
Reported-by: Jeff Pohlmeyer <yetanothergeek@gmail.com>
Bisected-by: Leo Wolf <jclw@ymail.com>
Fixes: 1c441e921201 (epoll: use freezable blocking call)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 3.11+ <stable@vger.kernel.org> # 3.11+
---
 fs/eventpoll.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 473e09da7d02..810c28fb8c3c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,7 +34,6 @@
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
 #include <linux/device.h>
-#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/mman.h>
@@ -1605,8 +1604,7 @@ fetch_events:
 			}
 
 			spin_unlock_irqrestore(&ep->lock, flags);
-			if (!freezable_schedule_hrtimeout_range(to, slack,
-								HRTIMER_MODE_ABS))
+			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
 				timed_out = 1;
 
 			spin_lock_irqsave(&ep->lock, flags);
-- 
cgit v1.2.1


From 59612d187912750f416fbffe0c00bc0811c54ab5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 29 Oct 2013 23:43:08 +0100
Subject: Revert "select: use freezable blocking call"

This reverts commit 9745cdb36da8 (select: use freezable blocking call)
that triggers problems during resume from suspend to RAM on Paul Bolle's
32-bit x86 machines.  Paul says:

  Ever since I tried running (release candidates of) v3.11 on the two
  working i686s I still have lying around I ran into issues on resuming
  from suspend. Reverting 9745cdb36da8 (select: use freezable blocking
  call) resolves those issues.

  Resuming from suspend on i686 on (release candidates of) v3.11 and
  later triggers issues like:

  traps: systemd[1] general protection ip:b738e490 sp:bf882fc0 error:0 in libc-2.16.so[b731c000+1b0000]

  and

  traps: rtkit-daemon[552] general protection ip:804d6e5 sp:b6cb32f0 error:0 in rtkit-daemon[8048000+d000]

  Once I hit the systemd error I can only get out of the mess that the
  system is at that point by power cycling it.

Since we are reverting another freezer-related change causing similar
problems to happen, this one should be reverted as well.

References: https://lkml.org/lkml/2013/10/29/583
Reported-by: Paul Bolle <pebolle@tiscali.nl>
Fixes: 9745cdb36da8 (select: use freezable blocking call)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 3.11+ <stable@vger.kernel.org> # 3.11+
---
 fs/select.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/select.c b/fs/select.c
index 35d4adc749d9..dfd5cb18c012 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
 
 	set_current_state(state);
 	if (!pwq->triggered)
-		rc = freezable_schedule_hrtimeout_range(expires, slack,
-							HRTIMER_MODE_ABS);
+		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
 	__set_current_state(TASK_RUNNING);
 
 	/*
-- 
cgit v1.2.1