From 9c12a831d73dd938a22418d70b39aed4feb4bdf2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 16 Sep 2013 08:24:26 -0400 Subject: ext4: fix performance regression in writeback of random writes The Linux Kernel Performance project guys have reported that commit 4e7ea81db5 introduces a performance regression for the following fio workload: [global] direct=0 ioengine=mmap size=1500M bs=4k pre_read=1 numjobs=1 overwrite=1 loops=5 runtime=300 group_reporting invalidate=0 directory=/mnt/ file_service_type=random:36 file_service_type=random:36 [job0] startdelay=0 rw=randrw filename=data0/f1:data0/f2 [job1] startdelay=0 rw=randrw filename=data0/f2:data0/f1 ... [job7] startdelay=0 rw=randrw filename=data0/f2:data0/f1 The culprit of the problem is that after the commit ext4_writepages() are more aggressive in writing back pages. Thus we have less consecutive dirty pages resulting in more seeking. This increased aggressivity is caused by a bug in the condition terminating ext4_writepages(). We start writing from the beginning of the file even if we should have terminated ext4_writepages() because wbc->nr_to_write <= 0. After fixing the condition the throughput of the fio workload is about 20% better than before writeback reorganization. Reported-by: "Yan, Zheng" Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9115f2807515..4cf2619f007c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2559,7 +2559,7 @@ retry: break; } blk_finish_plug(&plug); - if (!ret && !cycled) { + if (!ret && !cycled && wbc->nr_to_write > 0) { cycled = 1; mpd.last_page = writeback_index - 1; mpd.first_page = 0; -- cgit v1.2.1 From 8660998608cfa1077e560034db81885af8e1e885 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 6 Sep 2013 21:49:56 -0500 Subject: jfs: fix error path in ialloc If insert_inode_locked() fails, we shouldn't be calling unlock_new_inode(). Signed-off-by: Dave Kleikamp Tested-by: Michael L. Semon Cc: stable@vger.kernel.org --- fs/jfs/jfs_inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index c1a3e603279c..7f464c513ba0 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) if (insert_inode_locked(inode) < 0) { rc = -EINVAL; - goto fail_unlock; + goto fail_put; } inode_init_owner(inode, parent, mode); @@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; -fail_unlock: clear_nlink(inode); unlock_new_inode(inode); fail_put: -- cgit v1.2.1 From c31f330719b7331b2112a5525fe5941a99ac223d Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 28 Sep 2013 18:24:12 -0500 Subject: do not treat non-symlink reparse points as valid symlinks Windows 8 and later can create NFS symlinks (within reparse points) which we were assuming were normal NTFS symlinks and thus reporting corrupt paths for. Add check for reparse points to make sure that they really are normal symlinks before we try to parse the pathname. We also should not be parsing other types of reparse points (DFS junctions etc) as if they were a symlink so return EOPNOTSUPP on those. Also fix endian errors (we were not parsing symlink lengths as little endian). This fixes commit d244bf2dfbebfded05f494ffd53659fa7b1e32c1 which implemented follow link for non-Unix CIFS mounts CC: Stable Reviewed-by: Andrew Bartlett Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 31 +++++++++++++++++++++++-------- fs/cifs/cifssmb.c | 40 ++++++++++++++++++++++++++++++++++------ fs/cifs/smbfsctl.h | 14 ++++++++++++++ 3 files changed, 71 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index a630475e421c..08f9dfb1a894 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1491,15 +1491,30 @@ struct file_notify_information { __u8 FileName[0]; } __attribute__((packed)); -struct reparse_data { - __u32 ReparseTag; - __u16 ReparseDataLength; +/* For IO_REPARSE_TAG_SYMLINK */ +struct reparse_symlink_data { + __le32 ReparseTag; + __le16 ReparseDataLength; __u16 Reserved; - __u16 SubstituteNameOffset; - __u16 SubstituteNameLength; - __u16 PrintNameOffset; - __u16 PrintNameLength; - __u32 Flags; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + char PathBuffer[0]; +} __attribute__((packed)); + +/* For IO_REPARSE_TAG_NFS */ +#define NFS_SPECFILE_LNK 0x00000000014B4E4C +#define NFS_SPECFILE_CHR 0x0000000000524843 +#define NFS_SPECFILE_BLK 0x00000000004B4C42 +#define NFS_SPECFILE_FIFO 0x000000004F464946 +#define NFS_SPECFILE_SOCK 0x000000004B434F53 +struct reparse_posix_data { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le64 InodeType; /* LNK, FIFO, CHR etc. */ char PathBuffer[0]; } __attribute__((packed)); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4baf35949b51..ccd31ab815d4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3088,7 +3088,8 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, bool is_unicode; unsigned int sub_len; char *sub_start; - struct reparse_data *reparse_buf; + struct reparse_symlink_data *reparse_buf; + struct reparse_posix_data *posix_buf; __u32 data_offset, data_count; char *end_of_smb; @@ -3137,20 +3138,47 @@ CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, goto qreparse_out; } end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - reparse_buf = (struct reparse_data *) + reparse_buf = (struct reparse_symlink_data *) ((char *)&pSMBr->hdr.Protocol + data_offset); if ((char *)reparse_buf >= end_of_smb) { rc = -EIO; goto qreparse_out; } - if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset + - reparse_buf->PrintNameLength) > end_of_smb) { + if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) { + cifs_dbg(FYI, "NFS style reparse tag\n"); + posix_buf = (struct reparse_posix_data *)reparse_buf; + + if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) { + cifs_dbg(FYI, "unsupported file type 0x%llx\n", + le64_to_cpu(posix_buf->InodeType)); + rc = -EOPNOTSUPP; + goto qreparse_out; + } + is_unicode = true; + sub_len = le16_to_cpu(reparse_buf->ReparseDataLength); + if (posix_buf->PathBuffer + sub_len > end_of_smb) { + cifs_dbg(FYI, "reparse buf beyond SMB\n"); + rc = -EIO; + goto qreparse_out; + } + *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer, + sub_len, is_unicode, nls_codepage); + goto qreparse_out; + } else if (reparse_buf->ReparseTag != + cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) { + rc = -EOPNOTSUPP; + goto qreparse_out; + } + + /* Reparse tag is NTFS symlink */ + sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) + + reparse_buf->PathBuffer; + sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength); + if (sub_start + sub_len > end_of_smb) { cifs_dbg(FYI, "reparse buf beyond SMB\n"); rc = -EIO; goto qreparse_out; } - sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer; - sub_len = reparse_buf->SubstituteNameLength; if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) is_unicode = true; else diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index d952ee48f4dc..a4b2391fe66e 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -97,9 +97,23 @@ #define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */ #define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */ +/* See FSCC 2.1.2.5 */ #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 #define IO_REPARSE_TAG_HSM 0xC0000004 #define IO_REPARSE_TAG_SIS 0x80000007 +#define IO_REPARSE_TAG_HSM2 0x80000006 +#define IO_REPARSE_TAG_DRIVER_EXTENDER 0x80000005 +/* Used by the DFS filter. See MS-DFSC */ +#define IO_REPARSE_TAG_DFS 0x8000000A +/* Used by the DFS filter See MS-DFSC */ +#define IO_REPARSE_TAG_DFSR 0x80000012 +#define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B +/* See section MS-FSCC 2.1.2.4 */ +#define IO_REPARSE_TAG_SYMLINK 0xA000000C +#define IO_REPARSE_TAG_DEDUP 0x80000013 +#define IO_REPARSE_APPXSTREAM 0xC0000014 +/* NFS symlinks, Win 8/SMB3 and later */ +#define IO_REPARSE_TAG_NFS 0x80000014 /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ -- cgit v1.2.1 From eb4c7df6c20b407ecbf1a985edc33d967371c2e8 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Thu, 3 Oct 2013 05:44:45 -0500 Subject: cifs: Avoid umount hangs with smb2 when server is unresponsive Do not send SMB2 Logoff command when reconnecting, the way smb1 code base works. Also, no need to wait for a credit for an echo command when one is already in flight. Without these changes, umount command hangs if the server is unresponsive e.g. hibernating. Signed-off-by: Shirish Pargaonkar Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 6 ++++++ fs/cifs/transport.c | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index eba0efde66d7..edccb5252462 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -687,6 +687,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) else return -EIO; + /* no need to send SMB logoff if uid already closed due to reconnect */ + if (ses->need_reconnect) + goto smb2_session_already_dead; + rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req); if (rc) return rc; @@ -701,6 +705,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) * No tcon so can't do * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); */ + +smb2_session_already_dead: return rc; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 6fdcb1b4a106..800b938e4061 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -410,8 +410,13 @@ static int wait_for_free_request(struct TCP_Server_Info *server, const int timeout, const int optype) { - return wait_for_free_credits(server, timeout, - server->ops->get_credits_field(server, optype)); + int *val; + + val = server->ops->get_credits_field(server, optype); + /* Since an echo is already inflight, no need to wait to send another */ + if (*val <= 0 && optype == CIFS_ECHO_OP) + return -EAGAIN; + return wait_for_free_credits(server, timeout, val); } static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, -- cgit v1.2.1 From 2f6c9479633780ba4a3484bba7eba5a721a5cf20 Mon Sep 17 00:00:00 2001 From: Jan Klos Date: Sun, 6 Oct 2013 21:08:20 +0200 Subject: cifs: Fix inability to write files >2GB to SMB2/3 shares When connecting to SMB2/3 shares, maximum file size is set to non-LFS maximum in superblock. This is due to cap_large_files bit being different for SMB1 and SMB2/3 (where it is just an internal flag that is not negotiated and the SMB1 one corresponds to multichannel capability, so maybe LFS works correctly if server sends 0x08 flag) while capabilities are checked always for the SMB1 bit in cifs_read_super(). The patch fixes this by checking for the correct bit according to the protocol version. CC: Stable Signed-off-by: Jan Klos Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a16b4e58bcc6..77fc5e181077 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -120,14 +120,16 @@ cifs_read_super(struct super_block *sb) { struct inode *inode; struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; int rc = 0; cifs_sb = CIFS_SB(sb); + tcon = cifs_sb_master_tcon(cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) sb->s_flags |= MS_POSIXACL; - if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES) + if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files) sb->s_maxbytes = MAX_LFS_FILESIZE; else sb->s_maxbytes = MAX_NON_LFS; @@ -147,7 +149,7 @@ cifs_read_super(struct super_block *sb) goto out_no_root; } - if (cifs_sb_master_tcon(cifs_sb)->nocase) + if (tcon->nocase) sb->s_d_op = &cifs_ci_dentry_ops; else sb->s_d_op = &cifs_dentry_ops; -- cgit v1.2.1 From dde2356c8466298bd77fa699e0ea296372eed47b Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 27 Sep 2013 18:35:42 +0100 Subject: cifs: Allow LANMAN auth method for servers supporting unencapsulated authentication methods This allows users to use LANMAN authentication on servers which support unencapsulated authentication. The patch fixes a regression where users using plaintext authentication were no longer able to do so because of changed bought in by patch 3f618223dc0bdcbc8d510350e78ee2195ff93768 https://bugzilla.redhat.com/show_bug.cgi?id=1011621 Reported-by: Panos Kavalagios Reviewed-by: Jeff Layton Signed-off-by: Sachin Prabhu Signed-off-by: Steve French --- fs/cifs/sess.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 352358de1d7e..e87387dbf39f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -500,9 +500,9 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) return NTLMv2; if (global_secflags & CIFSSEC_MAY_NTLM) return NTLM; - /* Fallthrough */ default: - return Unspecified; + /* Fallthrough to attempt LANMAN authentication next */ + break; } case CIFS_NEGFLAVOR_LANMAN: switch (requested) { -- cgit v1.2.1 From 4871c1588f92c6c13f4713a7009f25f217055807 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 9 Oct 2013 12:24:04 -0400 Subject: Btrfs: use right root when checking for hash collision btrfs_rename was using the root of the old dir instead of the root of the new dir when checking for a hash collision, so if you tried to move a file into a subvol it would freak out because it would see the file you are trying to move in its current root. This fixes the bug where this would fail btrfs subvol create test1 btrfs subvol create test2 mv test1 test2. Thanks to Chris Murphy for catching this, Cc: stable@vger.kernel.org Reported-by: Chris Murphy Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8af6c03953ad..3b4ffaf0cd52 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7986,7 +7986,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, new_dentry->d_name.name, new_dentry->d_name.len); -- cgit v1.2.1 From 7bf811a595a895b7a886dcf218d0d34f97df76dc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 7 Oct 2013 22:11:09 -0400 Subject: Btrfs: limit delalloc pages outside of find_delalloc_range Liu fixed part of this problem and unfortunately I steered him in slightly the wrong direction and so didn't completely fix the problem. The problem is we limit the size of the delalloc range we are looking for to max bytes and then we try to lock that range. If we fail to lock the pages in that range we will shrink the max bytes to a single page and re loop. However if our first page is inside of the delalloc range then we will end up limiting the end of the range to a period before our first page. This is illustrated below [0 -------- delalloc range --------- 256mb] [page] So find_delalloc_range will return with delalloc_start as 0 and end as 128mb, and then we will notice that delalloc_start < *start and adjust it up, but not adjust delalloc_end up, so things go sideways. To fix this we need to not limit the max bytes in find_delalloc_range, but in find_lock_delalloc_range and that way we don't end up with this confusion. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 43feb4663f5b..d8ea0cb200b4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1482,10 +1482,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, cur_start = state->end + 1; node = rb_next(node); total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) { - *end = *start + max_bytes - 1; + if (total_bytes >= max_bytes) break; - } if (!node) break; } @@ -1627,10 +1625,9 @@ again: /* * make sure to limit the number of pages we try to lock down - * if we're looping. */ - if (delalloc_end + 1 - delalloc_start > max_bytes && loops) - delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; + if (delalloc_end + 1 - delalloc_start > max_bytes) + delalloc_end = delalloc_start + max_bytes - 1; /* step two, lock all the pages after the page that has start */ ret = lock_delalloc_pages(inode, locked_page, @@ -1641,8 +1638,7 @@ again: */ free_extent_state(cached_state); if (!loops) { - unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); - max_bytes = PAGE_CACHE_SIZE - offset; + max_bytes = PAGE_CACHE_SIZE; loops = 1; goto again; } else { -- cgit v1.2.1 From 14927d95464956ffe8af4278331a6bfea94ab780 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 25 Sep 2013 21:47:43 +0800 Subject: Btrfs: insert orphan roots into fs radix tree Now we don't drop all the deleted snapshots/subvolumes before the space balance. It means we have to relocate the space which is held by the dead snapshots/subvolumes. So we must into them into fs radix tree, or we would forget to commit the change of them when doing transaction commit, and it would corrupt the metadata. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/root-tree.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0b1f4ef8db98..ec71ea44d2b4 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) continue; } - if (btrfs_root_refs(&root->root_item) == 0) { - btrfs_add_dead_root(root); - continue; - } - err = btrfs_init_fs_root(root); if (err) { btrfs_free_fs_root(root); @@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) btrfs_free_fs_root(root); break; } + + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); } btrfs_free_path(path); -- cgit v1.2.1 From c00869f1ae6a8fa49802d5e60d843b7051a112ec Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 25 Sep 2013 21:47:44 +0800 Subject: Btrfs: fix oops caused by the space balance and dead roots When doing space balance and subvolume destroy at the same time, we met the following oops: kernel BUG at fs/btrfs/relocation.c:2247! RIP: 0010: [] prepare_to_merge+0x154/0x1f0 [btrfs] Call Trace: [] relocate_block_group+0x466/0x4e6 [btrfs] [] btrfs_relocate_block_group+0x143/0x275 [btrfs] [] btrfs_relocate_chunk.isra.27+0x5c/0x5a2 [btrfs] [] ? btrfs_item_key_to_cpu+0x15/0x31 [btrfs] [] ? btrfs_get_token_64+0x7e/0xcd [btrfs] [] ? btrfs_tree_read_unlock_blocking+0xb2/0xb7 [btrfs] [] btrfs_balance+0x9c7/0xb6f [btrfs] [] btrfs_ioctl_balance+0x234/0x2ac [btrfs] [] btrfs_ioctl+0xd87/0x1ef9 [btrfs] [] ? path_openat+0x234/0x4db [] ? __do_page_fault+0x31d/0x391 [] ? vma_link+0x74/0x94 [] vfs_ioctl+0x1d/0x39 [] do_vfs_ioctl+0x32d/0x3e2 [] SyS_ioctl+0x57/0x83 [] ? do_page_fault+0xe/0x10 [] system_call_fastpath+0x16/0x1b It is because we returned the error number if the reference of the root was 0 when doing space relocation. It was not right here, because though the root was dead(refs == 0), but the space it held still need be relocated, or we could not remove the block group. So in this case, we should return the root no matter it is dead or not. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++---- fs/btrfs/disk-io.h | 13 +++++++++++-- fs/btrfs/relocation.c | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ae17ed13b32..62176ad89846 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1561,8 +1561,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, return ret; } -struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + bool check_ref) { struct btrfs_root *root; int ret; @@ -1586,7 +1587,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) { - if (btrfs_root_refs(&root->root_item) == 0) + if (check_ref && btrfs_root_refs(&root->root_item) == 0) return ERR_PTR(-ENOENT); return root; } @@ -1595,7 +1596,7 @@ again: if (IS_ERR(root)) return root; - if (btrfs_root_refs(&root->root_item) == 0) { + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { ret = -ENOENT; goto fail; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b71acd6e1e5b..5ce2a7da8b11 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, int btrfs_init_fs_root(struct btrfs_root *root); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *key, + bool check_ref); +static inline struct btrfs_root * +btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + return btrfs_get_fs_root(fs_info, location, true); +} + int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty(struct btrfs_root *root); void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a5a26320503f..4a355726151e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, else key.offset = (u64)-1; - return btrfs_read_fs_root_no_name(fs_info, &key); + return btrfs_get_fs_root(fs_info, &key, false); } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 -- cgit v1.2.1 From 6e4ea8e33b2057b85d75175dd89b93f5e26de3bc Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 10 Oct 2013 20:05:35 -0400 Subject: ext4: fix memory leak in xattr If we take the 2nd retry path in ext4_expand_extra_isize_ea, we potentionally return from the function without having freed these allocations. If we don't do the return, we over-write the previous allocation pointers, so we leak either way. Spotted with Coverity. [ Fixed by tytso to set is and bs to NULL after freeing these pointers, in case in the retry loop we later end up triggering an error causing a jump to cleanup, at which point we could have a double free bug. -- Ted ] Signed-off-by: Dave Jones Signed-off-by: "Theodore Ts'o" Reviewed-by: Eric Sandeen Cc: stable@vger.kernel.org --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c081e34f717f..03e9bebba198 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1350,6 +1350,8 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; goto retry; } error = -1; -- cgit v1.2.1 From 9d05746e7b16d8565dddbe3200faa1e669d23bbf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Sep 2013 08:35:10 -0700 Subject: vfs: allow O_PATH file descriptors for fstatfs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Olga reported that file descriptors opened with O_PATH do not work with fstatfs(), found during further development of ksh93's thread support. There is no reason to not allow O_PATH file descriptors here (fstatfs is very much a path operation), so use "fdget_raw()". See commit 55815f70147d ("vfs: make O_PATH file descriptors usable for 'fstat()'") for a very similar issue reported for fstat() by the same team. Reported-and-tested-by: ольга крыжановская Acked-by: Al Viro Cc: stable@kernel.org # O_PATH introduced in 3.0+ Signed-off-by: Linus Torvalds --- fs/statfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/statfs.c b/fs/statfs.c index c219e733f553..083dc0ac9140 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -94,7 +94,7 @@ retry: int fd_statfs(int fd, struct kstatfs *st) { - struct fd f = fdget(fd); + struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { error = vfs_statfs(&f.file->f_path, st); -- cgit v1.2.1 From 0c26606cbe4937f2228a27bb0c2cad19855be87a Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Sun, 13 Oct 2013 13:29:03 -0600 Subject: cifs: ntstatus_to_dos_map[] is not terminated Functions that walk the ntstatus_to_dos_map[] array could run off the end. For example, ntstatus_to_dos() loops while ntstatus_to_dos_map[].ntstatus is not 0. Granted, this is mostly theoretical, but could be used as a DOS attack if the error code in the SMB header is bogus. [Might consider adding to stable, as this patch is low risk - Steve] Reviewed-by: Jeff Layton Signed-off-by: Tim Gardner Signed-off-by: Steve French --- fs/cifs/netmisc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index af847e1cf1c1..651a5279607b 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -780,7 +780,9 @@ static const struct { ERRDOS, ERRnoaccess, 0xc0000290}, { ERRDOS, ERRbadfunc, 0xc000029c}, { ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, { - ERRDOS, ERRinvlevel, 0x007c0001}, }; + ERRDOS, ERRinvlevel, 0x007c0001}, { + 0, 0, 0 } +}; /***************************************************************************** Print an error message from the status code -- cgit v1.2.1 From 43ae9e3fc70ca0057ae0a24ef5eedff05e3fae06 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Oct 2013 16:48:19 +0200 Subject: ext[34]: fix double put in tmpfile d_tmpfile() already swallowed the inode ref. Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/ext3/namei.c | 5 ++--- fs/ext4/namei.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 1194b1f0f839..f8cde46de9cd 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1783,7 +1783,7 @@ retry: d_tmpfile(dentry, inode); err = ext3_orphan_add(handle, inode); if (err) - goto err_drop_inode; + goto err_unlock_inode; mark_inode_dirty(inode); unlock_new_inode(inode); } @@ -1791,10 +1791,9 @@ retry: if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; -err_drop_inode: +err_unlock_inode: ext3_journal_stop(handle); unlock_new_inode(inode); - iput(inode); return err; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1bec5a5c1e45..5a0408d7b114 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2319,7 +2319,7 @@ retry: d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); if (err) - goto err_drop_inode; + goto err_unlock_inode; mark_inode_dirty(inode); unlock_new_inode(inode); } @@ -2328,10 +2328,9 @@ retry: if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; -err_drop_inode: +err_unlock_inode: ext4_journal_stop(handle); unlock_new_inode(inode); - iput(inode); return err; } -- cgit v1.2.1 From 3edc8376c06133e3386265a824869cad03a4efd4 Mon Sep 17 00:00:00 2001 From: "Geyslan G. Bem" Date: Fri, 11 Oct 2013 16:49:16 -0300 Subject: ecryptfs: Fix memory leakage in keystore.c In 'decrypt_pki_encrypted_session_key' function: Initializes 'payload' pointer and releases it on exit. Signed-off-by: Geyslan G. Bem Signed-off-by: Tyler Hicks Cc: stable@vger.kernel.org # v2.6.28+ --- fs/ecryptfs/keystore.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 7d52806c2119..4725a07f003c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; - char *payload; + char *payload = NULL; size_t payload_len = 0; int rc; @@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, } out: kfree(msg); + kfree(payload); return rc; } -- cgit v1.2.1 From e9cdd6e771580e6ff872e5c64e8b766972c7d1bc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 16 Oct 2013 13:46:53 -0700 Subject: mm: /proc/pid/pagemap: inspect _PAGE_SOFT_DIRTY only on present pages If a page we are inspecting is in swap we may occasionally report it as having soft dirty bit (even if it is clean). The pte_soft_dirty helper should be called on present pte only. Signed-off-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Andy Lutomirski Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Reviewed-by: Naoya Horiguchi Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7366e9d63cee..390bdab01c3c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, frame = pte_pfn(pte); flags = PM_PRESENT; page = vm_normal_page(vma, addr, pte); + if (pte_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { swp_entry_t entry; if (pte_swp_soft_dirty(pte)) @@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, if (page && !PageAnon(page)) flags |= PM_FILE; - if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte)) + if ((vma->vm_flags & VM_SOFTDIRTY)) flags2 |= __PM_SOFT_DIRTY; *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); -- cgit v1.2.1 From 84235de394d9775bfaa7fa9762a59d91fef0c1fc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Oct 2013 13:47:00 -0700 Subject: fs: buffer: move allocation failure loop into the allocator Buffer allocation has a very crude indefinite loop around waking the flusher threads and performing global NOFS direct reclaim because it can not handle allocation failures. The most immediate problem with this is that the allocation may fail due to a memory cgroup limit, where flushers + direct reclaim might not make any progress towards resolving the situation at all. Because unlike the global case, a memory cgroup may not have any cache at all, only anonymous pages but no swap. This situation will lead to a reclaim livelock with insane IO from waking the flushers and thrashing unrelated filesystem cache in a tight loop. Use __GFP_NOFAIL allocations for buffers for now. This makes sure that any looping happens in the page allocator, which knows how to orchestrate kswapd, direct reclaim, and the flushers sensibly. It also allows memory cgroups to detect allocations that can't handle failure and will allow them to ultimately bypass the limit if reclaim can not make progress. Reported-by: azurIt Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 4d7433534f5c..6024877335ca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct buffer_head *bh; sector_t end_block; int ret = 0; /* Will call free_more_memory() */ + gfp_t gfp_mask; - page = find_or_create_page(inode->i_mapping, index, - (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); + gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; + gfp_mask |= __GFP_MOVABLE; + /* + * XXX: __getblk_slow() can not really deal with failure and + * will endlessly loop on improvised global reclaim. Prefer + * looping in the allocator rather than here, at least that + * code knows what it's doing. + */ + gfp_mask |= __GFP_NOFAIL; + + page = find_or_create_page(inode->i_mapping, index, gfp_mask); if (!page) return ret; -- cgit v1.2.1 From 2cbe3b0af82279f14cfb3195f2406651f28ee9b8 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 16 Oct 2013 13:47:04 -0700 Subject: procfs: fix unintended truncation of returned mapped address Currently, proc_reg_get_unmapped_area truncates upper 32-bit of the mapped virtual address returned from get_unmapped_area method in pde->proc_fops due to the variable rv of signed integer on x86_64. This is too small to have vitual address of unsigned long on x86_64 since on x86_64, signed integer is of 4 bytes while unsigned long is of 8 bytes. To fix this issue, use unsigned long instead. Fixes a regression added in commit c4fe24485729 ("sparc: fix PCI device proc file mmap(2)"). Signed-off-by: HATAYAMA Daisuke Cc: Alexey Dobriyan Cc: David S. Miller Tested-by: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 9f8ef9b7674d..6c501c4d996d 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -288,7 +288,7 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); - int rv = -EIO; + unsigned long rv = -EIO; unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); if (use_pde(pde)) { get_unmapped_area = pde->proc_fops->get_unmapped_area; -- cgit v1.2.1 From fad1a86e25e0a1f85635ed06ef62ddadd5b8fa4c Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 16 Oct 2013 13:47:05 -0700 Subject: procfs: call default get_unmapped_area on MMU-present architectures Commit c4fe24485729 ("sparc: fix PCI device proc file mmap(2)") added proc_reg_get_unmapped_area in proc_reg_file_ops and proc_reg_file_ops_no_compat, by which now mmap always returns EIO if get_unmapped_area method is not defined for the target procfs file, which causes regression of mmap on /proc/vmcore. To address this issue, like get_unmapped_area(), call default current->mm->get_unmapped_area on MMU-present architectures if pde->proc_fops->get_unmapped_area, i.e. the one in actual file operation in the procfs file, is not defined. Reported-by: Michael Holzheu Signed-off-by: HATAYAMA Daisuke Cc: Alexey Dobriyan Cc: David S. Miller Tested-by: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6c501c4d996d..8eaa1ba793fc 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -289,9 +289,13 @@ static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long { struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned long rv = -EIO; - unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL; if (use_pde(pde)) { - get_unmapped_area = pde->proc_fops->get_unmapped_area; +#ifdef CONFIG_MMU + get_unmapped_area = current->mm->get_unmapped_area; +#endif + if (pde->proc_fops->get_unmapped_area) + get_unmapped_area = pde->proc_fops->get_unmapped_area; if (get_unmapped_area) rv = get_unmapped_area(file, orig_addr, len, pgoff, flags); unuse_pde(pde); -- cgit v1.2.1 From 1bda19eb73d68b304148e67253e47cef049a419d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 18 Oct 2013 12:10:36 -0400 Subject: Btrfs: release path before starting transaction in can_nocow_extent We can't be holding tree locks while we try to start a transaction, we will deadlock. Thanks, Reported-by: Sage Weil Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b4ffaf0cd52..f4a6851e6c88 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6437,6 +6437,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, if (btrfs_extent_readonly(root, disk_bytenr)) goto out; + btrfs_release_path(path); /* * look for other files referencing this extent, if we -- cgit v1.2.1 From c7314d74fcb089b127ef5753b5263ac8473f33bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Oct 2013 08:44:39 -0400 Subject: nfsd regression since delayed fput() Background: nfsd v[23] had throughput regression since delayed fput went in; every read or write ends up doing fput() and we get a pair of extra context switches out of that (plus quite a bit of work in queue_work itselfi, apparently). Use of schedule_delayed_work() gives it a chance to accumulate a bit before we do __fput() on all of them. I'm not too happy about that solution, but... on at least one real-world setup it reverts about 10% throughput loss we got from switch to delayed fput. Signed-off-by: Al Viro --- fs/file_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index abdd15ad13c9..e900ca518635 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -297,7 +297,7 @@ void flush_delayed_fput(void) delayed_fput(NULL); } -static DECLARE_WORK(delayed_fput_work, delayed_fput); +static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); void fput(struct file *file) { @@ -317,7 +317,7 @@ void fput(struct file *file) } if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) - schedule_work(&delayed_fput_work); + schedule_delayed_work(&delayed_fput_work, 1); } } -- cgit v1.2.1 From 606d6fe3ffdb5190d4c8e4d6cd23aa6c1f9cb6ad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 19 Oct 2013 14:56:55 -0700 Subject: fs/namei.c: fix new kernel-doc warning Add @path parameter to fix kernel-doc warning. Also fix a spello/typo. Warning(fs/namei.c:2304): No description found for parameter 'path' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 645268f23eb6..caa28051e197 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2294,10 +2294,11 @@ out: * path_mountpoint - look up a path to be umounted * @dfd: directory file descriptor to start walk from * @name: full pathname to walk + * @path: pointer to container for result * @flags: lookup flags * * Look up the given name, but don't attempt to revalidate the last component. - * Returns 0 and "path" will be valid on success; Retuns error otherwise. + * Returns 0 and "path" will be valid on success; Returns error otherwise. */ static int path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) -- cgit v1.2.1 From 69c88dc7d9f1a6c3eceb7058111677c640811c94 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 19 Oct 2013 14:57:07 -0700 Subject: vfs: fix new kernel-doc warnings Move kernel-doc notation to immediately before its function to eliminate kernel-doc warnings introduced by commit db14fc3abcd5 ("vfs: add d_walk()") Warning(fs/dcache.c:1343): No description found for parameter 'data' Warning(fs/dcache.c:1343): No description found for parameter 'dentry' Warning(fs/dcache.c:1343): Excess function parameter 'parent' description in 'check_mount' Signed-off-by: Randy Dunlap Cc: Miklos Szeredi Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/dcache.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 41000305d716..20532cb0b06e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1331,14 +1331,6 @@ rename_retry: * list is non-empty and continue searching. */ -/** - * have_submounts - check for mounts over a dentry - * @parent: dentry to check. - * - * Return true if the parent or its subdirectories contain - * a mount point - */ - static enum d_walk_ret check_mount(void *data, struct dentry *dentry) { int *ret = data; @@ -1349,6 +1341,13 @@ static enum d_walk_ret check_mount(void *data, struct dentry *dentry) return D_WALK_CONTINUE; } +/** + * have_submounts - check for mounts over a dentry + * @parent: dentry to check. + * + * Return true if the parent or its subdirectories contain + * a mount point + */ int have_submounts(struct dentry *parent) { int ret = 0; -- cgit v1.2.1 From 43b7c6c6a4e3916edd186ceb61be0c67d1e0969e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Oct 2013 14:08:07 +0000 Subject: eCryptfs: fix 32 bit corruption issue Shifting page->index on 32 bit systems was overflowing, causing data corruption of > 4GB files. Fix this by casting it first. https://launchpad.net/bugs/1243636 Signed-off-by: Colin Ian King Reported-by: Lars Duesing Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c88e355f7635..000eae2782b6 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -408,7 +408,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, struct page *page) { return ecryptfs_lower_header_size(crypt_stat) + - (page->index << PAGE_CACHE_SHIFT); + ((loff_t)page->index << PAGE_CACHE_SHIFT); } /** -- cgit v1.2.1 From 05e16745c0c471bba313961b605b6da3b21a853d Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 25 Oct 2013 18:15:06 +0800 Subject: seq_file: always update file->f_pos in seq_lseek() This issue was first pointed out by Jiaxing Wang several months ago, but no further comments: https://lkml.org/lkml/2013/6/29/41 As we know pread() does not change f_pos, so after pread(), file->f_pos and m->read_pos become different. And seq_lseek() does not update file->f_pos if offset equals to m->read_pos, so after pread() and seq_lseek()(lseek to m->read_pos), then a subsequent read may read from a wrong position, the following program produces the problem: char str1[32] = { 0 }; char str2[32] = { 0 }; int poffset = 10; int count = 20; /*open any seq file*/ int fd = open("/proc/modules", O_RDONLY); pread(fd, str1, count, poffset); printf("pread:%s\n", str1); /*seek to where m->read_pos is*/ lseek(fd, poffset+count, SEEK_SET); /*supposed to read from poffset+count, but this read from position 0*/ read(fd, str2, count); printf("read:%s\n", str2); out put: pread: ck_netbios_ns 12665 read: nf_conntrack_netbios /proc/modules: nf_conntrack_netbios_ns 12665 0 - Live 0xffffffffa038b000 nf_conntrack_broadcast 12589 1 nf_conntrack_netbios_ns, Live 0xffffffffa0386000 So we always update file->f_pos to offset in seq_lseek() to fix this issue. Signed-off-by: Jiaxing Wang Signed-off-by: Gu Zheng Signed-off-by: Al Viro --- fs/seq_file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/seq_file.c b/fs/seq_file.c index 3135c2525c76..a290157265ef 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) m->read_pos = offset; retval = file->f_pos = offset; } + } else { + file->f_pos = offset; } } file->f_version = m->version; -- cgit v1.2.1 From c511851de162e8ec03d62e7d7feecbdf590d881d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Oct 2013 13:12:56 +0100 Subject: Revert "epoll: use freezable blocking call" This reverts commit 1c441e921201 (epoll: use freezable blocking call) which is reported to cause user space memory corruption to happen after suspend to RAM. Since it appears to be extremely difficult to root cause this problem, it is best to revert the offending commit and try to address the original issue in a better way later. References: https://bugzilla.kernel.org/show_bug.cgi?id=61781 Reported-by: Natrio Reported-by: Jeff Pohlmeyer Bisected-by: Leo Wolf Fixes: 1c441e921201 (epoll: use freezable blocking call) Signed-off-by: Rafael J. Wysocki Cc: 3.11+ # 3.11+ --- fs/eventpoll.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 473e09da7d02..810c28fb8c3c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -1605,8 +1604,7 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!freezable_schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); -- cgit v1.2.1 From 59612d187912750f416fbffe0c00bc0811c54ab5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Oct 2013 23:43:08 +0100 Subject: Revert "select: use freezable blocking call" This reverts commit 9745cdb36da8 (select: use freezable blocking call) that triggers problems during resume from suspend to RAM on Paul Bolle's 32-bit x86 machines. Paul says: Ever since I tried running (release candidates of) v3.11 on the two working i686s I still have lying around I ran into issues on resuming from suspend. Reverting 9745cdb36da8 (select: use freezable blocking call) resolves those issues. Resuming from suspend on i686 on (release candidates of) v3.11 and later triggers issues like: traps: systemd[1] general protection ip:b738e490 sp:bf882fc0 error:0 in libc-2.16.so[b731c000+1b0000] and traps: rtkit-daemon[552] general protection ip:804d6e5 sp:b6cb32f0 error:0 in rtkit-daemon[8048000+d000] Once I hit the systemd error I can only get out of the mess that the system is at that point by power cycling it. Since we are reverting another freezer-related change causing similar problems to happen, this one should be reverted as well. References: https://lkml.org/lkml/2013/10/29/583 Reported-by: Paul Bolle Fixes: 9745cdb36da8 (select: use freezable blocking call) Signed-off-by: Rafael J. Wysocki Cc: 3.11+ # 3.11+ --- fs/select.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 35d4adc749d9..dfd5cb18c012 100644 --- a/fs/select.c +++ b/fs/select.c @@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = freezable_schedule_hrtimeout_range(expires, slack, - HRTIMER_MODE_ABS); + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* -- cgit v1.2.1