From 8eb2d829ffea3677c21bd038f19e5d8ca6b43e36 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 14:48:01 +0800 Subject: btrfs: Fix threshold calculation for block groups smaller than 1GB If a block group is smaller than 1GB, the extent entry threadhold calculation will always set the threshold to 0. So as free space gets fragmented, btrfs will switch to use bitmap to manage free space, but then will never switch back to extents due to this bug. Reviewed-by: Josef Bacik Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 60d684266959..42f4015988ec 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1016,14 +1016,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) u64 max_bytes; u64 bitmap_bytes; u64 extent_bytes; + u64 size = block_group->key.offset; /* * The goal is to keep the total amount of memory used per 1gb of space * at or below 32k, so we need to adjust how much memory we allow to be * used by extent based free space tracking */ - max_bytes = MAX_CACHE_BYTES_PER_GIG * - (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); + if (size < 1024 * 1024 * 1024) + max_bytes = MAX_CACHE_BYTES_PER_GIG; + else + max_bytes = MAX_CACHE_BYTES_PER_GIG * + div64_u64(size, 1024 * 1024 * 1024); /* * we want to account for 1 more bitmap than what we have so we can make -- cgit v1.2.1 From edf6e2d1ddbac7f326b34a27adbca71ece53ccce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 14:50:07 +0800 Subject: btrfs: Add helper function free_bitmap() Remove some duplicated code. This prepares for the next patch. Reviewed-by: Josef Bacik Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 42f4015988ec..850104f05178 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1175,6 +1175,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, recalculate_thresholds(block_group); } +static void free_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *bitmap_info) +{ + unlink_free_space(block_group, bitmap_info); + kfree(bitmap_info->bitmap); + kfree(bitmap_info); + block_group->total_bitmaps--; + recalculate_thresholds(block_group); +} + static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_space *bitmap_info, u64 *offset, u64 *bytes) @@ -1215,13 +1225,8 @@ again: if (*bytes) { struct rb_node *next = rb_next(&bitmap_info->offset_index); - if (!bitmap_info->bytes) { - unlink_free_space(block_group, bitmap_info); - kfree(bitmap_info->bitmap); - kfree(bitmap_info); - block_group->total_bitmaps--; - recalculate_thresholds(block_group); - } + if (!bitmap_info->bytes) + free_bitmap(block_group, bitmap_info); /* * no entry after this bitmap, but we still have bytes to @@ -1254,13 +1259,8 @@ again: return -EAGAIN; goto again; - } else if (!bitmap_info->bytes) { - unlink_free_space(block_group, bitmap_info); - kfree(bitmap_info->bitmap); - kfree(bitmap_info); - block_group->total_bitmaps--; - recalculate_thresholds(block_group); - } + } else if (!bitmap_info->bytes) + free_bitmap(block_group, bitmap_info); return 0; } @@ -1689,13 +1689,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, ret = offset; if (entry->bitmap) { bitmap_clear_bits(block_group, entry, offset, bytes); - if (!entry->bytes) { - unlink_free_space(block_group, entry); - kfree(entry->bitmap); - kfree(entry); - block_group->total_bitmaps--; - recalculate_thresholds(block_group); - } + if (!entry->bytes) + free_bitmap(block_group, entry); } else { unlink_free_space(block_group, entry); entry->offset += bytes; -- cgit v1.2.1 From 70b7da304f9f9bbf1566085155895e32e775a745 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 14:51:45 +0800 Subject: btrfs: Free fully occupied bitmap in cluster If there's no more free space in a bitmap, we should free it. Reviewed-by: Josef Bacik Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 850104f05178..cb0137e4047f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1788,6 +1788,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, ret = search_start; bitmap_clear_bits(block_group, entry, ret, bytes); + if (entry->bytes == 0) + free_bitmap(block_group, entry); out: spin_unlock(&cluster->lock); spin_unlock(&block_group->tree_lock); -- cgit v1.2.1 From 5e71b5d5ec07e4b3fb4c78c4e4b108ff667f123f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 14:55:34 +0800 Subject: btrfs: Update stats when allocating from a cluster When allocating extent entry from a cluster, we should update the free_space and free_extents fields of the block group. Reviewed-by: Josef Bacik Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cb0137e4047f..2974c4744d5c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1843,15 +1843,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, entry->offset += bytes; entry->bytes -= bytes; - if (entry->bytes == 0) { + if (entry->bytes == 0) rb_erase(&entry->offset_index, &cluster->root); - kfree(entry); - } break; } out: spin_unlock(&cluster->lock); + if (!ret) + return 0; + + spin_lock(&block_group->tree_lock); + + block_group->free_space -= bytes; + if (entry->bytes == 0) { + block_group->free_extents--; + kfree(entry); + } + + spin_unlock(&block_group->tree_lock); + return ret; } -- cgit v1.2.1 From 120d66eec0dcb966fbd03f743598b2ff2513436b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 14:56:50 +0800 Subject: btrfs: Add a helper try_merge_free_space() When adding a new extent, we'll firstly see if we can merge this extent to the left or/and right extent. Extract this as a helper try_merge_free_space(). As a side effect, we fix a small bug that if the new extent has non-bitmap left entry but is unmergeble, we'll directly link the extent without trying to drop it into bitmap. This also prepares for the next patch. Reviewed-by: Josef Bacik Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 75 ++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2974c4744d5c..cf67dc3b7bf8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1363,22 +1363,14 @@ out: return ret; } -int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, - u64 offset, u64 bytes) +bool try_merge_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) { - struct btrfs_free_space *right_info = NULL; - struct btrfs_free_space *left_info = NULL; - struct btrfs_free_space *info = NULL; - int ret = 0; - - info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); - if (!info) - return -ENOMEM; - - info->offset = offset; - info->bytes = bytes; - - spin_lock(&block_group->tree_lock); + struct btrfs_free_space *left_info; + struct btrfs_free_space *right_info; + bool merged = false; + u64 offset = info->offset; + u64 bytes = info->bytes; /* * first we want to see if there is free space adjacent to the range we @@ -1392,27 +1384,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, else left_info = tree_search_offset(block_group, offset - 1, 0, 0); - /* - * If there was no extent directly to the left or right of this new - * extent then we know we're going to have to allocate a new extent, so - * before we do that see if we need to drop this into a bitmap - */ - if ((!left_info || left_info->bitmap) && - (!right_info || right_info->bitmap)) { - ret = insert_into_bitmap(block_group, info); - - if (ret < 0) { - goto out; - } else if (ret) { - ret = 0; - goto out; - } - } - if (right_info && !right_info->bitmap) { unlink_free_space(block_group, right_info); info->bytes += right_info->bytes; kfree(right_info); + merged = true; } if (left_info && !left_info->bitmap && @@ -1421,8 +1397,43 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, info->offset = left_info->offset; info->bytes += left_info->bytes; kfree(left_info); + merged = true; } + return merged; +} + +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *info; + int ret = 0; + + info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); + if (!info) + return -ENOMEM; + + info->offset = offset; + info->bytes = bytes; + + spin_lock(&block_group->tree_lock); + + if (try_merge_free_space(block_group, info)) + goto link; + + /* + * There was no extent directly to the left or right of this new + * extent then we know we're going to have to allocate a new extent, so + * before we do that see if we need to drop this into a bitmap + */ + ret = insert_into_bitmap(block_group, info); + if (ret < 0) { + goto out; + } else if (ret) { + ret = 0; + goto out; + } +link: ret = link_free_space(block_group, info); if (ret) kfree(info); -- cgit v1.2.1 From f333adb5d64bc1c4d6099072fc341c3c8f84e0cf Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 14:57:39 +0800 Subject: btrfs: Check mergeable free space when removing a cluster After returing extents from a cluster to the block group, some extents in the block group may be mergeable. Reviewed-by: Josef Bacik Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cf67dc3b7bf8..a5501edc3c9f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, return entry; } -static void unlink_free_space(struct btrfs_block_group_cache *block_group, - struct btrfs_free_space *info) +static inline void +__unlink_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) { rb_erase(&info->offset_index, &block_group->free_space_offset); block_group->free_extents--; +} + +static void unlink_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) +{ + __unlink_free_space(block_group, info); block_group->free_space -= info->bytes; } @@ -1364,7 +1371,7 @@ out: } bool try_merge_free_space(struct btrfs_block_group_cache *block_group, - struct btrfs_free_space *info) + struct btrfs_free_space *info, bool update_stat) { struct btrfs_free_space *left_info; struct btrfs_free_space *right_info; @@ -1385,7 +1392,10 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, left_info = tree_search_offset(block_group, offset - 1, 0, 0); if (right_info && !right_info->bitmap) { - unlink_free_space(block_group, right_info); + if (update_stat) + unlink_free_space(block_group, right_info); + else + __unlink_free_space(block_group, right_info); info->bytes += right_info->bytes; kfree(right_info); merged = true; @@ -1393,7 +1403,10 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, if (left_info && !left_info->bitmap && left_info->offset + left_info->bytes == offset) { - unlink_free_space(block_group, left_info); + if (update_stat) + unlink_free_space(block_group, left_info); + else + __unlink_free_space(block_group, left_info); info->offset = left_info->offset; info->bytes += left_info->bytes; kfree(left_info); @@ -1418,7 +1431,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, spin_lock(&block_group->tree_lock); - if (try_merge_free_space(block_group, info)) + if (try_merge_free_space(block_group, info, true)) goto link; /* @@ -1636,6 +1649,7 @@ __btrfs_return_cluster_to_free_space( node = rb_next(&entry->offset_index); rb_erase(&entry->offset_index, &cluster->root); BUG_ON(entry->bitmap); + try_merge_free_space(block_group, entry, false); tree_insert_offset(&block_group->free_space_offset, entry->offset, &entry->offset_index, 0); } -- cgit v1.2.1 From 83a4d54840c88a4a45c49670f044b8c7ddeaa8c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 27 Dec 2010 16:19:53 +0800 Subject: Btrfs: Fix memory leak at umount fs_info, which is allocated in open_ctree(), should be freed in close_ctree(). Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a5d2249e6da5..089871e5cd5a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2513,6 +2513,8 @@ int close_ctree(struct btrfs_root *root) kfree(fs_info->chunk_root); kfree(fs_info->dev_root); kfree(fs_info->csum_root); + kfree(fs_info); + return 0; } -- cgit v1.2.1 From bdc924bb4cdac92b945945c3149ab8191c92d75d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Dec 2010 16:33:15 +0800 Subject: Btrfs: Fix memory leak on finding existing super We missed a memory deallocation in commit 450ba0ea. If an existing super block is found at mount and there is no error condition then the pre-allocated tree_root and fs_info are no not used and are not freeded. Signed-off-by: Ian Kent Signed-off-by: Li Zefan --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 61bd79abb805..f50253c2279d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -654,6 +654,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, } btrfs_close_devices(fs_devices); + kfree(fs_info); + kfree(tree_root); } else { char b[BDEVNAME_SIZE]; -- cgit v1.2.1 From 3f3d0bc0df041236fad4ffa82188a6e4ef9af75e Mon Sep 17 00:00:00 2001 From: Tero Roponen Date: Mon, 27 Dec 2010 16:43:13 +0800 Subject: Btrfs: Free correct pointer after using strsep We must save and free the original kstrdup()'ed pointer because strsep() modifies its first argument. Signed-off-by: Tero Roponen Signed-off-by: Li Zefan --- fs/btrfs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f50253c2279d..78ee681465af 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -277,7 +277,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; - char *opts, *p; + char *opts, *orig, *p; int error = 0; int intarg; @@ -291,6 +291,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, opts = kstrdup(options, GFP_KERNEL); if (!opts) return -ENOMEM; + orig = opts; while ((p = strsep(&opts, ",")) != NULL) { int token; @@ -326,7 +327,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, } out_free_opts: - kfree(opts); + kfree(orig); out: /* * If no subvolume name is specified we use the default one. Allocate -- cgit v1.2.1 From d0f69686c2ae775529aadc7a8acc6f13ad41de66 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 25 Jan 2011 15:46:17 +0800 Subject: Btrfs: Don't return acl info when mounting with noacl option Steps to reproduce: # mkfs.btrfs /dev/sda2 # mount /dev/sda2 /mnt # touch /mnt/file0 # setfacl -m 'u:root:x,g::x,o::x' /mnt/file0 # umount /mnt # mount /dev/sda2 -o noacl /mnt # getfacl /mnt/file0 ... user::rw- user:root:--x group::--x mask::--x other::--x The output should be: user::rw- group::--x other::--x Signed-off-by: Miao Xie Signed-off-by: Li Zefan --- fs/btrfs/acl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 2222d161c7b6..3c52fc8afe29 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) char *value = NULL; struct posix_acl *acl; + if (!IS_POSIXACL(inode)) + return NULL; + acl = get_cached_acl(inode, type); if (acl != ACL_NOT_CACHED) return acl; @@ -82,6 +85,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, struct posix_acl *acl; int ret = 0; + if (!IS_POSIXACL(dentry->d_inode)) + return -EOPNOTSUPP; + acl = btrfs_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) -- cgit v1.2.1 From b897abec032deb7cc3ce67392a1f544ac965ddea Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 26 Jan 2011 16:19:22 +0800 Subject: Btrfs: Fix memory leak in writepage fixup work fixup, which is allocated when starting page write to fix up the extent without ORDERED bit set, should be freed after this work is done. Signed-off-by: Miao Xie Signed-off-by: Li Zefan --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f9194438f7c..3a6edc4c5642 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1544,6 +1544,7 @@ out: out_page: unlock_page(page); page_cache_release(page); + kfree(fixup); } /* -- cgit v1.2.1 From 4d728ec7aefdca5419d2ebfb28c147e81a4b59f4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 26 Jan 2011 14:10:43 +0800 Subject: Btrfs: Fix file clone when source offset is not 0 Suppose: - the source extent is: [0, 100] - the src offset is 10 - the clone length is 90 - the dest offset is 0 This statement: new_key.offset = key.offset + destoff - off will produce such an extent for the dest file: [ino, BTRFS_EXTENT_DATA_KEY, -10] , which is obviously wrong. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..1b61dab64062 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1788,7 +1788,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, memcpy(&new_key, &key, sizeof(new_key)); new_key.objectid = inode->i_ino; - new_key.offset = key.offset + destoff - off; + if (off <= key.offset) + new_key.offset = key.offset + destoff - off; + else + new_key.offset = destoff; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { -- cgit v1.2.1 From 6b82ce8d824bd46053e46a895876cde39d9026e4 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 26 Jan 2011 06:21:39 +0000 Subject: btrfs: fix uncheck memory allocation in btrfs_submit_compressed_read btrfs_submit_compressed_read() is lack of memory allocation checks and corresponding error route. After this fix, if it comes to "no memory" case, errno will be returned to userland step by step, and tell users this operation cannot go on. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 25 +++++++++++++++++++++++-- fs/btrfs/extent_io.c | 4 ++-- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f745287fbf2e..3a932f183da1 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 em_len; u64 em_start; struct extent_map *em; - int ret; + int ret = -ENOMEM; u32 *sums; tree = &BTRFS_I(inode)->io_tree; @@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); + if (!cb) + goto out; + atomic_set(&cb->pending_bios, 0); cb->errors = 0; cb->inode = inode; @@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE; - cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, + cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); + if (!cb->compressed_pages) + goto fail1; + bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; for (page_index = 0; page_index < nr_pages; page_index++) { cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!cb->compressed_pages[page_index]) + goto fail2; } cb->nr_pages = nr_pages; @@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->len = uncompressed_len; comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); + if (!comp_bio) + goto fail2; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; atomic_inc(&cb->pending_bios); @@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_put(comp_bio); return 0; + +fail2: + for (page_index = 0; page_index < nr_pages; page_index++) + free_page((unsigned long)cb->compressed_pages[page_index]); + + kfree(cb->compressed_pages); +fail1: + kfree(cb); +out: + free_extent_map(em); + return ret; } static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8b8d3d99ae68..6411ed6ca449 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1865,7 +1865,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, bio_get(bio); if (tree->ops && tree->ops->submit_bio_hook) - tree->ops->submit_bio_hook(page->mapping->host, rw, bio, + ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, mirror_num, bio_flags, start); else submit_bio(rw, bio); @@ -2126,7 +2126,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags); if (bio) - submit_one_bio(READ, bio, 0, bio_flags); + ret = submit_one_bio(READ, bio, 0, bio_flags); return ret; } -- cgit v1.2.1 From 2a29edc6b60a5248ccab588e7ba7dad38cef0235 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 26 Jan 2011 06:22:08 +0000 Subject: btrfs: fix several uncheck memory allocations To make btrfs more stable, add several missing necessary memory allocation checks, and when no memory, return proper errno. We've checked that some of those -ENOMEM errors will be returned to userspace, and some will be catched by BUG_ON() in the upper callers, and none will be ignored silently. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/export.c | 2 ++ fs/btrfs/file-item.c | 2 ++ fs/btrfs/file.c | 4 ++++ fs/btrfs/tree-log.c | 25 +++++++++++++++++++++++++ 4 files changed, 33 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 6f0444473594..3220ad1aafc8 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -176,6 +176,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child) int ret; path = btrfs_alloc_path(); + if (!path) + return ERR_PTR(-ENOMEM); if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { key.objectid = root->root_key.objectid; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a562a250ae77..d0bc72657cd7 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, root = root->fs_info->csum_root; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; while (1) { key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f903433f5bdf..65b2424a4116 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -945,6 +945,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + goto out; + } /* generic_write_checks can change our pos */ start_pos = pos; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 054744ac5719..c25a41d86118 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, } dst_copy = kmalloc(item_size, GFP_NOFS); src_copy = kmalloc(item_size, GFP_NOFS); + if (!dst_copy || !src_copy) { + btrfs_release_path(root, path); + kfree(dst_copy); + kfree(src_copy); + return -ENOMEM; + } read_extent_buffer(eb, src_copy, src_ptr, item_size); @@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(leaf, di, &location); name_len = btrfs_dir_name_len(leaf, di); name = kmalloc(name_len, GFP_NOFS); + if (!name) + return -ENOMEM; + read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); btrfs_release_path(root, path); @@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log, int match = 0; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ret = btrfs_search_slot(NULL, log, key, path, 0, 0); if (ret != 0) goto out; @@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, key.offset = (u64)-1; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); + if (!name) + return -ENOMEM; + log_type = btrfs_dir_type(eb, di); read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); @@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!next) + return -ENOMEM; if (*level == 1) { wc->process_func(root, next, wc, ptr_gen); @@ -2194,6 +2213,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, log = root->log_root; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -2594,6 +2616,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); + if (!ins_data) + return -ENOMEM; + ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); -- cgit v1.2.1 From 333e8105445d4f51101fc3d23199a919d66730b3 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 26 Jan 2011 06:22:33 +0000 Subject: btrfs: fix missing break in switch phrase There is a missing break in switch, fix it. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/print-tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0d126be22b63..fb2605d998e9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) #else BUG(); #endif + break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); -- cgit v1.2.1 From 34d19bada00f4825588b338a8ee193820f9ceeb0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Jan 2011 19:55:19 +0000 Subject: fs/btrfs/inode.c: Add missing IS_ERR test After the conditional that precedes the following code, inode may be an ERR_PTR value. This can eg result from a memory allocation failure via the call to btrfs_iget, and thus does not imply that root is different than sub_root. Thus, an IS_ERR check is added to ensure that there is no dereference of inode in this case. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ identifier f; @@ f(...) { ... return ERR_PTR(...); } @@ identifier r.f, fld; expression x; statement S1,S2; @@ x = f(...) ... when != IS_ERR(x) ( if (IS_ERR(x) ||...) S1 else S2 | *x->fld ) // Signed-off-by: Julia Lawall Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2c9a2f7d5631..2b7d251d6ad1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4137,7 +4137,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) } srcu_read_unlock(&root->fs_info->subvol_srcu, index); - if (root != sub_root) { + if (!IS_ERR(inode) && root != sub_root) { down_read(&root->fs_info->cleanup_work_sem); if (!(inode->i_sb->s_flags & MS_RDONLY)) btrfs_orphan_cleanup(sub_root); -- cgit v1.2.1 From 3612b49598c303cfb22a4b609427f829828e2427 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 25 Jan 2011 02:51:38 +0000 Subject: btrfs: fix return value check of btrfs_join_transaction() The error check of btrfs_join_transaction()/btrfs_join_transaction_nolock() is added, and the mistake of the error check in several places is corrected. For more stable Btrfs, I think that we should reduce BUG_ON(). But, I think that long time is necessary for this. So, I propose this patch as a short-term solution. With this patch: - To more stable Btrfs, the part that should be corrected is clarified. - The panic isn't done by the NULL pointer reference etc. (even if BUG_ON() is increased temporarily) - The error code is returned in the place where the error can be easily returned. As a long-term plan: - BUG_ON() is reduced by using the forced-readonly framework, etc. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++++ fs/btrfs/extent-tree.c | 2 +- fs/btrfs/inode.c | 24 ++++++++++++++++-------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/relocation.c | 26 +++++++++++++++++++++++--- fs/btrfs/transaction.c | 5 +++++ 6 files changed, 51 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2887b8be6fdd..b36eeef19194 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1550,6 +1550,7 @@ static int transaction_kthread(void *arg) spin_unlock(&root->fs_info->new_trans_lock); trans = btrfs_join_transaction(root, 1); + BUG_ON(IS_ERR(trans)); if (transid == trans->transid) { ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); @@ -2464,10 +2465,14 @@ int btrfs_commit_super(struct btrfs_root *root) up_write(&root->fs_info->cleanup_work_sem); trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); /* run commit again to drop the original snapshot */ trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bcf303204f7f..98ee139885cc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7478,7 +7478,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) BUG_ON(reloc_root->commit_root != NULL); while (1) { trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); mutex_lock(&root->fs_info->drop_mutex); ret = btrfs_drop_snapshot(trans, reloc_root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2b7d251d6ad1..40fee137dd11 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -416,7 +416,7 @@ again: } if (start == 0) { trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -612,6 +612,7 @@ retry: GFP_NOFS); trans = btrfs_join_transaction(root, 1); + BUG_ON(IS_ERR(trans)); ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, @@ -771,7 +772,7 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(root == root->fs_info->tree_root); trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1049,7 +1050,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, } else { trans = btrfs_join_transaction(root, 1); } - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); cow_start = (u64)-1; cur_offset = start; @@ -1704,7 +1705,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans = btrfs_join_transaction_nolock(root, 1); else trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode(trans, root, inode); @@ -1721,6 +1722,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans = btrfs_join_transaction_nolock(root, 1); else trans = btrfs_join_transaction(root, 1); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -2382,6 +2384,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) if (root->orphan_block_rsv || root->orphan_item_inserted) { trans = btrfs_join_transaction(root, 1); + BUG_ON(IS_ERR(trans)); btrfs_end_transaction(trans, root); } @@ -4350,6 +4353,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) trans = btrfs_join_transaction_nolock(root, 1); else trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_set_trans_block_group(trans, inode); if (nolock) ret = btrfs_end_transaction_nolock(trans, root); @@ -4375,6 +4380,7 @@ void btrfs_dirty_inode(struct inode *inode) return; trans = btrfs_join_transaction(root, 1); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); @@ -5179,6 +5185,8 @@ again: em = NULL; btrfs_release_path(root, path); trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return ERR_CAST(trans); goto again; } map = kmap(page); @@ -5283,8 +5291,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, btrfs_drop_extent_cache(inode, start, start + len - 1, 0); trans = btrfs_join_transaction(root, 0); - if (!trans) - return ERR_PTR(-ENOMEM); + if (IS_ERR(trans)) + return ERR_CAST(trans); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -5508,7 +5516,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * while we look for nocow cross refs */ trans = btrfs_join_transaction(root, 0); - if (!trans) + if (IS_ERR(trans)) goto must_cow; if (can_nocow_odirect(trans, inode, start, len) == 1) { @@ -5643,7 +5651,7 @@ again: BUG_ON(!ordered); trans = btrfs_join_transaction(root, 1); - if (!trans) { + if (IS_ERR(trans)) { err = -ENOMEM; goto out; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index edd82becbb9e..04b4fb9144a9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -203,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 045c9c2b2d7e..ea9965430241 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2147,6 +2147,12 @@ again: } trans = btrfs_join_transaction(rc->extent_root, 1); + if (IS_ERR(trans)) { + if (!err) + btrfs_block_rsv_release(rc->extent_root, + rc->block_rsv, num_bytes); + return PTR_ERR(trans); + } if (!err) { if (num_bytes != rc->merging_rsv_size) { @@ -3222,6 +3228,7 @@ truncate: trans = btrfs_join_transaction(root, 0); if (IS_ERR(trans)) { btrfs_free_path(path); + ret = PTR_ERR(trans); goto out; } @@ -3628,6 +3635,7 @@ int prepare_to_relocate(struct reloc_control *rc) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); + BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; } @@ -3804,7 +3812,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); btrfs_free_path(path); @@ -4125,6 +4136,11 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + err = PTR_ERR(trans); + goto out_free; + } rc->merge_reloc_tree = 1; @@ -4154,9 +4170,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); -out: + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); +out_free: kfree(rc); +out: while (!list_empty(&reloc_roots)) { reloc_root = list_entry(reloc_roots.next, struct btrfs_root, root_list); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bae5c7b8bbe2..3d73c8d93bbb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, INIT_DELAYED_WORK(&ac->work, do_async_commit); ac->root = root; ac->newtrans = btrfs_join_transaction(root, 0); + if (IS_ERR(ac->newtrans)) { + int err = PTR_ERR(ac->newtrans); + kfree(ac); + return err; + } /* take transaction reference */ mutex_lock(&root->fs_info->trans_mutex); -- cgit v1.2.1 From abd30bb0af9d4671506502278e8631bed9e3c35c Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 24 Jan 2011 00:57:10 +0000 Subject: btrfs: check return value of btrfs_start_ioctl_transaction() properly btrfs_start_ioctl_transaction() returns ERR_PTR(), not NULL. So, it is necessary to use IS_ERR() to check the return value. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 04b4fb9144a9..12dabe28cf54 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2085,7 +2085,7 @@ static long btrfs_ioctl_trans_start(struct file *file) ret = -ENOMEM; trans = btrfs_start_ioctl_transaction(root, 0); - if (!trans) + if (IS_ERR(trans)) goto out_drop; file->private_data = trans; -- cgit v1.2.1 From dedefd7215d3ec451291ca393e5c8e4c1882c8c6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jan 2011 21:43:18 +0000 Subject: Btrfs: fix check_path_shared so it returns the right value When running xfstests 224 I kept getting ENOSPC when trying to remove the files, and this is because we were returning ret from check_path_shared while it was uninitalized, which isn't right. Fix this to return 0 properly, and now xfstests 224 doesn't freak out when it tries to clean itself up. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 40fee137dd11..5621818921f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2718,9 +2718,10 @@ static int check_path_shared(struct btrfs_root *root, struct extent_buffer *eb; int level; u64 refs = 1; - int uninitialized_var(ret); for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + int ret; + if (!path->nodes[level]) break; eb = path->nodes[level]; @@ -2731,7 +2732,7 @@ static int check_path_shared(struct btrfs_root *root, if (refs > 1) return 1; } - return ret; /* XXX callers? */ + return 0; } /* -- cgit v1.2.1 From e9e22899de661af94cb9995885fd04e4c738838b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jan 2011 21:43:19 +0000 Subject: Btrfs: do not release more reserved bytes to the global_block_rsv than we need When we do btrfs_block_rsv_release, if global_block_rsv is not full we will release all the extra bytes to global_block_rsv, even if it's only a little short of the amount of space that we need to reserve. This causes us to starve ourselves of reservable space during the transaction which will force us to shrink delalloc bytes and commit the transaction more often than we should. So instead just add the amount of bytes we need to add to the global reserve so reserved == size, and then add the rest back into the space_info for general use. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 98ee139885cc..7af618dcf2c0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3589,8 +3589,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, if (num_bytes > 0) { if (dest) { - block_rsv_add_bytes(dest, num_bytes, 0); - } else { + spin_lock(&dest->lock); + if (!dest->full) { + u64 bytes_to_add; + + bytes_to_add = dest->size - dest->reserved; + bytes_to_add = min(num_bytes, bytes_to_add); + dest->reserved += bytes_to_add; + if (dest->reserved >= dest->size) + dest->full = 1; + num_bytes -= bytes_to_add; + } + spin_unlock(&dest->lock); + } + if (num_bytes) { spin_lock(&space_info->lock); space_info->bytes_reserved -= num_bytes; spin_unlock(&space_info->lock); -- cgit v1.2.1 From 68a82277b8619e6d0f2738b1d9b160b627e81e92 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jan 2011 21:43:20 +0000 Subject: Btrfs: use the global block reserve if we cannot reserve space We call use_block_rsv right before we make an allocation in order to make sure we have enough space. Now normally people have called btrfs_start_transaction() with the appropriate amount of space that we need, so we just use some of that pre-reserved space and move along happily. The problem is where people use btrfs_join_transaction(), which doesn't actually reserve any space. So we try and reserve space here, but we cannot flush delalloc, so this forces us to return -ENOSPC when in reality we have plenty of space. The most common symptom is seeing a bunch of "couldn't dirty inode" messages in syslog. With xfstests 224 we end up falling back to start_transaction and then doing all the flush delalloc stuff which causes to hang for a very long time. So instead steal from the global reserve, which is what this is meant for anyway. With this patch and the other 2 I have sent xfstests 224 now passes successfully. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7af618dcf2c0..ff6bbfd75cf7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5646,6 +5646,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize) { struct btrfs_block_rsv *block_rsv; + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; int ret; block_rsv = get_block_rsv(trans, root); @@ -5653,14 +5654,39 @@ use_block_rsv(struct btrfs_trans_handle *trans, if (block_rsv->size == 0) { ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, 0); - if (ret) + /* + * If we couldn't reserve metadata bytes try and use some from + * the global reserve. + */ + if (ret && block_rsv != global_rsv) { + ret = block_rsv_use_bytes(global_rsv, blocksize); + if (!ret) + return global_rsv; + return ERR_PTR(ret); + } else if (ret) { return ERR_PTR(ret); + } return block_rsv; } ret = block_rsv_use_bytes(block_rsv, blocksize); if (!ret) return block_rsv; + if (ret) { + WARN_ON(1); + ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, + 0); + if (!ret) { + spin_lock(&block_rsv->lock); + block_rsv->size += blocksize; + spin_unlock(&block_rsv->lock); + return block_rsv; + } else if (ret && block_rsv != global_rsv) { + ret = block_rsv_use_bytes(global_rsv, blocksize); + if (!ret) + return global_rsv; + } + } return ERR_PTR(-ENOSPC); } -- cgit v1.2.1 From ad0397a7a97f55fd7f70998ec208c5d8b90310ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Jan 2011 18:44:44 +0000 Subject: Btrfs: do error checking in btrfs_del_csums Got a report of a box panicing because we got a NULL eb in read_extent_buffer. His fs was borked and btrfs_search_path returned EIO, but we don't check for errors so the box paniced. Yes I know this will just make something higher up the stack panic, but that's a problem for future Josef. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d0bc72657cd7..4f19a3e1bf32 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -550,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, if (path->slots[0] == 0) goto out; path->slots[0]--; + } else if (ret < 0) { + goto out; } + leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); -- cgit v1.2.1 From 7adf5dfbb3af65a00e20b3ead224c3a1b40e4ec4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 25 Jan 2011 22:11:54 +0000 Subject: Btrfs: handle no memory properly in prepare_pages Instead of doing a BUG_ON(1) in prepare_pages if grab_cache_page() fails, just loop through the pages we've already grabbed and unlock and release them, then return -ENOMEM like we should. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 65b2424a4116..9e097fbfc78d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -792,8 +792,12 @@ again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { - err = -ENOMEM; - BUG_ON(1); + int c; + for (c = i - 1; c >= 0; c--) { + unlock_page(pages[c]); + page_cache_release(pages[c]); + } + return -ENOMEM; } wait_on_page_writeback(pages[i]); } -- cgit v1.2.1 From ffeb414a59291d5891f09727beb793c109f19f08 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 29 Jan 2011 07:03:02 -0500 Subject: cifs: fix two compiler warning about uninitialized vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/cifs/link.c: In function ‘symlink_hash’: fs/cifs/link.c:58:3: warning: ‘rc’ may be used uninitialized in this function [-Wuninitialized] fs/cifs/smbencrypt.c: In function ‘mdfour’: fs/cifs/smbencrypt.c:61:3: warning: ‘rc’ may be used uninitialized in this function [-Wuninitialized] Reviewed-by: Shirish Pargaonkar Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/link.c | 3 ++- fs/cifs/smbencrypt.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 02cd60aefbff..e8804d373404 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -55,8 +55,9 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) md5 = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(md5)) { + rc = PTR_ERR(md5); cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc); - return PTR_ERR(md5); + return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); sdescmd5 = kmalloc(size, GFP_KERNEL); diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index b5450e9f40c0..b5041c849981 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -58,8 +58,9 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) md4 = crypto_alloc_shash("md4", 0, 0); if (IS_ERR(md4)) { + rc = PTR_ERR(md4); cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc); - return PTR_ERR(md4); + return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md4); sdescmd4 = kmalloc(size, GFP_KERNEL); -- cgit v1.2.1 From 1be912dde772b77aaaa21770eeabb0a7a5e297a6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 28 Jan 2011 07:08:28 -0500 Subject: cifs: handle cancelled requests better Currently, when a request is cancelled via signal, we delete the mid immediately. If the request was already transmitted however, the client is still likely to receive a response. When it does, it won't recognize it however and will pop a printk. It's also a little dangerous to just delete the mid entry like this. We may end up reusing that mid. If we do then we could potentially get the response from the first request confused with the later one. Prevent the reuse of mids by marking them as cancelled and keeping them on the pending_mid_q list. If the reply comes in, we'll delete it from the list then. If it never comes, then we'll delete it at reconnect or when cifsd comes down. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c1ccca1a933f..9b2d0373a8a7 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -579,8 +579,17 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, goto out; rc = wait_for_response(ses->server, midQ); - if (rc != 0) - goto out; + if (rc != 0) { + spin_lock(&GlobalMid_Lock); + if (midQ->midState == MID_REQUEST_SUBMITTED) { + midQ->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; + } + spin_unlock(&GlobalMid_Lock); + } rc = sync_mid_result(midQ, ses->server); if (rc != 0) { @@ -724,8 +733,18 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, goto out; rc = wait_for_response(ses->server, midQ); - if (rc != 0) - goto out; + if (rc != 0) { + spin_lock(&GlobalMid_Lock); + if (midQ->midState == MID_REQUEST_SUBMITTED) { + /* no longer considered to be "in-flight" */ + midQ->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; + } + spin_unlock(&GlobalMid_Lock); + } rc = sync_mid_result(midQ, ses->server); if (rc != 0) { @@ -922,10 +941,20 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } } - if (wait_for_response(ses->server, midQ) == 0) { - /* We got the response - restart system call. */ - rstart = 1; + rc = wait_for_response(ses->server, midQ); + if (rc) { + spin_lock(&GlobalMid_Lock); + if (midQ->midState == MID_REQUEST_SUBMITTED) { + /* no longer considered to be "in-flight" */ + midQ->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + return rc; + } + spin_unlock(&GlobalMid_Lock); } + + /* We got the response - restart system call. */ + rstart = 1; } rc = sync_mid_result(midQ, ses->server); -- cgit v1.2.1 From 2db7c5815555d8daabf7d4ab1253ce690852c140 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 28 Jan 2011 07:08:28 -0500 Subject: cifs: send an NT_CANCEL request when a process is signalled Use the new send_nt_cancel function to send an NT_CANCEL when the process is delivered a fatal signal. This is a "best effort" enterprise however, so don't bother to check the return code. There's nothing we can reasonably do if it fails anyway. Reviewed-by: Pavel Shilovsky Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 9b2d0373a8a7..bdaa4aa58b03 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -570,20 +570,25 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #endif mutex_unlock(&ses->server->srv_mutex); - cifs_small_buf_release(in_buf); - if (rc < 0) + if (rc < 0) { + cifs_small_buf_release(in_buf); goto out; + } - if (long_op == CIFS_ASYNC_OP) + if (long_op == CIFS_ASYNC_OP) { + cifs_small_buf_release(in_buf); goto out; + } rc = wait_for_response(ses->server, midQ); if (rc != 0) { + send_nt_cancel(ses->server, in_buf, midQ); spin_lock(&GlobalMid_Lock); if (midQ->midState == MID_REQUEST_SUBMITTED) { midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); + cifs_small_buf_release(in_buf); atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; @@ -591,6 +596,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, spin_unlock(&GlobalMid_Lock); } + cifs_small_buf_release(in_buf); + rc = sync_mid_result(midQ, ses->server); if (rc != 0) { atomic_dec(&ses->server->inFlight); @@ -734,6 +741,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, rc = wait_for_response(ses->server, midQ); if (rc != 0) { + send_nt_cancel(ses->server, in_buf, midQ); spin_lock(&GlobalMid_Lock); if (midQ->midState == MID_REQUEST_SUBMITTED) { /* no longer considered to be "in-flight" */ @@ -943,6 +951,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, rc = wait_for_response(ses->server, midQ); if (rc) { + send_nt_cancel(ses->server, in_buf, midQ); spin_lock(&GlobalMid_Lock); if (midQ->midState == MID_REQUEST_SUBMITTED) { /* no longer considered to be "in-flight" */ -- cgit v1.2.1 From 68abaffa6bbd3cadfaa4b7216d10bcd32406090b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 28 Jan 2011 15:05:42 -0500 Subject: cifs: simplify SMB header check routine ...just cleanup. There should be no behavior change. Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/misc.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a09e077ba925..72e99ece78cf 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -381,29 +381,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , } static int -checkSMBhdr(struct smb_hdr *smb, __u16 mid) +check_smb_hdr(struct smb_hdr *smb, __u16 mid) { - /* Make sure that this really is an SMB, that it is a response, - and that the message ids match */ - if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && - (mid == smb->Mid)) { - if (smb->Flags & SMBFLG_RESPONSE) - return 0; - else { - /* only one valid case where server sends us request */ - if (smb->Command == SMB_COM_LOCKING_ANDX) - return 0; - else - cERROR(1, "Received Request not response"); - } - } else { /* bad signature or mid */ - if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) - cERROR(1, "Bad protocol string signature header %x", - *(unsigned int *) smb->Protocol); - if (mid != smb->Mid) - cERROR(1, "Mids do not match"); + /* does it have the right SMB "signature" ? */ + if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) { + cERROR(1, "Bad protocol string signature header 0x%x", + *(unsigned int *)smb->Protocol); + return 1; } - cERROR(1, "bad smb detected. The Mid=%d", smb->Mid); + + /* Make sure that message ids match */ + if (mid != smb->Mid) { + cERROR(1, "Mids do not match. received=%u expected=%u", + smb->Mid, mid); + return 1; + } + + /* if it's a response then accept */ + if (smb->Flags & SMBFLG_RESPONSE) + return 0; + + /* only one valid case where server sends us request */ + if (smb->Command == SMB_COM_LOCKING_ANDX) + return 0; + + cERROR(1, "Server sent request, not response. mid=%u", smb->Mid); return 1; } @@ -448,7 +450,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) return 1; } - if (checkSMBhdr(smb, mid)) + if (check_smb_hdr(smb, mid)) return 1; clc_len = smbCalcSize_LE(smb); -- cgit v1.2.1 From d804d41d163c0975d2890c82d7135ada7a2f23a4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 28 Jan 2011 15:05:43 -0500 Subject: cifs: don't pop a printk when sending on a socket is interrupted If we kill the process while it's sending on a socket then the kernel_sendmsg will return -EINTR. This is normal. No need to spam the ring buffer with this info. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bdaa4aa58b03..b8c5e2eb43d0 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -236,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) server->tcpStatus = CifsNeedReconnect; } - if (rc < 0) { + if (rc < 0 && rc != -EINTR) cERROR(1, "Error %d sending data on socket to server", rc); - } else + else rc = 0; /* Don't want to modify the buffer as a -- cgit v1.2.1 From 92a4e0f0169498867ecb19c2244510dd4beba149 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 29 Jan 2011 07:02:28 -0500 Subject: cifs: force a reconnect if there are too many MIDs in flight Currently, we allow the pending_mid_q to grow without bound with SIGKILL'ed processes. This could eventually be a DoS'able problem. An unprivileged user could a process that does a long-running call and then SIGKILL it. If he can also intercept the NT_CANCEL calls or the replies from the server, then the pending_mid_q could grow very large, possibly even to 2^16 entries which might leave GetNextMid in an infinite loop. Fix this by imposing a hard limit of 32k calls per server. If we cross that limit, set the tcpStatus to CifsNeedReconnect to force cifsd to eventually reconnect the socket and clean out the pending_mid_q. While we're at it, clean up the function a bit and eliminate an unnecessary NULL pointer check. Signed-off-by: Jeff Layton Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/misc.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 72e99ece78cf..24f0a9d97ad8 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server) { __u16 mid = 0; __u16 last_mid; - int collision; - - if (server == NULL) - return mid; + bool collision; spin_lock(&GlobalMid_Lock); last_mid = server->CurrentMid; /* we do not want to loop forever */ @@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server) (and it would also have to have been a request that did not time out) */ while (server->CurrentMid != last_mid) { - struct list_head *tmp; struct mid_q_entry *mid_entry; + unsigned int num_mids; - collision = 0; + collision = false; if (server->CurrentMid == 0) server->CurrentMid++; - list_for_each(tmp, &server->pending_mid_q) { - mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - - if ((mid_entry->mid == server->CurrentMid) && - (mid_entry->midState == MID_REQUEST_SUBMITTED)) { + num_mids = 0; + list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { + ++num_mids; + if (mid_entry->mid == server->CurrentMid && + mid_entry->midState == MID_REQUEST_SUBMITTED) { /* This mid is in use, try a different one */ - collision = 1; + collision = true; break; } } - if (collision == 0) { + + /* + * if we have more than 32k mids in the list, then something + * is very wrong. Possibly a local user is trying to DoS the + * box by issuing long-running calls and SIGKILL'ing them. If + * we get to 2^16 mids then we're in big trouble as this + * function could loop forever. + * + * Go ahead and assign out the mid in this situation, but force + * an eventual reconnect to clean out the pending_mid_q. + */ + if (num_mids > 32768) + server->tcpStatus = CifsNeedReconnect; + + if (!collision) { mid = server->CurrentMid; break; } -- cgit v1.2.1 From f855f6cbeb4f94cd4e4a225c2246ee8012c384a2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 31 Jan 2011 08:41:36 -0500 Subject: cifs: make CIFS depend on CRYPTO_MD4 Recently CIFS was changed to use the kernel crypto API for MD4 hashes, but the Kconfig dependencies were not changed to reflect this. Signed-off-by: Jeff Layton Reported-and-Tested-by: Suresh Jayaraman Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index ee45648b0d1a..7cb0f7f847e4 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -3,6 +3,7 @@ config CIFS depends on INET select NLS select CRYPTO + select CRYPTO_MD4 select CRYPTO_MD5 select CRYPTO_HMAC select CRYPTO_ARC4 -- cgit v1.2.1 From 31c2659d78c8be970833bc1e633593d291553ed3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 31 Jan 2011 07:24:46 -0500 Subject: cifs: clean up some compiler warnings New compiler warnings that I noticed when building a patchset based on recent Fedora kernel: fs/cifs/cifssmb.c: In function 'CIFSSMBSetFileSize': fs/cifs/cifssmb.c:4813:8: warning: variable 'data_offset' set but not used [-Wunused-but-set-variable] fs/cifs/file.c: In function 'cifs_open': fs/cifs/file.c:349:24: warning: variable 'pCifsInode' set but not used [-Wunused-but-set-variable] fs/cifs/file.c: In function 'cifs_partialpagewrite': fs/cifs/file.c:1149:23: warning: variable 'cifs_sb' set but not used [-Wunused-but-set-variable] fs/cifs/file.c: In function 'cifs_iovec_write': fs/cifs/file.c:1740:9: warning: passing argument 6 of 'CIFSSMBWrite2' from incompatible pointer type [enabled by default] fs/cifs/cifsproto.h:337:12: note: expected 'unsigned int *' but argument is of type 'size_t *' fs/cifs/readdir.c: In function 'cifs_readdir': fs/cifs/readdir.c:767:23: warning: variable 'cifs_sb' set but not used [-Wunused-but-set-variable] fs/cifs/cifs_dfs_ref.c: In function 'cifs_dfs_d_automount': fs/cifs/cifs_dfs_ref.c:342:2: warning: 'rc' may be used uninitialized in this function [-Wuninitialized] fs/cifs/cifs_dfs_ref.c:278:6: note: 'rc' was declared here Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 9 ++++----- fs/cifs/cifssmb.c | 3 --- fs/cifs/file.c | 8 ++------ fs/cifs/readdir.c | 3 --- 4 files changed, 6 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index f1c68629f277..0a265ad9e426 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -282,8 +282,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) cFYI(1, "in %s", __func__); BUG_ON(IS_ROOT(mntpt)); - xid = GetXid(); - /* * The MSDFS spec states that paths in DFS referral requests and * responses must be prefixed by a single '\' character instead of @@ -293,7 +291,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) mnt = ERR_PTR(-ENOMEM); full_path = build_path_from_dentry(mntpt); if (full_path == NULL) - goto free_xid; + goto cdda_exit; cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); @@ -303,9 +301,11 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) } ses = tlink_tcon(tlink)->ses; + xid = GetXid(); rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + FreeXid(xid); cifs_put_tlink(tlink); @@ -338,8 +338,7 @@ success: free_dfs_info_array(referrals, num_referrals); free_full_path: kfree(full_path); -free_xid: - FreeXid(xid); +cdda_exit: cFYI(1, "leaving %s" , __func__); return mnt; } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3106f5e5c633..46c66ed01af4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4914,7 +4914,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, __u16 fid, __u32 pid_of_opener, bool SetAllocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; - char *data_offset; struct file_end_of_file_info *parm_data; int rc = 0; __u16 params, param_offset, offset, byte_count, count; @@ -4938,8 +4937,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; - count = sizeof(struct file_end_of_file_info); pSMB->MaxParameterCount = cpu_to_le16(2); /* BB find exact max SMB PDU from sess structure BB */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0de17c1db608..74c0a282d012 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -346,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file) struct cifsTconInfo *tcon; struct tcon_link *tlink; struct cifsFileInfo *pCifsFile = NULL; - struct cifsInodeInfo *pCifsInode; char *full_path = NULL; bool posix_open_ok = false; __u16 netfid; @@ -361,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file) } tcon = tlink_tcon(tlink); - pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { rc = -ENOMEM; @@ -1146,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) char *write_data; int rc = -EFAULT; int bytes_written = 0; - struct cifs_sb_info *cifs_sb; struct inode *inode; struct cifsFileInfo *open_file; @@ -1154,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return -EFAULT; inode = page->mapping->host; - cifs_sb = CIFS_SB(inode->i_sb); offset += (loff_t)from; write_data = kmap(page); @@ -1667,7 +1662,8 @@ static ssize_t cifs_iovec_write(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) { - size_t total_written = 0, written = 0; + size_t total_written = 0; + unsigned int written = 0; unsigned long num_pages, npages; size_t copied, len, cur_len, i; struct kvec *to_send; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 7f25cc3d2256..f8e4cd2a7912 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) { int rc = 0; int xid, i; - struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; struct cifsFileInfo *cifsFile = NULL; char *current_entry; @@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) xid = GetXid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - /* * Ensure FindFirst doesn't fail before doing filldir() for '.' and * '..'. Otherwise we won't be able to notify VFS in case of failure. -- cgit v1.2.1 From 7a8587e7c8e4e32ba778bfbbb822a0a7e8d5f3e3 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Sat, 29 Jan 2011 13:54:58 -0600 Subject: cifs: No need to check crypto blockcipher allocation Missed one change as per earlier suggestion. Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 0db5f1de0227..a51585f9852b 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -657,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses) get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (!tfm_arc4 || IS_ERR(tfm_arc4)) { + if (IS_ERR(tfm_arc4)) { + rc = PTR_ERR(tfm_arc4); cERROR(1, "could not allocate crypto API arc4\n"); - return PTR_ERR(tfm_arc4); + return rc; } desc.tfm = tfm_arc4; -- cgit v1.2.1 From b1953bcec95c189b1eea690a08e89646d7750bda Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 21 Jan 2011 21:10:01 +0000 Subject: Btrfs: make shrink_delalloc a little friendlier Xfstests 224 will just sit there and spin for ever until eventually we give up flushing delalloc and exit. On my box this took several hours. I could not interrupt this process either, even though we use INTERRUPTIBLE. So do 2 things 1) Keep us from looping over and over again without reclaiming anything 2) If we get interrupted exit the loop I tested this and the test now exits in a reasonable amount of time, and can be interrupted with ctrl+c. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ff6bbfd75cf7..f96641a93fc9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3345,8 +3345,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, u64 reserved; u64 max_reclaim; u64 reclaimed = 0; + long time_left; int pause = 1; int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; + int loops = 0; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; @@ -3359,7 +3361,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, max_reclaim = min(reserved, to_reclaim); - while (1) { + while (loops < 1024) { /* have the flusher threads jump in and do some IO */ smp_mb(); nr_pages = min_t(unsigned long, nr_pages, @@ -3367,8 +3369,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); spin_lock(&space_info->lock); - if (reserved > space_info->bytes_reserved) + if (reserved > space_info->bytes_reserved) { + loops = 0; reclaimed += reserved - space_info->bytes_reserved; + } else { + loops++; + } reserved = space_info->bytes_reserved; spin_unlock(&space_info->lock); @@ -3379,7 +3385,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, return -EAGAIN; __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(pause); + time_left = schedule_timeout(pause); + + /* We were interrupted, exit */ + if (time_left) + break; + pause <<= 1; if (pause > HZ / 10) pause = HZ / 10; -- cgit v1.2.1 From b31eabd86eb68d3c217e6821078249bc045e698a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 31 Jan 2011 16:48:24 -0500 Subject: Btrfs: catch errors from btrfs_sync_log btrfs_sync_log returns -EAGAIN when we need full transaction commits instead of small log commits, but sometimes we were dropping the return value. In practice, we check for this a few different ways, but this is still a bug that can leave off full log commits when we really need them. Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c25a41d86118..42dfc3077040 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2051,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, wait_log_commit(trans, log_root_tree, log_root_tree->log_transid); mutex_unlock(&log_root_tree->log_mutex); + ret = 0; goto out; } atomic_set(&log_root_tree->log_commit[index2], 1); @@ -2115,7 +2116,7 @@ out: smp_mb(); if (waitqueue_active(&root->log_commit_wait[index1])) wake_up(&root->log_commit_wait[index1]); - return 0; + return ret; } static void free_log_tree(struct btrfs_trans_handle *trans, -- cgit v1.2.1 From cab6958da0094e36a098751f844409fc9ee26251 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 31 Jan 2011 21:56:35 +0000 Subject: [CIFS] Update cifs minor version Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 14789a97304e..4a3330235d55 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.69" +#define CIFS_VERSION "1.70" #endif /* _CIFSFS_H */ -- cgit v1.2.1 From 6284644e8de1f4005166c918c3d2aa4c510ab9f6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 31 Jan 2011 09:14:17 -0500 Subject: cifs: fix length checks in checkSMB The cERROR message in checkSMB when the calculated length doesn't match the RFC1001 length is incorrect in many cases. It always says that the RFC1001 length is bigger than the SMB, even when it's actually the reverse. Fix the error message to say the reverse of what it does now when the SMB length goes beyond the end of the received data. Also, clarify the error message when the RFC length is too big. Finally, clarify the comments to show that the 512 byte limit on extra data at the end of the packet is arbitrary. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/misc.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 24f0a9d97ad8..2a930a752a78 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -478,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) return 0; /* bcc wrapped */ } - cFYI(1, "Calculated size %d vs length %d mismatch for mid %d", + cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u", clc_len, 4 + len, smb->Mid); - /* Windows XP can return a few bytes too much, presumably - an illegal pad, at the end of byte range lock responses - so we allow for that three byte pad, as long as actual - received length is as long or longer than calculated length */ - /* We have now had to extend this more, since there is a - case in which it needs to be bigger still to handle a - malformed response to transact2 findfirst from WinXP when - access denied is returned and thus bcc and wct are zero - but server says length is 0x21 bytes too long as if the server - forget to reset the smb rfc1001 length when it reset the - wct and bcc to minimum size and drop the t2 parms and data */ - if ((4+len > clc_len) && (len <= clc_len + 512)) - return 0; - else { - cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d", + + if (4 + len < clc_len) { + cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u", len, smb->Mid); return 1; + } else if (len > clc_len + 512) { + /* + * Some servers (Windows XP in particular) send more + * data than the lengths in the SMB packet would + * indicate on certain calls (byte range locks and + * trans2 find first calls in particular). While the + * client can handle such a frame by ignoring the + * trailing data, we choose limit the amount of extra + * data to 512 bytes. + */ + cERROR(1, "RFC1001 size %u more than 512 bytes larger " + "than SMB for mid=%u", len, smb->Mid); + return 1; } } return 0; -- cgit v1.2.1 From c87fb6fdcaf7560940b31a0c78c3e6370e3433cf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 31 Jan 2011 19:54:59 -0500 Subject: Btrfs: avoid uninit variable warnings in ordered-data.c This one isn't really an uninit variable, but for pretty obscure reasons. Let's make it clearly correct. Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2b61e1ddcd99..083a55477375 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { struct rb_root *root = &tree->tree; - struct rb_node *prev; + struct rb_node *prev = NULL; struct rb_node *ret; struct btrfs_ordered_extent *entry; -- cgit v1.2.1 From 5df67083488ccbad925f583b698ab38f8629a016 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 1 Feb 2011 09:17:35 +0000 Subject: btrfs: checking NULL or not in some functions Because NULL is returned when the memory allocation fails, it is checked whether it is NULL. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/extent_io.c | 2 ++ fs/btrfs/tree-log.c | 6 ++++++ 3 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f96641a93fc9..9de4ff03882a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6496,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start, int ret = 0; ra = kzalloc(sizeof(*ra), GFP_NOFS); + if (!ra) + return -ENOMEM; mutex_lock(&inode->i_mutex); first_index = start >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6411ed6ca449..8862dda46ff6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1920,6 +1920,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, nr = bio_get_nr_vecs(bdev); bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) + return -ENOMEM; bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 42dfc3077040..6d66e5caff97 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2751,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, log = root->log_root; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; dst_path = btrfs_alloc_path(); + if (!dst_path) { + btrfs_free_path(path); + return -ENOMEM; + } min_key.objectid = inode->i_ino; min_key.type = BTRFS_INODE_ITEM_KEY; -- cgit v1.2.1 From 98d5dc13e7e74b77ca3b4c3cbded9f48d2dbbbb7 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 20 Jan 2011 06:19:37 +0000 Subject: btrfs: fix return value check of btrfs_start_transaction() The error check of btrfs_start_transaction() is added, and the mistake of the error check on several places is corrected. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 +++++-- fs/btrfs/inode.c | 1 + fs/btrfs/ioctl.c | 10 ++++++++-- fs/btrfs/relocation.c | 3 +++ fs/btrfs/super.c | 2 ++ fs/btrfs/tree-log.c | 1 + fs/btrfs/volumes.c | 19 +++++++++++++++++-- 7 files changed, 37 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9de4ff03882a..f07ba21cbf06 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6271,6 +6271,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, BUG_ON(!wc); trans = btrfs_start_transaction(tree_root, 0); + BUG_ON(IS_ERR(trans)); + if (block_rsv) trans->block_rsv = block_rsv; @@ -6368,6 +6370,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, btrfs_end_transaction_throttle(trans, tree_root); trans = btrfs_start_transaction(tree_root, 0); + BUG_ON(IS_ERR(trans)); if (block_rsv) trans->block_rsv = block_rsv; } @@ -7587,7 +7590,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) if (found) { trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); } @@ -7831,7 +7834,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, trans = btrfs_start_transaction(extent_root, 1); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); if (extent_key->objectid == 0) { ret = del_extent_zero(trans, extent_root, path, extent_key); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5621818921f8..36bc3f49ebf9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2357,6 +2357,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) */ if (is_bad_inode(inode)) { trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); btrfs_orphan_del(trans, inode); btrfs_end_transaction(trans, root); iput(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 12dabe28cf54..02d224e8c83f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -907,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_unlock; + } ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); } else { @@ -2141,9 +2145,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) path->leave_spinning = 1; trans = btrfs_start_transaction(root, 1); - if (!trans) { + if (IS_ERR(trans)) { btrfs_free_path(path); - return -ENOMEM; + return PTR_ERR(trans); } dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); @@ -2337,6 +2341,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp u64 transid; trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); transid = trans->transid; btrfs_commit_transaction_async(trans, root, 0); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ea9965430241..1f5556acb530 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2028,6 +2028,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, while (1) { trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); trans->block_rsv = rc->block_rsv; ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, @@ -3665,6 +3666,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) while (1) { trans = btrfs_start_transaction(rc->extent_root, 0); + BUG_ON(IS_ERR(trans)); if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); @@ -4033,6 +4035,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) int ret; trans = btrfs_start_transaction(root->fs_info->tree_root, 0); + BUG_ON(IS_ERR(trans)); memset(&root->root_item.drop_progress, 0, sizeof(root->root_item.drop_progress)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f4e45fdded30..0209b5fc772c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -623,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_wait_ordered_extents(root, 0, 0); trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6d66e5caff97..a4bbb854dfd2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3112,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) BUG_ON(!path); trans = btrfs_start_transaction(fs_info->tree_root, 0); + BUG_ON(IS_ERR(trans)); wc.trans = trans; wc.pin = 1; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f2d2f4ccc738..7cad59353b09 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1212,6 +1212,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, return -ENOMEM; trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -1604,6 +1608,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + kfree(device); + ret = PTR_ERR(trans); + goto error; + } + lock_chunks(root); device->barriers = 1; @@ -1872,7 +1882,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, return ret; trans = btrfs_start_transaction(root, 0); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); lock_chunks(root); @@ -2046,7 +2056,7 @@ int btrfs_balance(struct btrfs_root *dev_root) BUG_ON(ret); trans = btrfs_start_transaction(dev_root, 0); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); ret = btrfs_grow_device(trans, device, old_size); BUG_ON(ret); @@ -2212,6 +2222,11 @@ again: /* Shrinking succeeded, else we would be at "done". */ trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto done; + } + lock_chunks(root); device->disk_total_bytes = new_size; -- cgit v1.2.1 From 9587fcff42f5bece3c0a44066b079235ee73cbb3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 1 Feb 2011 08:40:43 -0500 Subject: cifs: fix length vs. total_read confusion in cifs_demultiplex_thread length at this point is the length returned by the last kernel_recvmsg call. total_read is the length of all of the data read so far. length is more or less meaningless at this point, so use total_read for everything. Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/connect.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 47d8ff623683..945b2202275f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -578,12 +578,12 @@ incomplete_rcv: else if (reconnect == 1) continue; - length += 4; /* account for rfc1002 hdr */ + total_read += 4; /* account for rfc1002 hdr */ - - dump_smb(smb_buffer, length); - if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { - cifs_dump_mem("Bad SMB: ", smb_buffer, 48); + dump_smb(smb_buffer, total_read); + if (checkSMB(smb_buffer, smb_buffer->Mid, total_read)) { + cifs_dump_mem("Bad SMB: ", smb_buffer, + total_read < 48 ? total_read : 48); continue; } -- cgit v1.2.1 From 0781b909b5586f4db720b5d1838b78f9d8e42f14 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Feb 2011 15:52:35 -0800 Subject: epoll: epoll_wait() should not use timespec_add_ns() commit 95aac7b1cd224f ("epoll: make epoll_wait() use the hrtimer range feature") added a performance regression because it uses timespec_add_ns() with potential very large 'ns' values. [akpm@linux-foundation.org: s/epoll_set_mstimeout/ep_set_mstimeout/, per Davide] Reported-by: Simon Kirby Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Acked-by: Davide Libenzi Cc: [2.6.37.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cc8a9b7d6064..267d0ada4541 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1114,6 +1114,17 @@ static int ep_send_events(struct eventpoll *ep, return ep_scan_ready_list(ep, ep_send_events_proc, &esed); } +static inline struct timespec ep_set_mstimeout(long ms) +{ + struct timespec now, ts = { + .tv_sec = ms / MSEC_PER_SEC, + .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), + }; + + ktime_get_ts(&now); + return timespec_add_safe(now, ts); +} + static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { @@ -1121,12 +1132,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, unsigned long flags; long slack; wait_queue_t wait; - struct timespec end_time; ktime_t expires, *to = NULL; if (timeout > 0) { - ktime_get_ts(&end_time); - timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); + struct timespec end_time = ep_set_mstimeout(timeout); + slack = select_estimate_accuracy(&end_time); to = &expires; *to = timespec_to_ktime(end_time); -- cgit v1.2.1 From 3cd90ea42f2c15f928b70ed66f6d8ed0a8e7aadd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 1 Feb 2011 15:52:46 -0800 Subject: vfs: sparse: add __FMODE_EXEC FMODE_EXEC is a constant type of fmode_t but was used with normal integer constants. This results in following warnings from sparse. Fix it using new macro __FMODE_EXEC. fs/exec.c:116:58: warning: restricted fmode_t degrades to integer fs/exec.c:689:58: warning: restricted fmode_t degrades to integer fs/fcntl.c:777:9: warning: restricted fmode_t degrades to integer Signed-off-by: Namhyung Kim Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 ++-- fs/fcntl.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index c62efcb959c7..52a447d9b6ab 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto out; file = do_filp_open(AT_FDCWD, tmp, - O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, MAY_READ | MAY_EXEC | MAY_OPEN); putname(tmp); error = PTR_ERR(file); @@ -723,7 +723,7 @@ struct file *open_exec(const char *name) int err; file = do_filp_open(AT_FDCWD, name, - O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, MAY_EXEC | MAY_OPEN); if (IS_ERR(file)) goto out; diff --git a/fs/fcntl.c b/fs/fcntl.c index ecc8b3954ed6..cb1026181bdc 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -815,7 +815,7 @@ static int __init fcntl_init(void) __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - FMODE_EXEC + __FMODE_EXEC )); fasync_cache = kmem_cache_create("fasync_cache", -- cgit v1.2.1 From d54cdc8ca7aabc69e145a62155855db42b04ed0b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 1 Feb 2011 15:52:47 -0800 Subject: fs: make block fiemap mapping length at least blocksize long Some filesystems don't deal well with being asked to map less than blocksize blocks (GFS2 for example). Since we are always mapping at least blocksize sections anyway, just make sure len is at least as big as a blocksize so we don't trip up any filesystems. Thanks, Signed-off-by: Josef Bacik Cc: Steven Whitehouse Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/ioctl.c b/fs/ioctl.c index a59635e295fa..1eebeb72b202 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode, len = isize; } + /* + * Some filesystems can't deal with being asked to map less than + * blocksize, so make sure our len is at least block length. + */ + if (logical_to_blk(inode, len) == 0) + len = blk_to_logical(inode, 1); + start_blk = logical_to_blk(inode, start); last_blk = logical_to_blk(inode, start + len - 1); -- cgit v1.2.1 From 0b0abeaf3d30cec03ac6497fe978b8f7edecc5ae Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 2 Feb 2011 21:02:12 +0200 Subject: Revert "exofs: Set i_mapping->backing_dev_info anyway" This reverts commit 115e19c53501edc11f730191f7f047736815ae3d. Apparently setting inode->bdi to one's own sb->s_bdi stops VFS from sending *read-aheads*. This problem was bisected to this commit. A revert fixes it. I'll investigate farther why is this happening for the next Kernel, but for now a revert. I'm sending to stable@kernel.org as well, since it exists also in 2.6.37. 2.6.36 is good and does not have this patch. CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Linus Torvalds --- fs/exofs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 42685424817b..a7555238c41a 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); } - inode->i_mapping->backing_dev_info = sb->s_bdi; if (S_ISREG(inode->i_mode)) { inode->i_op = &exofs_file_inode_operations; inode->i_fop = &exofs_file_operations; @@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) sbi = sb->s_fs_info; - inode->i_mapping->backing_dev_info = sb->s_bdi; sb->s_dirt = 1; inode_init_owner(inode, dir, mode); inode->i_ino = sbi->s_nextid++; -- cgit v1.2.1 From 8f1f745331c1b560f53c0d60e55a4f4f43f7cce5 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 3 Feb 2011 14:33:15 -0500 Subject: ext4: fix panic on module unload when stopping lazyinit thread https://bugzilla.kernel.org/show_bug.cgi?id=27652 If the lazyinit thread is running, the teardown function ext4_destroy_lazyinit_thread() has problems: ext4_clear_request_list(); while (ext4_li_info->li_task) { wake_up(&ext4_li_info->li_wait_daemon); wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task == NULL); } Clearing the request list will cause the thread to exit and free ext4_li_info, so then we're waiting on something which is getting freed. Fix this up by making the thread respond to kthread_stop, and exit, without the need to wait for that exit in some other homegrown way. Cc: stable@kernel.org Reported-and-Tested-by: Tao Ma Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 48ce561fafac..3d8cf2cab379 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); +static void ext4_clear_request_list(void); #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { @@ -2716,6 +2717,8 @@ static void ext4_unregister_li_request(struct super_block *sb) mutex_unlock(&ext4_li_info->li_list_mtx); } +static struct task_struct *ext4_lazyinit_task; + /* * This is the function where ext4lazyinit thread lives. It walks * through the request list searching for next scheduled filesystem. @@ -2784,6 +2787,10 @@ cont_thread: if (time_before(jiffies, next_wakeup)) schedule(); finish_wait(&eli->li_wait_daemon, &wait); + if (kthread_should_stop()) { + ext4_clear_request_list(); + goto exit_thread; + } } exit_thread: @@ -2808,6 +2815,7 @@ exit_thread: wake_up(&eli->li_wait_task); kfree(ext4_li_info); + ext4_lazyinit_task = NULL; ext4_li_info = NULL; mutex_unlock(&ext4_li_mtx); @@ -2830,11 +2838,10 @@ static void ext4_clear_request_list(void) static int ext4_run_lazyinit_thread(void) { - struct task_struct *t; - - t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); + ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, + ext4_li_info, "ext4lazyinit"); + if (IS_ERR(ext4_lazyinit_task)) { + int err = PTR_ERR(ext4_lazyinit_task); ext4_clear_request_list(); del_timer_sync(&ext4_li_info->li_timer); kfree(ext4_li_info); @@ -2985,16 +2992,10 @@ static void ext4_destroy_lazyinit_thread(void) * If thread exited earlier * there's nothing to be done. */ - if (!ext4_li_info) + if (!ext4_li_info || !ext4_lazyinit_task) return; - ext4_clear_request_list(); - - while (ext4_li_info->li_task) { - wake_up(&ext4_li_info->li_wait_daemon); - wait_event(ext4_li_info->li_wait_task, - ext4_li_info->li_task == NULL); - } + kthread_stop(ext4_lazyinit_task); } static int ext4_fill_super(struct super_block *sb, void *data, int silent) -- cgit v1.2.1 From 8f021222c1e2756ea4c9dde93b23e1d2a0a4ec37 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 3 Feb 2011 14:33:33 -0500 Subject: ext4: unregister features interface on module unload Ext4 features interface was not properly unregistered which led to problems while unloading/reloading ext4 module. This commit fixes that by adding proper kobject unregistration code into ext4_exit_fs() as well as fail-path of ext4_init_fs() Reported-by: Eric Sandeen Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3d8cf2cab379..4898cb1ff606 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4769,7 +4769,7 @@ static struct file_system_type ext4_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -int __init ext4_init_feat_adverts(void) +static int __init ext4_init_feat_adverts(void) { struct ext4_features *ef; int ret = -ENOMEM; @@ -4793,6 +4793,13 @@ out: return ret; } +static void ext4_exit_feat_adverts(void) +{ + kobject_put(&ext4_feat->f_kobj); + wait_for_completion(&ext4_feat->f_kobj_unregister); + kfree(ext4_feat); +} + static int __init ext4_init_fs(void) { int err; @@ -4839,7 +4846,7 @@ out1: out2: ext4_exit_mballoc(); out3: - kfree(ext4_feat); + ext4_exit_feat_adverts(); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); out4: @@ -4858,6 +4865,7 @@ static void __exit ext4_exit_fs(void) destroy_inodecache(); ext4_exit_xattr(); ext4_exit_mballoc(); + ext4_exit_feat_adverts(); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); ext4_exit_system_zone(); -- cgit v1.2.1 From dd68314ccf3fb918c1fb6471817edbc60ece4b52 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 3 Feb 2011 14:33:49 -0500 Subject: ext4: fix up ext4 error handling Make sure we the correct cleanup happens if we die while trying to load the ext4 file system. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4898cb1ff606..86b05486dc63 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4810,13 +4810,17 @@ static int __init ext4_init_fs(void) return err; err = ext4_init_system_zone(); if (err) - goto out5; + goto out7; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); if (!ext4_kset) - goto out4; + goto out6; ext4_proc_root = proc_mkdir("fs/ext4", NULL); + if (!ext4_proc_root) + goto out5; err = ext4_init_feat_adverts(); + if (err) + goto out4; err = ext4_init_mballoc(); if (err) @@ -4847,11 +4851,13 @@ out2: ext4_exit_mballoc(); out3: ext4_exit_feat_adverts(); +out4: remove_proc_entry("fs/ext4", NULL); +out5: kset_unregister(ext4_kset); -out4: +out6: ext4_exit_system_zone(); -out5: +out7: ext4_exit_pageio(); return err; } -- cgit v1.2.1 From c5b8d0bce052949e173b5b32f96bd59bceaa2ab0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2011 09:32:39 -0700 Subject: hfsplus: fix failed mount handling Currently the error handling in hfsplus_fill_super is a mess, and can lead to accessing fields in the superblock that haven't been even set up yet. Fix this by making sure we do not set up sb->s_root until we have the mount fully set up, and before that do proper step by step unwinding instead of using hfsplus_put_super as a big hammer. Reported-by: Dan Williams Signed-off-by: Christoph Hellwig --- fs/hfsplus/super.c | 106 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9a3b4795f43c..b49b55584c84 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) struct inode *root, *inode; struct qstr str; struct nls_table *nls = NULL; - int err = -EINVAL; + int err; + err = -EINVAL; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - return -ENOMEM; + goto out; sb->s_fs_info = sbi; mutex_init(&sbi->alloc_mutex); mutex_init(&sbi->vh_mutex); hfsplus_fill_defaults(sbi); + + err = -EINVAL; if (!hfsplus_parse_options(data, sbi)) { printk(KERN_ERR "hfs: unable to parse mount options\n"); - err = -EINVAL; - goto cleanup; + goto out_unload_nls; } /* temporarily use utf8 to correctly find the hidden dir below */ @@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sbi->nls = load_nls("utf8"); if (!sbi->nls) { printk(KERN_ERR "hfs: unable to load nls for utf8\n"); - err = -EINVAL; - goto cleanup; + goto out_unload_nls; } /* Grab the volume header */ if (hfsplus_read_wrapper(sb)) { if (!silent) printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); - err = -EINVAL; - goto cleanup; + goto out_unload_nls; } vhdr = sbi->s_vhdr; @@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { printk(KERN_ERR "hfs: wrong filesystem version\n"); - goto cleanup; + goto out_free_vhdr; } sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); @@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { printk(KERN_ERR "hfs: failed to load extents file\n"); - goto cleanup; + goto out_free_vhdr; } sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); if (!sbi->cat_tree) { printk(KERN_ERR "hfs: failed to load catalog file\n"); - goto cleanup; + goto out_close_ext_tree; } inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { printk(KERN_ERR "hfs: failed to load allocation file\n"); err = PTR_ERR(inode); - goto cleanup; + goto out_close_cat_tree; } sbi->alloc_file = inode; @@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(root)) { printk(KERN_ERR "hfs: failed to load root directory\n"); err = PTR_ERR(root); - goto cleanup; - } - sb->s_d_op = &hfsplus_dentry_operations; - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - err = -ENOMEM; - goto cleanup; + goto out_put_alloc_file; } str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; @@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) - goto cleanup; + goto out_put_root; inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); if (IS_ERR(inode)) { err = PTR_ERR(inode); - goto cleanup; + goto out_put_root; } sbi->hidden_dir = inode; } else hfs_find_exit(&fd); - if (sb->s_flags & MS_RDONLY) - goto out; + if (!(sb->s_flags & MS_RDONLY)) { + /* + * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused + * all three are registered with Apple for our use + */ + vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); + vhdr->modify_date = hfsp_now2mt(); + be32_add_cpu(&vhdr->write_count, 1); + vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); + vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); + hfsplus_sync_fs(sb, 1); - /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused - * all three are registered with Apple for our use - */ - vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); - vhdr->modify_date = hfsp_now2mt(); - be32_add_cpu(&vhdr->write_count, 1); - vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); - vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); - hfsplus_sync_fs(sb, 1); - - if (!sbi->hidden_dir) { - mutex_lock(&sbi->vh_mutex); - sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); - hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, - &str, sbi->hidden_dir); - mutex_unlock(&sbi->vh_mutex); - - hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); + if (!sbi->hidden_dir) { + mutex_lock(&sbi->vh_mutex); + sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); + hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, + sbi->hidden_dir); + mutex_unlock(&sbi->vh_mutex); + + hfsplus_mark_inode_dirty(sbi->hidden_dir, + HFSPLUS_I_CAT_DIRTY); + } } -out: + + sb->s_d_op = &hfsplus_dentry_operations; + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + err = -ENOMEM; + goto out_put_hidden_dir; + } + unload_nls(sbi->nls); sbi->nls = nls; return 0; -cleanup: - hfsplus_put_super(sb); +out_put_hidden_dir: + iput(sbi->hidden_dir); +out_put_root: + iput(sbi->alloc_file); +out_put_alloc_file: + iput(sbi->alloc_file); +out_close_cat_tree: + hfs_btree_close(sbi->cat_tree); +out_close_ext_tree: + hfs_btree_close(sbi->ext_tree); +out_free_vhdr: + kfree(sbi->s_vhdr); + kfree(sbi->s_backup_vhdr); +out_unload_nls: + unload_nls(sbi->nls); unload_nls(nls); + kfree(sbi); +out: return err; } -- cgit v1.2.1 From 14dd01f88319a37b06ca909738044e39ec5bfdee Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Tue, 1 Feb 2011 16:41:55 -0500 Subject: hfsplus: do not leak buffer on error Signed-Off-By: Chuck Ebbert Signed-off-by: Christoph Hellwig --- fs/hfsplus/part_tbl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index d66ad113b1cc..40ad88c12c64 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb, res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, data, READ); if (res) - return res; + goto out; switch (be16_to_cpu(*((__be16 *)data))) { case HFS_OLD_PMAP_MAGIC: @@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb, res = -ENOENT; break; } - +out: kfree(data); return res; } -- cgit v1.2.1 From a1dbcef0172555464b5329f8ba47d43c98132dfa Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Wed, 2 Feb 2011 10:55:06 -0500 Subject: hfsplus: fix two memory leaks in wrapper.c Signed-Off-By: Chuck Ebbert Signed-off-by: Christoph Hellwig --- fs/hfsplus/wrapper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 196231794f64..3031d81f5f0f 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -167,7 +167,7 @@ reread: break; case cpu_to_be16(HFSP_WRAP_MAGIC): if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) - goto out; + goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; part_size = wd.embed_count * wd.ablk_size; @@ -179,7 +179,7 @@ reread: * (should do this only for cdrom/loop though) */ if (hfs_part_find(sb, &part_start, &part_size)) - goto out; + goto out_free_backup_vhdr; goto reread; } -- cgit v1.2.1 From 1065348d472f97b4b8eb53b60ec67e99148cbbca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Feb 2011 09:40:33 -0700 Subject: hfsplus: fix up a comparism in hfsplus_file_extend Revert an incorrect hunk from commit b2837fcf4994e699a4def002e26f274d95b387c1, "hfsplus: %L-to-%ll, macro correction, and remove unneeded braces" revert a pointless change of comparism operation argument order, which turned out to not even be equivalent. Reported-by: Joe Perches Signed-off-by: Christoph Hellwig --- fs/hfsplus/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 52a0bcaa7b6d..b1991a2a08e0 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode) u32 start, len, goal; int res; - if (sbi->total_blocks - sbi->free_blocks + 8 > - sbi->alloc_file->i_size * 8) { + if (sbi->alloc_file->i_size * 8 < + sbi->total_blocks - sbi->free_blocks + 8) { /* extend alloc file */ printk(KERN_ERR "hfs: extend alloc file! " "(%llu,%u,%u)\n", -- cgit v1.2.1 From 76429c148b939f5a6863c0a024eb8960ae91469a Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 31 Jan 2011 16:03:08 +0300 Subject: CIFS: Fix variable types in cifs_iovec_read/write (try #2) Variable 'i' should be unsigned long as it's used in circle with num_pages, and bytes_read/total_written should be ssize_t according to return value. Signed-off-by: Pavel Shilovsky Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/file.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 74c0a282d012..e964b1cd5dd0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1662,10 +1662,10 @@ static ssize_t cifs_iovec_write(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) { - size_t total_written = 0; - unsigned int written = 0; - unsigned long num_pages, npages; - size_t copied, len, cur_len, i; + unsigned int written; + unsigned long num_pages, npages, i; + size_t copied, len, cur_len; + ssize_t total_written = 0; struct kvec *to_send; struct page **pages; struct iov_iter it; @@ -1821,7 +1821,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, { int rc; int xid; - unsigned int total_read, bytes_read = 0; + ssize_t total_read; + unsigned int bytes_read = 0; size_t len, cur_len; int iov_offset = 0; struct cifs_sb_info *cifs_sb; -- cgit v1.2.1 From 78d2978874e4e10e97dfd4fd79db45bdc0748550 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 4 Feb 2011 18:13:24 +0000 Subject: CRED: Fix kernel panic upon security_file_alloc() failure. In get_empty_filp() since 2.6.29, file_free(f) is called with f->f_cred == NULL when security_file_alloc() returned an error. As a result, kernel will panic() due to put_cred(NULL) call within RCU callback. Fix this bug by assigning f->f_cred before calling security_file_alloc(). Signed-off-by: Tetsuo Handa Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index c3e89adf53c0..eb36b6b17e26 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -125,13 +125,13 @@ struct file *get_empty_filp(void) goto fail; percpu_counter_inc(&nr_files); + f->f_cred = get_cred(cred); if (security_file_alloc(f)) goto fail_sec; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_cred = get_cred(cred); spin_lock_init(&f->f_lock); eventpoll_init_file(f); /* f->f_version: 0 */ -- cgit v1.2.1 From 64474bdd07f673cc48509ea0375274422c8f73bf Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Thu, 3 Feb 2011 14:31:18 -0600 Subject: cifs: Possible slab memory corruption while updating extended stats (repost) Updating extended statistics here can cause slab memory corruption if a callback function frees slab memory (mid_entry). Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 945b2202275f..1f32a2893b5f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -633,11 +633,11 @@ incomplete_rcv: mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: mid_entry->midState = MID_RESPONSE_RECEIVED; - list_del_init(&mid_entry->qhead); - mid_entry->callback(mid_entry); #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif + list_del_init(&mid_entry->qhead); + mid_entry->callback(mid_entry); break; } mid_entry = NULL; -- cgit v1.2.1 From e3f0dadb2b44746f6223ce4560406d19e02fb1cc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Feb 2011 07:21:26 -0500 Subject: cifs: enable signing flag in SMB header when server has it on cifs_sign_smb only generates a signature if the correct Flags2 bit is set. Make sure that it gets set correctly if we're sending an async call. This patch fixes: https://bugzilla.kernel.org/show_bug.cgi?id=28142 Reported-and-Tested-by: JG Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b8c5e2eb43d0..fbc5aace54b1 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, if (rc) return rc; + /* enable signing if server requires it */ + if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + mutex_lock(&server->srv_mutex); mid = AllocMidQEntry(in_buf, server); if (mid == NULL) { -- cgit v1.2.1 From 247ec9b418ba50c9022280035330059364d54540 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Feb 2011 17:09:50 -0500 Subject: cifs: don't send an echo request unless NegProt has been done When the socket to the server is disconnected, the client more or less immediately calls cifs_reconnect to reconnect the socket. The NegProt and SessSetup however are not done until an actual call needs to be made. With the addition of the SMB echo code, it's possible that the server will initiate a disconnect on an idle socket. The client will then reconnect the socket but no NegotiateProtocol request is done. The SMBEcho workqueue job will then eventually pop, and an SMBEcho will be sent on the socket. The server will then reject it since no NegProt was done. The ideal fix would be to either have the socket not be reconnected until we plan to use it, or to immediately do a NegProt when the reconnect occurs. The code is not structured for this however. For now we must just settle for not sending any echoes until the NegProt is done. Reported-by: JG Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1f32a2893b5f..257b6d895e20 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -337,8 +337,12 @@ cifs_echo_request(struct work_struct *work) struct TCP_Server_Info *server = container_of(work, struct TCP_Server_Info, echo.work); - /* no need to ping if we got a response recently */ - if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) + /* + * We cannot send an echo until the NEGOTIATE_PROTOCOL request is done. + * Also, no need to ping if we got a response recently + */ + if (server->tcpStatus != CifsGood || + time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; rc = CIFSSMBEcho(server); -- cgit v1.2.1 From e8e1ba96b207deba1339b09983f8b29f92cb1497 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 4 Feb 2011 20:45:58 -0800 Subject: ceph: queue cap_snaps once per realm We were forming a dirty list, and then queueing cap_snaps for each realm _and_ its children, regardless of whether the children were already in the dirty list. This meant we did it twice for some realms. Which in turn meant we corrupted mdsc->snap_flush_list when the cap_snap was re-added to the list it was already on, and could trigger an infinite loop. We were also using recursion to do reach all the children, a no-no when stack is limited. Instead, (re)queue any children on the dirty list, avoiding processing anything twice and avoiding any recursion. Signed-off-by: Sage Weil --- fs/ceph/snap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 39c243acd062..f40b9139e437 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (lastinode) iput(lastinode); - dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); - list_for_each_entry(child, &realm->children, child_item) - queue_realm_cap_snaps(child); + list_for_each_entry(child, &realm->children, child_item) { + dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n", + realm, realm->ino, child, child->ino); + list_del_init(&child->dirty_item); + list_add(&child->dirty_item, &realm->dirty_item); + } + list_del_init(&realm->dirty_item); dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); } @@ -683,7 +687,9 @@ more: * queue cap snaps _after_ we've built the new snap contexts, * so that i_head_snapc can be set appropriately. */ - list_for_each_entry(realm, &dirty_realms, dirty_item) { + while (!list_empty(&dirty_realms)) { + realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, + dirty_item); queue_realm_cap_snaps(realm); } -- cgit v1.2.1 From 8132b65bc6ce6d9a4baafdfc28c7cd9c258ed6e4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Sun, 6 Feb 2011 02:05:28 +0300 Subject: cifs: add check for kmalloc in parse_dacl Exit from parse_dacl if no memory returned from the call to kmalloc. Signed-off-by: Stanislav Fomichev Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1e7636b145a8..beeebf194234 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), GFP_KERNEL); + if (!ppace) { + cERROR(1, "DACL memory allocation error"); + return; + } for (i = 0; i < num_aces; ++i) { ppace[i] = (struct cifs_ace *) (acl_base + acl_size); -- cgit v1.2.1 From 13dbc08987f25d9dba488a34b44b43e3844b027c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 3 Feb 2011 02:39:52 +0000 Subject: Btrfs: make sure search_bitmap finds something in remove_from_bitmap When we're cleaning up the tree log we need to be able to remove free space from the block group. The problem is if that free space spans bitmaps we would not find the space since we're looking for too many bytes. So make sure the amount of bytes we search for is limited to either the number of bytes we want, or the number of bytes left in the bitmap. This was tested by a user who was hitting the BUG() after search_bitmap. With this patch he can now mount his fs. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a5501edc3c9f..a0390657451b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1216,6 +1216,7 @@ again: */ search_start = *offset; search_bytes = *bytes; + search_bytes = min(search_bytes, end - search_start + 1); ret = search_bitmap(block_group, bitmap_info, &search_start, &search_bytes); BUG_ON(ret < 0 || search_start != *offset); -- cgit v1.2.1 From 3c14874acc71180553fb5aba528e3cf57c5b958b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 2 Feb 2011 15:53:47 +0000 Subject: Btrfs: exclude super blocks when we read in block groups This has been resulting in a BUT_ON(ret) after btrfs_reserve_extent in btrfs_cow_file_range. The reason is we don't actually calculate the bytes_super for a block group until we go to cache it, which means that the space_info can hand out reservations for space that it doesn't actually have, and we can run out of data space. This is also a problem if you are using space caching since we don't ever calculate bytes_super for the block groups. So instead everytime we read a block group call exclude_super_stripes, which calculates the bytes_super for the block group so it can be left out of the space_info. Then whenever caching completes we just call free_excluded_extents so that the super excluded extents are freed up. Also if we are unmounting and we hit any block groups that haven't been cached we still need to call free_excluded_extents to make sure things are cleaned up properly. Thanks, Reported-by: Arne Jansen Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f07ba21cbf06..565e22d77b1b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -320,11 +320,6 @@ static int caching_kthread(void *data) if (!path) return -ENOMEM; - exclude_super_stripes(extent_root, block_group); - spin_lock(&block_group->space_info->lock); - block_group->space_info->bytes_readonly += block_group->bytes_super; - spin_unlock(&block_group->space_info->lock); - last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); /* @@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, cache->cached = BTRFS_CACHE_NO; } spin_unlock(&cache->lock); - if (ret == 1) + if (ret == 1) { + free_excluded_extents(fs_info->extent_root, cache); return 0; + } } if (load_cache_only) @@ -4036,6 +4033,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); atomic_dec(&BTRFS_I(inode)->outstanding_extents); + WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); spin_lock(&BTRFS_I(inode)->accounting_lock); nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); @@ -8325,6 +8323,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) if (block_group->cached == BTRFS_CACHE_STARTED) wait_block_group_cache_done(block_group); + /* + * We haven't cached this block group, which means we could + * possibly have excluded extents on this block group. + */ + if (block_group->cached == BTRFS_CACHE_NO) + free_excluded_extents(info->extent_root, block_group); + btrfs_remove_free_space_cache(block_group); btrfs_put_block_group(block_group); @@ -8439,6 +8444,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->flags = btrfs_block_group_flags(&cache->item); cache->sectorsize = root->sectorsize; + /* + * We need to exclude the super stripes now so that the space + * info has super bytes accounted for, otherwise we'll think + * we have more space than we actually do. + */ + exclude_super_stripes(root, cache); + /* * check for two cases, either we are full, and therefore * don't need to bother with the caching work since we won't @@ -8447,12 +8459,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) * time, particularly in the full case. */ if (found_key.offset == btrfs_block_group_used(&cache->item)) { - exclude_super_stripes(root, cache); cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; free_excluded_extents(root, cache); } else if (btrfs_block_group_used(&cache->item) == 0) { - exclude_super_stripes(root, cache); cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; add_new_free_space(cache, root->fs_info, -- cgit v1.2.1 From 554233a6e0e8557e8e81e54cc70628d101291122 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 3 Feb 2011 03:16:25 +0000 Subject: btrfs: cleanup error handling in btrfs_unlink_inode() When btrfs_alloc_path() fails, btrfs_free_path() need not be called. Therefore, it changes the branch ahead. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 36bc3f49ebf9..c9bc0afdbfc6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2646,7 +2646,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto err; + goto out; } path->leave_spinning = 1; -- cgit v1.2.1 From 8e4eef7a60eeca0fe7503e5cbd3b24ff4941c732 Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Wed, 2 Feb 2011 21:15:35 +0000 Subject: btrfs: Drop __exit attribute on btrfs_exit_compress As this function is called in some error paths while not removing the module, the __exit attribute prevents the kernel image from linking when btrfs is compiled in statically. Signed-off-by: Alexey Charkov Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 3a932f183da1..4d2110eafe29 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -921,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, return ret; } -void __exit btrfs_exit_compress(void) +void btrfs_exit_compress(void) { free_workspaces(); } -- cgit v1.2.1 From d402539b8fc3fa21f16eb5e654be742670399e8a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Feb 2011 08:54:35 -0500 Subject: cifs: remove checks for ses->status == CifsExiting ses->status is never set to CifsExiting, so these checks are always false. Tested-by: JG Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 46c66ed01af4..904aa47e3515 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) } } - if (ses->status == CifsExiting) - return -EIO; - /* * Give demultiplex thread up to 10 seconds to reconnect, should be * greater than cifs socket timeout which is 7 seconds @@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) * retrying until process is killed or server comes * back on-line */ - if (!tcon->retry || ses->status == CifsExiting) { + if (!tcon->retry) { cFYI(1, "gave up waiting on reconnect in smb_init"); return -EHOSTDOWN; } -- cgit v1.2.1 From d50bdd5aa55127635fd8a5c74bd2abb256bd34e3 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Mon, 7 Feb 2011 12:46:14 -0500 Subject: ext4: Fix data corruption with multi-block writepages support This fixes a corruption problem with the multi-block writepages submittal change for ext4, from commit bd2d0210cf22f2bd0cef72eb97cf94fc7d31d8cc ("ext4: use bio layer instead of buffer layer in mpage_da_submit_io"). (Note that this corruption is not present in 2.6.37 on ext4, because the corruption was detected after the feature was merged in 2.6.37-rc1, and so it was turned off by adding a non-default mount option, mblk_io_submit. With this commit, which hopefully fixes the last of the bugs with this feature, we'll be able to turn on this performance feature by default in 2.6.38, and remove the mblk_io_submit option.) The ext4 code path to bundle multiple pages for writeback in ext4_bio_write_page() had a bug: we should be clearing buffer head dirty flags *before* we submit the bio, not in the completion routine. The patch below was tested on 2.6.37 under KVM with the postgresql script which was submitted by Jon Nelson as documented in commit 1449032be1. Without the patch, I'd hit the corruption problem about 50-70% of the time. With the patch, I executed the script > 100 times with no corruption seen. I also fixed a bug to make sure ext4_end_bio() doesn't dereference the bio after the bio_put() call. Reported-by: Jon Nelson Reported-by: Matthias Bayer Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/page-io.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7270dcfca92a..4e9b0a242f4c 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -190,6 +190,7 @@ static void ext4_end_bio(struct bio *bio, int error) struct inode *inode; unsigned long flags; int i; + sector_t bi_sector = bio->bi_sector; BUG_ON(!io_end); bio->bi_private = NULL; @@ -207,9 +208,7 @@ static void ext4_end_bio(struct bio *bio, int error) if (error) SetPageError(page); BUG_ON(!head); - if (head->b_size == PAGE_CACHE_SIZE) - clear_buffer_dirty(head); - else { + if (head->b_size != PAGE_CACHE_SIZE) { loff_t offset; loff_t io_end_offset = io_end->offset + io_end->size; @@ -221,7 +220,6 @@ static void ext4_end_bio(struct bio *bio, int error) if (error) buffer_io_error(bh); - clear_buffer_dirty(bh); } if (buffer_delay(bh)) partial_write = 1; @@ -257,7 +255,7 @@ static void ext4_end_bio(struct bio *bio, int error) (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) - bio->bi_sector >> (inode->i_blkbits - 9)); + bi_sector >> (inode->i_blkbits - 9)); } /* Add the io_end to per-inode completed io list*/ @@ -380,6 +378,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, blocksize = 1 << inode->i_blkbits; + BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); set_page_writeback(page); ClearPageError(page); @@ -397,12 +396,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io, for (bh = head = page_buffers(page), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { + block_end = block_start + blocksize; if (block_start >= len) { clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; } + clear_buffer_dirty(bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { /* -- cgit v1.2.1 From 3a90983dbdcb2f4f48c0d771d8e5b4d88f27fae6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 18 Jan 2011 13:34:40 +0800 Subject: Btrfs: Fix page count calculation take offset of start position into account when calculating page count. Signed-off-by: Yan, Zheng Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9e097fbfc78d..b0ff34b96607 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -991,8 +991,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, size_t write_bytes = min(iov_iter_count(&i), nrptrs * (size_t)PAGE_CACHE_SIZE - offset); - size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + size_t num_pages = (write_bytes + offset + + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(struct page *) * nrptrs); @@ -1022,8 +1022,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, copied = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, &i); - dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; if (num_pages > dirty_pages) { if (copied > 0) -- cgit v1.2.1 From 7e90d705fc9f8c5e3a1549281dce0654d049243b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 8 Feb 2011 23:52:32 +0000 Subject: [CIFS] Do not send SMBEcho requests on new sockets until SMBNegotiate In order to determine whether an SMBEcho request can be sent we need to know that the socket is established (server tcpStatus == CifsGood) AND that an SMB NegotiateProtocol has been sent (server maxBuf != 0). Without the second check we can send an Echo request during reconnection before the server can accept it. CC: JG Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/connect.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index edd5b29b53c9..1ab33eb71d95 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -188,6 +188,8 @@ struct TCP_Server_Info { /* multiplexed reads or writes */ unsigned int maxBuf; /* maxBuf specifies the maximum */ /* message size the server can send or receive for non-raw SMBs */ + /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */ + /* when socket is setup (and during reconnect) before NegProt sent */ unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 257b6d895e20..10011e99b34d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -341,7 +341,7 @@ cifs_echo_request(struct work_struct *work) * We cannot send an echo until the NEGOTIATE_PROTOCOL request is done. * Also, no need to ping if we got a response recently */ - if (server->tcpStatus != CifsGood || + if ((server->tcpStatus != CifsGood) || (server->maxBuf == 0) || time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; -- cgit v1.2.1 From 195291e68c2ad59a046fc56d32bf59635b100e5c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 Feb 2011 12:01:42 -0500 Subject: cifs: clean up checks in cifs_echo_request Follow-on patch to 7e90d705 which is already in Steve's tree... The check for tcpStatus == CifsGood is not meaningful since it doesn't indicate whether the NEGOTIATE request has been done. Also, clarify why we're checking for maxBuf == 0. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 10011e99b34d..161f24ca4f6e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -338,10 +338,11 @@ cifs_echo_request(struct work_struct *work) struct TCP_Server_Info, echo.work); /* - * We cannot send an echo until the NEGOTIATE_PROTOCOL request is done. - * Also, no need to ping if we got a response recently + * We cannot send an echo until the NEGOTIATE_PROTOCOL request is + * done, which is indicated by maxBuf != 0. Also, no need to ping if + * we got a response recently */ - if ((server->tcpStatus != CifsGood) || (server->maxBuf == 0) || + if (server->maxBuf == 0 || time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; -- cgit v1.2.1 From 71823baff1978be892e7a36eddf6170e1cc6650d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Feb 2011 08:03:50 -0500 Subject: cifs: don't always drop malformed replies on the floor (try #3) Slight revision to this patch...use min_t() instead of conditional assignment. Also, remove the FIXME comment and replace it with the explanation that Steve gave earlier. After receiving a packet, we currently check the header. If it's no good, then we toss it out and continue the loop, leaving the caller waiting on that response. In cases where the packet has length inconsistencies, but the MID is valid, this leads to unneeded delays. That's especially problematic now that the client waits indefinitely for responses. Instead, don't immediately discard the packet if checkSMB fails. Try to find a matching mid_q_entry, mark it as having a malformed response and issue the callback. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 +- fs/cifs/connect.c | 30 ++++++++++++++++++++++++------ fs/cifs/transport.c | 3 +++ 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1ab33eb71d95..17afb0fbcaed 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -654,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define MID_REQUEST_SUBMITTED 2 #define MID_RESPONSE_RECEIVED 4 #define MID_RETRY_NEEDED 8 /* session closed while this request out */ -#define MID_NO_RESP_NEEDED 0x10 +#define MID_RESPONSE_MALFORMED 0x10 /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 161f24ca4f6e..8d6c17ab593d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -586,11 +586,20 @@ incomplete_rcv: total_read += 4; /* account for rfc1002 hdr */ dump_smb(smb_buffer, total_read); - if (checkSMB(smb_buffer, smb_buffer->Mid, total_read)) { + + /* + * We know that we received enough to get to the MID as we + * checked the pdu_length earlier. Now check to see + * if the rest of the header is OK. We borrow the length + * var for the rest of the loop to avoid a new stack var. + * + * 48 bytes is enough to display the header and a little bit + * into the payload for debugging purposes. + */ + length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); + if (length != 0) cifs_dump_mem("Bad SMB: ", smb_buffer, - total_read < 48 ? total_read : 48); - continue; - } + min_t(unsigned int, total_read, 48)); mid_entry = NULL; server->lstrp = jiffies; @@ -602,7 +611,8 @@ incomplete_rcv: if ((mid_entry->mid == smb_buffer->Mid) && (mid_entry->midState == MID_REQUEST_SUBMITTED) && (mid_entry->command == smb_buffer->Command)) { - if (check2ndT2(smb_buffer,server->maxBuf) > 0) { + if (length == 0 && + check2ndT2(smb_buffer, server->maxBuf) > 0) { /* We have a multipart transact2 resp */ isMultiRsp = true; if (mid_entry->resp_buf) { @@ -637,7 +647,12 @@ incomplete_rcv: mid_entry->resp_buf = smb_buffer; mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: - mid_entry->midState = MID_RESPONSE_RECEIVED; + if (length == 0) + mid_entry->midState = + MID_RESPONSE_RECEIVED; + else + mid_entry->midState = + MID_RESPONSE_MALFORMED; #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif @@ -658,6 +673,9 @@ multi_t2_fnd: else smallbuf = NULL; } + } else if (length != 0) { + /* response sanity checks failed */ + continue; } else if (!is_valid_oplock_break(smb_buffer, server) && !isMultiRsp) { cERROR(1, "No task to wake, unknown frame received! " diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index fbc5aace54b1..46d8756f2b24 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -457,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) case MID_RETRY_NEEDED: rc = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + rc = -EIO; + break; default: cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, mid->mid, mid->midState); -- cgit v1.2.1 From 6b155c8fd4d239f7d883d455bbad1be47724bbfc Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 11 Feb 2011 16:44:31 -0600 Subject: dlm: use single thread workqueues The recent commit to use cmwq for send and recv threads dcce240ead802d42b1e45ad2fcb2ed4a399cb255 introduced problems, apparently due to multiple workqueue threads. Single threads make the problems go away, so return to that until we fully understand the concurrency issues with multiple threads. Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9c64ae9e4c1a..2d8c87b951c2 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1468,15 +1468,13 @@ static void work_stop(void) static int work_start(void) { - recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + recv_workqueue = create_singlethread_workqueue("dlm_recv"); if (!recv_workqueue) { log_print("can't start dlm_recv"); return -ENOMEM; } - send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + send_workqueue = create_singlethread_workqueue("dlm_send"); if (!send_workqueue) { log_print("can't start dlm_send"); destroy_workqueue(recv_workqueue); -- cgit v1.2.1 From 2dab597441667d6c04451a7dcf215241ad4c74f6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 Feb 2011 15:53:38 -0800 Subject: Fix possible filp_cachep memory corruption In commit 31e6b01f4183 ("fs: rcu-walk for path lookup") we started doing path lookup using RCU, which then falls back to a careful non-RCU lookup in case of problems (LOOKUP_REVAL). So do_filp_open() has this "re-do the lookup carefully" looping case. However, that means that we must not release the open-intent file data if we are going to loop around and use it once more! Fix this by moving the release of the open-intent data to the function that allocates it (do_filp_open() itself) rather than the helper functions that can get called multiple times (finish_open() and do_last()). This makes the logic for the lifetime of that field much more obvious, and avoids the possible double free. Reported-by: J. R. Okajima Acked-by: Al Viro Cc: Nick Piggin Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 20 ++++++++++---------- fs/open.c | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 7d77f24d32a9..ec4b2d0190a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -561,10 +561,14 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) */ void release_open_intent(struct nameidata *nd) { - if (nd->intent.open.file->f_path.dentry == NULL) - put_filp(nd->intent.open.file); - else - fput(nd->intent.open.file); + struct file *file = nd->intent.open.file; + + if (file && !IS_ERR(file)) { + if (file->f_path.dentry == NULL) + put_filp(file); + else + fput(file); + } } /* @@ -2265,8 +2269,6 @@ static struct file *finish_open(struct nameidata *nd, return filp; exit: - if (!IS_ERR(nd->intent.open.file)) - release_open_intent(nd); path_put(&nd->path); return ERR_PTR(error); } @@ -2389,8 +2391,6 @@ exit_mutex_unlock: exit_dput: path_put_conditional(path, nd); exit: - if (!IS_ERR(nd->intent.open.file)) - release_open_intent(nd); path_put(&nd->path); return ERR_PTR(error); } @@ -2477,6 +2477,7 @@ struct file *do_filp_open(int dfd, const char *pathname, } audit_inode(pathname, nd.path.dentry); filp = finish_open(&nd, open_flag, acc_mode); + release_open_intent(&nd); return filp; creat: @@ -2553,6 +2554,7 @@ out: path_put(&nd.root); if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) goto reval; + release_open_intent(&nd); return filp; exit_dput: @@ -2560,8 +2562,6 @@ exit_dput: out_path: path_put(&nd.path); out_filp: - if (!IS_ERR(nd.intent.open.file)) - release_open_intent(&nd); filp = ERR_PTR(error); goto out; } diff --git a/fs/open.c b/fs/open.c index e52389e1f05b..5a2c6ebc22b5 100644 --- a/fs/open.c +++ b/fs/open.c @@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd) /* Pick up the filp from the open intent */ filp = nd->intent.open.file; + nd->intent.open.file = NULL; + /* Has the filesystem initialised the file for us? */ if (filp->f_path.dentry == NULL) { path_get(&nd->path); -- cgit v1.2.1 From d863b50ab01333659314c2034890cb76d9fdc3c7 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 10 Feb 2011 15:01:20 -0800 Subject: vfs: call rcu_barrier after ->kill_sb() In commit fa0d7e3de6d6 ("fs: icache RCU free inodes"), we use rcu free inode instead of freeing the inode directly. It causes a crash when we rmmod immediately after we umount the volume[1]. So we need to call rcu_barrier after we kill_sb so that the inode is freed before we do rmmod. The idea is inspired by Aneesh Kumar. rcu_barrier will wait for all callbacks to end before preceding. The original patch was done by Tao Ma, but synchronize_rcu() is not enough here. 1. http://marc.info/?l=linux-fsdevel&m=129680863330185&w=2 Tested-by: Tao Ma Signed-off-by: Boaz Harrosh Cc: Nick Piggin Cc: Al Viro Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 74e149efed81..7e9dd4cc2c01 100644 --- a/fs/super.c +++ b/fs/super.c @@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s) struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { fs->kill_sb(s); + /* + * We need to call rcu_barrier so all the delayed rcu free + * inodes are flushed before we release the fs module. + */ + rcu_barrier(); put_filesystem(fs); put_super(s); } else { -- cgit v1.2.1 From 2892c15ddda6a76dc10b7499e56c0f3b892e5a69 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 12 Feb 2011 08:12:18 -0500 Subject: ext4: make grpinfo slab cache names static In 2.6.37 I was running into oopses with repeated module loads & unloads. I tracked this down to: fb1813f4 ext4: use dedicated slab caches for group_info structures (this was in addition to the features advert unload problem) The kstrdup & subsequent kfree of the cache name was causing a double free. In slub, at least, if I read it right it allocates & frees the name itself, slab seems to do something different... so in slub I think we were leaking -our- cachep->name, and double freeing the one allocated by slub. After getting lost in slab/slub/slob a bit, I just looked at other sized-caches that get allocated. jbd2, biovec, sgpool all do it more or less the way jbd2 does. Below patch follows the jbd2 method of dynamically allocating a cache at mount time from a list of static names. (This might also possibly fix a race creating the caches with parallel mounts running). [Folded in a fix from Dan Carpenter which fixed an off-by-one error in the original patch] Cc: stable@kernel.org Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 100 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 851f49b2f9d2..d1fe09aea73d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep; /* We create slab caches for groupinfo data structures based on the * superblock block size. There will be one per mounted filesystem for * each unique s_blocksize_bits */ -#define NR_GRPINFO_CACHES \ - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) +#define NR_GRPINFO_CACHES 8 static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; +static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { + "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k", + "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k", + "ext4_groupinfo_64k", "ext4_groupinfo_128k" +}; + static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, @@ -2414,6 +2419,55 @@ err_freesgi: return -ENOMEM; } +static void ext4_groupinfo_destroy_slabs(void) +{ + int i; + + for (i = 0; i < NR_GRPINFO_CACHES; i++) { + if (ext4_groupinfo_caches[i]) + kmem_cache_destroy(ext4_groupinfo_caches[i]); + ext4_groupinfo_caches[i] = NULL; + } +} + +static int ext4_groupinfo_create_slab(size_t size) +{ + static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex); + int slab_size; + int blocksize_bits = order_base_2(size); + int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; + struct kmem_cache *cachep; + + if (cache_index >= NR_GRPINFO_CACHES) + return -EINVAL; + + if (unlikely(cache_index < 0)) + cache_index = 0; + + mutex_lock(&ext4_grpinfo_slab_create_mutex); + if (ext4_groupinfo_caches[cache_index]) { + mutex_unlock(&ext4_grpinfo_slab_create_mutex); + return 0; /* Already created */ + } + + slab_size = offsetof(struct ext4_group_info, + bb_counters[blocksize_bits + 2]); + + cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index], + slab_size, 0, SLAB_RECLAIM_ACCOUNT, + NULL); + + mutex_unlock(&ext4_grpinfo_slab_create_mutex); + if (!cachep) { + printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); + return -ENOMEM; + } + + ext4_groupinfo_caches[cache_index] = cachep; + + return 0; +} + int ext4_mb_init(struct super_block *sb, int needs_recovery) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) unsigned offset; unsigned max; int ret; - int cache_index; - struct kmem_cache *cachep; - char *namep = NULL; i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); @@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) goto out; } - cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; - cachep = ext4_groupinfo_caches[cache_index]; - if (!cachep) { - char name[32]; - int len = offsetof(struct ext4_group_info, - bb_counters[sb->s_blocksize_bits + 2]); - - sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); - namep = kstrdup(name, GFP_KERNEL); - if (!namep) { - ret = -ENOMEM; - goto out; - } - - /* Need to free the kmem_cache_name() when we - * destroy the slab */ - cachep = kmem_cache_create(namep, len, 0, - SLAB_RECLAIM_ACCOUNT, NULL); - if (!cachep) { - ret = -ENOMEM; - goto out; - } - ext4_groupinfo_caches[cache_index] = cachep; - } + ret = ext4_groupinfo_create_slab(sb->s_blocksize); + if (ret < 0) + goto out; /* order 0 is regular bitmap */ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; @@ -2520,7 +2550,6 @@ out: if (ret) { kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); - kfree(namep); } return ret; } @@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void) void ext4_exit_mballoc(void) { - int i; /* * Wait for completion of call_rcu()'s on ext4_pspace_cachep * before destroying the slab cache. @@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_free_ext_cachep); - - for (i = 0; i < NR_GRPINFO_CACHES; i++) { - struct kmem_cache *cachep = ext4_groupinfo_caches[i]; - if (cachep) { - char *name = (char *)kmem_cache_name(cachep); - kmem_cache_destroy(cachep); - kfree(name); - } - } + ext4_groupinfo_destroy_slabs(); ext4_remove_debugfs_entry(); } -- cgit v1.2.1 From e9e3bcecf44c04b9e6b505fd8e2eb9cea58fb94d Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 12 Feb 2011 08:17:34 -0500 Subject: ext4: serialize unaligned asynchronous DIO ext4 has a data corruption case when doing non-block-aligned asynchronous direct IO into a sparse file, as demonstrated by xfstest 240. The root cause is that while ext4 preallocates space in the hole, mappings of that space still look "new" and dio_zero_block() will zero out the unwritten portions. When more than one AIO thread is going, they both find this "new" block and race to zero out their portion; this is uncoordinated and causes data corruption. Dave Chinner fixed this for xfs by simply serializing all unaligned asynchronous direct IO. I've done the same here. The difference is that we only wait on conversions, not all IO. This is a very big hammer, and I'm not very pleased with stuffing this into ext4_file_write(). But since ext4 is DIO_LOCKING, we need to serialize it at this high level. I tried to move this into ext4_ext_direct_IO, but by then we have the i_mutex already, and we will wait on the work queue to do conversions - which must also take the i_mutex. So that won't work. This was originally exposed by qemu-kvm installing to a raw disk image with a normal sector-63 alignment. I've tested a backport of this patch with qemu, and it does avoid the corruption. It is also quite a lot slower (14 min for package installs, vs. 8 min for well-aligned) but I'll take slow correctness over fast corruption any day. Mingming suggested that we can track outstanding conversions, and wait on those so that non-sparse files won't be affected, and I've implemented that here; unaligned AIO to nonsparse files won't take a perf hit. [tytso@mit.edu: Keep the mutex as a hashed array instead of bloating the ext4 inode] [tytso@mit.edu: Fix up namespace issues so that global variables are protected with an "ext4_" prefix.] Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 10 ++++++++++ fs/ext4/extents.c | 10 ++++++---- fs/ext4/file.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/ext4/page-io.c | 25 ++++++++++++----------- fs/ext4/super.c | 13 +++++++++++- 5 files changed, 100 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0c8d97b56f34..3aa0b72b3b94 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -848,6 +848,7 @@ struct ext4_inode_info { atomic_t i_ioend_count; /* Number of outstanding io_end structs */ /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; + atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ spinlock_t i_block_reservation_lock; @@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) +/* For ioend & aio unwritten conversion wait queues */ +#define EXT4_WQ_HASH_SZ 37 +#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ + EXT4_WQ_HASH_SZ]) +#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ + EXT4_WQ_HASH_SZ]) +extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 63a75810b7c3..ccce8a7e94ed 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * that this IO needs to convertion to written when IO is * completed */ - if (io) + if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { io->flag = EXT4_IO_END_UNWRITTEN; - else + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); if (ext4_should_dioread_nolock(inode)) map->m_flags |= EXT4_MAP_UNINIT; @@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * that we need to perform convertion when IO is done. */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { - if (io) + if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { io->flag = EXT4_IO_END_UNWRITTEN; - else + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2e8322c8aa88..7b80d543b89e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp) return 0; } +static void ext4_aiodio_wait(struct inode *inode) +{ + wait_queue_head_t *wq = ext4_ioend_wq(inode); + + wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); +} + +/* + * This tests whether the IO in question is block-aligned or not. + * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they + * are converted to written only after the IO is complete. Until they are + * mapped, these blocks appear as holes, so dio_zero_block() will assume that + * it needs to zero out portions of the start and/or end block. If 2 AIO + * threads are at work on the same unwritten block, they must be synchronized + * or one thread will zero the other's data, causing corruption. + */ +static int +ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct super_block *sb = inode->i_sb; + int blockmask = sb->s_blocksize - 1; + size_t count = iov_length(iov, nr_segs); + loff_t final_size = pos + count; + + if (pos >= inode->i_size) + return 0; + + if ((pos & blockmask) || (final_size & blockmask)) + return 1; + + return 0; +} + static ssize_t ext4_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + int unaligned_aio = 0; + int ret; /* * If we have encountered a bitmap-format file, the size limit @@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, nr_segs = iov_shorten((struct iovec *)iov, nr_segs, sbi->s_bitmap_maxbytes - pos); } + } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && + !is_sync_kiocb(iocb))) { + unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); } - return generic_file_aio_write(iocb, iov, nr_segs, pos); + /* Unaligned direct AIO must be serialized; see comment above */ + if (unaligned_aio) { + static unsigned long unaligned_warn_time; + + /* Warn about this once per day */ + if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) + ext4_msg(inode->i_sb, KERN_WARNING, + "Unaligned AIO/DIO on inode %ld by %s; " + "performance will be poor.", + inode->i_ino, current->comm); + mutex_lock(ext4_aio_mutex(inode)); + ext4_aiodio_wait(inode); + } + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + + if (unaligned_aio) + mutex_unlock(ext4_aio_mutex(inode)); + + return ret; } static const struct vm_operations_struct ext4_file_vm_ops = { diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4e9b0a242f4c..955cc309142f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -32,14 +32,8 @@ static struct kmem_cache *io_page_cachep, *io_end_cachep; -#define WQ_HASH_SZ 37 -#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ]) -static wait_queue_head_t ioend_wq[WQ_HASH_SZ]; - int __init ext4_init_pageio(void) { - int i; - io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); if (io_page_cachep == NULL) return -ENOMEM; @@ -48,9 +42,6 @@ int __init ext4_init_pageio(void) kmem_cache_destroy(io_page_cachep); return -ENOMEM; } - for (i = 0; i < WQ_HASH_SZ; i++) - init_waitqueue_head(&ioend_wq[i]); - return 0; } @@ -62,7 +53,7 @@ void ext4_exit_pageio(void) void ext4_ioend_wait(struct inode *inode) { - wait_queue_head_t *wq = to_ioend_wq(inode); + wait_queue_head_t *wq = ext4_ioend_wq(inode); wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); } @@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io) for (i = 0; i < io->num_io_pages; i++) put_io_page(io->pages[i]); io->num_io_pages = 0; - wq = to_ioend_wq(io->inode); + wq = ext4_ioend_wq(io->inode); if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && waitqueue_active(wq)) wake_up_all(wq); @@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) struct inode *inode = io->inode; loff_t offset = io->offset; ssize_t size = io->size; + wait_queue_head_t *wq; int ret = 0; ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," @@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io) if (io->iocb) aio_complete(io->iocb, io->result, 0); /* clear the DIO AIO unwritten flag */ - io->flag &= ~EXT4_IO_END_UNWRITTEN; + if (io->flag & EXT4_IO_END_UNWRITTEN) { + io->flag &= ~EXT4_IO_END_UNWRITTEN; + /* Wake up anyone waiting on unwritten extent conversion */ + wq = ext4_ioend_wq(io->inode); + if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && + waitqueue_active(wq)) { + wake_up_all(wq); + } + } + return ret; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 86b05486dc63..f6a318f836b2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -833,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_sync_tid = 0; ei->i_datasync_tid = 0; atomic_set(&ei->i_ioend_count, 0); + atomic_set(&ei->i_aiodio_unwritten, 0); return &ei->vfs_inode; } @@ -4800,11 +4801,21 @@ static void ext4_exit_feat_adverts(void) kfree(ext4_feat); } +/* Shared across all ext4 file systems */ +wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; + static int __init ext4_init_fs(void) { - int err; + int i, err; ext4_check_flag_values(); + + for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { + mutex_init(&ext4__aio_mutex[i]); + init_waitqueue_head(&ext4__ioend_wq[i]); + } + err = ext4_init_pageio(); if (err) return err; -- cgit v1.2.1 From e44718318004a5618d1dfe2d080e2862532d8e5f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 12 Feb 2011 08:18:24 -0500 Subject: jbd2: call __jbd2_log_start_commit with j_state_lock write locked On an SMP ARM system running ext4, I've received a report that the first J_ASSERT in jbd2_journal_commit_transaction has been triggering: J_ASSERT(journal->j_running_transaction != NULL); While investigating possible causes for this problem, I noticed that __jbd2_log_start_commit() is getting called with j_state_lock only read-locked, in spite of the fact that it's possible for it might j_commit_request. Fix this by grabbing the necessary information so we can test to see if we need to start a new transaction before dropping the read lock, and then calling jbd2_log_start_commit() which will grab the write lock. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 9 +++++++-- fs/jbd2/transaction.c | 21 ++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9e4686900f18..97e73469b2c4 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal) } /* - * Called under j_state_lock. Returns true if a transaction commit was started. + * Called with j_state_lock locked for writing. + * Returns true if a transaction commit was started. */ int __jbd2_log_start_commit(journal_t *journal, tid_t target) { @@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; + int need_to_start = 0; read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; - __jbd2_log_start_commit(journal, transaction->t_tid); + if (!tid_geq(journal->j_commit_request, transaction->t_tid)) + need_to_start = 1; } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) tid = transaction->t_tid; read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); return 1; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index faad2bd787c7..1d1191050f99 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction) static int start_this_handle(journal_t *journal, handle_t *handle, int gfp_mask) { - transaction_t *transaction; - int needed; - int nblocks = handle->h_buffer_credits; - transaction_t *new_transaction = NULL; + transaction_t *transaction, *new_transaction = NULL; + tid_t tid; + int needed, need_to_start; + int nblocks = handle->h_buffer_credits; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", @@ -222,8 +222,11 @@ repeat: atomic_sub(nblocks, &transaction->t_outstanding_credits); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); - __jbd2_log_start_commit(journal, transaction->t_tid); + tid = transaction->t_tid; + need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; @@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - int ret; + tid_t tid; + int need_to_start, ret; /* If we've had an abort of any type, don't even think about * actually doing the restart! */ @@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - __jbd2_log_start_commit(journal, transaction->t_tid); + tid = transaction->t_tid; + need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; -- cgit v1.2.1 From 541ce98c10111dae7604543dda6c6f7e7a6015d8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 14 Jan 2011 20:00:02 -0500 Subject: nfsd: don't leak dentry count on mnt_want_write failure The exit cleanup isn't quite right here. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 641117f2188d..fda3be237773 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1812,22 +1812,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); if (host_err) - goto out_nfserr; + goto out_put; host_err = nfsd_break_lease(rdentry->d_inode); if (host_err) - goto out_put; + goto out_drop_write; if (type != S_IFDIR) host_err = vfs_unlink(dirp, rdentry); else host_err = vfs_rmdir(dirp, rdentry); -out_put: - dput(rdentry); - if (!host_err) host_err = commit_metadata(fhp); - +out_drop_write: mnt_drop_write(fhp->fh_export->ex_path.mnt); +out_put: + dput(rdentry); + out_nfserr: err = nfserrno(host_err); out: -- cgit v1.2.1 From 0af3f814ccf0a13d3e01e8115b96f1824379fc72 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 13 Jan 2011 11:25:31 +0200 Subject: NFSD: use nfserr for status after decode_cb_op_status Bugs introduced in 85a56480191ca9f08fc775c129b9eb5c8c1f2c05 "NFSD: Update XDR decoders in NFSv4 callback client" Cc: Chuck Lever Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3be975e18919..cde36cb0f348 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr, out: return status; out_default: - return nfs_cb_stat_to_errno(status); + return nfs_cb_stat_to_errno(nfserr); } /* @@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, if (unlikely(status)) goto out; if (unlikely(nfserr != NFS4_OK)) - goto out_default; + status = nfs_cb_stat_to_errno(nfserr); out: return status; -out_default: - return nfs_cb_stat_to_errno(status); } /* -- cgit v1.2.1 From 3aa6e0aa8ab3e64bbfba092c64d42fd1d006b124 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Tue, 1 Feb 2011 17:16:29 +0300 Subject: NFSD: memory corruption due to writing beyond the stat array If nfsd fails to find an exported via NFS file in the readahead cache, it should increment corresponding nfsdstats counter (ra_depth[10]), but due to a bug it may instead write to ra_depth[11], corrupting the following field. In a kernel with NFSDv4 compiled in the corruption takes the form of an increment of a counter of the number of NFSv4 operation 0's received; since there is no operation 0, this is harmless. In a kernel with NFSDv4 disabled it corrupts whatever happens to be in the memory beyond nfsdstats. Signed-off-by: Konstantin Khorenko Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fda3be237773..30c73f8a5791 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) if (ra->p_count == 0) frap = rap; } - depth = nfsdstats.ra_size*11/10; + depth = nfsdstats.ra_size; if (!frap) { spin_unlock(&rab->pb_lock); return NULL; -- cgit v1.2.1 From 6b57d9c86d0ab11c091b6db2edff8b5553fd445b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 Jan 2011 11:54:04 -0500 Subject: nfsd4: split up nfsd_break_deleg_cb We'll be adding some more code here soon. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d98d0213285d..ceb66170fda3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2329,23 +2329,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access) nfs4_file_put_access(fp, O_RDONLY); } -/* - * Spawn a thread to perform a recall on the delegation represented - * by the lease (file_lock) - * - * Called from break_lease() with lock_flocks() held. - * Note: we assume break_lease will only call this *once* for any given - * lease. - */ -static -void nfsd_break_deleg_cb(struct file_lock *fl) +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { - struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; - - dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); - if (!dp) - return; - /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel @@ -2360,15 +2345,28 @@ void nfsd_break_deleg_cb(struct file_lock *fl) /* only place dl_time is set. protected by lock_flocks*/ dp->dl_time = get_seconds(); + nfsd4_cb_recall(dp); +} + +/* + * Called from break_lease() with lock_flocks() held. + * Note: we assume break_lease will only call this *once* for any given + * lease. + */ +static void nfsd_break_deleg_cb(struct file_lock *fl) +{ + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + + BUG_ON(!dp); /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if the delegation isn't returned - * in time. + * in time: */ fl->fl_break_time = 0; + nfsd_break_one_deleg(dp); dp->dl_file->fi_had_conflict = true; - nfsd4_cb_recall(dp); } static -- cgit v1.2.1 From 22d38c4c10e8344aa406897d99a35d585d2cb77d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 Jan 2011 11:55:12 -0500 Subject: nfsd4: add helper function for lease setup Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ceb66170fda3..65978a9aa877 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2639,6 +2639,26 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) +{ + struct file_lock *fl; + + fl = locks_alloc_lock(); + if (!fl) + return NULL; + locks_init_lock(fl); + fl->fl_lmops = &nfsd_lease_mng_ops; + fl->fl_flags = FL_LEASE; + fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->fl_end = OFFSET_MAX; + fl->fl_owner = (fl_owner_t)dp; + fl->fl_file = dp->dl_vfs_file; + BUG_ON(!fl->fl_file); + fl->fl_pid = current->tgid; + dp->dl_flock = fl; + return fl; +} + /* * Attempt to hand out a delegation. */ @@ -2684,20 +2704,9 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta goto out; } status = -ENOMEM; - fl = locks_alloc_lock(); + fl = nfs4_alloc_init_lease(dp, flag); if (!fl) goto out; - locks_init_lock(fl); - fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_LEASE; - fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; - fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)dp; - fl->fl_file = find_readable_file(stp->st_file); - BUG_ON(!fl->fl_file); - fl->fl_pid = current->tgid; - dp->dl_flock = fl; - /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callback fl_change is used */ -- cgit v1.2.1 From dd239cc05f0ad9f582dd83d88a4fb5edcc57a026 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 Jan 2011 17:14:55 -0500 Subject: nfsd4: fix leak on allocation error Also share some common exit code. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 65978a9aa877..099d6fa64f7f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2699,14 +2699,12 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta } dp = alloc_init_deleg(sop->so_client, stp, fh, flag); - if (dp == NULL) { - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; - } + if (dp == NULL) + goto out_no_deleg; status = -ENOMEM; fl = nfs4_alloc_init_lease(dp, flag); if (!fl) - goto out; + goto out_free; /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callback fl_change is used */ @@ -2714,9 +2712,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta dprintk("NFSD: setlease failed [%d], no delegation\n", status); dp->dl_flock = NULL; locks_free_lock(fl); - unhash_delegation(dp); - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + goto out_free; } memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); @@ -2729,6 +2725,12 @@ out: && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) dprintk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_delegate_type = flag; + return; +out_free: + unhash_delegation(dp); +out_no_deleg: + flag = NFS4_OPEN_DELEGATE_NONE; + goto out; } /* -- cgit v1.2.1 From edab9782b5a16abb8d139d261e81e13ef0be35a9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 Jan 2011 17:58:10 -0500 Subject: nfsd4: split lease setting into separate function Splitting some code into a separate function which we'll be adding some more to. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 099d6fa64f7f..dbb2141cf88f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2659,6 +2659,23 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return fl; } +static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +{ + struct file_lock *fl; + int status; + + fl = nfs4_alloc_init_lease(dp, flag); + if (!fl) + return -ENOMEM; + status = vfs_setlease(dp->dl_vfs_file, fl->fl_type, &fl); + if (status) { + dp->dl_flock = NULL; + locks_free_lock(fl); + return -ENOMEM; + } + return 0; +} + /* * Attempt to hand out a delegation. */ @@ -2668,7 +2685,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; int cb_up; - struct file_lock *fl; int status, flag = 0; cb_up = nfsd4_cb_channel_good(sop->so_client); @@ -2701,19 +2717,9 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta dp = alloc_init_deleg(sop->so_client, stp, fh, flag); if (dp == NULL) goto out_no_deleg; - status = -ENOMEM; - fl = nfs4_alloc_init_lease(dp, flag); - if (!fl) - goto out_free; - /* vfs_setlease checks to see if delegation should be handed out. - * the lock_manager callback fl_change is used - */ - if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { - dprintk("NFSD: setlease failed [%d], no delegation\n", status); - dp->dl_flock = NULL; - locks_free_lock(fl); + status = nfs4_setlease(dp, flag); + if (status) goto out_free; - } memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); -- cgit v1.2.1 From 65bc58f5187e2ff4011ef1bd3082e83cd1b036f1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Feb 2011 15:44:12 -0500 Subject: nfsd4: remove unused deleg dprintk's. These aren't all that useful, and get in the way of the next steps. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dbb2141cf88f..d978192838a3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -958,8 +958,6 @@ expire_client(struct nfs4_client *clp) spin_lock(&recall_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - dprintk("NFSD: expire client. dp %p, fp %p\n", dp, - dp->dl_flock); list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } @@ -2931,8 +2929,6 @@ nfs4_laundromat(void) test_val = u; break; } - dprintk("NFSD: purging unused delegation dp %p, fp %p\n", - dp, dp->dl_flock); list_move(&dp->dl_recall_lru, &reaplist); } spin_unlock(&recall_lock); -- cgit v1.2.1 From 5d926e8c2f46dc09f4ddde86644a5f1d0726a470 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Feb 2011 16:53:46 -0500 Subject: nfsd4: modify fi_delegations under recall_lock Modify fi_delegations only under the recall_lock, allowing us to use that list on lease breaks. Also some trivial cleanup to simplify later changes. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d978192838a3..8b6cd3cf4835 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -277,9 +277,9 @@ nfs4_close_delegation(struct nfs4_delegation *dp) static void unhash_delegation(struct nfs4_delegation *dp) { - list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_perclnt); spin_lock(&recall_lock); + list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); nfs4_close_delegation(dp); @@ -2336,9 +2336,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - spin_lock(&recall_lock); list_add_tail(&dp->dl_recall_lru, &del_recall_lru); - spin_unlock(&recall_lock); /* only place dl_time is set. protected by lock_flocks*/ dp->dl_time = get_seconds(); @@ -2363,8 +2361,10 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - nfsd_break_one_deleg(dp); + spin_lock(&recall_lock); dp->dl_file->fi_had_conflict = true; + nfsd_break_one_deleg(dp); + spin_unlock(&recall_lock); } static -- cgit v1.2.1 From acfdf5c383b38f7f4dddae41b97c97f1ae058f49 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 Jan 2011 19:20:39 -0500 Subject: nfsd4: acquire only one lease per file Instead of acquiring one lease each time another client opens a file, nfsd can acquire just one lease to represent all of them, and reference count it to determine when to release it. This fixes a regression introduced by c45821d263a8a5109d69a9e8942b8d65bcd5f31a "locks: eliminate fl_mylease callback": after that patch, only the struct file * is used to determine who owns a given lease. But since we recently converted the server to share a single struct file per open, if we acquire multiple leases on the same file from nfsd, it then becomes impossible on unlocking a lease to determine which of those leases (all of whom share the same struct file *) we meant to remove. Thanks to Takashi Iwai for catching a bug in a previous version of this patch. Tested-by: Takashi Iwai Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 95 +++++++++++++++++++++++++++++++---------------------- fs/nfsd/state.h | 5 +-- 2 files changed, 58 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8b6cd3cf4835..54b60bfceb8d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f dp->dl_client = clp; get_nfs4_file(fp); dp->dl_file = fp; - dp->dl_vfs_file = find_readable_file(fp); - get_file(dp->dl_vfs_file); - dp->dl_flock = NULL; dp->dl_type = type; dp->dl_stateid.si_boot = boot_time; dp->dl_stateid.si_stateownerid = current_delegid++; @@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &clp->cl_delegations); INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); return dp; } @@ -253,24 +248,18 @@ nfs4_put_delegation(struct nfs4_delegation *dp) if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); put_nfs4_file(dp->dl_file); - fput(dp->dl_vfs_file); kmem_cache_free(deleg_slab, dp); num_delegations--; } } -/* Remove the associated file_lock first, then remove the delegation. - * lease_modify() is called to remove the FS_LEASE file_lock from - * the i_flock list, eventually calling nfsd's lock_manager - * fl_release_callback. - */ -static void -nfs4_close_delegation(struct nfs4_delegation *dp) +static void nfs4_put_deleg_lease(struct nfs4_file *fp) { - dprintk("NFSD: close_delegation dp %p\n",dp); - /* XXX: do we even need this check?: */ - if (dp->dl_flock) - vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); + if (atomic_dec_and_test(&fp->fi_delegees)) { + vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); + fp->fi_lease = NULL; + fp->fi_deleg_file = NULL; + } } /* Called under the state lock. */ @@ -282,7 +271,7 @@ unhash_delegation(struct nfs4_delegation *dp) list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); - nfs4_close_delegation(dp); + nfs4_put_deleg_lease(dp->dl_file); nfs4_put_delegation(dp); } @@ -2076,6 +2065,7 @@ alloc_init_file(struct inode *ino) fp->fi_inode = igrab(ino); fp->fi_id = current_fileid++; fp->fi_had_conflict = false; + fp->fi_lease = NULL; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); spin_lock(&recall_lock); @@ -2344,26 +2334,26 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) nfsd4_cb_recall(dp); } -/* - * Called from break_lease() with lock_flocks() held. - * Note: we assume break_lease will only call this *once* for any given - * lease. - */ +/* Called from break_lease() with lock_flocks() held. */ static void nfsd_break_deleg_cb(struct file_lock *fl) { - struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; + struct nfs4_delegation *dp; - BUG_ON(!dp); + BUG_ON(!fp); + /* We assume break_lease is only called once per lease: */ + BUG_ON(fp->fi_had_conflict); /* * We don't want the locks code to timeout the lease for us; - * we'll remove it ourself if the delegation isn't returned + * we'll remove it ourself if a delegation isn't returned * in time: */ fl->fl_break_time = 0; spin_lock(&recall_lock); - dp->dl_file->fi_had_conflict = true; - nfsd_break_one_deleg(dp); + fp->fi_had_conflict = true; + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) + nfsd_break_one_deleg(dp); spin_unlock(&recall_lock); } @@ -2455,13 +2445,15 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) static struct nfs4_delegation * find_delegation_file(struct nfs4_file *fp, stateid_t *stid) { - struct nfs4_delegation *dp; + struct nfs4_delegation *dp = NULL; + spin_lock(&recall_lock); list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) - return dp; + break; } - return NULL; + spin_unlock(&recall_lock); + return dp; } int share_access_to_flags(u32 share_access) @@ -2649,28 +2641,51 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f fl->fl_flags = FL_LEASE; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)dp; - fl->fl_file = dp->dl_vfs_file; - BUG_ON(!fl->fl_file); + fl->fl_owner = (fl_owner_t)(dp->dl_file); fl->fl_pid = current->tgid; - dp->dl_flock = fl; return fl; } static int nfs4_setlease(struct nfs4_delegation *dp, int flag) { + struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; int status; fl = nfs4_alloc_init_lease(dp, flag); if (!fl) return -ENOMEM; - status = vfs_setlease(dp->dl_vfs_file, fl->fl_type, &fl); + fl->fl_file = find_readable_file(fp); + list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); + status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); if (status) { - dp->dl_flock = NULL; + list_del_init(&dp->dl_perclnt); locks_free_lock(fl); return -ENOMEM; } + fp->fi_lease = fl; + fp->fi_deleg_file = fl->fl_file; + get_file(fp->fi_deleg_file); + atomic_set(&fp->fi_delegees, 1); + list_add(&dp->dl_perfile, &fp->fi_delegations); + return 0; +} + +static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +{ + struct nfs4_file *fp = dp->dl_file; + + if (!fp->fi_lease) + return nfs4_setlease(dp, flag); + spin_lock(&recall_lock); + if (fp->fi_had_conflict) { + spin_unlock(&recall_lock); + return -EAGAIN; + } + atomic_inc(&fp->fi_delegees); + list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_unlock(&recall_lock); + list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); return 0; } @@ -2715,7 +2730,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta dp = alloc_init_deleg(sop->so_client, stp, fh, flag); if (dp == NULL) goto out_no_deleg; - status = nfs4_setlease(dp, flag); + status = nfs4_set_delegation(dp, flag); if (status) goto out_free; @@ -2731,7 +2746,7 @@ out: open->op_delegate_type = flag; return; out_free: - unhash_delegation(dp); + nfs4_put_delegation(dp); out_no_deleg: flag = NFS4_OPEN_DELEGATE_NONE; goto out; @@ -3139,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, goto out; renew_client(dp->dl_client); if (filpp) { - *filpp = find_readable_file(dp->dl_file); + *filpp = dp->dl_file->fi_deleg_file; BUG_ON(!*filpp); } } else { /* open or lock stateid */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 3074656ba7bf..2d31224b07bf 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -83,8 +83,6 @@ struct nfs4_delegation { atomic_t dl_count; /* ref count */ struct nfs4_client *dl_client; struct nfs4_file *dl_file; - struct file *dl_vfs_file; - struct file_lock *dl_flock; u32 dl_type; time_t dl_time; /* For recall: */ @@ -379,6 +377,9 @@ struct nfs4_file { */ atomic_t fi_readers; atomic_t fi_writers; + struct file *fi_deleg_file; + struct file_lock *fi_lease; + atomic_t fi_delegees; struct inode *fi_inode; u32 fi_id; /* used with stateowner->so_id * for stateid_hashtbl hash */ -- cgit v1.2.1 From 83f6b0c18204f68961f58b9f69e5dba0d36056a2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 6 Feb 2011 16:46:30 -0500 Subject: nfsd: break lease on unlink due to rename 4795bb37effb7b8fe77e2d2034545d062d3788a8 "nfsd: break lease on unlink, link, and rename", only broke the lease on the file that was being renamed, and didn't handle the case where the target path refers to an already-existing file that will be unlinked by a rename--in that case the target file should have any leases broken as well. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 30c73f8a5791..da1d9701f8e4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1742,6 +1742,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; host_err = nfsd_break_lease(odentry->d_inode); + if (host_err) + goto out_drop_write; + if (ndentry->d_inode) { + host_err = nfsd_break_lease(ndentry->d_inode); + if (host_err) + goto out_drop_write; + } if (host_err) goto out_drop_write; host_err = vfs_rename(fdir, odentry, tdir, ndentry); -- cgit v1.2.1 From eb14ab8ed24a0405fd056068b28c33a1cd846024 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Feb 2011 12:35:00 -0500 Subject: Btrfs: fix page->private races There is a race where btrfs_releasepage can drop the page->private contents just as alloc_extent_buffer is setting up pages for metadata. Because of how the Btrfs page flags work, this results in us skipping the crc on the page during IO. This patch sovles the race by waiting until after the extent buffer is inserted into the radix tree before it sets page private. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++-- fs/btrfs/extent_io.c | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b36eeef19194..3e1ea3e0477e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) tree = &BTRFS_I(page->mapping->host)->io_tree; - if (page->private == EXTENT_PAGE_PRIVATE) + if (page->private == EXTENT_PAGE_PRIVATE) { + WARN_ON(1); goto out; - if (!page->private) + } + if (!page->private) { + WARN_ON(1); goto out; + } len = page->private >> 2; WARN_ON(len == 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8862dda46ff6..0418bf2c9757 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1946,6 +1946,7 @@ void set_page_extent_mapped(struct page *page) static void set_page_extent_head(struct page *page, unsigned long len) { + WARN_ON(!PagePrivate(page)); set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } @@ -3195,7 +3196,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, } if (!PageUptodate(p)) uptodate = 0; - unlock_page(p); + + /* + * see below about how we avoid a nasty race with release page + * and why we unlock later + */ + if (i != 0) + unlock_page(p); } if (uptodate) set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); @@ -3219,9 +3226,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, atomic_inc(&eb->refs); spin_unlock(&tree->buffer_lock); radix_tree_preload_end(); + + /* + * there is a race where release page may have + * tried to find this extent buffer in the radix + * but failed. It will tell the VM it is safe to + * reclaim the, and it will clear the page private bit. + * We must make sure to set the page private bit properly + * after the extent buffer is in the radix tree so + * it doesn't get lost + */ + set_page_extent_mapped(eb->first_page); + set_page_extent_head(eb->first_page, eb->len); + if (!page0) + unlock_page(eb->first_page); return eb; free_eb: + if (eb->first_page && !page0) + unlock_page(eb->first_page); + if (!atomic_dec_and_test(&eb->refs)) return exists; btrfs_release_extent_buffer(eb); @@ -3272,10 +3296,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, continue; lock_page(page); + WARN_ON(!PagePrivate(page)); + + set_page_extent_mapped(page); if (i == 0) set_page_extent_head(page, eb->len); - else - set_page_private(page, EXTENT_PAGE_PRIVATE); clear_page_dirty_for_io(page); spin_lock_irq(&page->mapping->tree_lock); @@ -3465,6 +3490,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); + + WARN_ON(!PagePrivate(page)); + + set_page_extent_mapped(page); + if (i == 0) + set_page_extent_head(page, eb->len); + if (inc_all_pages) page_cache_get(page); if (!PageUptodate(page)) { -- cgit v1.2.1 From e3f24cc521cb7ba60ac137abd1939e4e03435e80 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Feb 2011 12:52:08 -0500 Subject: Btrfs: don't release pages when we can't clear the uptodate bits Btrfs tracks uptodate state in an rbtree as well as in the page bits. This is supposed to enable us to use block sizes other than the page size, but there are a few parts still missing before that completely works. But, our readpage routine trusts this additional range based tracking of uptodateness, much in the same way the buffer head up to date bits are trusted for the other filesystems. The problem is that sometimes we need to allocate memory in order to split records in the rbtree, even when we are just clearing bits. This can be difficult when our clearing function is called GFP_ATOMIC, which can happen in the releasepage path. So, what happens today looks like this: releasepage called with GFP_ATOMIC btrfs_releasepage calls clear_extent_bit clear_extent_bit fails to allocate ram, leaving the up to date bit set btrfs_releasepage returns success The end result is the page being gone, but btrfs thinking the range is up to date. Later on if someone tries to read that same page, the btrfs readpage code will return immediately thinking the page is already up to date. This commit fixes things to fail the releasepage when we can't clear the extent state bits. It covers both data pages and metadata tree blocks. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0418bf2c9757..e7aeba242701 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2822,9 +2822,17 @@ int try_release_extent_state(struct extent_map_tree *map, * at this point we can safely clear everything except the * locked bit and the nodatasum bit */ - clear_extent_bit(tree, start, end, + ret = clear_extent_bit(tree, start, end, ~(EXTENT_LOCKED | EXTENT_NODATASUM), 0, 0, NULL, mask); + + /* if clear_extent_bit failed for enomem reasons, + * we can't allow the release to continue. + */ + if (ret < 0) + ret = 0; + else + ret = 1; } return ret; } -- cgit v1.2.1 From 6848ad6461e551849ba3c32d945d4f45e96453a6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 14 Feb 2011 16:00:03 -0500 Subject: Btrfs: Fix balance panic Mark the cloned backref_node as checked in clone_backref_node() Signed-off-by: Yan, Zheng Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 1f5556acb530..0825e4ed9447 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, new_node->bytenr = dest->node->start; new_node->level = node->level; new_node->lowest = node->lowest; + new_node->checked = 1; new_node->root = dest; if (!node->lowest) { -- cgit v1.2.1 From 51788b1bdd0d68345bab0af4301e7fa429277228 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Mon, 14 Feb 2011 16:04:23 -0500 Subject: btrfs: prevent heap corruption in btrfs_ioctl_space_info() Commit bf5fc093c5b625e4259203f1cee7ca73488a5620 refactored btrfs_ioctl_space_info() and introduced several security issues. space_args.space_slots is an unsigned 64-bit type controlled by a possibly unprivileged caller. The comparison as a signed int type allows providing values that are treated as negative and cause the subsequent allocation size calculation to wrap, or be truncated to 0. By providing a size that's truncated to 0, kmalloc() will return ZERO_SIZE_PTR. It's also possible to provide a value smaller than the slot count. The subsequent loop ignores the allocation size when copying data in, resulting in a heap overflow or write to ZERO_SIZE_PTR. The fix changes the slot count type and comparison typecast to u64, which prevents truncation or signedness errors, and also ensures that we don't copy more data than we've allocated in the subsequent loop. Note that zero-size allocations are no longer possible since there is already an explicit check for space_args.space_slots being 0 and truncation of this value is no longer an issue. Signed-off-by: Dan Rosenberg Signed-off-by: Josef Bacik Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02d224e8c83f..be2d4f6aaa5e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2208,7 +2208,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) int num_types = 4; int alloc_size; int ret = 0; - int slot_count = 0; + u64 slot_count = 0; int i, c; if (copy_from_user(&space_args, @@ -2247,7 +2247,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) goto out; } - slot_count = min_t(int, space_args.space_slots, slot_count); + slot_count = min_t(u64, space_args.space_slots, slot_count); alloc_size = sizeof(*dest) * slot_count; @@ -2267,6 +2267,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) for (i = 0; i < num_types; i++) { struct btrfs_space_info *tmp; + if (!slot_count) + break; + info = NULL; rcu_read_lock(); list_for_each_entry_rcu(tmp, &root->fs_info->space_info, @@ -2288,7 +2291,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) memcpy(dest, &space, sizeof(space)); dest++; space_args.total_spaces++; + slot_count--; } + if (!slot_count) + break; } up_read(&info->groups_sem); } -- cgit v1.2.1 From 67100f255dba284bcbb5ce795355dad1cff35658 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 6 Feb 2011 19:58:21 +0000 Subject: Btrfs - Fix memory leak in btrfs_init_new_device() Memory allocated by calling kstrdup() should be freed. Signed-off-by: Ilya Dryomov Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7cad59353b09..dadaaa8005c8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1603,12 +1603,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = find_next_devid(root, &device->devid); if (ret) { + kfree(device->name); kfree(device); goto error; } trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { + kfree(device->name); kfree(device); ret = PTR_ERR(trans); goto error; -- cgit v1.2.1 From c26a920373a983b52223eed5a13b97404d8b4158 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 14 Feb 2011 00:45:29 +0000 Subject: Btrfs: check return value of alloc_extent_map() I add the check on the return value of alloc_extent_map() to several places. In addition, alloc_extent_map() returns only the address or NULL. Therefore, check by IS_ERR() is unnecessary. So, I remove IS_ERR() checking. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_map.c | 4 ++-- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 3 +++ 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 565e22d77b1b..a7aaa10c5302 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6584,7 +6584,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, u64 end = start + extent_key->offset - 1; em = alloc_extent_map(GFP_NOFS); - BUG_ON(!em || IS_ERR(em)); + BUG_ON(!em); em->start = start; em->len = extent_key->offset; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b0e1fce12530..2b6c12e983b3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; em = kmem_cache_alloc(extent_map_cache, mask); - if (!em || IS_ERR(em)) - return em; + if (!em) + return NULL; em->in_tree = 0; em->flags = 0; em->compress_type = BTRFS_COMPRESS_NONE; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0ff34b96607..65338a1d14ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -185,6 +185,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split = alloc_extent_map(GFP_NOFS); if (!split2) split2 = alloc_extent_map(GFP_NOFS); + BUG_ON(!split || !split2); write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c9bc0afdbfc6..8d392ed73d57 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -644,6 +644,7 @@ retry: async_extent->ram_size - 1, 0); em = alloc_extent_map(GFP_NOFS); + BUG_ON(!em); em->start = async_extent->start; em->len = async_extent->ram_size; em->orig_start = em->start; @@ -820,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(ret); em = alloc_extent_map(GFP_NOFS); + BUG_ON(!em); em->start = start; em->orig_start = em->start; ram_size = ins.offset; @@ -1169,6 +1171,7 @@ out_check: struct extent_map_tree *em_tree; em_tree = &BTRFS_I(inode)->extent_tree; em = alloc_extent_map(GFP_NOFS); + BUG_ON(!em); em->start = cur_offset; em->orig_start = em->start; em->len = num_bytes; -- cgit v1.2.1 From 844a391799c25d9ba85cbce33e4697db06083ec6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 00:38:26 -0500 Subject: nothing in do_follow_link() is going to see RCU Signed-off-by: Al Viro --- fs/namei.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index ec4b2d0190a8..9ce6d272f4f2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -668,9 +668,6 @@ force_reval_path(struct path *path, struct nameidata *nd) return 0; if (!status) { - /* Don't d_invalidate in rcu-walk mode */ - if (nameidata_drop_rcu(nd)) - return -ECHILD; d_invalidate(dentry); status = -ESTALE; } @@ -777,6 +774,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) int error; struct dentry *dentry = link->dentry; + BUG_ON(nd->flags & LOOKUP_RCU); + touch_atime(link->mnt, dentry); nd_set_link(nd, NULL); @@ -811,6 +810,11 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd) { void *cookie; int err = -ELOOP; + + /* We drop rcu-walk here */ + if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) + return -ECHILD; + if (current->link_count >= MAX_NESTED_LINKS) goto loop; if (current->total_link_count >= 40) @@ -1419,9 +1423,6 @@ exec_again: goto out_dput; if (inode->i_op->follow_link) { - /* We commonly drop rcu-walk here */ - if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) - return -ECHILD; BUG_ON(inode != next.dentry->d_inode); err = do_follow_link(&next, nd); if (err) @@ -1467,8 +1468,6 @@ last_component: break; if (inode && unlikely(inode->i_op->follow_link) && (lookup_flags & LOOKUP_FOLLOW)) { - if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) - return -ECHILD; BUG_ON(inode != next.dentry->d_inode); err = do_follow_link(&next, nd); if (err) -- cgit v1.2.1 From 24643087e748bf192f1182766716e522dc1c972f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 01:26:22 -0500 Subject: in do_lookup() split RCU and non-RCU cases of need_revalidate and use unlikely() instead of gotos, for fsck sake... Signed-off-by: Al Viro --- fs/namei.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 9ce6d272f4f2..7609bacc7046 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1259,9 +1259,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_flags & DCACHE_OP_REVALIDATE) - goto need_revalidate; -done2: + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { + dentry = do_revalidate(dentry, nd); + if (!dentry) + goto need_lookup; + if (IS_ERR(dentry)) + goto fail; + if (!(nd->flags & LOOKUP_RCU)) + goto done; + } path->mnt = mnt; path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, false))) @@ -1274,8 +1280,13 @@ done2: if (!dentry) goto need_lookup; found: - if (dentry->d_flags & DCACHE_OP_REVALIDATE) - goto need_revalidate; + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { + dentry = do_revalidate(dentry, nd); + if (!dentry) + goto need_lookup; + if (IS_ERR(dentry)) + goto fail; + } done: path->mnt = mnt; path->dentry = dentry; @@ -1317,16 +1328,6 @@ need_lookup: mutex_unlock(&dir->i_mutex); goto found; -need_revalidate: - dentry = do_revalidate(dentry, nd); - if (!dentry) - goto need_lookup; - if (IS_ERR(dentry)) - goto fail; - if (nd->flags & LOOKUP_RCU) - goto done2; - goto done; - fail: return PTR_ERR(dentry); } -- cgit v1.2.1 From f5e1c1c1afc1d979e2ac6a24cc99ba7143639f4d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 01:32:55 -0500 Subject: split do_revalidate() into RCU and non-RCU cases fixing oopsen in lookup_one_len() Signed-off-by: Al Viro --- fs/namei.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 7609bacc7046..a98f7f141780 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -592,12 +592,10 @@ static int d_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } -static inline struct dentry * +static struct dentry * do_revalidate(struct dentry *dentry, struct nameidata *nd) { - int status; - - status = d_revalidate(dentry, nd); + int status = d_revalidate(dentry, nd); if (unlikely(status <= 0)) { /* * The dentry failed validation. @@ -606,24 +604,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) * to return a fail status. */ if (status < 0) { - /* If we're in rcu-walk, we don't have a ref */ - if (!(nd->flags & LOOKUP_RCU)) - dput(dentry); + dput(dentry); dentry = ERR_PTR(status); - - } else { - /* Don't d_invalidate in rcu-walk mode */ - if (nameidata_dentry_drop_rcu_maybe(nd, dentry)) - return ERR_PTR(-ECHILD); - if (!d_invalidate(dentry)) { - dput(dentry); - dentry = NULL; - } + } else if (!d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; } } return dentry; } +static inline struct dentry * +do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) +{ + int status = dentry->d_op->d_revalidate(dentry, nd); + if (likely(status > 0)) + return dentry; + if (status == -ECHILD) { + if (nameidata_dentry_drop_rcu(nd, dentry)) + return ERR_PTR(-ECHILD); + return do_revalidate(dentry, nd); + } + if (status < 0) + return ERR_PTR(status); + /* Don't d_invalidate in rcu-walk mode */ + if (nameidata_dentry_drop_rcu(nd, dentry)) + return ERR_PTR(-ECHILD); + if (!d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; + } + return dentry; +} + static inline int need_reval_dot(struct dentry *dentry) { if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) @@ -1260,7 +1273,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, nd->seq = seq; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { - dentry = do_revalidate(dentry, nd); + dentry = do_revalidate_rcu(dentry, nd); if (!dentry) goto need_lookup; if (IS_ERR(dentry)) -- cgit v1.2.1 From f60aef7ec625236a6366722bb1be7b37596bf0ae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 01:35:28 -0500 Subject: drop out of RCU in return_reval ... thus killing the need to handle drop-from-RCU in d_revalidate() Signed-off-by: Al Viro --- fs/namei.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index a98f7f141780..10635d329175 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -571,25 +571,9 @@ void release_open_intent(struct nameidata *nd) } } -/* - * Call d_revalidate and handle filesystems that request rcu-walk - * to be dropped. This may be called and return in rcu-walk mode, - * regardless of success or error. If -ECHILD is returned, the caller - * must return -ECHILD back up the path walk stack so path walk may - * be restarted in ref-walk mode. - */ -static int d_revalidate(struct dentry *dentry, struct nameidata *nd) +static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { - int status; - - status = dentry->d_op->d_revalidate(dentry, nd); - if (status == -ECHILD) { - if (nameidata_dentry_drop_rcu(nd, dentry)) - return status; - status = dentry->d_op->d_revalidate(dentry, nd); - } - - return status; + return dentry->d_op->d_revalidate(dentry, nd); } static struct dentry * @@ -617,7 +601,7 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) static inline struct dentry * do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) { - int status = dentry->d_op->d_revalidate(dentry, nd); + int status = d_revalidate(dentry, nd); if (likely(status > 0)) return dentry; if (status == -ECHILD) { @@ -1517,12 +1501,15 @@ return_reval: * We may need to check the cached dentry for staleness. */ if (need_reval_dot(nd->path.dentry)) { + if (nameidata_drop_rcu_last_maybe(nd)) + return -ECHILD; /* Note: we do not d_invalidate() */ err = d_revalidate(nd->path.dentry, nd); if (!err) err = -ESTALE; if (err < 0) break; + return 0; } return_base: if (nameidata_drop_rcu_last_maybe(nd)) -- cgit v1.2.1 From 4e924a4f53a0e1ea060bd50695a12a238b250322 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 01:42:59 -0500 Subject: get rid of nameidata_dentry_drop_rcu() calling nameidata_drop_rcu() can't happen anymore and didn't work right anyway Signed-off-by: Al Viro --- fs/namei.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 10635d329175..9e701e28a329 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; - /* - * It can be possible to revalidate the dentry that we started - * the path walk with. force_reval_path may also revalidate the - * dentry already committed to the nameidata. - */ - if (unlikely(parent == dentry)) - return nameidata_drop_rcu(nd); - BUG_ON(!(nd->flags & LOOKUP_RCU)); if (nd->root.mnt) { spin_lock(&fs->lock); -- cgit v1.2.1 From 261cd298a8c363d7985e3482946edb4bfedacf98 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 15 Feb 2011 09:43:32 +0100 Subject: s390: remove task_show_regs task_show_regs used to be a debugging aid in the early bringup days of Linux on s390. /proc//status is a world readable file, it is not a good idea to show the registers of a process. The only correct fix is to remove task_show_regs. Reported-by: Al Viro Signed-off-by: Martin Schwidefsky Signed-off-by: Linus Torvalds --- fs/proc/array.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index df2b703b9d0f..7c99c1cf7e5c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, task_cap(m, task); task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); -#if defined(CONFIG_S390) - task_show_regs(m, task); -#endif task_context_switch_counts(m, task); return 0; } -- cgit v1.2.1 From 58a69cb47ec6991bf006a3e5d202e8571b0327a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 09:25:31 +0100 Subject: workqueue, freezer: unify spelling of 'freeze' + 'able' to 'freezable' There are two spellings in use for 'freeze' + 'able' - 'freezable' and 'freezeable'. The former is the more prominent one. The latter is mostly used by workqueue and in a few other odd places. Unify the spelling to 'freezable'. Signed-off-by: Tejun Heo Reported-by: Alan Stern Acked-by: "Rafael J. Wysocki" Acked-by: Greg Kroah-Hartman Acked-by: Dmitry Torokhov Cc: David Woodhouse Cc: Alex Dubov Cc: "David S. Miller" Cc: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++-- fs/gfs2/main.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb152e6..7cd9a5a68d59 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void) #endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + WQ_HIGHPRI | WQ_FREEZABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab6e17e..85ba027d1c4d 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -144,7 +144,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs_recovery_wq) goto fail_wq; -- cgit v1.2.1 From 3abb17e82f08628b59e20d8cbcb55e2204180f69 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 16 Feb 2011 08:56:55 -0800 Subject: vfs: fix BUG_ON() in fs/namei.c:1461 When Al moved the nameidata_dentry_drop_rcu_maybe() call into the do_follow_link function in commit 844a391799c2 ("nothing in do_follow_link() is going to see RCU"), he mistakenly left the BUG_ON(inode != path->dentry->d_inode); behind. Which would otherwise be ok, but that BUG_ON() really needs to be _after_ dropping RCU, since the dentry isn't necessarily stable otherwise. So complete the code movement in that commit, and move the BUG_ON() into do_follow_link() too. This means that we need to pass in 'inode' as an argument (just for this one use), but that's a small thing. And eventually we may be confident enough in our path lookup that we can just remove the BUG_ON() and the unnecessary inode argument. Reported-and-tested-by: Eric Dumazet Acked-by: Al Viro Signed-off-by: Linus Torvalds --- fs/namei.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 9e701e28a329..0087cf9c2c6b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -795,7 +795,7 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) * Without that kind of total limit, nasty chains of consecutive * symlinks can cause almost arbitrarily long lookups. */ -static inline int do_follow_link(struct path *path, struct nameidata *nd) +static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd) { void *cookie; int err = -ELOOP; @@ -803,6 +803,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd) /* We drop rcu-walk here */ if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) return -ECHILD; + BUG_ON(inode != path->dentry->d_inode); if (current->link_count >= MAX_NESTED_LINKS) goto loop; @@ -1413,8 +1414,7 @@ exec_again: goto out_dput; if (inode->i_op->follow_link) { - BUG_ON(inode != next.dentry->d_inode); - err = do_follow_link(&next, nd); + err = do_follow_link(inode, &next, nd); if (err) goto return_err; nd->inode = nd->path.dentry->d_inode; @@ -1458,8 +1458,7 @@ last_component: break; if (inode && unlikely(inode->i_op->follow_link) && (lookup_flags & LOOKUP_FOLLOW)) { - BUG_ON(inode != next.dentry->d_inode); - err = do_follow_link(&next, nd); + err = do_follow_link(inode, &next, nd); if (err) goto return_err; nd->inode = nd->path.dentry->d_inode; -- cgit v1.2.1 From 47c85291d3dd1a51501555000b90f8e281a0458e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Feb 2011 13:08:35 +1100 Subject: nfsd: correctly handle return value from nfsd_map_name_to_* These functions return an nfs status, not a host_err. So don't try to convert before returning. This is a regression introduced by 3c726023402a2f3b28f49b9d90ebf9e71151157d; I fixed up two of the callers, but missed these two. Cc: stable@kernel.org Reported-by: Herbert Poetzl Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 956629b9cdc9..1275b8655070 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) - goto out_nfserr; + if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + return status; iattr->ia_valid |= ATTR_UID; } if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { @@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) - goto out_nfserr; + if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + return status; iattr->ia_valid |= ATTR_GID; } if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { -- cgit v1.2.1 From e51900f7d38cbcfb481d84567fd92540e7e1d23a Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Wed, 16 Feb 2011 18:11:53 -0500 Subject: block: revert block_dev read-only check This reverts commit 75f1dc0d076d ("block: check bdev_read_only() from blkdev_get()"). That commit added stricter checking to make sure devices that were being used read-only were actually opened in that mode. It turns out that the change breaks a bunch of kernel code that opens block devices. Affected systems include dm, md, and the loop device. Because strict checking for read-only opens of block devices was not done before this, the code that opens the devices was opening them read-write even if they were being used read-only. Auditing all that code will take time, and new userspace packages for dm, mdadm, etc. will also be required. Signed-off-by: Chuck Ebbert Signed-off-by: Linus Torvalds --- fs/block_dev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 333a7bb4cb9c..4fb8a3431531 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1215,12 +1215,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) res = __blkdev_get(bdev, mode, 0); - /* __blkdev_get() may alter read only status, check it afterwards */ - if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { - __blkdev_put(bdev, mode, 0); - res = -EACCES; - } - if (whole) { /* finish claiming */ mutex_lock(&bdev->bd_mutex); @@ -1298,6 +1292,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, if (err) return ERR_PTR(err); + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, mode); + return ERR_PTR(-EACCES); + } + return bdev; } EXPORT_SYMBOL(blkdev_get_by_path); -- cgit v1.2.1 From 9616125611ee47693186533d76e403856a36b3c8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Feb 2011 09:34:16 -0500 Subject: cifs: fix handling of scopeid in cifs_convert_address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code finds, the '%' sign in an ipv6 address and copies that to a buffer allocated on the stack. It then ignores that buffer, and passes 'pct' to simple_strtoul(), which doesn't work right because we're comparing 'endp' against a completely different string. Fix it by passing the correct pointer. While we're at it, this is a good candidate for conversion to strict_strtoul as well. Cc: stable@kernel.org Cc: David Howells Reported-by: Björn JACKE Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/netmisc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 8d9189f64477..79f641eeda30 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) { int rc, alen, slen; const char *pct; - char *endp, scope_id[13]; + char scope_id[13]; struct sockaddr_in *s4 = (struct sockaddr_in *) dst; struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; @@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) memcpy(scope_id, pct + 1, slen); scope_id[slen] = '\0'; - s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); - if (endp != scope_id + slen) - return 0; + rc = strict_strtoul(scope_id, 0, + (unsigned long *)&s6->sin6_scope_id); + rc = (rc == 0) ? 1 : 0; } return rc; -- cgit v1.2.1 From fa7ea87a057958a8b7926c1a60a3ca6d696328ed Mon Sep 17 00:00:00 2001 From: Timo Warns Date: Thu, 17 Feb 2011 22:27:40 +0100 Subject: fs/partitions: Validate map_count in Mac partition tables Validate number of blocks in map and remove redundant variable. Signed-off-by: Timo Warns Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/partitions/mac.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index 68d6a216ee79..11f688bd76c5 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c @@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len) int mac_partition(struct parsed_partitions *state) { - int slot = 1; Sector sect; unsigned char *data; - int blk, blocks_in_map; + int slot, blocks_in_map; unsigned secsize; #ifdef CONFIG_PPC_PMAC int found_root = 0; @@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state) put_dev_sector(sect); return 0; /* not a MacOS disk */ } - strlcat(state->pp_buf, " [mac]", PAGE_SIZE); blocks_in_map = be32_to_cpu(part->map_count); - for (blk = 1; blk <= blocks_in_map; ++blk) { - int pos = blk * secsize; + if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) { + put_dev_sector(sect); + return 0; + } + strlcat(state->pp_buf, " [mac]", PAGE_SIZE); + for (slot = 1; slot <= blocks_in_map; ++slot) { + int pos = slot * secsize; put_dev_sector(sect); data = read_part_sector(state, pos/512, §); if (!data) @@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state) } if (goodness > found_root_goodness) { - found_root = blk; + found_root = slot; found_root_goodness = goodness; } } #endif /* CONFIG_PPC_PMAC */ - - ++slot; } #ifdef CONFIG_PPC_PMAC if (found_root_goodness) -- cgit v1.2.1 From 8787c7a3e0e3f1aa21856d6b6cd6880cc93497e9 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 17 Feb 2011 18:51:24 -0600 Subject: eCryptfs: Revert "dont call lookup_one_len to avoid NULL nameidata" This reverts commit 21edad32205e97dc7ccb81a85234c77e760364c8 and commit 93c3fe40c279f002906ad14584c30671097d4394, which fixed a regression by the former. Al Viro pointed out bypassed dcache lookups in ecryptfs_new_lower_dentry(), misuse of vfs_path_lookup() in ecryptfs_lookup_one_lower() and a dislike of passing nameidata to the lower filesystem. Reported-by: Al Viro Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 106 ++++++---------------------------------------------- 1 file changed, 12 insertions(+), 94 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index bd33f87a1907..fc44823fea3a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -348,75 +348,6 @@ out: return rc; } -/** - * ecryptfs_new_lower_dentry - * @name: The name of the new dentry. - * @lower_dir_dentry: Parent directory of the new dentry. - * @nd: nameidata from last lookup. - * - * Create a new dentry or get it from lower parent dir. - */ -static struct dentry * -ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry, - struct nameidata *nd) -{ - struct dentry *new_dentry; - struct dentry *tmp; - struct inode *lower_dir_inode; - - lower_dir_inode = lower_dir_dentry->d_inode; - - tmp = d_alloc(lower_dir_dentry, name); - if (!tmp) - return ERR_PTR(-ENOMEM); - - mutex_lock(&lower_dir_inode->i_mutex); - new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd); - mutex_unlock(&lower_dir_inode->i_mutex); - - if (!new_dentry) - new_dentry = tmp; - else - dput(tmp); - - return new_dentry; -} - - -/** - * ecryptfs_lookup_one_lower - * @ecryptfs_dentry: The eCryptfs dentry that we are looking up - * @lower_dir_dentry: lower parent directory - * @name: lower file name - * - * Get the lower dentry from vfs. If lower dentry does not exist yet, - * create it. - */ -static struct dentry * -ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, - struct dentry *lower_dir_dentry, struct qstr *name) -{ - struct nameidata nd; - struct vfsmount *lower_mnt; - int err; - - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( - ecryptfs_dentry->d_parent)); - err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); - mntput(lower_mnt); - - if (!err) { - /* we dont need the mount */ - mntput(nd.path.mnt); - return nd.path.dentry; - } - if (err != -ENOENT) - return ERR_PTR(err); - - /* create a new lower dentry */ - return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd); -} - /** * ecryptfs_lookup * @ecryptfs_dir_inode: The eCryptfs directory inode @@ -434,7 +365,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, size_t encrypted_and_encoded_name_size; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; struct dentry *lower_dir_dentry, *lower_dentry; - struct qstr lower_name; int rc = 0; if ((ecryptfs_dentry->d_name.len == 1 @@ -444,20 +374,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, goto out_d_drop; } lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); - lower_name.name = ecryptfs_dentry->d_name.name; - lower_name.len = ecryptfs_dentry->d_name.len; - lower_name.hash = ecryptfs_dentry->d_name.hash; - if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { - rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - lower_dir_dentry->d_inode, &lower_name); - if (rc < 0) - goto out_d_drop; - } - lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, - lower_dir_dentry, &lower_name); + mutex_lock(&lower_dir_dentry->d_inode->i_mutex); + lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, + lower_dir_dentry, + ecryptfs_dentry->d_name.len); + mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, encrypted_and_encoded_name); goto out_d_drop; @@ -479,20 +403,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, "filename; rc = [%d]\n", __func__, rc); goto out_d_drop; } - lower_name.name = encrypted_and_encoded_name; - lower_name.len = encrypted_and_encoded_name_size; - lower_name.hash = full_name_hash(lower_name.name, lower_name.len); - if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { - rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - lower_dir_dentry->d_inode, &lower_name); - if (rc < 0) - goto out_d_drop; - } - lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, - lower_dir_dentry, &lower_name); + mutex_lock(&lower_dir_dentry->d_inode->i_mutex); + lower_dentry = lookup_one_len(encrypted_and_encoded_name, + lower_dir_dentry, + encrypted_and_encoded_name_size); + mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, encrypted_and_encoded_name); goto out_d_drop; -- cgit v1.2.1 From 97d79b403ef03f729883246208ef5d8a2ebc4d68 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 18 Jan 2011 13:37:28 -0800 Subject: ceph: keep reference to parent inode on ceph_dentry When creating a new dentry we now hold a reference to the parent inode in the ceph_dentry. This is required due to the new RCU changes from 949854d0, which set dentry->d_parent to NULL in d_kill before calling the ->release() callback. If/when that behavior is changed, we can revert this hack. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/dir.c | 5 ++++- fs/ceph/super.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 562f9884a4d9..6bfaa6a4ec47 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -60,6 +60,7 @@ int ceph_init_dentry(struct dentry *dentry) } di->dentry = dentry; di->lease_session = NULL; + di->parent_inode = igrab(dentry->d_parent->d_inode); dentry->d_fsdata = di; dentry->d_time = jiffies; ceph_dentry_lru_add(dentry); @@ -1025,7 +1026,7 @@ static void ceph_dentry_release(struct dentry *dentry) u64 snapid = CEPH_NOSNAP; if (!IS_ROOT(dentry)) { - parent_inode = dentry->d_parent->d_inode; + parent_inode = di->parent_inode; if (parent_inode) snapid = ceph_snap(parent_inode); } @@ -1050,6 +1051,8 @@ static void ceph_dentry_release(struct dentry *dentry) kmem_cache_free(ceph_dentry_cachep, di); dentry->d_fsdata = NULL; } + if (parent_inode) + iput(parent_inode); } static int ceph_snapdir_d_revalidate(struct dentry *dentry, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6e0826695112..c01aa646b407 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -207,6 +207,7 @@ struct ceph_dentry_info { struct dentry *dentry; u64 time; u64 offset; + struct inode *parent_inode; }; struct ceph_inode_xattrs_info { -- cgit v1.2.1 From 70b8902199003b098fde86d1db02e7465115a02c Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 17 Feb 2011 17:35:20 -0600 Subject: eCryptfs: Handle NULL nameidata pointers Allow for NULL nameidata pointers in eCryptfs create, lookup, and d_revalidate functions. Signed-off-by: Tyler Hicks --- fs/ecryptfs/dentry.c | 22 +++++++++++++--------- fs/ecryptfs/ecryptfs_kernel.h | 3 +-- fs/ecryptfs/inode.c | 30 +++++++++++++++--------------- 3 files changed, 29 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 6fc4f319b550..534c1d46e69e 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *lower_dentry; struct vfsmount *lower_mnt; - struct dentry *dentry_save; - struct vfsmount *vfsmount_save; + struct dentry *dentry_save = NULL; + struct vfsmount *vfsmount_save = NULL; int rc = 1; - if (nd->flags & LOOKUP_RCU) + if (nd && nd->flags & LOOKUP_RCU) return -ECHILD; lower_dentry = ecryptfs_dentry_to_lower(dentry); lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; - dentry_save = nd->path.dentry; - vfsmount_save = nd->path.mnt; - nd->path.dentry = lower_dentry; - nd->path.mnt = lower_mnt; + if (nd) { + dentry_save = nd->path.dentry; + vfsmount_save = nd->path.mnt; + nd->path.dentry = lower_dentry; + nd->path.mnt = lower_mnt; + } rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); - nd->path.dentry = dentry_save; - nd->path.mnt = vfsmount_save; + if (nd) { + nd->path.dentry = dentry_save; + nd->path.mnt = vfsmount_save; + } if (dentry->d_inode) { struct inode *lower_inode = ecryptfs_inode_to_lower(dentry->d_inode); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index dbc84ed96336..e00753496e3e 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry, u32 flags); int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode, - struct nameidata *ecryptfs_nd); + struct inode *ecryptfs_dir_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index fc44823fea3a..eb0d267ee715 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode, unsigned int flags_save; int rc; - dentry_save = nd->path.dentry; - vfsmount_save = nd->path.mnt; - flags_save = nd->flags; - nd->path.dentry = lower_dentry; - nd->path.mnt = lower_mnt; - nd->flags &= ~LOOKUP_OPEN; + if (nd) { + dentry_save = nd->path.dentry; + vfsmount_save = nd->path.mnt; + flags_save = nd->flags; + nd->path.dentry = lower_dentry; + nd->path.mnt = lower_mnt; + nd->flags &= ~LOOKUP_OPEN; + } rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); - nd->path.dentry = dentry_save; - nd->path.mnt = vfsmount_save; - nd->flags = flags_save; + if (nd) { + nd->path.dentry = dentry_save; + nd->path.mnt = vfsmount_save; + nd->flags = flags_save; + } return rc; } @@ -241,8 +245,7 @@ out: */ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode, - struct nameidata *ecryptfs_nd) + struct inode *ecryptfs_dir_inode) { struct dentry *lower_dir_dentry; struct vfsmount *lower_mnt; @@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, goto out; if (special_file(lower_inode->i_mode)) goto out; - if (!ecryptfs_nd) - goto out; /* Released in this function */ page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); if (!page_virt) { @@ -417,8 +418,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, } lookup_and_interpose: rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, - ecryptfs_dir_inode, - ecryptfs_nd); + ecryptfs_dir_inode); goto out; out_d_drop: d_drop(ecryptfs_dentry); -- cgit v1.2.1 From 323ef68faf1bbd9b1e66aea268fd09d358d7e8ab Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 16 Feb 2011 04:49:59 +0000 Subject: ecryptfs: read on a directory should return EISDIR if not supported read() calls against a file descriptor connected to a directory are incorrectly returning EINVAL rather than EISDIR: [EISDIR] [XSI] [Option Start] The fildes argument refers to a directory and the implementation does not allow the directory to be read using read() or pread(). The readdir() function should be used instead. [Option End] This occurs because we do not have a .read operation defined for ecryptfs directories. Connect this up to generic_read_dir(). BugLink: http://bugs.launchpad.net/bugs/719691 Signed-off-by: Andy Whitcroft Signed-off-by: Tyler Hicks --- fs/ecryptfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 81e10e6a9443..7d1050e254f9 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations ecryptfs_dir_fops = { .readdir = ecryptfs_readdir, + .read = generic_read_dir, .unlocked_ioctl = ecryptfs_unlocked_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, -- cgit v1.2.1 From 55f9cf6bbaa682958a7dd2755f883b768270c3ce Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 11 Jan 2011 12:43:42 -0600 Subject: eCryptfs: Copy up lower inode attrs in getattr The lower filesystem may do some type of inode revalidation during a getattr call. eCryptfs should take advantage of that by copying the lower inode attributes to the eCryptfs inode after a call to vfs_getattr() on the lower inode. I originally wrote this fix while working on eCryptfs on nfsv3 support, but discovered it also fixed an eCryptfs on ext4 nanosecond timestamp bug that was reported. https://bugs.launchpad.net/bugs/613873 Cc: Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index eb0d267ee715..b592938a84bc 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1010,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), ecryptfs_dentry_to_lower(dentry), &lower_stat); if (!rc) { + fsstack_copy_attr_all(dentry->d_inode, + ecryptfs_inode_to_lower(dentry->d_inode)); generic_fillattr(dentry->d_inode, stat); stat->blocks = lower_stat.blocks; } -- cgit v1.2.1 From 5e640927a597a7c3e72b61e8bce74c22e906de65 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Thu, 17 Feb 2011 14:38:31 -0600 Subject: cifs: Fix regression in LANMAN (LM) auth code LANMAN response length was changed to 16 bytes instead of 24 bytes. Revert it back to 24 bytes. Signed-off-by: Shirish Pargaonkar CC: stable@kernel.org Signed-off-by: Steve French --- fs/cifs/sess.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 1adc9625a344..16765703131b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate: if (type == LANMAN) { #ifdef CONFIG_CIFS_WEAK_PW_HASH - char lnm_session_key[CIFS_SESS_KEY_SIZE]; + char lnm_session_key[CIFS_AUTH_RESP_SIZE]; pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; /* no capabilities flags in old lanman negotiation */ - pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); /* Calculate hash with password and copy into bcc_ptr. * Encryption Key (stored as in cryptkey) gets used if the @@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate: true : false, lnm_session_key); ses->flags |= CIFS_SES_LANMAN; - memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); - bcc_ptr += CIFS_SESS_KEY_SIZE; + memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; /* can not sign if LANMAN negotiated so no need to calculate signing key? but what if server -- cgit v1.2.1 From eed9e8307e01d6d8d6170afcb2f00e1a471b87d4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 21 Feb 2011 22:31:47 +0000 Subject: [CIFS] update cifs version Update version to 1.71 so we can more easily spot modules with the last two fixes Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 4a3330235d55..a9371b6578c0 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.70" +#define CIFS_VERSION "1.71" #endif /* _CIFSFS_H */ -- cgit v1.2.1 From 361821854b71fc3a53c9e17701538247bddbd4ba Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 20 Feb 2011 20:08:35 -0800 Subject: Docbook: add fs/eventfd.c and fix typos in it Add fs/eventfd.c to filesystems docbook. Make typo corrections in fs/eventfd.c. Signed-off-by: Randy Dunlap Cc: Davide Libenzi Signed-off-by: Linus Torvalds --- fs/eventfd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/eventfd.c b/fs/eventfd.c index e0194b3e14d6..d9a591773919 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get); * @ctx: [in] Pointer to eventfd context. * * The eventfd context reference must have been previously acquired either - * with eventfd_ctx_get() or eventfd_ctx_fdget()). + * with eventfd_ctx_get() or eventfd_ctx_fdget(). */ void eventfd_ctx_put(struct eventfd_ctx *ctx) { @@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. * @ctx: [in] Pointer to eventfd context. * @wait: [in] Wait queue to be removed. - * @cnt: [out] Pointer to the 64bit conter value. + * @cnt: [out] Pointer to the 64-bit counter value. * - * Returns zero if successful, or the following error codes: + * Returns %0 if successful, or the following error codes: * * -EAGAIN : The operation would have blocked. * @@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. * @ctx: [in] Pointer to eventfd context. * @no_wait: [in] Different from zero if the operation should not block. - * @cnt: [out] Pointer to the 64bit conter value. + * @cnt: [out] Pointer to the 64-bit counter value. * - * Returns zero if successful, or the following error codes: + * Returns %0 if successful, or the following error codes: * - * -EAGAIN : The operation would have blocked but @no_wait was nonzero. + * -EAGAIN : The operation would have blocked but @no_wait was non-zero. * -ERESTARTSYS : A signal interrupted the wait operation. * * If @no_wait is zero, the function might sleep until the eventfd internal -- cgit v1.2.1