summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/block-rsv.c3
-rw-r--r--fs/btrfs/ctree.c32
-rw-r--r--fs/btrfs/disk-io.c25
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/free-space-cache.c7
-rw-r--r--fs/btrfs/free-space-tree.c50
-rw-r--r--fs/btrfs/free-space-tree.h3
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/zoned.c17
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/connect.c7
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/ext4/balloc.c25
-rw-r--r--fs/ext4/extents_status.c30
-rw-r--r--fs/ext4/hash.c6
-rw-r--r--fs/ext4/inline.c17
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/mmp.c30
-rw-r--r--fs/ext4/namei.c53
-rw-r--r--fs/ext4/super.c19
-rw-r--r--fs/ext4/xattr.c5
-rw-r--r--fs/f2fs/data.c20
-rw-r--r--fs/f2fs/debug.c65
-rw-r--r--fs/f2fs/extent_cache.c579
-rw-r--r--fs/f2fs/f2fs.h213
-rw-r--r--fs/f2fs/file.c8
-rw-r--r--fs/f2fs/gc.c143
-rw-r--r--fs/f2fs/gc.h14
-rw-r--r--fs/f2fs/inode.c44
-rw-r--r--fs/f2fs/namei.c20
-rw-r--r--fs/f2fs/node.c10
-rw-r--r--fs/f2fs/node.h2
-rw-r--r--fs/f2fs/segment.c11
-rw-r--r--fs/f2fs/shrinker.c19
-rw-r--r--fs/f2fs/super.c12
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/ksmbd/connection.c68
-rw-r--r--fs/ksmbd/connection.h58
-rw-r--r--fs/ksmbd/mgmt/tree_connect.c3
-rw-r--r--fs/ksmbd/mgmt/user_session.c138
-rw-r--r--fs/ksmbd/mgmt/user_session.h5
-rw-r--r--fs/ksmbd/server.c3
-rw-r--r--fs/ksmbd/smb2pdu.c122
-rw-r--r--fs/ksmbd/smb2pdu.h2
-rw-r--r--fs/ksmbd/transport_tcp.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c11
-rw-r--r--fs/ntfs3/bitmap.c3
-rw-r--r--fs/ntfs3/namei.c10
-rw-r--r--fs/ntfs3/ntfs.h3
-rw-r--r--fs/proc/proc_sysctl.c42
54 files changed, 1239 insertions, 772 deletions
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index ec96285357e0..2044f1e18629 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -122,7 +122,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ if (qgroup_to_release_ret &&
+ block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
qgroup_to_release = block_rsv->qgroup_rsv_reserved -
block_rsv->qgroup_rsv_size;
block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index dcb510f38dda..dbbae92ac23d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -4411,10 +4411,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
+ struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
if (key.offset > 0) {
key.offset--;
@@ -4431,8 +4433,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret <= 0)
return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5b1b5e1a63c8..acae82a5f8ee 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3215,23 +3215,34 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
{
int ret;
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
- bool clear_free_space_tree = false;
+ bool rebuild_free_space_tree = false;
if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
btrfs_warn(fs_info, "free space tree is invalid");
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
}
- if (clear_free_space_tree) {
- btrfs_info(fs_info, "clearing free space tree");
- ret = btrfs_clear_free_space_tree(fs_info);
+ if (rebuild_free_space_tree) {
+ btrfs_info(fs_info, "rebuilding free space tree");
+ ret = btrfs_rebuild_free_space_tree(fs_info);
if (ret) {
btrfs_warn(fs_info,
- "failed to clear free space tree: %d", ret);
+ "failed to rebuild free space tree: %d", ret);
+ goto out;
+ }
+ }
+
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
+ btrfs_info(fs_info, "disabling free space tree");
+ ret = btrfs_delete_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to disable free space tree: %d", ret);
goto out;
}
}
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 6bb9fa961a6a..4fab7da63259 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -47,13 +47,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
u64 start, end, i_size;
int ret;
+ spin_lock(&inode->lock);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
inode->disk_i_size = i_size;
- return;
+ goto out_unlock;
}
- spin_lock(&inode->lock);
ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
&end, EXTENT_DIRTY);
if (!ret && start == 0)
@@ -61,6 +61,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
else
i_size = 0;
inode->disk_i_size = i_size;
+out_unlock:
spin_unlock(&inode->lock);
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6a8f2bd350f4..4cd8e44cba4c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -861,15 +861,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- recalculate_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
if (ret) {
+ spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
+ ctl->total_bitmaps++;
+ recalculate_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 367bcfcf68f5..e040eea3937d 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1247,7 +1247,7 @@ out:
return ret;
}
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -1293,6 +1293,54 @@ abort:
return ret;
}
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+ struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(free_space_root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (ret)
+ goto abort;
+
+ node = rb_first_cached(&fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *block_group;
+
+ block_group = rb_entry(node, struct btrfs_block_group,
+ cache_node);
+ ret = populate_free_space_tree(trans, block_group);
+ if (ret)
+ goto abort;
+ node = rb_next(node);
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+
+ ret = btrfs_commit_transaction(trans);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+abort:
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+}
+
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index dc2463e4cfe3..6d5551d0ced8 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -18,7 +18,8 @@ struct btrfs_caching_control;
void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0e516aefbf51..56e9efbffd58 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3237,6 +3237,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
+ } else if (btrfs_is_data_reloc_root(inode->root)) {
+ btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes);
}
btrfs_free_io_failure_record(inode, start, end);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0cebc203c4cc..9de647e48e7e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -443,7 +443,9 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
case BTRFS_EXCLOP_BALANCE_PAUSED:
spin_lock(&fs_info->super_lock);
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
+ fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
spin_unlock(&fs_info->super_lock);
break;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index dd8777872143..228eeb04d03d 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -148,10 +148,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
pr_cont("shared data backref parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
/*
- * offset is supposed to be a tree block which
- * must be aligned to nodesize.
+ * Offset is supposed to be a tree block which must be
+ * aligned to sectorsize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
pr_info(
"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e43b16199e22..6438300fa246 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1136,7 +1136,11 @@ out:
!btrfs_test_opt(info, CLEAR_CACHE)) {
btrfs_err(info, "cannot disable free space tree");
ret = -EINVAL;
-
+ }
+ if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
+ !btrfs_test_opt(info, FREE_SPACE_TREE)) {
+ btrfs_err(info, "cannot disable free space tree with block-group-tree feature");
+ ret = -EINVAL;
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index e97c5a1ac95d..836babd23db5 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -119,10 +119,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
int i;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
- u64 bytenr;
-
- bytenr = ((zones[i].start + zones[i].len)
- << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
+ u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
+ u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
+ BTRFS_SUPER_INFO_SIZE;
page[i] = read_cache_page_gfp(mapping,
bytenr >> PAGE_SHIFT, GFP_NOFS);
@@ -1163,12 +1162,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
return -ERANGE;
/* All the zones are conventional */
- if (find_next_bit(zinfo->seq_zones, begin, end) == end)
+ if (find_next_bit(zinfo->seq_zones, end, begin) == end)
return 0;
/* All the zones are sequential and empty */
- if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
- find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
+ if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
+ find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
return 0;
for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@@ -1605,11 +1604,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
!list_empty(&eb->release_list))
return;
+ memzero_extent_buffer(eb, 0, eb->len);
+ set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
set_extent_buffer_dirty(eb);
set_extent_bits_nowait(&trans->dirty_pages, eb->start,
eb->start + eb->len - 1, EXTENT_DIRTY);
- memzero_extent_buffer(eb, 0, eb->len);
- set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
spin_lock(&trans->releasing_ebs_lock);
list_add_tail(&eb->release_list, &trans->releasing_ebs);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 03e3e95cf25b..078df1e2dd18 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -743,6 +743,7 @@ static void cifs_umount_begin(struct super_block *sb)
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
+ cifs_close_all_deferred_files(tcon);
/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
/* cancel_notify_requests(tcon); */
if (tcon->ses && tcon->ses->server) {
@@ -758,6 +759,20 @@ static void cifs_umount_begin(struct super_block *sb)
return;
}
+static int cifs_freeze(struct super_block *sb)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon;
+
+ if (cifs_sb == NULL)
+ return 0;
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+
+ cifs_close_all_deferred_files(tcon);
+ return 0;
+}
+
#ifdef CONFIG_CIFS_STATS2
static int cifs_show_stats(struct seq_file *s, struct dentry *root)
{
@@ -796,6 +811,7 @@ static const struct super_operations cifs_super_ops = {
as opens */
.show_options = cifs_show_options,
.umount_begin = cifs_umount_begin,
+ .freeze_fs = cifs_freeze,
#ifdef CONFIG_CIFS_STATS2
.show_stats = cifs_show_stats,
#endif
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 21b31d1640e5..935fe198a4ba 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2742,6 +2742,13 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
+
+ /* We do not want to use a superblock that has been shutdown */
+ if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
if (tlink == NULL) {
/* can not match superblock if tlink were ever null */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ccf311750927..7468f8baf499 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1682,7 +1682,7 @@ smb2_copychunk_range(const unsigned int xid,
pcchunk->SourceOffset = cpu_to_le64(src_off);
pcchunk->TargetOffset = cpu_to_le64(dest_off);
pcchunk->Length =
- cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
+ cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
kfree(retbuf);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8ff4b9192a9f..f2c415f31b75 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -303,6 +303,22 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}
+static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ ext4_grpblk_t next_zero_bit;
+ unsigned long bitmap_size = sb->s_blocksize * 8;
+ unsigned int offset = num_clusters_in_group(sb, block_group);
+
+ if (bitmap_size <= offset)
+ return 0;
+
+ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
+
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
+}
+
/*
* Return the block number which was discovered to be invalid, or 0 if
* the block bitmap is valid.
@@ -401,6 +417,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
return -EFSCORRUPTED;
}
+ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
+ block_group, blk);
+ ext4_mark_group_bitmap_corrupted(sb, block_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ return -EFSCORRUPTED;
+ }
set_buffer_verified(bh);
verified:
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 7ada374ff27d..44e83521bfde 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -269,14 +269,12 @@ static void __es_find_extent_range(struct inode *inode,
/* see if the extent has been cached */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u) %llu %x\n",
- lblk, es1->es_lblk, es1->es_len,
- ext4_es_pblock(es1), ext4_es_status(es1));
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u) %llu %x\n",
+ lblk, es1->es_lblk, es1->es_len,
+ ext4_es_pblock(es1), ext4_es_status(es1));
+ goto out;
}
es1 = __es_tree_search(&tree->root, lblk);
@@ -295,7 +293,7 @@ out:
}
if (es1 && matching_fn(es1)) {
- tree->cache_es = es1;
+ WRITE_ONCE(tree->cache_es, es1);
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
@@ -933,14 +931,12 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
/* find extent in cache firstly */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u)\n",
- lblk, es1->es_lblk, es1->es_len);
- found = 1;
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u)\n",
+ lblk, es1->es_lblk, es1->es_len);
+ found = 1;
+ goto out;
}
node = tree->root.rb_node;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 147b5241dd94..46c3423ddfa1 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -277,7 +277,11 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len,
}
default:
hinfo->hash = 0;
- return -1;
+ hinfo->minor_hash = 0;
+ ext4_warning(dir->i_sb,
+ "invalid/unsupported hash tree version %u",
+ hinfo->hash_version);
+ return -EINVAL;
}
hash = hash & ~1;
if (hash == (EXT4_HTREE_EOF_32BIT << 1))
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index c4475a74c762..3a91be1d9bbe 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -34,6 +34,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
struct ext4_inode *raw_inode;
+ void *end;
int free, min_offs;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
@@ -57,14 +58,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
/* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ while (!IS_LAST_ENTRY(entry)) {
+ void *next = EXT4_XATTR_NEXT(entry);
+
+ if (next >= end) {
+ EXT4_ERROR_INODE(inode,
+ "corrupt xattr in inline inode");
+ return 0;
+ }
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
+ entry = next;
}
free = min_offs -
((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
@@ -351,7 +361,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
- if (error == -ENODATA)
+ if (error < 0)
goto out;
BUFFER_TRACE(is.iloc.bh, "get_write_access");
@@ -1178,6 +1188,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
ext4_initialize_dirent_tail(dir_block,
inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
+ unlock_buffer(dir_block);
err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
@@ -1252,6 +1263,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
if (!S_ISDIR(inode->i_mode)) {
memcpy(data_bh->b_data, buf, inline_size);
set_buffer_uptodate(data_bh);
+ unlock_buffer(data_bh);
error = ext4_handle_dirty_metadata(handle,
inode, data_bh);
} else {
@@ -1259,7 +1271,6 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
buf, inline_size);
}
- unlock_buffer(data_bh);
out_restore:
if (error)
ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42003b5c4cad..ffc810436ef2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3503,7 +3503,7 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
*/
flags &= ~IOMAP_WRITE;
ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
- WARN_ON_ONCE(iomap->type != IOMAP_MAPPED);
+ WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
return ret;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9dad93059945..912c4a1093fe 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4820,7 +4820,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
+ ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
+ e4b->bd_group, group, pa->pa_pstart);
+ return 0;
+ }
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 588cb09c5291..23930ed3cbda 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -39,28 +39,36 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
* Write the MMP block using REQ_SYNC to try to get the block on-disk
* faster.
*/
-static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
+static int write_mmp_block_thawed(struct super_block *sb,
+ struct buffer_head *bh)
{
struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
- /*
- * We protect against freezing so that we don't create dirty buffers
- * on frozen filesystem.
- */
- sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
- sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
return -EIO;
-
return 0;
}
+static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
+{
+ int err;
+
+ /*
+ * We protect against freezing so that we don't create dirty buffers
+ * on frozen filesystem.
+ */
+ sb_start_write(sb);
+ err = write_mmp_block_thawed(sb, bh);
+ sb_end_write(sb);
+ return err;
+}
+
/*
* Read the MMP block. It _must_ be read from disk and hence we clear the
* uptodate flag on the buffer.
@@ -346,7 +354,11 @@ skip:
seq = mmp_new_seq();
mmp->mmp_seq = cpu_to_le32(seq);
- retval = write_mmp_block(sb, bh);
+ /*
+ * On mount / remount we are protected against fs freezing (by s_umount
+ * semaphore) and grabbing freeze protection upsets lockdep
+ */
+ retval = write_mmp_block_thawed(sb, bh);
if (retval)
goto failed;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56f09598448b..5a3dbbabe23a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -674,7 +674,7 @@ static struct stats dx_show_leaf(struct inode *dir,
len = de->name_len;
if (!IS_ENCRYPTED(dir)) {
/* Directory is not encrypted */
- ext4fs_dirhash(dir, de->name,
+ (void) ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(U)%x.%u ", len,
name, h.hash,
@@ -709,8 +709,9 @@ static struct stats dx_show_leaf(struct inode *dir,
if (IS_CASEFOLDED(dir))
h.hash = EXT4_DIRENT_HASH(de);
else
- ext4fs_dirhash(dir, de->name,
- de->name_len, &h);
+ (void) ext4fs_dirhash(dir,
+ de->name,
+ de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
@@ -720,7 +721,8 @@ static struct stats dx_show_leaf(struct inode *dir,
#else
int len = de->name_len;
char *name = de->name;
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ (void) ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
#endif
@@ -849,8 +851,14 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
/* hash is already computed for encrypted casefolded directory */
if (fname && fname_name(fname) &&
- !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
- ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
+ !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) {
+ int ret = ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), hinfo);
+ if (ret < 0) {
+ ret_err = ERR_PTR(ret);
+ goto fail;
+ }
+ }
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
@@ -1111,7 +1119,12 @@ static int htree_dirblock_to_tree(struct file *dir_file,
hinfo->minor_hash = 0;
}
} else {
- ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
+ err = ext4fs_dirhash(dir, de->name,
+ de->name_len, hinfo);
+ if (err < 0) {
+ count = err;
+ goto errout;
+ }
}
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
@@ -1313,8 +1326,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
if (de->name_len && de->inode) {
if (ext4_hash_in_dirent(dir))
h.hash = EXT4_DIRENT_HASH(de);
- else
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ else {
+ int err = ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
+ if (err < 0)
+ return err;
+ }
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1452,10 +1469,9 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
hinfo->hash_version = DX_HASH_SIPHASH;
hinfo->seed = NULL;
if (cf_name->name)
- ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+ return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
else
- ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
- return 0;
+ return ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
}
#endif
@@ -2298,10 +2314,15 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
/* casefolded encrypted hashes are computed on fname setup */
- if (!ext4_hash_in_dirent(dir))
- ext4fs_dirhash(dir, fname_name(fname),
- fname_len(fname), &fname->hinfo);
-
+ if (!ext4_hash_in_dirent(dir)) {
+ int err = ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), &fname->hinfo);
+ if (err < 0) {
+ brelse(bh2);
+ brelse(bh);
+ return err;
+ }
+ }
memset(frames, 0, sizeof(frames));
frame = frames;
frame->entries = entries;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2528e8216c33..d542f068ca99 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3195,11 +3195,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if (ext4_has_feature_64bit(sb) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -6568,9 +6566,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
@@ -6580,6 +6575,9 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
@@ -6590,6 +6588,13 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b17c1b90e122..b1b8fe86ccdb 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2564,6 +2564,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
.in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int needs_kvfree = 0;
int error;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
@@ -2586,7 +2587,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
error = -ENOMEM;
goto out;
}
-
+ needs_kvfree = 1;
error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error)
goto out;
@@ -2625,7 +2626,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
out:
kfree(b_entry_name);
- if (entry->e_value_inum && buffer)
+ if (needs_kvfree && buffer)
kvfree(buffer);
if (is)
brelse(is->iloc.bh);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 770a606eb3f6..de6b056f090b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1134,7 +1134,7 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
dn->data_blkaddr = blkaddr;
f2fs_set_data_blkaddr(dn);
- f2fs_update_extent_cache(dn);
+ f2fs_update_read_extent_cache(dn);
}
/* dn->ofs_in_node will be returned with up-to-date last block pointer */
@@ -1203,7 +1203,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
struct extent_info ei = {0, };
struct inode *inode = dn->inode;
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
dn->data_blkaddr = ei.blk + index - ei.fofs;
return 0;
}
@@ -1224,7 +1224,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
if (!page)
return ERR_PTR(-ENOMEM);
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
@@ -1486,7 +1486,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
pgofs = (pgoff_t)map->m_lblk;
end = pgofs + maxblocks;
- if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) {
if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
map->m_may_create)
goto next_dnode;
@@ -1696,7 +1696,7 @@ skip:
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
- f2fs_update_extent_cache_range(&dn,
+ f2fs_update_read_extent_cache_range(&dn,
start_pgofs, map->m_pblk + ofs,
map->m_len - ofs);
}
@@ -1741,7 +1741,7 @@ sync_out:
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
- f2fs_update_extent_cache_range(&dn,
+ f2fs_update_read_extent_cache_range(&dn,
start_pgofs, map->m_pblk + ofs,
map->m_len - ofs);
}
@@ -2202,7 +2202,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (f2fs_cluster_is_empty(cc))
goto out;
- if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
+ if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
from_dnode = false;
if (!from_dnode)
@@ -2636,7 +2636,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
set_new_dnode(&dn, inode, NULL, NULL, 0);
if (need_inplace_update(fio) &&
- f2fs_lookup_extent_cache(inode, page->index, &ei)) {
+ f2fs_lookup_read_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
@@ -3361,7 +3361,7 @@ restart:
} else if (locked) {
err = f2fs_get_block(&dn, index);
} else {
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
/* hole case */
@@ -3402,7 +3402,7 @@ static int __find_data_block(struct inode *inode, pgoff_t index,
set_new_dnode(&dn, inode, ipage, ipage, 0);
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
/* hole case */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a216dcdf6941..a9baa121d829 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -72,15 +72,23 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->main_area_zones = si->main_area_sections /
le32_to_cpu(raw_super->secs_per_zone);
- /* validation check of the segment numbers */
+ /* general extent cache stats */
+ for (i = 0; i < NR_EXTENT_CACHES; i++) {
+ struct extent_tree_info *eti = &sbi->extent_tree[i];
+
+ si->hit_cached[i] = atomic64_read(&sbi->read_hit_cached[i]);
+ si->hit_rbtree[i] = atomic64_read(&sbi->read_hit_rbtree[i]);
+ si->total_ext[i] = atomic64_read(&sbi->total_hit_ext[i]);
+ si->hit_total[i] = si->hit_cached[i] + si->hit_rbtree[i];
+ si->ext_tree[i] = atomic_read(&eti->total_ext_tree);
+ si->zombie_tree[i] = atomic_read(&eti->total_zombie_tree);
+ si->ext_node[i] = atomic_read(&eti->total_ext_node);
+ }
+ /* read extent_cache only */
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
- si->hit_cached = atomic64_read(&sbi->read_hit_cached);
- si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
- si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
- si->total_ext = atomic64_read(&sbi->total_hit_ext);
- si->ext_tree = atomic_read(&sbi->total_ext_tree);
- si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
- si->ext_node = atomic_read(&sbi->total_ext_node);
+ si->hit_total[EX_READ] += si->hit_largest;
+
+ /* validation check of the segment numbers */
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
@@ -294,10 +302,16 @@ get_cache:
sizeof(struct nat_entry_set);
for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
- si->cache_mem += atomic_read(&sbi->total_ext_tree) *
+
+ for (i = 0; i < NR_EXTENT_CACHES; i++) {
+ struct extent_tree_info *eti = &sbi->extent_tree[i];
+
+ si->ext_mem[i] = atomic_read(&eti->total_ext_tree) *
sizeof(struct extent_tree);
- si->cache_mem += atomic_read(&sbi->total_ext_node) *
+ si->ext_mem[i] += atomic_read(&eti->total_ext_node) *
sizeof(struct extent_node);
+ si->cache_mem += si->ext_mem[i];
+ }
si->page_mem = 0;
if (sbi->node_inode) {
@@ -490,16 +504,18 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_node_blks);
seq_printf(s, "BG skip : IO: %u, Other: %u\n",
si->io_skip_bggc, si->other_skip_bggc);
- seq_puts(s, "\nExtent Cache:\n");
+ seq_puts(s, "\nExtent Cache (Read):\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
- si->hit_largest, si->hit_cached,
- si->hit_rbtree);
+ si->hit_largest, si->hit_cached[EX_READ],
+ si->hit_rbtree[EX_READ]);
seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n",
- !si->total_ext ? 0 :
- div64_u64(si->hit_total * 100, si->total_ext),
- si->hit_total, si->total_ext);
+ !si->total_ext[EX_READ] ? 0 :
+ div64_u64(si->hit_total[EX_READ] * 100,
+ si->total_ext[EX_READ]),
+ si->hit_total[EX_READ], si->total_ext[EX_READ]);
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
- si->ext_tree, si->zombie_tree, si->ext_node);
+ si->ext_tree[EX_READ], si->zombie_tree[EX_READ],
+ si->ext_node[EX_READ]);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
si->nr_dio_read, si->nr_dio_write);
@@ -566,8 +582,10 @@ static int stat_show(struct seq_file *s, void *v)
(si->base_mem + si->cache_mem + si->page_mem) >> 10);
seq_printf(s, " - static: %llu KB\n",
si->base_mem >> 10);
- seq_printf(s, " - cached: %llu KB\n",
+ seq_printf(s, " - cached all: %llu KB\n",
si->cache_mem >> 10);
+ seq_printf(s, " - read extent cache: %llu KB\n",
+ si->ext_mem[EX_READ] >> 10);
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
@@ -600,10 +618,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi;
sbi->stat_info = si;
- atomic64_set(&sbi->total_hit_ext, 0);
- atomic64_set(&sbi->read_hit_rbtree, 0);
+ /* general extent cache stats */
+ for (i = 0; i < NR_EXTENT_CACHES; i++) {
+ atomic64_set(&sbi->total_hit_ext[i], 0);
+ atomic64_set(&sbi->read_hit_rbtree[i], 0);
+ atomic64_set(&sbi->read_hit_cached[i], 0);
+ }
+
+ /* read extent_cache only */
atomic64_set(&sbi->read_hit_largest, 0);
- atomic64_set(&sbi->read_hit_cached, 0);
atomic_set(&sbi->inline_xattr, 0);
atomic_set(&sbi->inline_inode, 0);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 6c9e6f78a3e3..16692c96e765 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -15,6 +15,122 @@
#include "node.h"
#include <trace/events/f2fs.h>
+bool sanity_check_extent_cache(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct extent_info *ei;
+
+ if (!fi->extent_tree[EX_READ])
+ return true;
+
+ ei = &fi->extent_tree[EX_READ]->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk,
+ DATA_GENERIC_ENHANCE) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC_ENHANCE))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ return true;
+}
+
+static void __set_extent_info(struct extent_info *ei,
+ unsigned int fofs, unsigned int len,
+ block_t blk, bool keep_clen,
+ enum extent_type type)
+{
+ ei->fofs = fofs;
+ ei->len = len;
+
+ if (type == EX_READ) {
+ ei->blk = blk;
+ if (keep_clen)
+ return;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ ei->c_len = 0;
+#endif
+ }
+}
+
+static bool __may_read_extent_tree(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!test_opt(sbi, READ_EXTENT_CACHE))
+ return false;
+ if (is_inode_flag_set(inode, FI_NO_EXTENT))
+ return false;
+ if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
+ !f2fs_sb_has_readonly(sbi))
+ return false;
+ return S_ISREG(inode->i_mode);
+}
+
+static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
+{
+ if (type == EX_READ)
+ return __may_read_extent_tree(inode);
+ return false;
+}
+
+static bool __may_extent_tree(struct inode *inode, enum extent_type type)
+{
+ /*
+ * for recovered files during mount do not create extents
+ * if shrinker is not registered.
+ */
+ if (list_empty(&F2FS_I_SB(inode)->s_list))
+ return false;
+
+ return __init_may_extent_tree(inode, type);
+}
+
+static void __try_update_largest_extent(struct extent_tree *et,
+ struct extent_node *en)
+{
+ if (et->type != EX_READ)
+ return;
+ if (en->ei.len <= et->largest.len)
+ return;
+
+ et->largest = en->ei;
+ et->largest_updated = true;
+}
+
+static bool __is_extent_mergeable(struct extent_info *back,
+ struct extent_info *front, enum extent_type type)
+{
+ if (type == EX_READ) {
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (back->c_len && back->len != back->c_len)
+ return false;
+ if (front->c_len && front->len != front->c_len)
+ return false;
+#endif
+ return (back->fofs + back->len == front->fofs &&
+ back->blk + back->len == front->blk);
+ }
+ return false;
+}
+
+static bool __is_back_mergeable(struct extent_info *cur,
+ struct extent_info *back, enum extent_type type)
+{
+ return __is_extent_mergeable(back, cur, type);
+}
+
+static bool __is_front_mergeable(struct extent_info *cur,
+ struct extent_info *front, enum extent_type type)
+{
+ return __is_extent_mergeable(cur, front, type);
+}
+
static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
unsigned int ofs)
{
@@ -58,29 +174,6 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
return re;
}
-struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root,
- struct rb_node **parent,
- unsigned long long key, bool *leftmost)
-{
- struct rb_node **p = &root->rb_root.rb_node;
- struct rb_entry *re;
-
- while (*p) {
- *parent = *p;
- re = rb_entry(*parent, struct rb_entry, rb_node);
-
- if (key < re->key) {
- p = &(*p)->rb_left;
- } else {
- p = &(*p)->rb_right;
- *leftmost = false;
- }
- }
-
- return p;
-}
-
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -189,7 +282,7 @@ lookup_neighbors:
}
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root, bool check_key)
+ struct rb_root_cached *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first_cached(root), *next;
@@ -206,23 +299,12 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
- if (check_key) {
- if (cur_re->key > next_re->key) {
- f2fs_info(sbi, "inconsistent rbtree, "
- "cur(%llu) next(%llu)",
- cur_re->key, next_re->key);
- return false;
- }
- goto next;
- }
-
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
-next:
cur = next;
}
#endif
@@ -237,6 +319,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
struct rb_node *parent, struct rb_node **p,
bool leftmost)
{
+ struct extent_tree_info *eti = &sbi->extent_tree[et->type];
struct extent_node *en;
en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
@@ -250,16 +333,18 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
rb_link_node(&en->rb_node, parent, p);
rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
atomic_inc(&et->node_cnt);
- atomic_inc(&sbi->total_ext_node);
+ atomic_inc(&eti->total_ext_node);
return en;
}
static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
+ struct extent_tree_info *eti = &sbi->extent_tree[et->type];
+
rb_erase_cached(&en->rb_node, &et->root);
atomic_dec(&et->node_cnt);
- atomic_dec(&sbi->total_ext_node);
+ atomic_dec(&eti->total_ext_node);
if (et->cached_en == en)
et->cached_en = NULL;
@@ -275,61 +360,51 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
static void __release_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
- spin_lock(&sbi->extent_lock);
+ struct extent_tree_info *eti = &sbi->extent_tree[et->type];
+
+ spin_lock(&eti->extent_lock);
f2fs_bug_on(sbi, list_empty(&en->list));
list_del_init(&en->list);
- spin_unlock(&sbi->extent_lock);
+ spin_unlock(&eti->extent_lock);
__detach_extent_node(sbi, et, en);
}
-static struct extent_tree *__grab_extent_tree(struct inode *inode)
+static struct extent_tree *__grab_extent_tree(struct inode *inode,
+ enum extent_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree_info *eti = &sbi->extent_tree[type];
struct extent_tree *et;
nid_t ino = inode->i_ino;
- mutex_lock(&sbi->extent_tree_lock);
- et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+ mutex_lock(&eti->extent_tree_lock);
+ et = radix_tree_lookup(&eti->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab,
GFP_NOFS, true, NULL);
- f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
+ f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
memset(et, 0, sizeof(struct extent_tree));
et->ino = ino;
+ et->type = type;
et->root = RB_ROOT_CACHED;
et->cached_en = NULL;
rwlock_init(&et->lock);
INIT_LIST_HEAD(&et->list);
atomic_set(&et->node_cnt, 0);
- atomic_inc(&sbi->total_ext_tree);
+ atomic_inc(&eti->total_ext_tree);
} else {
- atomic_dec(&sbi->total_zombie_tree);
+ atomic_dec(&eti->total_zombie_tree);
list_del_init(&et->list);
}
- mutex_unlock(&sbi->extent_tree_lock);
+ mutex_unlock(&eti->extent_tree_lock);
/* never died until evict_inode */
- F2FS_I(inode)->extent_tree = et;
+ F2FS_I(inode)->extent_tree[type] = et;
return et;
}
-static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
- struct extent_tree *et, struct extent_info *ei)
-{
- struct rb_node **p = &et->root.rb_root.rb_node;
- struct extent_node *en;
-
- en = __attach_extent_node(sbi, et, ei, NULL, p, true);
- if (!en)
- return NULL;
-
- et->largest = en->ei;
- et->cached_en = en;
- return en;
-}
-
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et)
{
@@ -358,71 +433,78 @@ static void __drop_largest_extent(struct extent_tree *et,
}
}
-/* return true, if inode page is changed */
-static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
+void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
+ struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
+ struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
- if (!f2fs_may_extent_tree(inode)) {
- /* drop largest extent */
+ if (!__may_extent_tree(inode, EX_READ)) {
+ /* drop largest read extent */
if (i_ext && i_ext->len) {
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
i_ext->len = 0;
set_page_dirty(ipage);
- return;
}
- return;
+ goto out;
}
- et = __grab_extent_tree(inode);
+ et = __grab_extent_tree(inode, EX_READ);
if (!i_ext || !i_ext->len)
- return;
+ goto out;
- get_extent_info(&ei, i_ext);
+ get_read_extent_info(&ei, i_ext);
write_lock(&et->lock);
if (atomic_read(&et->node_cnt))
- goto out;
+ goto unlock_out;
- en = __init_extent_tree(sbi, et, &ei);
+ en = __attach_extent_node(sbi, et, &ei, NULL,
+ &et->root.rb_root.rb_node, true);
if (en) {
- spin_lock(&sbi->extent_lock);
- list_add_tail(&en->list, &sbi->extent_list);
- spin_unlock(&sbi->extent_lock);
+ et->largest = en->ei;
+ et->cached_en = en;
+
+ spin_lock(&eti->extent_lock);
+ list_add_tail(&en->list, &eti->extent_list);
+ spin_unlock(&eti->extent_lock);
}
-out:
+unlock_out:
write_unlock(&et->lock);
+out:
+ if (!F2FS_I(inode)->extent_tree[EX_READ])
+ set_inode_flag(inode, FI_NO_EXTENT);
}
-void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
+void f2fs_init_extent_tree(struct inode *inode)
{
- __f2fs_init_extent_tree(inode, ipage);
-
- if (!F2FS_I(inode)->extent_tree)
- set_inode_flag(inode, FI_NO_EXTENT);
+ /* initialize read cache */
+ if (__init_may_extent_tree(inode, EX_READ))
+ __grab_extent_tree(inode, EX_READ);
}
-static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
- struct extent_info *ei)
+static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei, enum extent_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree_info *eti = &sbi->extent_tree[type];
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
struct extent_node *en;
bool ret = false;
if (!et)
return false;
- trace_f2fs_lookup_extent_tree_start(inode, pgofs);
+ trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
read_lock(&et->lock);
- if (et->largest.fofs <= pgofs &&
+ if (type == EX_READ &&
+ et->largest.fofs <= pgofs &&
et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
@@ -436,23 +518,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
if (en == et->cached_en)
- stat_inc_cached_node_hit(sbi);
+ stat_inc_cached_node_hit(sbi, type);
else
- stat_inc_rbtree_node_hit(sbi);
+ stat_inc_rbtree_node_hit(sbi, type);
*ei = en->ei;
- spin_lock(&sbi->extent_lock);
+ spin_lock(&eti->extent_lock);
if (!list_empty(&en->list)) {
- list_move_tail(&en->list, &sbi->extent_list);
+ list_move_tail(&en->list, &eti->extent_list);
et->cached_en = en;
}
- spin_unlock(&sbi->extent_lock);
+ spin_unlock(&eti->extent_lock);
ret = true;
out:
- stat_inc_total_hit(sbi);
+ stat_inc_total_hit(sbi, type);
read_unlock(&et->lock);
- trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
+ if (type == EX_READ)
+ trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
return ret;
}
@@ -461,18 +544,20 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
+ struct extent_tree_info *eti = &sbi->extent_tree[et->type];
struct extent_node *en = NULL;
- if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
+ if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
prev_ex->ei.len += ei->len;
ei = &prev_ex->ei;
en = prev_ex;
}
- if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
+ if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
next_ex->ei.fofs = ei->fofs;
- next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
+ if (et->type == EX_READ)
+ next_ex->ei.blk = ei->blk;
if (en)
__release_extent_node(sbi, et, prev_ex);
@@ -484,12 +569,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
__try_update_largest_extent(et, en);
- spin_lock(&sbi->extent_lock);
+ spin_lock(&eti->extent_lock);
if (!list_empty(&en->list)) {
- list_move_tail(&en->list, &sbi->extent_list);
+ list_move_tail(&en->list, &eti->extent_list);
et->cached_en = en;
}
- spin_unlock(&sbi->extent_lock);
+ spin_unlock(&eti->extent_lock);
return en;
}
@@ -499,6 +584,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct rb_node *insert_parent,
bool leftmost)
{
+ struct extent_tree_info *eti = &sbi->extent_tree[et->type];
struct rb_node **p;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
@@ -521,47 +607,50 @@ do_insert:
__try_update_largest_extent(et, en);
/* update in global extent list */
- spin_lock(&sbi->extent_lock);
- list_add_tail(&en->list, &sbi->extent_list);
+ spin_lock(&eti->extent_lock);
+ list_add_tail(&en->list, &eti->extent_list);
et->cached_en = en;
- spin_unlock(&sbi->extent_lock);
+ spin_unlock(&eti->extent_lock);
return en;
}
-static void f2fs_update_extent_tree_range(struct inode *inode,
- pgoff_t fofs, block_t blkaddr, unsigned int len)
+static void __update_extent_tree_range(struct inode *inode,
+ struct extent_info *tei, enum extent_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
struct extent_node *en = NULL, *en1 = NULL;
struct extent_node *prev_en = NULL, *next_en = NULL;
struct extent_info ei, dei, prev;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ unsigned int fofs = tei->fofs, len = tei->len;
unsigned int end = fofs + len;
- unsigned int pos = (unsigned int)fofs;
bool updated = false;
bool leftmost = false;
if (!et)
return;
- trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0);
-
+ if (type == EX_READ)
+ trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
+ tei->blk, 0);
write_lock(&et->lock);
- if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
- write_unlock(&et->lock);
- return;
- }
+ if (type == EX_READ) {
+ if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+ write_unlock(&et->lock);
+ return;
+ }
- prev = et->largest;
- dei.len = 0;
+ prev = et->largest;
+ dei.len = 0;
- /*
- * drop largest extent before lookup, in case it's already
- * been shrunk from extent tree
- */
- __drop_largest_extent(et, fofs, len);
+ /*
+ * drop largest extent before lookup, in case it's already
+ * been shrunk from extent tree
+ */
+ __drop_largest_extent(et, fofs, len);
+ }
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
@@ -582,26 +671,30 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
dei = en->ei;
org_end = dei.fofs + dei.len;
- f2fs_bug_on(sbi, pos >= org_end);
+ f2fs_bug_on(sbi, fofs >= org_end);
- if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
- en->ei.len = pos - en->ei.fofs;
+ if (fofs > dei.fofs && (type != EX_READ ||
+ fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
+ en->ei.len = fofs - en->ei.fofs;
prev_en = en;
parts = 1;
}
- if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
+ if (end < org_end && (type != EX_READ ||
+ org_end - end >= F2FS_MIN_EXTENT_LEN)) {
if (parts) {
- set_extent_info(&ei, end,
- end - dei.fofs + dei.blk,
- org_end - end);
+ __set_extent_info(&ei,
+ end, org_end - end,
+ end - dei.fofs + dei.blk, false,
+ type);
en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL, true);
next_en = en1;
} else {
- en->ei.fofs = end;
- en->ei.blk += end - dei.fofs;
- en->ei.len -= end - dei.fofs;
+ __set_extent_info(&en->ei,
+ end, en->ei.len - (end - dei.fofs),
+ en->ei.blk + (end - dei.fofs), true,
+ type);
next_en = en;
}
parts++;
@@ -631,10 +724,11 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
en = next_en;
}
- /* 3. update extent in extent cache */
- if (blkaddr) {
+ /* 3. update extent in read extent cache */
+ BUG_ON(type != EX_READ);
- set_extent_info(&ei, fofs, blkaddr, len);
+ if (tei->blk) {
+ __set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ);
if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
__insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent, leftmost);
@@ -664,19 +758,20 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
-void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int llen,
unsigned int c_len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
struct extent_node *en = NULL;
struct extent_node *prev_en = NULL, *next_en = NULL;
struct extent_info ei;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
bool leftmost = false;
- trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len);
+ trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
+ blkaddr, c_len);
/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
if (is_inode_flag_set(inode, FI_NO_EXTENT))
@@ -693,7 +788,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode,
if (en)
goto unlock_out;
- set_extent_info(&ei, fofs, blkaddr, llen);
+ __set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ);
ei.c_len = c_len;
if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
@@ -704,24 +799,43 @@ unlock_out:
}
#endif
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
+ struct extent_info ei;
+
+ if (!__may_extent_tree(dn->inode, type))
+ return;
+
+ ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+ dn->ofs_in_node;
+ ei.len = 1;
+
+ if (type == EX_READ) {
+ if (dn->data_blkaddr == NEW_ADDR)
+ ei.blk = NULL_ADDR;
+ else
+ ei.blk = dn->data_blkaddr;
+ }
+ __update_extent_tree_range(dn->inode, &ei, type);
+}
+
+static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
+ enum extent_type type)
+{
+ struct extent_tree_info *eti = &sbi->extent_tree[type];
struct extent_tree *et, *next;
struct extent_node *en;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
- if (!test_opt(sbi, EXTENT_CACHE))
- return 0;
-
- if (!atomic_read(&sbi->total_zombie_tree))
+ if (!atomic_read(&eti->total_zombie_tree))
goto free_node;
- if (!mutex_trylock(&sbi->extent_tree_lock))
+ if (!mutex_trylock(&eti->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
- list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+ list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
if (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et);
@@ -729,61 +843,100 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
}
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
list_del_init(&et->list);
- radix_tree_delete(&sbi->extent_tree_root, et->ino);
+ radix_tree_delete(&eti->extent_tree_root, et->ino);
kmem_cache_free(extent_tree_slab, et);
- atomic_dec(&sbi->total_ext_tree);
- atomic_dec(&sbi->total_zombie_tree);
+ atomic_dec(&eti->total_ext_tree);
+ atomic_dec(&eti->total_zombie_tree);
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
cond_resched();
}
- mutex_unlock(&sbi->extent_tree_lock);
+ mutex_unlock(&eti->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
- if (!mutex_trylock(&sbi->extent_tree_lock))
+ if (!mutex_trylock(&eti->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
- spin_lock(&sbi->extent_lock);
+ spin_lock(&eti->extent_lock);
for (; remained > 0; remained--) {
- if (list_empty(&sbi->extent_list))
+ if (list_empty(&eti->extent_list))
break;
- en = list_first_entry(&sbi->extent_list,
+ en = list_first_entry(&eti->extent_list,
struct extent_node, list);
et = en->et;
if (!write_trylock(&et->lock)) {
/* refresh this extent node's position in extent list */
- list_move_tail(&en->list, &sbi->extent_list);
+ list_move_tail(&en->list, &eti->extent_list);
continue;
}
list_del_init(&en->list);
- spin_unlock(&sbi->extent_lock);
+ spin_unlock(&eti->extent_lock);
__detach_extent_node(sbi, et, en);
write_unlock(&et->lock);
node_cnt++;
- spin_lock(&sbi->extent_lock);
+ spin_lock(&eti->extent_lock);
}
- spin_unlock(&sbi->extent_lock);
+ spin_unlock(&eti->extent_lock);
unlock_out:
- mutex_unlock(&sbi->extent_tree_lock);
+ mutex_unlock(&eti->extent_tree_lock);
out:
- trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
+ trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
return node_cnt + tree_cnt;
}
-unsigned int f2fs_destroy_extent_node(struct inode *inode)
+/* read extent cache operations */
+bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
+{
+ if (!__may_extent_tree(inode, EX_READ))
+ return false;
+
+ return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
+}
+
+void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
+{
+ return __update_extent_cache(dn, EX_READ);
+}
+
+void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
+ pgoff_t fofs, block_t blkaddr, unsigned int len)
+{
+ struct extent_info ei = {
+ .fofs = fofs,
+ .len = len,
+ .blk = blkaddr,
+ };
+
+ if (!__may_extent_tree(dn->inode, EX_READ))
+ return;
+
+ __update_extent_tree_range(dn->inode, &ei, EX_READ);
+}
+
+unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+ if (!test_opt(sbi, READ_EXTENT_CACHE))
+ return 0;
+
+ return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
+}
+
+static unsigned int __destroy_extent_node(struct inode *inode,
+ enum extent_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
unsigned int node_cnt = 0;
if (!et || !atomic_read(&et->node_cnt))
@@ -796,31 +949,44 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
return node_cnt;
}
-void f2fs_drop_extent_tree(struct inode *inode)
+void f2fs_destroy_extent_node(struct inode *inode)
+{
+ __destroy_extent_node(inode, EX_READ);
+}
+
+static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
bool updated = false;
- if (!f2fs_may_extent_tree(inode))
+ if (!__may_extent_tree(inode, type))
return;
write_lock(&et->lock);
- set_inode_flag(inode, FI_NO_EXTENT);
__free_extent_tree(sbi, et);
- if (et->largest.len) {
- et->largest.len = 0;
- updated = true;
+ if (type == EX_READ) {
+ set_inode_flag(inode, FI_NO_EXTENT);
+ if (et->largest.len) {
+ et->largest.len = 0;
+ updated = true;
+ }
}
write_unlock(&et->lock);
if (updated)
f2fs_mark_inode_dirty_sync(inode, true);
}
-void f2fs_destroy_extent_tree(struct inode *inode)
+void f2fs_drop_extent_tree(struct inode *inode)
+{
+ __drop_extent_tree(inode, EX_READ);
+}
+
+static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree_info *eti = &sbi->extent_tree[type];
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
unsigned int node_cnt = 0;
if (!et)
@@ -828,76 +994,49 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
- mutex_lock(&sbi->extent_tree_lock);
- list_add_tail(&et->list, &sbi->zombie_list);
- atomic_inc(&sbi->total_zombie_tree);
- mutex_unlock(&sbi->extent_tree_lock);
+ mutex_lock(&eti->extent_tree_lock);
+ list_add_tail(&et->list, &eti->zombie_list);
+ atomic_inc(&eti->total_zombie_tree);
+ mutex_unlock(&eti->extent_tree_lock);
return;
}
/* free all extent info belong to this extent tree */
- node_cnt = f2fs_destroy_extent_node(inode);
+ node_cnt = __destroy_extent_node(inode, type);
/* delete extent tree entry in radix tree */
- mutex_lock(&sbi->extent_tree_lock);
+ mutex_lock(&eti->extent_tree_lock);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
- radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+ radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
- atomic_dec(&sbi->total_ext_tree);
- mutex_unlock(&sbi->extent_tree_lock);
+ atomic_dec(&eti->total_ext_tree);
+ mutex_unlock(&eti->extent_tree_lock);
- F2FS_I(inode)->extent_tree = NULL;
-
- trace_f2fs_destroy_extent_tree(inode, node_cnt);
-}
-
-bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
- struct extent_info *ei)
-{
- if (!f2fs_may_extent_tree(inode))
- return false;
+ F2FS_I(inode)->extent_tree[type] = NULL;
- return f2fs_lookup_extent_tree(inode, pgofs, ei);
+ trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}
-void f2fs_update_extent_cache(struct dnode_of_data *dn)
+void f2fs_destroy_extent_tree(struct inode *inode)
{
- pgoff_t fofs;
- block_t blkaddr;
-
- if (!f2fs_may_extent_tree(dn->inode))
- return;
-
- if (dn->data_blkaddr == NEW_ADDR)
- blkaddr = NULL_ADDR;
- else
- blkaddr = dn->data_blkaddr;
-
- fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
- dn->ofs_in_node;
- f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
+ __destroy_extent_tree(inode, EX_READ);
}
-void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
- pgoff_t fofs, block_t blkaddr, unsigned int len)
-
+static void __init_extent_tree_info(struct extent_tree_info *eti)
{
- if (!f2fs_may_extent_tree(dn->inode))
- return;
-
- f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
+ INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
+ mutex_init(&eti->extent_tree_lock);
+ INIT_LIST_HEAD(&eti->extent_list);
+ spin_lock_init(&eti->extent_lock);
+ atomic_set(&eti->total_ext_tree, 0);
+ INIT_LIST_HEAD(&eti->zombie_list);
+ atomic_set(&eti->total_zombie_tree, 0);
+ atomic_set(&eti->total_ext_node, 0);
}
void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
- INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
- mutex_init(&sbi->extent_tree_lock);
- INIT_LIST_HEAD(&sbi->extent_list);
- spin_lock_init(&sbi->extent_lock);
- atomic_set(&sbi->total_ext_tree, 0);
- INIT_LIST_HEAD(&sbi->zombie_list);
- atomic_set(&sbi->total_zombie_tree, 0);
- atomic_set(&sbi->total_ext_node, 0);
+ __init_extent_tree_info(&sbi->extent_tree[EX_READ]);
}
int __init f2fs_create_extent_cache(void)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4b44ca1decdd..a0a232551da9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -91,7 +91,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
#define F2FS_MOUNT_NOBARRIER 0x00000800
#define F2FS_MOUNT_FASTBOOT 0x00001000
-#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
+#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
#define F2FS_MOUNT_USRQUOTA 0x00080000
@@ -593,35 +593,43 @@ enum {
/* dirty segments threshold for triggering CP */
#define DEFAULT_DIRTY_THRESHOLD 4
+#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS
+#define RECOVERY_MIN_RA_BLOCKS 1
+
+#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */
+
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
/* number of extent info in extent cache we try to shrink */
-#define EXTENT_CACHE_SHRINK_NUMBER 128
+#define READ_EXTENT_CACHE_SHRINK_NUMBER 128
-#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS
-#define RECOVERY_MIN_RA_BLOCKS 1
-
-#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */
+/* extent cache type */
+enum extent_type {
+ EX_READ,
+ NR_EXTENT_CACHES,
+};
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
- union {
- struct {
- unsigned int ofs; /* start offset of the entry */
- unsigned int len; /* length of the entry */
- };
- unsigned long long key; /* 64-bits key */
- } __packed;
+ unsigned int ofs; /* start offset of the entry */
+ unsigned int len; /* length of the entry */
};
struct extent_info {
unsigned int fofs; /* start offset in a file */
unsigned int len; /* length of the extent */
- u32 blk; /* start block address of the extent */
+ union {
+ /* read extent_cache */
+ struct {
+ /* start block address of the extent */
+ block_t blk;
#ifdef CONFIG_F2FS_FS_COMPRESSION
- unsigned int c_len; /* physical extent length of compressed blocks */
+ /* physical extent length of compressed blocks */
+ unsigned int c_len;
#endif
+ };
+ };
};
struct extent_node {
@@ -633,13 +641,25 @@ struct extent_node {
struct extent_tree {
nid_t ino; /* inode number */
+ enum extent_type type; /* keep the extent tree type */
struct rb_root_cached root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
- struct extent_info largest; /* largested extent info */
struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
atomic_t node_cnt; /* # of extent node in rb-tree*/
bool largest_updated; /* largest extent updated */
+ struct extent_info largest; /* largest cached extent for EX_READ */
+};
+
+struct extent_tree_info {
+ struct radix_tree_root extent_tree_root;/* cache extent cache entries */
+ struct mutex extent_tree_lock; /* locking extent radix tree */
+ struct list_head extent_list; /* lru list for shrinker */
+ spinlock_t extent_lock; /* locking extent lru list */
+ atomic_t total_ext_tree; /* extent tree count */
+ struct list_head zombie_list; /* extent zombie tree list */
+ atomic_t total_zombie_tree; /* extent zombie tree count */
+ atomic_t total_ext_node; /* extent info count */
};
/*
@@ -801,7 +821,8 @@ struct f2fs_inode_info {
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
struct task_struct *atomic_write_task; /* store atomic write task */
- struct extent_tree *extent_tree; /* cached extent_tree entry */
+ struct extent_tree *extent_tree[NR_EXTENT_CACHES];
+ /* cached extent_tree entry */
struct inode *cow_inode; /* copy-on-write inode for atomic write */
/* avoid racing between foreground op and gc */
@@ -826,7 +847,7 @@ struct f2fs_inode_info {
loff_t original_i_size; /* original i_size before atomic write */
};
-static inline void get_extent_info(struct extent_info *ext,
+static inline void get_read_extent_info(struct extent_info *ext,
struct f2fs_extent *i_ext)
{
ext->fofs = le32_to_cpu(i_ext->fofs);
@@ -834,7 +855,7 @@ static inline void get_extent_info(struct extent_info *ext,
ext->len = le32_to_cpu(i_ext->len);
}
-static inline void set_raw_extent(struct extent_info *ext,
+static inline void set_raw_read_extent(struct extent_info *ext,
struct f2fs_extent *i_ext)
{
i_ext->fofs = cpu_to_le32(ext->fofs);
@@ -842,17 +863,6 @@ static inline void set_raw_extent(struct extent_info *ext,
i_ext->len = cpu_to_le32(ext->len);
}
-static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
- u32 blk, unsigned int len)
-{
- ei->fofs = fofs;
- ei->blk = blk;
- ei->len = len;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- ei->c_len = 0;
-#endif
-}
-
static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front, unsigned int max_len)
{
@@ -872,41 +882,6 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur,
return __is_discard_mergeable(cur, front, max_len);
}
-static inline bool __is_extent_mergeable(struct extent_info *back,
- struct extent_info *front)
-{
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (back->c_len && back->len != back->c_len)
- return false;
- if (front->c_len && front->len != front->c_len)
- return false;
-#endif
- return (back->fofs + back->len == front->fofs &&
- back->blk + back->len == front->blk);
-}
-
-static inline bool __is_back_mergeable(struct extent_info *cur,
- struct extent_info *back)
-{
- return __is_extent_mergeable(back, cur);
-}
-
-static inline bool __is_front_mergeable(struct extent_info *cur,
- struct extent_info *front)
-{
- return __is_extent_mergeable(cur, front);
-}
-
-extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
-static inline void __try_update_largest_extent(struct extent_tree *et,
- struct extent_node *en)
-{
- if (en->ei.len > et->largest.len) {
- et->largest = en->ei;
- et->largest_updated = true;
- }
-}
-
/*
* For free nid management
*/
@@ -1670,14 +1645,7 @@ struct f2fs_sb_info {
struct mutex flush_lock; /* for flush exclusion */
/* for extent tree cache */
- struct radix_tree_root extent_tree_root;/* cache extent cache entries */
- struct mutex extent_tree_lock; /* locking extent radix tree */
- struct list_head extent_list; /* lru list for shrinker */
- spinlock_t extent_lock; /* locking extent lru list */
- atomic_t total_ext_tree; /* extent tree count */
- struct list_head zombie_list; /* extent zombie tree list */
- atomic_t total_zombie_tree; /* extent zombie tree count */
- atomic_t total_ext_node; /* extent info count */
+ struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
/* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
@@ -1761,10 +1729,14 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
- atomic64_t total_hit_ext; /* # of lookup extent cache */
- atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */
- atomic64_t read_hit_largest; /* # of hit largest extent node */
- atomic64_t read_hit_cached; /* # of hit cached extent node */
+ /* # of lookup extent cache */
+ atomic64_t total_hit_ext[NR_EXTENT_CACHES];
+ /* # of hit rbtree extent node */
+ atomic64_t read_hit_rbtree[NR_EXTENT_CACHES];
+ /* # of hit cached extent node */
+ atomic64_t read_hit_cached[NR_EXTENT_CACHES];
+ /* # of hit largest extent node in read extent cache */
+ atomic64_t read_hit_largest;
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
@@ -2578,6 +2550,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
}
+extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
@@ -3865,9 +3838,17 @@ struct f2fs_stat_info {
struct f2fs_sb_info *sbi;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
- unsigned long long hit_largest, hit_cached, hit_rbtree;
- unsigned long long hit_total, total_ext;
- int ext_tree, zombie_tree, ext_node;
+ unsigned long long hit_cached[NR_EXTENT_CACHES];
+ unsigned long long hit_rbtree[NR_EXTENT_CACHES];
+ unsigned long long total_ext[NR_EXTENT_CACHES];
+ unsigned long long hit_total[NR_EXTENT_CACHES];
+ int ext_tree[NR_EXTENT_CACHES];
+ int zombie_tree[NR_EXTENT_CACHES];
+ int ext_node[NR_EXTENT_CACHES];
+ /* to count memory footprint */
+ unsigned long long ext_mem[NR_EXTENT_CACHES];
+ /* for read extent cache */
+ unsigned long long hit_largest;
int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
int ndirty_data, ndirty_qdata;
unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
@@ -3926,10 +3907,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
-#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
-#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree))
+#define stat_inc_total_hit(sbi, type) (atomic64_inc(&(sbi)->total_hit_ext[type]))
+#define stat_inc_rbtree_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_rbtree[type]))
#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
-#define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached))
+#define stat_inc_cached_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_cached[type]))
#define stat_inc_inline_xattr(inode) \
do { \
if (f2fs_has_inline_xattr(inode)) \
@@ -4052,10 +4033,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
#define stat_other_skip_bggc_count(sbi) do { } while (0)
#define stat_inc_dirty_inode(sbi, type) do { } while (0)
#define stat_dec_dirty_inode(sbi, type) do { } while (0)
-#define stat_inc_total_hit(sbi) do { } while (0)
-#define stat_inc_rbtree_node_hit(sbi) do { } while (0)
+#define stat_inc_total_hit(sbi, type) do { } while (0)
+#define stat_inc_rbtree_node_hit(sbi, type) do { } while (0)
#define stat_inc_largest_node_hit(sbi) do { } while (0)
-#define stat_inc_cached_node_hit(sbi) do { } while (0)
+#define stat_inc_cached_node_hit(sbi, type) do { } while (0)
#define stat_inc_inline_xattr(inode) do { } while (0)
#define stat_dec_inline_xattr(inode) do { } while (0)
#define stat_inc_inline_inode(inode) do { } while (0)
@@ -4144,12 +4125,9 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
+bool sanity_check_extent_cache(struct inode *inode);
struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs);
-struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root,
- struct rb_node **parent,
- unsigned long long key, bool *left_most);
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -4160,21 +4138,25 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force, bool *leftmost);
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root, bool check_key);
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
-void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
+ struct rb_root_cached *root);
+void f2fs_init_extent_tree(struct inode *inode);
void f2fs_drop_extent_tree(struct inode *inode);
-unsigned int f2fs_destroy_extent_node(struct inode *inode);
+void f2fs_destroy_extent_node(struct inode *inode);
void f2fs_destroy_extent_tree(struct inode *inode);
-bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
- struct extent_info *ei);
-void f2fs_update_extent_cache(struct dnode_of_data *dn);
-void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
- pgoff_t fofs, block_t blkaddr, unsigned int len);
void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi);
int __init f2fs_create_extent_cache(void);
void f2fs_destroy_extent_cache(void);
+/* read extent cache ops */
+void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage);
+bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei);
+void f2fs_update_read_extent_cache(struct dnode_of_data *dn);
+void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
+ pgoff_t fofs, block_t blkaddr, unsigned int len);
+unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi,
+ int nr_shrink);
+
/*
* sysfs.c
*/
@@ -4244,9 +4226,9 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
struct writeback_control *wbc,
enum iostat_type io_type);
int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index);
-void f2fs_update_extent_tree_range_compressed(struct inode *inode,
- pgoff_t fofs, block_t blkaddr, unsigned int llen,
- unsigned int c_len);
+void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
+ pgoff_t fofs, block_t blkaddr,
+ unsigned int llen, unsigned int c_len);
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
bool is_readahead, bool for_write);
@@ -4327,9 +4309,10 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi,
static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
nid_t ino) { }
#define inc_compr_inode_stat(inode) do { } while (0)
-static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode,
- pgoff_t fofs, block_t blkaddr, unsigned int llen,
- unsigned int c_len) { }
+static inline void f2fs_update_read_extent_tree_range_compressed(
+ struct inode *inode,
+ pgoff_t fofs, block_t blkaddr,
+ unsigned int llen, unsigned int c_len) { }
#endif
static inline int set_compress_context(struct inode *inode)
@@ -4400,26 +4383,6 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
F2FS_FEATURE_FUNCS(compression, COMPRESSION);
F2FS_FEATURE_FUNCS(readonly, RO);
-static inline bool f2fs_may_extent_tree(struct inode *inode)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-
- if (!test_opt(sbi, EXTENT_CACHE) ||
- is_inode_flag_set(inode, FI_NO_EXTENT) ||
- (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
- !f2fs_sb_has_readonly(sbi)))
- return false;
-
- /*
- * for recovered files during mount do not create extents
- * if shrinker is not registered.
- */
- if (list_empty(&sbi->s_list))
- return false;
-
- return S_ISREG(inode->i_mode);
-}
-
#ifdef CONFIG_BLK_DEV_ZONED
static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
block_t blkaddr)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bf37983304a3..dbad2db68f1b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -618,7 +618,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
*/
fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
dn->inode) + ofs;
- f2fs_update_extent_cache_range(dn, fofs, 0, len);
+ f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
}
dn->ofs_in_node = ofs;
@@ -1496,7 +1496,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
f2fs_set_data_blkaddr(dn);
}
- f2fs_update_extent_cache_range(dn, start, 0, index - start);
+ f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
return ret;
}
@@ -2573,7 +2573,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
struct f2fs_map_blocks map = { .m_next_extent = NULL,
.m_seg_type = NO_CHECK_TYPE,
.m_may_create = false };
- struct extent_info ei = {0, 0, 0};
+ struct extent_info ei = {0, };
pgoff_t pg_start, pg_end, next_pgofs;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
@@ -2605,7 +2605,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* lookup mapping info in extent cache, skip defragmenting if physical
* block addresses are continuous.
*/
- if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
+ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
if (ei.fofs + ei.len >= pg_end)
goto out;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index aa928d1c8159..5cd19fdc1059 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -389,40 +389,95 @@ static unsigned int count_bits(const unsigned long *addr,
return sum;
}
-static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
- unsigned long long mtime, unsigned int segno,
- struct rb_node *parent, struct rb_node **p,
- bool left_most)
+static bool f2fs_check_victim_tree(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+ struct rb_node *cur = rb_first_cached(root), *next;
+ struct victim_entry *cur_ve, *next_ve;
+
+ while (cur) {
+ next = rb_next(cur);
+ if (!next)
+ return true;
+
+ cur_ve = rb_entry(cur, struct victim_entry, rb_node);
+ next_ve = rb_entry(next, struct victim_entry, rb_node);
+
+ if (cur_ve->mtime > next_ve->mtime) {
+ f2fs_info(sbi, "broken victim_rbtree, "
+ "cur_mtime(%llu) next_mtime(%llu)",
+ cur_ve->mtime, next_ve->mtime);
+ return false;
+ }
+ cur = next;
+ }
+#endif
+ return true;
+}
+
+static struct victim_entry *__lookup_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *node = am->root.rb_root.rb_node;
+ struct victim_entry *ve = NULL;
+
+ while (node) {
+ ve = rb_entry(node, struct victim_entry, rb_node);
+
+ if (mtime < ve->mtime)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return ve;
+}
+
+static struct victim_entry *__create_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno)
{
struct atgc_management *am = &sbi->am;
struct victim_entry *ve;
- ve = f2fs_kmem_cache_alloc(victim_entry_slab,
- GFP_NOFS, true, NULL);
+ ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS, true, NULL);
ve->mtime = mtime;
ve->segno = segno;
- rb_link_node(&ve->rb_node, parent, p);
- rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
-
list_add_tail(&ve->list, &am->victim_list);
-
am->victim_count++;
return ve;
}
-static void insert_victim_entry(struct f2fs_sb_info *sbi,
+static void __insert_victim_entry(struct f2fs_sb_info *sbi,
unsigned long long mtime, unsigned int segno)
{
struct atgc_management *am = &sbi->am;
- struct rb_node **p;
+ struct rb_root_cached *root = &am->root;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *parent = NULL;
+ struct victim_entry *ve;
bool left_most = true;
- p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
- attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
+ /* look up rb tree to find parent node */
+ while (*p) {
+ parent = *p;
+ ve = rb_entry(parent, struct victim_entry, rb_node);
+
+ if (mtime < ve->mtime) {
+ p = &(*p)->rb_left;
+ } else {
+ p = &(*p)->rb_right;
+ left_most = false;
+ }
+ }
+
+ ve = __create_victim_entry(sbi, mtime, segno);
+
+ rb_link_node(&ve->rb_node, parent, p);
+ rb_insert_color_cached(&ve->rb_node, root, left_most);
}
static void add_victim_entry(struct f2fs_sb_info *sbi,
@@ -458,19 +513,7 @@ static void add_victim_entry(struct f2fs_sb_info *sbi,
if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
return;
- insert_victim_entry(sbi, mtime, segno);
-}
-
-static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
- struct victim_sel_policy *p)
-{
- struct atgc_management *am = &sbi->am;
- struct rb_node *parent = NULL;
- bool left_most;
-
- f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
-
- return parent;
+ __insert_victim_entry(sbi, mtime, segno);
}
static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
@@ -480,7 +523,6 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
struct atgc_management *am = &sbi->am;
struct rb_root_cached *root = &am->root;
struct rb_node *node;
- struct rb_entry *re;
struct victim_entry *ve;
unsigned long long total_time;
unsigned long long age, u, accu;
@@ -507,12 +549,10 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
node = rb_first_cached(root);
next:
- re = rb_entry_safe(node, struct rb_entry, rb_node);
- if (!re)
+ ve = rb_entry_safe(node, struct victim_entry, rb_node);
+ if (!ve)
return;
- ve = (struct victim_entry *)re;
-
if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
goto skip;
@@ -554,8 +594,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
{
struct sit_info *sit_i = SIT_I(sbi);
struct atgc_management *am = &sbi->am;
- struct rb_node *node;
- struct rb_entry *re;
struct victim_entry *ve;
unsigned long long age;
unsigned long long max_mtime = sit_i->dirty_max_mtime;
@@ -565,25 +603,22 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
unsigned int dirty_threshold = max(am->max_candidate_count,
am->candidate_ratio *
am->victim_count / 100);
- unsigned int cost;
- unsigned int iter = 0;
+ unsigned int cost, iter;
int stage = 0;
if (max_mtime < min_mtime)
return;
max_mtime += 1;
next_stage:
- node = lookup_central_victim(sbi, p);
+ iter = 0;
+ ve = __lookup_victim_entry(sbi, p->age);
next_node:
- re = rb_entry_safe(node, struct rb_entry, rb_node);
- if (!re) {
- if (stage == 0)
- goto skip_stage;
+ if (!ve) {
+ if (stage++ == 0)
+ goto next_stage;
return;
}
- ve = (struct victim_entry *)re;
-
if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
goto skip_node;
@@ -609,24 +644,20 @@ next_node:
}
skip_node:
if (iter < dirty_threshold) {
- if (stage == 0)
- node = rb_prev(node);
- else if (stage == 1)
- node = rb_next(node);
+ ve = rb_entry(stage == 0 ? rb_prev(&ve->rb_node) :
+ rb_next(&ve->rb_node),
+ struct victim_entry, rb_node);
goto next_node;
}
-skip_stage:
- if (stage < 1) {
- stage++;
- iter = 0;
+
+ if (stage++ == 0)
goto next_stage;
- }
}
+
static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
struct victim_sel_policy *p)
{
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &sbi->am.root, true));
+ f2fs_bug_on(sbi, !f2fs_check_victim_tree(sbi, &sbi->am.root));
if (p->gc_mode == GC_AT)
atgc_lookup_victim(sbi, p);
@@ -1147,7 +1178,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
- struct extent_info ei = {0, 0, 0};
+ struct extent_info ei = {0, };
struct f2fs_io_info fio = {
.sbi = sbi,
.ino = inode->i_ino,
@@ -1165,7 +1196,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (!page)
return -ENOMEM;
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ))) {
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 19b956c2d697..ca84024b9c9e 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -55,20 +55,10 @@ struct gc_inode_list {
struct radix_tree_root iroot;
};
-struct victim_info {
- unsigned long long mtime; /* mtime of section */
- unsigned int segno; /* section No. */
-};
-
struct victim_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
- union {
- struct {
- unsigned long long mtime; /* mtime of section */
- unsigned int segno; /* segment No. */
- };
- struct victim_info vi; /* victim info */
- };
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. */
struct list_head list;
};
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 229ddc2f7b07..aab3b8b3ab0a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -262,22 +262,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
- if (fi->extent_tree) {
- struct extent_info *ei = &fi->extent_tree->largest;
-
- if (ei->len &&
- (!f2fs_is_valid_blkaddr(sbi, ei->blk,
- DATA_GENERIC_ENHANCE) ||
- !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
- DATA_GENERIC_ENHANCE))) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
- __func__, inode->i_ino,
- ei->blk, ei->fofs, ei->len);
- return false;
- }
- }
-
if (f2fs_sanity_check_inline_data(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
@@ -392,8 +376,6 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
- f2fs_init_extent_tree(inode, node_page);
-
get_inline_info(inode, ri);
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
@@ -415,12 +397,6 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
- if (!sanity_check_inode(inode, node_page)) {
- f2fs_put_page(node_page, 1);
- f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
- return -EFSCORRUPTED;
- }
-
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -479,6 +455,22 @@ static int do_read_inode(struct inode *inode)
}
init_idisk_time(inode);
+
+ /* Need all the flag bits */
+ f2fs_init_read_extent_tree(inode, node_page);
+
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ return -EFSCORRUPTED;
+ }
+
+ if (!sanity_check_extent_cache(inode)) {
+ f2fs_put_page(node_page, 1);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ return -EFSCORRUPTED;
+ }
+
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
@@ -607,7 +599,7 @@ retry:
void f2fs_update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
- struct extent_tree *et = F2FS_I(inode)->extent_tree;
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
@@ -629,7 +621,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (et) {
read_lock(&et->lock);
- set_raw_extent(&et->largest, &ri->i_ext);
+ set_raw_read_extent(&et->largest, &ri->i_ext);
read_unlock(&et->lock);
} else {
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b6c14c9c33a0..d879a295b688 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -258,8 +258,6 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
}
F2FS_I(inode)->i_inline_xattr_size = xattr_size;
- f2fs_init_extent_tree(inode, NULL);
-
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
@@ -282,6 +280,8 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
f2fs_set_inode_flags(inode);
+ f2fs_init_extent_tree(inode);
+
trace_f2fs_new_inode(inode, 0);
return inode;
@@ -1002,12 +1002,20 @@ static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
goto out;
}
+ /*
+ * Copied from ext4_rename: we need to protect against old.inode
+ * directory getting converted from inline directory format into
+ * a normal one.
+ */
+ if (S_ISDIR(old_inode->i_mode))
+ inode_lock_nested(old_inode, I_MUTEX_NONDIR2);
+
err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
err = PTR_ERR(old_page);
- goto out;
+ goto out_unlock_old;
}
if (S_ISDIR(old_inode->i_mode)) {
@@ -1115,6 +1123,9 @@ static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
f2fs_unlock_op(sbi);
+ if (S_ISDIR(old_inode->i_mode))
+ inode_unlock(old_inode);
+
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -1129,6 +1140,9 @@ out_dir:
f2fs_put_page(old_dir_page, 0);
out_old:
f2fs_put_page(old_page, 0);
+out_unlock_old:
+ if (S_ISDIR(old_inode->i_mode))
+ inode_unlock(old_inode);
out:
iput(whiteout);
return err;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9ee5a1176a0..07419c3e42a5 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -85,10 +85,12 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct ino_entry);
mem_size >>= PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
- } else if (type == EXTENT_CACHE) {
- mem_size = (atomic_read(&sbi->total_ext_tree) *
+ } else if (type == READ_EXTENT_CACHE) {
+ struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
+
+ mem_size = (atomic_read(&eti->total_ext_tree) *
sizeof(struct extent_tree) +
- atomic_read(&sbi->total_ext_node) *
+ atomic_read(&eti->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == DISCARD_CACHE) {
@@ -859,7 +861,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
blkaddr = data_blkaddr(dn->inode, dn->node_page,
dn->ofs_in_node + 1);
- f2fs_update_extent_tree_range_compressed(dn->inode,
+ f2fs_update_read_extent_tree_range_compressed(dn->inode,
index, blkaddr,
F2FS_I(dn->inode)->i_cluster_size,
c_len);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 3c09cae058b0..0aa48704c77a 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -146,7 +146,7 @@ enum mem_type {
NAT_ENTRIES, /* indicates the cached nat entry */
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
- EXTENT_CACHE, /* indicates extent cache */
+ READ_EXTENT_CACHE, /* indicates read extent cache */
DISCARD_CACHE, /* indicates memory of cached discard cmds */
COMPRESS_PAGE, /* indicates memory of cached compressed pages */
BASE_CHECK, /* check kernel status */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index b0fbdee16a96..cbbf95b99541 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -262,7 +262,7 @@ retry:
f2fs_put_dnode(&dn);
trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
- index, *old_addr, new_addr, recover);
+ index, old_addr ? *old_addr : 0, new_addr, recover);
return 0;
}
@@ -452,8 +452,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
return;
/* try to shrink extent cache when there is no enough memory */
- if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
- f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
+ if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
+ f2fs_shrink_read_extent_tree(sbi,
+ READ_EXTENT_CACHE_SHRINK_NUMBER);
/* check the # of cached NAT entries */
if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
@@ -1473,7 +1474,7 @@ retry:
goto next;
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root, false));
+ &dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -3001,7 +3002,7 @@ next:
mutex_lock(&dcc->cmd_lock);
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root, false));
+ &dcc->root));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index dd3c3c7a90ec..33c490e69ae3 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -28,10 +28,13 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
return count > 0 ? count : 0;
}
-static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
+static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi,
+ enum extent_type type)
{
- return atomic_read(&sbi->total_zombie_tree) +
- atomic_read(&sbi->total_ext_node);
+ struct extent_tree_info *eti = &sbi->extent_tree[type];
+
+ return atomic_read(&eti->total_zombie_tree) +
+ atomic_read(&eti->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
@@ -53,8 +56,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
}
spin_unlock(&f2fs_list_lock);
- /* count extent cache entries */
- count += __count_extent_cache(sbi);
+ /* count read extent cache entries */
+ count += __count_extent_cache(sbi, EX_READ);
/* count clean nat cache entries */
count += __count_nat_entries(sbi);
@@ -99,8 +102,8 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
sbi->shrinker_run_no = run_no;
- /* shrink extent cache entries */
- freed += f2fs_shrink_extent_tree(sbi, nr >> 1);
+ /* shrink read extent cache entries */
+ freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1);
/* shrink clean nat cache entries */
if (freed < nr)
@@ -130,7 +133,7 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
{
- f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
+ f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ));
spin_lock(&f2fs_list_lock);
list_del_init(&sbi->s_list);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5af05411818a..c46533d65372 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -810,10 +810,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
set_opt(sbi, FASTBOOT);
break;
case Opt_extent_cache:
- set_opt(sbi, EXTENT_CACHE);
+ set_opt(sbi, READ_EXTENT_CACHE);
break;
case Opt_noextent_cache:
- clear_opt(sbi, EXTENT_CACHE);
+ clear_opt(sbi, READ_EXTENT_CACHE);
break;
case Opt_noinline_data:
clear_opt(sbi, INLINE_DATA);
@@ -1939,7 +1939,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",nobarrier");
if (test_opt(sbi, FASTBOOT))
seq_puts(seq, ",fastboot");
- if (test_opt(sbi, EXTENT_CACHE))
+ if (test_opt(sbi, READ_EXTENT_CACHE))
seq_puts(seq, ",extent_cache");
else
seq_puts(seq, ",noextent_cache");
@@ -2057,7 +2057,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
- set_opt(sbi, EXTENT_CACHE);
+ set_opt(sbi, READ_EXTENT_CACHE);
set_opt(sbi, NOHEAP);
clear_opt(sbi, DISABLE_CHECKPOINT);
set_opt(sbi, MERGE_CHECKPOINT);
@@ -2198,7 +2198,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_restart_ckpt = false, need_stop_ckpt = false;
bool need_restart_flush = false, need_stop_flush = false;
bool need_restart_discard = false, need_stop_discard = false;
- bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+ bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC);
@@ -2288,7 +2288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
}
/* disallow enable/disable extent_cache dynamically */
- if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
+ if (no_read_extent_cache == !!test_opt(sbi, READ_EXTENT_CACHE)) {
err = -EINVAL;
f2fs_warn(sbi, "switch extent_cache option is not allowed");
goto restore_opts;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aa33c39be182..d387708977a5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -827,7 +827,7 @@ void wbc_detach_inode(struct writeback_control *wbc)
* is okay. The main goal is avoiding keeping an inode on
* the wrong wb for an extended period of time.
*/
- if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
+ if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
inode_switch_wbs(inode, max_id);
}
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index b8f9d627f241..e3312fbf4c09 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -20,7 +20,7 @@ static DEFINE_MUTEX(init_lock);
static struct ksmbd_conn_ops default_conn_ops;
LIST_HEAD(conn_list);
-DEFINE_RWLOCK(conn_list_lock);
+DECLARE_RWSEM(conn_list_lock);
/**
* ksmbd_conn_free() - free resources of the connection instance
@@ -32,9 +32,9 @@ DEFINE_RWLOCK(conn_list_lock);
*/
void ksmbd_conn_free(struct ksmbd_conn *conn)
{
- write_lock(&conn_list_lock);
+ down_write(&conn_list_lock);
list_del(&conn->conns_list);
- write_unlock(&conn_list_lock);
+ up_write(&conn_list_lock);
xa_destroy(&conn->sessions);
kvfree(conn->request_buf);
@@ -56,7 +56,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
return NULL;
conn->need_neg = true;
- conn->status = KSMBD_SESS_NEW;
+ ksmbd_conn_set_new(conn);
conn->local_nls = load_nls("utf8");
if (!conn->local_nls)
conn->local_nls = load_nls_default();
@@ -84,9 +84,9 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
spin_lock_init(&conn->llist_lock);
INIT_LIST_HEAD(&conn->lock_list);
- write_lock(&conn_list_lock);
+ down_write(&conn_list_lock);
list_add(&conn->conns_list, &conn_list);
- write_unlock(&conn_list_lock);
+ up_write(&conn_list_lock);
return conn;
}
@@ -95,7 +95,7 @@ bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c)
struct ksmbd_conn *t;
bool ret = false;
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
list_for_each_entry(t, &conn_list, conns_list) {
if (memcmp(t->ClientGUID, c->ClientGUID, SMB2_CLIENT_GUID_SIZE))
continue;
@@ -103,7 +103,7 @@ bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c)
ret = true;
break;
}
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
return ret;
}
@@ -149,19 +149,47 @@ int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
return ret;
}
-static void ksmbd_conn_lock(struct ksmbd_conn *conn)
+void ksmbd_conn_lock(struct ksmbd_conn *conn)
{
mutex_lock(&conn->srv_mutex);
}
-static void ksmbd_conn_unlock(struct ksmbd_conn *conn)
+void ksmbd_conn_unlock(struct ksmbd_conn *conn)
{
mutex_unlock(&conn->srv_mutex);
}
-void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
+void ksmbd_all_conn_set_status(u64 sess_id, u32 status)
{
+ struct ksmbd_conn *conn;
+
+ down_read(&conn_list_lock);
+ list_for_each_entry(conn, &conn_list, conns_list) {
+ if (conn->binding || xa_load(&conn->sessions, sess_id))
+ WRITE_ONCE(conn->status, status);
+ }
+ up_read(&conn_list_lock);
+}
+
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id)
+{
+ struct ksmbd_conn *bind_conn;
+
wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2);
+
+ down_read(&conn_list_lock);
+ list_for_each_entry(bind_conn, &conn_list, conns_list) {
+ if (bind_conn == conn)
+ continue;
+
+ if ((bind_conn->binding || xa_load(&bind_conn->sessions, sess_id)) &&
+ !ksmbd_conn_releasing(bind_conn) &&
+ atomic_read(&bind_conn->req_running)) {
+ wait_event(bind_conn->req_running_q,
+ atomic_read(&bind_conn->req_running) == 0);
+ }
+ }
+ up_read(&conn_list_lock);
}
int ksmbd_conn_write(struct ksmbd_work *work)
@@ -245,7 +273,7 @@ bool ksmbd_conn_alive(struct ksmbd_conn *conn)
if (!ksmbd_server_running())
return false;
- if (conn->status == KSMBD_SESS_EXITING)
+ if (ksmbd_conn_exiting(conn))
return false;
if (kthread_should_stop())
@@ -305,7 +333,7 @@ int ksmbd_conn_handler_loop(void *p)
pdu_size = get_rfc1002_len(hdr_buf);
ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
- if (conn->status == KSMBD_SESS_GOOD)
+ if (ksmbd_conn_good(conn))
max_allowed_pdu_size =
SMB3_MAX_MSGSIZE + conn->vals->max_write_size;
else
@@ -314,7 +342,7 @@ int ksmbd_conn_handler_loop(void *p)
if (pdu_size > max_allowed_pdu_size) {
pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n",
pdu_size, max_allowed_pdu_size,
- conn->status);
+ READ_ONCE(conn->status));
break;
}
@@ -362,10 +390,10 @@ int ksmbd_conn_handler_loop(void *p)
}
out:
+ ksmbd_conn_set_releasing(conn);
/* Wait till all reference dropped to the Server object*/
wait_event(conn->r_count_q, atomic_read(&conn->r_count) == 0);
-
if (IS_ENABLED(CONFIG_UNICODE))
utf8_unload(conn->um);
unload_nls(conn->local_nls);
@@ -409,7 +437,7 @@ static void stop_sessions(void)
struct ksmbd_transport *t;
again:
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
struct task_struct *task;
@@ -418,14 +446,14 @@ again:
if (task)
ksmbd_debug(CONN, "Stop session handler %s/%d\n",
task->comm, task_pid_nr(task));
- conn->status = KSMBD_SESS_EXITING;
+ ksmbd_conn_set_exiting(conn);
if (t->ops->shutdown) {
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
t->ops->shutdown(t);
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
}
}
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
if (!list_empty(&conn_list)) {
schedule_timeout_interruptible(HZ / 10); /* 100ms */
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index 0e3a848defaf..ad8dfaa48ffb 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -26,7 +26,8 @@ enum {
KSMBD_SESS_GOOD,
KSMBD_SESS_EXITING,
KSMBD_SESS_NEED_RECONNECT,
- KSMBD_SESS_NEED_NEGOTIATE
+ KSMBD_SESS_NEED_NEGOTIATE,
+ KSMBD_SESS_RELEASING
};
struct ksmbd_stats {
@@ -140,10 +141,10 @@ struct ksmbd_transport {
#define KSMBD_TCP_PEER_SOCKADDR(c) ((struct sockaddr *)&((c)->peer_addr))
extern struct list_head conn_list;
-extern rwlock_t conn_list_lock;
+extern struct rw_semaphore conn_list_lock;
bool ksmbd_conn_alive(struct ksmbd_conn *conn);
-void ksmbd_conn_wait_idle(struct ksmbd_conn *conn);
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id);
struct ksmbd_conn *ksmbd_conn_alloc(void);
void ksmbd_conn_free(struct ksmbd_conn *conn);
bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
@@ -162,6 +163,8 @@ void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops);
int ksmbd_conn_handler_loop(void *p);
int ksmbd_conn_transport_init(void);
void ksmbd_conn_transport_destroy(void);
+void ksmbd_conn_lock(struct ksmbd_conn *conn);
+void ksmbd_conn_unlock(struct ksmbd_conn *conn);
/*
* WARNING
@@ -169,43 +172,60 @@ void ksmbd_conn_transport_destroy(void);
* This is a hack. We will move status to a proper place once we land
* a multi-sessions support.
*/
-static inline bool ksmbd_conn_good(struct ksmbd_work *work)
+static inline bool ksmbd_conn_good(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_GOOD;
+ return READ_ONCE(conn->status) == KSMBD_SESS_GOOD;
}
-static inline bool ksmbd_conn_need_negotiate(struct ksmbd_work *work)
+static inline bool ksmbd_conn_need_negotiate(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_NEED_NEGOTIATE;
+ return READ_ONCE(conn->status) == KSMBD_SESS_NEED_NEGOTIATE;
}
-static inline bool ksmbd_conn_need_reconnect(struct ksmbd_work *work)
+static inline bool ksmbd_conn_need_reconnect(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_NEED_RECONNECT;
+ return READ_ONCE(conn->status) == KSMBD_SESS_NEED_RECONNECT;
}
-static inline bool ksmbd_conn_exiting(struct ksmbd_work *work)
+static inline bool ksmbd_conn_exiting(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_EXITING;
+ return READ_ONCE(conn->status) == KSMBD_SESS_EXITING;
}
-static inline void ksmbd_conn_set_good(struct ksmbd_work *work)
+static inline bool ksmbd_conn_releasing(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_GOOD;
+ return READ_ONCE(conn->status) == KSMBD_SESS_RELEASING;
}
-static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_work *work)
+static inline void ksmbd_conn_set_new(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_NEED_NEGOTIATE;
+ WRITE_ONCE(conn->status, KSMBD_SESS_NEW);
}
-static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_work *work)
+static inline void ksmbd_conn_set_good(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_NEED_RECONNECT;
+ WRITE_ONCE(conn->status, KSMBD_SESS_GOOD);
}
-static inline void ksmbd_conn_set_exiting(struct ksmbd_work *work)
+static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_EXITING;
+ WRITE_ONCE(conn->status, KSMBD_SESS_NEED_NEGOTIATE);
}
+
+static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_conn *conn)
+{
+ WRITE_ONCE(conn->status, KSMBD_SESS_NEED_RECONNECT);
+}
+
+static inline void ksmbd_conn_set_exiting(struct ksmbd_conn *conn)
+{
+ WRITE_ONCE(conn->status, KSMBD_SESS_EXITING);
+}
+
+static inline void ksmbd_conn_set_releasing(struct ksmbd_conn *conn)
+{
+ WRITE_ONCE(conn->status, KSMBD_SESS_RELEASING);
+}
+
+void ksmbd_all_conn_set_status(u64 sess_id, u32 status);
#endif /* __CONNECTION_H__ */
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
index f19de20c2960..f07a05f37651 100644
--- a/fs/ksmbd/mgmt/tree_connect.c
+++ b/fs/ksmbd/mgmt/tree_connect.c
@@ -137,6 +137,9 @@ int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess)
struct ksmbd_tree_connect *tc;
unsigned long id;
+ if (!sess)
+ return -EINVAL;
+
xa_for_each(&sess->tree_conns, id, tc)
ret |= ksmbd_tree_conn_disconnect(sess, tc);
xa_destroy(&sess->tree_conns);
diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c
index 92b1603b5abe..ea4b56d570fb 100644
--- a/fs/ksmbd/mgmt/user_session.c
+++ b/fs/ksmbd/mgmt/user_session.c
@@ -30,15 +30,15 @@ struct ksmbd_session_rpc {
static void free_channel_list(struct ksmbd_session *sess)
{
- struct channel *chann, *tmp;
+ struct channel *chann;
+ unsigned long index;
- write_lock(&sess->chann_lock);
- list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
- chann_list) {
- list_del(&chann->chann_list);
+ xa_for_each(&sess->ksmbd_chann_list, index, chann) {
+ xa_erase(&sess->ksmbd_chann_list, index);
kfree(chann);
}
- write_unlock(&sess->chann_lock);
+
+ xa_destroy(&sess->ksmbd_chann_list);
}
static void __session_rpc_close(struct ksmbd_session *sess,
@@ -153,10 +153,6 @@ void ksmbd_session_destroy(struct ksmbd_session *sess)
if (!sess)
return;
- down_write(&sessions_table_lock);
- hash_del(&sess->hlist);
- up_write(&sessions_table_lock);
-
if (sess->user)
ksmbd_free_user(sess->user);
@@ -174,76 +170,101 @@ static struct ksmbd_session *__session_lookup(unsigned long long id)
struct ksmbd_session *sess;
hash_for_each_possible(sessions_table, sess, hlist, id) {
- if (id == sess->id)
+ if (id == sess->id) {
+ sess->last_active = jiffies;
return sess;
+ }
}
return NULL;
}
+static void ksmbd_expire_session(struct ksmbd_conn *conn)
+{
+ unsigned long id;
+ struct ksmbd_session *sess;
+
+ down_write(&sessions_table_lock);
+ xa_for_each(&conn->sessions, id, sess) {
+ if (sess->state != SMB2_SESSION_VALID ||
+ time_after(jiffies,
+ sess->last_active + SMB2_SESSION_TIMEOUT)) {
+ xa_erase(&conn->sessions, sess->id);
+ hash_del(&sess->hlist);
+ ksmbd_session_destroy(sess);
+ continue;
+ }
+ }
+ up_write(&sessions_table_lock);
+}
+
int ksmbd_session_register(struct ksmbd_conn *conn,
struct ksmbd_session *sess)
{
sess->dialect = conn->dialect;
memcpy(sess->ClientGUID, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
+ ksmbd_expire_session(conn);
return xa_err(xa_store(&conn->sessions, sess->id, sess, GFP_KERNEL));
}
static int ksmbd_chann_del(struct ksmbd_conn *conn, struct ksmbd_session *sess)
{
- struct channel *chann, *tmp;
-
- write_lock(&sess->chann_lock);
- list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
- chann_list) {
- if (chann->conn == conn) {
- list_del(&chann->chann_list);
- kfree(chann);
- write_unlock(&sess->chann_lock);
- return 0;
- }
- }
- write_unlock(&sess->chann_lock);
+ struct channel *chann;
- return -ENOENT;
+ chann = xa_erase(&sess->ksmbd_chann_list, (long)conn);
+ if (!chann)
+ return -ENOENT;
+
+ kfree(chann);
+ return 0;
}
void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
{
struct ksmbd_session *sess;
+ unsigned long id;
+ down_write(&sessions_table_lock);
if (conn->binding) {
int bkt;
+ struct hlist_node *tmp;
- down_write(&sessions_table_lock);
- hash_for_each(sessions_table, bkt, sess, hlist) {
- if (!ksmbd_chann_del(conn, sess)) {
- up_write(&sessions_table_lock);
- goto sess_destroy;
+ hash_for_each_safe(sessions_table, bkt, tmp, sess, hlist) {
+ if (!ksmbd_chann_del(conn, sess) &&
+ xa_empty(&sess->ksmbd_chann_list)) {
+ hash_del(&sess->hlist);
+ ksmbd_session_destroy(sess);
}
}
- up_write(&sessions_table_lock);
- } else {
- unsigned long id;
-
- xa_for_each(&conn->sessions, id, sess) {
- if (!ksmbd_chann_del(conn, sess))
- goto sess_destroy;
- }
}
- return;
+ xa_for_each(&conn->sessions, id, sess) {
+ unsigned long chann_id;
+ struct channel *chann;
+
+ xa_for_each(&sess->ksmbd_chann_list, chann_id, chann) {
+ if (chann->conn != conn)
+ ksmbd_conn_set_exiting(chann->conn);
+ }
-sess_destroy:
- if (list_empty(&sess->ksmbd_chann_list)) {
- xa_erase(&conn->sessions, sess->id);
- ksmbd_session_destroy(sess);
+ ksmbd_chann_del(conn, sess);
+ if (xa_empty(&sess->ksmbd_chann_list)) {
+ xa_erase(&conn->sessions, sess->id);
+ hash_del(&sess->hlist);
+ ksmbd_session_destroy(sess);
+ }
}
+ up_write(&sessions_table_lock);
}
struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
unsigned long long id)
{
- return xa_load(&conn->sessions, id);
+ struct ksmbd_session *sess;
+
+ sess = xa_load(&conn->sessions, id);
+ if (sess)
+ sess->last_active = jiffies;
+ return sess;
}
struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
@@ -252,6 +273,8 @@ struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
down_read(&sessions_table_lock);
sess = __session_lookup(id);
+ if (sess)
+ sess->last_active = jiffies;
up_read(&sessions_table_lock);
return sess;
@@ -320,6 +343,9 @@ static struct ksmbd_session *__session_create(int protocol)
struct ksmbd_session *sess;
int ret;
+ if (protocol != CIFDS_SESSION_FLAG_SMB2)
+ return NULL;
+
sess = kzalloc(sizeof(struct ksmbd_session), GFP_KERNEL);
if (!sess)
return NULL;
@@ -327,32 +353,24 @@ static struct ksmbd_session *__session_create(int protocol)
if (ksmbd_init_file_table(&sess->file_table))
goto error;
+ sess->last_active = jiffies;
+ sess->state = SMB2_SESSION_IN_PROGRESS;
set_session_flag(sess, protocol);
xa_init(&sess->tree_conns);
- INIT_LIST_HEAD(&sess->ksmbd_chann_list);
+ xa_init(&sess->ksmbd_chann_list);
INIT_LIST_HEAD(&sess->rpc_handle_list);
sess->sequence_number = 1;
- rwlock_init(&sess->chann_lock);
-
- switch (protocol) {
- case CIFDS_SESSION_FLAG_SMB2:
- ret = __init_smb2_session(sess);
- break;
- default:
- ret = -EINVAL;
- break;
- }
+ ret = __init_smb2_session(sess);
if (ret)
goto error;
ida_init(&sess->tree_conn_ida);
- if (protocol == CIFDS_SESSION_FLAG_SMB2) {
- down_write(&sessions_table_lock);
- hash_add(sessions_table, &sess->hlist, sess->id);
- up_write(&sessions_table_lock);
- }
+ down_write(&sessions_table_lock);
+ hash_add(sessions_table, &sess->hlist, sess->id);
+ up_write(&sessions_table_lock);
+
return sess;
error:
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
index 8934b8ee275b..51f38e5b61ab 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -21,7 +21,6 @@ struct ksmbd_file_table;
struct channel {
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
struct ksmbd_conn *conn;
- struct list_head chann_list;
};
struct preauth_session {
@@ -50,8 +49,7 @@ struct ksmbd_session {
char sess_key[CIFS_KEY_SIZE];
struct hlist_node hlist;
- rwlock_t chann_lock;
- struct list_head ksmbd_chann_list;
+ struct xarray ksmbd_chann_list;
struct xarray tree_conns;
struct ida tree_conn_ida;
struct list_head rpc_handle_list;
@@ -61,6 +59,7 @@ struct ksmbd_session {
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
struct ksmbd_file_table file_table;
+ unsigned long last_active;
};
static inline int test_session_flag(struct ksmbd_session *sess, int bit)
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
index 8c2bc513445c..8a0ad399f245 100644
--- a/fs/ksmbd/server.c
+++ b/fs/ksmbd/server.c
@@ -93,7 +93,8 @@ static inline int check_conn_state(struct ksmbd_work *work)
{
struct smb_hdr *rsp_hdr;
- if (ksmbd_conn_exiting(work) || ksmbd_conn_need_reconnect(work)) {
+ if (ksmbd_conn_exiting(work->conn) ||
+ ksmbd_conn_need_reconnect(work->conn)) {
rsp_hdr = work->response_buf;
rsp_hdr->Status.CifsError = STATUS_CONNECTION_DISCONNECTED;
return 1;
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index acd66fb40c5f..8f96b96dbac1 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -74,14 +74,7 @@ static inline bool check_session_id(struct ksmbd_conn *conn, u64 id)
struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn *conn)
{
- struct channel *chann;
-
- list_for_each_entry(chann, &sess->ksmbd_chann_list, chann_list) {
- if (chann->conn == conn)
- return chann;
- }
-
- return NULL;
+ return xa_load(&sess->ksmbd_chann_list, (long)conn);
}
/**
@@ -254,7 +247,7 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
rsp = smb2_get_msg(work->response_buf);
- WARN_ON(ksmbd_conn_good(work));
+ WARN_ON(ksmbd_conn_good(conn));
rsp->StructureSize = cpu_to_le16(65);
ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
@@ -284,7 +277,7 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
conn->use_spnego = true;
- ksmbd_conn_set_need_negotiate(work);
+ ksmbd_conn_set_need_negotiate(conn);
return 0;
}
@@ -574,7 +567,7 @@ int smb2_check_user_session(struct ksmbd_work *work)
cmd == SMB2_SESSION_SETUP_HE)
return 0;
- if (!ksmbd_conn_good(work))
+ if (!ksmbd_conn_good(conn))
return -EINVAL;
sess_id = le64_to_cpu(req_hdr->SessionId);
@@ -592,6 +585,7 @@ static void destroy_previous_session(struct ksmbd_conn *conn,
struct ksmbd_session *prev_sess = ksmbd_session_lookup_slowpath(id);
struct ksmbd_user *prev_user;
struct channel *chann;
+ long index;
if (!prev_sess)
return;
@@ -605,10 +599,8 @@ static void destroy_previous_session(struct ksmbd_conn *conn,
return;
prev_sess->state = SMB2_SESSION_EXPIRED;
- write_lock(&prev_sess->chann_lock);
- list_for_each_entry(chann, &prev_sess->ksmbd_chann_list, chann_list)
- chann->conn->status = KSMBD_SESS_EXITING;
- write_unlock(&prev_sess->chann_lock);
+ xa_for_each(&prev_sess->ksmbd_chann_list, index, chann)
+ ksmbd_conn_set_exiting(chann->conn);
}
/**
@@ -1075,7 +1067,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
ksmbd_debug(SMB, "Received negotiate request\n");
conn->need_neg = false;
- if (ksmbd_conn_good(work)) {
+ if (ksmbd_conn_good(conn)) {
pr_err("conn->tcp_status is already in CifsGood State\n");
work->send_no_response = 1;
return rc;
@@ -1230,7 +1222,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
}
conn->srv_sec_mode = le16_to_cpu(rsp->SecurityMode);
- ksmbd_conn_set_need_negotiate(work);
+ ksmbd_conn_set_need_negotiate(conn);
err_out:
if (rc < 0)
@@ -1520,19 +1512,14 @@ static int ntlm_authenticate(struct ksmbd_work *work)
binding_session:
if (conn->dialect >= SMB30_PROT_ID) {
- read_lock(&sess->chann_lock);
chann = lookup_chann_list(sess, conn);
- read_unlock(&sess->chann_lock);
if (!chann) {
chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
if (!chann)
return -ENOMEM;
chann->conn = conn;
- INIT_LIST_HEAD(&chann->chann_list);
- write_lock(&sess->chann_lock);
- list_add(&chann->chann_list, &sess->ksmbd_chann_list);
- write_unlock(&sess->chann_lock);
+ xa_store(&sess->ksmbd_chann_list, (long)conn, chann, GFP_KERNEL);
}
}
@@ -1606,19 +1593,14 @@ static int krb5_authenticate(struct ksmbd_work *work)
}
if (conn->dialect >= SMB30_PROT_ID) {
- read_lock(&sess->chann_lock);
chann = lookup_chann_list(sess, conn);
- read_unlock(&sess->chann_lock);
if (!chann) {
chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
if (!chann)
return -ENOMEM;
chann->conn = conn;
- INIT_LIST_HEAD(&chann->chann_list);
- write_lock(&sess->chann_lock);
- list_add(&chann->chann_list, &sess->ksmbd_chann_list);
- write_unlock(&sess->chann_lock);
+ xa_store(&sess->ksmbd_chann_list, (long)conn, chann, GFP_KERNEL);
}
}
@@ -1661,6 +1643,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
rsp->SecurityBufferLength = 0;
inc_rfc1001_len(work->response_buf, 9);
+ ksmbd_conn_lock(conn);
if (!req->hdr.SessionId) {
sess = ksmbd_smb2_session_create();
if (!sess) {
@@ -1708,6 +1691,12 @@ int smb2_sess_setup(struct ksmbd_work *work)
goto out_err;
}
+ if (ksmbd_conn_need_reconnect(conn)) {
+ rc = -EFAULT;
+ sess = NULL;
+ goto out_err;
+ }
+
if (ksmbd_session_lookup(conn, sess_id)) {
rc = -EACCES;
goto out_err;
@@ -1732,12 +1721,20 @@ int smb2_sess_setup(struct ksmbd_work *work)
rc = -ENOENT;
goto out_err;
}
+
+ if (sess->state == SMB2_SESSION_EXPIRED) {
+ rc = -EFAULT;
+ goto out_err;
+ }
+
+ if (ksmbd_conn_need_reconnect(conn)) {
+ rc = -EFAULT;
+ sess = NULL;
+ goto out_err;
+ }
}
work->sess = sess;
- if (sess->state == SMB2_SESSION_EXPIRED)
- sess->state = SMB2_SESSION_IN_PROGRESS;
-
negblob_off = le16_to_cpu(req->SecurityBufferOffset);
negblob_len = le16_to_cpu(req->SecurityBufferLength);
if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) ||
@@ -1767,8 +1764,10 @@ int smb2_sess_setup(struct ksmbd_work *work)
goto out_err;
}
- ksmbd_conn_set_good(work);
- sess->state = SMB2_SESSION_VALID;
+ if (!ksmbd_conn_need_reconnect(conn)) {
+ ksmbd_conn_set_good(conn);
+ sess->state = SMB2_SESSION_VALID;
+ }
kfree(sess->Preauth_HashValue);
sess->Preauth_HashValue = NULL;
} else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) {
@@ -1790,8 +1789,10 @@ int smb2_sess_setup(struct ksmbd_work *work)
if (rc)
goto out_err;
- ksmbd_conn_set_good(work);
- sess->state = SMB2_SESSION_VALID;
+ if (!ksmbd_conn_need_reconnect(conn)) {
+ ksmbd_conn_set_good(conn);
+ sess->state = SMB2_SESSION_VALID;
+ }
if (conn->binding) {
struct preauth_session *preauth_sess;
@@ -1859,14 +1860,17 @@ out_err:
if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION)
try_delay = true;
- xa_erase(&conn->sessions, sess->id);
- ksmbd_session_destroy(sess);
- work->sess = NULL;
- if (try_delay)
+ sess->last_active = jiffies;
+ sess->state = SMB2_SESSION_EXPIRED;
+ if (try_delay) {
+ ksmbd_conn_set_need_reconnect(conn);
ssleep(5);
+ ksmbd_conn_set_need_negotiate(conn);
+ }
}
}
+ ksmbd_conn_unlock(conn);
return rc;
}
@@ -2091,21 +2095,25 @@ int smb2_session_logoff(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
struct smb2_logoff_rsp *rsp = smb2_get_msg(work->response_buf);
- struct ksmbd_session *sess = work->sess;
+ struct ksmbd_session *sess;
+ struct smb2_logoff_req *req = smb2_get_msg(work->request_buf);
+ u64 sess_id = le64_to_cpu(req->hdr.SessionId);
rsp->StructureSize = cpu_to_le16(4);
inc_rfc1001_len(work->response_buf, 4);
ksmbd_debug(SMB, "request\n");
- /* setting CifsExiting here may race with start_tcp_sess */
- ksmbd_conn_set_need_reconnect(work);
+ ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_RECONNECT);
ksmbd_close_session_fds(work);
- ksmbd_conn_wait_idle(conn);
+ ksmbd_conn_wait_idle(conn, sess_id);
+ /*
+ * Re-lookup session to validate if session is deleted
+ * while waiting request complete
+ */
+ sess = ksmbd_session_lookup_all(conn, sess_id);
if (ksmbd_tree_conn_session_logoff(sess)) {
- struct smb2_logoff_req *req = smb2_get_msg(work->request_buf);
-
ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
smb2_set_err_rsp(work);
@@ -2117,9 +2125,7 @@ int smb2_session_logoff(struct ksmbd_work *work)
ksmbd_free_user(sess->user);
sess->user = NULL;
-
- /* let start_tcp_sess free connection info now */
- ksmbd_conn_set_need_negotiate(work);
+ ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE);
return 0;
}
@@ -6947,7 +6953,7 @@ int smb2_lock(struct ksmbd_work *work)
nolock = 1;
/* check locks in connection list */
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
spin_lock(&conn->llist_lock);
list_for_each_entry_safe(cmp_lock, tmp2, &conn->lock_list, clist) {
@@ -6964,7 +6970,7 @@ int smb2_lock(struct ksmbd_work *work)
list_del(&cmp_lock->flist);
list_del(&cmp_lock->clist);
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
locks_free_lock(cmp_lock->fl);
kfree(cmp_lock);
@@ -6986,7 +6992,7 @@ int smb2_lock(struct ksmbd_work *work)
cmp_lock->start > smb_lock->start &&
cmp_lock->start < smb_lock->end) {
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
pr_err("previous lock conflict with zero byte lock range\n");
goto out;
}
@@ -6995,7 +7001,7 @@ int smb2_lock(struct ksmbd_work *work)
smb_lock->start > cmp_lock->start &&
smb_lock->start < cmp_lock->end) {
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
pr_err("current lock conflict with zero byte lock range\n");
goto out;
}
@@ -7006,14 +7012,14 @@ int smb2_lock(struct ksmbd_work *work)
cmp_lock->end >= smb_lock->end)) &&
!cmp_lock->zero_len && !smb_lock->zero_len) {
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
pr_err("Not allow lock operation on exclusive lock range\n");
goto out;
}
}
spin_unlock(&conn->llist_lock);
}
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
out_check_cl:
if (smb_lock->fl->fl_type == F_UNLCK && nolock) {
pr_err("Try to unlock nolocked range\n");
@@ -8428,14 +8434,11 @@ int smb3_check_sign_req(struct ksmbd_work *work)
if (le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
signing_key = work->sess->smb3signingkey;
} else {
- read_lock(&work->sess->chann_lock);
chann = lookup_chann_list(work->sess, conn);
if (!chann) {
- read_unlock(&work->sess->chann_lock);
return 0;
}
signing_key = chann->smb3signingkey;
- read_unlock(&work->sess->chann_lock);
}
if (!signing_key) {
@@ -8495,14 +8498,11 @@ void smb3_set_sign_rsp(struct ksmbd_work *work)
le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
signing_key = work->sess->smb3signingkey;
} else {
- read_lock(&work->sess->chann_lock);
chann = lookup_chann_list(work->sess, work->conn);
if (!chann) {
- read_unlock(&work->sess->chann_lock);
return;
}
signing_key = chann->smb3signingkey;
- read_unlock(&work->sess->chann_lock);
}
if (!signing_key)
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index f4baa9800f6e..dd10f8031606 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -61,6 +61,8 @@ struct preauth_integrity_info {
#define SMB2_SESSION_IN_PROGRESS BIT(0)
#define SMB2_SESSION_VALID BIT(1)
+#define SMB2_SESSION_TIMEOUT (10 * HZ)
+
struct create_durable_req_v2 {
struct create_context ccontext;
__u8 Name[8];
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index 20e85e2701f2..eff7a1d793f0 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -333,7 +333,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
if (length == -EINTR) {
total_read = -ESHUTDOWN;
break;
- } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
+ } else if (ksmbd_conn_need_reconnect(conn)) {
total_read = -EAGAIN;
break;
} else if (length == -ERESTARTSYS || length == -EAGAIN) {
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 49cfe2ae6d23..993375f0db67 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -65,7 +65,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
struct fsnotify_event *fsn_event;
struct fsnotify_group *group = inode_mark->group;
int ret;
- int len = 0;
+ int len = 0, wd;
int alloc_len = sizeof(struct inotify_event_info);
struct mem_cgroup *old_memcg;
@@ -81,6 +81,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsn_mark);
/*
+ * We can be racing with mark being detached. Don't report event with
+ * invalid wd.
+ */
+ wd = READ_ONCE(i_mark->wd);
+ if (wd == -1)
+ return 0;
+ /*
* Whoever is interested in the event, pays for the allocation. Do not
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
@@ -110,7 +117,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsn_event = &event->fse;
fsnotify_init_event(fsn_event);
event->mask = mask;
- event->wd = i_mark->wd;
+ event->wd = wd;
event->sync_cookie = cookie;
event->name_len = len;
if (len)
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 45f95c1cb258..e0cdc91d88a8 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -661,7 +661,8 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
if (!wnd->bits_last)
wnd->bits_last = wbits;
- wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN);
+ wnd->free_bits =
+ kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN);
if (!wnd->free_bits)
return -ENOMEM;
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index bc22cc321a74..a9549e73081f 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -86,6 +86,16 @@ static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry,
__putname(uni);
}
+ /*
+ * Check for a null pointer
+ * If the MFT record of ntfs inode is not a base record, inode->i_op can be NULL.
+ * This causes null pointer dereference in d_splice_alias().
+ */
+ if (!IS_ERR_OR_NULL(inode) && !inode->i_op) {
+ iput(inode);
+ inode = ERR_PTR(-EINVAL);
+ }
+
return d_splice_alias(inode, dentry);
}
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 9cc396b117bf..0f38d558169a 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -436,9 +436,6 @@ static inline u64 attr_svcn(const struct ATTRIB *attr)
return attr->non_res ? le64_to_cpu(attr->nres.svcn) : 0;
}
-/* The size of resident attribute by its resident size. */
-#define BYTES_PER_RESIDENT(b) (0x18 + (b))
-
static_assert(sizeof(struct ATTRIB) == 0x48);
static_assert(sizeof(((struct ATTRIB *)NULL)->res) == 0x08);
static_assert(sizeof(((struct ATTRIB *)NULL)->nres) == 0x38);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 48f2d60bd78a..72f2b373221e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1281,7 +1281,10 @@ out:
* __register_sysctl_table - register a leaf sysctl table
* @set: Sysctl tree to register on
* @path: The path to the directory the sysctl table is in.
- * @table: the top-level table structure
+ * @table: the top-level table structure without any child. This table
+ * should not be free'd after registration. So it should not be
+ * used on stack. It can either be a global or dynamically allocated
+ * by the caller and free'd later after sysctl unregistration.
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
@@ -1302,9 +1305,12 @@ out:
* proc_handler - the text handler routine (described below)
*
* extra1, extra2 - extra pointers usable by the proc handler routines
+ * XXX: we should eventually modify these to use long min / max [0]
+ * [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
*
* Leaf nodes in the sysctl tree will be represented by a single file
- * under /proc; non-leaf nodes will be represented by directories.
+ * under /proc; non-leaf nodes (where child is not NULL) are not allowed,
+ * sysctl_check_table() verifies this.
*
* There must be a proc_handler routine for any terminal nodes.
* Several default handlers are available to cover common cases -
@@ -1346,7 +1352,7 @@ struct ctl_table_header *__register_sysctl_table(
spin_lock(&sysctl_lock);
dir = &set->dir;
- /* Reference moved down the diretory tree get_subdir */
+ /* Reference moved down the directory tree get_subdir */
dir->header.nreg++;
spin_unlock(&sysctl_lock);
@@ -1363,6 +1369,11 @@ struct ctl_table_header *__register_sysctl_table(
if (namelen == 0)
continue;
+ /*
+ * namelen ensures if name is "foo/bar/yay" only foo is
+ * registered first. We traverse as if using mkdir -p and
+ * return a ctl_dir for the last directory entry.
+ */
dir = get_subdir(dir, name, namelen);
if (IS_ERR(dir))
goto fail;
@@ -1388,8 +1399,15 @@ fail:
/**
* register_sysctl - register a sysctl table
- * @path: The path to the directory the sysctl table is in.
- * @table: the table structure
+ * @path: The path to the directory the sysctl table is in. If the path
+ * doesn't exist we will create it for you.
+ * @table: the table structure. The calller must ensure the life of the @table
+ * will be kept during the lifetime use of the syctl. It must not be freed
+ * until unregister_sysctl_table() is called with the given returned table
+ * with this registration. If your code is non modular then you don't need
+ * to call unregister_sysctl_table() and can instead use something like
+ * register_sysctl_init() which does not care for the result of the syctl
+ * registration.
*
* Register a sysctl table. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
@@ -1405,8 +1423,11 @@ EXPORT_SYMBOL(register_sysctl);
/**
* __register_sysctl_init() - register sysctl table to path
- * @path: path name for sysctl base
- * @table: This is the sysctl table that needs to be registered to the path
+ * @path: path name for sysctl base. If that path doesn't exist we will create
+ * it for you.
+ * @table: This is the sysctl table that needs to be registered to the path.
+ * The caller must ensure the life of the @table will be kept during the
+ * lifetime use of the sysctl.
* @table_name: The name of sysctl table, only used for log printing when
* registration fails
*
@@ -1418,10 +1439,7 @@ EXPORT_SYMBOL(register_sysctl);
* register_sysctl() failing on init are extremely low, and so for both reasons
* this function does not return any error as it is used by initialization code.
*
- * Context: Can only be called after your respective sysctl base path has been
- * registered. So for instance, most base directories are registered early on
- * init before init levels are processed through proc_sys_init() and
- * sysctl_init_bases().
+ * Context: if your base directory does not exist it will be created for you.
*/
void __init __register_sysctl_init(const char *path, struct ctl_table *table,
const char *table_name)
@@ -1551,6 +1569,7 @@ out:
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
+ * We are slowly deprecating this call so avoid its use.
*
* See __register_sysctl_table for more details.
*/
@@ -1622,6 +1641,7 @@ err_register_leaves:
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
+ * We are slowly deprecating this caller so avoid future uses of it.
*
* See __register_sysctl_paths for more details.
*/