summaryrefslogtreecommitdiff
path: root/mm/truncate.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2016-05-12 18:29:18 +0200
committerRoss Zwisler <ross.zwisler@linux.intel.com>2016-05-19 15:20:54 -0600
commitac401cc782429cc8560ce4840b1405d603740917 (patch)
tree44deea39b147b4f2e75286943e2ec1c838e7a2fa /mm/truncate.c
parent4f622938a5e2b7f1374ffb1e5fc212744898f513 (diff)
downloadlinux-next-ac401cc782429cc8560ce4840b1405d603740917.tar.gz
dax: New fault locking
Currently DAX page fault locking is racy. CPU0 (write fault) CPU1 (read fault) __dax_fault() __dax_fault() get_block(inode, block, &bh, 0) -> not mapped get_block(inode, block, &bh, 0) -> not mapped if (!buffer_mapped(&bh)) if (vmf->flags & FAULT_FLAG_WRITE) get_block(inode, block, &bh, 1) -> allocates blocks if (page) -> no if (!buffer_mapped(&bh)) if (vmf->flags & FAULT_FLAG_WRITE) { } else { dax_load_hole(); } dax_insert_mapping() And we are in a situation where we fail in dax_radix_entry() with -EIO. Another problem with the current DAX page fault locking is that there is no race-free way to clear dirty tag in the radix tree. We can always end up with clean radix tree and dirty data in CPU cache. We fix the first problem by introducing locking of exceptional radix tree entries in DAX mappings acting very similarly to page lock and thus synchronizing properly faults against the same mapping index. The same lock can later be used to avoid races when clearing radix tree dirty tag. Reviewed-by: NeilBrown <neilb@suse.com> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Diffstat (limited to 'mm/truncate.c')
-rw-r--r--mm/truncate.c62
1 files changed, 30 insertions, 32 deletions
diff --git a/mm/truncate.c b/mm/truncate.c
index b00272810871..4064f8f53daa 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -34,40 +34,38 @@ static void clear_exceptional_entry(struct address_space *mapping,
if (shmem_mapping(mapping))
return;
- spin_lock_irq(&mapping->tree_lock);
-
if (dax_mapping(mapping)) {
- if (radix_tree_delete_item(&mapping->page_tree, index, entry))
- mapping->nrexceptional--;
- } else {
- /*
- * Regular page slots are stabilized by the page lock even
- * without the tree itself locked. These unlocked entries
- * need verification under the tree lock.
- */
- if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
- &slot))
- goto unlock;
- if (*slot != entry)
- goto unlock;
- radix_tree_replace_slot(slot, NULL);
- mapping->nrexceptional--;
- if (!node)
- goto unlock;
- workingset_node_shadows_dec(node);
- /*
- * Don't track node without shadow entries.
- *
- * Avoid acquiring the list_lru lock if already untracked.
- * The list_empty() test is safe as node->private_list is
- * protected by mapping->tree_lock.
- */
- if (!workingset_node_shadows(node) &&
- !list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes,
- &node->private_list);
- __radix_tree_delete_node(&mapping->page_tree, node);
+ dax_delete_mapping_entry(mapping, index);
+ return;
}
+ spin_lock_irq(&mapping->tree_lock);
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+ &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+ radix_tree_replace_slot(slot, NULL);
+ mapping->nrexceptional--;
+ if (!node)
+ goto unlock;
+ workingset_node_shadows_dec(node);
+ /*
+ * Don't track node without shadow entries.
+ *
+ * Avoid acquiring the list_lru lock if already untracked.
+ * The list_empty() test is safe as node->private_list is
+ * protected by mapping->tree_lock.
+ */
+ if (!workingset_node_shadows(node) &&
+ !list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ __radix_tree_delete_node(&mapping->page_tree, node);
unlock:
spin_unlock_irq(&mapping->tree_lock);
}