summaryrefslogtreecommitdiff
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c475
1 files changed, 231 insertions, 244 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 468e410f9e61..ae4c46834633 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -215,6 +215,8 @@ static u64 cgroup_serial_nr_next = 1;
*/
static int need_forkexit_callback __read_mostly;
+static struct cftype cgroup_base_files[];
+
static void cgroup_offline_fn(struct work_struct *work);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
@@ -464,7 +466,7 @@ static inline void put_css_set_taskexit(struct css_set *cset)
* @new_cgrp: cgroup that's being entered by the task
* @template: desired set of css pointers in css_set (pre-calculated)
*
- * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * Returns true if "cset" matches "old_cset" except for the hierarchy
* which "new_cgrp" belongs to, for which it should match "new_cgrp".
*/
static bool compare_css_sets(struct css_set *cset,
@@ -804,8 +806,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
- unsigned long subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;
@@ -865,8 +866,6 @@ static void cgroup_free_fn(struct work_struct *work)
*/
dput(cgrp->parent->dentry);
- ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-
/*
* Drop the active superblock reference that we took when we
* created the cgroup. This will free cgrp->root, if we are
@@ -957,28 +956,23 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
}
/**
- * cgroup_clear_directory - selective removal of base and subsystem files
- * @dir: directory containing the files
- * @base_files: true if the base files should be removed
+ * cgroup_clear_dir - remove subsys files in a cgroup directory
+ * @cgrp: target cgroup
* @subsys_mask: mask of the subsystem ids whose files should be removed
*/
-static void cgroup_clear_directory(struct dentry *dir, bool base_files,
- unsigned long subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
- struct cgroup *cgrp = __d_cgrp(dir);
struct cgroup_subsys *ss;
+ int i;
- for_each_root_subsys(cgrp->root, ss) {
+ for_each_subsys(ss, i) {
struct cftype_set *set;
- if (!test_bit(ss->subsys_id, &subsys_mask))
+
+ if (!test_bit(i, &subsys_mask))
continue;
list_for_each_entry(set, &ss->cftsets, node)
cgroup_addrm_files(cgrp, NULL, set->cfts, false);
}
- if (base_files) {
- while (!list_empty(&cgrp->files))
- cgroup_rm_file(cgrp, NULL);
- }
}
/*
@@ -987,9 +981,6 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files,
static void cgroup_d_remove_dir(struct dentry *dentry)
{
struct dentry *parent;
- struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
-
- cgroup_clear_directory(dentry, true, root->subsys_mask);
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
@@ -1010,32 +1001,47 @@ static int rebind_subsystems(struct cgroupfs_root *root,
{
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_subsys *ss;
- int i;
+ unsigned long pinned = 0;
+ int i, ret;
BUG_ON(!mutex_is_locked(&cgroup_mutex));
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
/* Check that any added subsystems are currently free */
for_each_subsys(ss, i) {
- unsigned long bit = 1UL << i;
-
- if (!(bit & added_mask))
+ if (!(added_mask & (1 << i)))
continue;
+ /* is the subsystem mounted elsewhere? */
if (ss->root != &cgroup_dummy_root) {
- /* Subsystem isn't free */
- return -EBUSY;
+ ret = -EBUSY;
+ goto out_put;
+ }
+
+ /* pin the module */
+ if (!try_module_get(ss->module)) {
+ ret = -ENOENT;
+ goto out_put;
}
+ pinned |= 1 << i;
}
- /* Currently we don't handle adding/removing subsystems when
- * any child cgroups exist. This is theoretically supportable
- * but involves complex error handling, so it's being left until
- * later */
- if (root->number_of_cgroups > 1)
- return -EBUSY;
+ /* subsys could be missing if unloaded between parsing and here */
+ if (added_mask != pinned) {
+ ret = -ENOENT;
+ goto out_put;
+ }
+
+ ret = cgroup_populate_dir(cgrp, added_mask);
+ if (ret)
+ goto out_put;
+
+ /*
+ * Nothing can fail from this point on. Remove files for the
+ * removed subsystems and rebind each subsystem.
+ */
+ cgroup_clear_dir(cgrp, removed_mask);
- /* Process each subsystem */
for_each_subsys(ss, i) {
unsigned long bit = 1UL << i;
@@ -1069,20 +1075,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
/* subsystem is now free - drop reference on module */
module_put(ss->module);
root->subsys_mask &= ~bit;
- } else if (bit & root->subsys_mask) {
- /* Subsystem state should already exist */
- BUG_ON(!cgrp->subsys[i]);
- /*
- * a refcount was taken, but we already had one, so
- * drop the extra reference.
- */
- module_put(ss->module);
-#ifdef CONFIG_MODULE_UNLOAD
- BUG_ON(ss->module && !module_refcount(ss->module));
-#endif
- } else {
- /* Subsystem state shouldn't exist */
- BUG_ON(cgrp->subsys[i]);
}
}
@@ -1093,6 +1085,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
root->flags |= CGRP_ROOT_SUBSYS_BOUND;
return 0;
+
+out_put:
+ for_each_subsys(ss, i)
+ if (pinned & (1 << i))
+ module_put(ss->module);
+ return ret;
}
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1143,7 +1141,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
char *token, *o = data;
bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
- bool module_pin_failed = false;
struct cgroup_subsys *ss;
int i;
@@ -1286,52 +1283,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
- /*
- * Grab references on all the modules we'll need, so the subsystems
- * don't dance around before rebind_subsystems attaches them. This may
- * take duplicate reference counts on a subsystem that's already used,
- * but rebind_subsystems handles this case.
- */
- for_each_subsys(ss, i) {
- if (!(opts->subsys_mask & (1UL << i)))
- continue;
- if (!try_module_get(cgroup_subsys[i]->module)) {
- module_pin_failed = true;
- break;
- }
- }
- if (module_pin_failed) {
- /*
- * oops, one of the modules was going away. this means that we
- * raced with a module_delete call, and to the user this is
- * essentially a "subsystem doesn't exist" case.
- */
- for (i--; i >= 0; i--) {
- /* drop refcounts only on the ones we took */
- unsigned long bit = 1UL << i;
-
- if (!(bit & opts->subsys_mask))
- continue;
- module_put(cgroup_subsys[i]->module);
- }
- return -ENOENT;
- }
-
return 0;
}
-static void drop_parsed_module_refcounts(unsigned long subsys_mask)
-{
- struct cgroup_subsys *ss;
- int i;
-
- mutex_lock(&cgroup_mutex);
- for_each_subsys(ss, i)
- if (subsys_mask & (1UL << i))
- module_put(cgroup_subsys[i]->module);
- mutex_unlock(&cgroup_mutex);
-}
-
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
int ret = 0;
@@ -1371,22 +1325,15 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
goto out_unlock;
}
- /*
- * Clear out the files of subsystems that should be removed, do
- * this before rebind_subsystems, since rebind_subsystems may
- * change this hierarchy's subsys_list.
- */
- cgroup_clear_directory(cgrp->dentry, false, removed_mask);
-
- ret = rebind_subsystems(root, added_mask, removed_mask);
- if (ret) {
- /* rebind_subsystems failed, re-populate the removed files */
- cgroup_populate_dir(cgrp, false, removed_mask);
+ /* remounting is not allowed for populated hierarchies */
+ if (root->number_of_cgroups > 1) {
+ ret = -EBUSY;
goto out_unlock;
}
- /* re-populate subsystem files */
- cgroup_populate_dir(cgrp, false, added_mask);
+ ret = rebind_subsystems(root, added_mask, removed_mask);
+ if (ret)
+ goto out_unlock;
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
@@ -1396,8 +1343,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
- if (ret)
- drop_parsed_module_refcounts(opts.subsys_mask);
return ret;
}
@@ -1432,6 +1377,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
cgrp->root = root;
RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
init_cgroup_housekeeping(cgrp);
+ idr_init(&root->cgroup_idr);
}
static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
@@ -1504,7 +1450,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
*/
root->subsys_mask = opts->subsys_mask;
root->flags = opts->flags;
- ida_init(&root->cgroup_ida);
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
@@ -1520,7 +1465,7 @@ static void cgroup_free_root(struct cgroupfs_root *root)
/* hierarhcy ID shoulid already have been released */
WARN_ON_ONCE(root->hierarchy_id);
- ida_destroy(&root->cgroup_ida);
+ idr_destroy(&root->cgroup_idr);
kfree(root);
}
}
@@ -1585,7 +1530,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int ret = 0;
struct super_block *sb;
struct cgroupfs_root *new_root;
+ struct list_head tmp_links;
struct inode *inode;
+ const struct cred *cred;
/* First find the desired set of subsystems */
mutex_lock(&cgroup_mutex);
@@ -1601,7 +1548,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
new_root = cgroup_root_from_opts(&opts);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
- goto drop_modules;
+ goto out_err;
}
opts.new_root = new_root;
@@ -1610,17 +1557,15 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
cgroup_free_root(opts.new_root);
- goto drop_modules;
+ goto out_err;
}
root = sb->s_fs_info;
BUG_ON(!root);
if (root == opts.new_root) {
/* We used the new root structure, so this is a new hierarchy */
- struct list_head tmp_links;
struct cgroup *root_cgrp = &root->top_cgroup;
struct cgroupfs_root *existing_root;
- const struct cred *cred;
int i;
struct css_set *cset;
@@ -1635,6 +1580,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
+ root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp,
+ 0, 1, GFP_KERNEL);
+ if (root_cgrp->id < 0)
+ goto unlock_drop;
+
/* Check for name clashes with existing mounts */
ret = -EBUSY;
if (strlen(root->name))
@@ -1658,26 +1608,37 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (ret)
goto unlock_drop;
+ sb->s_root->d_fsdata = root_cgrp;
+ root_cgrp->dentry = sb->s_root;
+
+ /*
+ * We're inside get_sb() and will call lookup_one_len() to
+ * create the root files, which doesn't work if SELinux is
+ * in use. The following cred dancing somehow works around
+ * it. See 2ce9738ba ("cgroupfs: use init_cred when
+ * populating new cgroupfs mount") for more details.
+ */
+ cred = override_creds(&init_cred);
+
+ ret = cgroup_addrm_files(root_cgrp, NULL, cgroup_base_files, true);
+ if (ret)
+ goto rm_base_files;
+
ret = rebind_subsystems(root, root->subsys_mask, 0);
- if (ret == -EBUSY) {
- free_cgrp_cset_links(&tmp_links);
- goto unlock_drop;
- }
+ if (ret)
+ goto rm_base_files;
+
+ revert_creds(cred);
+
/*
* There must be no failure case after here, since rebinding
* takes care of subsystems' refcounts, which are explicitly
* dropped in the failure exit path.
*/
- /* EBUSY should be the only error here */
- BUG_ON(ret);
-
list_add(&root->root_list, &cgroup_roots);
cgroup_root_count++;
- sb->s_root->d_fsdata = root_cgrp;
- root->top_cgroup.dentry = sb->s_root;
-
/* Link the top cgroup in this hierarchy into all
* the css_set objects */
write_lock(&css_set_lock);
@@ -1690,9 +1651,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
- cred = override_creds(&init_cred);
- cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
- revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
@@ -1712,15 +1670,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
}
}
-
- /* no subsys rebinding, so refcounts don't change */
- drop_parsed_module_refcounts(opts.subsys_mask);
}
kfree(opts.release_agent);
kfree(opts.name);
return dget(sb->s_root);
+ rm_base_files:
+ free_cgrp_cset_links(&tmp_links);
+ cgroup_addrm_files(&root->top_cgroup, NULL, cgroup_base_files, false);
+ revert_creds(cred);
unlock_drop:
cgroup_exit_root_id(root);
mutex_unlock(&cgroup_root_mutex);
@@ -1728,8 +1687,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_unlock(&inode->i_mutex);
drop_new_super:
deactivate_locked_super(sb);
- drop_modules:
- drop_parsed_module_refcounts(opts.subsys_mask);
out_err:
kfree(opts.release_agent);
kfree(opts.name);
@@ -1747,6 +1704,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
BUG_ON(root->number_of_cgroups != 1);
BUG_ON(!list_empty(&cgrp->children));
+ mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
@@ -1779,6 +1737,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
simple_xattrs_free(&cgrp->xattrs);
@@ -1890,7 +1849,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path);
struct task_and_cgroup {
struct task_struct *task;
struct cgroup *cgrp;
- struct css_set *cg;
+ struct css_set *cset;
};
struct cgroup_taskset {
@@ -2108,8 +2067,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
tc = flex_array_get(group, i);
old_cset = task_css_set(tc->task);
- tc->cg = find_css_set(old_cset, cgrp);
- if (!tc->cg) {
+ tc->cset = find_css_set(old_cset, cgrp);
+ if (!tc->cset) {
retval = -ENOMEM;
goto out_put_css_set_refs;
}
@@ -2122,7 +2081,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
*/
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
- cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
+ cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
}
/* nothing is sensitive to fork() after this point. */
@@ -2142,9 +2101,9 @@ out_put_css_set_refs:
if (retval) {
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
- if (!tc->cg)
+ if (!tc->cset)
break;
- put_css_set(tc->cg);
+ put_css_set(tc->cset);
}
}
out_cancel_attach:
@@ -2254,9 +2213,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
- struct cgroup *from_cg = task_cgroup_from_root(from, root);
+ struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
- retval = cgroup_attach_task(from_cg, tsk, false);
+ retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
@@ -2448,11 +2407,6 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
* supports string->u64 maps, but can be extended in future.
*/
-struct cgroup_seqfile_state {
- struct cftype *cft;
- struct cgroup *cgroup;
-};
-
static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
struct seq_file *sf = cb->state;
@@ -2461,59 +2415,45 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
- struct cgroup_seqfile_state *state = m->private;
- struct cftype *cft = state->cft;
+ struct cfent *cfe = m->private;
+ struct cftype *cft = cfe->type;
+ struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
+
if (cft->read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
- return cft->read_map(state->cgroup, cft, &cb);
+ return cft->read_map(cgrp, cft, &cb);
}
- return cft->read_seq_string(state->cgroup, cft, m);
-}
-
-static int cgroup_seqfile_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- kfree(seq->private);
- return single_release(inode, file);
+ return cft->read_seq_string(cgrp, cft, m);
}
static const struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
.write = cgroup_file_write,
.llseek = seq_lseek,
- .release = cgroup_seqfile_release,
+ .release = single_release,
};
static int cgroup_file_open(struct inode *inode, struct file *file)
{
int err;
+ struct cfent *cfe;
struct cftype *cft;
err = generic_file_open(inode, file);
if (err)
return err;
- cft = __d_cft(file->f_dentry);
+ cfe = __d_cfe(file->f_dentry);
+ cft = cfe->type;
if (cft->read_map || cft->read_seq_string) {
- struct cgroup_seqfile_state *state;
-
- state = kzalloc(sizeof(*state), GFP_USER);
- if (!state)
- return -ENOMEM;
-
- state->cft = cft;
- state->cgroup = __d_cgrp(file->f_dentry->d_parent);
file->f_op = &cgroup_seqfile_operations;
- err = single_open(file, cgroup_seqfile_show, state);
- if (err < 0)
- kfree(state);
- } else if (cft->open)
+ err = single_open(file, cgroup_seqfile_show, cfe);
+ } else if (cft->open) {
err = cft->open(inode, file);
- else
- err = 0;
+ }
return err;
}
@@ -2791,11 +2731,26 @@ out:
return error;
}
+/**
+ * cgroup_addrm_files - add or remove files to a cgroup directory
+ * @cgrp: the target cgroup
+ * @subsys: the subsystem of files to be added
+ * @cfts: array of cftypes to be added
+ * @is_add: whether to add or remove
+ *
+ * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
+ * All @cfts should belong to @subsys. For removals, this function never
+ * fails. If addition fails, this function doesn't remove files already
+ * added. The caller is responsible for cleaning up.
+ */
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
struct cftype cfts[], bool is_add)
{
struct cftype *cft;
- int err, ret = 0;
+ int ret;
+
+ lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
+ lockdep_assert_held(&cgroup_mutex);
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
@@ -2807,16 +2762,17 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
continue;
if (is_add) {
- err = cgroup_add_file(cgrp, subsys, cft);
- if (err)
+ ret = cgroup_add_file(cgrp, subsys, cft);
+ if (ret) {
pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
- cft->name, err);
- ret = err;
+ cft->name, ret);
+ return ret;
+ }
} else {
cgroup_rm_file(cgrp, cft);
}
}
- return ret;
+ return 0;
}
static void cgroup_cfts_prepare(void)
@@ -2831,8 +2787,8 @@ static void cgroup_cfts_prepare(void)
mutex_lock(&cgroup_mutex);
}
-static void cgroup_cfts_commit(struct cgroup_subsys *ss,
- struct cftype *cfts, bool is_add)
+static int cgroup_cfts_commit(struct cgroup_subsys *ss,
+ struct cftype *cfts, bool is_add)
__releases(&cgroup_mutex)
{
LIST_HEAD(pending);
@@ -2841,12 +2797,13 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
struct dentry *prev = NULL;
struct inode *inode;
u64 update_before;
+ int ret = 0;
/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
if (!cfts || ss->root == &cgroup_dummy_root ||
!atomic_inc_not_zero(&sb->s_active)) {
mutex_unlock(&cgroup_mutex);
- return;
+ return 0;
}
/*
@@ -2862,10 +2819,13 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
inode = root->dentry->d_inode;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
- cgroup_addrm_files(root, ss, cfts, is_add);
+ ret = cgroup_addrm_files(root, ss, cfts, is_add);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
+ if (ret)
+ goto out_deact;
+
/* add/rm files for all cgroups created before */
rcu_read_lock();
cgroup_for_each_descendant_pre(cgrp, root) {
@@ -2882,15 +2842,19 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
- cgroup_addrm_files(cgrp, ss, cfts, is_add);
+ ret = cgroup_addrm_files(cgrp, ss, cfts, is_add);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
rcu_read_lock();
+ if (ret)
+ break;
}
rcu_read_unlock();
dput(prev);
+out_deact:
deactivate_super(sb);
+ return ret;
}
/**
@@ -2910,6 +2874,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype_set *set;
+ int ret;
set = kzalloc(sizeof(*set), GFP_KERNEL);
if (!set)
@@ -2918,9 +2883,10 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
cgroup_cfts_prepare();
set->cfts = cfts;
list_add_tail(&set->node, &ss->cftsets);
- cgroup_cfts_commit(ss, cfts, true);
-
- return 0;
+ ret = cgroup_cfts_commit(ss, cfts, true);
+ if (ret)
+ cgroup_rm_cftypes(ss, cfts);
+ return ret;
}
EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
@@ -3349,8 +3315,8 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
* guarantees forward progress and that we don't miss any tasks.
*/
heap->size = 0;
- cgroup_iter_start(scan->cg, &it);
- while ((p = cgroup_iter_next(scan->cg, &it))) {
+ cgroup_iter_start(scan->cgrp, &it);
+ while ((p = cgroup_iter_next(scan->cgrp, &it))) {
/*
* Only affect tasks that qualify per the caller's callback,
* if he provided one
@@ -3383,7 +3349,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
* the heap and wasn't inserted
*/
}
- cgroup_iter_end(scan->cg, &it);
+ cgroup_iter_end(scan->cgrp, &it);
if (heap->size) {
for (i = 0; i < heap->size; i++) {
@@ -3429,7 +3395,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
struct cgroup_scanner scan;
- scan.cg = from;
+ scan.cgrp = from;
scan.test_task = NULL; /* select all tasks in cgroup */
scan.process_task = cgroup_transfer_one_task;
scan.heap = NULL;
@@ -3477,7 +3443,7 @@ struct cgroup_pidlist {
/* pointer to the cgroup we belong to, for list removal purposes */
struct cgroup *owner;
/* protects the other fields */
- struct rw_semaphore mutex;
+ struct rw_semaphore rwsem;
};
/*
@@ -3550,7 +3516,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
struct pid_namespace *ns = task_active_pid_ns(current);
/*
- * We can't drop the pidlist_mutex before taking the l->mutex in case
+ * We can't drop the pidlist_mutex before taking the l->rwsem in case
* the last ref-holder is trying to remove l from the list at the same
* time. Holding the pidlist_mutex precludes somebody taking whichever
* list we find out from under us - compare release_pid_array().
@@ -3559,7 +3525,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
list_for_each_entry(l, &cgrp->pidlists, links) {
if (l->key.type == type && l->key.ns == ns) {
/* make sure l doesn't vanish out from under us */
- down_write(&l->mutex);
+ down_write(&l->rwsem);
mutex_unlock(&cgrp->pidlist_mutex);
return l;
}
@@ -3570,8 +3536,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
mutex_unlock(&cgrp->pidlist_mutex);
return l;
}
- init_rwsem(&l->mutex);
- down_write(&l->mutex);
+ init_rwsem(&l->rwsem);
+ down_write(&l->rwsem);
l->key.type = type;
l->key.ns = get_pid_ns(ns);
l->owner = cgrp;
@@ -3632,7 +3598,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
l->list = array;
l->length = length;
l->use_count++;
- up_write(&l->mutex);
+ up_write(&l->rwsem);
*lp = l;
return 0;
}
@@ -3710,7 +3676,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
int index = 0, pid = *pos;
int *iter;
- down_read(&l->mutex);
+ down_read(&l->rwsem);
if (pid) {
int end = l->length;
@@ -3737,7 +3703,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
struct cgroup_pidlist *l = s->private;
- up_read(&l->mutex);
+ up_read(&l->rwsem);
}
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3783,7 +3749,7 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l)
* pidlist_mutex, we have to take pidlist_mutex first.
*/
mutex_lock(&l->owner->pidlist_mutex);
- down_write(&l->mutex);
+ down_write(&l->rwsem);
BUG_ON(!l->use_count);
if (!--l->use_count) {
/* we're the last user if refcount is 0; remove and free */
@@ -3791,12 +3757,12 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l)
mutex_unlock(&l->owner->pidlist_mutex);
pidlist_free(l->list);
put_pid_ns(l->key.ns);
- up_write(&l->mutex);
+ up_write(&l->rwsem);
kfree(l);
return;
}
mutex_unlock(&l->owner->pidlist_mutex);
- up_write(&l->mutex);
+ up_write(&l->rwsem);
}
static int cgroup_pidlist_release(struct inode *inode, struct file *file)
@@ -3975,11 +3941,11 @@ static void cgroup_event_ptable_queue_proc(struct file *file,
static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
const char *buffer)
{
- struct cgroup_event *event = NULL;
+ struct cgroup_event *event;
struct cgroup *cgrp_cfile;
unsigned int efd, cfd;
- struct file *efile = NULL;
- struct file *cfile = NULL;
+ struct file *efile;
+ struct file *cfile;
char *endp;
int ret;
@@ -4005,31 +3971,31 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
efile = eventfd_fget(efd);
if (IS_ERR(efile)) {
ret = PTR_ERR(efile);
- goto fail;
+ goto out_kfree;
}
event->eventfd = eventfd_ctx_fileget(efile);
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
- goto fail;
+ goto out_put_efile;
}
cfile = fget(cfd);
if (!cfile) {
ret = -EBADF;
- goto fail;
+ goto out_put_eventfd;
}
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile), MAY_READ);
if (ret < 0)
- goto fail;
+ goto out_put_cfile;
event->cft = __file_cft(cfile);
if (IS_ERR(event->cft)) {
ret = PTR_ERR(event->cft);
- goto fail;
+ goto out_put_cfile;
}
/*
@@ -4039,18 +4005,18 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
if (cgrp_cfile != cgrp) {
ret = -EINVAL;
- goto fail;
+ goto out_put_cfile;
}
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
- goto fail;
+ goto out_put_cfile;
}
ret = event->cft->register_event(cgrp, event->cft,
event->eventfd, buffer);
if (ret)
- goto fail;
+ goto out_put_cfile;
efile->f_op->poll(efile, &event->pt);
@@ -4070,16 +4036,13 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
return 0;
-fail:
- if (cfile)
- fput(cfile);
-
- if (event && event->eventfd && !IS_ERR(event->eventfd))
- eventfd_ctx_put(event->eventfd);
-
- if (!IS_ERR_OR_NULL(efile))
- fput(efile);
-
+out_put_cfile:
+ fput(cfile);
+out_put_eventfd:
+ eventfd_ctx_put(event->eventfd);
+out_put_efile:
+ fput(efile);
+out_kfree:
kfree(event);
return ret;
@@ -4157,31 +4120,29 @@ static struct cftype cgroup_base_files[] = {
};
/**
- * cgroup_populate_dir - selectively creation of files in a directory
+ * cgroup_populate_dir - create subsys files in a cgroup directory
* @cgrp: target cgroup
- * @base_files: true if the base files should be added
* @subsys_mask: mask of the subsystem ids whose files should be added
+ *
+ * On failure, no file is added.
*/
-static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
- unsigned long subsys_mask)
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
- int err;
struct cgroup_subsys *ss;
-
- if (base_files) {
- err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
- if (err < 0)
- return err;
- }
+ int i, ret = 0;
/* process cftsets of each subsystem */
- for_each_root_subsys(cgrp->root, ss) {
+ for_each_subsys(ss, i) {
struct cftype_set *set;
- if (!test_bit(ss->subsys_id, &subsys_mask))
+
+ if (!test_bit(i, &subsys_mask))
continue;
- list_for_each_entry(set, &ss->cftsets, node)
- cgroup_addrm_files(cgrp, ss, set->cfts, true);
+ list_for_each_entry(set, &ss->cftsets, node) {
+ ret = cgroup_addrm_files(cgrp, ss, set->cfts, true);
+ if (ret < 0)
+ goto err;
+ }
}
/* This cgroup is ready now */
@@ -4199,6 +4160,9 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
}
return 0;
+err:
+ cgroup_clear_dir(cgrp, subsys_mask);
+ return ret;
}
static void css_dput_fn(struct work_struct *work)
@@ -4238,7 +4202,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
INIT_WORK(&css->dput_work, css_dput_fn);
}
-/* invoke ->post_create() on a new CSS and mark it online if successful */
+/* invoke ->css_online() on a new CSS and mark it online if successful */
static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
int ret = 0;
@@ -4252,9 +4216,8 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
return ret;
}
-/* if the CSS is online, invoke ->pre_destory() on it and mark it offline */
+/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
- __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
@@ -4297,7 +4260,11 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
goto err_free_cgrp;
rcu_assign_pointer(cgrp->name, name);
- cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
+ /*
+ * Temporarily set the pointer to NULL, so idr_find() won't return
+ * a half-baked cgroup.
+ */
+ cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
if (cgrp->id < 0)
goto err_free_name;
@@ -4397,7 +4364,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
}
}
- err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
+ idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
+
+ err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
+ if (err)
+ goto err_destroy;
+
+ err = cgroup_populate_dir(cgrp, root->subsys_mask);
if (err)
goto err_destroy;
@@ -4419,7 +4392,7 @@ err_free_all:
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
err_free_id:
- ida_simple_remove(&root->cgroup_ida, cgrp->id);
+ idr_remove(&root->cgroup_idr, cgrp->id);
err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
err_free_cgrp:
@@ -4550,9 +4523,11 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
raw_spin_unlock(&release_list_lock);
/*
- * Remove @cgrp directory. The removal puts the base ref but we
- * aren't quite done with @cgrp yet, so hold onto it.
+ * Clear and remove @cgrp directory. The removal puts the base ref
+ * but we aren't quite done with @cgrp yet, so hold onto it.
*/
+ cgroup_clear_dir(cgrp, cgrp->root->subsys_mask);
+ cgroup_addrm_files(cgrp, NULL, cgroup_base_files, false);
dget(d);
cgroup_d_remove_dir(d);
@@ -4610,6 +4585,14 @@ static void cgroup_offline_fn(struct work_struct *work)
/* delete this cgroup from parent->children */
list_del_rcu(&cgrp->sibling);
+ /*
+ * We should remove the cgroup object from idr before its grace
+ * period starts, so we won't be looking up a cgroup while the
+ * cgroup is being freed.
+ */
+ idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+ cgrp->id = -1;
+
dput(d);
set_bit(CGRP_RELEASABLE, &parent->flags);
@@ -4811,7 +4794,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
/*
* we shouldn't be called if the subsystem is in use, and the use of
- * try_module_get in parse_cgroupfs_options should ensure that it
+ * try_module_get() in rebind_subsystems() should ensure that it
* doesn't start being used while we're killing it off.
*/
BUG_ON(ss->root != &cgroup_dummy_root);
@@ -4935,6 +4918,10 @@ int __init cgroup_init(void)
BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));
+ err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top,
+ 0, 1, GFP_KERNEL);
+ BUG_ON(err < 0);
+
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);