X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fcgroup.c;h=d8abe996e009702663d9769714f6428f88aac641;hb=f619cfe1bda809a97c407f4c723eb3235ecd64e5;hp=5987dccdb2a0bad4de04333dfa0e45b0690d3405;hpb=0c326331c8b107abc0a160e8899d749150b8f76a;p=linux-2.6-omap-h63xx.git diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5987dccdb2a..d8abe996e00 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1,6 +1,4 @@ /* - * kernel/cgroup.c - * * Generic process-grouping system. * * Based originally on the cpuset system, extracted by Paul Menage @@ -115,9 +113,9 @@ static int root_count; #define dummytop (&rootnode.top_cgroup) /* This flag indicates whether tasks in the fork and exit paths should - * take callback_mutex and check for fork/exit handlers to call. This - * avoids us having to do extra work in the fork/exit path if none of the - * subsystems need to be called. + * check for fork/exit handlers to call. This avoids us having to do + * extra work in the fork/exit path if none of the subsystems need to + * be called. */ static int need_forkexit_callback; @@ -143,7 +141,7 @@ enum { ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ }; -inline int cgroup_is_releasable(const struct cgroup *cgrp) +static int cgroup_is_releasable(const struct cgroup *cgrp) { const int bits = (1 << CGRP_RELEASABLE) | @@ -151,7 +149,7 @@ inline int cgroup_is_releasable(const struct cgroup *cgrp) return (cgrp->flags & bits) == bits; } -inline int notify_on_release(const struct cgroup *cgrp) +static int notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } @@ -309,7 +307,6 @@ static inline void put_css_set_taskexit(struct css_set *cg) * template: location in which to build the desired set of subsystem * state objects for the new cgroup group */ - static struct css_set *find_existing_css_set( struct css_set *oldcg, struct cgroup *cgrp, @@ -322,7 +319,7 @@ static struct css_set *find_existing_css_set( /* Built the set of subsystem state objects that we want to * see in the new css_set */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - if (root->subsys_bits & (1ull << i)) { + if (root->subsys_bits & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -356,7 +353,6 @@ static struct css_set *find_existing_css_set( * and chains them on tmp through their cgrp_link_list fields. Returns 0 on * success or a negative error */ - static int allocate_cg_links(int count, struct list_head *tmp) { struct cg_cgroup_link *link; @@ -398,7 +394,6 @@ static void free_cg_links(struct list_head *tmp) * substituted into the appropriate hierarchy. Must be called with * cgroup_mutex held */ - static struct css_set *find_css_set( struct css_set *oldcg, struct cgroup *cgrp) { @@ -475,7 +470,6 @@ static struct css_set *find_css_set( /* Link this cgroup group into the list */ list_add(&res->list, &init_css_set.list); css_set_count++; - INIT_LIST_HEAD(&res->tasks); write_unlock(&css_set_lock); return res; @@ -491,7 +485,7 @@ static struct css_set *find_css_set( * Any task can increment and decrement the count field without lock. * So in general, code holding cgroup_mutex can't rely on the count * field not changing. However, if the count goes to zero, then only - * attach_task() can increment it again. Because a count of zero + * cgroup_attach_task() can increment it again. Because a count of zero * means that no tasks are currently attached, therefore there is no * way a task attached to that cgroup can fork (the other way to * increment the count). So code holding cgroup_mutex can safely @@ -509,8 +503,8 @@ static struct css_set *find_css_set( * critical pieces of code here. The exception occurs on cgroup_exit(), * when a task in a notify_on_release cgroup exits. Then cgroup_mutex * is taken, and if the cgroup count is zero, a usermode call made - * to /sbin/cgroup_release_agent with the name of the cgroup (path - * relative to the root of cgroup file system) as the argument. + * to the release agent with the name of the cgroup (path relative to + * the root of cgroup file system) as the argument. * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty. Since all @@ -522,24 +516,23 @@ static struct css_set *find_css_set( * The task_lock() exception * * The need for this exception arises from the action of - * attach_task(), which overwrites one tasks cgroup pointer with - * another. It does so using cgroup_mutexe, however there are + * cgroup_attach_task(), which overwrites one tasks cgroup pointer with + * another. It does so using cgroup_mutex, however there are * several performance critical places that need to reference * task->cgroup without the expense of grabbing a system global * mutex. Therefore except as noted below, when dereferencing or, as - * in attach_task(), modifying a task'ss cgroup pointer we use + * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use * task_lock(), which acts on a spinlock (task->alloc_lock) already in * the task_struct routinely used for such matters. * * P.S. One more locking exception. RCU is used to guard the - * update of a tasks cgroup pointer by attach_task() + * update of a tasks cgroup pointer by cgroup_attach_task() */ /** * cgroup_lock - lock out any changes to cgroup structures * */ - void cgroup_lock(void) { mutex_lock(&cgroup_mutex); @@ -550,7 +543,6 @@ void cgroup_lock(void) * * Undo the lock taken in a previous cgroup_lock() call. */ - void cgroup_unlock(void) { mutex_unlock(&cgroup_mutex); @@ -588,11 +580,25 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) return inode; } +/* + * Call subsys's pre_destroy handler. + * This is called before css refcnt check. + */ +static void cgroup_call_pre_destroy(struct cgroup *cgrp) +{ + struct cgroup_subsys *ss; + for_each_subsys(cgrp->root, ss) + if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) + ss->pre_destroy(ss, cgrp); + return; +} + static void cgroup_diput(struct dentry *dentry, struct inode *inode) { /* is dentry a directory ? if so, kfree() associated cgroup */ if (S_ISDIR(inode->i_mode)) { struct cgroup *cgrp = dentry->d_fsdata; + struct cgroup_subsys *ss; BUG_ON(!(cgroup_is_removed(cgrp))); /* It's possible for external users to be holding css * reference counts on a cgroup; css_put() needs to @@ -601,6 +607,23 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) * queue the cgroup to be handled by the release * agent */ synchronize_rcu(); + + mutex_lock(&cgroup_mutex); + /* + * Release the subsystem state objects. + */ + for_each_subsys(cgrp->root, ss) { + if (cgrp->subsys[ss->subsys_id]) + ss->destroy(ss, cgrp); + } + + cgrp->root->number_of_cgroups--; + mutex_unlock(&cgroup_mutex); + + /* Drop the active superblock reference that we took when we + * created the cgroup */ + deactivate_super(cgrp->root->sb); + kfree(cgrp); } iput(inode); @@ -665,7 +688,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, added_bits = final_bits & ~root->actual_subsys_bits; /* Check that any added subsystems are currently free */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - unsigned long long bit = 1ull << i; + unsigned long bit = 1UL << i; struct cgroup_subsys *ss = subsys[i]; if (!(bit & added_bits)) continue; @@ -896,7 +919,6 @@ static int cgroup_get_rootdir(struct super_block *sb) if (!inode) return -ENOMEM; - inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_op = &cgroup_dir_inode_operations; /* directories start off with i_nlink == 2 (for "." entry) */ @@ -930,8 +952,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type, } root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) + if (!root) { + if (opts.release_agent) + kfree(opts.release_agent); return -ENOMEM; + } init_cgroup_root(root); root->subsys_bits = opts.subsys_bits; @@ -1098,8 +1123,13 @@ static inline struct cftype *__d_cft(struct dentry *dentry) return dentry->d_fsdata; } -/* - * Called with cgroup_mutex held. Writes path of cgroup into buf. +/** + * cgroup_path - generate the path of a cgroup + * @cgrp: the cgroup in question + * @buf: the buffer to write the path into + * @buflen: the length of the buffer + * + * Called with cgroup_mutex held. Writes path of cgroup into buf. * Returns 0 on success, -errno on error. */ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) @@ -1157,13 +1187,15 @@ static void get_first_subsys(const struct cgroup *cgrp, *subsys_id = test_ss->subsys_id; } -/* - * Attach task 'tsk' to cgroup 'cgrp' +/** + * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' + * @cgrp: the cgroup the task is attaching to + * @tsk: the task to be attached * - * Call holding cgroup_mutex. May take task_lock of - * the task 'pid' during call. + * Call holding cgroup_mutex. May take task_lock of + * the task 'tsk' during call. */ -static int attach_task(struct cgroup *cgrp, struct task_struct *tsk) +int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { int retval = 0; struct cgroup_subsys *ss; @@ -1183,9 +1215,8 @@ static int attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->can_attach) { retval = ss->can_attach(ss, cgrp, tsk); - if (retval) { + if (retval) return retval; - } } } @@ -1194,9 +1225,8 @@ static int attach_task(struct cgroup *cgrp, struct task_struct *tsk) * based on its final set of cgroups */ newcg = find_css_set(cg, cgrp); - if (!newcg) { + if (!newcg) return -ENOMEM; - } task_lock(tsk); if (tsk->flags & PF_EXITING) { @@ -1216,9 +1246,8 @@ static int attach_task(struct cgroup *cgrp, struct task_struct *tsk) write_unlock(&css_set_lock); for_each_subsys(root, ss) { - if (ss->attach) { + if (ss->attach) ss->attach(ss, cgrp, oldcgrp, tsk); - } } set_bit(CGRP_RELEASABLE, &oldcgrp->flags); synchronize_rcu(); @@ -1241,7 +1270,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) if (pid) { rcu_read_lock(); - tsk = find_task_by_pid(pid); + tsk = find_task_by_vpid(pid); if (!tsk || tsk->flags & PF_EXITING) { rcu_read_unlock(); return -ESRCH; @@ -1259,13 +1288,12 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) get_task_struct(tsk); } - ret = attach_task(cgrp, tsk); + ret = cgroup_attach_task(cgrp, tsk); put_task_struct(tsk); return ret; } /* The various types of files and directories in a cgroup file system */ - enum cgroup_filetype { FILE_ROOT, FILE_DIR, @@ -1331,9 +1359,14 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp, goto out1; } buffer[nbytes] = 0; /* nul-terminate */ + strstrip(buffer); /* strip -just- trailing whitespace */ mutex_lock(&cgroup_mutex); + /* + * This was already checked for in cgroup_file_write(), but + * check again now we're holding cgroup_mutex. + */ if (cgroup_is_removed(cgrp)) { retval = -ENODEV; goto out2; @@ -1351,24 +1384,9 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp, clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); break; case FILE_RELEASE_AGENT: - { - struct cgroupfs_root *root = cgrp->root; - /* Strip trailing newline */ - if (nbytes && (buffer[nbytes-1] == '\n')) { - buffer[nbytes-1] = 0; - } - if (nbytes < sizeof(root->release_agent_path)) { - /* We never write anything other than '\0' - * into the last char of release_agent_path, - * so it always remains a NUL-terminated - * string */ - strncpy(root->release_agent_path, buffer, nbytes); - root->release_agent_path[nbytes] = 0; - } else { - retval = -ENOSPC; - } + BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + strcpy(cgrp->root->release_agent_path, buffer); break; - } default: retval = -EINVAL; goto out2; @@ -1389,7 +1407,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, struct cftype *cft = __d_cft(file->f_dentry); struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - if (!cft) + if (!cft || cgroup_is_removed(cgrp)) return -ENODEV; if (cft->write) return cft->write(cgrp, cft, file, buf, nbytes, ppos); @@ -1459,7 +1477,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, struct cftype *cft = __d_cft(file->f_dentry); struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - if (!cft) + if (!cft || cgroup_is_removed(cgrp)) return -ENODEV; if (cft->read) @@ -1566,12 +1584,11 @@ static int cgroup_create_file(struct dentry *dentry, int mode, } /* - * cgroup_create_dir - create a directory for an object. - * cgrp: the cgroup we create the directory for. - * It must have a valid ->parent field - * And we are going to fill its ->dentry field. - * dentry: dentry of the new cgroup - * mode: mode to set on new directory. + * cgroup_create_dir - create a directory for an object. + * @cgrp: the cgroup we create the directory for. It must have a valid + * ->parent field. And we are going to fill its ->dentry field. + * @dentry: dentry of the new cgroup + * @mode: mode to set on new directory. */ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, int mode) @@ -1633,8 +1650,12 @@ int cgroup_add_files(struct cgroup *cgrp, return 0; } -/* Count the number of tasks in a cgroup. */ - +/** + * cgroup_task_count - count the number of tasks in a cgroup. + * @cgrp: the cgroup in question + * + * Return the number of tasks in the cgroup. + */ int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; @@ -1677,6 +1698,29 @@ static void cgroup_advance_iter(struct cgroup *cgrp, it->task = cg->tasks.next; } +/* + * To reduce the fork() overhead for systems that are not actually + * using their cgroups capability, we don't maintain the lists running + * through each css_set to its tasks until we see the list actually + * used - in other words after the first call to cgroup_iter_start(). + * + * The tasklist_lock is not held here, as do_each_thread() and + * while_each_thread() are protected by RCU. + */ +void cgroup_enable_task_cg_lists(void) +{ + struct task_struct *p, *g; + write_lock(&css_set_lock); + use_task_css_set_links = 1; + do_each_thread(g, p) { + task_lock(p); + if (list_empty(&p->cg_list)) + list_add(&p->cg_list, &p->cgroups->tasks); + task_unlock(p); + } while_each_thread(g, p); + write_unlock(&css_set_lock); +} + void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) { /* @@ -1684,18 +1728,9 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) * we need to enable the list linking each css_set to its * tasks, and fix up all existing tasks. */ - if (!use_task_css_set_links) { - struct task_struct *p, *g; - write_lock(&css_set_lock); - use_task_css_set_links = 1; - do_each_thread(g, p) { - task_lock(p); - if (list_empty(&p->cg_list)) - list_add(&p->cg_list, &p->cgroups->tasks); - task_unlock(p); - } while_each_thread(g, p); - write_unlock(&css_set_lock); - } + if (!use_task_css_set_links) + cgroup_enable_task_cg_lists(); + read_lock(&css_set_lock); it->cg_link = &cgrp->css_sets; cgroup_advance_iter(cgrp, it); @@ -1728,6 +1763,166 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) read_unlock(&css_set_lock); } +static inline int started_after_time(struct task_struct *t1, + struct timespec *time, + struct task_struct *t2) +{ + int start_diff = timespec_compare(&t1->start_time, time); + if (start_diff > 0) { + return 1; + } else if (start_diff < 0) { + return 0; + } else { + /* + * Arbitrarily, if two processes started at the same + * time, we'll say that the lower pointer value + * started first. Note that t2 may have exited by now + * so this may not be a valid pointer any longer, but + * that's fine - it still serves to distinguish + * between two tasks started (effectively) simultaneously. + */ + return t1 > t2; + } +} + +/* + * This function is a callback from heap_insert() and is used to order + * the heap. + * In this case we order the heap in descending task start time. + */ +static inline int started_after(void *p1, void *p2) +{ + struct task_struct *t1 = p1; + struct task_struct *t2 = p2; + return started_after_time(t1, &t2->start_time, t2); +} + +/** + * cgroup_scan_tasks - iterate though all the tasks in a cgroup + * @scan: struct cgroup_scanner containing arguments for the scan + * + * Arguments include pointers to callback functions test_task() and + * process_task(). + * Iterate through all the tasks in a cgroup, calling test_task() for each, + * and if it returns true, call process_task() for it also. + * The test_task pointer may be NULL, meaning always true (select all tasks). + * Effectively duplicates cgroup_iter_{start,next,end}() + * but does not lock css_set_lock for the call to process_task(). + * The struct cgroup_scanner may be embedded in any structure of the caller's + * creation. + * It is guaranteed that process_task() will act on every task that + * is a member of the cgroup for the duration of this call. This + * function may or may not call process_task() for tasks that exit + * or move to a different cgroup during the call, or are forked or + * move into the cgroup during the call. + * + * Note that test_task() may be called with locks held, and may in some + * situations be called multiple times for the same task, so it should + * be cheap. + * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been + * pre-allocated and will be used for heap operations (and its "gt" member will + * be overwritten), else a temporary heap will be used (allocation of which + * may cause this function to fail). + */ +int cgroup_scan_tasks(struct cgroup_scanner *scan) +{ + int retval, i; + struct cgroup_iter it; + struct task_struct *p, *dropped; + /* Never dereference latest_task, since it's not refcounted */ + struct task_struct *latest_task = NULL; + struct ptr_heap tmp_heap; + struct ptr_heap *heap; + struct timespec latest_time = { 0, 0 }; + + if (scan->heap) { + /* The caller supplied our heap and pre-allocated its memory */ + heap = scan->heap; + heap->gt = &started_after; + } else { + /* We need to allocate our own heap memory */ + heap = &tmp_heap; + retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after); + if (retval) + /* cannot allocate the heap */ + return retval; + } + + again: + /* + * Scan tasks in the cgroup, using the scanner's "test_task" callback + * to determine which are of interest, and using the scanner's + * "process_task" callback to process any of them that need an update. + * Since we don't want to hold any locks during the task updates, + * gather tasks to be processed in a heap structure. + * The heap is sorted by descending task start time. + * If the statically-sized heap fills up, we overflow tasks that + * started later, and in future iterations only consider tasks that + * started after the latest task in the previous pass. This + * guarantees forward progress and that we don't miss any tasks. + */ + heap->size = 0; + cgroup_iter_start(scan->cg, &it); + while ((p = cgroup_iter_next(scan->cg, &it))) { + /* + * Only affect tasks that qualify per the caller's callback, + * if he provided one + */ + if (scan->test_task && !scan->test_task(p, scan)) + continue; + /* + * Only process tasks that started after the last task + * we processed + */ + if (!started_after_time(p, &latest_time, latest_task)) + continue; + dropped = heap_insert(heap, p); + if (dropped == NULL) { + /* + * The new task was inserted; the heap wasn't + * previously full + */ + get_task_struct(p); + } else if (dropped != p) { + /* + * The new task was inserted, and pushed out a + * different task + */ + get_task_struct(p); + put_task_struct(dropped); + } + /* + * Else the new task was newer than anything already in + * the heap and wasn't inserted + */ + } + cgroup_iter_end(scan->cg, &it); + + if (heap->size) { + for (i = 0; i < heap->size; i++) { + struct task_struct *p = heap->ptrs[i]; + if (i == 0) { + latest_time = p->start_time; + latest_task = p; + } + /* Process the task per the caller's callback */ + scan->process_task(p, scan); + put_task_struct(p); + } + /* + * If we had to process any tasks at all, scan again + * in case some of them were in the middle of forking + * children that didn't get processed. + * Not the most efficient way to do it, but it avoids + * having to take callback_mutex in the fork path + */ + goto again; + } + if (heap == &tmp_heap) + heap_free(&tmp_heap); + return 0; +} + /* * Stuff for reading the 'tasks' file. * @@ -1763,19 +1958,20 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) while ((tsk = cgroup_iter_next(cgrp, &it))) { if (unlikely(n == npids)) break; - pidarray[n++] = task_pid_nr(tsk); + pidarray[n++] = task_pid_vnr(tsk); } cgroup_iter_end(cgrp, &it); return n; } /** - * Build and fill cgroupstats so that taskstats can export it to user - * space. - * + * cgroupstats_build - build and fill cgroupstats * @stats: cgroupstats to fill information into * @dentry: A dentry entry belonging to the cgroup for which stats have * been requested. + * + * Build and fill cgroupstats so that taskstats can export it to user + * space. */ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { @@ -2007,14 +2203,13 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, } /* - * cgroup_create - create a cgroup - * parent: cgroup that will be parent of the new cgroup. - * name: name of the new cgroup. Will be strcpy'ed. - * mode: mode to set on new inode + * cgroup_create - create a cgroup + * @parent: cgroup that will be parent of the new cgroup + * @dentry: dentry of the new cgroup + * @mode: mode to set on new inode * - * Must be called with the mutex on the parent inode held + * Must be called with the mutex on the parent inode held */ - static long cgroup_create(struct cgroup *parent, struct dentry *dentry, int mode) { @@ -2128,9 +2323,8 @@ static inline int cgroup_has_css_refs(struct cgroup *cgrp) * matter, since it can only happen if the cgroup * has been deleted and hence no longer needs the * release agent to be called anyway. */ - if (css && atomic_read(&css->refcnt)) { + if (css && atomic_read(&css->refcnt)) return 1; - } } return 0; } @@ -2140,7 +2334,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) struct cgroup *cgrp = dentry->d_fsdata; struct dentry *d; struct cgroup *parent; - struct cgroup_subsys *ss; struct super_block *sb; struct cgroupfs_root *root; @@ -2160,16 +2353,17 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) root = cgrp->root; sb = root->sb; + /* + * Call pre_destroy handlers of subsys. Notify subsystems + * that rmdir() request comes. + */ + cgroup_call_pre_destroy(cgrp); + if (cgroup_has_css_refs(cgrp)) { mutex_unlock(&cgroup_mutex); return -EBUSY; } - for_each_subsys(root, ss) { - if (cgrp->subsys[ss->subsys_id]) - ss->destroy(ss, cgrp); - } - spin_lock(&release_list_lock); set_bit(CGRP_REMOVED, &cgrp->flags); if (!list_empty(&cgrp->release_list)) @@ -2184,15 +2378,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) cgroup_d_remove_dir(d); dput(d); - root->number_of_cgroups--; set_bit(CGRP_RELEASABLE, &parent->flags); check_for_release(parent); mutex_unlock(&cgroup_mutex); - /* Drop the active superblock reference that we took when we - * created the cgroup */ - deactivate_super(sb); return 0; } @@ -2200,7 +2390,8 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; struct list_head *l; - printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name); + + printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); /* Create the top cgroup state for this subsystem */ ss->root = &rootnode; @@ -2242,8 +2433,10 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) } /** - * cgroup_init_early - initialize cgroups at system boot, and - * initialize any subsystems that request early init. + * cgroup_init_early - cgroup initialization at system boot + * + * Initialize cgroups at system boot, and initialize any + * subsystems that request early init. */ int __init cgroup_init_early(void) { @@ -2273,7 +2466,7 @@ int __init cgroup_init_early(void) BUG_ON(!ss->create); BUG_ON(!ss->destroy); if (ss->subsys_id != i) { - printk(KERN_ERR "Subsys %s id == %d\n", + printk(KERN_ERR "cgroup: Subsys %s id == %d\n", ss->name, ss->subsys_id); BUG(); } @@ -2285,8 +2478,10 @@ int __init cgroup_init_early(void) } /** - * cgroup_init - register cgroup filesystem and /proc file, and - * initialize any subsystems that didn't request early init. + * cgroup_init - cgroup initialization + * + * Register cgroup filesystem and /proc file, and initialize + * any subsystems that didn't request early init. */ int __init cgroup_init(void) { @@ -2325,7 +2520,7 @@ out: * - Used for /proc//cgroup. * - No need to task_lock(tsk) on this tsk->cgroup reference, as it * doesn't really matter if tsk->cgroup changes after we read it, - * and we take cgroup_mutex, keeping attach_task() from changing it + * and we take cgroup_mutex, keeping cgroup_attach_task() from changing it * anyway. No need to check that tsk->cgroup != NULL, thanks to * the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks * cgroup to top_cgroup. @@ -2402,7 +2597,6 @@ struct file_operations proc_cgroup_operations = { static int proc_cgroupstats_show(struct seq_file *m, void *v) { int i; - struct cgroupfs_root *root; seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n"); mutex_lock(&cgroup_mutex); @@ -2430,14 +2624,14 @@ static struct file_operations proc_cgroupstats_operations = { /** * cgroup_fork - attach newly forked task to its parents cgroup. - * @tsk: pointer to task_struct of forking parent process. + * @child: pointer to task_struct of forking parent process. * * Description: A task inherits its parent's cgroup at fork(). * * A pointer to the shared css_set was automatically copied in * fork.c by dup_task_struct(). However, we ignore that copy, since * it was not made under the protection of RCU or cgroup_mutex, so - * might no longer be a valid cgroup pointer. attach_task() might + * might no longer be a valid cgroup pointer. cgroup_attach_task() might * have already changed current->cgroups, allowing the previously * referenced cgroup group to be removed and freed. * @@ -2454,9 +2648,12 @@ void cgroup_fork(struct task_struct *child) } /** - * cgroup_fork_callbacks - called on a new task very soon before - * adding it to the tasklist. No need to take any locks since no-one - * can be operating on this task + * cgroup_fork_callbacks - run fork callbacks + * @child: the new task + * + * Called on a new task very soon before adding it to the + * tasklist. No need to take any locks since no-one can + * be operating on this task. */ void cgroup_fork_callbacks(struct task_struct *child) { @@ -2471,11 +2668,14 @@ void cgroup_fork_callbacks(struct task_struct *child) } /** - * cgroup_post_fork - called on a new task after adding it to the - * task list. Adds the task to the list running through its css_set - * if necessary. Has to be after the task is visible on the task list - * in case we race with the first call to cgroup_iter_start() - to - * guarantee that the new task ends up on its list. */ + * cgroup_post_fork - called on a new task after adding it to the task list + * @child: the task in question + * + * Adds the task to the list running through its css_set if necessary. + * Has to be after the task is visible on the task list in case we race + * with the first call to cgroup_iter_start() - to guarantee that the + * new task ends up on its list. + */ void cgroup_post_fork(struct task_struct *child) { if (use_task_css_set_links) { @@ -2488,6 +2688,7 @@ void cgroup_post_fork(struct task_struct *child) /** * cgroup_exit - detach cgroup from exiting task * @tsk: pointer to task_struct of exiting process + * @run_callback: run exit callbacks? * * Description: Detach cgroup from @tsk and release it. * @@ -2516,9 +2717,8 @@ void cgroup_post_fork(struct task_struct *child) * attach us to a different cgroup, decrementing the count on * the first cgroup that we never incremented. But in this case, * top_cgroup isn't going away, and either task has PF_EXITING set, - * which wards off any attach_task() attempts, or task is a failed - * fork, never visible to attach_task. - * + * which wards off any cgroup_attach_task() attempts, or task is a failed + * fork, never visible to cgroup_attach_task. */ void cgroup_exit(struct task_struct *tsk, int run_callbacks) { @@ -2555,9 +2755,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) } /** - * cgroup_clone - duplicate the current cgroup in the hierarchy - * that the given subsystem is attached to, and move this task into - * the new child + * cgroup_clone - clone the cgroup the given subsystem is attached to + * @tsk: the task to be moved + * @subsys: the given subsystem + * + * Duplicate the current cgroup in the hierarchy that the given + * subsystem is attached to, and move this task into the new + * child. */ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) { @@ -2606,7 +2810,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); if (IS_ERR(dentry)) { printk(KERN_INFO - "Couldn't allocate dentry for %s: %ld\n", nodename, + "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename, PTR_ERR(dentry)); ret = PTR_ERR(dentry); goto out_release; @@ -2657,7 +2861,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) } /* All seems fine. Finish by moving the task into the new cgroup */ - ret = attach_task(child, tsk); + ret = cgroup_attach_task(child, tsk); mutex_unlock(&cgroup_mutex); out_release: @@ -2670,9 +2874,12 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) return ret; } -/* - * See if "cgrp" is a descendant of the current task's cgroup in - * the appropriate hierarchy +/** + * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp + * @cgrp: the cgroup in question + * + * See if @cgrp is a descendant of the current task's cgroup in + * the appropriate hierarchy. * * If we are sending in dummytop, then presumably we are creating * the top cgroup in the subsystem. @@ -2751,9 +2958,7 @@ void __css_put(struct cgroup_subsys_state *css) * release agent task. We don't bother to wait because the caller of * this routine has no use for the exit status of the release agent * task, so no sense holding our caller up for that. - * */ - static void cgroup_release_agent(struct work_struct *work) { BUG_ON(work != &release_agent_work);