X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=12815d3f1a05ee25efc0eb489ff9c6abc4e4cb98;hb=b2e977ca364764dabb091c360f329868b7e3f29b;hp=2a75e44e1a41355bc55cc4819f167e4bc63f0cab;hpb=977127174a7dff52d17faeeb4c4949a54221881f;p=linux-2.6-omap-h63xx.git diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2a75e44e1a4..12815d3f1a0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -641,7 +641,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * task has been modifying its cpuset. */ -void cpuset_update_task_memory_state() +void cpuset_update_task_memory_state(void) { int my_cpusets_mem_gen; struct task_struct *tsk = current; @@ -1554,7 +1554,7 @@ struct ctr_struct { * when reading out p->cpuset, as we don't really care if it changes * on the next cycle, and we are not going to try to dereference it. */ -static inline int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs) +static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs) { int n = 0; struct task_struct *g, *p; @@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child) * We don't need to task_lock() this reference to tsk->cpuset, * because tsk is already marked PF_EXITING, so attach_task() won't * mess with it, or task is a failed fork, never visible to attach_task. + * + * Hack: + * + * Set the exiting tasks cpuset to the root cpuset (top_cpuset). + * + * Don't leave a task unable to allocate memory, as that is an + * accident waiting to happen should someone add a callout in + * do_exit() after the cpuset_exit() call that might allocate. + * If a task tries to allocate memory with an invalid cpuset, + * it will oops in cpuset_update_task_memory_state(). + * + * We call cpuset_exit() while the task is still competent to + * handle notify_on_release(), then leave the task attached to + * the root cpuset (top_cpuset) for the remainder of its exit. + * + * To do this properly, we would increment the reference count on + * top_cpuset, and near the very end of the kernel/exit.c do_exit() + * code we would add a second cpuset function call, to drop that + * reference. This would just create an unnecessary hot spot on + * the top_cpuset reference count, to no avail. + * + * Normally, holding a reference to a cpuset without bumping its + * count is unsafe. The cpuset could go away, or someone could + * attach us to a different cpuset, decrementing the count on + * the first cpuset that we never incremented. But in this case, + * top_cpuset isn't going away, and either task has PF_EXITING set, + * which wards off any attach_task() attempts, or task is a failed + * fork, never visible to attach_task. + * + * Another way to do this would be to set the cpuset pointer + * to NULL here, and check in cpuset_update_task_memory_state() + * for a NULL pointer. This hack avoids that NULL check, for no + * cost (other than this way too long comment ;). **/ void cpuset_exit(struct task_struct *tsk) @@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk) struct cpuset *cs; cs = tsk->cpuset; - tsk->cpuset = NULL; + tsk->cpuset = &top_cpuset; /* Hack - see comment above */ if (notify_on_release(cs)) { char *pathbuf = NULL; @@ -2149,6 +2182,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) return allowed; } +/** + * cpuset_lock - lock out any changes to cpuset structures + * + * The out of memory (oom) code needs to lock down cpusets + * from being changed while it scans the tasklist looking for a + * task in an overlapping cpuset. Expose callback_sem via this + * cpuset_lock() routine, so the oom code can lock it, before + * locking the task list. The tasklist_lock is a spinlock, so + * must be taken inside callback_sem. + */ + +void cpuset_lock(void) +{ + down(&callback_sem); +} + +/** + * cpuset_unlock - release lock on cpuset changes + * + * Undo the lock taken in a previous cpuset_lock() call. + */ + +void cpuset_unlock(void) +{ + up(&callback_sem); +} + /** * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? * @p: pointer to task_struct of some other task. @@ -2158,7 +2218,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) * determine if task @p's memory usage might impact the memory * available to the current task. * - * Acquires callback_sem - not suitable for calling from a fast path. + * Call while holding callback_sem. **/ int cpuset_excl_nodes_overlap(const struct task_struct *p) @@ -2166,8 +2226,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ int overlap = 0; /* do cpusets overlap? */ - down(&callback_sem); - task_lock(current); if (current->flags & PF_EXITING) { task_unlock(current); @@ -2186,8 +2244,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); done: - up(&callback_sem); - return overlap; }