X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=1a649f2bb9bb2779fb2a033dda7eba5898da596b;hb=02b311bce9fc87987a123adc3e6a2d0a2caa70e2;hp=72248d1b9e3f7a18198796ad1134c9abb2fd4b4e;hpb=f1f76afd71e0f17af9a35fcb649f4bab53304a4d;p=linux-2.6-omap-h63xx.git

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3..1a649f2bb9b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -18,7 +18,6 @@
  * distribution for more details.
  */
 
-#include <linux/config.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/cpuset.h>
@@ -41,6 +40,7 @@
 #include <linux/proc_fs.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
@@ -392,11 +392,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
 	return 0;
 }
 
-static struct super_block *cpuset_get_sb(struct file_system_type *fs_type,
-					int flags, const char *unused_dev_name,
-					void *data)
+static int cpuset_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, cpuset_fill_super);
+	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
 }
 
 static struct file_system_type cpuset_fs_type = {
@@ -762,6 +762,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  *
  * Call with manage_mutex held.  May nest a call to the
  * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ * Must not be called holding callback_mutex, because we must
+ * not call lock_cpu_hotplug() while holding callback_mutex.
  */
 
 static void update_cpu_domains(struct cpuset *cur)
@@ -781,7 +783,7 @@ static void update_cpu_domains(struct cpuset *cur)
 		if (is_cpu_exclusive(c))
 			cpus_andnot(pspan, pspan, c->cpus_allowed);
 	}
-	if (is_removed(cur) || !is_cpu_exclusive(cur)) {
+	if (!is_cpu_exclusive(cur)) {
 		cpus_or(pspan, pspan, cur->cpus_allowed);
 		if (cpus_equal(pspan, cur->cpus_allowed))
 			return;
@@ -1063,7 +1065,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 }
 
 /*
- * Frequency meter - How fast is some event occuring?
+ * Frequency meter - How fast is some event occurring?
  *
  * These routines manage a digitally filtered, constant time based,
  * event frequency meter.  There are four routines:
@@ -1177,6 +1179,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	cpumask_t cpus;
 	nodemask_t from, to;
 	struct mm_struct *mm;
+	int retval;
 
 	if (sscanf(pidbuf, "%d", &pid) != 1)
 		return -EIO;
@@ -1205,6 +1208,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 		get_task_struct(tsk);
 	}
 
+	retval = security_task_setscheduler(tsk, 0, NULL);
+	if (retval) {
+		put_task_struct(tsk);
+		return retval;
+	}
+
 	mutex_lock(&callback_mutex);
 
 	task_lock(tsk);
@@ -1910,6 +1919,17 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }
 
+/*
+ * Locking note on the strange update_flag() call below:
+ *
+ * If the cpuset being removed is marked cpu_exclusive, then simulate
+ * turning cpu_exclusive off, which will call update_cpu_domains().
+ * The lock_cpu_hotplug() call in update_cpu_domains() must not be
+ * made while holding callback_mutex.  Elsewhere the kernel nests
+ * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
+ * nesting would risk an ABBA deadlock.
+ */
+
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cpuset *cs = dentry->d_fsdata;
@@ -1929,11 +1949,16 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		mutex_unlock(&manage_mutex);
 		return -EBUSY;
 	}
+	if (is_cpu_exclusive(cs)) {
+		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
+		if (retval < 0) {
+			mutex_unlock(&manage_mutex);
+			return retval;
+		}
+	}
 	parent = cs->parent;
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
-	if (is_cpu_exclusive(cs))
-		update_cpu_domains(cs);
 	list_del(&cs->sibling);	/* delete my sibling from parent->children */
 	spin_lock(&cs->dentry->d_lock);
 	d = dget(cs->dentry);
@@ -2231,19 +2256,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * So only GFP_KERNEL allocations, if all nodes in the cpuset are
  * short of memory, might require taking the callback_mutex mutex.
  *
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
- * hardwall cpusets - no allocation on a node outside the cpuset is
- * allowed (unless in interrupt, of course).
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
+ * no allocation on a node outside the cpuset is allowed (unless in
+ * interrupt, of course).
  *
- * The second loop doesn't even call here for GFP_ATOMIC requests
- * (if the __alloc_pages() local variable 'wait' is set).  That check
- * and the checks below have the combined affect in the second loop of
- * the __alloc_pages() routine that:
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags.  That logic and the checks below have the combined
+ * effect that:
 *	in_interrupt - any node ok (current task context irrelevant)
 *	GFP_ATOMIC   - any node ok
 *	GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
 *	GFP_USER     - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
+ *    the code that might scan up ancestor cpusets and sleep.
 **/

int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -2255,6 +2286,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	if (in_interrupt())
 		return 1;
 	node = z->zone_pgdat->node_id;
+	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
@@ -2427,31 +2459,43 @@ void __cpuset_memory_pressure_bump(void)
  */
static int proc_cpuset_show(struct seq_file *m, void *v)
{
+	struct pid *pid;
 	struct task_struct *tsk;
 	char *buf;
-	int retval = 0;
+	int retval;
 
+	retval = -ENOMEM;
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
-		return -ENOMEM;
+		goto out;
+
+	retval = -ESRCH;
+	pid = m->private;
+	tsk = get_pid_task(pid, PIDTYPE_PID);
+	if (!tsk)
+		goto out_free;
 
-	tsk = m->private;
+	retval = -EINVAL;
 	mutex_lock(&manage_mutex);
+
 	retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
 	if (retval < 0)
-		goto out;
+		goto out_unlock;
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
-out:
+out_unlock:
 	mutex_unlock(&manage_mutex);
+	put_task_struct(tsk);
+out_free:
 	kfree(buf);
+out:
 	return retval;
}
 
static int cpuset_open(struct inode *inode, struct file *file)
{
-	struct task_struct *tsk = PROC_I(inode)->task;
-	return single_open(file, proc_cpuset_show, tsk);
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cpuset_show, pid);
}
 
struct file_operations proc_cpuset_operations = {
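
The locking comments the patch adds around update_cpu_domains() and cpuset_rmdir() both guard the same invariant: everywhere else the kernel takes lock_cpu_hotplug() first and callback_mutex second, so cpuset_rmdir() must not nest them the other way around. A minimal sketch of the two orderings, illustrative only and not part of the patch (the function names here are hypothetical):

/* Order used everywhere else in the kernel: hotplug lock (A), then
 * callback_mutex (B). */
static void normal_path(void)
{
	lock_cpu_hotplug();			/* takes A */
	mutex_lock(&callback_mutex);		/* then B */
	/* ... modify cpuset state ... */
	mutex_unlock(&callback_mutex);
	unlock_cpu_hotplug();
}

/* The ordering the patch removes from cpuset_rmdir(): B, then A.
 * Run concurrently with normal_path(), each task can end up holding
 * one lock while waiting forever on the other -- the ABBA deadlock.
 * Hence the patch clears cpu_exclusive via update_flag(), which nests
 * lock_cpu_hotplug() safely, before callback_mutex is taken. */
static void deadlock_prone_rmdir(void)
{
	mutex_lock(&callback_mutex);		/* takes B */
	lock_cpu_hotplug();			/* then A: deadlock risk */
	/* ... */
	unlock_cpu_hotplug();
	mutex_unlock(&callback_mutex);
}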
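
The might_sleep_if() added to __cpuset_zone_allowed() enforces the "Rule" stated in the comment: without __GFP_HARDWALL the check may scan up ancestor cpusets and take callback_mutex, which can sleep. A hedged sketch of callers honoring that rule; the helper names are hypothetical, and cpuset_zone_allowed() is assumed to be the inline wrapper from include/linux/cpuset.h:

/* Atomic context: pass __GFP_HARDWALL so only the hardwall check
 * runs; that path just tests current->mems_allowed and never sleeps. */
static int zone_ok_atomic(struct zone *z)
{
	return cpuset_zone_allowed(z, GFP_ATOMIC | __GFP_HARDWALL);
}

/* Process context where sleeping is allowed: the full check, which
 * may walk up to the nearest mem_exclusive ancestor cpuset. */
static int zone_ok_may_sleep(struct zone *z)
{
	return cpuset_zone_allowed(z, GFP_KERNEL);
}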