X-Git-Url: http://pilppa.org/gitweb/?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=28176d083f7baadca422dfd510fdd7dbfd892c00;hb=bb7e7e032d2cb8e0e9a88a2be209de5e61033b39;hp=712d02029971ed456a4025d23f0cd4d187d2f6ff;hpb=3874b98c655b9490bea1cf9c7697d5dc5338376f;p=linux-2.6-omap-h63xx.git diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 712d0202997..28176d083f7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -180,6 +180,42 @@ static struct super_block *cpuset_sb = NULL; */ static DECLARE_MUTEX(cpuset_sem); +static struct task_struct *cpuset_sem_owner; +static int cpuset_sem_depth; + +/* + * The global cpuset semaphore cpuset_sem can be needed by the + * memory allocator to update a task's mems_allowed (see the calls + * to cpuset_update_current_mems_allowed()) or to walk up the + * cpuset hierarchy to find a mem_exclusive cpuset (see the calls + * to cpuset_excl_nodes_overlap()). + * + * But if the memory allocation is being done by cpuset.c code, it + * usually already holds cpuset_sem. Double tripping on a kernel + * semaphore deadlocks the current task, and any other task that + * subsequently tries to obtain the lock. + * + * Run all up's and down's on cpuset_sem through the following + * wrappers, which will detect this nested locking, and avoid + * deadlocking. + */ + +static inline void cpuset_down(struct semaphore *psem) +{ + if (cpuset_sem_owner != current) { + down(psem); + cpuset_sem_owner = current; + } + cpuset_sem_depth++; +} + +static inline void cpuset_up(struct semaphore *psem) +{ + if (--cpuset_sem_depth == 0) { + cpuset_sem_owner = NULL; + up(psem); + } +} /* * A couple of forward declarations required, due to cyclic reference loop: @@ -522,19 +558,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * Refresh current tasks mems_allowed and mems_generation from * current tasks cpuset. Call with cpuset_sem held. 
* - * Be sure to call refresh_mems() on any cpuset operation which - * (1) holds cpuset_sem, and (2) might possibly alloc memory. - * Call after obtaining cpuset_sem lock, before any possible - * allocation. Otherwise one risks trying to allocate memory - * while the task cpuset_mems_generation is not the same as - * the mems_generation in its cpuset, which would deadlock on - * cpuset_sem in cpuset_update_current_mems_allowed(). - * - * Since we hold cpuset_sem, once refresh_mems() is called, the - * test (current->cpuset_mems_generation != cs->mems_generation) - * in cpuset_update_current_mems_allowed() will remain false, - * until we drop cpuset_sem. Anyone else who would change our - * cpusets mems_generation needs to lock cpuset_sem first. + * This routine is needed to update the per-task mems_allowed + * data, within the task's context, when it is trying to allocate + * memory (in various mm/mempolicy.c routines) and notices + * that some other task has been modifying its cpuset. */ static void refresh_mems(void) @@ -840,7 +867,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us } buffer[nbytes] = 0; /* nul-terminate */ - down(&cpuset_sem); + cpuset_down(&cpuset_sem); if (is_removed(cs)) { retval = -ENODEV; @@ -874,7 +901,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us if (retval == 0) retval = nbytes; out2: - up(&cpuset_sem); + cpuset_up(&cpuset_sem); cpuset_release_agent(pathbuf); out1: kfree(buffer); @@ -914,9 +941,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) { cpumask_t mask; - down(&cpuset_sem); + cpuset_down(&cpuset_sem); mask = cs->cpus_allowed; - up(&cpuset_sem); + cpuset_up(&cpuset_sem); return cpulist_scnprintf(page, PAGE_SIZE, mask); } @@ -925,9 +952,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) { nodemask_t mask; - down(&cpuset_sem); + cpuset_down(&cpuset_sem); mask = cs->mems_allowed; - up(&cpuset_sem); 
return nodelist_scnprintf(page, PAGE_SIZE, mask); } @@ -941,8 +968,6 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, char *page; ssize_t retval = 0; char *s; - char *start; - size_t n; if (!(page = (char *)__get_free_page(GFP_KERNEL))) return -ENOMEM; @@ -972,14 +997,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, *s++ = '\n'; *s = '\0'; - /* Do nothing if *ppos is at the eof or beyond the eof. */ - if (s - page <= *ppos) - return 0; - - start = page + *ppos; - n = s - start; - retval = n - copy_to_user(buf, start, min(n, nbytes)); - *ppos += retval; + retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); out: free_page((unsigned long)page); return retval; @@ -1334,8 +1352,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) if (!cs) return -ENOMEM; - down(&cpuset_sem); - refresh_mems(); + cpuset_down(&cpuset_sem); cs->flags = 0; if (notify_on_release(parent)) set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); @@ -1360,14 +1377,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) * will down() this new directory's i_sem and if we race with * another mkdir, we might deadlock. 
*/ - up(&cpuset_sem); + cpuset_up(&cpuset_sem); err = cpuset_populate_dir(cs->dentry); /* If err < 0, we have a half-filled directory - oh well ;) */ return 0; err: list_del(&cs->sibling); - up(&cpuset_sem); + cpuset_up(&cpuset_sem); kfree(cs); return err; } @@ -1389,14 +1406,13 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) /* the vfs holds both inode->i_sem already */ - down(&cpuset_sem); - refresh_mems(); + cpuset_down(&cpuset_sem); if (atomic_read(&cs->count) > 0) { - up(&cpuset_sem); + cpuset_up(&cpuset_sem); return -EBUSY; } if (!list_empty(&cs->children)) { - up(&cpuset_sem); + cpuset_up(&cpuset_sem); return -EBUSY; } parent = cs->parent; @@ -1412,7 +1428,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) spin_unlock(&d->d_lock); cpuset_d_remove_dir(d); dput(d); - up(&cpuset_sem); + cpuset_up(&cpuset_sem); cpuset_release_agent(pathbuf); return 0; } @@ -1515,10 +1531,10 @@ void cpuset_exit(struct task_struct *tsk) if (notify_on_release(cs)) { char *pathbuf = NULL; - down(&cpuset_sem); + cpuset_down(&cpuset_sem); if (atomic_dec_and_test(&cs->count)) check_for_release(cs, &pathbuf); - up(&cpuset_sem); + cpuset_up(&cpuset_sem); cpuset_release_agent(pathbuf); } else { atomic_dec(&cs->count); @@ -1539,11 +1555,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk) { cpumask_t mask; - down(&cpuset_sem); + cpuset_down(&cpuset_sem); task_lock((struct task_struct *)tsk); guarantee_online_cpus(tsk->cpuset, &mask); task_unlock((struct task_struct *)tsk); - up(&cpuset_sem); + cpuset_up(&cpuset_sem); return mask; } @@ -1568,9 +1584,9 @@ void cpuset_update_current_mems_allowed(void) if (!cs) return; /* task is exiting */ if (current->cpuset_mems_generation != cs->mems_generation) { - down(&cpuset_sem); + cpuset_down(&cpuset_sem); refresh_mems(); - up(&cpuset_sem); + cpuset_up(&cpuset_sem); } } @@ -1654,7 +1670,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - 
only nodes in current tasks mems allowed ok. **/ -int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) +int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ @@ -1669,14 +1685,14 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) return 0; /* Not hardwall and node outside mems_allowed: scan up cpusets */ - down(&cpuset_sem); + cpuset_down(&cpuset_sem); cs = current->cpuset; if (!cs) goto done; /* current task exiting */ cs = nearest_exclusive_ancestor(cs); allowed = node_isset(node, cs->mems_allowed); done: - up(&cpuset_sem); + cpuset_up(&cpuset_sem); return allowed; } @@ -1697,7 +1713,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ int overlap = 0; /* do cpusets overlap? */ - down(&cpuset_sem); + cpuset_down(&cpuset_sem); cs1 = current->cpuset; if (!cs1) goto done; /* current task exiting */ @@ -1708,7 +1724,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) cs2 = nearest_exclusive_ancestor(cs2); overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); done: - up(&cpuset_sem); + cpuset_up(&cpuset_sem); return overlap; } @@ -1731,7 +1747,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v) return -ENOMEM; tsk = m->private; - down(&cpuset_sem); + cpuset_down(&cpuset_sem); task_lock(tsk); cs = tsk->cpuset; task_unlock(tsk); @@ -1746,7 +1762,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v) seq_puts(m, buf); seq_putc(m, '\n'); out: - up(&cpuset_sem); + cpuset_up(&cpuset_sem); kfree(buf); return retval; }