* A cpuset can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cpusets is empty. Since all
* tasks in the system use _some_ cpuset, and since there is always at
- * least one task in the system (init, pid == 1), therefore, top_cpuset
+ * least one task in the system (init), therefore, top_cpuset
* always has either children cpusets and/or using tasks. So we don't
* need a special hack to ensure that top_cpuset cannot be deleted.
*
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* directories start off with i_nlink == 2 (for "." entry) */
- inode->i_nlink++;
+ inc_nlink(inode);
} else {
return -ENOMEM;
}
*
*
* When reading/writing to a file:
- * - the cpuset to use in file->f_dentry->d_parent->d_fsdata
- * - the 'cftype' of the file is file->f_dentry->d_fsdata
+ * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
+ * - the 'cftype' of the file is file->f_path.dentry->d_fsdata
*/
struct cftype {
}
/* Remaining checks don't apply to root cpuset */
- if ((par = cur->parent) == NULL)
+ if (cur == &top_cpuset)
return 0;
+ par = cur->parent;
+
/* We must be a subset of our parent cpuset */
if (!is_cpuset_subset(trial, par))
return -EACCES;
int fudge;
int retval;
+ /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+ if (cs == &top_cpuset)
+ return -EACCES;
+
trialcs = *cs;
retval = nodelist_parse(buf, trialcs.mems_allowed);
if (retval < 0)
cpu_exclusive_changed =
(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
mutex_lock(&callback_mutex);
- if (turning_on)
- set_bit(bit, &cs->flags);
- else
- clear_bit(bit, &cs->flags);
+ cs->flags = trialcs.flags;
mutex_unlock(&callback_mutex);
if (cpu_exclusive_changed)
task_lock(tsk);
oldcs = tsk->cpuset;
- if (!oldcs) {
+ /*
+ * After getting 'oldcs' cpuset ptr, be sure still not exiting.
+ * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
+ * then fail this attach_task(), to avoid breaking top_cpuset.count.
+ */
+ if (tsk->flags & PF_EXITING) {
task_unlock(tsk);
mutex_unlock(&callback_mutex);
put_task_struct(tsk);
FILE_TASKLIST,
} cpuset_filetype_t;
-static ssize_t cpuset_common_file_write(struct file *file, const char __user *userbuf,
+static ssize_t cpuset_common_file_write(struct file *file,
+ const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
cpuset_filetype_t type = cft->private;
char *buffer;
char *pathbuf = NULL;
int retval = 0;
/* Crude upper limit on largest legitimate cpulist user might write. */
- if (nbytes > 100 + 6 * NR_CPUS)
+ if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES))
return -E2BIG;
/* +1 for nul-terminator */
size_t nbytes, loff_t *ppos)
{
ssize_t retval = 0;
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
- struct cftype *cft = __d_cft(file->f_dentry);
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
cpuset_filetype_t type = cft->private;
char *page;
ssize_t retval = 0;
loff_t *ppos)
{
ssize_t retval = 0;
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
if (err)
return err;
- cft = __d_cft(file->f_dentry);
+ cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
if (cft->open)
static int cpuset_file_release(struct inode *inode, struct file *file)
{
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (cft->release)
return cft->release(inode, file);
return 0;
return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
-static struct file_operations cpuset_file_operations = {
+static const struct file_operations cpuset_file_operations = {
.read = cpuset_file_read,
.write = cpuset_file_write,
.llseek = generic_file_llseek,
inode->i_fop = &simple_dir_operations;
/* start off with i_nlink == 2 (for "." entry) */
- inode->i_nlink++;
+ inc_nlink(inode);
} else if (S_ISREG(mode)) {
inode->i_size = 0;
inode->i_fop = &cpuset_file_operations;
error = cpuset_create_file(dentry, S_IFDIR | mode);
if (!error) {
dentry->d_fsdata = cs;
- parent->d_inode->i_nlink++;
+ inc_nlink(parent->d_inode);
cs->dentry = dentry;
}
dput(dentry);
*/
static int cpuset_tasks_open(struct inode *unused, struct file *file)
{
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
struct ctr_struct *ctr;
pid_t *pidarray;
int npids;
}
root = cpuset_mount->mnt_sb->s_root;
root->d_fsdata = &top_cpuset;
- root->d_inode->i_nlink++;
+ inc_nlink(root->d_inode);
top_cpuset.dentry = root;
root->d_inode->i_op = &cpuset_dir_inode_operations;
number_of_cpusets = 1;
}
/*
- * The top_cpuset tracks what CPUs and Memory Nodes are online,
- * period. This is necessary in order to make cpusets transparent
- * (of no affect) on systems that are actively using CPU hotplug
- * but making no active use of cpusets.
- *
- * This handles CPU hotplug (cpuhp) events. If someday Memory
- * Nodes can be hotplugged (dynamically changing node_online_map)
- * then we should handle that too, perhaps in a similar way.
+ * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets. If this removes the
+ * last CPU or node from a cpuset, then the guarantee_online_cpus()
+ * or guarantee_online_mems() code will use that emptied cpusets
+ * parent online CPUs or nodes. Cpusets that were already empty of
+ * CPUs or nodes are left empty.
+ *
+ * This routine is intentionally inefficient in a couple of regards.
+ * It will check all cpusets in a subtree even if the top cpuset of
+ * the subtree has no offline CPUs or nodes. It checks both CPUs and
+ * nodes, even though the caller could have been coded to know that
+ * only one of CPUs or nodes needed to be checked on a given call.
+ * This was done to minimize text size rather than cpu cycles.
+ *
+ * Call with both manage_mutex and callback_mutex held.
+ *
+ * Recursive, on depth of cpuset subtree.
*/
-#ifdef CONFIG_HOTPLUG_CPU
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
- unsigned long phase, void *cpu)
+static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+{
+ struct cpuset *c;
+
+ /* Each of our child cpusets mems must be online */
+ list_for_each_entry(c, &cur->children, sibling) {
+ guarantee_online_cpus_mems_in_subtree(c);
+ if (!cpus_empty(c->cpus_allowed))
+ guarantee_online_cpus(c, &c->cpus_allowed);
+ if (!nodes_empty(c->mems_allowed))
+ guarantee_online_mems(c, &c->mems_allowed);
+ }
+}
+
+/*
+ * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
+ * cpu_online_map and node_online_map. Force the top cpuset to track
+ * whats online after any CPU or memory node hotplug or unplug event.
+ *
+ * To ensure that we don't remove a CPU or node from the top cpuset
+ * that is currently in use by a child cpuset (which would violate
+ * the rule that cpusets must be subsets of their parent), we first
+ * call the recursive routine guarantee_online_cpus_mems_in_subtree().
+ *
+ * Since there are two callers of this routine, one for CPU hotplug
+ * events and one for memory node hotplug events, we could have coded
+ * two separate routines here. We code it as a single common routine
+ * in order to minimize text size.
+ */
+
+static void common_cpu_mem_hotplug_unplug(void)
{
mutex_lock(&manage_mutex);
mutex_lock(&callback_mutex);
+ guarantee_online_cpus_mems_in_subtree(&top_cpuset);
top_cpuset.cpus_allowed = cpu_online_map;
+ top_cpuset.mems_allowed = node_online_map;
mutex_unlock(&callback_mutex);
mutex_unlock(&manage_mutex);
+}
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period. This is necessary in order to make cpusets transparent
+ * (of no affect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This routine ensures that top_cpuset.cpus_allowed tracks
+ * cpu_online_map on each CPU hotplug (cpuhp) event.
+ */
+
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+ unsigned long phase, void *cpu)
+{
+ common_cpu_mem_hotplug_unplug();
return 0;
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Keep top_cpuset.mems_allowed tracking node_online_map.
+ * Call this routine anytime after you change node_online_map.
+ * See also the previous routine cpuset_handle_cpuhp().
+ */
+
+void cpuset_track_online_nodes(void)
+{
+ common_cpu_mem_hotplug_unplug();
+}
#endif
/**
int i;
for (i = 0; zl->zones[i]; i++) {
- int nid = zl->zones[i]->zone_pgdat->node_id;
+ int nid = zone_to_nid(zl->zones[i]);
if (node_isset(nid, current->mems_allowed))
return 1;
const struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
- if (in_interrupt())
+ if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
return 1;
- node = z->zone_pgdat->node_id;
+ node = zone_to_nid(z);
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed))
return 1;
return single_open(file, proc_cpuset_show, pid);
}
-struct file_operations proc_cpuset_operations = {
+const struct file_operations proc_cpuset_operations = {
.open = cpuset_open,
.read = seq_read,
.llseek = seq_lseek,