#include <linux/rmap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/ctype.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
policied. */
enum zone_type policy_zone = 0;
+/*
+ * run-time system-wide default policy => local allocation
+ */
struct mempolicy default_policy = {
.refcnt = ATOMIC_INIT(1), /* never free it */
- .mode = MPOL_DEFAULT,
+ .mode = MPOL_PREFERRED,
+ .flags = MPOL_F_LOCAL,
};
static const struct mempolicy_operations {
static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
{
if (!nodes)
- pol->v.preferred_node = -1; /* local allocation */
+ pol->flags |= MPOL_F_LOCAL; /* local allocation */
else if (nodes_empty(*nodes))
return -EINVAL; /* no allowed nodes */
else
if (mode == MPOL_DEFAULT) {
if (nodes && !nodes_empty(*nodes))
return ERR_PTR(-EINVAL);
- return NULL;
+ return NULL; /* simply delete any existing policy */
}
VM_BUG_ON(!nodes);
{
if (!atomic_dec_and_test(&p->refcnt))
return;
- p->mode = MPOL_DEFAULT;
kmem_cache_free(policy_cache, p);
}
if (pol->flags & MPOL_F_STATIC_NODES) {
int node = first_node(pol->w.user_nodemask);
- if (node_isset(node, *nodes))
+ if (node_isset(node, *nodes)) {
pol->v.preferred_node = node;
- else
- pol->v.preferred_node = -1;
+ pol->flags &= ~MPOL_F_LOCAL;
+ } else
+ pol->flags |= MPOL_F_LOCAL;
} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
pol->v.preferred_node = first_node(tmp);
- } else if (pol->v.preferred_node != -1) {
+ } else if (!(pol->flags & MPOL_F_LOCAL)) {
pol->v.preferred_node = node_remap(pol->v.preferred_node,
pol->w.cpuset_mems_allowed,
*nodes);
return 0;
}
-/* Fill a zone bitmap for a policy */
-static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
+/*
+ * Return nodemask for policy for get_mempolicy() query
+ */
+static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
{
nodes_clear(*nodes);
+ if (p == &default_policy)
+ return;
+
switch (p->mode) {
- case MPOL_DEFAULT:
- break;
case MPOL_BIND:
/* Fall through */
case MPOL_INTERLEAVE:
*nodes = p->v.nodes;
break;
case MPOL_PREFERRED:
- /* or use current node instead of memory_map? */
- if (p->v.preferred_node < 0)
- *nodes = node_states[N_HIGH_MEMORY];
- else
+ if (!(p->flags & MPOL_F_LOCAL))
node_set(p->v.preferred_node, *nodes);
+ /* else return empty node mask for local allocation */
break;
default:
BUG();
}
if (flags & MPOL_F_ADDR) {
+ /*
+ * Do NOT fall back to task policy if the
+ * vma/shared policy at addr is NULL. We
+ * want to return MPOL_DEFAULT in this case.
+ */
down_read(&mm->mmap_sem);
vma = find_vma_intersection(mm, addr, addr+1);
if (!vma) {
return -EINVAL;
if (!pol)
- pol = &default_policy;
+ pol = &default_policy; /* indicates default behavior */
if (flags & MPOL_F_NODE) {
if (flags & MPOL_F_ADDR) {
err = -EINVAL;
goto out;
}
- } else
- *policy = pol->mode | pol->flags;
+ } else {
+ *policy = pol == &default_policy ? MPOL_DEFAULT :
+ pol->mode;
+ *policy |= pol->flags;
+ }
if (vma) {
up_read(¤t->mm->mmap_sem);
err = 0;
if (nmask)
- get_zonemask(pol, nmask);
+ get_policy_nodemask(pol, nmask);
out:
mpol_cond_put(pol);
int err = 0;
nodemask_t tmp;
- down_read(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
err = migrate_vmas(mm, from_nodes, to_nodes, flags);
if (err)
addr);
if (vpol)
pol = vpol;
- } else if (vma->vm_policy &&
- vma->vm_policy->mode != MPOL_DEFAULT)
+ } else if (vma->vm_policy)
pol = vma->vm_policy;
}
if (!pol)
/* Return a zonelist indicated by gfp for node representing a mempolicy */
static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
{
- int nd;
+ int nd = numa_node_id();
switch (policy->mode) {
case MPOL_PREFERRED:
- nd = policy->v.preferred_node;
- if (nd < 0)
- nd = numa_node_id();
+ if (!(policy->flags & MPOL_F_LOCAL))
+ nd = policy->v.preferred_node;
break;
case MPOL_BIND:
/*
* current node is part of the mask, we use the zonelist for
* the first node in the mask instead.
*/
- nd = numa_node_id();
if (unlikely(gfp & __GFP_THISNODE) &&
unlikely(!node_isset(nd, policy->v.nodes)))
nd = first_node(policy->v.nodes);
break;
case MPOL_INTERLEAVE: /* should not happen */
- case MPOL_DEFAULT:
- nd = numa_node_id();
break;
default:
- nd = 0;
BUG();
}
return node_zonelist(nd, gfp);
*/
unsigned slab_node(struct mempolicy *policy)
{
- unsigned short pol = policy ? policy->mode : MPOL_DEFAULT;
+ if (!policy || policy->flags & MPOL_F_LOCAL)
+ return numa_node_id();
+
+ switch (policy->mode) {
+ case MPOL_PREFERRED:
+ /*
+ * handled MPOL_F_LOCAL above
+ */
+ return policy->v.preferred_node;
- switch (pol) {
case MPOL_INTERLEAVE:
return interleave_nodes(policy);
return zone->node;
}
- case MPOL_PREFERRED:
- if (policy->v.preferred_node >= 0)
- return policy->v.preferred_node;
- /* Fall through */
-
default:
- return numa_node_id();
+ BUG();
}
}
if (a->mode != MPOL_DEFAULT && !mpol_match_intent(a, b))
return 0;
switch (a->mode) {
- case MPOL_DEFAULT:
- return 1;
case MPOL_BIND:
/* Fall through */
case MPOL_INTERLEAVE:
return nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
- return a->v.preferred_node == b->v.preferred_node;
+ return a->v.preferred_node == b->v.preferred_node &&
+ a->flags == b->flags;
default:
BUG();
return 0;
return 0;
}
-void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
- unsigned short flags, nodemask_t *policy_nodes)
-{
- info->root = RB_ROOT;
- spin_lock_init(&info->lock);
-
- if (policy != MPOL_DEFAULT) {
- struct mempolicy *newpol;
-
- /* Falls back to MPOL_DEFAULT on any error */
- newpol = mpol_new(policy, flags, policy_nodes);
- if (!IS_ERR(newpol)) {
- /* Create pseudo-vma that contains just the policy */
- struct vm_area_struct pvma;
-
- memset(&pvma, 0, sizeof(struct vm_area_struct));
- /* Policy covers entire file */
- pvma.vm_end = TASK_SIZE;
- mpol_set_shared_policy(info, &pvma, newpol);
- mpol_put(newpol);
- }
+/**
+ * mpol_shared_policy_init - initialize shared policy for inode
+ * @sp: pointer to inode shared policy
+ * @mpol: struct mempolicy to install
+ *
+ * Install non-NULL @mpol in inode's shared policy rb-tree.
+ * On entry, the current task has a reference on a non-NULL @mpol.
+ * This must be released on exit.
+ */
+void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
+{
+ sp->root = RB_ROOT; /* empty tree == default mempolicy */
+ spin_lock_init(&sp->lock);
+
+ if (mpol) {
+ struct vm_area_struct pvma;
+ struct mempolicy *new;
+
+ /* contextualize the tmpfs mount point mempolicy */
+ new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
+ mpol_put(mpol); /* drop our ref on sb mpol */
+ if (IS_ERR(new))
+ return; /* no valid nodemask intersection */
+
+ /* Create pseudo-vma that contains just the policy */
+ memset(&pvma, 0, sizeof(struct vm_area_struct));
+ pvma.vm_end = TASK_SIZE; /* policy covers entire file */
+ mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
+ mpol_put(new); /* drop initial ref */
}
}
}
/*
- * Display pages allocated per node and memory policy via /proc.
+ * Parse and format mempolicy from/to strings
*/
-static const char * const policy_types[] =
- { "default", "prefer", "bind", "interleave" };
/*
+ * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
+ * Used only for mpol_parse_str() and mpol_to_str()
+ */
+#define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
+static const char * const policy_types[] =
+ { "default", "prefer", "bind", "interleave", "local" };
+
+
+#ifdef CONFIG_TMPFS
+/**
+ * mpol_parse_str - parse string to mempolicy
+ * @str: string containing mempolicy to parse
+ * @mpol: pointer to struct mempolicy pointer, returned on success.
+ * @no_context: flag whether to "contextualize" the mempolicy
+ *
+ * Format of input:
+ * <mode>[=<flags>][:<nodelist>]
+ *
+ * if @no_context is true, save the input nodemask in w.user_nodemask in
+ * the returned mempolicy. This will be used to "clone" the mempolicy in
+ * a specific context [cpuset] at a later time. Used to parse tmpfs mpol
+ * mount option. Note that if 'static' or 'relative' mode flags were
+ * specified, the input nodemask will already have been saved. Saving
+ * it again is redundant, but safe.
+ *
+ * On success, returns 0, else 1
+ */
+int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
+{
+ struct mempolicy *new = NULL;
+ unsigned short uninitialized_var(mode);
+ unsigned short uninitialized_var(mode_flags);
+ nodemask_t nodes;
+ char *nodelist = strchr(str, ':');
+ char *flags = strchr(str, '=');
+ int i;
+ int err = 1;
+
+ if (nodelist) {
+ /* NUL-terminate mode or flags string */
+ *nodelist++ = '\0';
+ if (nodelist_parse(nodelist, nodes))
+ goto out;
+ if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
+ goto out;
+ } else
+ nodes_clear(nodes);
+
+ if (flags)
+ *flags++ = '\0'; /* terminate mode string */
+
+ for (i = 0; i <= MPOL_LOCAL; i++) {
+ if (!strcmp(str, policy_types[i])) {
+ mode = i;
+ break;
+ }
+ }
+ if (i > MPOL_LOCAL)
+ goto out;
+
+ switch (mode) {
+ case MPOL_PREFERRED:
+ /*
+ * Insist on a nodelist of one node only
+ */
+ if (nodelist) {
+ char *rest = nodelist;
+ while (isdigit(*rest))
+ rest++;
+ if (!*rest)
+ err = 0;
+ }
+ break;
+ case MPOL_INTERLEAVE:
+ /*
+ * Default to online nodes with memory if no nodelist
+ */
+ if (!nodelist)
+ nodes = node_states[N_HIGH_MEMORY];
+ err = 0;
+ break;
+ case MPOL_LOCAL:
+ /*
+ * Don't allow a nodelist; mpol_new() checks flags
+ */
+ if (nodelist)
+ goto out;
+ mode = MPOL_PREFERRED;
+ break;
+
+ /*
+ * case MPOL_BIND: mpol_new() enforces non-empty nodemask.
+ * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags.
+ */
+ }
+
+ mode_flags = 0;
+ if (flags) {
+ /*
+ * Currently, we only support two mutually exclusive
+ * mode flags.
+ */
+ if (!strcmp(flags, "static"))
+ mode_flags |= MPOL_F_STATIC_NODES;
+ else if (!strcmp(flags, "relative"))
+ mode_flags |= MPOL_F_RELATIVE_NODES;
+ else
+ err = 1;
+ }
+
+ new = mpol_new(mode, mode_flags, &nodes);
+ if (IS_ERR(new))
+ err = 1;
+ else if (no_context)
+ new->w.user_nodemask = nodes; /* save for contextualization */
+
+out:
+ /* Restore string for error message */
+ if (nodelist)
+ *--nodelist = ':';
+ if (flags)
+ *--flags = '=';
+ if (!err)
+ *mpol = new;
+ return err;
+}
+#endif /* CONFIG_TMPFS */
+
+/**
+ * mpol_to_str - format a mempolicy structure for printing
+ * @buffer: to contain formatted mempolicy string
+ * @maxlen: length of @buffer
+ * @pol: pointer to mempolicy to be formatted
+ * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
+ *
* Convert a mempolicy into a string.
* Returns the number of characters in buffer (if positive)
* or an error (negative)
*/
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
{
char *p = buffer;
int l;
nodemask_t nodes;
- unsigned short mode = pol ? pol->mode : MPOL_DEFAULT;
+ unsigned short mode;
unsigned short flags = pol ? pol->flags : 0;
+ /*
+ * Sanity check: room for longest mode, flag and some nodes
+ */
+ VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16);
+
+ if (!pol || pol == &default_policy)
+ mode = MPOL_DEFAULT;
+ else
+ mode = pol->mode;
+
switch (mode) {
case MPOL_DEFAULT:
nodes_clear(nodes);
case MPOL_PREFERRED:
nodes_clear(nodes);
- node_set(pol->v.preferred_node, nodes);
+ if (flags & MPOL_F_LOCAL)
+ mode = MPOL_LOCAL; /* pseudo-policy */
+ else
+ node_set(pol->v.preferred_node, nodes);
break;
case MPOL_BIND:
/* Fall through */
case MPOL_INTERLEAVE:
- nodes = pol->v.nodes;
+ if (no_context)
+ nodes = pol->w.user_nodemask;
+ else
+ nodes = pol->v.nodes;
break;
default:
BUG();
- return -EFAULT;
}
l = strlen(policy_types[mode]);
- if (buffer + maxlen < p + l + 1)
- return -ENOSPC;
+ if (buffer + maxlen < p + l + 1)
+ return -ENOSPC;
strcpy(p, policy_types[mode]);
p += l;
- if (flags) {
- int need_bar = 0;
-
+ if (flags & MPOL_MODE_FLAGS) {
if (buffer + maxlen < p + 2)
return -ENOSPC;
*p++ = '=';
+ /*
+ * Currently, the only defined flags are mutually exclusive
+ */
if (flags & MPOL_F_STATIC_NODES)
- p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
- if (flags & MPOL_F_RELATIVE_NODES)
- p += sprintf(p, "%srelative", need_bar++ ? "|" : "");
+ p += snprintf(p, buffer + maxlen - p, "static");
+ else if (flags & MPOL_F_RELATIVE_NODES)
+ p += snprintf(p, buffer + maxlen - p, "relative");
}
if (!nodes_empty(nodes)) {
if (buffer + maxlen < p + 2)
return -ENOSPC;
- *p++ = '=';
+ *p++ = ':';
p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
}
return p - buffer;
}
#endif
+/*
+ * Display pages allocated per node and memory policy via /proc.
+ */
int show_numa_map(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
return 0;
pol = get_vma_policy(priv->task, vma, vma->vm_start);
- mpol_to_str(buffer, sizeof(buffer), pol);
+ mpol_to_str(buffer, sizeof(buffer), pol, 0);
mpol_cond_put(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);