X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmempolicy.c;h=83c69f8a64c29dd0878050b3912e5f0c2b1af3ac;hb=dde0013782dbd09e1cc68ca03860f3a62b03cb34;hp=bb54b88c3d5aaab2752569f046129bea8082f4ec;hpb=b2315372eac9cd9f622c32a93e323cf6f0f03462;p=linux-2.6-omap-h63xx.git

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bb54b88c3d5..83c69f8a64c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -72,23 +72,23 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include

 #include
 #include
@@ -110,6 +110,9 @@ struct mempolicy default_policy = {
 	.policy = MPOL_DEFAULT,
 };

+static void mpol_rebind_policy(struct mempolicy *pol,
+					const nodemask_t *newmask);
+
 /* Do sanity checking on a policy */
 static int mpol_check_policy(int mode, nodemask_t *nodes)
 {
@@ -128,7 +131,7 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 			return -EINVAL;
 		break;
 	}
-	return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL;
+	return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL;
 }

 /* Generate a custom zonelist for the BIND policy. */
@@ -185,7 +188,9 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
-		if (nodes_weight(*nodes) == 0) {
+		nodes_and(policy->v.nodes, policy->v.nodes,
+					node_states[N_HIGH_MEMORY]);
+		if (nodes_weight(policy->v.nodes) == 0) {
 			kmem_cache_free(policy_cache, policy);
 			return ERR_PTR(-EINVAL);
 		}
@@ -459,7 +464,7 @@ static void mpol_set_task_struct_flag(void)
 }

 /* Set the process memory policy */
-long do_set_mempolicy(int mode, nodemask_t *nodes)
+static long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
 	struct mempolicy *new;

@@ -494,9 +499,9 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 		*nodes = p->v.nodes;
 		break;
 	case MPOL_PREFERRED:
-		/* or use current node instead of online map? */
+		/* or use current node instead of memory_map? */
 		if (p->v.preferred_node < 0)
-			*nodes = node_online_map;
+			*nodes = node_states[N_HIGH_MEMORY];
 		else
 			node_set(p->v.preferred_node, *nodes);
 		break;
@@ -519,8 +524,8 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
 }

 /* Retrieve NUMA policy */
-long do_get_mempolicy(int *policy, nodemask_t *nmask,
-			unsigned long addr, unsigned long flags)
+static long do_get_mempolicy(int *policy, nodemask_t *nmask,
+			unsigned long addr, unsigned long flags)
 {
 	int err;
 	struct mm_struct *mm = current->mm;
@@ -528,8 +533,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	struct mempolicy *pol = current->mempolicy;

 	cpuset_update_task_memory_state();
-	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
+	if (flags &
+		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
 		return -EINVAL;
+
+	if (flags & MPOL_F_MEMS_ALLOWED) {
+		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
+			return -EINVAL;
+		*policy = 0;	/* just so it's initialized */
+		*nmask = cpuset_current_mems_allowed;
+		return 0;
+	}
+
 	if (flags & MPOL_F_ADDR) {
 		down_read(&mm->mmap_sem);
 		vma = find_vma_intersection(mm, addr, addr+1);
@@ -601,7 +616,8 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x
  * Migrate pages from one node to a target node.
  * Returns error or the number of pages not migrated.
  */
-int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
+static int migrate_to_node(struct mm_struct *mm, int source, int dest,
+			   int flags)
 {
 	nodemask_t nmask;
 	LIST_HEAD(pagelist);
@@ -706,12 +722,29 @@ out:
 }

+/*
+ * Allocate a new page for page migration based on vma policy.
+ * Start assuming that page is mapped by vma pointed to by @private.
+ * Search forward from there, if not.  N.B., this assumes that the
+ * list of pages handed to migrate_pages()--which is how we get here--
+ * is in virtual address order.
+ */
 static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
 {
 	struct vm_area_struct *vma = (struct vm_area_struct *)private;
+	unsigned long uninitialized_var(address);

-	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
-					page_address_in_vma(page, vma));
+	while (vma) {
+		address = page_address_in_vma(page, vma);
+		if (address != -EFAULT)
+			break;
+		vma = vma->vm_next;
+	}
+
+	/*
+	 * if !vma, alloc_page_vma() will use task or system default policy
+	 */
+	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }

 #else
@@ -732,8 +765,9 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
 }
 #endif

-long do_mbind(unsigned long start, unsigned long len,
-		unsigned long mode, nodemask_t *nmask, unsigned long flags)
+static long do_mbind(unsigned long start, unsigned long len,
+			unsigned long mode, nodemask_t *nmask,
+			unsigned long flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
@@ -924,7 +958,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
 	}
@@ -955,7 +989,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 		goto out;
 	}

-	if (!nodes_subset(new, node_online_map)) {
+	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -978,7 +1012,8 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 			unsigned long maxnode,
 			unsigned long addr, unsigned long flags)
 {
-	int err, pval;
+	int err;
+	int uninitialized_var(pval);
 	nodemask_t nodes;

 	if (nmask != NULL && maxnode < MAX_NUMNODES)
@@ -1077,21 +1112,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,

 #endif

-/* Return effective policy for a VMA */
+/*
+ * get_vma_policy(@task, @vma, @addr)
+ * @task - task for fallback if vma policy == default
+ * @vma - virtual memory area whose policy is sought
+ * @addr - address in @vma for shared policy lookup
+ *
+ * Returns effective policy for a VMA at specified address.
+ * Falls back to @task or system default policy, as necessary.
+ * Returned policy has extra reference count if shared, vma,
+ * or some other task's policy [show_numa_maps() can pass
+ * @task != current].  It is the caller's responsibility to
+ * free the reference in these cases.
+ */
 static struct mempolicy * get_vma_policy(struct task_struct *task,
 		struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = task->mempolicy;
+	int shared_pol = 0;

 	if (vma) {
-		if (vma->vm_ops && vma->vm_ops->get_policy)
+		if (vma->vm_ops && vma->vm_ops->get_policy) {
 			pol = vma->vm_ops->get_policy(vma, addr);
-		else if (vma->vm_policy &&
+			shared_pol = 1;	/* if pol non-NULL, add ref below */
+		} else if (vma->vm_policy &&
 				vma->vm_policy->policy != MPOL_DEFAULT)
 			pol = vma->vm_policy;
 	}
 	if (!pol)
 		pol = &default_policy;
+	else if (!shared_pol && pol != current->mempolicy)
+		mpol_get(pol);	/* vma or other task's policy */
 	return pol;
 }

@@ -1207,19 +1258,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 }

 #ifdef CONFIG_HUGETLBFS
-/* Return a zonelist suitable for a huge page allocation. */
+/*
+ * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
+ * @vma = virtual memory area whose policy is sought
+ * @addr = address in @vma for shared policy lookup and interleave policy
+ * @gfp_flags = for requested zone
+ * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
+ *
+ * Returns a zonelist suitable for a huge page allocation.
+ * If the effective policy is 'BIND, returns pointer to policy's zonelist.
+ * If it is also a policy for which get_vma_policy() returns an extra
+ * reference, we must hold that reference until after allocation.
+ * In that case, return policy via @mpol so hugetlb allocation can drop
+ * the reference.  For non-'BIND referenced policies, we can/do drop the
+ * reference here, so the caller doesn't need to know about the special case
+ * for default and current task policy.
+ */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
-				gfp_t gfp_flags)
+				gfp_t gfp_flags, struct mempolicy **mpol)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct zonelist *zl;

+	*mpol = NULL;		/* probably no unref needed */
 	if (pol->policy == MPOL_INTERLEAVE) {
 		unsigned nid;

 		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		__mpol_free(pol);		/* finished with pol */
 		return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
 	}
-	return zonelist_policy(GFP_HIGHUSER, pol);
+
+	zl = zonelist_policy(GFP_HIGHUSER, pol);
+	if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
+		if (pol->policy != MPOL_BIND)
+			__mpol_free(pol);	/* finished with pol */
+		else
+			*mpol = pol;	/* unref needed after allocation */
+	}
+	return zl;
 }
 #endif

@@ -1264,6 +1341,7 @@ struct page *
 alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct zonelist *zl;

 	cpuset_update_task_memory_state();

@@ -1273,7 +1351,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		return alloc_page_interleave(gfp, 0, nid);
 	}
-	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+	zl = zonelist_policy(gfp, pol);
+	if (pol != &default_policy && pol != current->mempolicy) {
+		/*
+		 * slow path: ref counted policy -- shared or vma
+		 */
+		struct page *page = __alloc_pages(gfp, 0, zl);
+		__mpol_free(pol);
+		return page;
+	}
+	/*
+	 * fast path: default or task policy
+	 */
+	return __alloc_pages(gfp, 0, zl);
 }

 /**
@@ -1316,7 +1406,6 @@ EXPORT_SYMBOL(alloc_pages_current);
  * keeps mempolicies cpuset relative after its cpuset moves.  See
  * further kernel/cpuset.c update_nodemask().
  */
-void *cpuset_being_rebound;

 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -1472,8 +1561,8 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }

-struct sp_node *
-sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
+static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+				struct mempolicy *pol)
 {
 	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);

@@ -1622,7 +1711,7 @@ void __init numa_policy_init(void)
 	 * fall back to the largest node if they're all smaller.
 	 */
 	nodes_clear(interleave_nodes);
-	for_each_online_node(nid) {
+	for_each_node_state(nid, N_HIGH_MEMORY) {
 		unsigned long total_pages = node_present_pages(nid);

 		/* Preserve the largest node */
@@ -1651,7 +1740,8 @@ void numa_default_policy(void)
 }

 /* Migrate a policy to a different set of nodes */
-void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
+static void mpol_rebind_policy(struct mempolicy *pol,
+			       const nodemask_t *newmask)
 {
 	nodemask_t *mpolmask;
 	nodemask_t tmp;
@@ -1872,6 +1962,7 @@ int show_numa_map(struct seq_file *m, void *v)
 	struct numa_maps *md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
+	struct mempolicy *pol;
 	int n;
 	char buffer[50];

@@ -1882,8 +1973,13 @@ int show_numa_map(struct seq_file *m, void *v)
 	if (!md)
 		return 0;

-	mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(priv->task, vma, vma->vm_start));
+	pol = get_vma_policy(priv->task, vma, vma->vm_start);
+	mpol_to_str(buffer, sizeof(buffer), pol);
+	/*
+	 * unref shared or other task's mempolicy
+	 */
+	if (pol != &default_policy && pol != current->mempolicy)
+		__mpol_free(pol);

 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

@@ -1902,7 +1998,7 @@ int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " huge");
 	} else {
 		check_pgd_range(vma, vma->vm_start, vma->vm_end,
-			&node_online_map, MPOL_MF_STATS, md);
+			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
 	}

 	if (!md->pages)
@@ -1929,7 +2025,7 @@ int show_numa_map(struct seq_file *m, void *v)
 	if (md->writeback)
 		seq_printf(m," writeback=%lu", md->writeback);

-	for_each_online_node(n)
+	for_each_node_state(n, N_HIGH_MEMORY)
 		if (md->node[n])
 			seq_printf(m, " N%d=%lu", n, md->node[n]);
 out:
@@ -1940,4 +2036,3 @@ out:
 	m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
-
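
Usage note (not part of the patch above): the MPOL_F_MEMS_ALLOWED flag handled in do_get_mempolicy() lets a task ask the kernel for the set of memory nodes it is currently allowed to use (its cpuset's mems_allowed) rather than for its memory policy. The sketch below is a hypothetical userspace caller; it assumes libnuma's <numaif.h> wrapper for the get_mempolicy() syscall, a header set that defines MPOL_F_MEMS_ALLOWED, and a 1024-bit nodemask, which covers common MAX_NUMNODES configurations.

/* Hypothetical example, not part of the patch: query the caller's allowed nodes. */
#include <stdio.h>
#include <numaif.h>	/* get_mempolicy(), MPOL_F_MEMS_ALLOWED (assumed available) */

int main(void)
{
	unsigned long mask[1024 / (8 * sizeof(unsigned long))] = { 0 };
	unsigned long maxnode = 8 * sizeof(mask);	/* must be >= kernel MAX_NUMNODES */
	unsigned long bits_per_word = 8 * sizeof(mask[0]);
	unsigned long n;
	int mode;	/* written by the kernel (set to 0 here), otherwise unused */

	/*
	 * With MPOL_F_MEMS_ALLOWED set (and MPOL_F_NODE/MPOL_F_ADDR clear),
	 * the kernel fills the nodemask from cpuset_current_mems_allowed.
	 */
	if (get_mempolicy(&mode, mask, maxnode, NULL, MPOL_F_MEMS_ALLOWED)) {
		perror("get_mempolicy(MPOL_F_MEMS_ALLOWED)");
		return 1;
	}

	for (n = 0; n < maxnode; n++)
		if (mask[n / bits_per_word] & (1UL << (n % bits_per_word)))
			printf("node %lu allowed\n", n);
	return 0;
}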