X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=b2838c24e582c11b8e0680ca5d5b5f97389145ba;hb=16da2f93054fc379522b93afc71d49751bd8be2b;hp=a44715e820583fcf1052af31a9e21e74e1190d94;hpb=a5d76b54a3f3a40385d7f76069a2feac9f1bad63;p=linux-2.6-omap-h63xx.git diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a44715e8205..b2838c24e58 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,7 @@ static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP /* - * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct + * MAX_ACTIVE_REGIONS determines the maximum number of distinct * ranges of memory (RAM) that may be registered with add_active_range(). * Ranges passed to add_active_range() will be merged if possible * so the number of times add_active_range() can be called is @@ -304,7 +305,6 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; - VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); /* * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO * and __GFP_HIGHMEM from hard or soft interrupt context. @@ -489,7 +489,7 @@ static void free_pages_bulk(struct zone *zone, int count, struct list_head *list, int order) { spin_lock(&zone->lock); - zone->all_unreclaimable = 0; + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; while (count--) { struct page *page; @@ -506,7 +506,7 @@ static void free_pages_bulk(struct zone *zone, int count, static void free_one_page(struct zone *zone, struct page *page, int order) { spin_lock(&zone->lock); - zone->all_unreclaimable = 0; + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; __free_one_page(page, zone, order); spin_unlock(&zone->lock); @@ -748,23 +748,6 @@ int move_freepages_block(struct zone *zone, struct page *page, int migratetype) return move_freepages(zone, start_page, end_page, migratetype); } -/* Return the page with the lowest PFN in the list */ -static struct page *min_page(struct list_head *list) -{ - unsigned long min_pfn = -1UL; - struct page *min_page = NULL, *page;; - - list_for_each_entry(page, list, lru) { - unsigned long pfn = page_to_pfn(page); - if (pfn < min_pfn) { - min_pfn = pfn; - min_page = page; - } - } - - return min_page; -} - /* Remove an element from the buddy allocator from the fallback list */ static struct page *__rmqueue_fallback(struct zone *zone, int order, int start_migratetype) @@ -788,11 +771,8 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order, if (list_empty(&area->free_list[migratetype])) continue; - /* Bias kernel allocations towards low pfns */ page = list_entry(area->free_list[migratetype].next, struct page, lru); - if (unlikely(start_migratetype != MIGRATE_MOVABLE)) - page = min_page(&area->free_list[migratetype]); area->nr_free--; /* @@ -867,8 +847,19 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) break; + + /* + * Split buddy pages returned by expand() are received here + * in physical page order. The page is added to the callers and + * list and the list head then moves forward. From the callers + * perspective, the linked list is ordered by page number in + * some conditions. This is useful for IO devices that can + * merge IO requests if the physical pages are ordered + * properly. 
+		 */
 		list_add(&page->lru, list);
 		set_page_private(page, migratetype);
+		list = &page->lru;
 	}
 	spin_unlock(&zone->lock);
 	return i;
@@ -1259,7 +1250,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
  * skip over zones that are not allowed by the cpuset, or that have
  * been recently (in last second) found to be nearly full.  See further
  * comments in mmzone.h.  Reduces cache footprint of zonelist scans
- * that have to skip over alot of full or unallowed zones.
+ * that have to skip over a lot of full or unallowed zones.
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
@@ -1586,6 +1577,11 @@ nofail_alloc:
 			if (page)
 				goto got_pg;
 		} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
+			if (!try_set_zone_oom(zonelist)) {
+				schedule_timeout_uninterruptible(1);
+				goto restart;
+			}
+
 			/*
 			 * Go through the zonelist yet one more time, keep
 			 * very high watermark here, this is only to catch
@@ -1594,14 +1590,19 @@ nofail_alloc:
 			 */
 			page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 				zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
-			if (page)
+			if (page) {
+				clear_zonelist_oom(zonelist);
 				goto got_pg;
+			}
 
 			/* The OOM killer will not help higher order allocs so fail */
-			if (order > PAGE_ALLOC_COSTLY_ORDER)
+			if (order > PAGE_ALLOC_COSTLY_ORDER) {
+				clear_zonelist_oom(zonelist);
 				goto nopage;
+			}
 
 			out_of_memory(zonelist, gfp_mask, order);
+			clear_zonelist_oom(zonelist);
 			goto restart;
 		}
 
@@ -1850,7 +1851,7 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_INACTIVE)),
 			K(zone->present_pages),
 			zone->pages_scanned,
-			(zone->all_unreclaimable ? "yes" : "no")
+			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
 			);
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
@@ -2347,7 +2348,7 @@ void build_all_zonelists(void)
 		__build_all_zonelists(NULL);
 		cpuset_init_current_mems_allowed();
 	} else {
-		/* we have to stop all cpus to guaranntee there is no user
+		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
 		stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
 		/* cpuset refresh routine should be here */
@@ -2565,7 +2566,7 @@ static void __meminit zone_init_free_lists(struct pglist_data *pgdat,
 	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
 #endif
 
-static int __devinit zone_batchsize(struct zone *zone)
+static int zone_batchsize(struct zone *zone)
 {
 	int batch;
 
@@ -2853,7 +2854,7 @@ static int __meminit first_active_region_index_in_nid(int nid)
 
 /*
  * Basic iterator support. Return the next active range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns next region regardles of node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
  */
 static int __meminit next_active_region_index_in_nid(int index, int nid)
 {
@@ -3275,6 +3276,16 @@ static void inline setup_usemap(struct pglist_data *pgdat,
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
+
+/* Return a sensible default order for the pageblock size */
+static inline int pageblock_default_order(void)
+{
+	if (HPAGE_SHIFT > PAGE_SHIFT)
+		return HUGETLB_PAGE_ORDER;
+
+	return MAX_ORDER-1;
+}
+
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
 static inline void __init set_pageblock_order(unsigned int order)
 {
@@ -3290,7 +3301,16 @@ static inline void __init set_pageblock_order(unsigned int order)
 }
 
 #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
-/* Defined this way to avoid accidently referencing HUGETLB_PAGE_ORDER */
+/*
+ * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order()
+ * and pageblock_default_order() are unused as pageblock_order is set
+ * at compile-time. See include/linux/pageblock-flags.h for the values of
+ * pageblock_order based on the kernel config
+ */
+static inline int pageblock_default_order(unsigned int order)
+{
+	return MAX_ORDER-1;
+}
 #define set_pageblock_order(x)	do {} while (0)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
@@ -3371,11 +3391,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		zone->nr_scan_active = 0;
 		zone->nr_scan_inactive = 0;
 		zap_zone_vm_stats(zone);
-		atomic_set(&zone->reclaim_in_progress, 0);
+		zone->flags = 0;
 		if (!size)
 			continue;
 
-		set_pageblock_order(HUGETLB_PAGE_ORDER);
+		set_pageblock_order(pageblock_default_order());
 		setup_usemap(pgdat, zone, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
@@ -3418,7 +3438,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		mem_map = NODE_DATA(0)->node_mem_map;
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
-			mem_map -= pgdat->node_start_pfn;
+			mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 	}
 #endif
@@ -4477,3 +4497,50 @@ void unset_migratetype_isolate(struct page *page)
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * All pages in the range must be isolated before calling this.
+ */
+void
+__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *page;
+	struct zone *zone;
+	int order, i;
+	unsigned long pfn;
+	unsigned long flags;
+	/* find the first valid pfn */
+	for (pfn = start_pfn; pfn < end_pfn; pfn++)
+		if (pfn_valid(pfn))
+			break;
+	if (pfn == end_pfn)
+		return;
+	zone = page_zone(pfn_to_page(pfn));
+	spin_lock_irqsave(&zone->lock, flags);
+	pfn = start_pfn;
+	while (pfn < end_pfn) {
+		if (!pfn_valid(pfn)) {
+			pfn++;
+			continue;
+		}
+		page = pfn_to_page(pfn);
+		BUG_ON(page_count(page));
+		BUG_ON(!PageBuddy(page));
+		order = page_order(page);
+#ifdef CONFIG_DEBUG_VM
+		printk(KERN_INFO "remove from free list %lx %d %lx\n",
+		       pfn, 1 << order, end_pfn);
+#endif
+		list_del(&page->lru);
+		rmv_page_order(page);
+		zone->free_area[order].nr_free--;
+		__mod_zone_page_state(zone, NR_FREE_PAGES,
+				      - (1UL << order));
+		for (i = 0; i < (1 << order); i++)
+			SetPageReserved((page+i));
+		pfn += (1 << order);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+#endif
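
A note on the rmqueue_bulk() hunk above: the new "list = &page->lru;" statement advances the insertion point after every list_add(), so pages pulled from the buddy allocator land on the caller's per-cpu list in ascending physical (pfn) order, which is what the added comment means by being useful for IO devices that can merge requests. The stand-alone userspace sketch below only illustrates that list manipulation; the simplified list_add(), struct fake_page, and the pfn values 100-103 are invented stand-ins for the kernel's <linux/list.h> helpers and struct page, not code from this patch.

/*
 * Editor's sketch (not kernel code): why rmqueue_bulk() moves the list
 * head forward after each list_add().  list_add() below copies the
 * kernel helper's insert-after-head semantics; fake_page and the pfn
 * values are made up for the example.
 */
#include <stddef.h>
#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

/* Insert "new" immediately after "head", like the kernel's list_add(). */
static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

struct fake_page {
	unsigned long pfn;		/* stand-in for page_to_pfn(page) */
	struct list_head lru;
};

int main(void)
{
	struct list_head pcp_list = { &pcp_list, &pcp_list };
	struct fake_page pages[] = { { 100 }, { 101 }, { 102 }, { 103 } };
	struct list_head *list = &pcp_list;
	struct list_head *pos;
	size_t i;

	/*
	 * Pages come back from __rmqueue()/expand() in ascending physical
	 * order.  Without the "list = &pages[i].lru" step, every list_add()
	 * would prepend at the head and the final list would be in
	 * descending pfn order.
	 */
	for (i = 0; i < sizeof(pages) / sizeof(pages[0]); i++) {
		list_add(&pages[i].lru, list);
		list = &pages[i].lru;	/* the behaviour the patch adds */
	}

	for (pos = pcp_list.next; pos != &pcp_list; pos = pos->next) {
		struct fake_page *p = (struct fake_page *)
			((char *)pos - offsetof(struct fake_page, lru));
		printf("pfn %lu\n", p->pfn);	/* prints 100 101 102 103 */
	}
	return 0;
}

Run as-is it prints the pfns in ascending order; drop the "list = &pages[i].lru;" line and every insertion lands at the list head, so the output reverses to 103 102 101 100, which is the head-insertion behaviour the patch moves away from.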