X-Git-Url: http://pilppa.org/gitweb/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=b2838c24e582c11b8e0680ca5d5b5f97389145ba;hb=37931db5bdce35d37a9bdf93082604620ba3341a;hp=71ced519c31cd78eef572d7fd308f82cf6b22d76;hpb=48f13bf3e742fca8aab87f6c39451d03bf5952d4;p=linux-2.6-omap-h63xx.git diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 71ced519c31..b2838c24e58 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -121,7 +123,7 @@ static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP /* - * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct + * MAX_ACTIVE_REGIONS determines the maximum number of distinct * ranges of memory (RAM) that may be registered with add_active_range(). * Ranges passed to add_active_range() will be merged if possible * so the number of times add_active_range() can be called is @@ -303,7 +305,6 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; - VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); /* * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO * and __GFP_HIGHMEM from hard or soft interrupt context. 
@@ -488,7 +489,7 @@ static void free_pages_bulk(struct zone *zone, int count, struct list_head *list, int order) { spin_lock(&zone->lock); - zone->all_unreclaimable = 0; + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; while (count--) { struct page *page; @@ -505,7 +506,7 @@ static void free_pages_bulk(struct zone *zone, int count, static void free_one_page(struct zone *zone, struct page *page, int order) { spin_lock(&zone->lock); - zone->all_unreclaimable = 0; + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; __free_one_page(page, zone, order); spin_unlock(&zone->lock); @@ -747,23 +748,6 @@ int move_freepages_block(struct zone *zone, struct page *page, int migratetype) return move_freepages(zone, start_page, end_page, migratetype); } -/* Return the page with the lowest PFN in the list */ -static struct page *min_page(struct list_head *list) -{ - unsigned long min_pfn = -1UL; - struct page *min_page = NULL, *page;; - - list_for_each_entry(page, list, lru) { - unsigned long pfn = page_to_pfn(page); - if (pfn < min_pfn) { - min_pfn = pfn; - min_page = page; - } - } - - return min_page; -} - /* Remove an element from the buddy allocator from the fallback list */ static struct page *__rmqueue_fallback(struct zone *zone, int order, int start_migratetype) @@ -787,11 +771,8 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order, if (list_empty(&area->free_list[migratetype])) continue; - /* Bias kernel allocations towards low pfns */ page = list_entry(area->free_list[migratetype].next, struct page, lru); - if (unlikely(start_migratetype != MIGRATE_MOVABLE)) - page = min_page(&area->free_list[migratetype]); area->nr_free--; /* @@ -866,8 +847,19 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) break; + + /* + * Split buddy pages returned by expand() are received here + * in physical page order. 
The page is added to the caller's + * list and the list head then moves forward. From the caller's + * perspective, the linked list is ordered by page number under + * some conditions. This is useful for IO devices that can + * merge IO requests if the physical pages are ordered + * properly. + */ list_add(&page->lru, list); set_page_private(page, migratetype); + list = &page->lru; } spin_unlock(&zone->lock); return i; @@ -1258,7 +1250,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, * skip over zones that are not allowed by the cpuset, or that have * been recently (in last second) found to be nearly full. See further * comments in mmzone.h. Reduces cache footprint of zonelist scans - * that have to skip over alot of full or unallowed zones. + * that have to skip over a lot of full or unallowed zones. * * If the zonelist cache is present in the passed in zonelist, then * returns a pointer to the allowed node mask (either the current @@ -1585,6 +1577,11 @@ nofail_alloc: if (page) goto got_pg; } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { + if (!try_set_zone_oom(zonelist)) { + schedule_timeout_uninterruptible(1); + goto restart; + } + /* * Go through the zonelist yet one more time, keep * very high watermark here, this is only to catch @@ -1593,14 +1590,19 @@ nofail_alloc: */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET); - if (page) + if (page) { + clear_zonelist_oom(zonelist); goto got_pg; + } /* The OOM killer will not help higher order allocs so fail */ - if (order > PAGE_ALLOC_COSTLY_ORDER) + if (order > PAGE_ALLOC_COSTLY_ORDER) { + clear_zonelist_oom(zonelist); goto nopage; + } out_of_memory(zonelist, gfp_mask, order); + clear_zonelist_oom(zonelist); goto restart; } @@ -1849,7 +1851,7 @@ void show_free_areas(void) K(zone_page_state(zone, NR_INACTIVE)), K(zone->present_pages), zone->pages_scanned, - (zone->all_unreclaimable ? 
"yes" : "no") + (zone_is_all_unreclaimable(zone) ? "yes" : "no") ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) @@ -2346,7 +2348,7 @@ void build_all_zonelists(void) __build_all_zonelists(NULL); cpuset_init_current_mems_allowed(); } else { - /* we have to stop all cpus to guaranntee there is no user + /* we have to stop all cpus to guarantee there is no user of zonelist */ stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); /* cpuset refresh routine should be here */ @@ -2564,7 +2566,7 @@ static void __meminit zone_init_free_lists(struct pglist_data *pgdat, memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) #endif -static int __devinit zone_batchsize(struct zone *zone) +static int zone_batchsize(struct zone *zone) { int batch; @@ -2852,7 +2854,7 @@ static int __meminit first_active_region_index_in_nid(int nid) /* * Basic iterator support. Return the next active range of PFNs for a node - * Note: nid == MAX_NUMNODES returns next region regardles of node + * Note: nid == MAX_NUMNODES returns next region regardless of node */ static int __meminit next_active_region_index_in_nid(int index, int nid) { @@ -3274,6 +3276,16 @@ static void inline setup_usemap(struct pglist_data *pgdat, #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE + +/* Return a sensible default order for the pageblock size. 
*/ +static inline int pageblock_default_order(void) +{ + if (HPAGE_SHIFT > PAGE_SHIFT) + return HUGETLB_PAGE_ORDER; + + return MAX_ORDER-1; +} + /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ static inline void __init set_pageblock_order(unsigned int order) { @@ -3289,7 +3301,16 @@ static inline void __init set_pageblock_order(unsigned int order) } #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ -/* Defined this way to avoid accidently referencing HUGETLB_PAGE_ORDER */ +/* + * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order() + * and pageblock_default_order() are unused as pageblock_order is set at + * compile-time (see include/linux/pageblock-flags.h). The stub takes no + * argument so that it matches the variable-size variant and its caller. + */ +static inline int pageblock_default_order(void) +{ + return MAX_ORDER-1; +} #define set_pageblock_order(x) do {} while (0) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ @@ -3370,11 +3391,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->nr_scan_active = 0; zone->nr_scan_inactive = 0; zap_zone_vm_stats(zone); - atomic_set(&zone->reclaim_in_progress, 0); + zone->flags = 0; if (!size) continue; - set_pageblock_order(HUGETLB_PAGE_ORDER); + set_pageblock_order(pageblock_default_order()); setup_usemap(pgdat, zone, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size, MEMMAP_EARLY); @@ -3417,7 +3438,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) mem_map = NODE_DATA(0)->node_mem_map; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP if (page_to_pfn(mem_map) != pgdat->node_start_pfn) - mem_map -= pgdat->node_start_pfn; + mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ } #endif @@ -4433,3 +4454,93 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, else __clear_bit(bitidx + start_bitidx, bitmap); } + +/* + * This is designed as sub function...plz see page_isolation.c 
also. + * Set/clear the ISOLATE type of the page block containing @page. + * The page allocator never allocates memory from an ISOLATE block. + */ + +int set_migratetype_isolate(struct page *page) +{ + struct zone *zone; + unsigned long flags; + int ret = -EBUSY; /* returned unless the block is MOVABLE */ + + zone = page_zone(page); + spin_lock_irqsave(&zone->lock, flags); + /* + * Only MOVABLE blocks can be isolated for now; more types may follow. + */ + if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) + goto out; + set_pageblock_migratetype(page, MIGRATE_ISOLATE); + move_freepages_block(zone, page, MIGRATE_ISOLATE); + ret = 0; +out: + spin_unlock_irqrestore(&zone->lock, flags); + if (!ret) + drain_all_local_pages(); /* only on success, after zone->lock is dropped */ + return ret; +} + +void unset_migratetype_isolate(struct page *page) +{ + struct zone *zone; + unsigned long flags; + zone = page_zone(page); + spin_lock_irqsave(&zone->lock, flags); + if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + goto out; /* block is not isolated: nothing to undo */ + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + move_freepages_block(zone, page, MIGRATE_MOVABLE); +out: + spin_unlock_irqrestore(&zone->lock, flags); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +/* + * All pages in the range must be isolated before calling this. 
+ */ +void +__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *page; + struct zone *zone; + int order, i; + unsigned long pfn; + unsigned long flags; + /* find the first valid pfn; its zone's lock is held for the whole walk */ + for (pfn = start_pfn; pfn < end_pfn; pfn++) + if (pfn_valid(pfn)) + break; + if (pfn == end_pfn) + return; /* no valid pfn in [start_pfn, end_pfn) */ + zone = page_zone(pfn_to_page(pfn)); + spin_lock_irqsave(&zone->lock, flags); + pfn = start_pfn; /* walk the range again from the start, now under zone->lock */ + while (pfn < end_pfn) { + if (!pfn_valid(pfn)) { + pfn++; + continue; + } + page = pfn_to_page(pfn); + BUG_ON(page_count(page)); /* page must have no references ... */ + BUG_ON(!PageBuddy(page)); /* ... and must be the head of a free buddy block */ + order = page_order(page); +#ifdef CONFIG_DEBUG_VM + printk(KERN_INFO "remove from free list %lx %d %lx\n", + pfn, 1 << order, end_pfn); +#endif + list_del(&page->lru); + rmv_page_order(page); + zone->free_area[order].nr_free--; + __mod_zone_page_state(zone, NR_FREE_PAGES, + - (1UL << order)); + for (i = 0; i < (1 << order); i++) + SetPageReserved((page+i)); /* mark every page of the block reserved */ + pfn += (1 << order); /* step over the whole block just removed */ + } + spin_unlock_irqrestore(&zone->lock, flags); +} +#endif