[PATCH] USB: UHCI: fix obscure bug in enqueue()

[linux-2.6-omap-h63xx.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 338a02bb004d3b3f37f49711ad05baaaa1e7d529..253a450c400df06898de5d864ff2c8863c560043 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -39,6 +39,7 @@
  #include <linux/mempolicy.h>
  
  #include <asm/tlbflush.h>
+#include <asm/div64.h>
  #include "internal.h"
  
  /*
@@ -49,9 +50,9 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
  EXPORT_SYMBOL(node_online_map);
  nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
  EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
  unsigned long totalram_pages __read_mostly;
  unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
  long nr_swap_pages;
  int percpu_pagelist_fraction;
  
@@ -152,7 +153,8 @@ static void bad_page(struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback );
+                       1 << PG_writeback |
+                       1 << PG_buddy );
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
@@ -231,18 +233,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
   * zone->lock is already acquired when we use these.
   * So, we don't need atomic page->flags operations here.
   */
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
         return page_private(page);
  }
  
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
         set_page_private(page, order);
-       __SetPagePrivate(page);
+       __SetPageBuddy(page);
  }
  
  static inline void rmv_page_order(struct page *page)
  {
-       __ClearPagePrivate(page);
+       __ClearPageBuddy(page);
         set_page_private(page, 0);
  }
  
@@ -281,11 +285,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
   * This function checks whether a page is free && is the buddy
   * we can do coalesce a page and its buddy if
   * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
   *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
   */
  static inline int page_is_buddy(struct page *page, int order)
  {
@@ -294,11 +300,11 @@ static inline int page_is_buddy(struct page *page, int order)
                 return 0;
  #endif
  
-       if (PagePrivate(page)           &&
-           (page_order(page) == order) &&
-            page_count(page) == 0)
-               return 1;
-       return 0;
+       if (PageBuddy(page) && page_order(page) == order) {
+               BUG_ON(page_count(page) != 0);
+               return 1;
+       }
+       return 0;
  }
  
  /*
@@ -314,7 +320,7 @@ static inline int page_is_buddy(struct page *page, int order)
   * as necessary, plus some accounting needed to play nicely with other
   * parts of the VM system.
   * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
   * order is recorded in page_private(page) field.
   * So when we are allocating or freeing one, we can derive the state of the
   * other.  That is, if we allocate a small block, and both were   
@@ -377,7 +383,8 @@ static inline int free_pages_check(struct page *page)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
@@ -525,7 +532,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
  
         /*
@@ -943,7 +951,7 @@ restart:
                 goto got_pg;
  
         do {
-               if (cpuset_zone_allowed(*z, gfp_mask))
+               if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
                         wakeup_kswapd(*z, order);
         } while (*(++z));
  
@@ -962,7 +970,8 @@ restart:
                 alloc_flags |= ALLOC_HARDER;
         if (gfp_mask & __GFP_HIGH)
                 alloc_flags |= ALLOC_HIGH;
-       alloc_flags |= ALLOC_CPUSET;
+       if (wait)
+               alloc_flags |= ALLOC_CPUSET;
  
         /*
          * Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -1201,7 +1210,7 @@ unsigned int nr_free_highpages (void)
         pg_data_t *pgdat;
         unsigned int pages = 0;
  
-       for_each_pgdat(pgdat)
+       for_each_online_pgdat(pgdat)
                 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
  
         return pages;
@@ -1343,7 +1352,7 @@ void get_zone_counts(unsigned long *active,
         *active = 0;
         *inactive = 0;
         *free = 0;
-       for_each_pgdat(pgdat) {
+       for_each_online_pgdat(pgdat) {
                 unsigned long l, m, n;
                 __get_zone_counts(&l, &m, &n, pgdat);
                 *active += l;
@@ -1955,7 +1964,7 @@ static inline void free_zone_pagesets(int cpu)
         }
  }
  
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
                 unsigned long action,
                 void *hcpu)
  {
@@ -2042,7 +2051,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
         zone_wait_table_init(zone, size);
         pgdat->nr_zones = zone_idx(zone) + 1;
  
-       zone->zone_mem_map = pfn_to_page(zone_start_pfn);
         zone->zone_start_pfn = zone_start_pfn;
  
         memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2117,14 +2125,22 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
  #ifdef CONFIG_FLAT_NODE_MEM_MAP
         /* ia64 gets its own node_mem_map, before this, without bootmem */
         if (!pgdat->node_mem_map) {
-               unsigned long size;
+               unsigned long size, start, end;
                 struct page *map;
  
-               size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+               /*
+                * The zone's endpoints aren't required to be MAX_ORDER
+                * aligned, but the node_mem_map endpoints must be so
+                * aligned for the buddy allocator to function correctly.
+                */
+               start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+               end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+               end = ALIGN(end, MAX_ORDER_NR_PAGES);
+               size =  (end - start) * sizeof(struct page);
                 map = alloc_remap(pgdat->node_id, size);
                 if (!map)
                         map = alloc_bootmem_node(pgdat, size);
-               pgdat->node_mem_map = map;
+               pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
         }
  #ifdef CONFIG_FLATMEM
         /*
@@ -2170,8 +2186,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
  {
         pg_data_t *pgdat;
         loff_t node = *pos;
-
-       for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+       for (pgdat = first_online_pgdat();
+            pgdat && node;
+            pgdat = next_online_pgdat(pgdat))
                 --node;
  
         return pgdat;
@@ -2182,7 +2199,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
         pg_data_t *pgdat = (pg_data_t *)arg;
  
         (*pos)++;
-       return pgdat->pgdat_next;
+       return next_online_pgdat(pgdat);
  }
  
  static void frag_stop(struct seq_file *m, void *arg)
@@ -2472,6 +2489,38 @@ void __init page_alloc_init(void)
         hotcpu_notifier(page_alloc_cpu_notify, 0);
  }
  
+/*
+ * calculate_totalreserve_pages - recompute totalreserve_pages; called when
+ *     sysctl_lower_zone_reserve_ratio or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+       struct pglist_data *pgdat;
+       unsigned long reserve_pages = 0;
+       int i, j;
+
+       for_each_online_pgdat(pgdat) {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
+                       struct zone *zone = pgdat->node_zones + i;
+                       unsigned long max = 0;
+
+                       /* Largest lowmem_reserve[] entry at index i or above */
+                       for (j = i; j < MAX_NR_ZONES; j++) {
+                               if (zone->lowmem_reserve[j] > max)
+                                       max = zone->lowmem_reserve[j];
+                       }
+
+                       /* pages_high is counted as reserved pages too. */
+                       max += zone->pages_high;
+
+                       if (max > zone->present_pages) /* cap at zone size */
+                               max = zone->present_pages;
+                       reserve_pages += max;
+               }
+       }
+       totalreserve_pages = reserve_pages; /* sum over all online nodes */
+}
+
  /*
   * setup_per_zone_lowmem_reserve - called whenever
   *     sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
@@ -2483,7 +2532,7 @@ static void setup_per_zone_lowmem_reserve(void)
         struct pglist_data *pgdat;
         int j, idx;
  
-       for_each_pgdat(pgdat) {
+       for_each_online_pgdat(pgdat) {
                 for (j = 0; j < MAX_NR_ZONES; j++) {
                         struct zone *zone = pgdat->node_zones + j;
                         unsigned long present_pages = zone->present_pages;
@@ -2503,6 +2552,9 @@ static void setup_per_zone_lowmem_reserve(void)
                         }
                 }
         }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
  }
  
  /*
@@ -2524,9 +2576,11 @@ void setup_per_zone_pages_min(void)
         }
  
         for_each_zone(zone) {
-               unsigned long tmp;
+               u64 tmp;
+
                 spin_lock_irqsave(&zone->lru_lock, flags);
-               tmp = (pages_min * zone->present_pages) / lowmem_pages;
+               tmp = (u64)pages_min * zone->present_pages;
+               do_div(tmp, lowmem_pages);
                 if (is_highmem(zone)) {
                         /*
                          * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -2553,10 +2607,13 @@ void setup_per_zone_pages_min(void)
                         zone->pages_min = tmp;
                 }
  
-               zone->pages_low   = zone->pages_min + tmp / 4;
-               zone->pages_high  = zone->pages_min + tmp / 2;
+               zone->pages_low   = zone->pages_min + (tmp >> 2);
+               zone->pages_high  = zone->pages_min + (tmp >> 1);
                 spin_unlock_irqrestore(&zone->lru_lock, flags);
         }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
  }
  
  /*
@@ -2745,3 +2802,44 @@ void *__init alloc_large_system_hash(const char *tablename,
  
         return table;
  }
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * Out-of-line pfn <-> page translation, one pair per memory model.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+       return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+       return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+       int nid = arch_pfn_to_nid(pfn);
+       return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+       return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+       return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+       long section_id = page_to_section(page);
+       return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */