[PATCH] USB: UHCI: fix obscure bug in enqueue()

[linux-2.6-omap-h63xx.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index dc523a1f270db11ad4930fa47548232c14e6e4ad..253a450c400df06898de5d864ff2c8863c560043 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -39,6 +39,7 @@
  #include <linux/mempolicy.h>
  
  #include <asm/tlbflush.h>
+#include <asm/div64.h>
  #include "internal.h"
  
  /*
@@ -51,6 +52,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
  EXPORT_SYMBOL(node_possible_map);
  unsigned long totalram_pages __read_mostly;
  unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
  long nr_swap_pages;
  int percpu_pagelist_fraction;
  
@@ -151,7 +153,8 @@ static void bad_page(struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback );
+                       1 << PG_writeback |
+                       1 << PG_buddy );
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
@@ -230,18 +233,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
   * zone->lock is already acquired when we use these.
   * So, we don't need atomic page->flags operations here.
   */
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
         return page_private(page);
  }
  
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
         set_page_private(page, order);
-       __SetPagePrivate(page);
+       __SetPageBuddy(page);
  }
  
  static inline void rmv_page_order(struct page *page)
  {
-       __ClearPagePrivate(page);
+       __ClearPageBuddy(page);
         set_page_private(page, 0);
  }
  
@@ -280,11 +285,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
   * This function checks whether a page is free && is the buddy
   * we can do coalesce a page and its buddy if
   * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
   *
+ * For recording a page's order, we use page_private(page).
   */
  static inline int page_is_buddy(struct page *page, int order)
  {
@@ -293,11 +300,11 @@ static inline int page_is_buddy(struct page *page, int order)
                 return 0;
  #endif
  
-       if (PagePrivate(page)           &&
-           (page_order(page) == order) &&
-            page_count(page) == 0)
-               return 1;
-       return 0;
+       if (PageBuddy(page) && page_order(page) == order) {
+               BUG_ON(page_count(page) != 0);
+               return 1;
+       }
+       return 0;
  }
  
  /*
@@ -313,7 +320,7 @@ static inline int page_is_buddy(struct page *page, int order)
   * as necessary, plus some accounting needed to play nicely with other
   * parts of the VM system.
   * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
   * order is recorded in page_private(page) field.
   * So when we are allocating or freeing one, we can derive the state of the
   * other.  That is, if we allocate a small block, and both were   
@@ -376,7 +383,8 @@ static inline int free_pages_check(struct page *page)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
@@ -524,7 +532,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
  
         /*
@@ -942,7 +951,7 @@ restart:
                 goto got_pg;
  
         do {
-               if (cpuset_zone_allowed(*z, gfp_mask))
+               if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
                         wakeup_kswapd(*z, order);
         } while (*(++z));
  
@@ -961,7 +970,8 @@ restart:
                 alloc_flags |= ALLOC_HARDER;
         if (gfp_mask & __GFP_HIGH)
                 alloc_flags |= ALLOC_HIGH;
-       alloc_flags |= ALLOC_CPUSET;
+       if (wait)
+               alloc_flags |= ALLOC_CPUSET;
  
         /*
          * Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -1954,7 +1964,7 @@ static inline void free_zone_pagesets(int cpu)
         }
  }
  
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
                 unsigned long action,
                 void *hcpu)
  {
@@ -2115,14 +2125,22 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
  #ifdef CONFIG_FLAT_NODE_MEM_MAP
         /* ia64 gets its own node_mem_map, before this, without bootmem */
         if (!pgdat->node_mem_map) {
-               unsigned long size;
+               unsigned long size, start, end;
                 struct page *map;
  
-               size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+               /*
+                * The zone's endpoints aren't required to be MAX_ORDER
+                * aligned, but the node_mem_map endpoints must be aligned
+                * for the buddy allocator to function correctly.
+                */
+               start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+               end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+               end = ALIGN(end, MAX_ORDER_NR_PAGES);
+               size =  (end - start) * sizeof(struct page);
                 map = alloc_remap(pgdat->node_id, size);
                 if (!map)
                         map = alloc_bootmem_node(pgdat, size);
-               pgdat->node_mem_map = map;
+               pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
         }
  #ifdef CONFIG_FLATMEM
         /*
@@ -2471,6 +2489,38 @@ void __init page_alloc_init(void)
         hotcpu_notifier(page_alloc_cpu_notify, 0);
  }
  
+/*
+ * calculate_totalreserve_pages - recompute totalreserve_pages; called when
+ *     sysctl_lower_zone_reserve_ratio or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+       struct pglist_data *pgdat;
+       unsigned long reserve_pages = 0;
+       int i, j;
+
+       for_each_online_pgdat(pgdat) {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
+                       struct zone *zone = pgdat->node_zones + i;
+                       unsigned long max = 0;
+
+                       /* Find this zone's maximum lowmem_reserve[j], j >= i */
+                       for (j = i; j < MAX_NR_ZONES; j++) {
+                               if (zone->lowmem_reserve[j] > max)
+                                       max = zone->lowmem_reserve[j];
+                       }
+
+                       /* we treat pages_high as reserved pages. */
+                       max += zone->pages_high;
+
+                       if (max > zone->present_pages)
+                               max = zone->present_pages;
+                       reserve_pages += max;
+               }
+       }
+       totalreserve_pages = reserve_pages;
+}
+
  /*
   * setup_per_zone_lowmem_reserve - called whenever
   *     sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
@@ -2502,6 +2552,9 @@ static void setup_per_zone_lowmem_reserve(void)
                         }
                 }
         }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
  }
  
  /*
@@ -2523,9 +2576,11 @@ void setup_per_zone_pages_min(void)
         }
  
         for_each_zone(zone) {
-               unsigned long tmp;
+               u64 tmp;
+
                 spin_lock_irqsave(&zone->lru_lock, flags);
-               tmp = (pages_min * zone->present_pages) / lowmem_pages;
+               tmp = (u64)pages_min * zone->present_pages;
+               do_div(tmp, lowmem_pages);
                 if (is_highmem(zone)) {
                         /*
                          * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -2552,10 +2607,13 @@ void setup_per_zone_pages_min(void)
                         zone->pages_min = tmp;
                 }
  
-               zone->pages_low   = zone->pages_min + tmp / 4;
-               zone->pages_high  = zone->pages_min + tmp / 2;
+               zone->pages_low   = zone->pages_min + (tmp >> 2);
+               zone->pages_high  = zone->pages_min + (tmp >> 1);
                 spin_unlock_irqrestore(&zone->lru_lock, flags);
         }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
  }
  
  /*