[PATCH] vmscan: use unsigned longs

[linux-2.6-omap-h63xx.git] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c

index b0af7593d01e315a83c79ec6841c9a4a3b91c1e1..62cd7cd257e3715b19721dd8a5dc77abe423a1af 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -52,9 +52,6 @@ typedef enum {
  } pageout_t;
  
  struct scan_control {
-       /* Ask refill_inactive_zone, or shrink_cache to scan this many pages */
-       unsigned long nr_to_scan;
-
         /* Incremented by the number of inactive pages that were scanned */
         unsigned long nr_scanned;
  
@@ -63,9 +60,6 @@ struct scan_control {
  
         unsigned long nr_mapped;        /* From page_state */
  
-       /* Ask shrink_caches, or shrink_zone to scan at this priority */
-       unsigned int priority;
-
         /* This context's GFP mask */
         gfp_t gfp_mask;
  
@@ -183,10 +177,11 @@ EXPORT_SYMBOL(remove_shrinker);
   *
   * Returns the number of slab objects which we shrunk.
   */
-int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
+unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+                       unsigned long lru_pages)
  {
         struct shrinker *shrinker;
-       int ret = 0;
+       unsigned long ret = 0;
  
         if (scanned == 0)
                 scanned = SWAP_CLUSTER_MAX;
@@ -416,12 +411,13 @@ cannot_free:
  /*
   * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
   */
-static int shrink_list(struct list_head *page_list, struct scan_control *sc)
+static unsigned long shrink_list(struct list_head *page_list,
+                               struct scan_control *sc)
  {
         LIST_HEAD(ret_pages);
         struct pagevec freed_pvec;
         int pgactivate = 0;
-       int reclaimed = 0;
+       unsigned long reclaimed = 0;
  
         cond_resched();
  
@@ -605,11 +601,11 @@ static inline void move_to_lru(struct page *page)
   *
   * returns the number of pages put back.
   */
-int putback_lru_pages(struct list_head *l)
+unsigned long putback_lru_pages(struct list_head *l)
  {
         struct page *page;
         struct page *page2;
-       int count = 0;
+       unsigned long count = 0;
  
         list_for_each_entry_safe(page, page2, l, lru) {
                 move_to_lru(page);
@@ -700,7 +696,7 @@ int migrate_page_remove_references(struct page *newpage,
          * the page.
          */
         if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
-               return 1;
+               return -EAGAIN;
  
         /*
          * Establish swap ptes for anonymous pages or destroy pte
@@ -721,13 +717,15 @@ int migrate_page_remove_references(struct page *newpage,
          * If the page was not migrated then the PageSwapCache bit
          * is still set and the operation may continue.
          */
-       try_to_unmap(page, 1);
+       if (try_to_unmap(page, 1) == SWAP_FAIL)
+               /* A vma has VM_LOCKED set -> Permanent failure */
+               return -EPERM;
  
         /*
          * Give up if we were unable to remove all mappings.
          */
         if (page_mapcount(page))
-               return 1;
+               return -EAGAIN;
  
         write_lock_irq(&mapping->tree_lock);
  
@@ -738,7 +736,7 @@ int migrate_page_remove_references(struct page *newpage,
         if (!page_mapping(page) || page_count(page) != nr_refs ||
                         *radix_pointer != page) {
                 write_unlock_irq(&mapping->tree_lock);
-               return 1;
+               return -EAGAIN;
         }
  
         /*
@@ -813,10 +811,14 @@ EXPORT_SYMBOL(migrate_page_copy);
   */
  int migrate_page(struct page *newpage, struct page *page)
  {
+       int rc;
+
         BUG_ON(PageWriteback(page));    /* Writeback must be complete */
  
-       if (migrate_page_remove_references(newpage, page, 2))
-               return -EAGAIN;
+       rc = migrate_page_remove_references(newpage, page, 2);
+
+       if (rc)
+               return rc;
  
         migrate_page_copy(newpage, page);
  
@@ -848,11 +850,11 @@ EXPORT_SYMBOL(migrate_page);
   *
   * Return: Number of pages not migrated when "to" ran empty.
   */
-int migrate_pages(struct list_head *from, struct list_head *to,
+unsigned long migrate_pages(struct list_head *from, struct list_head *to,
                   struct list_head *moved, struct list_head *failed)
  {
-       int retry;
-       int nr_failed = 0;
+       unsigned long retry;
+       unsigned long nr_failed = 0;
         int pass = 0;
         struct page *page;
         struct page *page2;
@@ -1036,9 +1038,10 @@ int isolate_lru_page(struct page *page)
         if (PageLRU(page)) {
                 struct zone *zone = page_zone(page);
                 spin_lock_irq(&zone->lru_lock);
-               if (TestClearPageLRU(page)) {
+               if (PageLRU(page)) {
                         ret = 1;
                         get_page(page);
+                       ClearPageLRU(page);
                         if (PageActive(page))
                                 del_page_from_active_list(zone, page);
                         else
@@ -1068,32 +1071,35 @@ int isolate_lru_page(struct page *page)
   *
   * returns how many pages were moved onto *@dst.
   */
-static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
-                            struct list_head *dst, int *scanned)
+static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+               struct list_head *src, struct list_head *dst,
+               unsigned long *scanned)
  {
-       int nr_taken = 0;
+       unsigned long nr_taken = 0;
         struct page *page;
-       int scan = 0;
+       unsigned long scan = 0;
  
         while (scan++ < nr_to_scan && !list_empty(src)) {
+               struct list_head *target;
                 page = lru_to_page(src);
                 prefetchw_prev_lru_page(page, src, flags);
  
-               if (!TestClearPageLRU(page))
-                       BUG();
+               BUG_ON(!PageLRU(page));
+
                 list_del(&page->lru);
-               if (get_page_testone(page)) {
+               target = src;
+               if (likely(get_page_unless_zero(page))) {
                         /*
-                        * It is being freed elsewhere
+                        * Be careful not to clear PageLRU until after we're
+                        * sure the page is not being freed elsewhere -- the
+                        * page release code relies on it.
                          */
-                       __put_page(page);
-                       SetPageLRU(page);
-                       list_add(&page->lru, src);
-                       continue;
-               } else {
-                       list_add(&page->lru, dst);
+                       ClearPageLRU(page);
+                       target = dst;
                         nr_taken++;
-               }
+               } /* else it is being freed elsewhere */
+
+               list_add(&page->lru, target);
         }
  
         *scanned = scan;
@@ -1103,21 +1109,22 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
  /*
   * shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed
   */
-static void shrink_cache(struct zone *zone, struct scan_control *sc)
+static void shrink_cache(unsigned long max_scan, struct zone *zone,
+                       struct scan_control *sc)
  {
         LIST_HEAD(page_list);
         struct pagevec pvec;
-       int max_scan = sc->nr_to_scan;
+       unsigned long nr_scanned = 0;
  
         pagevec_init(&pvec, 1);
  
         lru_add_drain();
         spin_lock_irq(&zone->lru_lock);
-       while (max_scan > 0) {
+       do {
                 struct page *page;
-               int nr_taken;
-               int nr_scan;
-               int nr_freed;
+               unsigned long nr_taken;
+               unsigned long nr_scan;
+               unsigned long nr_freed;
  
                 nr_taken = isolate_lru_pages(sc->swap_cluster_max,
                                              &zone->inactive_list,
@@ -1129,7 +1136,7 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                 if (nr_taken == 0)
                         goto done;
  
-               max_scan -= nr_scan;
+               nr_scanned += nr_scan;
                 nr_freed = shrink_list(&page_list, sc);
  
                 local_irq_disable();
@@ -1146,8 +1153,8 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                  */
                 while (!list_empty(&page_list)) {
                         page = lru_to_page(&page_list);
-                       if (TestSetPageLRU(page))
-                               BUG();
+                       BUG_ON(PageLRU(page));
+                       SetPageLRU(page);
                         list_del(&page->lru);
                         if (PageActive(page))
                                 add_page_to_active_list(zone, page);
@@ -1159,7 +1166,7 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                                 spin_lock_irq(&zone->lru_lock);
                         }
                 }
-       }
+       } while (nr_scanned < max_scan);
         spin_unlock_irq(&zone->lru_lock);
  done:
         pagevec_release(&pvec);
@@ -1183,12 +1190,12 @@ done:
   * But we had to alter page->flags anyway.
   */
  static void
-refill_inactive_zone(struct zone *zone, struct scan_control *sc)
+refill_inactive_zone(unsigned long nr_pages, struct zone *zone,
+                       struct scan_control *sc)
  {
-       int pgmoved;
+       unsigned long pgmoved;
         int pgdeactivate = 0;
-       int pgscanned;
-       int nr_pages = sc->nr_to_scan;
+       unsigned long pgscanned;
         LIST_HEAD(l_hold);      /* The pages which were snipped off */
         LIST_HEAD(l_inactive);  /* Pages to go onto the inactive_list */
         LIST_HEAD(l_active);    /* Pages to go onto the active_list */
@@ -1266,10 +1273,11 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
         while (!list_empty(&l_inactive)) {
                 page = lru_to_page(&l_inactive);
                 prefetchw_prev_lru_page(page, &l_inactive, flags);
-               if (TestSetPageLRU(page))
-                       BUG();
-               if (!TestClearPageActive(page))
-                       BUG();
+               BUG_ON(PageLRU(page));
+               SetPageLRU(page);
+               BUG_ON(!PageActive(page));
+               ClearPageActive(page);
+
                 list_move(&page->lru, &zone->inactive_list);
                 pgmoved++;
                 if (!pagevec_add(&pvec, page)) {
@@ -1295,8 +1303,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
         while (!list_empty(&l_active)) {
                 page = lru_to_page(&l_active);
                 prefetchw_prev_lru_page(page, &l_active, flags);
-               if (TestSetPageLRU(page))
-                       BUG();
+               BUG_ON(PageLRU(page));
+               SetPageLRU(page);
                 BUG_ON(!PageActive(page));
                 list_move(&page->lru, &zone->active_list);
                 pgmoved++;
@@ -1321,11 +1329,12 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
  /*
   * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
   */
-static void
-shrink_zone(struct zone *zone, struct scan_control *sc)
+static void shrink_zone(int priority, struct zone *zone,
+                       struct scan_control *sc)
  {
         unsigned long nr_active;
         unsigned long nr_inactive;
+       unsigned long nr_to_scan;
  
         atomic_inc(&zone->reclaim_in_progress);
  
@@ -1333,14 +1342,14 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
          * Add one to `nr_to_scan' just to make sure that the kernel will
          * slowly sift through the active list.
          */
-       zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+       zone->nr_scan_active += (zone->nr_active >> priority) + 1;
         nr_active = zone->nr_scan_active;
         if (nr_active >= sc->swap_cluster_max)
                 zone->nr_scan_active = 0;
         else
                 nr_active = 0;
  
-       zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
+       zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
         nr_inactive = zone->nr_scan_inactive;
         if (nr_inactive >= sc->swap_cluster_max)
                 zone->nr_scan_inactive = 0;
@@ -1349,17 +1358,17 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
  
         while (nr_active || nr_inactive) {
                 if (nr_active) {
-                       sc->nr_to_scan = min(nr_active,
+                       nr_to_scan = min(nr_active,
                                         (unsigned long)sc->swap_cluster_max);
-                       nr_active -= sc->nr_to_scan;
-                       refill_inactive_zone(zone, sc);
+                       nr_active -= nr_to_scan;
+                       refill_inactive_zone(nr_to_scan, zone, sc);
                 }
  
                 if (nr_inactive) {
-                       sc->nr_to_scan = min(nr_inactive,
+                       nr_to_scan = min(nr_inactive,
                                         (unsigned long)sc->swap_cluster_max);
-                       nr_inactive -= sc->nr_to_scan;
-                       shrink_cache(zone, sc);
+                       nr_inactive -= nr_to_scan;
+                       shrink_cache(nr_to_scan, zone, sc);
                 }
         }
  
@@ -1384,8 +1393,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
   * If a zone is deemed to be full of pinned pages then just give it a light
   * scan then give up on it.
   */
-static void
-shrink_caches(struct zone **zones, struct scan_control *sc)
+static void shrink_caches(int priority, struct zone **zones,
+                               struct scan_control *sc)
  {
         int i;
  
@@ -1398,14 +1407,14 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
                 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
                         continue;
  
-               zone->temp_priority = sc->priority;
-               if (zone->prev_priority > sc->priority)
-                       zone->prev_priority = sc->priority;
+               zone->temp_priority = priority;
+               if (zone->prev_priority > priority)
+                       zone->prev_priority = priority;
  
-               if (zone->all_unreclaimable && sc->priority != DEF_PRIORITY)
+               if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                         continue;       /* Let kswapd poll it */
  
-               shrink_zone(zone, sc);
+               shrink_zone(priority, zone, sc);
         }
  }
   
@@ -1422,19 +1431,21 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
   * holds filesystem locks which prevent writeout this might not work, and the
   * allocation attempt will fail.
   */
-int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
  {
         int priority;
         int ret = 0;
-       int total_scanned = 0, total_reclaimed = 0;
+       unsigned long total_scanned = 0;
+       unsigned long total_reclaimed = 0;
         struct reclaim_state *reclaim_state = current->reclaim_state;
-       struct scan_control sc;
         unsigned long lru_pages = 0;
         int i;
-
-       sc.gfp_mask = gfp_mask;
-       sc.may_writepage = !laptop_mode;
-       sc.may_swap = 1;
+       struct scan_control sc = {
+               .gfp_mask = gfp_mask,
+               .may_writepage = !laptop_mode,
+               .swap_cluster_max = SWAP_CLUSTER_MAX,
+               .may_swap = 1,
+       };
  
         inc_page_state(allocstall);
  
@@ -1452,11 +1463,9 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
                 sc.nr_mapped = read_page_state(nr_mapped);
                 sc.nr_scanned = 0;
                 sc.nr_reclaimed = 0;
-               sc.priority = priority;
-               sc.swap_cluster_max = SWAP_CLUSTER_MAX;
                 if (!priority)
                         disable_swap_token();
-               shrink_caches(zones, &sc);
+               shrink_caches(priority, zones, &sc);
                 shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
                 if (reclaim_state) {
                         sc.nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -1476,7 +1485,8 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
                  * that's undesirable in laptop mode, where we *want* lumpy
                  * writeout.  So in laptop mode, write out the whole world.
                  */
-               if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) {
+               if (total_scanned > sc.swap_cluster_max +
+                                       sc.swap_cluster_max / 2) {
                         wakeup_pdflush(laptop_mode ? 0 : total_scanned);
                         sc.may_writepage = 1;
                 }
@@ -1522,22 +1532,26 @@ out:
   * the page allocator fallback scheme to ensure that aging of pages is balanced
   * across the zones.
   */
-static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
+                               int order)
  {
-       int to_free = nr_pages;
+       unsigned long to_free = nr_pages;
         int all_zones_ok;
         int priority;
         int i;
-       int total_scanned, total_reclaimed;
+       unsigned long total_scanned;
+       unsigned long total_reclaimed;
         struct reclaim_state *reclaim_state = current->reclaim_state;
-       struct scan_control sc;
+       struct scan_control sc = {
+               .gfp_mask = GFP_KERNEL,
+               .may_swap = 1,
+               .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+       };
  
  loop_again:
         total_scanned = 0;
         total_reclaimed = 0;
-       sc.gfp_mask = GFP_KERNEL;
-       sc.may_writepage = !laptop_mode;
-       sc.may_swap = 1;
+       sc.may_writepage = !laptop_mode;
         sc.nr_mapped = read_page_state(nr_mapped);
  
         inc_page_state(pageoutrun);
@@ -1619,9 +1633,7 @@ scan:
                                 zone->prev_priority = priority;
                         sc.nr_scanned = 0;
                         sc.nr_reclaimed = 0;
-                       sc.priority = priority;
-                       sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
-                       shrink_zone(zone, &sc);
+                       shrink_zone(priority, zone, &sc);
                         reclaim_state->reclaimed_slab = 0;
                         nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
                                                 lru_pages);
@@ -1773,22 +1785,23 @@ void wakeup_kswapd(struct zone *zone, int order)
   * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
   * pages.
   */
-int shrink_all_memory(int nr_pages)
+unsigned long shrink_all_memory(unsigned long nr_pages)
  {
         pg_data_t *pgdat;
-       int nr_to_free = nr_pages;
-       int ret = 0;
+       unsigned long nr_to_free = nr_pages;
+       unsigned long ret = 0;
         struct reclaim_state reclaim_state = {
                 .reclaimed_slab = 0,
         };
  
         current->reclaim_state = &reclaim_state;
         for_each_pgdat(pgdat) {
-               int freed;
+               unsigned long freed;
+
                 freed = balance_pgdat(pgdat, nr_to_free, 0);
                 ret += freed;
                 nr_to_free -= freed;
-               if (nr_to_free <= 0)
+               if ((long)nr_to_free <= 0)
                         break;
         }
         current->reclaim_state = NULL;
@@ -1802,8 +1815,7 @@ int shrink_all_memory(int nr_pages)
     away, we get changed to run anywhere: as the first one comes back,
     restore their cpu bindings. */
  static int __devinit cpu_callback(struct notifier_block *nfb,
-                                 unsigned long action,
-                                 void *hcpu)
+                                 unsigned long action, void *hcpu)
  {
         pg_data_t *pgdat;
         cpumask_t mask;
@@ -1823,10 +1835,15 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
  static int __init kswapd_init(void)
  {
         pg_data_t *pgdat;
+
         swap_setup();
-       for_each_pgdat(pgdat)
-               pgdat->kswapd
-               = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+       for_each_pgdat(pgdat) {
+               pid_t pid;
+
+               pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
+               BUG_ON(pid < 0);
+               pgdat->kswapd = find_task_by_pid(pid);
+       }
         total_memory = nr_free_pagecache_pages();
         hotcpu_notifier(cpu_callback, 0);
         return 0;
@@ -1868,45 +1885,22 @@ int zone_reclaim_interval __read_mostly = 30*HZ;
  /*
   * Try to free up some pages from this zone through reclaim.
   */
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
  {
-       int nr_pages;
+       const unsigned long nr_pages = 1 << order;
         struct task_struct *p = current;
         struct reclaim_state reclaim_state;
-       struct scan_control sc;
-       cpumask_t mask;
-       int node_id;
-
-       if (time_before(jiffies,
-               zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
-                       return 0;
-
-       if (!(gfp_mask & __GFP_WAIT) ||
-               zone->all_unreclaimable ||
-               atomic_read(&zone->reclaim_in_progress) > 0)
-                       return 0;
-
-       node_id = zone->zone_pgdat->node_id;
-       mask = node_to_cpumask(node_id);
-       if (!cpus_empty(mask) && node_id != numa_node_id())
-               return 0;
-
-       sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
-       sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
-       sc.nr_scanned = 0;
-       sc.nr_reclaimed = 0;
-       sc.priority = ZONE_RECLAIM_PRIORITY + 1;
-       sc.nr_mapped = read_page_state(nr_mapped);
-       sc.gfp_mask = gfp_mask;
+       int priority;
+       struct scan_control sc = {
+               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+               .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+               .nr_mapped = read_page_state(nr_mapped),
+               .swap_cluster_max = max_t(unsigned long, nr_pages,
+                                       SWAP_CLUSTER_MAX),
+               .gfp_mask = gfp_mask,
+       };
  
         disable_swap_token();
-
-       nr_pages = 1 << order;
-       if (nr_pages > SWAP_CLUSTER_MAX)
-               sc.swap_cluster_max = nr_pages;
-       else
-               sc.swap_cluster_max = SWAP_CLUSTER_MAX;
-
         cond_resched();
         /*
          * We need to be able to allocate from the reserves for RECLAIM_SWAP
@@ -1921,11 +1915,11 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
          * Free memory by calling shrink zone with increasing priorities
          * until we have enough memory freed.
          */
+       priority = ZONE_RECLAIM_PRIORITY;
         do {
-               sc.priority--;
-               shrink_zone(zone, &sc);
-
-       } while (sc.nr_reclaimed < nr_pages && sc.priority > 0);
+               shrink_zone(priority, zone, &sc);
+               priority--;
+       } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
  
         if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
                 /*
@@ -1947,5 +1941,43 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
  
         return sc.nr_reclaimed >= nr_pages;
  }
-#endif
  
+int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+{
+       cpumask_t mask;
+       int node_id;
+
+       /*
+        * Do not reclaim if there was a recent unsuccessful attempt at zone
+        * reclaim.  In that case we let allocations go off-node for the
+        * zone_reclaim_interval.  Otherwise we would scan for each off-node
+        * page allocation.
+        */
+       if (time_before(jiffies,
+               zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
+                       return 0;
+
+       /*
+        * Do not scan if the allocation must not be delayed (no __GFP_WAIT),
+        * the zone has no reclaimable pages, a reclaim of this zone is already
+        * in progress, or the current task has PF_MEMALLOC set.
+        */
+       if (!(gfp_mask & __GFP_WAIT) ||
+               zone->all_unreclaimable ||
+               atomic_read(&zone->reclaim_in_progress) > 0 ||
+               (current->flags & PF_MEMALLOC))
+                       return 0;
+
+       /*
+        * Only run zone reclaim on the local node or on nodes that have no
+        * associated processors.  This favors the local processor over
+        * remote processors and spreads off-node memory allocations as
+        * widely as possible.
+        */
+       node_id = zone->zone_pgdat->node_id;
+       mask = node_to_cpumask(node_id);
+       if (!cpus_empty(mask) && node_id != numa_node_id())
+               return 0;
+       return __zone_reclaim(zone, gfp_mask, order);
+}
+#endif