X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=mm%2Fvmscan.c;h=4fe7e3aa02e2f38773105620d2640d779f6381ca;hb=e031d33efde817ce6b4f907f0fa1ff021301748f;hp=5a610804cd06a938b902006d2780a540d66be704;hpb=1a56f54c351d8d2e276aefc4f4094037c178d6c7;p=linux-2.6-omap-h63xx.git

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5a610804cd0..4fe7e3aa02e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -443,6 +443,10 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
+
+		if (!sc->may_swap && page_mapped(page))
+			goto keep_locked;
+
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -632,7 +636,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page, 0) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -696,7 +700,7 @@ int migrate_page_remove_references(struct page *newpage,
 	 * the page.
 	 */
 	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
-		return 1;
+		return -EAGAIN;
 
 	/*
 	 * Establish swap ptes for anonymous pages or destroy pte
@@ -717,13 +721,15 @@ int migrate_page_remove_references(struct page *newpage,
 	 * If the page was not migrated then the PageSwapCache bit
 	 * is still set and the operation may continue.
 	 */
-	try_to_unmap(page, 1);
+	if (try_to_unmap(page, 1) == SWAP_FAIL)
+		/* A vma has VM_LOCKED set -> Permanent failure */
+		return -EPERM;
 
 	/*
 	 * Give up if we were unable to remove all mappings.
 	 */
 	if (page_mapcount(page))
-		return 1;
+		return -EAGAIN;
 
 	write_lock_irq(&mapping->tree_lock);
 
@@ -734,7 +740,7 @@ int migrate_page_remove_references(struct page *newpage,
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 			*radix_pointer != page) {
 		write_unlock_irq(&mapping->tree_lock);
-		return 1;
+		return -EAGAIN;
 	}
 
 	/*
@@ -809,10 +815,14 @@ EXPORT_SYMBOL(migrate_page_copy);
  */
 int migrate_page(struct page *newpage, struct page *page)
 {
+	int rc;
+
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	if (migrate_page_remove_references(newpage, page, 2))
-		return -EAGAIN;
+	rc = migrate_page_remove_references(newpage, page, 2);
+
+	if (rc)
+		return rc;
 
 	migrate_page_copy(newpage, page);
 
@@ -839,7 +849,7 @@ EXPORT_SYMBOL(migrate_page);
  * pages are swapped out.
  *
  * The function returns after 10 attempts or if no pages
- * are movable anymore because t has become empty
+ * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
 *
 * Return: Number of pages not migrated when "to" ran empty.
@@ -928,12 +938,21 @@ redo:
 			goto unlock_both;
 
 		if (mapping->a_ops->migratepage) {
+			/*
+			 * Most pages have a mapping and most filesystems
+			 * should provide a migration function. Anonymous
+			 * pages are part of swap space which also has its
+			 * own migration function. This is the most common
+			 * path for page migration.
+			 */
 			rc = mapping->a_ops->migratepage(newpage, page);
 			goto unlock_both;
 		}
 
 		/*
-		 * Trigger writeout if page is dirty
+		 * Default handling if a filesystem does not provide
+		 * a migration function. We can only migrate clean
+		 * pages so try to write out any dirty pages first.
 		 */
 		if (PageDirty(page)) {
 			switch (pageout(page, mapping)) {
@@ -949,9 +968,10 @@ redo:
 				; /* try to migrate the page below */
 			}
 		}
+
 		/*
-		 * If we have no buffer or can release the buffer
-		 * then do a simple migration.
+		 * Buffers are managed in a filesystem specific way.
+		 * We must have no buffers or drop them.
 		 */
 		if (!page_has_buffers(page) ||
 		    try_to_release_page(page, GFP_KERNEL)) {
@@ -966,6 +986,11 @@ redo:
 		 * swap them out.
 		 */
 		if (pass > 4) {
+			/*
+			 * Persistently unable to drop buffers..... As a
+			 * measure of last resort we fall back to
+			 * swap_page().
+			 */
 			unlock_page(newpage);
 			newpage = NULL;
 			rc = swap_page(page);
@@ -1176,9 +1201,47 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	struct page *page;
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
+
+	if (unlikely(sc->may_swap)) {
+		long mapped_ratio;
+		long distress;
+		long swap_tendency;
+
+		/*
+		 * `distress' is a measure of how much trouble we're having
+		 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
+		 */
+		distress = 100 >> zone->prev_priority;
+
+		/*
+		 * The point of this algorithm is to decide when to start
+		 * reclaiming mapped memory instead of just pagecache. Work out
+		 * how much memory
+		 * is mapped.
+		 */
+		mapped_ratio = (sc->nr_mapped * 100) / total_memory;
+
+		/*
+		 * Now decide how much we really want to unmap some pages. The
+		 * mapped ratio is downgraded - just because there's a lot of
+		 * mapped memory doesn't necessarily mean that page reclaim
+		 * isn't succeeding.
+		 *
+		 * The distress ratio is important - we don't want to start
+		 * going oom.
+		 *
+		 * A 100% value of vm_swappiness overrides this algorithm
+		 * altogether.
+		 */
+		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+
+		/*
+		 * Now use this metric to decide whether to start moving mapped
+		 * memory onto the inactive list.
+		 */
+		if (swap_tendency >= 100)
+			reclaim_mapped = 1;
+	}
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -1188,37 +1251,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
-	/*
-	 * `distress' is a measure of how much trouble we're having reclaiming
-	 * pages.  0 -> no problems.  100 -> great trouble.
-	 */
-	distress = 100 >> zone->prev_priority;
-
-	/*
-	 * The point of this algorithm is to decide when to start reclaiming
-	 * mapped memory instead of just pagecache.  Work out how much memory
-	 * is mapped.
-	 */
-	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
-	/*
-	 * Now decide how much we really want to unmap some pages.  The mapped
-	 * ratio is downgraded - just because there's a lot of mapped memory
-	 * doesn't necessarily mean that page reclaim isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped memory
-	 * onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1595,9 +1627,7 @@ scan:
 			sc.nr_reclaimed = 0;
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
-			atomic_inc(&zone->reclaim_in_progress);
 			shrink_zone(zone, &sc);
-			atomic_dec(&zone->reclaim_in_progress);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
@@ -1859,7 +1889,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 
 	if (!(gfp_mask & __GFP_WAIT) ||
 		zone->all_unreclaimable ||
-		atomic_read(&zone->reclaim_in_progress) > 0)
+		atomic_read(&zone->reclaim_in_progress) > 0 ||
+		(p->flags & PF_MEMALLOC))
 			return 0;
 
 	node_id = zone->zone_pgdat->node_id;
@@ -1884,7 +1915,12 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	sc.swap_cluster_max = SWAP_CLUSTER_MAX;
 
 	cond_resched();
-	p->flags |= PF_MEMALLOC;
+	/*
+	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
+	 * and we also need to be able to write out pages for RECLAIM_WRITE
+	 * and RECLAIM_SWAP.
+	 */
+	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
@@ -1908,11 +1944,10 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		 * a long time.
 		 */
 		shrink_slab(sc.nr_scanned, gfp_mask, order);
-		sc.nr_reclaimed = 1;	/* Avoid getting the off node timeout */
 	}
 
 	p->reclaim_state = NULL;
-	current->flags &= ~PF_MEMALLOC;
+	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
 
 	if (sc.nr_reclaimed == 0)
 		zone->last_unsuccessful_zone_reclaim = jiffies;
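
Note on the refill_inactive_zone() hunks above: the patch moves the swap_tendency calculation under the sc->may_swap check, but the arithmetic itself is unchanged. distress grows as the zone's previous scan priority drops, mapped_ratio is the percentage of memory that is mapped, and mapped pages are only deactivated once mapped_ratio / 2 + distress + vm_swappiness reaches 100. The following is a minimal standalone sketch of that arithmetic, assuming illustrative sample values for prev_priority, nr_mapped, total_memory and the swappiness setting (none of these come from a real system, and the program is not kernel code):

#include <stdio.h>

/* Standalone model of the swap_tendency heuristic from refill_inactive_zone().
 * All inputs are hypothetical sample values, not real kernel state.
 */
int main(void)
{
	long prev_priority = 6;		/* zone->prev_priority after the last scan */
	long nr_mapped = 40000;		/* pages mapped into page tables */
	long total_memory = 100000;	/* total pages of memory */
	long vm_swappiness = 60;	/* default /proc/sys/vm/swappiness */

	/* 0 -> no reclaim trouble, 100 -> great trouble */
	long distress = 100 >> prev_priority;

	/* percentage of memory that is mapped */
	long mapped_ratio = (nr_mapped * 100) / total_memory;

	/*
	 * mapped_ratio is halved so mapped pages alone do not force unmapping;
	 * a vm_swappiness of 100 pushes the sum past the threshold by itself.
	 */
	long swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;

	int reclaim_mapped = (swap_tendency >= 100);

	printf("distress=%ld mapped_ratio=%ld swap_tendency=%ld reclaim_mapped=%d\n",
	       distress, mapped_ratio, swap_tendency, reclaim_mapped);
	return 0;
}

With these inputs the sketch prints distress=1 mapped_ratio=40 swap_tendency=81 reclaim_mapped=0; raising vm_swappiness to 100 lifts swap_tendency to 121, so mapped memory would be deactivated regardless of distress, which is what the "A 100% value of vm_swappiness overrides this algorithm altogether" comment in the patch refers to.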