X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=mm%2Fvmscan.c;h=3b5860294bb6654a7b6765a327cef83a9952a29d;hb=refs%2Ftags%2Fv2.6.28-rc4;hp=9588973849d0ff0ba03b509be31865100f6d5470;hpb=7e9cd484204f9e5b316ed35b241abf088d76e0af;p=linux-2.6-omap-h63xx.git diff --git a/mm/vmscan.c b/mm/vmscan.c index 9588973849d..3b5860294bb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -470,6 +471,85 @@ int remove_mapping(struct address_space *mapping, struct page *page) return 0; } +/** + * putback_lru_page - put previously isolated page onto appropriate LRU list + * @page: page to be put back to appropriate lru list + * + * Add previously isolated @page to appropriate LRU list. + * Page may still be unevictable for other reasons. + * + * lru_lock must not be held, interrupts must be enabled. + */ +#ifdef CONFIG_UNEVICTABLE_LRU +void putback_lru_page(struct page *page) +{ + int lru; + int active = !!TestClearPageActive(page); + int was_unevictable = PageUnevictable(page); + + VM_BUG_ON(PageLRU(page)); + +redo: + ClearPageUnevictable(page); + + if (page_evictable(page, NULL)) { + /* + * For evictable pages, we can use the cache. + * In event of a race, worst case is we end up with an + * unevictable page on [in]active list. + * We know how to handle that. + */ + lru = active + page_is_file_cache(page); + lru_cache_add_lru(page, lru); + } else { + /* + * Put unevictable pages directly on zone's unevictable + * list. + */ + lru = LRU_UNEVICTABLE; + add_page_to_unevictable_list(page); + } + mem_cgroup_move_lists(page, lru); + + /* + * page's status can change while we move it among lru. If an evictable + * page is on unevictable list, it never be freed. To avoid that, + * check after we added it to the list, again. + */ + if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) { + if (!isolate_lru_page(page)) { + put_page(page); + goto redo; + } + /* This means someone else dropped this page from LRU + * So, it will be freed or putback to LRU again. There is + * nothing to do here. + */ + } + + if (was_unevictable && lru != LRU_UNEVICTABLE) + count_vm_event(UNEVICTABLE_PGRESCUED); + else if (!was_unevictable && lru == LRU_UNEVICTABLE) + count_vm_event(UNEVICTABLE_PGCULLED); + + put_page(page); /* drop ref from isolate */ +} + +#else /* CONFIG_UNEVICTABLE_LRU */ + +void putback_lru_page(struct page *page) +{ + int lru; + VM_BUG_ON(PageLRU(page)); + + lru = !!TestClearPageActive(page) + page_is_file_cache(page); + lru_cache_add_lru(page, lru); + mem_cgroup_move_lists(page, lru); + put_page(page); +} +#endif /* CONFIG_UNEVICTABLE_LRU */ + + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -503,6 +583,9 @@ static unsigned long shrink_page_list(struct list_head *page_list, sc->nr_scanned++; + if (unlikely(!page_evictable(page, NULL))) + goto cull_mlocked; + if (!sc->may_swap && page_mapped(page)) goto keep_locked; @@ -539,9 +622,19 @@ static unsigned long shrink_page_list(struct list_head *page_list, * Anonymous process memory has backing store? * Try to allocate it some swap space here. */ - if (PageAnon(page) && !PageSwapCache(page)) + if (PageAnon(page) && !PageSwapCache(page)) { + switch (try_to_munlock(page)) { + case SWAP_FAIL: /* shouldn't happen */ + case SWAP_AGAIN: + goto keep_locked; + case SWAP_MLOCK: + goto cull_mlocked; + case SWAP_SUCCESS: + ; /* fall thru'; add to swap cache */ + } if (!add_to_swap(page, GFP_ATOMIC)) goto activate_locked; + } #endif /* CONFIG_SWAP */ mapping = page_mapping(page); @@ -556,6 +649,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto activate_locked; case SWAP_AGAIN: goto keep_locked; + case SWAP_MLOCK: + goto cull_mlocked; case SWAP_SUCCESS: ; /* try to free the page below */ } @@ -602,7 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * possible for a page to have PageDirty set, but it is actually * clean (all its buffers are clean). This happens if the * buffers were written out directly, with submit_bh(). ext3 - * will do this, as well as the blockdev mapping. + * will do this, as well as the blockdev mapping. * try_to_release_page() will discover that cleanness and will * drop the buffers and mark the page clean - it can be freed. * @@ -637,7 +732,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (!mapping || !__remove_mapping(mapping, page)) goto keep_locked; - unlock_page(page); + /* + * At this point, we have no other references and there is + * no way to pick any more up (removed from LRU, removed + * from pagecache). Can use non-atomic bitops now (and + * we obviously don't have to worry about waking up a process + * waiting on the page lock, because there are no references. + */ + __clear_page_locked(page); free_it: nr_reclaimed++; if (!pagevec_add(&freed_pvec, page)) { @@ -646,17 +748,23 @@ free_it: } continue; +cull_mlocked: + unlock_page(page); + putback_lru_page(page); + continue; + activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ if (PageSwapCache(page) && vm_swap_full()) remove_exclusive_swap_page_ref(page); + VM_BUG_ON(PageActive(page)); SetPageActive(page); pgactivate++; keep_locked: unlock_page(page); keep: list_add(&page->lru, &ret_pages); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } list_splice(&ret_pages, page_list); if (pagevec_count(&freed_pvec)) @@ -699,6 +807,14 @@ int __isolate_lru_page(struct page *page, int mode, int file) if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) return ret; + /* + * When this function is being called for lumpy reclaim, we + * initially look into all LRU pages, active, inactive and + * unevictable; only give shrink_page_list evictable pages. + */ + if (PageUnevictable(page)) + return ret; + ret = -EBUSY; if (likely(get_page_unless_zero(page))) { /* @@ -810,7 +926,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, /* else it is being freed elsewhere */ list_move(&cursor_page->lru, src); default: - break; + break; /* ! on LRU or wrong list */ } } } @@ -870,8 +986,9 @@ static unsigned long clear_active_flags(struct list_head *page_list, * Returns -EBUSY if the page was not on an LRU list. * * The returned page will have PageLRU() cleared. If it was found on - * the active list, it will have PageActive set. That flag may need - * to be cleared by the caller before letting the page go. + * the active list, it will have PageActive set. If it was found on + * the unevictable list, it will have the PageUnevictable bit set. That flag + * may need to be cleared by the caller before letting the page go. * * The vmstat statistic corresponding to the list on which the page was * found will be decremented. @@ -892,11 +1009,10 @@ int isolate_lru_page(struct page *page) spin_lock_irq(&zone->lru_lock); if (PageLRU(page) && get_page_unless_zero(page)) { - int lru = LRU_BASE; + int lru = page_lru(page); ret = 0; ClearPageLRU(page); - lru += page_is_file_cache(page) + !!PageActive(page); del_page_from_lru_list(zone, page, lru); } spin_unlock_irq(&zone->lru_lock); @@ -909,7 +1025,8 @@ int isolate_lru_page(struct page *page) * of reclaimed pages */ static unsigned long shrink_inactive_list(unsigned long max_scan, - struct zone *zone, struct scan_control *sc, int file) + struct zone *zone, struct scan_control *sc, + int priority, int file) { LIST_HEAD(page_list); struct pagevec pvec; @@ -927,8 +1044,19 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, unsigned long nr_freed; unsigned long nr_active; unsigned int count[NR_LRU_LISTS] = { 0, }; - int mode = (sc->order > PAGE_ALLOC_COSTLY_ORDER) ? - ISOLATE_BOTH : ISOLATE_INACTIVE; + int mode = ISOLATE_INACTIVE; + + /* + * If we need a large contiguous chunk of memory, or have + * trouble getting a small set of contiguous pages, we + * will reclaim both active and inactive pages. + * + * We use the same threshold as pageout congestion_wait below. + */ + if (sc->order > PAGE_ALLOC_COSTLY_ORDER) + mode = ISOLATE_BOTH; + else if (sc->order && priority < DEF_PRIORITY - 2) + mode = ISOLATE_BOTH; nr_taken = sc->isolate_pages(sc->swap_cluster_max, &page_list, &nr_scan, sc->order, mode, @@ -996,11 +1124,20 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, * Put back any unfreeable pages. */ while (!list_empty(&page_list)) { + int lru; page = lru_to_page(&page_list); VM_BUG_ON(PageLRU(page)); - SetPageLRU(page); list_del(&page->lru); - add_page_to_lru_list(zone, page, page_lru(page)); + if (unlikely(!page_evictable(page, NULL))) { + spin_unlock_irq(&zone->lru_lock); + putback_lru_page(page); + spin_lock_irq(&zone->lru_lock); + continue; + } + SetPageLRU(page); + lru = page_lru(page); + add_page_to_lru_list(zone, page, lru); + mem_cgroup_move_lists(page, lru); if (PageActive(page) && scan_global_lru(sc)) { int file = !!page_is_file_cache(page); zone->recent_rotated[file]++; @@ -1095,6 +1232,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, page = lru_to_page(&l_hold); list_del(&page->lru); + if (unlikely(!page_evictable(page, NULL))) { + putback_lru_page(page); + continue; + } + /* page_referenced clears PageReferenced */ if (page_mapping_inuse(page) && page_referenced(page, 0, sc->mem_cgroup)) @@ -1128,7 +1270,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, ClearPageActive(page); list_move(&page->lru, &zone->lru[lru].list); - mem_cgroup_move_lists(page, false); + mem_cgroup_move_lists(page, lru); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); @@ -1172,7 +1314,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; } - return shrink_inactive_list(nr_to_scan, zone, sc, file); + return shrink_inactive_list(nr_to_scan, zone, sc, priority, file); } /* @@ -1274,20 +1416,17 @@ static unsigned long shrink_zone(int priority, struct zone *zone, get_scan_ratio(zone, sc, percent); - for_each_lru(l) { + for_each_evictable_lru(l) { if (scan_global_lru(sc)) { int file = is_file_lru(l); int scan; - /* - * Add one to nr_to_scan just to make sure that the - * kernel will slowly sift through each list. - */ + scan = zone_page_state(zone, NR_LRU_BASE + l); if (priority) { scan >>= priority; scan = (scan * percent[file]) / 100; } - zone->lru[l].nr_scan += scan + 1; + zone->lru[l].nr_scan += scan; nr[l] = zone->lru[l].nr_scan; if (nr[l] >= sc->swap_cluster_max) zone->lru[l].nr_scan = 0; @@ -1306,7 +1445,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { - for_each_lru(l) { + for_each_evictable_lru(l) { if (nr[l]) { nr_to_scan = min(nr[l], (unsigned long)sc->swap_cluster_max); @@ -1863,8 +2002,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; - for_each_lru(l) { - /* For pass = 0 we don't shrink the active list */ + for_each_evictable_lru(l) { + /* For pass = 0, we don't shrink the active list */ if (pass == 0 && (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) continue; @@ -2201,3 +2340,285 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) return ret; } #endif + +#ifdef CONFIG_UNEVICTABLE_LRU +/* + * page_evictable - test whether a page is evictable + * @page: the page to test + * @vma: the VMA in which the page is or will be mapped, may be NULL + * + * Test whether page is evictable--i.e., should be placed on active/inactive + * lists vs unevictable list. The vma argument is !NULL when called from the + * fault path to determine how to instantate a new page. + * + * Reasons page might not be evictable: + * (1) page's mapping marked unevictable + * (2) page is part of an mlocked VMA + * + */ +int page_evictable(struct page *page, struct vm_area_struct *vma) +{ + + if (mapping_unevictable(page_mapping(page))) + return 0; + + if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) + return 0; + + return 1; +} + +static void show_page_path(struct page *page) +{ + char buf[256]; + if (page_is_file_cache(page)) { + struct address_space *mapping = page->mapping; + struct dentry *dentry; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + + spin_lock(&mapping->i_mmap_lock); + dentry = d_find_alias(mapping->host); + printk(KERN_INFO "rescued: %s %lu\n", + dentry_path(dentry, buf, 256), pgoff); + spin_unlock(&mapping->i_mmap_lock); + } else { +#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU) + struct anon_vma *anon_vma; + struct vm_area_struct *vma; + + anon_vma = page_lock_anon_vma(page); + if (!anon_vma) + return; + + list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + printk(KERN_INFO "rescued: anon %s\n", + vma->vm_mm->owner->comm); + break; + } + page_unlock_anon_vma(anon_vma); +#endif + } +} + + +/** + * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list + * @page: page to check evictability and move to appropriate lru list + * @zone: zone page is in + * + * Checks a page for evictability and moves the page to the appropriate + * zone lru list. + * + * Restrictions: zone->lru_lock must be held, page must be on LRU and must + * have PageUnevictable set. + */ +static void check_move_unevictable_page(struct page *page, struct zone *zone) +{ + VM_BUG_ON(PageActive(page)); + +retry: + ClearPageUnevictable(page); + if (page_evictable(page, NULL)) { + enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); + + show_page_path(page); + + __dec_zone_state(zone, NR_UNEVICTABLE); + list_move(&page->lru, &zone->lru[l].list); + __inc_zone_state(zone, NR_INACTIVE_ANON + l); + __count_vm_event(UNEVICTABLE_PGRESCUED); + } else { + /* + * rotate unevictable list + */ + SetPageUnevictable(page); + list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list); + if (page_evictable(page, NULL)) + goto retry; + } +} + +/** + * scan_mapping_unevictable_pages - scan an address space for evictable pages + * @mapping: struct address_space to scan for evictable pages + * + * Scan all pages in mapping. Check unevictable pages for + * evictability and move them to the appropriate zone lru list. + */ +void scan_mapping_unevictable_pages(struct address_space *mapping) +{ + pgoff_t next = 0; + pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + struct zone *zone; + struct pagevec pvec; + + if (mapping->nrpages == 0) + return; + + pagevec_init(&pvec, 0); + while (next < end && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + int i; + int pg_scanned = 0; + + zone = NULL; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + pgoff_t page_index = page->index; + struct zone *pagezone = page_zone(page); + + pg_scanned++; + if (page_index > next) + next = page_index; + next++; + + if (pagezone != zone) { + if (zone) + spin_unlock_irq(&zone->lru_lock); + zone = pagezone; + spin_lock_irq(&zone->lru_lock); + } + + if (PageLRU(page) && PageUnevictable(page)) + check_move_unevictable_page(page, zone); + } + if (zone) + spin_unlock_irq(&zone->lru_lock); + pagevec_release(&pvec); + + count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned); + } + +} + +/** + * scan_zone_unevictable_pages - check unevictable list for evictable pages + * @zone - zone of which to scan the unevictable list + * + * Scan @zone's unevictable LRU lists to check for pages that have become + * evictable. Move those that have to @zone's inactive list where they + * become candidates for reclaim, unless shrink_inactive_zone() decides + * to reactivate them. Pages that are still unevictable are rotated + * back onto @zone's unevictable list. + */ +#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */ +void scan_zone_unevictable_pages(struct zone *zone) +{ + struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list; + unsigned long scan; + unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE); + + while (nr_to_scan > 0) { + unsigned long batch_size = min(nr_to_scan, + SCAN_UNEVICTABLE_BATCH_SIZE); + + spin_lock_irq(&zone->lru_lock); + for (scan = 0; scan < batch_size; scan++) { + struct page *page = lru_to_page(l_unevictable); + + if (!trylock_page(page)) + continue; + + prefetchw_prev_lru_page(page, l_unevictable, flags); + + if (likely(PageLRU(page) && PageUnevictable(page))) + check_move_unevictable_page(page, zone); + + unlock_page(page); + } + spin_unlock_irq(&zone->lru_lock); + + nr_to_scan -= batch_size; + } +} + + +/** + * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages + * + * A really big hammer: scan all zones' unevictable LRU lists to check for + * pages that have become evictable. Move those back to the zones' + * inactive list where they become candidates for reclaim. + * This occurs when, e.g., we have unswappable pages on the unevictable lists, + * and we add swap to the system. As such, it runs in the context of a task + * that has possibly/probably made some previously unevictable pages + * evictable. + */ +void scan_all_zones_unevictable_pages(void) +{ + struct zone *zone; + + for_each_zone(zone) { + scan_zone_unevictable_pages(zone); + } +} + +/* + * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of + * all nodes' unevictable lists for evictable pages + */ +unsigned long scan_unevictable_pages; + +int scan_unevictable_handler(struct ctl_table *table, int write, + struct file *file, void __user *buffer, + size_t *length, loff_t *ppos) +{ + proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + + if (write && *(unsigned long *)table->data) + scan_all_zones_unevictable_pages(); + + scan_unevictable_pages = 0; + return 0; +} + +/* + * per node 'scan_unevictable_pages' attribute. On demand re-scan of + * a specified node's per zone unevictable lists for evictable pages. + */ + +static ssize_t read_scan_unevictable_node(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) +{ + return sprintf(buf, "0\n"); /* always zero; should fit... */ +} + +static ssize_t write_scan_unevictable_node(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) +{ + struct zone *node_zones = NODE_DATA(dev->id)->node_zones; + struct zone *zone; + unsigned long res; + unsigned long req = strict_strtoul(buf, 10, &res); + + if (!req) + return 1; /* zero is no-op */ + + for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { + if (!populated_zone(zone)) + continue; + scan_zone_unevictable_pages(zone); + } + return 1; +} + + +static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR, + read_scan_unevictable_node, + write_scan_unevictable_node); + +int scan_unevictable_register_node(struct node *node) +{ + return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages); +} + +void scan_unevictable_unregister_node(struct node *node) +{ + sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages); +} + +#endif