X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=arch%2Fx86%2Fmm%2Fpageattr.c;h=7d9517abc9af500fc91c7a3f79b397c8e60dd328;hb=c9caa02c529d5e113e40cbc77254558fcdfa4215;hp=7b4a17236972bc90815204f766e9e8bfc389dc10;hpb=c31c7d4844ea4817692ae16bf70f9c96c05a50eb;p=linux-2.6-omap-h63xx.git diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7b4a1723697..7d9517abc9a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -17,6 +19,7 @@ #include #include #include +#include /* * The current flushing context - we pass it instead of 5 arguments: @@ -28,6 +31,7 @@ struct cpa_data { int numpages; int flushtlb; unsigned long pfn; + unsigned force_split : 1; }; #ifdef CONFIG_X86_64 @@ -44,6 +48,12 @@ static inline unsigned long highmap_end_pfn(void) #endif +#ifdef CONFIG_DEBUG_PAGEALLOC +# define debug_pagealloc 1 +#else +# define debug_pagealloc 0 +#endif + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -253,6 +263,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, int i, do_split = 1; unsigned int level; + if (cpa->force_split) + return 1; + spin_lock_irqsave(&pgd_lock, flags); /* * Check for races, another CPU might have split this page @@ -355,45 +368,48 @@ out_unlock: static LIST_HEAD(page_pool); static unsigned long pool_size, pool_pages, pool_low; -static unsigned long pool_used, pool_failed, pool_refill; +static unsigned long pool_used, pool_failed; -static void cpa_fill_pool(void) +static void cpa_fill_pool(struct page **ret) { - struct page *p; gfp_t gfp = GFP_KERNEL; + unsigned long flags; + struct page *p; - /* Do not allocate from interrupt context */ - if (in_irq() || irqs_disabled()) - return; /* - * Check unlocked. I does not matter when we have one more - * page in the pool. The bit lock avoids recursive pool - * allocations: + * Avoid recursion (on debug-pagealloc) and also signal + * our priority to get to these pagetables: */ - if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) + if (current->flags & PF_MEMALLOC) return; + current->flags |= PF_MEMALLOC; -#ifdef CONFIG_DEBUG_PAGEALLOC /* - * We could do: - * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL; - * but this fails on !PREEMPT kernels + * Allocate atomically from atomic contexts: */ - gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; -#endif + if (in_atomic() || irqs_disabled() || debug_pagealloc) + gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; - while (pool_pages < pool_size) { + while (pool_pages < pool_size || (ret && !*ret)) { p = alloc_pages(gfp, 0); if (!p) { pool_failed++; break; } - spin_lock_irq(&pgd_lock); + /* + * If the call site needs a page right now, provide it: + */ + if (ret && !*ret) { + *ret = p; + continue; + } + spin_lock_irqsave(&pgd_lock, flags); list_add(&p->lru, &page_pool); pool_pages++; - spin_unlock_irq(&pgd_lock); + spin_unlock_irqrestore(&pgd_lock, flags); } - clear_bit_unlock(0, &pool_refill); + + current->flags &= ~PF_MEMALLOC; } #define SHIFT_MB (20 - PAGE_SHIFT) @@ -414,11 +430,15 @@ void __init cpa_init(void) * GiB. Shift MiB to Gib and multiply the result by * POOL_PAGES_PER_GB: */ - gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; - pool_size = POOL_PAGES_PER_GB * gb; + if (debug_pagealloc) { + gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; + pool_size = POOL_PAGES_PER_GB * gb; + } else { + pool_size = 1; + } pool_low = pool_size; - cpa_fill_pool(); + cpa_fill_pool(NULL); printk(KERN_DEBUG "CPA: page pool initialized %lu of %lu pages preallocated\n", pool_pages, pool_size); @@ -440,16 +460,20 @@ static int split_large_page(pte_t *kpte, unsigned long address) spin_lock_irqsave(&pgd_lock, flags); if (list_empty(&page_pool)) { spin_unlock_irqrestore(&pgd_lock, flags); - return -ENOMEM; + base = NULL; + cpa_fill_pool(&base); + if (!base) + return -ENOMEM; + spin_lock_irqsave(&pgd_lock, flags); + } else { + base = list_first_entry(&page_pool, struct page, lru); + list_del(&base->lru); + pool_pages--; + + if (pool_pages < pool_low) + pool_low = pool_pages; } - base = list_first_entry(&page_pool, struct page, lru); - list_del(&base->lru); - pool_pages--; - - if (pool_pages < pool_low) - pool_low = pool_pages; - /* * Check for races, another CPU might have split this page * up for us already: @@ -513,7 +537,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) unsigned long address = cpa->vaddr; int do_split, err; unsigned int level; - struct page *kpte_page; pte_t *kpte, old_pte; repeat: @@ -532,10 +555,6 @@ repeat: return -EINVAL; } - kpte_page = virt_to_page(kpte); - BUG_ON(PageLRU(kpte_page)); - BUG_ON(PageCompound(kpte_page)); - if (level == PG_LEVEL_4K) { pte_t new_pte; pgprot_t new_prot = pte_pgprot(old_pte); @@ -594,19 +613,34 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; - int ret; + int ret = 0; if (cpa->pfn > max_pfn_mapped) return 0; - alias_cpa = *cpa; - alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); + /* + * No need to redo, when the primary call touched the direct + * mapping already: + */ + if (!within(cpa->vaddr, PAGE_OFFSET, + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { + + alias_cpa = *cpa; + alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); - ret = __change_page_attr_set_clr(&alias_cpa, 0); + ret = __change_page_attr_set_clr(&alias_cpa, 0); + } #ifdef CONFIG_X86_64 if (ret) return ret; + /* + * No need to redo, when the primary call touched the high + * mapping already: + */ + if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end)) + return 0; + /* * If the physical address is inside the kernel map, we need * to touch the high mapped kernel as well: @@ -666,10 +700,11 @@ static inline int cache_attr(pgprot_t attr) } static int change_page_attr_set_clr(unsigned long addr, int numpages, - pgprot_t mask_set, pgprot_t mask_clr) + pgprot_t mask_set, pgprot_t mask_clr, + int force_split) { struct cpa_data cpa; - int ret, cache; + int ret, cache, checkalias; /* * Check, if we are requested to change a not supported @@ -677,7 +712,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, */ mask_set = canon_pgprot(mask_set); mask_clr = canon_pgprot(mask_clr); - if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split) return 0; /* Ensure we are PAGE_SIZE aligned */ @@ -694,8 +729,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; cpa.flushtlb = 0; + cpa.force_split = force_split; + + /* No alias checking for _NX bit modifications */ + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - ret = __change_page_attr_set_clr(&cpa, 1); + ret = __change_page_attr_set_clr(&cpa, checkalias); /* * Check whether we really changed something: @@ -721,33 +760,69 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, cpa_flush_all(cache); out: - cpa_fill_pool(); + cpa_fill_pool(NULL); + return ret; } static inline int change_page_attr_set(unsigned long addr, int numpages, pgprot_t mask) { - return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0)); + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0); } static inline int change_page_attr_clear(unsigned long addr, int numpages, pgprot_t mask) { - return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask); + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0); } -int set_memory_uc(unsigned long addr, int numpages) +int _set_memory_uc(unsigned long addr, int numpages) { return change_page_attr_set(addr, numpages, - __pgprot(_PAGE_PCD | _PAGE_PWT)); + __pgprot(_PAGE_CACHE_UC)); +} + +int set_memory_uc(unsigned long addr, int numpages) +{ + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + _PAGE_CACHE_UC, NULL)) + return -EINVAL; + + return _set_memory_uc(addr, numpages); } EXPORT_SYMBOL(set_memory_uc); -int set_memory_wb(unsigned long addr, int numpages) +int _set_memory_wc(unsigned long addr, int numpages) +{ + return change_page_attr_set(addr, numpages, + __pgprot(_PAGE_CACHE_WC)); +} + +int set_memory_wc(unsigned long addr, int numpages) +{ + if (!pat_wc_enabled) + return set_memory_uc(addr, numpages); + + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + _PAGE_CACHE_WC, NULL)) + return -EINVAL; + + return _set_memory_wc(addr, numpages); +} +EXPORT_SYMBOL(set_memory_wc); + +int _set_memory_wb(unsigned long addr, int numpages) { return change_page_attr_clear(addr, numpages, - __pgprot(_PAGE_PCD | _PAGE_PWT)); + __pgprot(_PAGE_CACHE_MASK)); +} + +int set_memory_wb(unsigned long addr, int numpages) +{ + free_memtype(addr, addr + numpages * PAGE_SIZE); + + return _set_memory_wb(addr, numpages); } EXPORT_SYMBOL(set_memory_wb); @@ -778,6 +853,12 @@ int set_memory_np(unsigned long addr, int numpages) return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); } +int set_memory_4k(unsigned long addr, int numpages) +{ + return change_page_attr_set_clr(addr, numpages, __pgprot(0), + __pgprot(0), 1); +} + int set_pages_uc(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); @@ -884,10 +965,66 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * Try to refill the page pool here. We can do this only after * the tlb flush. */ - cpa_fill_pool(); + cpa_fill_pool(NULL); +} + +#ifdef CONFIG_DEBUG_FS +static int dpa_show(struct seq_file *m, void *v) +{ + seq_puts(m, "DEBUG_PAGEALLOC\n"); + seq_printf(m, "pool_size : %lu\n", pool_size); + seq_printf(m, "pool_pages : %lu\n", pool_pages); + seq_printf(m, "pool_low : %lu\n", pool_low); + seq_printf(m, "pool_used : %lu\n", pool_used); + seq_printf(m, "pool_failed : %lu\n", pool_failed); + + return 0; } + +static int dpa_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, dpa_show, NULL); +} + +static const struct file_operations dpa_fops = { + .open = dpa_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int __init debug_pagealloc_proc_init(void) +{ + struct dentry *de; + + de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL, + &dpa_fops); + if (!de) + return -ENOMEM; + + return 0; +} +__initcall(debug_pagealloc_proc_init); #endif +#ifdef CONFIG_HIBERNATION + +bool kernel_page_present(struct page *page) +{ + unsigned int level; + pte_t *pte; + + if (PageHighMem(page)) + return false; + + pte = lookup_address((unsigned long)page_address(page), &level); + return (pte_val(*pte) & _PAGE_PRESENT); +} + +#endif /* CONFIG_HIBERNATION */ + +#endif /* CONFIG_DEBUG_PAGEALLOC */ + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here.