arch/x86/mm/pageattr.c (blob at "x86: cpa: rename global_flush_tlb() to cpa_flush_all()")
/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/e820.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

/*
 * Flushing functions
 */
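/*
 * Write back and invalidate every cache line in [addr, addr + size) with
 * CLFLUSH; boot_cpu_data.x86_clflush_size is the cache line size reported
 * by the CPU.
 */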
void clflush_cache_range(void *addr, int size)
{
        int i;

        for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
                clflush(addr+i);
}

static void __cpa_flush_all(void *arg)
{
        /*
         * Flush all to work around errata in early Athlons regarding
         * large page flushing.
         */
        __flush_tlb_all();

        if (boot_cpu_data.x86_model >= 4)
                wbinvd();
}

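/*
 * Flush TLBs and caches on every CPU. This is the heavy hammer used when
 * CLFLUSH is not available or when the attribute change failed.
 */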
static void cpa_flush_all(void)
{
        BUG_ON(irqs_disabled());

        on_each_cpu(__cpa_flush_all, NULL, 1, 1);
}

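/*
 * cpa_flush_range() hands the affected virtual range to the IPI handler
 * via this structure; each CPU then flushes its TLB and writes back the
 * touched cache lines with CLFLUSH.
 */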
struct clflush_data {
        unsigned long addr;
        int numpages;
};

static void __cpa_flush_range(void *arg)
{
        struct clflush_data *cld = arg;

        /*
         * We could optimize that further and do individual per page
         * tlb invalidates for a low number of pages. Caveat: we must
         * flush the high aliases on 64bit as well.
         */
        __flush_tlb_all();

        clflush_cache_range((void *) cld->addr, cld->numpages * PAGE_SIZE);
}

static void cpa_flush_range(unsigned long addr, int numpages)
{
        struct clflush_data cld;

        BUG_ON(irqs_disabled());

        cld.addr = addr;
        cld.numpages = numpages;

        on_each_cpu(__cpa_flush_range, &cld, 1, 1);
}

/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
 * right (again, ioremap() on BIOS memory is not uncommon) so this function
 * checks and fixes these known static required protection bits.
 */
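/*
 * For example: a set_memory_nx() request that covers kernel text would
 * otherwise mark it non-executable; the _PAGE_NX bit is filtered out here
 * instead of failing the call.
 */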
static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
{
        pgprot_t forbidden = __pgprot(0);

        /*
         * The BIOS area between 640K and 1MB needs to be executable for
         * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
         */
        if (within(__pa(address), BIOS_BEGIN, BIOS_END))
                pgprot_val(forbidden) |= _PAGE_NX;

        /*
         * The kernel text needs to be executable for obvious reasons.
         * This does not cover __inittext, since that is freed after init.
         */
        if (within(address, (unsigned long)_text, (unsigned long)_etext))
                pgprot_val(forbidden) |= _PAGE_NX;

#ifdef CONFIG_DEBUG_RODATA
        /* The .rodata section needs to be read-only */
        if (within(address, (unsigned long)__start_rodata,
                                (unsigned long)__end_rodata))
                pgprot_val(forbidden) |= _PAGE_RW;
#endif

        prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

        return prot;
}

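/*
 * Look up the kernel page-table entry for a virtual address and report at
 * which level the mapping was found (PG_LEVEL_NONE/2M/4K). For a large
 * mapping the returned pointer is actually the pmd entry.
 */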
pte_t *lookup_address(unsigned long address, int *level)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;

        *level = PG_LEVEL_NONE;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;

        *level = PG_LEVEL_2M;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;

        *level = PG_LEVEL_4K;
        return pte_offset_kernel(pmd, address);
}

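/*
 * Update a pmd/pte entry in init_mm. On 32-bit, when kernel PMDs are not
 * shared between page tables (!SHARED_KERNEL_PMD), every pgd on pgd_list
 * carries its own copy of the entry and each copy must be updated.
 */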
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        /* change init_mm */
        set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
        if (!SHARED_KERNEL_PMD) {
                struct page *page;

                for (page = pgd_list; page; page = (struct page *)page->index) {
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;

                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
                        pud = pud_offset(pgd, address);
                        pmd = pmd_offset(pud, address);
                        set_pte_atomic((pte_t *)pmd, pte);
                }
        }
#endif
}

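/*
 * Split one large kernel mapping into PTRS_PER_PTE small pages: allocate a
 * pte page, point each entry at the corresponding 4K frame with the large
 * page's protections, then install it under pgd_lock so a concurrent split
 * on another CPU is detected and backed out.
 */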
static int split_large_page(pte_t *kpte, unsigned long address)
{
        pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
        gfp_t gfp_flags = GFP_KERNEL;
        unsigned long flags;
        unsigned long addr;
        pte_t *pbase, *tmp;
        struct page *base;
        int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
        gfp_flags = GFP_ATOMIC;
#endif
        base = alloc_pages(gfp_flags, 0);
        if (!base)
                return -ENOMEM;

        spin_lock_irqsave(&pgd_lock, flags);
        /*
         * Check for races, another CPU might have split this page
         * up for us already:
         */
        tmp = lookup_address(address, &level);
        if (tmp != kpte) {
                WARN_ON_ONCE(1);
                goto out_unlock;
        }

        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
        paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

        /*
         * Install the new, split-up page table. Important detail here:
         *
         * On Intel the NX bit of all levels must be cleared to make a
         * page executable (see section 4.13.2 of the Intel 64 and IA-32
         * Architectures Software Developer's Manual).
         */
        ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
        __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
        base = NULL;

out_unlock:
        spin_unlock_irqrestore(&pgd_lock, flags);

        if (base)
                __free_pages(base, 0);

        return 0;
}

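/*
 * Change the protections of a single 4K page. If the address is still
 * covered by a large mapping, split that mapping first and retry, so that
 * only the requested page changes its attributes.
 */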
static int
__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
{
        struct page *kpte_page;
        int level, err = 0;
        pte_t *kpte;

#ifdef CONFIG_X86_32
        BUG_ON(pfn > max_low_pfn);
#endif

repeat:
        kpte = lookup_address(address, &level);
        if (!kpte)
                return -EINVAL;

        kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));

        prot = static_protections(prot, address);

        if (level == PG_LEVEL_4K) {
                WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PSE);
                set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));
        } else {
                /* Clear the PSE bit for the 4k level pages ! */
                pgprot_val(prot) = pgprot_val(prot) & ~_PAGE_PSE;

                err = split_large_page(kpte, address);
                if (!err)
                        goto repeat;
        }
        return err;
}

/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @prot:    New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 *
 * Modules and drivers should use the set_memory_* APIs instead.
 */

static int change_page_attr_addr(unsigned long address, pgprot_t prot)
{
        int err = 0, kernel_map = 0;
        unsigned long pfn = __pa(address) >> PAGE_SHIFT;

#ifdef CONFIG_X86_64
        if (address >= __START_KERNEL_map &&
                        address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

                address = (unsigned long)__va(__pa(address));
                kernel_map = 1;
        }
#endif

        if (!kernel_map || pte_present(pfn_pte(0, prot))) {
                err = __change_page_attr(address, pfn, prot);
                if (err)
                        return err;
        }

#ifdef CONFIG_X86_64
        /*
         * Handle kernel mapping too which aliases part of
         * lowmem:
         */
        if (__pa(address) < KERNEL_TEXT_SIZE) {
                unsigned long addr2;
                pgprot_t prot2;

                addr2 = __START_KERNEL_map + __pa(address);
                /* Make sure the kernel mappings stay executable */
                prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
                err = __change_page_attr(addr2, pfn, prot2);
        }
#endif

        return err;
}

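/*
 * Apply mask_set/mask_clr to the protections of numpages pages starting at
 * addr. No cache or TLB flushing is done here; that is the caller's job
 * (see change_page_attr_set_clr() below).
 */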
static int __change_page_attr_set_clr(unsigned long addr, int numpages,
                                      pgprot_t mask_set, pgprot_t mask_clr)
{
        pgprot_t new_prot;
        int level;
        pte_t *pte;
        int i, ret;

        for (i = 0; i < numpages ; i++) {

                pte = lookup_address(addr, &level);
                if (!pte)
                        return -EINVAL;

                new_prot = pte_pgprot(*pte);

                pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
                pgprot_val(new_prot) |= pgprot_val(mask_set);

                ret = change_page_attr_addr(addr, new_prot);
                if (ret)
                        return ret;
                addr += PAGE_SIZE;
        }

        return 0;
}

static int change_page_attr_set_clr(unsigned long addr, int numpages,
                                    pgprot_t mask_set, pgprot_t mask_clr)
{
        int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
                                             mask_clr);

        /*
         * On success we use clflush, when the CPU supports it, to
         * avoid wbinvd. If the CPU does not support clflush, and in
         * the error case, we fall back to cpa_flush_all() (which uses
         * wbinvd):
         */
        if (!ret && cpu_has_clflush)
                cpa_flush_range(addr, numpages);
        else
                cpa_flush_all();

        return ret;
}

static inline int change_page_attr_set(unsigned long addr, int numpages,
                                       pgprot_t mask)
{
        return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
}

static inline int change_page_attr_clear(unsigned long addr, int numpages,
                                         pgprot_t mask)
{
        /*
         * Go through change_page_attr_set_clr() so that clearing
         * attributes gets the same cache/TLB flush treatment as
         * setting them.
         */
        return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
}

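/*
 * Example usage (illustrative only; buf and nrpages are hypothetical and
 * must refer to a page-aligned region in the direct mapping):
 *
 *      set_memory_uc((unsigned long)buf, nrpages);
 *      ... let the device access buf uncached ...
 *      set_memory_wb((unsigned long)buf, nrpages);
 */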
int set_memory_uc(unsigned long addr, int numpages)
{
        return change_page_attr_set(addr, numpages,
                                    __pgprot(_PAGE_PCD | _PAGE_PWT));
}
EXPORT_SYMBOL(set_memory_uc);

int set_memory_wb(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages,
                                      __pgprot(_PAGE_PCD | _PAGE_PWT));
}
EXPORT_SYMBOL(set_memory_wb);

int set_memory_x(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
}
EXPORT_SYMBOL(set_memory_nx);

int set_memory_ro(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
}

int set_memory_rw(unsigned long addr, int numpages)
{
        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
}

int set_memory_np(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
}

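/*
 * The set_pages_* wrappers take a struct page and operate on its linear
 * mapping address, so they are only meaningful for pages in the direct
 * mapping (not for highmem pages).
 */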
int set_pages_uc(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_uc(addr, numpages);
}
EXPORT_SYMBOL(set_pages_uc);

int set_pages_wb(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_pages_wb);

int set_pages_x(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_x(addr, numpages);
}
EXPORT_SYMBOL(set_pages_x);

int set_pages_nx(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_nx(addr, numpages);
}
EXPORT_SYMBOL(set_pages_nx);

int set_pages_ro(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_ro(addr, numpages);
}

int set_pages_rw(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_rw(addr, numpages);
}


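/*
 * Non-flushing variants of the set/clear helpers, used only by the
 * debugging code below and (with CONFIG_CPA_DEBUG) by the included
 * self-test.
 */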
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
static inline int __change_page_attr_set(unsigned long addr, int numpages,
                                         pgprot_t mask)
{
        return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
}

static inline int __change_page_attr_clear(unsigned long addr, int numpages,
                                           pgprot_t mask)
{
        return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
}
#endif

#ifdef CONFIG_DEBUG_PAGEALLOC

static int __set_pages_p(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return __change_page_attr_set(addr, numpages,
                                      __pgprot(_PAGE_PRESENT | _PAGE_RW));
}

static int __set_pages_np(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return __change_page_attr_clear(addr, numpages,
                                        __pgprot(_PAGE_PRESENT));
}

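/*
 * CONFIG_DEBUG_PAGEALLOC: map pages when they are handed out by the page
 * allocator and unmap them (clear _PAGE_PRESENT) when they are freed, so
 * stray accesses to freed memory fault immediately.
 */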
void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;
        if (!enable) {
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);
        }

        /*
         * If the page allocator is not up yet, do not call c_p_a():
         */
        if (!debug_pagealloc_enabled)
                return;

        /*
         * The return value is ignored - the calls cannot fail, since
         * large pages are disabled at boot time:
         */
        if (enable)
                __set_pages_p(page, numpages);
        else
                __set_pages_np(page, numpages);

        /*
         * We should send an IPI and flush all TLBs, but that can
         * deadlock, so we only flush the current CPU's TLB:
         */
        __flush_tlb_all();
}
#endif

/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
 */
#ifdef CONFIG_CPA_DEBUG
#include "pageattr-test.c"
#endif