X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=arch%2Fx86%2Fmm%2Finit_64.c;h=fb30486c82f7f7c21e036feebb9cb58f957d8a36;hb=3dd392a407d15250a501fa109cc1f93fee95ef85;hp=129618ca0ea274a980ba414b30e604e67c3217fd;hpb=29111f579f4f3f2a07385f931854ab0527ae7ea5;p=linux-2.6-omap-h63xx.git diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 129618ca0ea..fb30486c82f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -int direct_gbpages __meminitdata +int direct_gbpages #ifdef CONFIG_DIRECT_GBPAGES = 1 #endif @@ -88,7 +88,11 @@ early_param("gbpages", parse_direct_gbpages_on); int after_bootmem; -static __init void *spp_getpage(void) +/* + * NOTE: This function is marked __ref because it calls __init function + * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. + */ +static __ref void *spp_getpage(void) { void *ptr; @@ -221,7 +225,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) void __init cleanup_highmap(void) { unsigned long vaddr = __START_KERNEL_map; - unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; + unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; pmd_t *pmd = level2_kernel_pgt; pmd_t *last_pmd = pmd + PTRS_PER_PMD; @@ -237,7 +241,7 @@ static unsigned long __initdata table_start; static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; -static __meminit void *alloc_low_page(unsigned long *phys) +static __ref void *alloc_low_page(unsigned long *phys) { unsigned long pfn = table_end++; void *adr; @@ -258,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) return adr; } -static __meminit void unmap_low_page(void *adr) +static __ref void unmap_low_page(void *adr) { if (after_bootmem) return; @@ -267,7 +271,8 @@ static __meminit void unmap_low_page(void *adr) } static unsigned long __meminit -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, + pgprot_t prot) { unsigned pages = 0; unsigned long last_map_addr = end; @@ -285,32 +290,40 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) break; } + /* + * We will re-use the existing mapping. + * Xen for example has some special requirements, like mapping + * pagetable pages as RO. So assume someone who pre-setup + * these mappings are more intelligent. + */ if (pte_val(*pte)) continue; if (0) printk(" pte=%p addr=%lx pte=%016lx\n", pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); - set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); - last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; pages++; + set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); + last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; } + update_page_count(PG_LEVEL_4K, pages); return last_map_addr; } static unsigned long __meminit -phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end, + pgprot_t prot) { pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); - return phys_pte_init(pte, address, end); + return phys_pte_init(pte, address, end, prot); } static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { unsigned long pages = 0; unsigned long last_map_addr = end; @@ -321,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); pte_t *pte; + pgprot_t new_prot = prot; if (address >= end) { if (!after_bootmem) { @@ -331,25 +345,48 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, } if (pmd_val(*pmd)) { - if (!pmd_large(*pmd)) + if (!pmd_large(*pmd)) { + spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pte_update(pmd, address, - end); - continue; + end, prot); + spin_unlock(&init_mm.page_table_lock); + continue; + } + /* + * If we are ok with PG_LEVEL_2M mapping, then we will + * use the existing mapping, + * + * Otherwise, we will split the large page mapping but + * use the same existing protection bits except for + * large page, so that we don't violate Intel's TLB + * Application note (317080) which says, while changing + * the page sizes, new and old translations should + * not differ with respect to page frame and + * attributes. + */ + if (page_size_mask & (1 << PG_LEVEL_2M)) + continue; + new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); } if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + pfn_pte(address >> PAGE_SHIFT, + __pgprot(pgprot_val(prot) | _PAGE_PSE))); + spin_unlock(&init_mm.page_table_lock); last_map_addr = (address & PMD_MASK) + PMD_SIZE; continue; } pte = alloc_low_page(&pte_phys); - last_map_addr = phys_pte_init(pte, address, end); + last_map_addr = phys_pte_init(pte, address, end, new_prot); unmap_low_page(pte); + spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); + spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); return last_map_addr; @@ -357,14 +394,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, static unsigned long __meminit phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { pmd_t *pmd = pmd_offset(pud, 0); unsigned long last_map_addr; - spin_lock(&init_mm.page_table_lock); - last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); - spin_unlock(&init_mm.page_table_lock); + last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot); __flush_tlb_all(); return last_map_addr; } @@ -381,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, unsigned long pmd_phys; pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + pgprot_t prot = PAGE_KERNEL; if (addr >= end) break; @@ -392,30 +428,49 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, } if (pud_val(*pud)) { - if (!pud_large(*pud)) + if (!pud_large(*pud)) { last_map_addr = phys_pmd_update(pud, addr, end, - page_size_mask); - continue; + page_size_mask, prot); + continue; + } + /* + * If we are ok with PG_LEVEL_1G mapping, then we will + * use the existing mapping. + * + * Otherwise, we will split the gbpage mapping but use + * the same existing protection bits except for large + * page, so that we don't violate Intel's TLB + * Application note (317080) which says, while changing + * the page sizes, new and old translations should + * not differ with respect to page frame and + * attributes. + */ + if (page_size_mask & (1 << PG_LEVEL_1G)) + continue; + prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); } if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + spin_unlock(&init_mm.page_table_lock); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; continue; } pmd = alloc_low_page(&pmd_phys); + last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, + prot); + unmap_low_page(pmd); spin_lock(&init_mm.page_table_lock); - last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); - unmap_low_page(pmd); pud_populate(&init_mm, pud, __va(pmd_phys)); spin_unlock(&init_mm.page_table_lock); - } __flush_tlb_all(); + update_page_count(PG_LEVEL_1G, pages); return last_map_addr; @@ -432,27 +487,28 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -static void __init find_early_table_space(unsigned long end) +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) { unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); - if (direct_gbpages) { + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + if (use_gbpages) { unsigned long extra; extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; } else pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - if (cpu_has_pse) { + if (use_pse) { unsigned long extra; extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); /* * RED-PEN putting page tables only on node 0 could @@ -505,17 +561,16 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, continue; } - if (after_bootmem) - pud = pud_offset(pgd, start & PGDIR_MASK); - else - pud = alloc_low_page(&pud_phys); - + pud = alloc_low_page(&pud_phys); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); unmap_low_page(pud); - pgd_populate(&init_mm, pgd_offset_k(start), - __va(pud_phys)); + + spin_lock(&init_mm.page_table_lock); + pgd_populate(&init_mm, pgd, __va(pud_phys)); + spin_unlock(&init_mm.page_table_lock); } + __flush_tlb_all(); return last_map_addr; } @@ -559,6 +614,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, struct map_range mr[NR_RANGE_MR]; int nr_range, i; + int use_pse, use_gbpages; printk(KERN_INFO "init_memory_mapping\n"); @@ -572,9 +628,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, if (!after_bootmem) init_gbpages(); - if (direct_gbpages) +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. + * This will simplify cpa(), which otherwise needs to support splitting + * large pages into small in interrupt context, etc. + */ + use_pse = use_gbpages = 0; +#else + use_pse = cpu_has_pse; + use_gbpages = direct_gbpages; +#endif + + if (use_gbpages) page_size_mask |= 1 << PG_LEVEL_1G; - if (cpu_has_pse) + if (use_pse) page_size_mask |= 1 << PG_LEVEL_2M; memset(mr, 0, sizeof(mr)); @@ -635,7 +703,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, (mr[i].page_size_mask & (1<> 10, initsize >> 10); - - cpa_init(); } void free_init_pages(char *what, unsigned long begin, unsigned long end)