When more than 4 GB of memory is installed, don't map the big hole below 4 GB.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
        if (!phys || !size)
                return NULL;
 
-       if (phys+size <= (max_pfn_mapped << PAGE_SHIFT))
+       if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
                return __va(phys);
 
        offset = phys & (PAGE_SIZE - 1);
 
                 * Don't do it for gbpages because there seems very little
                 * benefit in doing so.
                 */
-               if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-                   (tseg >> PMD_SHIFT) <
-                       (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+               if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+                   if ((tseg>>PMD_SHIFT) <
+                               (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+                       ((tseg>>PMD_SHIFT) <
+                               (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+                        (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
                        set_memory_4k((unsigned long)__va(tseg), 1);
+               }
        }
 }
 
 
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end(void)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
        int i;
        unsigned long last_pfn = 0;
 
        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
+               unsigned long start_pfn;
                unsigned long end_pfn;
 
-               if (ei->type != E820_RAM)
+               if (ei->type != type)
                        continue;
 
+               start_pfn = ei->addr >> PAGE_SHIFT;
                end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+
+               if (start_pfn >= limit_pfn)
+                       continue;
+               if (end_pfn > limit_pfn) {
+                       last_pfn = limit_pfn;
+                       break;
+               }
                if (end_pfn > last_pfn)
                        last_pfn = end_pfn;
        }
                         last_pfn, max_arch_pfn);
        return last_pfn;
 }
+unsigned long __init e820_end_of_ram_pfn(void)
+{
+       return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+}
 
+unsigned long __init e820_end_of_low_ram_pfn(void)
+{
+       return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+}
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
                 * the real mem size before original memory map is
                 * reset.
                 */
-               saved_max_pfn = e820_end();
+               saved_max_pfn = e820_end_of_ram_pfn();
 #endif
                e820.nr_map = 0;
                userdef = 1;
 
                size = md->num_pages << EFI_PAGE_SHIFT;
                end = md->phys_addr + size;
 
-               if (PFN_UP(end) <= max_pfn_mapped)
+               if (PFN_UP(end) <= max_low_pfn_mapped)
                        va = __va(md->phys_addr);
                else
                        va = efi_ioremap(md->phys_addr, size);
 
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       max_pfn = e820_end();
+       max_pfn = e820_end_of_ram_pfn();
 
        /* preallocate 4k for mptable mpc */
        early_reserve_e820_mpc_new();
        /* update e820 for memory not covered by WB MTRRs */
        mtrr_bp_init();
        if (mtrr_trim_uncached_memory(max_pfn))
-               max_pfn = e820_end();
+               max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
        /* max_low_pfn get updated here */
 
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
-       max_low_pfn = max_pfn;
+       if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+               max_low_pfn = e820_end_of_low_ram_pfn();
+       else
+               max_low_pfn = max_pfn;
+
        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
        /* max_pfn_mapped is updated here */
-       max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+       max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+       max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+       if (max_pfn > max_low_pfn) {
+               max_pfn_mapped = init_memory_mapping(1UL<<32,
+                                                    max_pfn<<PAGE_SHIFT);
+               /* can we preserve max_low_pfn? */
+               max_low_pfn = max_pfn;
+       }
+#endif
 
        /*
         * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
 
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 static unsigned long dma_reserve __initdata;
 
                set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
        if (address >= (unsigned long)__va(0) &&
+               address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+               split_page_count(level);
+
+#ifdef CONFIG_X86_64
+       if (address >= (unsigned long)__va(1UL<<32) &&
                address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
                split_page_count(level);
+#endif
 
        /*
         * Install the new, split up pagetable. Important details here:
        if (cpa->pfn > max_pfn_mapped)
                return 0;
 
+#ifdef CONFIG_X86_64
+       if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+               return 0;
+#endif
        /*
         * No need to redo, when the primary call touched the direct
         * mapping already:
         */
-       if (!within(cpa->vaddr, PAGE_OFFSET,
-                   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+       if (!(within(cpa->vaddr, PAGE_OFFSET,
+                   PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+#ifdef CONFIG_X86_64
+               || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+                   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+#endif
+       )) {
 
                alias_cpa = *cpa;
                alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
 
        if (retval < 0)
                return 0;
 
-       if (pfn <= max_pfn_mapped &&
+       if (((pfn <= max_low_pfn_mapped) ||
+            (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
            ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
                free_memtype(offset, offset + size);
                printk(KERN_INFO
 
                flags = new_flags;
        }
 
-       if (vma->vm_pgoff <= max_pfn_mapped &&
+       if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
+            (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+             vma->vm_pgoff <= max_pfn_mapped)) &&
            ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
                free_memtype(addr, addr + len);
                return -EINVAL;
 
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
-extern unsigned long e820_end(void);
+extern unsigned long e820_end_of_ram_pfn(void);
+extern unsigned long e820_end_of_low_ram_pfn(void);
 extern int e820_find_active_region(const struct e820entry *ei,
                                  unsigned long start_pfn,
                                  unsigned long last_pfn,
 
 extern void unmap_devmem(unsigned long pfn, unsigned long size,
                         pgprot_t vma_prot);
 
+extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
 struct page;