Merge branch 'devel' into next
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 072e9422c9145ab682296b0a239002211e2fc399..7e7c3969f7a2d01f0f9d2d2542b046fc9d0aa157 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -79,36 +79,6 @@ static int dbg = 1;
        }
 #endif
 
-#define PT64_PT_BITS 9
-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
-#define PT32_PT_BITS 10
-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
-
-#define PT_WRITABLE_SHIFT 1
-
-#define PT_PRESENT_MASK (1ULL << 0)
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
-#define PT_PWT_MASK (1ULL << 3)
-#define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
-#define PT_PAT_MASK (1ULL << 7)
-#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_SHIFT 63
-#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
-
-#define PT_PAT_SHIFT 7
-#define PT_DIR_PAT_SHIFT 12
-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
-
-#define PT32_DIR_PSE36_SIZE 4
-#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK \
-       (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
@@ -154,10 +124,6 @@ static int dbg = 1;
 #define PFERR_USER_MASK (1U << 2)
 #define PFERR_FETCH_MASK (1U << 4)
 
-#define PT64_ROOT_LEVEL 4
-#define PT32_ROOT_LEVEL 2
-#define PT32E_ROOT_LEVEL 3
-
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
+static u64 __read_mostly shadow_base_present_pte;
+static u64 __read_mostly shadow_nx_mask;
+static u64 __read_mostly shadow_x_mask;        /* mutually exclusive with nx_mask */
+static u64 __read_mostly shadow_user_mask;
+static u64 __read_mostly shadow_accessed_mask;
+static u64 __read_mostly shadow_dirty_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -194,6 +166,28 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
+void kvm_mmu_set_base_ptes(u64 base_pte)
+{
+       shadow_base_present_pte = base_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
+
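+/*
+ * Let the backend choose which spte bits encode the user, accessed,
+ * dirty, nx and x permissions: hardware-assisted paging formats need
+ * not match the classic x86 page table layout.
+ */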
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+               u64 dirty_mask, u64 nx_mask, u64 x_mask)
+{
+       shadow_user_mask = user_mask;
+       shadow_accessed_mask = accessed_mask;
+       shadow_dirty_mask = dirty_mask;
+       shadow_nx_mask = nx_mask;
+       shadow_x_mask = x_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.cr0 & X86_CR0_WP;
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
 
 static int is_dirty_pte(unsigned long pte)
 {
-       return pte & PT_DIRTY_MASK;
+       return pte & shadow_dirty_mask;
 }
 
 static int is_rmap_pte(u64 pte)
@@ -240,6 +229,11 @@ static int is_rmap_pte(u64 pte)
        return is_shadow_present_pte(pte);
 }
 
+static pfn_t spte_to_pfn(u64 pte)
+{
+       return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
 static gfn_t pse36_gfn_delta(u32 gpte)
 {
        int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
@@ -382,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 
        write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
        *write_count += 1;
-       WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -534,19 +527,20 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        struct kvm_rmap_desc *desc;
        struct kvm_rmap_desc *prev_desc;
        struct kvm_mmu_page *sp;
-       struct page *page;
+       pfn_t pfn;
        unsigned long *rmapp;
        int i;
 
        if (!is_rmap_pte(*spte))
                return;
        sp = page_header(__pa(spte));
-       page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-       mark_page_accessed(page);
+       pfn = spte_to_pfn(*spte);
+       if (*spte & shadow_accessed_mask)
+               kvm_set_pfn_accessed(pfn);
        if (is_writeble_pte(*spte))
-               kvm_release_page_dirty(page);
+               kvm_release_pfn_dirty(pfn);
        else
-               kvm_release_page_clean(page);
+               kvm_release_pfn_clean(pfn);
        rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
        if (!*rmapp) {
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -626,6 +620,19 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
                }
                spte = rmap_next(kvm, rmapp, spte);
        }
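+       /*
+        * The guest may have written through a spte while it was still
+        * writable; every spte in this rmap chain maps the same pfn, so
+        * marking one of them dirty is enough to preserve that state.
+        */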
+       if (write_protected) {
+               pfn_t pfn;
+
+               spte = rmap_next(kvm, rmapp, NULL);
+               pfn = spte_to_pfn(*spte);
+               kvm_set_pfn_dirty(pfn);
+       }
+
        /* check for huge page mappings */
        rmapp = gfn_to_rmap(kvm, gfn, 1);
        spte = rmap_next(kvm, rmapp, NULL);
@@ -638,6 +640,8 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
                        rmap_remove(kvm, spte);
                        --kvm->stat.lpages;
                        set_shadow_pte(spte, shadow_trap_nonpresent_pte);
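+                       /* rmap_remove() may have freed the desc; restart the walk */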
+                       spte = NULL;
                        write_protected = 1;
                }
                spte = rmap_next(kvm, rmapp, spte);
@@ -656,7 +659,7 @@ static int is_empty_shadow_page(u64 *spt)
        u64 *end;
 
        for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-               if (*pos != shadow_trap_nonpresent_pte) {
+               if (is_shadow_present_pte(*pos)) {
                        printk(KERN_ERR "%s: %p %llx\n", __func__,
                               pos, *pos);
                        return 0;
@@ -836,9 +839,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        sp->gfn = gfn;
        sp->role = role;
        hlist_add_head(&sp->hash_link, bucket);
-       vcpu->arch.mmu.prefetch_page(vcpu, sp);
        if (!metaphysical)
                rmap_write_protect(vcpu->kvm, gfn);
+       vcpu->arch.mmu.prefetch_page(vcpu, sp);
        return sp;
 }
 
@@ -1020,12 +1023,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                         unsigned pt_access, unsigned pte_access,
                         int user_fault, int write_fault, int dirty,
                         int *ptwrite, int largepage, gfn_t gfn,
-                        struct page *page)
+                        pfn_t pfn, bool speculative)
 {
        u64 spte;
        int was_rmapped = 0;
        int was_writeble = is_writeble_pte(*shadow_pte);
-       hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
        pgprintk("%s: spte %llx access %x write_fault %d"
                 " user_fault %d gfn %lx\n",
@@ -1043,9 +1045,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
                        child = page_header(pte & PT64_BASE_ADDR_MASK);
                        mmu_page_remove_parent_pte(child, shadow_pte);
-               } else if (host_pfn != page_to_pfn(page)) {
+               } else if (pfn != spte_to_pfn(*shadow_pte)) {
                        pgprintk("hfn old %lx new %lx\n",
-                                host_pfn, page_to_pfn(page));
+                                spte_to_pfn(*shadow_pte), pfn);
                        rmap_remove(vcpu->kvm, shadow_pte);
                } else {
                        if (largepage)
@@ -1060,29 +1062,27 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
         * whether the guest actually used the pte (in order to detect
         * demand paging).
         */
-       spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+       spte = shadow_base_present_pte | shadow_dirty_mask;
+       if (!speculative)
+               spte |= shadow_accessed_mask;
        if (!dirty)
                pte_access &= ~ACC_WRITE_MASK;
-       if (!(pte_access & ACC_EXEC_MASK))
-               spte |= PT64_NX_MASK;
-
-       spte |= PT_PRESENT_MASK;
+       if (pte_access & ACC_EXEC_MASK)
+               spte |= shadow_x_mask;
+       else
+               spte |= shadow_nx_mask;
        if (pte_access & ACC_USER_MASK)
-               spte |= PT_USER_MASK;
+               spte |= shadow_user_mask;
        if (largepage)
                spte |= PT_PAGE_SIZE_MASK;
 
-       spte |= page_to_phys(page);
+       spte |= (u64)pfn << PAGE_SHIFT;
 
        if ((pte_access & ACC_WRITE_MASK)
            || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
                struct kvm_mmu_page *shadow;
 
                spte |= PT_WRITABLE_MASK;
-               if (user_fault) {
-                       mmu_unshadow(vcpu->kvm, gfn);
-                       goto unshadowed;
-               }
 
                shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
                if (shadow ||
@@ -1099,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                }
        }
 
-unshadowed:
-
        if (pte_access & ACC_WRITE_MASK)
                mark_page_dirty(vcpu->kvm, gfn);
 
@@ -1117,12 +1115,12 @@ unshadowed:
        if (!was_rmapped) {
                rmap_add(vcpu, shadow_pte, gfn, largepage);
                if (!is_rmap_pte(*shadow_pte))
-                       kvm_release_page_clean(page);
+                       kvm_release_pfn_clean(pfn);
        } else {
                if (was_writeble)
-                       kvm_release_page_dirty(page);
+                       kvm_release_pfn_dirty(pfn);
                else
-                       kvm_release_page_clean(page);
+                       kvm_release_pfn_clean(pfn);
        }
        if (!ptwrite || !*ptwrite)
                vcpu->arch.last_pte_updated = shadow_pte;
@@ -1133,7 +1131,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-                          int largepage, gfn_t gfn, struct page *page,
+                          int largepage, gfn_t gfn, pfn_t pfn,
                           int level)
 {
        hpa_t table_addr = vcpu->arch.mmu.root_hpa;
@@ -1148,13 +1146,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
                if (level == 1) {
                        mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-                                    0, write, 1, &pt_write, 0, gfn, page);
+                                    0, write, 1, &pt_write, 0, gfn, pfn, false);
                        return pt_write;
                }
 
                if (largepage && level == 2) {
                        mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-                                   0, write, 1, &pt_write, 1, gfn, page);
+                                    0, write, 1, &pt_write, 1, gfn, pfn, false);
                        return pt_write;
                }
 
@@ -1169,12 +1167,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
                                                     1, ACC_ALL, &table[index]);
                        if (!new_table) {
                                pgprintk("nonpaging_map: ENOMEM\n");
-                               kvm_release_page_clean(page);
+                               kvm_release_pfn_clean(pfn);
                                return -ENOMEM;
                        }
 
-                       table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-                               | PT_WRITABLE_MASK | PT_USER_MASK;
+                       table[index] = __pa(new_table->spt)
+                               | PT_PRESENT_MASK | PT_WRITABLE_MASK
+                               | shadow_user_mask | shadow_x_mask;
                }
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
        }
@@ -1184,10 +1183,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
        int r;
        int largepage = 0;
-
-       struct page *page;
-
-       down_read(&vcpu->kvm->slots_lock);
+       pfn_t pfn;
 
        down_read(&current->mm->mmap_sem);
        if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1195,23 +1191,21 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
                largepage = 1;
        }
 
-       page = gfn_to_page(vcpu->kvm, gfn);
+       pfn = gfn_to_pfn(vcpu->kvm, gfn);
        up_read(&current->mm->mmap_sem);
 
        /* mmio */
-       if (is_error_page(page)) {
-               kvm_release_page_clean(page);
-               up_read(&vcpu->kvm->slots_lock);
+       if (is_error_pfn(pfn)) {
+               kvm_release_pfn_clean(pfn);
                return 1;
        }
 
        spin_lock(&vcpu->kvm->mmu_lock);
        kvm_mmu_free_some_pages(vcpu);
-       r = __direct_map(vcpu, v, write, largepage, gfn, page,
+       r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
                         PT32E_ROOT_LEVEL);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
-       up_read(&vcpu->kvm->slots_lock);
 
        return r;
 }
@@ -1234,7 +1228,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                return;
        spin_lock(&vcpu->kvm->mmu_lock);
-#ifdef CONFIG_X86_64
        if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
                hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1246,7 +1239,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                spin_unlock(&vcpu->kvm->mmu_lock);
                return;
        }
-#endif
        for (i = 0; i < 4; ++i) {
                hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1272,7 +1264,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
        root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
-#ifdef CONFIG_X86_64
        if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
                hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1287,7 +1278,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
                vcpu->arch.mmu.root_hpa = root;
                return;
        }
-#endif
        metaphysical = !is_paging(vcpu);
        if (tdp_enabled)
                metaphysical = 1;
@@ -1341,7 +1331,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                                u32 error_code)
 {
-       struct page *page;
+       pfn_t pfn;
        int r;
        int largepage = 0;
        gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -1358,18 +1348,17 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                gfn &= ~(KVM_PAGES_PER_HPAGE-1);
                largepage = 1;
        }
-       page = gfn_to_page(vcpu->kvm, gfn);
-       if (is_error_page(page)) {
-               kvm_release_page_clean(page);
-               up_read(&current->mm->mmap_sem);
+       pfn = gfn_to_pfn(vcpu->kvm, gfn);
+       up_read(&current->mm->mmap_sem);
+       if (is_error_pfn(pfn)) {
+               kvm_release_pfn_clean(pfn);
                return 1;
        }
        spin_lock(&vcpu->kvm->mmu_lock);
        kvm_mmu_free_some_pages(vcpu);
        r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-                        largepage, gfn, page, TDP_ROOT_LEVEL);
+                        largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
        spin_unlock(&vcpu->kvm->mmu_lock);
-       up_read(&current->mm->mmap_sem);
 
        return r;
 }
@@ -1475,7 +1464,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
        context->page_fault = tdp_page_fault;
        context->free = nonpaging_free;
        context->prefetch_page = nonpaging_prefetch_page;
-       context->shadow_root_level = TDP_ROOT_LEVEL;
+       context->shadow_root_level = kvm_x86_ops->get_tdp_level();
        context->root_hpa = INVALID_PAGE;
 
        if (!is_paging(vcpu)) {
@@ -1512,6 +1501,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
+       vcpu->arch.update_pte.pfn = bad_pfn;
+
        if (tdp_enabled)
                return init_kvm_tdp_mmu(vcpu);
        else
@@ -1584,11 +1575,17 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
                                  u64 *spte,
                                  const void *new)
 {
-       if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
-           && !vcpu->arch.update_pte.largepage) {
-               ++vcpu->kvm->stat.mmu_pde_zapped;
-               return;
-       }
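+       /*
+        * Only a last-level spte, or a large-page spte of a 64-bit/PAE
+        * guest, can be updated in place; anything else stays zapped.
+        */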
+       if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+               if (!vcpu->arch.update_pte.largepage ||
+                   sp->role.glevels == PT32_ROOT_LEVEL) {
+                       ++vcpu->kvm->stat.mmu_pde_zapped;
+                       return;
+               }
+       }
 
        ++vcpu->kvm->stat.mmu_pte_updated;
        if (sp->role.glevels == PT32_ROOT_LEVEL)
@@ -1622,7 +1615,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 {
        u64 *spte = vcpu->arch.last_pte_updated;
 
-       return !!(spte && (*spte & PT_ACCESSED_MASK));
+       return !!(spte && (*spte & shadow_accessed_mask));
 }
 
 static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1631,7 +1624,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        gfn_t gfn;
        int r;
        u64 gpte = 0;
-       struct page *page;
+       pfn_t pfn;
 
        vcpu->arch.update_pte.largepage = 0;
 
@@ -1667,15 +1660,15 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                gfn &= ~(KVM_PAGES_PER_HPAGE-1);
                vcpu->arch.update_pte.largepage = 1;
        }
-       page = gfn_to_page(vcpu->kvm, gfn);
+       pfn = gfn_to_pfn(vcpu->kvm, gfn);
        up_read(&current->mm->mmap_sem);
 
-       if (is_error_page(page)) {
-               kvm_release_page_clean(page);
+       if (is_error_pfn(pfn)) {
+               kvm_release_pfn_clean(pfn);
                return;
        }
        vcpu->arch.update_pte.gfn = gfn;
-       vcpu->arch.update_pte.page = page;
+       vcpu->arch.update_pte.pfn = pfn;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1780,9 +1773,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        }
        kvm_mmu_audit(vcpu, "post pte write");
        spin_unlock(&vcpu->kvm->mmu_lock);
-       if (vcpu->arch.update_pte.page) {
-               kvm_release_page_clean(vcpu->arch.update_pte.page);
-               vcpu->arch.update_pte.page = NULL;
+       if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+               kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+               vcpu->arch.update_pte.pfn = bad_pfn;
        }
 }
 
@@ -1791,9 +1784,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
        gpa_t gpa;
        int r;
 
-       down_read(&vcpu->kvm->slots_lock);
        gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
-       up_read(&vcpu->kvm->slots_lock);
 
        spin_lock(&vcpu->kvm->mmu_lock);
        r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -1864,6 +1855,7 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu)
                sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
                                  struct kvm_mmu_page, link);
                kvm_mmu_zap_page(vcpu->kvm, sp);
+               cond_resched();
        }
        free_page((unsigned long)vcpu->arch.mmu.pae_root);
 }
@@ -1956,7 +1948,59 @@ void kvm_mmu_zap_all(struct kvm *kvm)
        kvm_flush_remote_tlbs(kvm);
 }
 
-void kvm_mmu_module_exit(void)
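+/* Zap one shadow page from the tail of the (roughly LRU) active list. */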
+void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+{
+       struct kvm_mmu_page *page;
+
+       page = container_of(kvm->arch.active_mmu_pages.prev,
+                           struct kvm_mmu_page, link);
+       kvm_mmu_zap_page(kvm, page);
+}
+
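+/*
+ * Shrinker callback: under host memory pressure, zap one shadow page
+ * from the first VM that has any allocated and rotate that VM to the
+ * tail of vm_list, so repeated calls spread the reclaim across VMs.
+ */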
+static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+       struct kvm *kvm;
+       struct kvm *kvm_freed = NULL;
+       int cache_count = 0;
+
+       spin_lock(&kvm_lock);
+
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               int npages;
+
+               spin_lock(&kvm->mmu_lock);
+               npages = kvm->arch.n_alloc_mmu_pages -
+                        kvm->arch.n_free_mmu_pages;
+               cache_count += npages;
+               if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
+                       kvm_mmu_remove_one_alloc_mmu_page(kvm);
+                       cache_count--;
+                       kvm_freed = kvm;
+               }
+               nr_to_scan--;
+
+               spin_unlock(&kvm->mmu_lock);
+       }
+       if (kvm_freed)
+               list_move_tail(&kvm_freed->vm_list, &vm_list);
+
+       spin_unlock(&kvm_lock);
+
+       return cache_count;
+}
+
+static struct shrinker mmu_shrinker = {
+       .shrink = mmu_shrink,
+       .seeks = DEFAULT_SEEKS * 10,
+};
+
+static void mmu_destroy_caches(void)
 {
        if (pte_chain_cache)
                kmem_cache_destroy(pte_chain_cache);
@@ -1966,6 +2004,12 @@ void kvm_mmu_module_exit(void)
                kmem_cache_destroy(mmu_page_header_cache);
 }
 
+void kvm_mmu_module_exit(void)
+{
+       mmu_destroy_caches();
+       unregister_shrinker(&mmu_shrinker);
+}
+
 int kvm_mmu_module_init(void)
 {
        pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -1985,10 +2029,12 @@ int kvm_mmu_module_init(void)
        if (!mmu_page_header_cache)
                goto nomem;
 
+       register_shrinker(&mmu_shrinker);
+
        return 0;
 
 nomem:
-       kvm_mmu_module_exit();
+       mmu_destroy_caches();
        return -ENOMEM;
 }
 
@@ -2046,7 +2092,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
        if (r)
                return r;
 
-       if (!__emulator_write_phys(vcpu, addr, &value, bytes))
+       if (!emulator_write_phys(vcpu, addr, &value, bytes))
                return -EFAULT;
 
        return 1;
@@ -2110,9 +2156,6 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
        int r;
        struct kvm_pv_mmu_op_buffer buffer;
 
-       down_read(&vcpu->kvm->slots_lock);
-       down_read(&current->mm->mmap_sem);
-
        buffer.ptr = buffer.buf;
        buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
        buffer.processed = 0;
@@ -2132,8 +2175,6 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
        r = 1;
 out:
        *ret = buffer.processed;
-       up_read(&current->mm->mmap_sem);
-       up_read(&vcpu->kvm->slots_lock);
        return r;
 }
 
@@ -2173,8 +2214,8 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                        audit_mappings_page(vcpu, ent, va, level - 1);
                } else {
                        gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-                       struct page *page = gpa_to_page(vcpu, gpa);
-                       hpa_t hpa = page_to_phys(page);
+                       pfn_t pfn = gpa_to_pfn(vcpu, gpa);
+                       hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
 
                        if (is_shadow_present_pte(ent)
                            && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -2187,7 +2227,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                                 && !is_error_hpa(hpa))
                                printk(KERN_ERR "audit: (%s) notrap shadow,"
                                       " valid guest gva %lx\n", audit_msg, va);
-                       kvm_release_page_clean(page);
+                       kvm_release_pfn_clean(pfn);
 
                }
        }