X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fkvm%2Fmmu.c;h=e8e228118de9be496660ac8fa9ece297a39c1b57;hb=aba297927d1d558c7a94548135133bdf9172708a;hp=a1a93368f314d4e89226c3ab1b78dd9934135d42;hpb=0ce3c83a9c22f59937b86c80b478dfbffe54dbab;p=linux-2.6-omap-h63xx.git diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a1a93368f31..e8e228118de 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} static int dbg = 1; #endif +#ifndef MMU_DEBUG +#define ASSERT(x) do { } while (0) +#else #define ASSERT(x) \ if (!(x)) { \ printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ __FILE__, __LINE__, #x); \ } +#endif #define PT64_PT_BITS 9 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) @@ -131,7 +135,7 @@ static int dbg = 1; (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) -#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK) +#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) #define PT64_DIR_BASE_ADDR_MASK \ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) @@ -159,6 +163,9 @@ struct kvm_rmap_desc { struct kvm_rmap_desc *more; }; +static struct kmem_cache *pte_chain_cache; +static struct kmem_cache *rmap_desc_cache; + static int is_write_protection(struct kvm_vcpu *vcpu) { return vcpu->cr0 & CR0_WP_MASK; @@ -196,14 +203,15 @@ static int is_rmap_pte(u64 pte) } static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, - size_t objsize, int min) + struct kmem_cache *base_cache, int min, + gfp_t gfp_flags) { void *obj; if (cache->nobjs >= min) return 0; while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - obj = kzalloc(objsize, GFP_NOWAIT); + obj = kmem_cache_zalloc(base_cache, gfp_flags); if (!obj) return -ENOMEM; cache->objects[cache->nobjs++] = obj; @@ -217,20 +225,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) kfree(mc->objects[--mc->nobjs]); } -static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) +static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) { int r; r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, - sizeof(struct kvm_pte_chain), 4); + pte_chain_cache, 4, gfp_flags); if (r) goto out; r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, - sizeof(struct kvm_rmap_desc), 1); + rmap_desc_cache, 1, gfp_flags); out: return r; } +static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) +{ + int r; + + r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); + if (r < 0) { + spin_unlock(&vcpu->kvm->lock); + kvm_arch_ops->vcpu_put(vcpu); + r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); + kvm_arch_ops->vcpu_load(vcpu); + spin_lock(&vcpu->kvm->lock); + } + return r; +} + static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); @@ -390,13 +413,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm *kvm = vcpu->kvm; struct page *page; - struct kvm_memory_slot *slot; struct kvm_rmap_desc *desc; u64 *spte; - slot = gfn_to_memslot(kvm, gfn); - BUG_ON(!slot); - page = gfn_to_page(slot, gfn); + page = gfn_to_page(kvm, gfn); + BUG_ON(!page); while (page_private(page)) { if (!(page_private(page) & 1)) @@ -406,8 +427,8 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) spte = desc->shadow_ptes[0]; } BUG_ON(!spte); - BUG_ON((*spte & PT64_BASE_ADDR_MASK) != - page_to_pfn(page) << PAGE_SHIFT); + BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT + != page_to_pfn(page)); BUG_ON(!(*spte & PT_PRESENT_MASK)); BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); @@ -417,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) } } +#ifdef MMU_DEBUG static int is_empty_shadow_page(hpa_t page_hpa) { u64 *pos; @@ -431,15 +453,15 @@ static int is_empty_shadow_page(hpa_t page_hpa) } return 1; } +#endif static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) { struct kvm_mmu_page *page_head = page_header(page_hpa); ASSERT(is_empty_shadow_page(page_hpa)); - list_del(&page_head->link); page_head->page_hpa = page_hpa; - list_add(&page_head->link, &vcpu->free_pages); + list_move(&page_head->link, &vcpu->free_pages); ++vcpu->kvm->n_free_mmu_pages; } @@ -457,11 +479,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, return NULL; page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); - list_del(&page->link); - list_add(&page->link, &vcpu->kvm->active_mmu_pages); + list_move(&page->link, &vcpu->kvm->active_mmu_pages); ASSERT(is_empty_shadow_page(page->page_hpa)); page->slot_bitmap = 0; - page->global = 1; page->multimapped = 0; page->parent_pte = parent_pte; --vcpu->kvm->n_free_mmu_pages; @@ -569,6 +589,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int metaphysical, + unsigned hugepage_access, u64 *parent_pte) { union kvm_mmu_page_role role; @@ -582,6 +603,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.glevels = vcpu->mmu.root_level; role.level = level; role.metaphysical = metaphysical; + role.hugepage_access = hugepage_access; if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; @@ -669,10 +691,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, if (!page->root_count) { hlist_del(&page->hash_link); kvm_mmu_free_page(vcpu, page->page_hpa); - } else { - list_del(&page->link); - list_add(&page->link, &vcpu->kvm->active_mmu_pages); - } + } else + list_move(&page->link, &vcpu->kvm->active_mmu_pages); } static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -714,14 +734,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - struct kvm_memory_slot *slot; struct page *page; ASSERT((gpa & HPA_ERR_MASK) == 0); - slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); - if (!slot) + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + if (!page) return gpa | HPA_ERR_MASK; - page = gfn_to_page(slot, gpa >> PAGE_SHIFT); return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE-1)); } @@ -735,6 +753,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) return gpa_to_hpa(vcpu, gpa); } +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) +{ + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); + + if (gpa == UNMAPPED_GVA) + return NULL; + return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); +} + static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } @@ -772,7 +799,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) >> PAGE_SHIFT; new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, v, level - 1, - 1, &table[index]); + 1, 0, &table[index]); if (!new_table) { pgprintk("nonpaging_map: ENOMEM\n"); return -ENOMEM; @@ -804,10 +831,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) for (i = 0; i < 4; ++i) { hpa_t root = vcpu->mmu.pae_root[i]; - ASSERT(VALID_PAGE(root)); - root &= PT64_BASE_ADDR_MASK; - page = page_header(root); - --page->root_count; + if (root) { + ASSERT(VALID_PAGE(root)); + root &= PT64_BASE_ADDR_MASK; + page = page_header(root); + --page->root_count; + } vcpu->mmu.pae_root[i] = INVALID_PAGE; } vcpu->mmu.root_hpa = INVALID_PAGE; @@ -827,7 +856,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); page = kvm_mmu_get_page(vcpu, root_gfn, 0, - PT64_ROOT_LEVEL, 0, NULL); + PT64_ROOT_LEVEL, 0, 0, NULL); root = page->page_hpa; ++page->root_count; vcpu->mmu.root_hpa = root; @@ -838,13 +867,17 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->mmu.pae_root[i]; ASSERT(!VALID_PAGE(root)); - if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) + if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) { + if (!is_present_pte(vcpu->pdptrs[i])) { + vcpu->mmu.pae_root[i] = 0; + continue; + } root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; - else if (vcpu->mmu.root_level == 0) + } else if (vcpu->mmu.root_level == 0) root_gfn = 0; page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, !is_paging(vcpu), - NULL); + 0, NULL); root = page->page_hpa; ++page->root_count; vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; @@ -903,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - ++kvm_stat.tlb_flush; + ++vcpu->stat.tlb_flush; kvm_arch_ops->tlb_flush(vcpu); } @@ -918,11 +951,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } -static void mark_pagetable_nonglobal(void *shadow_pte) -{ - page_header(__pa(shadow_pte))->global = 0; -} - static inline void set_pte_common(struct kvm_vcpu *vcpu, u64 *shadow_pte, gpa_t gaddr, @@ -940,9 +968,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, *shadow_pte |= access_bits; - if (!(*shadow_pte & PT_GLOBAL_MASK)) - mark_pagetable_nonglobal(shadow_pte); - if (is_error_hpa(paddr)) { *shadow_pte |= gaddr; *shadow_pte |= PT_SHADOW_IO_MARK; @@ -1093,22 +1118,40 @@ out: return r; } +static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *spte) +{ + u64 pte; + struct kvm_mmu_page *child; + + pte = *spte; + if (is_present_pte(pte)) { + if (page->role.level == PT_PAGE_TABLE_LEVEL) + rmap_remove(vcpu, spte); + else { + child = page_header(pte & PT64_BASE_ADDR_MASK); + mmu_page_remove_parent_pte(vcpu, child, spte); + } + } + *spte = 0; +} + void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *page; - struct kvm_mmu_page *child; struct hlist_node *node, *n; struct hlist_head *bucket; unsigned index; u64 *spte; - u64 pte; unsigned offset = offset_in_page(gpa); unsigned pte_size; unsigned page_offset; unsigned misaligned; int level; int flooded = 0; + int npte; pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); if (gfn == vcpu->last_pt_write_gfn) { @@ -1144,22 +1187,27 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) } page_offset = offset; level = page->role.level; + npte = 1; if (page->role.glevels == PT32_ROOT_LEVEL) { - page_offset <<= 1; /* 32->64 */ + page_offset <<= 1; /* 32->64 */ + /* + * A 32-bit pde maps 4MB while the shadow pdes map + * only 2MB. So we need to double the offset again + * and zap two pdes instead of one. + */ + if (level == PT32_ROOT_LEVEL) { + page_offset &= ~7; /* kill rounding error */ + page_offset <<= 1; + npte = 2; + } page_offset &= ~PAGE_MASK; } spte = __va(page->page_hpa); spte += page_offset / sizeof(*spte); - pte = *spte; - if (is_present_pte(pte)) { - if (level == PT_PAGE_TABLE_LEVEL) - rmap_remove(vcpu, spte); - else { - child = page_header(pte & PT64_BASE_ADDR_MASK); - mmu_page_remove_parent_pte(vcpu, child, spte); - } + while (npte--) { + mmu_pre_write_zap_pte(vcpu, page, spte); + ++spte; } - *spte = 0; } } @@ -1293,6 +1341,51 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) } } +void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) +{ + destroy_kvm_mmu(vcpu); + + while (!list_empty(&vcpu->kvm->active_mmu_pages)) { + struct kvm_mmu_page *page; + + page = container_of(vcpu->kvm->active_mmu_pages.next, + struct kvm_mmu_page, link); + kvm_mmu_zap_page(vcpu, page); + } + + mmu_free_memory_caches(vcpu); + kvm_arch_ops->tlb_flush(vcpu); + init_kvm_mmu(vcpu); +} + +void kvm_mmu_module_exit(void) +{ + if (pte_chain_cache) + kmem_cache_destroy(pte_chain_cache); + if (rmap_desc_cache) + kmem_cache_destroy(rmap_desc_cache); +} + +int kvm_mmu_module_init(void) +{ + pte_chain_cache = kmem_cache_create("kvm_pte_chain", + sizeof(struct kvm_pte_chain), + 0, 0, NULL, NULL); + if (!pte_chain_cache) + goto nomem; + rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", + sizeof(struct kvm_rmap_desc), + 0, 0, NULL, NULL); + if (!rmap_desc_cache) + goto nomem; + + return 0; + +nomem: + kvm_mmu_module_exit(); + return -ENOMEM; +} + #ifdef AUDIT static const char *audit_msg; @@ -1315,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { u64 ent = pt[i]; - if (!ent & PT_PRESENT_MASK) + if (!(ent & PT_PRESENT_MASK)) continue; va = canonicalize(va); @@ -1337,7 +1430,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, static void audit_mappings(struct kvm_vcpu *vcpu) { - int i; + unsigned i; if (vcpu->mmu.root_level == 4) audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);