KVM: Portability: move vpic and vioapic to kvm_arch
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 6212984a2e6c6f5b557fa21029c7b6202a0c7707..b37c0093d728996efd694c766febb60938b554e4 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
 #include "x86_emulate.h"
 #include "segment_descriptor.h"
 #include "irq.h"
+#include "mmu.h"
 
 #include <linux/kvm.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
+#include <linux/mman.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -72,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
        { "mmu_flooded", VM_STAT(mmu_flooded) },
        { "mmu_recycled", VM_STAT(mmu_recycled) },
+       { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
        { NULL }
 };
 
@@ -110,9 +114,9 @@ EXPORT_SYMBOL_GPL(segment_base);
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
        if (irqchip_in_kernel(vcpu->kvm))
-               return vcpu->apic_base;
+               return vcpu->arch.apic_base;
        else
-               return vcpu->apic_base;
+               return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
@@ -122,13 +126,49 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_lapic_set_base(vcpu, data);
        else
-               vcpu->apic_base = data;
+               vcpu->arch.apic_base = data;
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
-static void inject_gp(struct kvm_vcpu *vcpu)
+void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-       kvm_x86_ops->inject_gp(vcpu, 0);
+       WARN_ON(vcpu->arch.exception.pending);
+       vcpu->arch.exception.pending = true;
+       vcpu->arch.exception.has_error_code = false;
+       vcpu->arch.exception.nr = nr;
+}
+EXPORT_SYMBOL_GPL(kvm_queue_exception);
+
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+                          u32 error_code)
+{
+       ++vcpu->stat.pf_guest;
+       if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) {
+               printk(KERN_DEBUG "kvm: inject_page_fault:"
+                      " double fault 0x%lx\n", addr);
+               vcpu->arch.exception.nr = DF_VECTOR;
+               vcpu->arch.exception.error_code = 0;
+               return;
+       }
+       vcpu->arch.cr2 = addr;
+       kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
+}
+
+void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
+{
+       WARN_ON(vcpu->arch.exception.pending);
+       vcpu->arch.exception.pending = true;
+       vcpu->arch.exception.has_error_code = true;
+       vcpu->arch.exception.nr = nr;
+       vcpu->arch.exception.error_code = error_code;
+}
+EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
+
+static void __queue_exception(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
+                                    vcpu->arch.exception.has_error_code,
+                                    vcpu->arch.exception.error_code);
 }
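
[Note: the kvm_inject_gp() calls introduced throughout the rest of this patch build on this queueing API; presumably it is a thin header wrapper along these lines (a sketch, not part of this diff):

	/* sketch: #GP always carries an error code, so use the _e variant */
	static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
	{
		kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
	}

The pending exception then rides in vcpu->arch until __queue_exception() hands it to the vendor module on the next guest entry.]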
 
 /*
@@ -140,7 +180,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
        int i;
        int ret;
-       u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
+       u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
 
        mutex_lock(&vcpu->kvm->lock);
        ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
@@ -157,67 +197,87 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
        }
        ret = 1;
 
-       memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
+       memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 out:
        mutex_unlock(&vcpu->kvm->lock);
 
        return ret;
 }
 
+static bool pdptrs_changed(struct kvm_vcpu *vcpu)
+{
+       u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
+       bool changed = true;
+       int r;
+
+       if (is_long_mode(vcpu) || !is_pae(vcpu))
+               return false;
+
+       mutex_lock(&vcpu->kvm->lock);
+       r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
+       if (r < 0)
+               goto out;
+       changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
+out:
+       mutex_unlock(&vcpu->kvm->lock);
+
+       return changed;
+}
+
 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        if (cr0 & CR0_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-                      cr0, vcpu->cr0);
-               inject_gp(vcpu);
+                      cr0, vcpu->arch.cr0);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
                       "and a clear PE flag\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 #ifdef CONFIG_X86_64
-               if ((vcpu->shadow_efer & EFER_LME)) {
+               if ((vcpu->arch.shadow_efer & EFER_LME)) {
                        int cs_db, cs_l;
 
                        if (!is_pae(vcpu)) {
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
                                       "in long mode while PAE is disabled\n");
-                               inject_gp(vcpu);
+                               kvm_inject_gp(vcpu, 0);
                                return;
                        }
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
                        if (cs_l) {
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
                                       "in long mode while CS.L == 1\n");
-                               inject_gp(vcpu);
+                               kvm_inject_gp(vcpu, 0);
                                return;
 
                        }
                } else
 #endif
-               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
+               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
                               "reserved bits\n");
-                       inject_gp(vcpu);
+                       kvm_inject_gp(vcpu, 0);
                        return;
                }
 
        }
 
        kvm_x86_ops->set_cr0(vcpu, cr0);
-       vcpu->cr0 = cr0;
+       vcpu->arch.cr0 = cr0;
 
        mutex_lock(&vcpu->kvm->lock);
        kvm_mmu_reset_context(vcpu);
@@ -228,7 +288,7 @@ EXPORT_SYMBOL_GPL(set_cr0);
 
 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
-       set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
+       set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
 }
 EXPORT_SYMBOL_GPL(lmsw);
 
@@ -236,7 +296,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & CR4_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
@@ -244,23 +304,23 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                if (!(cr4 & X86_CR4_PAE)) {
                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
                               "in long mode\n");
-                       inject_gp(vcpu);
+                       kvm_inject_gp(vcpu, 0);
                        return;
                }
        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
-                  && !load_pdptrs(vcpu, vcpu->cr3)) {
+                  && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
        if (cr4 & X86_CR4_VMXE) {
                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
        kvm_x86_ops->set_cr4(vcpu, cr4);
-       vcpu->cr4 = cr4;
+       vcpu->arch.cr4 = cr4;
        mutex_lock(&vcpu->kvm->lock);
        kvm_mmu_reset_context(vcpu);
        mutex_unlock(&vcpu->kvm->lock);
@@ -269,10 +329,15 @@ EXPORT_SYMBOL_GPL(set_cr4);
 
 void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+       if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
+               kvm_mmu_flush_tlb(vcpu);
+               return;
+       }
+
        if (is_long_mode(vcpu)) {
                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
-                       inject_gp(vcpu);
+                       kvm_inject_gp(vcpu, 0);
                        return;
                }
        } else {
@@ -280,13 +345,13 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                        if (cr3 & CR3_PAE_RESERVED_BITS) {
                                printk(KERN_DEBUG
                                       "set_cr3: #GP, reserved bits\n");
-                               inject_gp(vcpu);
+                               kvm_inject_gp(vcpu, 0);
                                return;
                        }
                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
                                       "reserved bits\n");
-                               inject_gp(vcpu);
+                               kvm_inject_gp(vcpu, 0);
                                return;
                        }
                }
@@ -307,10 +372,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
         * to debug) behavior on the guest side.
         */
        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
        else {
-               vcpu->cr3 = cr3;
-               vcpu->mmu.new_cr3(vcpu);
+               vcpu->arch.cr3 = cr3;
+               vcpu->arch.mmu.new_cr3(vcpu);
        }
        mutex_unlock(&vcpu->kvm->lock);
 }
@@ -320,13 +385,13 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
        if (cr8 & CR8_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_lapic_set_tpr(vcpu, cr8);
        else
-               vcpu->cr8 = cr8;
+               vcpu->arch.cr8 = cr8;
 }
 EXPORT_SYMBOL_GPL(set_cr8);
 
@@ -335,7 +400,7 @@ unsigned long get_cr8(struct kvm_vcpu *vcpu)
        if (irqchip_in_kernel(vcpu->kvm))
                return kvm_lapic_get_cr8(vcpu);
        else
-               return vcpu->cr8;
+               return vcpu->arch.cr8;
 }
 EXPORT_SYMBOL_GPL(get_cr8);
 
@@ -368,23 +433,23 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
        if (efer & EFER_RESERVED_BITS) {
                printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
                       efer);
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
        if (is_paging(vcpu)
-           && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+           && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
                printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return;
        }
 
        kvm_x86_ops->set_efer(vcpu, efer);
 
        efer &= ~EFER_LMA;
-       efer |= vcpu->shadow_efer & EFER_LMA;
+       efer |= vcpu->arch.shadow_efer & EFER_LMA;
 
-       vcpu->shadow_efer = efer;
+       vcpu->arch.shadow_efer = efer;
 }
 
 #endif
@@ -432,7 +497,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                kvm_set_apic_base(vcpu, data);
                break;
        case MSR_IA32_MISC_ENABLE:
-               vcpu->ia32_misc_enable_msr = data;
+               vcpu->arch.ia32_misc_enable_msr = data;
                break;
        default:
                pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
@@ -486,11 +551,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
                data = kvm_get_apic_base(vcpu);
                break;
        case MSR_IA32_MISC_ENABLE:
-               data = vcpu->ia32_misc_enable_msr;
+               data = vcpu->arch.ia32_misc_enable_msr;
                break;
 #ifdef CONFIG_X86_64
        case MSR_EFER:
-               data = vcpu->shadow_efer;
+               data = vcpu->arch.shadow_efer;
                break;
 #endif
        default:
@@ -619,6 +684,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
        case KVM_CAP_USER_MEMORY:
        case KVM_CAP_SET_TSS_ADDR:
+       case KVM_CAP_EXT_CPUID:
                r = 1;
                break;
        default:
@@ -681,44 +747,273 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        kvm_put_guest_fpu(vcpu);
 }
 
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+static int is_efer_nx(void)
 {
        u64 efer;
-       int i;
-       struct kvm_cpuid_entry *e, *entry;
 
        rdmsrl(MSR_EFER, efer);
+       return efer & EFER_NX;
+}
+
+static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+       int i;
+       struct kvm_cpuid_entry2 *e, *entry;
+
        entry = NULL;
-       for (i = 0; i < vcpu->cpuid_nent; ++i) {
-               e = &vcpu->cpuid_entries[i];
+       for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+               e = &vcpu->arch.cpuid_entries[i];
                if (e->function == 0x80000001) {
                        entry = e;
                        break;
                }
        }
-       if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
+       if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
                entry->edx &= ~(1 << 20);
                printk(KERN_INFO "kvm: guest NX capability removed\n");
        }
 }
 
+/* when an old userspace process fills a new kernel module */
 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                                    struct kvm_cpuid *cpuid,
                                    struct kvm_cpuid_entry __user *entries)
 {
-       int r;
+       int r, i;
+       struct kvm_cpuid_entry *cpuid_entries;
 
        r = -E2BIG;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                goto out;
+       r = -ENOMEM;
+       cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
+       if (!cpuid_entries)
+               goto out;
        r = -EFAULT;
-       if (copy_from_user(&vcpu->cpuid_entries, entries,
+       if (copy_from_user(cpuid_entries, entries,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry)))
-               goto out;
-       vcpu->cpuid_nent = cpuid->nent;
+               goto out_free;
+       for (i = 0; i < cpuid->nent; i++) {
+               vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
+               vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
+               vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
+               vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
+               vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
+               vcpu->arch.cpuid_entries[i].index = 0;
+               vcpu->arch.cpuid_entries[i].flags = 0;
+               vcpu->arch.cpuid_entries[i].padding[0] = 0;
+               vcpu->arch.cpuid_entries[i].padding[1] = 0;
+               vcpu->arch.cpuid_entries[i].padding[2] = 0;
+       }
+       vcpu->arch.cpuid_nent = cpuid->nent;
        cpuid_fix_nx_cap(vcpu);
+       r = 0;
+
+out_free:
+       vfree(cpuid_entries);
+out:
+       return r;
+}
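
[Note: the copy loop above widens the legacy ABI field by field: struct kvm_cpuid_entry carries no index or flags, so both are zeroed. For contrast, the two layouts (as declared in this tree's include/linux/kvm.h; reproduced here for reference):

	struct kvm_cpuid_entry {		/* legacy KVM_SET_CPUID */
		__u32 function;
		__u32 eax, ebx, ecx, edx;
		__u32 padding;
	};

	struct kvm_cpuid_entry2 {		/* KVM_SET_CPUID2 and friends */
		__u32 function;
		__u32 index;			/* only meaningful when flagged */
		__u32 flags;
		__u32 eax, ebx, ecx, edx;
		__u32 padding[3];
	};
]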
+
+static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
+                                   struct kvm_cpuid2 *cpuid,
+                                   struct kvm_cpuid_entry2 __user *entries)
+{
+       int r;
+
+       r = -E2BIG;
+       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+               goto out;
+       r = -EFAULT;
+       if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
+                          cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
+               goto out;
+       vcpu->arch.cpuid_nent = cpuid->nent;
+       return 0;
+
+out:
+       return r;
+}
+
+static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
+                                   struct kvm_cpuid2 *cpuid,
+                                   struct kvm_cpuid_entry2 __user *entries)
+{
+       int r;
+
+       r = -E2BIG;
+       if (cpuid->nent < vcpu->arch.cpuid_nent)
+               goto out;
+       r = -EFAULT;
+       if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
+                          vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+               goto out;
        return 0;
 
+out:
+       cpuid->nent = vcpu->arch.cpuid_nent;
+       return r;
+}
+
+static inline u32 bit(int bitno)
+{
+       return 1 << (bitno & 31);
+}
+
+static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+                         u32 index)
+{
+       entry->function = function;
+       entry->index = index;
+       cpuid_count(entry->function, entry->index,
+               &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+       entry->flags = 0;
+}
+
+static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+                        u32 index, int *nent, int maxnent)
+{
+       const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
+               bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
+               bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
+               bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
+               bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
+               bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
+               bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
+               bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
+               bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
+               bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
+       const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
+               bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
+               bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
+               bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
+               bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
+               bit(X86_FEATURE_PGE) |
+               bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
+               bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
+               bit(X86_FEATURE_SYSCALL) |
+               (bit(X86_FEATURE_NX) && is_efer_nx()) |
+#ifdef CONFIG_X86_64
+               bit(X86_FEATURE_LM) |
+#endif
+               bit(X86_FEATURE_MMXEXT) |
+               bit(X86_FEATURE_3DNOWEXT) |
+               bit(X86_FEATURE_3DNOW);
+       const u32 kvm_supported_word3_x86_features =
+               bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
+       const u32 kvm_supported_word6_x86_features =
+               bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+
+       /* all func 2 cpuid_count() should be called on the same cpu */
+       get_cpu();
+       do_cpuid_1_ent(entry, function, index);
+       ++*nent;
+
+       switch (function) {
+       case 0:
+               entry->eax = min(entry->eax, (u32)0xb);
+               break;
+       case 1:
+               entry->edx &= kvm_supported_word0_x86_features;
+               entry->ecx &= kvm_supported_word3_x86_features;
+               break;
+       /* function 2 entries are STATEFUL. That is, repeated cpuid commands
+        * may return different values. This forces us to get_cpu() before
+        * issuing the first command, and also to emulate this annoying behavior
+        * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
+       case 2: {
+               int t, times = entry->eax & 0xff;
+
+               entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+               for (t = 1; t < times && *nent < maxnent; ++t) {
+                       do_cpuid_1_ent(&entry[t], function, 0);
+                       entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+                       ++*nent;
+               }
+               break;
+       }
+       /* function 4 and 0xb have additional index. */
+       case 4: {
+               int index, cache_type;
+
+               entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+               /* read more entries until cache_type is zero */
+               for (index = 1; *nent < maxnent; ++index) {
+                       cache_type = entry[index - 1].eax & 0x1f;
+                       if (!cache_type)
+                               break;
+                       do_cpuid_1_ent(&entry[index], function, index);
+                       entry[index].flags |=
+                              KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                       ++*nent;
+               }
+               break;
+       }
+       case 0xb: {
+               int index, level_type;
+
+               entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+               /* read more entries until level_type is zero */
+               for (index = 1; *nent < maxnent; ++index) {
+                       level_type = entry[index - 1].ecx & 0xff;
+                       if (!level_type)
+                               break;
+                       do_cpuid_1_ent(&entry[index], function, index);
+                       entry[index].flags |=
+                              KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                       ++*nent;
+               }
+               break;
+       }
+       case 0x80000000:
+               entry->eax = min(entry->eax, 0x8000001a);
+               break;
+       case 0x80000001:
+               entry->edx &= kvm_supported_word1_x86_features;
+               entry->ecx &= kvm_supported_word6_x86_features;
+               break;
+       }
+       put_cpu();
+}
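
[Note: leaf 2 is the stateful case the comment above warns about: the low byte of EAX from the first read says how many consecutive CPUID(2) executions it takes to produce all the descriptors, which is why the reads are pinned to one CPU with get_cpu()/put_cpu(). A worked example with hypothetical values:

	/* If the first read of CPUID(2) returns eax & 0xff == 3, the loop
	 * emits three entries, all for function 2 and all marked
	 * KVM_CPUID_FLAG_STATEFUL_FUNC:
	 *
	 *	entry[0] = 1st read, entry[1] = 2nd read, entry[2] = 3rd read
	 *
	 * kvm_emulate_cpuid() later replays them in this order, using
	 * KVM_CPUID_FLAG_STATE_READ_NEXT as its rotation cursor. */
]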
+
+static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
+                                   struct kvm_cpuid2 *cpuid,
+                                   struct kvm_cpuid_entry2 __user *entries)
+{
+       struct kvm_cpuid_entry2 *cpuid_entries;
+       int limit, nent = 0, r = -E2BIG;
+       u32 func;
+
+       if (cpuid->nent < 1)
+               goto out;
+       r = -ENOMEM;
+       cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+       if (!cpuid_entries)
+               goto out;
+
+       do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
+       limit = cpuid_entries[0].eax;
+       for (func = 1; func <= limit && nent < cpuid->nent; ++func)
+               do_cpuid_ent(&cpuid_entries[nent], func, 0,
+                               &nent, cpuid->nent);
+       r = -E2BIG;
+       if (nent >= cpuid->nent)
+               goto out_free;
+
+       do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
+       limit = cpuid_entries[nent - 1].eax;
+       for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
+               do_cpuid_ent(&cpuid_entries[nent], func, 0,
+                              &nent, cpuid->nent);
+       r = -EFAULT;
+       if (copy_to_user(entries, cpuid_entries,
+                       nent * sizeof(struct kvm_cpuid_entry2)))
+               goto out_free;
+       cpuid->nent = nent;
+       r = 0;
+
+out_free:
+       vfree(cpuid_entries);
 out:
        return r;
 }
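
[Note: from userspace the intended pattern is to probe with a small buffer and grow it while the ioctl fails with E2BIG; as the vm-ioctl handler further down shows, in this tree KVM_GET_SUPPORTED_CPUID is issued on the VM fd. A minimal sketch, assuming vm_fd came from KVM_CREATE_VM:

	#include <errno.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static struct kvm_cpuid2 *get_supported_cpuid(int vm_fd)
	{
		int nent = 8;

		for (;;) {
			struct kvm_cpuid2 *cpuid;

			cpuid = calloc(1, sizeof(*cpuid) +
					  nent * sizeof(struct kvm_cpuid_entry2));
			if (!cpuid)
				return NULL;
			cpuid->nent = nent;
			if (ioctl(vm_fd, KVM_GET_SUPPORTED_CPUID, cpuid) == 0)
				return cpuid;	/* cpuid->nent holds the real count */
			free(cpuid);
			if (errno != E2BIG)
				return NULL;	/* real error */
			nent *= 2;		/* buffer too small, retry larger */
		}
	}

The result is then typically filtered and installed per vcpu with KVM_SET_CPUID2.]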
@@ -727,7 +1022,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
        vcpu_load(vcpu);
-       memcpy(s->regs, vcpu->apic->regs, sizeof *s);
+       memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
        vcpu_put(vcpu);
 
        return 0;
@@ -737,13 +1032,30 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
        vcpu_load(vcpu);
-       memcpy(vcpu->apic->regs, s->regs, sizeof *s);
+       memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
        kvm_apic_post_state_restore(vcpu);
        vcpu_put(vcpu);
 
        return 0;
 }
 
+static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
+                                   struct kvm_interrupt *irq)
+{
+       if (irq->irq < 0 || irq->irq >= 256)
+               return -EINVAL;
+       if (irqchip_in_kernel(vcpu->kvm))
+               return -ENXIO;
+       vcpu_load(vcpu);
+
+       set_bit(irq->irq, vcpu->arch.irq_pending);
+       set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+
+       vcpu_put(vcpu);
+
+       return 0;
+}
+
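[Note: KVM_INTERRUPT only makes sense when the irqchip lives in userspace; with an in-kernel PIC/APIC it fails with ENXIO, and vectors outside 0..255 get EINVAL. A sketch of queueing guest vector 32 (vcpu_fd and handle_error() are placeholders):

	struct kvm_interrupt irq = { .irq = 32 };

	if (ioctl(vcpu_fd, KVM_INTERRUPT, &irq) < 0)
		/* EINVAL: bad vector; ENXIO: in-kernel irqchip in use */
		handle_error();

The bit lands in vcpu->arch.irq_pending and is delivered by the userspace-irqchip injection path on the next guest entry.]
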
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -777,6 +1089,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_INTERRUPT: {
+               struct kvm_interrupt irq;
+
+               r = -EFAULT;
+               if (copy_from_user(&irq, argp, sizeof irq))
+                       goto out;
+               r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        case KVM_SET_CPUID: {
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;
@@ -789,6 +1113,36 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                        goto out;
                break;
        }
+       case KVM_SET_CPUID2: {
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
+               struct kvm_cpuid2 cpuid;
+
+               r = -EFAULT;
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+                       goto out;
+               r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
+                               cpuid_arg->entries);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_GET_CPUID2: {
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
+               struct kvm_cpuid2 cpuid;
+
+               r = -EFAULT;
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+                       goto out;
+               r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
+                               cpuid_arg->entries);
+               if (r)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+                       goto out;
+               r = 0;
+               break;
+       }
        case KVM_GET_MSRS:
                r = msr_io(vcpu, argp, kvm_get_msr, 1);
                break;
@@ -821,7 +1175,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
        mutex_lock(&kvm->lock);
 
        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
-       kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
+       kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
        mutex_unlock(&kvm->lock);
        return 0;
@@ -829,7 +1183,21 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 
 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
 {
-       return kvm->n_alloc_mmu_pages;
+       return kvm->arch.n_alloc_mmu_pages;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+       int i;
+       struct kvm_mem_alias *alias;
+
+       for (i = 0; i < kvm->arch.naliases; ++i) {
+               alias = &kvm->arch.aliases[i];
+               if (gfn >= alias->base_gfn
+                   && gfn < alias->base_gfn + alias->npages)
+                       return alias->target_gfn + gfn - alias->base_gfn;
+       }
+       return gfn;
 }
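
[Note: unalias_gfn() is a linear scan over the populated alias slots; the first slot whose range covers the frame wins, and everything else passes through as identity. A worked example with hypothetical values:

	/* one alias slot: { .base_gfn = 0xa0, .npages = 0x10, .target_gfn = 0x120 }
	 *
	 *	unalias_gfn(kvm, 0xa8) == 0x120 + (0xa8 - 0xa0) == 0x128
	 *	unalias_gfn(kvm, 0xb5) == 0xb5	(no slot matches: identity)
	 */
]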
 
 /*
@@ -860,15 +1228,15 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
        mutex_lock(&kvm->lock);
 
-       p = &kvm->aliases[alias->slot];
+       p = &kvm->arch.aliases[alias->slot];
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
        p->npages = alias->memory_size >> PAGE_SHIFT;
        p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
 
        for (n = KVM_ALIAS_SLOTS; n > 0; --n)
-               if (kvm->aliases[n - 1].npages)
+               if (kvm->arch.aliases[n - 1].npages)
                        break;
-       kvm->naliases = n;
+       kvm->arch.naliases = n;
 
        kvm_mmu_zap_all(kvm);
 
@@ -1018,12 +1386,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
        }
        case KVM_CREATE_IRQCHIP:
                r = -ENOMEM;
-               kvm->vpic = kvm_create_pic(kvm);
-               if (kvm->vpic) {
+               kvm->arch.vpic = kvm_create_pic(kvm);
+               if (kvm->arch.vpic) {
                        r = kvm_ioapic_init(kvm);
                        if (r) {
-                               kfree(kvm->vpic);
-                               kvm->vpic = NULL;
+                               kfree(kvm->arch.vpic);
+                               kvm->arch.vpic = NULL;
                                goto out;
                        }
                } else
@@ -1041,7 +1409,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                                kvm_pic_set_irq(pic_irqchip(kvm),
                                        irq_event.irq,
                                        irq_event.level);
-                       kvm_ioapic_set_irq(kvm->vioapic,
+                       kvm_ioapic_set_irq(kvm->arch.vioapic,
                                        irq_event.irq,
                                        irq_event.level);
                        mutex_unlock(&kvm->lock);
@@ -1084,6 +1452,24 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_GET_SUPPORTED_CPUID: {
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
+               struct kvm_cpuid2 cpuid;
+
+               r = -EFAULT;
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+                       goto out;
+               r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
+                       cpuid_arg->entries);
+               if (r)
+                       goto out;
+
+               r = -EFAULT;
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+                       goto out;
+               r = 0;
+               break;
+       }
        default:
                ;
        }
@@ -1114,8 +1500,8 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
 {
        struct kvm_io_device *dev;
 
-       if (vcpu->apic) {
-               dev = &vcpu->apic->dev;
+       if (vcpu->arch.apic) {
+               dev = &vcpu->arch.apic->dev;
                if (dev->in_range(dev, addr))
                        return dev;
        }
@@ -1142,7 +1528,7 @@ int emulator_read_std(unsigned long addr,
        void *data = val;
 
        while (bytes) {
-               gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
@@ -1162,15 +1548,6 @@ int emulator_read_std(unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(emulator_read_std);
 
-static int emulator_write_std(unsigned long addr,
-                             const void *val,
-                             unsigned int bytes,
-                             struct kvm_vcpu *vcpu)
-{
-       pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
-       return X86EMUL_UNHANDLEABLE;
-}
-
 static int emulator_read_emulated(unsigned long addr,
                                  void *val,
                                  unsigned int bytes,
@@ -1185,7 +1562,7 @@ static int emulator_read_emulated(unsigned long addr,
                return X86EMUL_CONTINUE;
        }
 
-       gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
        /* For APIC access vmexit */
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1233,10 +1610,10 @@ static int emulator_write_emulated_onepage(unsigned long addr,
                                           struct kvm_vcpu *vcpu)
 {
        struct kvm_io_device *mmio_dev;
-       gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       gpa_t                 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
        if (gpa == UNMAPPED_GVA) {
-               kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
+               kvm_inject_page_fault(vcpu, addr, 2);
                return X86EMUL_PROPAGATE_FAULT;
        }
 
@@ -1299,6 +1676,31 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
                reported = 1;
                printk(KERN_WARNING "kvm: emulating exchange as write\n");
        }
+#ifndef CONFIG_X86_64
+       /* guests cmpxchg8b have to be emulated atomically */
+       if (bytes == 8) {
+               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               struct page *page;
+               char *addr;
+               u64 val;
+
+               if (gpa == UNMAPPED_GVA ||
+                  (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+                       goto emul_write;
+
+               if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
+                       goto emul_write;
+
+               val = *(u64 *)new;
+               page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+               addr = kmap_atomic(page, KM_USER0);
+               set_64bit((u64 *)(addr + offset_in_page(gpa)), val);
+               kunmap_atomic(addr, KM_USER0);
+               kvm_release_page_dirty(page);
+       }
+emul_write:
+#endif
+
        return emulator_write_emulated(addr, new, bytes, vcpu);
 }
 
@@ -1314,7 +1716,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
+       kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
        return X86EMUL_CONTINUE;
 }
 
@@ -1349,7 +1751,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 {
        static int reported;
        u8 opcodes[4];
-       unsigned long rip = vcpu->rip;
+       unsigned long rip = vcpu->arch.rip;
        unsigned long rip_linear;
 
        rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
@@ -1367,7 +1769,6 @@ EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 struct x86_emulate_ops emulate_ops = {
        .read_std            = emulator_read_std,
-       .write_std           = emulator_write_std,
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
@@ -1381,47 +1782,46 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 {
        int r;
 
-       vcpu->mmio_fault_cr2 = cr2;
+       vcpu->arch.mmio_fault_cr2 = cr2;
        kvm_x86_ops->cache_regs(vcpu);
 
        vcpu->mmio_is_write = 0;
-       vcpu->pio.string = 0;
+       vcpu->arch.pio.string = 0;
 
        if (!no_decode) {
                int cs_db, cs_l;
                kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
-               vcpu->emulate_ctxt.vcpu = vcpu;
-               vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
-               vcpu->emulate_ctxt.cr2 = cr2;
-               vcpu->emulate_ctxt.mode =
-                       (vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM)
+               vcpu->arch.emulate_ctxt.vcpu = vcpu;
+               vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+               vcpu->arch.emulate_ctxt.mode =
+                       (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
                        ? X86EMUL_MODE_REAL : cs_l
                        ? X86EMUL_MODE_PROT64 : cs_db
                        ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 
-               if (vcpu->emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
-                       vcpu->emulate_ctxt.cs_base = 0;
-                       vcpu->emulate_ctxt.ds_base = 0;
-                       vcpu->emulate_ctxt.es_base = 0;
-                       vcpu->emulate_ctxt.ss_base = 0;
+               if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
+                       vcpu->arch.emulate_ctxt.cs_base = 0;
+                       vcpu->arch.emulate_ctxt.ds_base = 0;
+                       vcpu->arch.emulate_ctxt.es_base = 0;
+                       vcpu->arch.emulate_ctxt.ss_base = 0;
                } else {
-                       vcpu->emulate_ctxt.cs_base =
+                       vcpu->arch.emulate_ctxt.cs_base =
                                        get_segment_base(vcpu, VCPU_SREG_CS);
-                       vcpu->emulate_ctxt.ds_base =
+                       vcpu->arch.emulate_ctxt.ds_base =
                                        get_segment_base(vcpu, VCPU_SREG_DS);
-                       vcpu->emulate_ctxt.es_base =
+                       vcpu->arch.emulate_ctxt.es_base =
                                        get_segment_base(vcpu, VCPU_SREG_ES);
-                       vcpu->emulate_ctxt.ss_base =
+                       vcpu->arch.emulate_ctxt.ss_base =
                                        get_segment_base(vcpu, VCPU_SREG_SS);
                }
 
-               vcpu->emulate_ctxt.gs_base =
+               vcpu->arch.emulate_ctxt.gs_base =
                                        get_segment_base(vcpu, VCPU_SREG_GS);
-               vcpu->emulate_ctxt.fs_base =
+               vcpu->arch.emulate_ctxt.fs_base =
                                        get_segment_base(vcpu, VCPU_SREG_FS);
 
-               r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops);
+               r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
                ++vcpu->stat.insn_emulation;
                if (r)  {
                        ++vcpu->stat.insn_emulation_fail;
@@ -1431,9 +1831,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                }
        }
 
-       r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops);
+       r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
-       if (vcpu->pio.string)
+       if (vcpu->arch.pio.string)
                return EMULATE_DO_MMIO;
 
        if ((r || vcpu->mmio_is_write) && run) {
@@ -1455,7 +1855,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
        }
 
        kvm_x86_ops->decache_regs(vcpu);
-       kvm_x86_ops->set_rflags(vcpu, vcpu->emulate_ctxt.eflags);
+       kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
        if (vcpu->mmio_is_write) {
                vcpu->mmio_needed = 0;
@@ -1470,33 +1870,33 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
-               if (vcpu->pio.guest_pages[i]) {
-                       kvm_release_page_dirty(vcpu->pio.guest_pages[i]);
-                       vcpu->pio.guest_pages[i] = NULL;
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
+               if (vcpu->arch.pio.guest_pages[i]) {
+                       kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
+                       vcpu->arch.pio.guest_pages[i] = NULL;
                }
 }
 
 static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
-       void *p = vcpu->pio_data;
+       void *p = vcpu->arch.pio_data;
        void *q;
        unsigned bytes;
-       int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
+       int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
 
-       q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
+       q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
                 PAGE_KERNEL);
        if (!q) {
                free_pio_guest_pages(vcpu);
                return -ENOMEM;
        }
-       q += vcpu->pio.guest_page_offset;
-       bytes = vcpu->pio.size * vcpu->pio.cur_count;
-       if (vcpu->pio.in)
+       q += vcpu->arch.pio.guest_page_offset;
+       bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
+       if (vcpu->arch.pio.in)
                memcpy(q, p, bytes);
        else
                memcpy(p, q, bytes);
-       q -= vcpu->pio.guest_page_offset;
+       q -= vcpu->arch.pio.guest_page_offset;
+       q -= vcpu->arch.pio.guest_page_offset;
        vunmap(q);
        free_pio_guest_pages(vcpu);
        return 0;
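
[Note: pio_copy_data() leans on vmap() to glue the (at most two) guest pages behind a string I/O buffer into one contiguous kernel range, so a single memcpy() handles a buffer that straddles a page boundary. For example (hypothetical numbers):

	/* a 16-byte OUTS buffer at guest_page_offset 0xff8: after
	 *
	 *	q += 0xff8;
	 *
	 * the copy memcpy(p, q, 16) reads 8 bytes from the end of guest
	 * page 0 and 8 bytes from the start of guest page 1 through one
	 * contiguous mapping */
]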
@@ -1504,7 +1904,7 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 
 int complete_pio(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pio_request *io = &vcpu->pio;
+       struct kvm_pio_request *io = &vcpu->arch.pio;
        long delta;
        int r;
 
@@ -1512,7 +1912,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
 
        if (!io->string) {
                if (io->in)
-                       memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
+                       memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data,
                               io->size);
        } else {
                if (io->in) {
@@ -1530,15 +1930,15 @@ int complete_pio(struct kvm_vcpu *vcpu)
                         * The size of the register should really depend on
                         * current address size.
                         */
-                       vcpu->regs[VCPU_REGS_RCX] -= delta;
+                       vcpu->arch.regs[VCPU_REGS_RCX] -= delta;
                }
                if (io->down)
                        delta = -delta;
                delta *= io->size;
                if (io->in)
-                       vcpu->regs[VCPU_REGS_RDI] += delta;
+                       vcpu->arch.regs[VCPU_REGS_RDI] += delta;
                else
-                       vcpu->regs[VCPU_REGS_RSI] += delta;
+                       vcpu->arch.regs[VCPU_REGS_RSI] += delta;
        }
 
        kvm_x86_ops->decache_regs(vcpu);
@@ -1556,13 +1956,13 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
        /* TODO: String I/O for in kernel device */
 
        mutex_lock(&vcpu->kvm->lock);
-       if (vcpu->pio.in)
-               kvm_iodevice_read(pio_dev, vcpu->pio.port,
-                                 vcpu->pio.size,
+       if (vcpu->arch.pio.in)
+               kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
+                                 vcpu->arch.pio.size,
                                  pd);
        else
-               kvm_iodevice_write(pio_dev, vcpu->pio.port,
-                                  vcpu->pio.size,
+               kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
+                                  vcpu->arch.pio.size,
                                   pd);
        mutex_unlock(&vcpu->kvm->lock);
 }
@@ -1570,8 +1970,8 @@ static void kernel_pio(struct kvm_io_device *pio_dev,
 static void pio_string_write(struct kvm_io_device *pio_dev,
                             struct kvm_vcpu *vcpu)
 {
-       struct kvm_pio_request *io = &vcpu->pio;
-       void *pd = vcpu->pio_data;
+       struct kvm_pio_request *io = &vcpu->arch.pio;
+       void *pd = vcpu->arch.pio_data;
        int i;
 
        mutex_lock(&vcpu->kvm->lock);
@@ -1597,25 +1997,25 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
-       vcpu->run->io.size = vcpu->pio.size = size;
+       vcpu->run->io.size = vcpu->arch.pio.size = size;
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
-       vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
-       vcpu->run->io.port = vcpu->pio.port = port;
-       vcpu->pio.in = in;
-       vcpu->pio.string = 0;
-       vcpu->pio.down = 0;
-       vcpu->pio.guest_page_offset = 0;
-       vcpu->pio.rep = 0;
+       vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
+       vcpu->run->io.port = vcpu->arch.pio.port = port;
+       vcpu->arch.pio.in = in;
+       vcpu->arch.pio.string = 0;
+       vcpu->arch.pio.down = 0;
+       vcpu->arch.pio.guest_page_offset = 0;
+       vcpu->arch.pio.rep = 0;
 
        kvm_x86_ops->cache_regs(vcpu);
-       memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
+       memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
        kvm_x86_ops->decache_regs(vcpu);
 
        kvm_x86_ops->skip_emulated_instruction(vcpu);
 
        pio_dev = vcpu_find_pio_dev(vcpu, port);
        if (pio_dev) {
-               kernel_pio(pio_dev, vcpu, vcpu->pio_data);
+               kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
                complete_pio(vcpu);
                return 1;
        }
@@ -1635,15 +2035,15 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
        vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
-       vcpu->run->io.size = vcpu->pio.size = size;
+       vcpu->run->io.size = vcpu->arch.pio.size = size;
        vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
-       vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
-       vcpu->run->io.port = vcpu->pio.port = port;
-       vcpu->pio.in = in;
-       vcpu->pio.string = 1;
-       vcpu->pio.down = down;
-       vcpu->pio.guest_page_offset = offset_in_page(address);
-       vcpu->pio.rep = rep;
+       vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
+       vcpu->run->io.port = vcpu->arch.pio.port = port;
+       vcpu->arch.pio.in = in;
+       vcpu->arch.pio.string = 1;
+       vcpu->arch.pio.down = down;
+       vcpu->arch.pio.guest_page_offset = offset_in_page(address);
+       vcpu->arch.pio.rep = rep;
 
        if (!count) {
                kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -1669,35 +2069,35 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
                 */
                pr_unimpl(vcpu, "guest string pio down\n");
-               inject_gp(vcpu);
+               kvm_inject_gp(vcpu, 0);
                return 1;
        }
        vcpu->run->io.count = now;
-       vcpu->pio.cur_count = now;
+       vcpu->arch.pio.cur_count = now;
 
-       if (vcpu->pio.cur_count == vcpu->pio.count)
+       if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
                kvm_x86_ops->skip_emulated_instruction(vcpu);
 
        for (i = 0; i < nr_pages; ++i) {
                mutex_lock(&vcpu->kvm->lock);
                page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-               vcpu->pio.guest_pages[i] = page;
+               vcpu->arch.pio.guest_pages[i] = page;
                mutex_unlock(&vcpu->kvm->lock);
                if (!page) {
-                       inject_gp(vcpu);
+                       kvm_inject_gp(vcpu, 0);
                        free_pio_guest_pages(vcpu);
                        return 1;
                }
        }
 
        pio_dev = vcpu_find_pio_dev(vcpu, port);
-       if (!vcpu->pio.in) {
+       if (!vcpu->arch.pio.in) {
                /* string PIO write */
                ret = pio_copy_data(vcpu);
                if (ret >= 0 && pio_dev) {
                        pio_string_write(pio_dev, vcpu);
                        complete_pio(vcpu);
-                       if (vcpu->pio.count == 0)
+                       if (vcpu->arch.pio.count == 0)
                                ret = 1;
                }
        } else if (pio_dev)
@@ -1757,9 +2157,9 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
        if (irqchip_in_kernel(vcpu->kvm)) {
-               vcpu->mp_state = VCPU_MP_STATE_HALTED;
+               vcpu->arch.mp_state = VCPU_MP_STATE_HALTED;
                kvm_vcpu_block(vcpu);
-               if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
+               if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE)
                        return -EINTR;
                return 1;
        } else {
@@ -1775,11 +2175,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
        kvm_x86_ops->cache_regs(vcpu);
 
-       nr = vcpu->regs[VCPU_REGS_RAX];
-       a0 = vcpu->regs[VCPU_REGS_RBX];
-       a1 = vcpu->regs[VCPU_REGS_RCX];
-       a2 = vcpu->regs[VCPU_REGS_RDX];
-       a3 = vcpu->regs[VCPU_REGS_RSI];
+       nr = vcpu->arch.regs[VCPU_REGS_RAX];
+       a0 = vcpu->arch.regs[VCPU_REGS_RBX];
+       a1 = vcpu->arch.regs[VCPU_REGS_RCX];
+       a2 = vcpu->arch.regs[VCPU_REGS_RDX];
+       a3 = vcpu->arch.regs[VCPU_REGS_RSI];
 
        if (!is_long_mode(vcpu)) {
                nr &= 0xFFFFFFFF;
@@ -1794,7 +2194,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                ret = -KVM_ENOSYS;
                break;
        }
-       vcpu->regs[VCPU_REGS_RAX] = ret;
+       vcpu->arch.regs[VCPU_REGS_RAX] = ret;
        kvm_x86_ops->decache_regs(vcpu);
        return 0;
 }
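
[Note: the register convention kvm_emulate_hypercall() consumes (nr in RAX, up to four arguments in RBX/RCX/RDX/RSI, result back in RAX) matches what a guest issues with vmcall; a one-argument guest-side sketch, in the style of kvm_para.h:

	static inline long kvm_hypercall1(unsigned int nr, unsigned long a0)
	{
		long ret;

		/* "vmcall" is the Intel encoding; kvm_fix_hypercall()
		 * below patches in AMD's "vmmcall" when the guest used
		 * the wrong variant for the host vendor */
		asm volatile("vmcall"
			     : "=a"(ret)
			     : "a"(nr), "b"(a0)
			     : "memory");
		return ret;
	}
]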
@@ -1816,7 +2216,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 
        kvm_x86_ops->cache_regs(vcpu);
        kvm_x86_ops->patch_hypercall(vcpu, instruction);
-       if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
+       if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu)
            != X86EMUL_CONTINUE)
                ret = -EFAULT;
 
@@ -1856,13 +2256,15 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
        switch (cr) {
        case 0:
-               return vcpu->cr0;
+               return vcpu->arch.cr0;
        case 2:
-               return vcpu->cr2;
+               return vcpu->arch.cr2;
        case 3:
-               return vcpu->cr3;
+               return vcpu->arch.cr3;
        case 4:
-               return vcpu->cr4;
+               return vcpu->arch.cr4;
+       case 8:
+               return get_cr8(vcpu);
        default:
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
                return 0;
@@ -1874,39 +2276,77 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 {
        switch (cr) {
        case 0:
-               set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
+               set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
                *rflags = kvm_x86_ops->get_rflags(vcpu);
                break;
        case 2:
-               vcpu->cr2 = val;
+               vcpu->arch.cr2 = val;
                break;
        case 3:
                set_cr3(vcpu, val);
                break;
        case 4:
-               set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
+               set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+               break;
+       case 8:
+               set_cr8(vcpu, val & 0xfUL);
                break;
        default:
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
        }
 }
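
[Note: the new case 8 arms let the emulator read and write CR8. Architecturally CR8 is a 4-bit window onto the local APIC task priority: CR8 keeps the priority class in bits 3:0 while the APIC TPR keeps it in bits 7:4, hence the val & 0xfUL mask above. For example (hypothetical value):

	set_cr8(vcpu, 0x9);	/* APIC TPR reads as 0x90 afterwards */
	get_cr8(vcpu);		/* == 0x9, pulled back out of the TPR */
]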
 
+static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
+{
+       struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
+       int j, nent = vcpu->arch.cpuid_nent;
+
+       e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
+       /* when no next entry is found, the current entry[i] is reselected */
+       for (j = i + 1; j == i; j = (j + 1) % nent) {
+               struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
+               if (ej->function == e->function) {
+                       ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+                       return j;
+               }
+       }
+       return 0; /* silence gcc, even though control never reaches here */
+}
+
+/* find an entry with matching function, matching index (if needed), and that
+ * should be read next (if it's stateful) */
+static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
+       u32 function, u32 index)
+{
+       if (e->function != function)
+               return 0;
+       if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
+               return 0;
+       if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
+               !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+               return 0;
+       return 1;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
        int i;
-       u32 function;
-       struct kvm_cpuid_entry *e, *best;
+       u32 function, index;
+       struct kvm_cpuid_entry2 *e, *best;
 
        kvm_x86_ops->cache_regs(vcpu);
-       function = vcpu->regs[VCPU_REGS_RAX];
-       vcpu->regs[VCPU_REGS_RAX] = 0;
-       vcpu->regs[VCPU_REGS_RBX] = 0;
-       vcpu->regs[VCPU_REGS_RCX] = 0;
-       vcpu->regs[VCPU_REGS_RDX] = 0;
+       function = vcpu->arch.regs[VCPU_REGS_RAX];
+       index = vcpu->arch.regs[VCPU_REGS_RCX];
+       vcpu->arch.regs[VCPU_REGS_RAX] = 0;
+       vcpu->arch.regs[VCPU_REGS_RBX] = 0;
+       vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+       vcpu->arch.regs[VCPU_REGS_RDX] = 0;
        best = NULL;
-       for (i = 0; i < vcpu->cpuid_nent; ++i) {
-               e = &vcpu->cpuid_entries[i];
-               if (e->function == function) {
+       for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+               e = &vcpu->arch.cpuid_entries[i];
+               if (is_matching_cpuid_entry(e, function, index)) {
+                       if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
+                               move_to_next_stateful_cpuid_entry(vcpu, i);
                        best = e;
                        break;
                }
@@ -1918,10 +2358,10 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
                                best = e;
        }
        if (best) {
-               vcpu->regs[VCPU_REGS_RAX] = best->eax;
-               vcpu->regs[VCPU_REGS_RBX] = best->ebx;
-               vcpu->regs[VCPU_REGS_RCX] = best->ecx;
-               vcpu->regs[VCPU_REGS_RDX] = best->edx;
+               vcpu->arch.regs[VCPU_REGS_RAX] = best->eax;
+               vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx;
+               vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx;
+               vcpu->arch.regs[VCPU_REGS_RDX] = best->edx;
        }
        kvm_x86_ops->decache_regs(vcpu);
        kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -1937,9 +2377,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                          struct kvm_run *kvm_run)
 {
-       return (!vcpu->irq_summary &&
+       return (!vcpu->arch.irq_summary &&
                kvm_run->request_interrupt_window &&
-               vcpu->interrupt_window_open &&
+               vcpu->arch.interrupt_window_open &&
                (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
@@ -1953,22 +2393,22 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
                kvm_run->ready_for_interrupt_injection = 1;
        else
                kvm_run->ready_for_interrupt_injection =
-                                       (vcpu->interrupt_window_open &&
-                                        vcpu->irq_summary == 0);
+                                       (vcpu->arch.interrupt_window_open &&
+                                        vcpu->arch.irq_summary == 0);
 }
 
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        int r;
 
-       if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
+       if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
                pr_debug("vcpu %d received sipi with vector # %x\n",
-                      vcpu->vcpu_id, vcpu->sipi_vector);
+                      vcpu->vcpu_id, vcpu->arch.sipi_vector);
                kvm_lapic_reset(vcpu);
                r = kvm_x86_ops->vcpu_reset(vcpu);
                if (r)
                        return r;
-               vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+               vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
        }
 
 preempted:
@@ -1998,9 +2438,11 @@ again:
                goto out;
        }
 
-       if (irqchip_in_kernel(vcpu->kvm))
+       if (vcpu->arch.exception.pending)
+               __queue_exception(vcpu);
+       else if (irqchip_in_kernel(vcpu->kvm))
                kvm_x86_ops->inject_pending_irq(vcpu);
-       else if (!vcpu->mmio_read_completed)
+       else
                kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
 
        vcpu->guest_mode = 1;
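
[Editor's note: taken together with the post-exit check added further down in this function, the new injection logic forms a simple retry protocol; a condensed restatement of the two hunks (not literal kernel code):

    /* before entering the guest: a pending exception wins over interrupts */
    if (vcpu->arch.exception.pending)
            __queue_exception(vcpu);
    else if (irqchip_in_kernel(vcpu->kvm))
            kvm_x86_ops->inject_pending_irq(vcpu);      /* in-kernel PIC/APIC */
    else
            kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

    /* ... guest runs ... */

    /* after the exit: clear the exception only if the hardware actually
     * delivered it; otherwise it stays pending and is retried next entry */
    if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
            vcpu->arch.exception.pending = false;
]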
@@ -2034,9 +2476,12 @@ again:
         */
        if (unlikely(prof_on == KVM_PROFILING)) {
                kvm_x86_ops->cache_regs(vcpu);
-               profile_hit(KVM_PROFILING, (void *)vcpu->rip);
+               profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
        }
 
+       if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
+               vcpu->arch.exception.pending = false;
+
        r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
 
        if (r > 0) {
@@ -2068,7 +2513,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        vcpu_load(vcpu);
 
-       if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+       if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
                kvm_vcpu_block(vcpu);
                vcpu_put(vcpu);
                return -EAGAIN;
@@ -2081,7 +2526,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        if (!irqchip_in_kernel(vcpu->kvm))
                set_cr8(vcpu, kvm_run->cr8);
 
-       if (vcpu->pio.cur_count) {
+       if (vcpu->arch.pio.cur_count) {
                r = complete_pio(vcpu);
                if (r)
                        goto out;
@@ -2092,7 +2537,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                vcpu->mmio_read_completed = 1;
                vcpu->mmio_needed = 0;
                r = emulate_instruction(vcpu, kvm_run,
-                                       vcpu->mmio_fault_cr2, 0, 1);
+                                       vcpu->arch.mmio_fault_cr2, 0, 1);
                if (r == EMULATE_DO_MMIO) {
                        /*
                         * Read-modify-write.  Back to userspace.
@@ -2104,7 +2549,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 #endif
        if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
                kvm_x86_ops->cache_regs(vcpu);
-               vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
+               vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
                kvm_x86_ops->decache_regs(vcpu);
        }
 
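[Editor's note: the hypercall return path above is driven from userspace: after a KVM_EXIT_HYPERCALL exit, the VMM fills in kvm_run->hypercall.ret and re-enters the vcpu. A hypothetical userspace fragment — handle_hypercall() is made up for illustration, and run is assumed to be the kvm_run area mmap()ed from the vcpu fd:

    ioctl(vcpu_fd, KVM_RUN, 0);
    if (run->exit_reason == KVM_EXIT_HYPERCALL) {
            run->hypercall.ret = handle_hypercall(run->hypercall.nr,
                                                  run->hypercall.args);
            /* the next KVM_RUN reaches the hunk above, which copies .ret
             * into guest RAX (now vcpu->arch.regs[VCPU_REGS_RAX]) */
    }
]
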
@@ -2124,26 +2569,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
        kvm_x86_ops->cache_regs(vcpu);
 
-       regs->rax = vcpu->regs[VCPU_REGS_RAX];
-       regs->rbx = vcpu->regs[VCPU_REGS_RBX];
-       regs->rcx = vcpu->regs[VCPU_REGS_RCX];
-       regs->rdx = vcpu->regs[VCPU_REGS_RDX];
-       regs->rsi = vcpu->regs[VCPU_REGS_RSI];
-       regs->rdi = vcpu->regs[VCPU_REGS_RDI];
-       regs->rsp = vcpu->regs[VCPU_REGS_RSP];
-       regs->rbp = vcpu->regs[VCPU_REGS_RBP];
+       regs->rax = vcpu->arch.regs[VCPU_REGS_RAX];
+       regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX];
+       regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX];
+       regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX];
+       regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI];
+       regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI];
+       regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+       regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP];
 #ifdef CONFIG_X86_64
-       regs->r8 = vcpu->regs[VCPU_REGS_R8];
-       regs->r9 = vcpu->regs[VCPU_REGS_R9];
-       regs->r10 = vcpu->regs[VCPU_REGS_R10];
-       regs->r11 = vcpu->regs[VCPU_REGS_R11];
-       regs->r12 = vcpu->regs[VCPU_REGS_R12];
-       regs->r13 = vcpu->regs[VCPU_REGS_R13];
-       regs->r14 = vcpu->regs[VCPU_REGS_R14];
-       regs->r15 = vcpu->regs[VCPU_REGS_R15];
+       regs->r8 = vcpu->arch.regs[VCPU_REGS_R8];
+       regs->r9 = vcpu->arch.regs[VCPU_REGS_R9];
+       regs->r10 = vcpu->arch.regs[VCPU_REGS_R10];
+       regs->r11 = vcpu->arch.regs[VCPU_REGS_R11];
+       regs->r12 = vcpu->arch.regs[VCPU_REGS_R12];
+       regs->r13 = vcpu->arch.regs[VCPU_REGS_R13];
+       regs->r14 = vcpu->arch.regs[VCPU_REGS_R14];
+       regs->r15 = vcpu->arch.regs[VCPU_REGS_R15];
 #endif
 
-       regs->rip = vcpu->rip;
+       regs->rip = vcpu->arch.rip;
        regs->rflags = kvm_x86_ops->get_rflags(vcpu);
 
        /*
@@ -2161,26 +2606,26 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        vcpu_load(vcpu);
 
-       vcpu->regs[VCPU_REGS_RAX] = regs->rax;
-       vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
-       vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
-       vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
-       vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
-       vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
-       vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
-       vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
+       vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax;
+       vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx;
+       vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx;
+       vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx;
+       vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi;
+       vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi;
+       vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp;
+       vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp;
 #ifdef CONFIG_X86_64
-       vcpu->regs[VCPU_REGS_R8] = regs->r8;
-       vcpu->regs[VCPU_REGS_R9] = regs->r9;
-       vcpu->regs[VCPU_REGS_R10] = regs->r10;
-       vcpu->regs[VCPU_REGS_R11] = regs->r11;
-       vcpu->regs[VCPU_REGS_R12] = regs->r12;
-       vcpu->regs[VCPU_REGS_R13] = regs->r13;
-       vcpu->regs[VCPU_REGS_R14] = regs->r14;
-       vcpu->regs[VCPU_REGS_R15] = regs->r15;
+       vcpu->arch.regs[VCPU_REGS_R8] = regs->r8;
+       vcpu->arch.regs[VCPU_REGS_R9] = regs->r9;
+       vcpu->arch.regs[VCPU_REGS_R10] = regs->r10;
+       vcpu->arch.regs[VCPU_REGS_R11] = regs->r11;
+       vcpu->arch.regs[VCPU_REGS_R12] = regs->r12;
+       vcpu->arch.regs[VCPU_REGS_R13] = regs->r13;
+       vcpu->arch.regs[VCPU_REGS_R14] = regs->r14;
+       vcpu->arch.regs[VCPU_REGS_R15] = regs->r15;
 #endif
 
-       vcpu->rip = regs->rip;
+       vcpu->arch.rip = regs->rip;
        kvm_x86_ops->set_rflags(vcpu, regs->rflags);
 
        kvm_x86_ops->decache_regs(vcpu);
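
[Editor's note: these two accessors back the KVM_GET_REGS/KVM_SET_REGS ioctls. A minimal userspace sketch — vcpu_fd and entry_point are assumptions:

    struct kvm_regs regs;

    ioctl(vcpu_fd, KVM_GET_REGS, &regs);   /* kvm_arch_vcpu_ioctl_get_regs() */
    regs.rip = entry_point;                /* stored into vcpu->arch.rip above */
    regs.rflags = 0x2;                     /* bit 1 of RFLAGS is always set */
    ioctl(vcpu_fd, KVM_SET_REGS, &regs);   /* kvm_arch_vcpu_ioctl_set_regs() */
]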
@@ -2232,12 +2677,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->gdt.base = dt.base;
 
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-       sregs->cr0 = vcpu->cr0;
-       sregs->cr2 = vcpu->cr2;
-       sregs->cr3 = vcpu->cr3;
-       sregs->cr4 = vcpu->cr4;
+       sregs->cr0 = vcpu->arch.cr0;
+       sregs->cr2 = vcpu->arch.cr2;
+       sregs->cr3 = vcpu->arch.cr3;
+       sregs->cr4 = vcpu->arch.cr4;
        sregs->cr8 = get_cr8(vcpu);
-       sregs->efer = vcpu->shadow_efer;
+       sregs->efer = vcpu->arch.shadow_efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);
 
        if (irqchip_in_kernel(vcpu->kvm)) {
@@ -2248,7 +2693,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                        set_bit(pending_vec,
                                (unsigned long *)sregs->interrupt_bitmap);
        } else
-               memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
+               memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
                       sizeof sregs->interrupt_bitmap);
 
        vcpu_put(vcpu);
@@ -2278,13 +2723,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        dt.base = sregs->gdt.base;
        kvm_x86_ops->set_gdt(vcpu, &dt);
 
-       vcpu->cr2 = sregs->cr2;
-       mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
-       vcpu->cr3 = sregs->cr3;
+       vcpu->arch.cr2 = sregs->cr2;
+       mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
+       vcpu->arch.cr3 = sregs->cr3;
 
        set_cr8(vcpu, sregs->cr8);
 
-       mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
+       mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
 #ifdef CONFIG_X86_64
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
 #endif
@@ -2292,25 +2737,25 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);
 
-       mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
-       vcpu->cr0 = sregs->cr0;
+       mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
+       vcpu->arch.cr0 = sregs->cr0;
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
 
-       mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
+       mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
        if (!is_long_mode(vcpu) && is_pae(vcpu))
-               load_pdptrs(vcpu, vcpu->cr3);
+               load_pdptrs(vcpu, vcpu->arch.cr3);
 
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
        if (!irqchip_in_kernel(vcpu->kvm)) {
-               memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-                      sizeof vcpu->irq_pending);
-               vcpu->irq_summary = 0;
-               for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
-                       if (vcpu->irq_pending[i])
-                               __set_bit(i, &vcpu->irq_summary);
+               memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
+                      sizeof vcpu->arch.irq_pending);
+               vcpu->arch.irq_summary = 0;
+               for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
+                       if (vcpu->arch.irq_pending[i])
+                               __set_bit(i, &vcpu->arch.irq_summary);
        } else {
                max_bits = (sizeof sregs->interrupt_bitmap) << 3;
                pending_vec = find_first_bit(
@@ -2385,7 +2830,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 
        vcpu_load(vcpu);
        mutex_lock(&vcpu->kvm->lock);
-       gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
        tr->physical_address = gpa;
        tr->valid = gpa != UNMAPPED_GVA;
        tr->writeable = 1;
@@ -2398,7 +2843,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
+       struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
 
        vcpu_load(vcpu);
 
@@ -2418,7 +2863,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
+       struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
 
        vcpu_load(vcpu);
 
@@ -2442,16 +2887,16 @@ void fx_init(struct kvm_vcpu *vcpu)
 
        /* Initialize guest FPU by resetting ours and saving into guest's */
        preempt_disable();
-       fx_save(&vcpu->host_fx_image);
+       fx_save(&vcpu->arch.host_fx_image);
        fpu_init();
-       fx_save(&vcpu->guest_fx_image);
-       fx_restore(&vcpu->host_fx_image);
+       fx_save(&vcpu->arch.guest_fx_image);
+       fx_restore(&vcpu->arch.host_fx_image);
        preempt_enable();
 
-       vcpu->cr0 |= X86_CR0_ET;
+       vcpu->arch.cr0 |= X86_CR0_ET;
        after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
-       vcpu->guest_fx_image.mxcsr = 0x1f80;
-       memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
+       vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
+       memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
               0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
 }
 EXPORT_SYMBOL_GPL(fx_init);
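
[Editor's note on the constants in fx_init(): 0x1f80 is the architectural MXCSR power-on value (all SSE exceptions masked, round-to-nearest), and the memset() clears everything from st_space onward so the guest starts with pristine x87/SSE register state. Roughly, in the i387_fxsave_struct layout of this era (offsets are illustrative, not quoted from this patch):

    /* offset  0 : cwd, swd, twd, fop, fip/fcs, foo/fos  -- x87 control */
    /* offset 24 : mxcsr = 0x1f80, mxcsr_mask            -- SSE control */
    /* offset 32+: st_space[], xmm_space[], padding      -- zeroed here */
]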
@@ -2462,8 +2907,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
                return;
 
        vcpu->guest_fpu_loaded = 1;
-       fx_save(&vcpu->host_fx_image);
-       fx_restore(&vcpu->guest_fx_image);
+       fx_save(&vcpu->arch.host_fx_image);
+       fx_restore(&vcpu->arch.guest_fx_image);
 }
 EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
 
@@ -2473,8 +2918,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
                return;
 
        vcpu->guest_fpu_loaded = 0;
-       fx_save(&vcpu->guest_fx_image);
-       fx_restore(&vcpu->host_fx_image);
+       fx_save(&vcpu->arch.guest_fx_image);
+       fx_restore(&vcpu->arch.host_fx_image);
        ++vcpu->stat.fpu_reload;
 }
 EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
@@ -2487,16 +2932,15 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                                unsigned int id)
 {
-       int r;
-       struct kvm_vcpu *vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+       return kvm_x86_ops->vcpu_create(kvm, id);
+}
 
-       if (IS_ERR(vcpu)) {
-               r = -ENOMEM;
-               goto fail;
-       }
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+       int r;
 
        /* We do fxsave: this must be aligned. */
-       BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
+       BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
 
        vcpu_load(vcpu);
        r = kvm_arch_vcpu_reset(vcpu);
@@ -2506,11 +2950,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        if (r < 0)
                goto free_vcpu;
 
-       return vcpu;
+       return 0;
 free_vcpu:
        kvm_x86_ops->vcpu_free(vcpu);
-fail:
-       return ERR_PTR(r);
+       return r;
 }
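
[Editor's note: the create path is split in two so that generic KVM code can own the error handling. The expected caller sequence looks roughly like this — a sketch, not quoted from this patch:

    vcpu = kvm_arch_vcpu_create(kvm, id);
    if (IS_ERR(vcpu))
            return PTR_ERR(vcpu);

    r = kvm_arch_vcpu_setup(vcpu);  /* reset + remaining init under vcpu_load */
    if (r)
            goto destroy_vcpu;
]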
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -2561,18 +3004,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        BUG_ON(vcpu->kvm == NULL);
        kvm = vcpu->kvm;
 
-       vcpu->mmu.root_hpa = INVALID_PAGE;
+       vcpu->arch.mmu.root_hpa = INVALID_PAGE;
        if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
-               vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+               vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
        else
-               vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
+               vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED;
 
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page) {
                r = -ENOMEM;
                goto fail;
        }
-       vcpu->pio_data = page_address(page);
+       vcpu->arch.pio_data = page_address(page);
 
        r = kvm_mmu_create(vcpu);
        if (r < 0)
@@ -2589,7 +3032,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 fail_mmu_destroy:
        kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
-       free_page((unsigned long)vcpu->pio_data);
+       free_page((unsigned long)vcpu->arch.pio_data);
 fail:
        return r;
 }
@@ -2598,7 +3041,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
        kvm_free_lapic(vcpu);
        kvm_mmu_destroy(vcpu);
-       free_page((unsigned long)vcpu->pio_data);
+       free_page((unsigned long)vcpu->arch.pio_data);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
@@ -2608,7 +3051,7 @@ struct  kvm *kvm_arch_create_vm(void)
        if (!kvm)
                return ERR_PTR(-ENOMEM);
 
-       INIT_LIST_HEAD(&kvm->active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 
        return kvm;
 }
@@ -2641,9 +3084,65 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       kfree(kvm->vpic);
-       kfree(kvm->vioapic);
+       kfree(kvm->arch.vpic);
+       kfree(kvm->arch.vioapic);
        kvm_free_vcpus(kvm);
        kvm_free_physmem(kvm);
        kfree(kvm);
 }
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               struct kvm_memory_slot old,
+                               int user_alloc)
+{
+       int npages = mem->memory_size >> PAGE_SHIFT;
+       struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+
+       /*
+        * To keep backward compatibility with older userspace,
+        * x86 needs to handle the !user_alloc case.
+        */
+       if (!user_alloc) {
+               if (npages && !old.rmap) {
+                       down_write(&current->mm->mmap_sem);
+                       memslot->userspace_addr = do_mmap(NULL, 0,
+                                                    npages * PAGE_SIZE,
+                                                    PROT_READ | PROT_WRITE,
+                                                    MAP_SHARED | MAP_ANONYMOUS,
+                                                    0);
+                       up_write(&current->mm->mmap_sem);
+
+                       if (IS_ERR((void *)memslot->userspace_addr))
+                               return PTR_ERR((void *)memslot->userspace_addr);
+               } else {
+                       if (!old.user_alloc && old.rmap) {
+                               int ret;
+
+                               down_write(&current->mm->mmap_sem);
+                               ret = do_munmap(current->mm, old.userspace_addr,
+                                               old.npages * PAGE_SIZE);
+                               up_write(&current->mm->mmap_sem);
+                               if (ret < 0)
+                                       printk(KERN_WARNING
+                                      "kvm_vm_ioctl_set_memory_region: "
+                                      "failed to munmap memory\n");
+                       }
+               }
+       }
+
+       if (!kvm->arch.n_requested_mmu_pages) {
+               unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
+               kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+       }
+
+       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+       kvm_flush_remote_tlbs(kvm);
+
+       return 0;
+}
+
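[Editor's note: the !user_alloc branch above exists only for old userspace that let the kernel allocate guest RAM; current callers map the memory themselves and pass it in. A hypothetical modern invocation — vm_fd and ram_size are assumptions:

    void *ram = mmap(NULL, ram_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    struct kvm_userspace_memory_region mem = {
            .slot            = 0,
            .guest_phys_addr = 0,
            .memory_size     = ram_size,
            .userspace_addr  = (__u64)(unsigned long)ram,
    };
    ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);  /* user_alloc path */
]
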
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE
+              || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED;
+}