pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - arch/x86/kvm/x86.c
KVM: Implement dummy values for MSR_PERF_STATUS
[linux-2.6-omap-h63xx.git] / arch / x86 / kvm / x86.c
index 6b01552bd1f1cb91c64af4fdf9f3124e734d4d19..0dd038e7392b9390aa5029c70645c2196651517c 100644 (file)
  */
 
 #include <linux/kvm_host.h>
-#include "segment_descriptor.h"
 #include "irq.h"
 #include "mmu.h"
 
+#include <linux/clocksource.h>
 #include <linux/kvm.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
@@ -28,6 +28,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
+#include <asm/desc.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS                                              \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
-#define EFER_RESERVED_BITS 0xfffffffffffff2fe
+/* EFER defaults:
+ * - enable syscall by default because it's emulated by KVM
+ * - enable LME and LMA by default on 64 bit KVM
+ */
+#ifdef CONFIG_X86_64
+static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
+#else
+static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
+#endif
 
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -63,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "irq_window", VCPU_STAT(irq_window_exits) },
        { "halt_exits", VCPU_STAT(halt_exits) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
+       { "hypercalls", VCPU_STAT(hypercalls) },
        { "request_irq", VCPU_STAT(request_irq_exits) },
        { "irq_exits", VCPU_STAT(irq_exits) },
        { "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -85,7 +95,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 unsigned long segment_base(u16 selector)
 {
        struct descriptor_table gdt;
-       struct segment_descriptor *d;
+       struct desc_struct *d;
        unsigned long table_base;
        unsigned long v;
 
@@ -101,13 +111,12 @@ unsigned long segment_base(u16 selector)
                asm("sldt %0" : "=g"(ldt_selector));
                table_base = segment_base(ldt_selector);
        }
-       d = (struct segment_descriptor *)(table_base + (selector & ~7));
-       v = d->base_low | ((unsigned long)d->base_mid << 16) |
-               ((unsigned long)d->base_high << 24);
+       d = (struct desc_struct *)(table_base + (selector & ~7));
+       v = d->base0 | ((unsigned long)d->base1 << 16) |
+               ((unsigned long)d->base2 << 24);
 #ifdef CONFIG_X86_64
-       if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
-               v |= ((unsigned long) \
-                     ((struct segment_descriptor_64 *)d)->base_higher) << 32;
+       if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
+               v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
 #endif
        return v;
 }
@@ -205,6 +214,7 @@ out:
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(load_pdptrs);
 
 static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 {
@@ -415,7 +425,8 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TIME_STAMP_COUNTER,
+       MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       MSR_IA32_PERF_STATUS,
 };
 
 static unsigned num_msrs_to_save;
@@ -424,11 +435,9 @@ static u32 emulated_msrs[] = {
        MSR_IA32_MISC_ENABLE,
 };
 
-#ifdef CONFIG_X86_64
-
 static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-       if (efer & EFER_RESERVED_BITS) {
+       if (efer & efer_reserved_bits) {
                printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
                       efer);
                kvm_inject_gp(vcpu, 0);
@@ -450,7 +459,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
        vcpu->arch.shadow_efer = efer;
 }
 
-#endif
+void kvm_enable_efer_bits(u64 mask)     /* stop treating @mask as reserved */
+{
+       efer_reserved_bits &= ~mask;    /* set_efer() #GPs on bits left set */
+}
+EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+
 
 /*
  * Writes msr value into into the appropriate "register".
@@ -470,15 +484,77 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
        return kvm_set_msr(vcpu, index, *data);
 }
 
+static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+{
+       static int version;     /* function-static: shared by every caller */
+       struct kvm_wall_clock wc;
+       struct timespec wc_ts;
+       /* Write a kvm_wall_clock built from current_kernel_time() at wall_clock. */
+       if (!wall_clock)        /* zero address: write nothing */
+               return;
+
+       version++;
+       /* NOTE(review): unlocked increment of a shared static - confirm safe. */
+       down_read(&kvm->slots_lock);
+       kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+
+       wc_ts = current_kernel_time();
+       wc.wc_sec = wc_ts.tv_sec;
+       wc.wc_nsec = wc_ts.tv_nsec;
+       wc.wc_version = version;
+       /* NOTE(review): no kvm_write_guest() call here has its result checked. */
+       kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+       /* Second bump, then rewrite only the first sizeof(version) bytes. */
+       version++;
+       kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+       up_read(&kvm->slots_lock);
+}
+
+static void kvm_write_guest_time(struct kvm_vcpu *v)
+{
+       struct timespec ts;
+       unsigned long flags;
+       struct kvm_vcpu_arch *vcpu = &v->arch;
+       void *shared_kaddr;
+       /* Refresh v's hv_clock and copy it into the page held in time_page. */
+       if ((!vcpu->time_page)) /* no page set: nothing to update */
+               return;
+
+       /* Keep irqs off across both reads to prevent changes to the clock */
+       local_irq_save(flags);
+       kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
+                         &vcpu->hv_clock.tsc_timestamp);
+       ktime_get_ts(&ts);
+       local_irq_restore(flags);
+
+       /* system_time is the ktime_get_ts() sample converted to nanoseconds */
+
+       vcpu->hv_clock.system_time = ts.tv_nsec +
+                                    (NSEC_PER_SEC * (u64)ts.tv_sec);
+       /*
+        * The interface expects us to write an even number signaling that the
+        * update is finished. Since the guest won't see the intermediate
+        * state, we just write "2" at the end.
+        */
+       vcpu->hv_clock.version = 2;
+       /* Map the page only for the copy; time_offset is the offset inside it. */
+       shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+
+       memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
+               sizeof(vcpu->hv_clock));
+
+       kunmap_atomic(shared_kaddr, KM_USER0);
+       /* vcpu->time >> PAGE_SHIFT: the frame number of the page just written. */
+       mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+}
+
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        switch (msr) {
-#ifdef CONFIG_X86_64
        case MSR_EFER:
                set_efer(vcpu, data);
                break;
-#endif
        case MSR_IA32_MC0_STATUS:
                pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
                       __FUNCTION__, data);
@@ -501,6 +577,44 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        case MSR_IA32_MISC_ENABLE:
                vcpu->arch.ia32_misc_enable_msr = data;
                break;
+       case MSR_KVM_WALL_CLOCK:
+               vcpu->kvm->arch.wall_clock = data;
+               kvm_write_wall_clock(vcpu->kvm, data);
+               break;
+       case MSR_KVM_SYSTEM_TIME: {
+               if (vcpu->arch.time_page) {
+                       kvm_release_page_dirty(vcpu->arch.time_page);
+                       vcpu->arch.time_page = NULL;
+               }
+
+               vcpu->arch.time = data;
+
+               /* we verify if the enable bit is set... */
+               if (!(data & 1))
+                       break;
+
+               /* ...but clean it before doing the actual write */
+               vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
+
+               vcpu->arch.hv_clock.tsc_to_system_mul =
+                                       clocksource_khz2mult(tsc_khz, 22);
+               vcpu->arch.hv_clock.tsc_shift = 22;
+
+               down_read(&current->mm->mmap_sem);
+               down_read(&vcpu->kvm->slots_lock);
+               vcpu->arch.time_page =
+                               gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+               up_read(&vcpu->kvm->slots_lock);
+               up_read(&current->mm->mmap_sem);
+
+               if (is_error_page(vcpu->arch.time_page)) {
+                       kvm_release_page_clean(vcpu->arch.time_page);
+                       vcpu->arch.time_page = NULL;
+               }
+
+               kvm_write_guest_time(vcpu);
+               break;
+       }
        default:
                pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
                return 1;
@@ -540,7 +654,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_MC0_MISC+12:
        case MSR_IA32_MC0_MISC+16:
        case MSR_IA32_UCODE_REV:
-       case MSR_IA32_PERF_STATUS:
        case MSR_IA32_EBL_CR_POWERON:
                /* MTRR registers */
        case 0xfe:
@@ -556,11 +669,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_MISC_ENABLE:
                data = vcpu->arch.ia32_misc_enable_msr;
                break;
-#ifdef CONFIG_X86_64
+       case MSR_IA32_PERF_STATUS:
+               /* TSC increment by tick */
+               data = 1000ULL;
+               /* CPU multiplier */
+               data |= (((uint64_t)4ULL) << 40);
+               break;
        case MSR_EFER:
                data = vcpu->arch.shadow_efer;
                break;
-#endif
+       case MSR_KVM_WALL_CLOCK:
+               data = vcpu->kvm->arch.wall_clock;
+               break;
+       case MSR_KVM_SYSTEM_TIME:
+               data = vcpu->arch.time;
+               break;
        default:
                pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
                return 1;
@@ -688,11 +811,18 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_USER_MEMORY:
        case KVM_CAP_SET_TSS_ADDR:
        case KVM_CAP_EXT_CPUID:
+       case KVM_CAP_CLOCKSOURCE:
                r = 1;
                break;
        case KVM_CAP_VAPIC:
                r = !kvm_x86_ops->cpu_has_accelerated_tpr();
                break;
+       case KVM_CAP_NR_VCPUS:
+               r = KVM_MAX_VCPUS;
+               break;
+       case KVM_CAP_NR_MEMSLOTS:
+               r = KVM_MEMORY_SLOTS;
+               break;
        default:
                r = 0;
                break;
@@ -763,6 +893,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        kvm_x86_ops->vcpu_load(vcpu, cpu);
+       kvm_write_guest_time(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -958,32 +1089,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        }
        /* function 4 and 0xb have additional index. */
        case 4: {
-               int index, cache_type;
+               int i, cache_type;
 
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until cache_type is zero */
-               for (index = 1; *nent < maxnent; ++index) {
-                       cache_type = entry[index - 1].eax & 0x1f;
+               for (i = 1; *nent < maxnent; ++i) {
+                       cache_type = entry[i - 1].eax & 0x1f;
                        if (!cache_type)
                                break;
-                       do_cpuid_1_ent(&entry[index], function, index);
-                       entry[index].flags |=
+                       do_cpuid_1_ent(&entry[i], function, i);
+                       entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                }
                break;
        }
        case 0xb: {
-               int index, level_type;
+               int i, level_type;
 
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until level_type is zero */
-               for (index = 1; *nent < maxnent; ++index) {
-                       level_type = entry[index - 1].ecx & 0xff;
+               for (i = 1; *nent < maxnent; ++i) {
+                       level_type = entry[i - 1].ecx & 0xff;
                        if (!level_type)
                                break;
-                       do_cpuid_1_ent(&entry[index], function, index);
-                       entry[index].flags |=
+                       do_cpuid_1_ent(&entry[i], function, i);
+                       entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                }
@@ -1840,7 +1971,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 }
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
-struct x86_emulate_ops emulate_ops = {
+static struct x86_emulate_ops emulate_ops = {
        .read_std            = emulator_read_std,
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
@@ -2281,6 +2412,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        }
        vcpu->arch.regs[VCPU_REGS_RAX] = ret;
        kvm_x86_ops->decache_regs(vcpu);
+       ++vcpu->stat.hypercalls;
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
@@ -2773,7 +2905,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 static void get_segment(struct kvm_vcpu *vcpu,
                        struct kvm_segment *var, int seg)
 {
-       return kvm_x86_ops->get_segment(vcpu, var, seg);
+       kvm_x86_ops->get_segment(vcpu, var, seg);
 }
 
 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -2839,7 +2971,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 static void set_segment(struct kvm_vcpu *vcpu,
                        struct kvm_segment *var, int seg)
 {
-       return kvm_x86_ops->set_segment(vcpu, var, seg);
+       kvm_x86_ops->set_segment(vcpu, var, seg);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -2865,9 +2997,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        set_cr8(vcpu, sregs->cr8);
 
        mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
-#ifdef CONFIG_X86_64
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
-#endif
        kvm_set_apic_base(vcpu, sregs->apic_base);
 
        kvm_x86_ops->decache_cr4_guest_bits(vcpu);