/* * Kernel-based Virtual Machine driver for Linux * * derived from drivers/kvm/kvm_main.c * * Copyright (C) 2006 Qumranet, Inc. * * Authors: * Avi Kivity * Yaniv Kamay * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. * */ #include "kvm.h" #include "x86.h" #include "irq.h" #include #include #include #include #define MAX_IO_MSRS 256 /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. * * This list is modified at module load time to reflect the * capabilities of the host cpu. */ static u32 msrs_to_save[] = { MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_K6_STAR, #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, }; static unsigned num_msrs_to_save; static u32 emulated_msrs[] = { MSR_IA32_MISC_ENABLE, }; /* * Adapt set_msr() to msr_io()'s calling convention */ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { return kvm_set_msr(vcpu, index, *data); } /* * Read or write a bunch of msrs. All parameters are kernel addresses. * * @return number of msrs set successfully. */ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, struct kvm_msr_entry *entries, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { int i; vcpu_load(vcpu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; vcpu_put(vcpu); return i; } /* * Read or write a bunch of msrs. Parameters are user addresses. * * @return number of msrs set successfully. */ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data), int writeback) { struct kvm_msrs msrs; struct kvm_msr_entry *entries; int r, n; unsigned size; r = -EFAULT; if (copy_from_user(&msrs, user_msrs, sizeof msrs)) goto out; r = -E2BIG; if (msrs.nmsrs >= MAX_IO_MSRS) goto out; r = -ENOMEM; size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; entries = vmalloc(size); if (!entries) goto out; r = -EFAULT; if (copy_from_user(entries, user_msrs->entries, size)) goto out_free; r = n = __msr_io(vcpu, &msrs, entries, do_msr); if (r < 0) goto out_free; r = -EFAULT; if (writeback && copy_to_user(user_msrs->entries, entries, size)) goto out_free; r = n; out_free: vfree(entries); out: return r; } long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { void __user *argp = (void __user *)arg; long r; switch (ioctl) { case KVM_GET_MSR_INDEX_LIST: { struct kvm_msr_list __user *user_msr_list = argp; struct kvm_msr_list msr_list; unsigned n; r = -EFAULT; if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) goto out; n = msr_list.nmsrs; msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) goto out; r = -E2BIG; if (n < num_msrs_to_save) goto out; r = -EFAULT; if (copy_to_user(user_msr_list->indices, &msrs_to_save, num_msrs_to_save * sizeof(u32))) goto out; if (copy_to_user(user_msr_list->indices + num_msrs_to_save * sizeof(u32), &emulated_msrs, ARRAY_SIZE(emulated_msrs) * sizeof(u32))) goto out; r = 0; break; } default: r = -EINVAL; } out: return r; } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); } static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) { u64 efer; int i; struct kvm_cpuid_entry *e, *entry; rdmsrl(MSR_EFER, efer); entry = NULL; for (i = 0; i < vcpu->cpuid_nent; ++i) { e = &vcpu->cpuid_entries[i]; if (e->function == 0x80000001) { entry = e; break; } } if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { entry->edx &= ~(1 << 20); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) { int r; r = -E2BIG; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) goto out; r = -EFAULT; if (copy_from_user(&vcpu->cpuid_entries, entries, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; cpuid_fix_nx_cap(vcpu); return 0; out: return r; } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { vcpu_load(vcpu); memcpy(s->regs, vcpu->apic->regs, sizeof *s); vcpu_put(vcpu); return 0; } static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { vcpu_load(vcpu); memcpy(vcpu->apic->regs, s->regs, sizeof *s); kvm_apic_post_state_restore(vcpu); vcpu_put(vcpu); return 0; } long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; int r; switch (ioctl) { case KVM_GET_LAPIC: { struct kvm_lapic_state lapic; memset(&lapic, 0, sizeof lapic); r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); if (r) goto out; r = -EFAULT; if (copy_to_user(argp, &lapic, sizeof lapic)) goto out; r = 0; break; } case KVM_SET_LAPIC: { struct kvm_lapic_state lapic; r = -EFAULT; if (copy_from_user(&lapic, argp, sizeof lapic)) goto out; r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; if (r) goto out; r = 0; break; } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); if (r) goto out; break; } case KVM_GET_MSRS: r = msr_io(vcpu, argp, kvm_get_msr, 1); break; case KVM_SET_MSRS: r = msr_io(vcpu, argp, do_set_msr, 0); break; default: r = -EINVAL; } out: return r; } static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) { int ret; if (addr > (unsigned int)(-3 * PAGE_SIZE)) return -1; ret = kvm_x86_ops->set_tss_addr(kvm, addr); return ret; } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, u32 kvm_nr_mmu_pages) { if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; mutex_lock(&kvm->lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->n_requested_mmu_pages = kvm_nr_mmu_pages; mutex_unlock(&kvm->lock); return 0; } static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) { return kvm->n_alloc_mmu_pages; } /* * Set a new alias region. Aliases map a portion of physical memory into * another portion. This is useful for memory windows, for example the PC * VGA region. */ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, struct kvm_memory_alias *alias) { int r, n; struct kvm_mem_alias *p; r = -EINVAL; /* General sanity checks */ if (alias->memory_size & (PAGE_SIZE - 1)) goto out; if (alias->guest_phys_addr & (PAGE_SIZE - 1)) goto out; if (alias->slot >= KVM_ALIAS_SLOTS) goto out; if (alias->guest_phys_addr + alias->memory_size < alias->guest_phys_addr) goto out; if (alias->target_phys_addr + alias->memory_size < alias->target_phys_addr) goto out; mutex_lock(&kvm->lock); p = &kvm->aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; p->npages = alias->memory_size >> PAGE_SHIFT; p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; for (n = KVM_ALIAS_SLOTS; n > 0; --n) if (kvm->aliases[n - 1].npages) break; kvm->naliases = n; kvm_mmu_zap_all(kvm); mutex_unlock(&kvm->lock); return 0; out: return r; } static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) { int r; r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: memcpy(&chip->chip.pic, &pic_irqchip(kvm)->pics[0], sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_PIC_SLAVE: memcpy(&chip->chip.pic, &pic_irqchip(kvm)->pics[1], sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_IOAPIC: memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), sizeof(struct kvm_ioapic_state)); break; default: r = -EINVAL; break; } return r; } static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) { int r; r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: memcpy(&pic_irqchip(kvm)->pics[0], &chip->chip.pic, sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_PIC_SLAVE: memcpy(&pic_irqchip(kvm)->pics[1], &chip->chip.pic, sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_IOAPIC: memcpy(ioapic_irqchip(kvm), &chip->chip.ioapic, sizeof(struct kvm_ioapic_state)); break; default: r = -EINVAL; break; } kvm_pic_update_irq(pic_irqchip(kvm)); return r; } long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; int r = -EINVAL; switch (ioctl) { case KVM_SET_TSS_ADDR: r = kvm_vm_ioctl_set_tss_addr(kvm, arg); if (r < 0) goto out; break; case KVM_SET_MEMORY_REGION: { struct kvm_memory_region kvm_mem; struct kvm_userspace_memory_region kvm_userspace_mem; r = -EFAULT; if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) goto out; kvm_userspace_mem.slot = kvm_mem.slot; kvm_userspace_mem.flags = kvm_mem.flags; kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; kvm_userspace_mem.memory_size = kvm_mem.memory_size; r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); if (r) goto out; break; } case KVM_SET_NR_MMU_PAGES: r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); if (r) goto out; break; case KVM_GET_NR_MMU_PAGES: r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); break; case KVM_SET_MEMORY_ALIAS: { struct kvm_memory_alias alias; r = -EFAULT; if (copy_from_user(&alias, argp, sizeof alias)) goto out; r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); if (r) goto out; break; } case KVM_CREATE_IRQCHIP: r = -ENOMEM; kvm->vpic = kvm_create_pic(kvm); if (kvm->vpic) { r = kvm_ioapic_init(kvm); if (r) { kfree(kvm->vpic); kvm->vpic = NULL; goto out; } } else goto out; break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; r = -EFAULT; if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); if (irq_event.irq < 16) kvm_pic_set_irq(pic_irqchip(kvm), irq_event.irq, irq_event.level); kvm_ioapic_set_irq(kvm->vioapic, irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } break; } case KVM_GET_IRQCHIP: { /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ struct kvm_irqchip chip; r = -EFAULT; if (copy_from_user(&chip, argp, sizeof chip)) goto out; r = -ENXIO; if (!irqchip_in_kernel(kvm)) goto out; r = kvm_vm_ioctl_get_irqchip(kvm, &chip); if (r) goto out; r = -EFAULT; if (copy_to_user(argp, &chip, sizeof chip)) goto out; r = 0; break; } case KVM_SET_IRQCHIP: { /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ struct kvm_irqchip chip; r = -EFAULT; if (copy_from_user(&chip, argp, sizeof chip)) goto out; r = -ENXIO; if (!irqchip_in_kernel(kvm)) goto out; r = kvm_vm_ioctl_set_irqchip(kvm, &chip); if (r) goto out; r = 0; break; } default: ; } out: return r; } static __init void kvm_init_msr_list(void) { u32 dummy[2]; unsigned i, j; for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) continue; if (j < i) msrs_to_save[j] = msrs_to_save[i]; j++; } num_msrs_to_save = j; } __init void kvm_arch_init(void) { kvm_init_msr_list(); }