2 * Kernel-based Virtual Machine driver for Linux
4 * derived from drivers/kvm/kvm_main.c
6 * Copyright (C) 2006 Qumranet, Inc.
9 * Avi Kivity <avi@qumranet.com>
10 * Yaniv Kamay <yaniv@qumranet.com>
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
19 #include "segment_descriptor.h"
22 #include <linux/kvm.h>
24 #include <linux/vmalloc.h>
25 #include <linux/module.h>
27 #include <asm/uaccess.h>
29 #define MAX_IO_MSRS 256
/*
 * segment_base - return the linear base address encoded in the host
 * descriptor referenced by @selector.
 *
 * NOTE(review): several original lines (function braces, the declarations
 * of 'v' and 'ldt_selector', and the final return) are missing from this
 * extraction; the commentary below covers only what is visible.
 */
31 unsigned long segment_base(u16 selector)
33 struct descriptor_table gdt;
34 struct segment_descriptor *d;
35 unsigned long table_base;
/* Read the host GDT register; its base is the default lookup table. */
41 asm("sgdt %0" : "=m"(gdt));
42 table_base = gdt.base;
/* Bit 2 of a selector is the Table Indicator: 1 = LDT, 0 = GDT. */
44 if (selector & 4) { /* from ldt */
/* Resolve the LDT's own base by recursing with the LDT selector. */
47 asm("sldt %0" : "=g"(ldt_selector));
48 table_base = segment_base(ldt_selector);
/* Descriptors are 8 bytes; clear the RPL/TI bits to index the table. */
50 d = (struct segment_descriptor *)(table_base + (selector & ~7));
/* Reassemble the 32-bit base from its three scattered descriptor fields. */
51 v = d->base_low | ((unsigned long)d->base_mid << 16) |
52 ((unsigned long)d->base_high << 24);
/*
 * System descriptors of type LDT (2), available TSS (9) or busy TSS (11)
 * are 16 bytes wide on x86-64 and carry bits 63:32 in base_higher.
 */
54 if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
55 v |= ((unsigned long) \
56 ((struct segment_descriptor_64 *)d)->base_higher) << 32;
60 EXPORT_SYMBOL_GPL(segment_base);
63 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
64 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
66 * This list is modified at module load time to reflect the
67 * capabilities of the host cpu.
/* NOTE(review): both array initializers are truncated in this extraction;
 * additional entries and the closing braces are on elided lines. */
69 static u32 msrs_to_save[] = {
70 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
73 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
75 MSR_IA32_TIME_STAMP_COUNTER,
/* Count of valid leading entries in msrs_to_save[]; written once by
 * kvm_init_msr_list() after probing the host with rdmsr_safe(). */
78 static unsigned num_msrs_to_save;
/* MSRs that kvm emulates itself, exposed in addition to msrs_to_save[]. */
80 static u32 emulated_msrs[] = {
85 * Adapt set_msr() to msr_io()'s calling convention
/* msr_io() hands each entry to a (vcpu, index, data*) callback; for the
 * write direction we dereference the data and forward it to kvm_set_msr(),
 * propagating its return value. */
87 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
89 return kvm_set_msr(vcpu, index, *data);
93 * Read or write a bunch of msrs. All parameters are kernel addresses.
95 * @return number of msrs set successfully.
97 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
98 struct kvm_msr_entry *entries,
99 int (*do_msr)(struct kvm_vcpu *vcpu,
100 unsigned index, u64 *data))
/* Apply do_msr to each entry in order; the early exit on callback failure
 * and the final 'return i' are on lines elided from this extraction. */
106 for (i = 0; i < msrs->nmsrs; ++i)
107 if (do_msr(vcpu, entries[i].index, &entries[i].data))
116 * Read or write a bunch of msrs. Parameters are user addresses.
118 * @return number of msrs set successfully.
120 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
121 int (*do_msr)(struct kvm_vcpu *vcpu,
122 unsigned index, u64 *data),
/* NOTE(review): the trailing 'writeback' parameter and the declarations of
 * r/n/size are on lines elided from this extraction. */
125 struct kvm_msrs msrs;
126 struct kvm_msr_entry *entries;
/* Copy the fixed-size header (which carries nmsrs) in first. */
131 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
/* Bound the vmalloc below: nmsrs is userspace-controlled. */
135 if (msrs.nmsrs >= MAX_IO_MSRS)
139 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
140 entries = vmalloc(size);
/* user_msrs->entries is the struct's flexible array tail: this is user
 * pointer arithmetic only, not a kernel-side dereference. */
145 if (copy_from_user(entries, user_msrs->entries, size))
148 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
/* For reads (writeback != 0) copy the filled entries back to userspace. */
153 if (writeback && copy_to_user(user_msrs->entries, entries, size))
/*
 * Device-level ioctl dispatcher. KVM_GET_MSR_INDEX_LIST reports the MSR
 * indices userspace may use with KVM_GET_MSRS/KVM_SET_MSRS: the
 * host-probed msrs_to_save[] followed by the always-available
 * emulated_msrs[]. (Error-code assignments, 'goto out' lines and the 'n'
 * assignment from the user-supplied nmsrs are elided in this extraction.)
 */
164 long kvm_arch_dev_ioctl(struct file *filp,
165 unsigned int ioctl, unsigned long arg)
167 void __user *argp = (void __user *)arg;
171 case KVM_GET_MSR_INDEX_LIST: {
172 struct kvm_msr_list __user *user_msr_list = argp;
173 struct kvm_msr_list msr_list;
177 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
/* Report the total count back even when the buffer is too small. */
180 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
181 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
/*
 * Fix: the user buffer must have room for the *whole* list (saved +
 * emulated entries); checking only num_msrs_to_save let the second
 * copy_to_user below run past the space userspace provided.
 */
184 if (n < num_msrs_to_save + ARRAY_SIZE(emulated_msrs))
187 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
188 num_msrs_to_save * sizeof(u32)))
/*
 * Fix: 'indices' is a __u32 array, so pointer arithmetic already scales
 * by sizeof(u32); the old '+ num_msrs_to_save * sizeof(u32)' placed the
 * emulated list 4x too far into the user buffer.
 */
190 if (copy_to_user(user_msr_list->indices
191 + num_msrs_to_save,
193 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
/* Architecture hook: delegate loading this vcpu onto @cpu to the vendor
 * implementation (vmx/svm) behind kvm_x86_ops. */
205 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
207 kvm_x86_ops->vcpu_load(vcpu, cpu);
/* Architecture hook: let the vendor implementation (vmx/svm) release
 * per-cpu state when the vcpu is scheduled out. */
210 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
212 kvm_x86_ops->vcpu_put(vcpu);
/*
 * If the host runs with NX disabled (EFER.NX clear), hide the NX
 * capability (bit 20 of CPUID 0x80000001 EDX) from the guest so it does
 * not enable a feature the hardware will reject.
 */
215 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
219 struct kvm_cpuid_entry *e, *entry;
221 rdmsrl(MSR_EFER, efer);
/* Find the guest's extended-feature leaf, if userspace supplied one.
 * (The 'entry = NULL' init and 'entry = e; break;' body are on lines
 * elided from this extraction.) */
223 for (i = 0; i < vcpu->cpuid_nent; ++i) {
224 e = &vcpu->cpuid_entries[i];
225 if (e->function == 0x80000001) {
/* Clear the advertised NX bit only when the host cannot back it. */
230 if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
231 entry->edx &= ~(1 << 20);
232 printk(KERN_INFO "kvm: guest NX capability removed\n");
/*
 * Install the guest CPUID table supplied by userspace, then reconcile the
 * advertised NX bit against host capabilities. (Error-code assignments
 * and the final return are on elided lines.)
 */
236 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
237 struct kvm_cpuid *cpuid,
238 struct kvm_cpuid_entry __user *entries)
/* Reject tables larger than the fixed vcpu->cpuid_entries[] array. */
243 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
246 if (copy_from_user(&vcpu->cpuid_entries, entries,
247 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
/* Commit the count only after the copy has succeeded. */
249 vcpu->cpuid_nent = cpuid->nent;
250 cpuid_fix_nx_cap(vcpu);
/* Snapshot the in-kernel local APIC register page into *s.
 * NOTE(review): 'sizeof *s' assumes kvm_lapic_state is exactly the regs
 * block -- confirm against the uapi struct definition. */
257 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
258 struct kvm_lapic_state *s)
261 memcpy(s->regs, vcpu->apic->regs, sizeof *s);
/* Load the local APIC register page from *s, then let the apic code
 * recompute derived state from the restored registers. */
267 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
268 struct kvm_lapic_state *s)
271 memcpy(vcpu->apic->regs, s->regs, sizeof *s);
272 kvm_apic_post_state_restore(vcpu);
/*
 * vcpu-level ioctl dispatcher: LAPIC state save/restore, guest CPUID
 * installation, and batched MSR read/write. (Error-code assignments,
 * 'goto out'/'break' lines and the KVM_GET/SET_MSRS case labels are
 * elided from this extraction.)
 */
278 long kvm_arch_vcpu_ioctl(struct file *filp,
279 unsigned int ioctl, unsigned long arg)
281 struct kvm_vcpu *vcpu = filp->private_data;
282 void __user *argp = (void __user *)arg;
286 case KVM_GET_LAPIC: {
287 struct kvm_lapic_state lapic;
/* Zero first so struct padding never leaks kernel stack to userspace. */
289 memset(&lapic, 0, sizeof lapic);
290 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
294 if (copy_to_user(argp, &lapic, sizeof lapic))
299 case KVM_SET_LAPIC: {
300 struct kvm_lapic_state lapic;
303 if (copy_from_user(&lapic, argp, sizeof lapic))
/* Fix: dropped the stray second semicolon (empty statement). */
305 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
311 case KVM_SET_CPUID: {
312 struct kvm_cpuid __user *cpuid_arg = argp;
313 struct kvm_cpuid cpuid;
316 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
/* cpuid_arg->entries is the flexible array tail of the user struct --
 * user pointer arithmetic only; the helper does the copy_from_user. */
318 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
/* Batched MSR access: writeback=1 copies read results back out. */
324 r = msr_io(vcpu, argp, kvm_get_msr, 1);
327 r = msr_io(vcpu, argp, do_set_msr, 0);
/* Set the guest-physical address used for the TSS pages. The check
 * rejects addresses above 4G minus 3 pages ((unsigned int)(-3*PAGE_SIZE)
 * == 0x100000000 - 3*PAGE_SIZE) -- presumably because the area spans
 * 3 pages starting at 'addr'; TODO confirm against vendor set_tss_addr. */
336 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
340 if (addr > (unsigned int)(-3 * PAGE_SIZE))
342 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
/* Resize the shadow-page allocation under kvm->lock, and remember the
 * value as an explicit userspace request (n_requested_mmu_pages). */
346 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
347 u32 kvm_nr_mmu_pages)
/* Refuse sizes too small for the MMU to make progress. */
349 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
352 mutex_lock(&kvm->lock);
354 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
355 kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
357 mutex_unlock(&kvm->lock);
/* Report the current shadow-page allocation (KVM_GET_NR_MMU_PAGES). */
361 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
363 return kvm->n_alloc_mmu_pages;
367 * Set a new alias region. Aliases map a portion of physical memory into
368 * another portion. This is useful for memory windows, for example the PC
371 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
372 struct kvm_memory_alias *alias)
375 struct kvm_mem_alias *p;
378 /* General sanity checks */
/* Size and both addresses must be page-aligned; the slot must exist. */
379 if (alias->memory_size & (PAGE_SIZE - 1))
381 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
383 if (alias->slot >= KVM_ALIAS_SLOTS)
/* Reject ranges whose end wraps around the top of the address space. */
385 if (alias->guest_phys_addr + alias->memory_size
386 < alias->guest_phys_addr)
388 if (alias->target_phys_addr + alias->memory_size
389 < alias->target_phys_addr)
/* Install the alias, converted to frame numbers, under the VM lock. */
392 mutex_lock(&kvm->lock);
394 p = &kvm->aliases[alias->slot];
395 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
396 p->npages = alias->memory_size >> PAGE_SHIFT;
397 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
/* Scan down for the highest in-use slot; the loop's 'break' and the
 * store of the resulting count are on lines elided from this view. */
399 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
400 if (kvm->aliases[n - 1].npages)
/* Drop all shadow pages so stale gfn translations cannot survive. */
404 kvm_mmu_zap_all(kvm);
406 mutex_unlock(&kvm->lock);
/*
 * Copy the requested in-kernel irqchip state (master PIC, slave PIC, or
 * IOAPIC) out into *chip for KVM_GET_IRQCHIP. The 'break' statements,
 * the default error case and the return are on lines elided from this
 * extraction.
 */
414 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
419 switch (chip->chip_id) {
420 case KVM_IRQCHIP_PIC_MASTER:
421 memcpy(&chip->chip.pic,
422 &pic_irqchip(kvm)->pics[0],
423 sizeof(struct kvm_pic_state))
/*
 * Restore in-kernel irqchip state from *chip for KVM_SET_IRQCHIP; the
 * source operands of the memcpys, the 'break' statements and the default
 * error case are on lines elided from this extraction.
 */
442 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
447 switch (chip->chip_id) {
448 case KVM_IRQCHIP_PIC_MASTER:
449 memcpy(&pic_irqchip(kvm)->pics[0],
451 sizeof(struct kvm_pic_state));
453 case KVM_IRQCHIP_PIC_SLAVE:
454 memcpy(&pic_irqchip(kvm)->pics[1],
456 sizeof(struct kvm_pic_state));
458 case KVM_IRQCHIP_IOAPIC:
459 memcpy(ioapic_irqchip(kvm),
461 sizeof(struct kvm_ioapic_state));
/* Re-evaluate PIC output after the restored state may have changed it. */
467 kvm_pic_update_irq(pic_irqchip(kvm));
/*
 * VM-level ioctl dispatcher: memory layout (TSS address, memory regions
 * and aliases, MMU page counts) and in-kernel irqchip management.
 * Error-code assignments, 'goto out'/'break' lines and some case labels
 * are elided from this extraction.
 */
471 long kvm_arch_vm_ioctl(struct file *filp,
472 unsigned int ioctl, unsigned long arg)
474 struct kvm *kvm = filp->private_data;
475 void __user *argp = (void __user *)arg;
479 case KVM_SET_TSS_ADDR:
480 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
/* Legacy ioctl: translate the old kvm_memory_region layout into the
 * userspace_memory_region form (user_alloc = 0). */
484 case KVM_SET_MEMORY_REGION: {
485 struct kvm_memory_region kvm_mem;
486 struct kvm_userspace_memory_region kvm_userspace_mem;
489 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
491 kvm_userspace_mem.slot = kvm_mem.slot;
492 kvm_userspace_mem.flags = kvm_mem.flags;
493 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
494 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
495 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
500 case KVM_SET_NR_MMU_PAGES:
501 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
/* Returns the page count directly as the ioctl result. */
505 case KVM_GET_NR_MMU_PAGES:
506 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
508 case KVM_SET_MEMORY_ALIAS: {
509 struct kvm_memory_alias alias;
512 if (copy_from_user(&alias, argp, sizeof alias))
514 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
/* Instantiate the in-kernel PIC, then the IOAPIC (error handling for
 * either failing is on elided lines). */
519 case KVM_CREATE_IRQCHIP:
521 kvm->vpic = kvm_create_pic(kvm);
523 r = kvm_ioapic_init(kvm);
/* Interrupt injection (case label elided -- presumably KVM_IRQ_LINE):
 * ISA irqs < 16 go to the PIC, everything else to the IOAPIC, under
 * kvm->lock. */
533 struct kvm_irq_level irq_event;
536 if (copy_from_user(&irq_event, argp, sizeof irq_event))
538 if (irqchip_in_kernel(kvm)) {
539 mutex_lock(&kvm->lock);
540 if (irq_event.irq < 16)
541 kvm_pic_set_irq(pic_irqchip(kvm),
544 kvm_ioapic_set_irq(kvm->vioapic,
547 mutex_unlock(&kvm->lock);
552 case KVM_GET_IRQCHIP: {
553 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
554 struct kvm_irqchip chip;
557 if (copy_from_user(&chip, argp, sizeof chip))
/* Only meaningful when the irqchip is emulated in the kernel. */
560 if (!irqchip_in_kernel(kvm))
562 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
566 if (copy_to_user(argp, &chip, sizeof chip))
571 case KVM_SET_IRQCHIP: {
572 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
573 struct kvm_irqchip chip;
576 if (copy_from_user(&chip, argp, sizeof chip))
579 if (!irqchip_in_kernel(kvm))
581 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
/*
 * Module-load probe: try each candidate MSR with rdmsr_safe() and compact
 * msrs_to_save[] in place so it keeps only MSRs the host cpu actually
 * implements; num_msrs_to_save records the surviving count. (The 'dummy'
 * declaration, the 'continue' and the 'j++' are on elided lines.)
 */
594 static __init void kvm_init_msr_list(void)
599 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
600 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
603 msrs_to_save[j] = msrs_to_save[i];
606 num_msrs_to_save = j;
609 __init void kvm_arch_init(void)