pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'kvm-updates/2.6.30' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 26 Mar 2009 22:47:52 +0000 (15:47 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 26 Mar 2009 22:47:52 +0000 (15:47 -0700)
* 'kvm-updates/2.6.30' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (113 commits)
  KVM: VMX: Don't allow uninhibited access to EFER on i386
  KVM: Correct deassign device ioctl to IOW
  KVM: ppc: e500: Fix the bug that KVM is unstable in SMP
  KVM: ppc: e500: Fix the bug that mas0 update to wrong value when read TLB entry
  KVM: Fix missing smp tlb flush in invlpg
  KVM: Get support IRQ routing entry counts
  KVM: fix sparse warnings: Should it be static?
  KVM: fix sparse warnings: context imbalance
  KVM: is_long_mode() should check for EFER.LMA
  KVM: VMX: Update necessary state when guest enters long mode
  KVM: ia64: Fix the build errors due to lack of macros related to MSI.
  ia64: Move the macro definitions related to MSI to one header file.
  KVM: fix kvm_vm_ioctl_deassign_device
  KVM: define KVM_CAP_DEVICE_DEASSIGNMENT
  KVM: ppc: Add emulation of E500 register mmucsr0
  KVM: Report IRQ injection status for MSI delivered interrupts
  KVM: MMU: Fix another largepage memory leak
  KVM: SVM: set accessed bit for VMCB segment selectors
  KVM: Report IRQ injection status to userspace.
  KVM: MMU: remove assertion in kvm_mmu_alloc_page
  ...

71 files changed:
arch/ia64/include/asm/kvm.h
arch/ia64/include/asm/kvm_host.h
arch/ia64/include/asm/msidef.h [new file with mode: 0644]
arch/ia64/kernel/msi_ia64.c
arch/ia64/kvm/Kconfig
arch/ia64/kvm/irq.h
arch/ia64/kvm/kvm-ia64.c
arch/ia64/kvm/kvm_fw.c
arch/ia64/kvm/process.c
arch/ia64/kvm/vcpu.c
arch/ia64/kvm/vcpu.h
arch/ia64/kvm/vtlb.c
arch/powerpc/include/asm/kvm.h
arch/powerpc/include/asm/kvm_44x.h
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_e500.h [new file with mode: 0644]
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/mmu-fsl-booke.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/44x.c
arch/powerpc/kvm/44x_emulate.c
arch/powerpc/kvm/44x_tlb.c
arch/powerpc/kvm/44x_tlb.h
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/booke.h
arch/powerpc/kvm/booke_emulate.c [new file with mode: 0644]
arch/powerpc/kvm/booke_interrupts.S
arch/powerpc/kvm/e500.c [new file with mode: 0644]
arch/powerpc/kvm/e500_emulate.c [new file with mode: 0644]
arch/powerpc/kvm/e500_tlb.c [new file with mode: 0644]
arch/powerpc/kvm/e500_tlb.h [new file with mode: 0644]
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/powerpc.c
arch/s390/include/asm/kvm.h
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/Kconfig
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/sigp.c
arch/x86/include/asm/kvm.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/virtext.h
arch/x86/include/asm/vmx.h
arch/x86/kvm/Kconfig
arch/x86/kvm/i8254.c
arch/x86/kvm/i8254.h
arch/x86/kvm/i8259.c
arch/x86/kvm/irq.h
arch/x86/kvm/kvm_svm.h
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86_emulate.c
include/linux/kvm.h
include/linux/kvm_host.h
include/linux/kvm_types.h
virt/kvm/ioapic.c
virt/kvm/ioapic.h
virt/kvm/irq_comm.c
virt/kvm/kvm_main.c

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index bfa86b6af7cd0774d652c99d800e822284f74c23..0ee5bd7a988f101c85c3f839b7f1fcc3ae57be53 100644
@@ -166,7 +166,40 @@ struct saved_vpd {
        unsigned long  vcpuid[5];
        unsigned long  vpsr;
        unsigned long  vpr;
-       unsigned long  vcr[128];
+       union {
+               unsigned long  vcr[128];
+               struct {
+                       unsigned long dcr;
+                       unsigned long itm;
+                       unsigned long iva;
+                       unsigned long rsv1[5];
+                       unsigned long pta;
+                       unsigned long rsv2[7];
+                       unsigned long ipsr;
+                       unsigned long isr;
+                       unsigned long rsv3;
+                       unsigned long iip;
+                       unsigned long ifa;
+                       unsigned long itir;
+                       unsigned long iipa;
+                       unsigned long ifs;
+                       unsigned long iim;
+                       unsigned long iha;
+                       unsigned long rsv4[38];
+                       unsigned long lid;
+                       unsigned long ivr;
+                       unsigned long tpr;
+                       unsigned long eoi;
+                       unsigned long irr[4];
+                       unsigned long itv;
+                       unsigned long pmv;
+                       unsigned long cmcv;
+                       unsigned long rsv5[5];
+                       unsigned long lrr0;
+                       unsigned long lrr1;
+                       unsigned long rsv6[46];
+               };
+       };
 };
 
 struct kvm_regs {
@@ -214,4 +247,18 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+#define KVM_IA64_VCPU_STACK_SHIFT      16
+#define KVM_IA64_VCPU_STACK_SIZE       (1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+       unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
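
The kvm_ia64_vcpu_stack blob defined above (1 << KVM_IA64_VCPU_STACK_SHIFT = 64 KiB) is what the new KVM_IA64_VCPU_GET_STACK and KVM_IA64_VCPU_SET_STACK vcpu ioctls, handled in the kvm-ia64.c hunks further down, move between kernel and userspace. A minimal userspace sketch follows, assuming a vcpu fd from KVM_CREATE_VCPU and headers from this series; note the handler reads a pointer-sized value out of the ioctl argument, so the caller passes the address of its buffer pointer. The function name is illustrative, not part of the commit.

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Sketch: snapshot and then restore one vcpu's 64 KiB stack/context
 * area, e.g. around save/restore of an ia64 guest.  Error handling
 * is trimmed for brevity. */
static int vcpu_stack_roundtrip(int vcpu_fd)
{
	struct kvm_ia64_vcpu_stack *buf = malloc(sizeof(*buf));
	int r = -1;

	if (!buf)
		return -1;

	/* The kernel copies a pointer out of the argument, so pass &buf. */
	if (ioctl(vcpu_fd, KVM_IA64_VCPU_GET_STACK, &buf) == 0)
		r = ioctl(vcpu_fd, KVM_IA64_VCPU_SET_STACK, &buf);

	free(buf);
	return r;
}
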
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 348663661659907320aca0889863d6f6867ed0f1..4542651e6acb1e82f3f4665ebca0705a9e6161aa 100644
 #define VCPU_STRUCT_SHIFT      16
 #define VCPU_STRUCT_SIZE       (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
 
-#define KVM_STK_OFFSET         VCPU_STRUCT_SIZE
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT          16
+#define KVM_STK_OFFSET         (__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
 
 #define KVM_VM_STRUCT_SHIFT    19
 #define KVM_VM_STRUCT_SIZE     (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
@@ -153,10 +157,10 @@ struct kvm_vm_data {
        struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
 };
 
-#define VCPU_BASE(n)   KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE                KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define VCPU_BASE(n)   (KVM_VM_DATA_BASE + \
+                               offsetof(struct kvm_vm_data, vcpu_data[n]))
+#define KVM_VM_BASE    (KVM_VM_DATA_BASE + \
+                               offsetof(struct kvm_vm_data, kvm_vm_struct))
 #define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
                                offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
 
@@ -235,8 +239,6 @@ struct kvm_vm_data {
 
 struct kvm;
 struct kvm_vcpu;
-struct kvm_guest_debug{
-};
 
 struct kvm_mmio_req {
        uint64_t addr;          /*  physical address            */
@@ -462,6 +464,8 @@ struct kvm_arch {
        unsigned long   metaphysical_rr4;
        unsigned long   vmm_init_rr;
 
+       int             online_vcpus;
+
        struct kvm_ioapic *vioapic;
        struct kvm_vm_stat stat;
        struct kvm_sal_data rdv_sal_data;
diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h
new file mode 100644
index 0000000..592c104
--- /dev/null
+++ b/arch/ia64/include/asm/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define     MSI_DATA_VECTOR_SHIFT      0
+#define            MSI_DATA_VECTOR(v)          (((u8)v) << MSI_DATA_VECTOR_SHIFT)
+#define     MSI_DATA_VECTOR_MASK       0xffffff00
+
+#define     MSI_DATA_DELIVERY_MODE_SHIFT       8
+#define     MSI_DATA_DELIVERY_FIXED    (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define     MSI_DATA_DELIVERY_LOWPRI   (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define     MSI_DATA_LEVEL_SHIFT       14
+#define     MSI_DATA_LEVEL_DEASSERT    (0 << MSI_DATA_LEVEL_SHIFT)
+#define     MSI_DATA_LEVEL_ASSERT      (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define     MSI_DATA_TRIGGER_SHIFT     15
+#define     MSI_DATA_TRIGGER_EDGE      (0 << MSI_DATA_TRIGGER_SHIFT)
+#define     MSI_DATA_TRIGGER_LEVEL     (1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define     MSI_ADDR_DEST_ID_SHIFT     4
+#define     MSI_ADDR_HEADER            0xfee00000
+
+#define     MSI_ADDR_DEST_ID_MASK      0xfff0000f
+#define     MSI_ADDR_DEST_ID_CPU(cpu)  ((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define     MSI_ADDR_DEST_MODE_SHIFT   2
+#define     MSI_ADDR_DEST_MODE_PHYS    (0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define            MSI_ADDR_DEST_MODE_LOGIC    (1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define     MSI_ADDR_REDIRECTION_SHIFT 3
+#define     MSI_ADDR_REDIRECTION_CPU   (0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define     MSI_ADDR_REDIRECTION_LOWPRI        (1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif/* _IA64_MSI_DEF_H */
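
For reference, the address and data encodings above compose the same way ia64_setup_msi_irq() and msi_compose_msg() use them in the msi_ia64.c hunk below. A minimal kernel-context sketch, assuming <linux/msi.h> for struct msi_msg; the function name and its parameters are illustrative placeholders for whatever the caller's irq/vector allocation produced:

/* Sketch: build an MSI message targeting one physical CPU with a
 * fixed-delivery, edge-triggered vector, using the new msidef.h macros. */
static void compose_msi_example(struct msi_msg *msg,
				unsigned int dest_phys_id, u8 vector)
{
	msg->address_hi = 0;
	msg->address_lo = MSI_ADDR_HEADER |            /* 0xfee00000 window */
			  MSI_ADDR_DEST_MODE_PHYS |    /* physical destination id */
			  MSI_ADDR_REDIRECTION_CPU |   /* no lowest-priority redirection */
			  MSI_ADDR_DEST_ID_CPU(dest_phys_id);

	msg->data = MSI_DATA_TRIGGER_EDGE |
		    MSI_DATA_LEVEL_ASSERT |
		    MSI_DATA_DELIVERY_FIXED |
		    MSI_DATA_VECTOR(vector);
}
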
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 890339339035b74814721c799a024fa6d184438a..368ee4e5266d43e9b23e2fe18bdd2199f7676bed 100644
@@ -7,44 +7,7 @@
 #include <linux/msi.h>
 #include <linux/dmar.h>
 #include <asm/smp.h>
-
-/*
- * Shifts for APIC-based data
- */
-
-#define MSI_DATA_VECTOR_SHIFT          0
-#define            MSI_DATA_VECTOR(v)          (((u8)v) << MSI_DATA_VECTOR_SHIFT)
-#define MSI_DATA_VECTOR_MASK           0xffffff00
-
-#define MSI_DATA_DELIVERY_SHIFT                8
-#define     MSI_DATA_DELIVERY_FIXED    (0 << MSI_DATA_DELIVERY_SHIFT)
-#define     MSI_DATA_DELIVERY_LOWPRI   (1 << MSI_DATA_DELIVERY_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT           14
-#define     MSI_DATA_LEVEL_DEASSERT    (0 << MSI_DATA_LEVEL_SHIFT)
-#define     MSI_DATA_LEVEL_ASSERT      (1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT         15
-#define     MSI_DATA_TRIGGER_EDGE      (0 << MSI_DATA_TRIGGER_SHIFT)
-#define     MSI_DATA_TRIGGER_LEVEL     (1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for APIC-based bus address
- */
-
-#define MSI_TARGET_CPU_SHIFT           4
-#define MSI_ADDR_HEADER                        0xfee00000
-
-#define MSI_ADDR_DESTID_MASK           0xfff0000f
-#define     MSI_ADDR_DESTID_CPU(cpu)   ((cpu) << MSI_TARGET_CPU_SHIFT)
-
-#define MSI_ADDR_DESTMODE_SHIFT                2
-#define     MSI_ADDR_DESTMODE_PHYS     (0 << MSI_ADDR_DESTMODE_SHIFT)
-#define            MSI_ADDR_DESTMODE_LOGIC     (1 << MSI_ADDR_DESTMODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT     3
-#define     MSI_ADDR_REDIRECTION_CPU   (0 << MSI_ADDR_REDIRECTION_SHIFT)
-#define     MSI_ADDR_REDIRECTION_LOWPRI        (1 << MSI_ADDR_REDIRECTION_SHIFT)
+#include <asm/msidef.h>
 
 static struct irq_chip ia64_msi_chip;
 
@@ -65,8 +28,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
        read_msi_msg(irq, &msg);
 
        addr = msg.address_lo;
-       addr &= MSI_ADDR_DESTID_MASK;
-       addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+       addr &= MSI_ADDR_DEST_ID_MASK;
+       addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
        msg.address_lo = addr;
 
        data = msg.data;
@@ -98,9 +61,9 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
        msg.address_hi = 0;
        msg.address_lo =
                MSI_ADDR_HEADER |
-               MSI_ADDR_DESTMODE_PHYS |
+               MSI_ADDR_DEST_MODE_PHYS |
                MSI_ADDR_REDIRECTION_CPU |
-               MSI_ADDR_DESTID_CPU(dest_phys_id);
+               MSI_ADDR_DEST_ID_CPU(dest_phys_id);
 
        msg.data =
                MSI_DATA_TRIGGER_EDGE |
@@ -183,8 +146,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        msg.data &= ~MSI_DATA_VECTOR_MASK;
        msg.data |= MSI_DATA_VECTOR(cfg->vector);
-       msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
-       msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
 
        dmar_msi_write(irq, &msg);
        irq_desc[irq].affinity = *mask;
@@ -215,9 +178,9 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
        msg->address_hi = 0;
        msg->address_lo =
                MSI_ADDR_HEADER |
-               MSI_ADDR_DESTMODE_PHYS |
+               MSI_ADDR_DEST_MODE_PHYS |
                MSI_ADDR_REDIRECTION_CPU |
-               MSI_ADDR_DESTID_CPU(dest);
+               MSI_ADDR_DEST_ID_CPU(dest);
 
        msg->data =
                MSI_DATA_TRIGGER_EDGE |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index f833a0b4188df833650ed4a621172f9765269e85..0a2d6b86075a66ce45908ee6fc66f2cd981b4c8c 100644
@@ -4,6 +4,10 @@
 config HAVE_KVM
        bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+       default y
+
 menuconfig VIRTUALIZATION
        bool "Virtualization"
        depends on HAVE_KVM || IA64
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index c6786e8b1bf47f9d69a2b3c5f25fc2711f13640a..c0785a7282713025ec363a73263cd30d057b6e32 100644
@@ -23,6 +23,8 @@
 #ifndef __IRQ_H
 #define __IRQ_H
 
+#include "lapic.h"
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
        return 1;
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 28f982045f29c6c1623c2ac06e032e904efb0720..076b00d1dbffe3696eecb5662726faa204478bad 100644
@@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        switch (ext) {
        case KVM_CAP_IRQCHIP:
        case KVM_CAP_MP_STATE:
-
+       case KVM_CAP_IRQ_INJECT_STATUS:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -314,7 +314,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
        union ia64_lid lid;
        int i;
 
-       for (i = 0; i < KVM_MAX_VCPUS; i++) {
+       for (i = 0; i < kvm->arch.online_vcpus; i++) {
                if (kvm->vcpus[i]) {
                        lid.val = VCPU_LID(kvm->vcpus[i]);
                        if (lid.id == id && lid.eid == eid)
@@ -388,7 +388,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        call_data.ptc_g_data = p->u.ptc_g_data;
 
-       for (i = 0; i < KVM_MAX_VCPUS; i++) {
+       for (i = 0; i < kvm->arch.online_vcpus; i++) {
                if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
                                                KVM_MP_STATE_UNINITIALIZED ||
                                        vcpu == kvm->vcpus[i])
@@ -788,6 +788,8 @@ struct  kvm *kvm_arch_create_vm(void)
                return ERR_PTR(-ENOMEM);
        kvm_init_vm(kvm);
 
+       kvm->arch.online_vcpus = 0;
+
        return kvm;
 
 }
@@ -919,7 +921,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_ioapic_init(kvm);
                if (r)
                        goto out;
+               r = kvm_setup_default_irq_routing(kvm);
+               if (r) {
+                       kfree(kvm->arch.vioapic);
+                       goto out;
+               }
                break;
+       case KVM_IRQ_LINE_STATUS:
        case KVM_IRQ_LINE: {
                struct kvm_irq_level irq_event;
 
@@ -927,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
                        goto out;
                if (irqchip_in_kernel(kvm)) {
+                       __s32 status;
                        mutex_lock(&kvm->lock);
-                       kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                       status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                    irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
+                       if (ioctl == KVM_IRQ_LINE_STATUS) {
+                               irq_event.status = status;
+                               if (copy_to_user(argp, &irq_event,
+                                                       sizeof irq_event))
+                                       goto out;
+                       }
                        r = 0;
                }
                break;
@@ -1149,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
                /*Initialize itc offset for vcpus*/
                itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-               for (i = 0; i < KVM_MAX_VCPUS; i++) {
+               for (i = 0; i < kvm->arch.online_vcpus; i++) {
                        v = (struct kvm_vcpu *)((char *)vcpu +
                                        sizeof(struct kvm_vcpu_data) * i);
                        v->arch.itc_offset = itc_offset;
@@ -1283,6 +1298,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                goto fail;
        }
 
+       kvm->arch.online_vcpus++;
+
        return vcpu;
 fail:
        return ERR_PTR(r);
@@ -1303,8 +1320,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-               struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                       struct kvm_guest_debug *dbg)
 {
        return -EINVAL;
 }
@@ -1421,6 +1438,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        return 0;
 }
 
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+                                 struct kvm_ia64_vcpu_stack *stack)
+{
+       memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+       return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+                                 struct kvm_ia64_vcpu_stack *stack)
+{
+       memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+              sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+       vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+       return 0;
+}
+
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 
@@ -1430,9 +1464,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
-               unsigned int ioctl, unsigned long arg)
+                        unsigned int ioctl, unsigned long arg)
 {
-       return -EINVAL;
+       struct kvm_vcpu *vcpu = filp->private_data;
+       void __user *argp = (void __user *)arg;
+       struct kvm_ia64_vcpu_stack *stack = NULL;
+       long r;
+
+       switch (ioctl) {
+       case KVM_IA64_VCPU_GET_STACK: {
+               struct kvm_ia64_vcpu_stack __user *user_stack;
+               void __user *first_p = argp;
+
+               r = -EFAULT;
+               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+                       goto out;
+
+               if (!access_ok(VERIFY_WRITE, user_stack,
+                              sizeof(struct kvm_ia64_vcpu_stack))) {
+                       printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+                              "Illegal user destination address for stack\n");
+                       goto out;
+               }
+               stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+               if (!stack) {
+                       r = -ENOMEM;
+                       goto out;
+               }
+
+               r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+               if (r)
+                       goto out;
+
+               if (copy_to_user(user_stack, stack,
+                                sizeof(struct kvm_ia64_vcpu_stack)))
+                       goto out;
+
+               break;
+       }
+       case KVM_IA64_VCPU_SET_STACK: {
+               struct kvm_ia64_vcpu_stack __user *user_stack;
+               void __user *first_p = argp;
+
+               r = -EFAULT;
+               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+                       goto out;
+
+               if (!access_ok(VERIFY_READ, user_stack,
+                           sizeof(struct kvm_ia64_vcpu_stack))) {
+                       printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+                              "Illegal user address for stack\n");
+                       goto out;
+               }
+               stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+               if (!stack) {
+                       r = -ENOMEM;
+                       goto out;
+               }
+               if (copy_from_user(stack, user_stack,
+                                  sizeof(struct kvm_ia64_vcpu_stack)))
+                       goto out;
+
+               r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+               break;
+       }
+
+       default:
+               r = -EINVAL;
+       }
+
+out:
+       kfree(stack);
+       return r;
 }
 
 int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1472,7 +1575,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
-               unsigned int ioctl, unsigned long arg)
+                       unsigned int ioctl, unsigned long arg)
 {
        return -EINVAL;
 }
@@ -1737,7 +1840,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
        struct kvm_vcpu *lvcpu = kvm->vcpus[0];
        int i;
 
-       for (i = 1; i < KVM_MAX_VCPUS; i++) {
+       for (i = 1; i < kvm->arch.online_vcpus; i++) {
                if (!kvm->vcpus[i])
                        continue;
                if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
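
The KVM_IRQ_LINE_STATUS path added in the kvm-ia64.c hunk above copies the kvm_set_irq() return value back to userspace in the status field of struct kvm_irq_level, which this series turns into a union with the irq field. A minimal userspace sketch, assuming a VM fd from KVM_CREATE_VM, a GSI number, and that KVM_CHECK_EXTENSION reported the KVM_CAP_IRQ_INJECT_STATUS capability added in the check_extension hunk above; pulse_gsi is an illustrative name, not part of the commit:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: pulse a guest interrupt line and return the kvm_set_irq()
 * result for the rising edge (a positive value means the interrupt
 * was actually delivered; see the handler above). */
static int pulse_gsi(int vm_fd, unsigned int gsi)
{
	struct kvm_irq_level irq = { .irq = gsi, .level = 1 };
	int delivered;

	if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) < 0)
		return -1;
	delivered = irq.status;	/* irq and status share a union */

	irq.irq = gsi;		/* restore the gsi overwritten by status */
	irq.level = 0;
	if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) < 0)
		return -1;

	return delivered;
}
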
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index cb7600bdff9d3a81bcf76648e0859a4061820a0f..a8ae52ed56358e3e3d1b81edf095ebfceaa36a3b 100644
@@ -227,6 +227,18 @@ static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
        return result;
 }
 
+static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
+{
+
+       struct ia64_pal_retval result = {0, 0, 0, 0};
+       long in0, in1, in2, in3;
+
+       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+       result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
+
+       return result;
+}
+
 static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
 {
 
@@ -268,8 +280,12 @@ static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
 static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
 {
        struct ia64_pal_retval result;
+       unsigned long in0, in1, in2, in3;
 
-       INIT_PAL_STATUS_UNIMPLEMENTED(result);
+       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+       result.status = ia64_pal_vm_info(in1, in2,
+                       (pal_tc_info_u_t *)&result.v1, &result.v2);
 
        return result;
 }
@@ -292,6 +308,108 @@ static void prepare_for_halt(struct kvm_vcpu *vcpu)
        vcpu->arch.timer_fired = 0;
 }
 
+static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
+{
+       long status;
+       unsigned long in0, in1, in2, in3, r9;
+       unsigned long pm_buffer[16];
+
+       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+       status = ia64_pal_perf_mon_info(pm_buffer,
+                               (pal_perf_mon_info_u_t *) &r9);
+       if (status != 0) {
+               printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
+       } else {
+               if (in1)
+                       memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
+               else {
+                       status = PAL_STATUS_EINVAL;
+                       printk(KERN_WARNING"Invalid parameters "
+                                               "for PAL call:0x%lx!\n", in0);
+               }
+       }
+       return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
+{
+       unsigned long in0, in1, in2, in3;
+       long status;
+       unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
+                                       | (1UL << 61) | (1UL << 60);
+
+       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+       if (in1) {
+               memcpy((void *)in1, &res, sizeof(res));
+               status = 0;
+       } else{
+               status = PAL_STATUS_EINVAL;
+               printk(KERN_WARNING"Invalid parameters "
+                                       "for PAL call:0x%lx!\n", in0);
+       }
+
+       return (struct ia64_pal_retval){status, 0, 0, 0};
+}
+
+static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
+{
+       unsigned long r9;
+       long status;
+
+       status = ia64_pal_mem_attrib(&r9);
+
+       return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static void remote_pal_prefetch_visibility(void *v)
+{
+       s64 trans_type = (s64)v;
+       ia64_pal_prefetch_visibility(trans_type);
+}
+
+static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
+{
+       struct ia64_pal_retval result = {0, 0, 0, 0};
+       unsigned long in0, in1, in2, in3;
+       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+       result.status = ia64_pal_prefetch_visibility(in1);
+       if (result.status == 0) {
+               /* Must be performed on all remote processors
+               in the coherence domain. */
+               smp_call_function(remote_pal_prefetch_visibility,
+                                       (void *)in1, 1);
+               /* Unnecessary on remote processor for other vcpus!*/
+               result.status = 1;
+       }
+       return result;
+}
+
+static void remote_pal_mc_drain(void *v)
+{
+       ia64_pal_mc_drain();
+}
+
+static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
+{
+       struct ia64_pal_retval result = {0, 0, 0, 0};
+       unsigned long in0, in1, in2, in3;
+
+       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+       if (in1 == 0 && in2) {
+               char brand_info[128];
+               result.status = ia64_pal_get_brand_info(brand_info);
+               if (result.status == PAL_STATUS_SUCCESS)
+                       memcpy((void *)in2, brand_info, 128);
+       } else {
+               result.status = PAL_STATUS_REQUIRES_MEMORY;
+               printk(KERN_WARNING"Invalid parameters for "
+                                       "PAL call:0x%lx!\n", in0);
+       }
+
+       return result;
+}
+
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 
@@ -300,14 +418,22 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
        int ret = 1;
 
        gr28 = kvm_get_pal_call_index(vcpu);
-       /*printk("pal_call index:%lx\n",gr28);*/
        switch (gr28) {
        case PAL_CACHE_FLUSH:
                result = pal_cache_flush(vcpu);
                break;
+       case PAL_MEM_ATTRIB:
+               result = pal_mem_attrib(vcpu);
+               break;
        case PAL_CACHE_SUMMARY:
                result = pal_cache_summary(vcpu);
                break;
+       case PAL_PERF_MON_INFO:
+               result = pal_perf_mon_info(vcpu);
+               break;
+       case PAL_HALT_INFO:
+               result = pal_halt_info(vcpu);
+               break;
        case PAL_HALT_LIGHT:
        {
                INIT_PAL_STATUS_SUCCESS(result);
@@ -317,6 +443,16 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
        }
                break;
 
+       case PAL_PREFETCH_VISIBILITY:
+               result = pal_prefetch_visibility(vcpu);
+               break;
+       case PAL_MC_DRAIN:
+               result.status = ia64_pal_mc_drain();
+               /* FIXME: All vcpus likely call PAL_MC_DRAIN.
+                  That causes the congestion. */
+               smp_call_function(remote_pal_mc_drain, NULL, 1);
+               break;
+
        case PAL_FREQ_RATIOS:
                result = pal_freq_ratios(vcpu);
                break;
@@ -346,6 +482,9 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
                INIT_PAL_STATUS_SUCCESS(result);
                result.v1 = (1L << 32) | 1L;
                break;
+       case PAL_REGISTER_INFO:
+               result = pal_register_info(vcpu);
+               break;
        case PAL_VM_PAGE_SIZE:
                result.status = ia64_pal_vm_page_size(&result.v0,
                                                        &result.v1);
@@ -365,12 +504,18 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
                result.status = ia64_pal_version(
                                (pal_version_u_t *)&result.v0,
                                (pal_version_u_t *)&result.v1);
-
                break;
        case PAL_FIXED_ADDR:
                result.status = PAL_STATUS_SUCCESS;
                result.v0 = vcpu->vcpu_id;
                break;
+       case PAL_BRAND_INFO:
+               result = pal_get_brand_info(vcpu);
+               break;
+       case PAL_GET_PSTATE:
+       case PAL_CACHE_SHARED_INFO:
+               INIT_PAL_STATUS_UNIMPLEMENTED(result);
+               break;
        default:
                INIT_PAL_STATUS_UNIMPLEMENTED(result);
                printk(KERN_WARNING"kvm: Unsupported pal call,"
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 230eae482f3262a443e2ef9acf5111820937b574..b1dc80952d91fc49918de183bbeb454276e723e7 100644
@@ -167,7 +167,6 @@ static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
        return (rr1.val);
 }
 
-
 /*
  * Set vIFA & vITIR & vIHA, when vPSR.ic =1
  * Parameter:
@@ -222,8 +221,6 @@ void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
        inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
 }
 
-
-
 /*
  * Data Nested TLB Fault
  *  @ Data Nested TLB Vector
@@ -245,7 +242,6 @@ void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
        inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
 }
 
-
 /*
  * Data TLB Fault
  *  @ Data TLB vector
@@ -265,8 +261,6 @@ static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
        /* If vPSR.ic, IFA, ITIR, IHA*/
        set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
        inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-
-
 }
 
 /*
@@ -279,7 +273,6 @@ void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
        _vhpt_fault(vcpu, vadr);
 }
 
-
 /*
  * VHPT Data Fault
  *  @ VHPT Translation vector
@@ -290,8 +283,6 @@ void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
        _vhpt_fault(vcpu, vadr);
 }
 
-
-
 /*
  * Deal with:
  *  General Exception vector
@@ -301,7 +292,6 @@ void _general_exception(struct kvm_vcpu *vcpu)
        inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
 }
 
-
 /*
  * Illegal Operation Fault
  *  @ General Exception Vector
@@ -419,19 +409,16 @@ static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
        inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
 }
 
-
 void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 {
        __page_not_present(vcpu, vadr);
 }
 
-
 void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 {
        __page_not_present(vcpu, vadr);
 }
 
-
 /* Deal with
  *  Data access rights vector
  */
@@ -563,22 +550,64 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
        inject_guest_interruption(vcpu, vector);
 }
 
+static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
+                                               unsigned long arg)
+{
+       struct thash_data *data;
+       unsigned long gpa, poff;
+
+       if (!is_physical_mode(vcpu)) {
+               /* Depends on caller to provide the DTR or DTC mapping.*/
+               data = vtlb_lookup(vcpu, arg, D_TLB);
+               if (data)
+                       gpa = data->page_flags & _PAGE_PPN_MASK;
+               else {
+                       data = vhpt_lookup(arg);
+                       if (!data)
+                               return 0;
+                       gpa = data->gpaddr & _PAGE_PPN_MASK;
+               }
+
+               poff = arg & (PSIZE(data->ps) - 1);
+               arg = PAGEALIGN(gpa, data->ps) | poff;
+       }
+       arg = kvm_gpa_to_mpa(arg << 1 >> 1);
+
+       return (unsigned long)__va(arg);
+}
+
 static void set_pal_call_data(struct kvm_vcpu *vcpu)
 {
        struct exit_ctl_data *p = &vcpu->arch.exit_data;
+       unsigned long gr28 = vcpu_get_gr(vcpu, 28);
+       unsigned long gr29 = vcpu_get_gr(vcpu, 29);
+       unsigned long gr30 = vcpu_get_gr(vcpu, 30);
 
        /*FIXME:For static and stacked convention, firmware
         * has put the parameters in gr28-gr31 before
         * break to vmm  !!*/
 
-       p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
-       p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
-       p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+       switch (gr28) {
+       case PAL_PERF_MON_INFO:
+       case PAL_HALT_INFO:
+               p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
+               p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+               break;
+       case PAL_BRAND_INFO:
+               p->u.pal_data.gr29 = gr29;;
+               p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
+               break;
+       default:
+               p->u.pal_data.gr29 = gr29;;
+               p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+       }
+       p->u.pal_data.gr28 = gr28;
        p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+
        p->exit_reason = EXIT_REASON_PAL_CALL;
 }
 
-static void set_pal_call_result(struct kvm_vcpu *vcpu)
+static void get_pal_call_result(struct kvm_vcpu *vcpu)
 {
        struct exit_ctl_data *p = &vcpu->arch.exit_data;
 
@@ -606,7 +635,7 @@ static void set_sal_call_data(struct kvm_vcpu *vcpu)
        p->exit_reason = EXIT_REASON_SAL_CALL;
 }
 
-static void set_sal_call_result(struct kvm_vcpu *vcpu)
+static void get_sal_call_result(struct kvm_vcpu *vcpu)
 {
        struct exit_ctl_data *p = &vcpu->arch.exit_data;
 
@@ -629,13 +658,13 @@ void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
                if (iim == DOMN_PAL_REQUEST) {
                        set_pal_call_data(v);
                        vmm_transition(v);
-                       set_pal_call_result(v);
+                       get_pal_call_result(v);
                        vcpu_increment_iip(v);
                        return;
                } else if (iim == DOMN_SAL_REQUEST) {
                        set_sal_call_data(v);
                        vmm_transition(v);
-                       set_sal_call_result(v);
+                       get_sal_call_result(v);
                        vcpu_increment_iip(v);
                        return;
                }
@@ -703,7 +732,6 @@ void vhpi_detection(struct kvm_vcpu *vcpu)
        }
 }
 
-
 void leave_hypervisor_tail(void)
 {
        struct kvm_vcpu *v = current_vcpu;
@@ -737,7 +765,6 @@ void leave_hypervisor_tail(void)
        }
 }
 
-
 static inline void handle_lds(struct kvm_pt_regs *regs)
 {
        regs->cr_ipsr |= IA64_PSR_ED;
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index ecd526b5532305002b008b8936d3d7d7d6c9f54d..d4d28050587883e0f708fb5f46ed9cdb4a7ae3da 100644
@@ -112,7 +112,6 @@ void switch_to_physical_rid(struct kvm_vcpu *vcpu)
        return;
 }
 
-
 void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
 {
        unsigned long psr;
@@ -166,8 +165,6 @@ void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
        return;
 }
 
-
-
 /*
  * In physical mode, insert tc/tr for region 0 and 4 uses
  * RID[0] and RID[4] which is for physical mode emulation.
@@ -269,7 +266,6 @@ static inline unsigned long fph_index(struct kvm_pt_regs *regs,
        return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 }
 
-
 /*
  * The inverse of the above: given bspstore and the number of
  * registers, calculate ar.bsp.
@@ -811,12 +807,15 @@ static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
 static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 {
        struct kvm_vcpu *v;
+       struct kvm *kvm;
        int i;
        long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
        unsigned long vitv = VCPU(vcpu, itv);
 
+       kvm = (struct kvm *)KVM_VM_BASE;
+
        if (vcpu->vcpu_id == 0) {
-               for (i = 0; i < KVM_MAX_VCPUS; i++) {
+               for (i = 0; i < kvm->arch.online_vcpus; i++) {
                        v = (struct kvm_vcpu *)((char *)vcpu +
                                        sizeof(struct kvm_vcpu_data) * i);
                        VMX(v, itc_offset) = itc_offset;
@@ -1039,8 +1038,6 @@ u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
        return key;
 }
 
-
-
 void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long thash, vadr;
@@ -1050,7 +1047,6 @@ void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
        vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
 }
 
-
 void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long tag, vadr;
@@ -1131,7 +1127,6 @@ int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
        return IA64_NO_FAULT;
 }
 
-
 int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long r1, r3;
@@ -1154,7 +1149,6 @@ void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
        vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
 }
 
-
 /************************************
  * Insert/Purge translation register/cache
  ************************************/
@@ -1385,7 +1379,6 @@ void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
        vcpu_set_itc(vcpu, r2);
 }
 
-
 void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long r1;
@@ -1393,8 +1386,9 @@ void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
        r1 = vcpu_get_itc(vcpu);
        vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
 }
+
 /**************************************************************************
-  struct kvm_vcpu*protection key register access routines
+  struct kvm_vcpu protection key register access routines
  **************************************************************************/
 
 unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
@@ -1407,20 +1401,6 @@ void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
        ia64_set_pkr(reg, val);
 }
 
-
-unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
-{
-       union ia64_rr rr, rr1;
-
-       rr.val = vcpu_get_rr(vcpu, ifa);
-       rr1.val = 0;
-       rr1.ps = rr.ps;
-       rr1.rid = rr.rid;
-       return (rr1.val);
-}
-
-
-
 /********************************
  * Moves to privileged registers
  ********************************/
@@ -1464,8 +1444,6 @@ unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
        return (IA64_NO_FAULT);
 }
 
-
-
 void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long r3, r2;
@@ -1510,8 +1488,6 @@ void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
        vcpu_set_pkr(vcpu, r3, r2);
 }
 
-
-
 void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long r3, r1;
@@ -1557,7 +1533,6 @@ void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
        vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
 }
 
-
 unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
 {
        /* FIXME: This could get called as a result of a rsvd-reg fault */
@@ -1609,7 +1584,6 @@ unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
        return 0;
 }
 
-
 unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
 {
        unsigned long tgt = inst.M33.r1;
@@ -1633,8 +1607,6 @@ unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
        return 0;
 }
 
-
-
 void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 {
 
@@ -1776,9 +1748,6 @@ void vcpu_bsw1(struct kvm_vcpu *vcpu)
        }
 }
 
-
-
-
 void vcpu_rfi(struct kvm_vcpu *vcpu)
 {
        unsigned long ifs, psr;
@@ -1796,7 +1765,6 @@ void vcpu_rfi(struct kvm_vcpu *vcpu)
        regs->cr_iip = VCPU(vcpu, iip);
 }
 
-
 /*
    VPSR can't keep track of below bits of guest PSR
    This function gets guest PSR
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b2f12a562bdf60fd4462378c5aa75c44ea882841..042af92ced8304c03e29ee48112ac1b5fee87e7e 100644
@@ -703,7 +703,7 @@ extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
 extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
 extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
 extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
                u64 itir, u64 ifa, int type);
 extern void thash_purge_all(struct kvm_vcpu *v);
 extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
@@ -738,7 +738,7 @@ void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
 void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-
+u64 kvm_gpa_to_mpa(u64 gpa);
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
                u64 arg4, u64 arg5, u64 arg6, u64 arg7);
 
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 6b6307a3bd556a08f6eb937a9d8b083defb0a701..38232b37668b3be87e8c8404aac2caea282bcc67 100644
@@ -164,11 +164,11 @@ static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
        unsigned long ps, gpaddr;
 
        ps = itir_ps(itir);
+       rr.val = ia64_get_rr(ifa);
 
-       gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-               (ifa & ((1UL << ps) - 1));
+        gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+                                       (ifa & ((1UL << ps) - 1));
 
-       rr.val = ia64_get_rr(ifa);
        head = (struct thash_data *)ia64_thash(ifa);
        head->etag = INVALID_TI_TAG;
        ia64_mf();
@@ -412,16 +412,14 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
 
 /*
  * Purge overlap TCs and then insert the new entry to emulate itc ops.
- *    Notes: Only TC entry can purge and insert.
- *    1 indicates this is MMIO
+ * Notes: Only TC entry can purge and insert.
  */
-int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
                                                u64 ifa, int type)
 {
        u64 ps;
        u64 phy_pte, io_mask, index;
        union ia64_rr vrr, mrr;
-       int ret = 0;
 
        ps = itir_ps(itir);
        vrr.val = vcpu_get_rr(v, ifa);
@@ -441,25 +439,19 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
                phy_pte &= ~_PAGE_MA_MASK;
        }
 
-       if (pte & VTLB_PTE_IO)
-               ret = 1;
-
        vtlb_purge(v, ifa, ps);
        vhpt_purge(v, ifa, ps);
 
-       if (ps == mrr.ps) {
-               if (!(pte&VTLB_PTE_IO)) {
-                       vhpt_insert(phy_pte, itir, ifa, pte);
-               } else {
-                       vtlb_insert(v, pte, itir, ifa);
-                       vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-               }
-       } else if (ps > mrr.ps) {
+       if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
                vtlb_insert(v, pte, itir, ifa);
                vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-               if (!(pte&VTLB_PTE_IO))
-                       vhpt_insert(phy_pte, itir, ifa, pte);
-       } else {
+       }
+       if (pte & VTLB_PTE_IO)
+               return;
+
+       if (ps >= mrr.ps)
+               vhpt_insert(phy_pte, itir, ifa, pte);
+       else {
                u64 psr;
                phy_pte  &= ~PAGE_FLAGS_RV_MASK;
                psr = ia64_clear_ic();
@@ -469,7 +461,6 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
        if (!(pte&VTLB_PTE_IO))
                mark_pages_dirty(v, pte, ps);
 
-       return ret;
 }
 
 /*
@@ -509,7 +500,6 @@ void thash_purge_all(struct kvm_vcpu *v)
        local_flush_tlb_all();
 }
 
-
 /*
  * Lookup the hash table and its collision chain to find an entry
  * covering this address rid:va or the entry.
@@ -517,7 +507,6 @@ void thash_purge_all(struct kvm_vcpu *v)
  * INPUT:
  *  in: TLB format for both VHPT & TLB.
  */
-
 struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
 {
        struct thash_data  *cch;
@@ -547,7 +536,6 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
        return NULL;
 }
 
-
 /*
  * Initialize internal control data before service.
  */
@@ -573,6 +561,10 @@ void thash_init(struct thash_cb *hcb, u64 sz)
 u64 kvm_get_mpt_entry(u64 gpfn)
 {
        u64 *base = (u64 *) KVM_P2M_BASE;
+
+       if (gpfn >= (KVM_P2M_SIZE >> 3))
+               panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
+
        return *(base + gpfn);
 }
 
@@ -589,7 +581,6 @@ u64 kvm_gpa_to_mpa(u64 gpa)
        return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
 }
 
-
 /*
  * Fetch guest bundle code.
  * INPUT:
@@ -631,7 +622,6 @@ int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
        return IA64_NO_FAULT;
 }
 
-
 void kvm_init_vhpt(struct kvm_vcpu *v)
 {
        v->arch.vhpt.num = VHPT_NUM_ENTRIES;
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e4198d5ca0d3554fc68ee4c64c92c25243ca..755f1b1948c57c632937c6b546d30df45ebdfe96 100644
@@ -52,4 +52,11 @@ struct kvm_fpu {
        __u64 fpr[32];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index f49031b632ca34bb82536bb1f3a0913a6a59b571..d22d39942a926fa05874557af866d04079f4aebe 100644
  * need to find some way of advertising it. */
 #define KVM44x_GUEST_TLB_SIZE 64
 
+struct kvmppc_44x_tlbe {
+       u32 tid; /* Only the low 8 bits are used. */
+       u32 word0;
+       u32 word1;
+       u32 word2;
+};
+
 struct kvmppc_44x_shadow_ref {
        struct page *page;
        u16 gtlb_index;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 2197764796d9695c1c484f34c238e893a3cb1132..56bfae59837f761a03d4bb81a763281fbb224912 100644
 #define BOOKE_INTERRUPT_DTLB_MISS 13
 #define BOOKE_INTERRUPT_ITLB_MISS 14
 #define BOOKE_INTERRUPT_DEBUG 15
-#define BOOKE_MAX_INTERRUPT 15
+
+/* E500 */
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
+#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
new file mode 100644
index 0000000..9d497ce
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_KVM_E500_H__
+#define __ASM_KVM_E500_H__
+
+#include <linux/kvm_host.h>
+
+#define BOOKE_INTERRUPT_SIZE 36
+
+#define E500_PID_NUM   3
+#define E500_TLB_NUM   2
+
+struct tlbe{
+       u32 mas1;
+       u32 mas2;
+       u32 mas3;
+       u32 mas7;
+};
+
+struct kvmppc_vcpu_e500 {
+       /* Unmodified copy of the guest's TLB. */
+       struct tlbe *guest_tlb[E500_TLB_NUM];
+       /* TLB that's actually used when the guest is running. */
+       struct tlbe *shadow_tlb[E500_TLB_NUM];
+       /* Pages which are referenced in the shadow TLB. */
+       struct page **shadow_pages[E500_TLB_NUM];
+
+       unsigned int guest_tlb_size[E500_TLB_NUM];
+       unsigned int shadow_tlb_size[E500_TLB_NUM];
+       unsigned int guest_tlb_nv[E500_TLB_NUM];
+
+       u32 host_pid[E500_PID_NUM];
+       u32 pid[E500_PID_NUM];
+
+       u32 mas0;
+       u32 mas1;
+       u32 mas2;
+       u32 mas3;
+       u32 mas4;
+       u32 mas5;
+       u32 mas6;
+       u32 mas7;
+       u32 l1csr1;
+       u32 hid0;
+       u32 hid1;
+
+       struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+       return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c1e436fe7738c8685d021453c699fed6a7729003..dfdf13c9fefd9d4f64b0d1f0eebb70b7050fc598 100644
@@ -64,13 +64,6 @@ struct kvm_vcpu_stat {
        u32 halt_wakeup;
 };
 
-struct kvmppc_44x_tlbe {
-       u32 tid; /* Only the low 8 bits are used. */
-       u32 word0;
-       u32 word1;
-       u32 word2;
-};
-
 enum kvm_exit_types {
        MMIO_EXITS,
        DCR_EXITS,
@@ -118,11 +111,6 @@ struct kvm_arch {
 struct kvm_vcpu_arch {
        u32 host_stack;
        u32 host_pid;
-       u32 host_dbcr0;
-       u32 host_dbcr1;
-       u32 host_dbcr2;
-       u32 host_iac[4];
-       u32 host_msr;
 
        u64 fpr[32];
        ulong gpr[32];
@@ -157,7 +145,7 @@ struct kvm_vcpu_arch {
        u32 tbu;
        u32 tcr;
        u32 tsr;
-       u32 ivor[16];
+       u32 ivor[64];
        ulong ivpr;
        u32 pir;
 
@@ -170,6 +158,7 @@ struct kvm_vcpu_arch {
        u32 ccr1;
        u32 dbcr0;
        u32 dbcr1;
+       u32 dbsr;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
        struct kvmppc_exit_timing timing_exit;
@@ -200,10 +189,4 @@ struct kvm_vcpu_arch {
        unsigned long pending_exceptions;
 };
 
-struct kvm_guest_debug {
-       int enabled;
-       unsigned long bp[4];
-       int singlestep;
-};
-
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 36d2a50a84875f760522183eddd80d2ecd40e230..2c6ee349df5e8cc9d5b286803a510a58b632ac9b 100644
@@ -52,13 +52,19 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
+/* Core-specific hooks */
+
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-                           u64 asid, u32 flags, u32 max_bytes,
                            unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-
-/* Core-specific hooks */
+extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+                              gva_t eaddr);
+extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
 
 extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                 unsigned int id);
@@ -71,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
-
 extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h
index 3f941c0f7e8eb504dd921e2a89b3f93087fb66ca..4285b64a65e0a3a10245b68d57809332d7dc8bc8 100644
@@ -75,6 +75,8 @@
 
 #ifndef __ASSEMBLY__
 
+extern unsigned int tlbcam_index;
+
 typedef struct {
        unsigned int    id;
        unsigned int    active;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19ee491e9e2380cb3b58c41ac761e7c389582342..42fe4da4e8ae8e555572a40535d1adb5cb4cb0b6 100644
@@ -49,7 +49,7 @@
 #include <asm/iseries/alpaca.h>
 #endif
 #ifdef CONFIG_KVM
-#include <asm/kvm_44x.h>
+#include <linux/kvm_host.h>
 #endif
 
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -361,8 +361,6 @@ int main(void)
        DEFINE(PTE_SIZE, sizeof(pte_t));
 
 #ifdef CONFIG_KVM
-       DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
-
        DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
        DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
        DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a66bec57265a1be4abd96ec1b3c3600421a048b9..0cef809cec21fab0fad7a627674359726b0e6267 100644
 
 #include "44x_tlb.h"
 
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvm44x_disable_debug_interrupts(void)
-{
-       mtmsr(mfmsr() & ~MSR_DE);
-}
-
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-       kvm44x_disable_debug_interrupts();
-
-       mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
-       mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
-       mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
-       mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
-       mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
-       mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
-       mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
-       mtmsr(vcpu->arch.host_msr);
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-       struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-       u32 dbcr0 = 0;
-
-       vcpu->arch.host_msr = mfmsr();
-       kvm44x_disable_debug_interrupts();
-
-       /* Save host debug register state. */
-       vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
-       vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
-       vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
-       vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
-       vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
-       vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
-       vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
-       /* set registers up for guest */
-
-       if (dbg->bp[0]) {
-               mtspr(SPRN_IAC1, dbg->bp[0]);
-               dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
-       }
-       if (dbg->bp[1]) {
-               mtspr(SPRN_IAC2, dbg->bp[1]);
-               dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
-       }
-       if (dbg->bp[2]) {
-               mtspr(SPRN_IAC3, dbg->bp[2]);
-               dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
-       }
-       if (dbg->bp[3]) {
-               mtspr(SPRN_IAC4, dbg->bp[3]);
-               dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
-       }
-
-       mtspr(SPRN_DBCR0, dbcr0);
-       mtspr(SPRN_DBCR1, 0);
-       mtspr(SPRN_DBCR2, 0);
-}
-
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        kvmppc_44x_tlb_load(vcpu);
@@ -149,8 +83,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
                                struct kvm_translation *tr)
 {
-       struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-       struct kvmppc_44x_tlbe *gtlbe;
        int index;
        gva_t eaddr;
        u8 pid;
@@ -166,9 +98,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
                return 0;
        }
 
-       gtlbe = &vcpu_44x->guest_tlb[index];
-
-       tr->physical_address = tlb_xlate(gtlbe, eaddr);
+       tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
        /* XXX what does "writeable" and "usermode" even mean? */
        tr->valid = 1;
 
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 82489a743a6f86dcd7786981504bbadf0d4799c5..61af58fceceee8a6fcb87c9f0e2f1e55e9a3dfb5 100644
 #include "booke.h"
 #include "44x_tlb.h"
 
-#define OP_RFI      19
-
-#define XOP_RFI     50
-#define XOP_MFMSR   83
-#define XOP_WRTEE   131
-#define XOP_MTMSR   146
-#define XOP_WRTEEI  163
 #define XOP_MFDCR   323
 #define XOP_MTDCR   451
 #define XOP_TLBSX   914
 #define XOP_ICCCI   966
 #define XOP_TLBWE   978
 
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
-       vcpu->arch.pc = vcpu->arch.srr0;
-       kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            unsigned int inst, int *advance)
 {
@@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
        int ws;
 
        switch (get_op(inst)) {
-       case OP_RFI:
-               switch (get_xop(inst)) {
-               case XOP_RFI:
-                       kvmppc_emul_rfi(vcpu);
-                       kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
-                       *advance = 0;
-                       break;
-
-               default:
-                       emulated = EMULATE_FAIL;
-                       break;
-               }
-               break;
-
        case 31:
                switch (get_xop(inst)) {
 
-               case XOP_MFMSR:
-                       rt = get_rt(inst);
-                       vcpu->arch.gpr[rt] = vcpu->arch.msr;
-                       kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
-                       break;
-
-               case XOP_MTMSR:
-                       rs = get_rs(inst);
-                       kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
-                       kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
-                       break;
-
-               case XOP_WRTEE:
-                       rs = get_rs(inst);
-                       vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-                                                        | (vcpu->arch.gpr[rs] & MSR_EE);
-                       kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
-                       break;
-
-               case XOP_WRTEEI:
-                       vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-                                                        | (inst & MSR_EE);
-                       kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
-                       break;
-
                case XOP_MFDCR:
                        dcrn = get_dcrn(inst);
                        rt = get_rt(inst);
@@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                emulated = EMULATE_FAIL;
        }
 
+       if (emulated == EMULATE_FAIL)
+               emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
        return emulated;
 }
 
 int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
+       int emulated = EMULATE_DONE;
+
        switch (sprn) {
-       case SPRN_MMUCR:
-               vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
        case SPRN_PID:
                kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+       case SPRN_MMUCR:
+               vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
        case SPRN_CCR0:
                vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
        case SPRN_CCR1:
                vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
-       case SPRN_DEAR:
-               vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
-       case SPRN_ESR:
-               vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
-       case SPRN_DBCR0:
-               vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
-       case SPRN_DBCR1:
-               vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
-       case SPRN_TSR:
-               vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-       case SPRN_TCR:
-               vcpu->arch.tcr = vcpu->arch.gpr[rs];
-               kvmppc_emulate_dec(vcpu);
-               break;
-
-       /* Note: SPRG4-7 are user-readable. These values are
-        * loaded into the real SPRGs when resuming the
-        * guest. */
-       case SPRN_SPRG4:
-               vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
-       case SPRN_SPRG5:
-               vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
-       case SPRN_SPRG6:
-               vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
-       case SPRN_SPRG7:
-               vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
-       case SPRN_IVPR:
-               vcpu->arch.ivpr = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR0:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR1:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR2:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR3:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR4:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR5:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR6:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR7:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR8:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR9:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR10:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR11:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR12:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR13:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR14:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
-               break;
-       case SPRN_IVOR15:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
-               break;
-
        default:
-               return EMULATE_FAIL;
+               emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
        }
 
        kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
-       return EMULATE_DONE;
+       return emulated;
 }
 
 int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 {
+       int emulated = EMULATE_DONE;
+
        switch (sprn) {
-       /* 440 */
+       case SPRN_PID:
+               vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
        case SPRN_MMUCR:
                vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
        case SPRN_CCR0:
                vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
        case SPRN_CCR1:
                vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
-
-       /* Book E */
-       case SPRN_PID:
-               vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
-       case SPRN_IVPR:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
-       case SPRN_DEAR:
-               vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
-       case SPRN_ESR:
-               vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
-       case SPRN_DBCR0:
-               vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
-       case SPRN_DBCR1:
-               vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
-
-       case SPRN_IVOR0:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
-               break;
-       case SPRN_IVOR1:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
-               break;
-       case SPRN_IVOR2:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
-               break;
-       case SPRN_IVOR3:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
-               break;
-       case SPRN_IVOR4:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
-               break;
-       case SPRN_IVOR5:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
-               break;
-       case SPRN_IVOR6:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
-               break;
-       case SPRN_IVOR7:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
-               break;
-       case SPRN_IVOR8:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
-               break;
-       case SPRN_IVOR9:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
-               break;
-       case SPRN_IVOR10:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
-               break;
-       case SPRN_IVOR11:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
-               break;
-       case SPRN_IVOR12:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
-               break;
-       case SPRN_IVOR13:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
-               break;
-       case SPRN_IVOR14:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
-               break;
-       case SPRN_IVOR15:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
-               break;
-
        default:
-               return EMULATE_FAIL;
+               emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
        }
 
        kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
-       return EMULATE_DONE;
+       return emulated;
 }
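
Editor's note: the 44x emulation above now handles only the 440-specific opcodes and SPRs itself and, on EMULATE_FAIL, delegates to the shared Book E layer (kvmppc_booke_emulate_op and friends) added later in this patch. Below is a minimal, self-contained C sketch of that two-level dispatch pattern; the register numbers and helper names are invented for illustration and are not the kernel's.

#include <stdio.h>

enum { EMULATE_DONE, EMULATE_FAIL };

/* Shared ("Book E"-like) layer: pretend it knows SPRs 0..15 only. */
static int shared_emulate_mtspr(int sprn)
{
        return sprn < 16 ? EMULATE_DONE : EMULATE_FAIL;
}

/* Core-specific layer: handles its own SPR, otherwise delegates. */
static int core_emulate_mtspr(int sprn)
{
        switch (sprn) {
        case 946:                       /* a made-up core-specific register */
                return EMULATE_DONE;
        default:
                return shared_emulate_mtspr(sprn);
        }
}

int main(void)
{
        printf("946 -> %d\n", core_emulate_mtspr(946)); /* 0: handled locally */
        printf("  3 -> %d\n", core_emulate_mtspr(3));   /* 0: handled by shared layer */
        printf("999 -> %d\n", core_emulate_mtspr(999)); /* 1: EMULATE_FAIL */
        return 0;
}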
 
index 9a34b8edb9e283adcc8c53e3033b44d7f0845bc1..4a16f472cc1873c00f7f811fd1196e6088c4828b 100644 (file)
@@ -208,20 +208,38 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
        return -1;
 }
 
-int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+                       gva_t eaddr)
+{
+       struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+       struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+       unsigned int pgmask = get_tlb_bytes(gtlbe) - 1;
+
+       return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
        unsigned int as = !!(vcpu->arch.msr & MSR_IS);
 
        return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
        unsigned int as = !!(vcpu->arch.msr & MSR_DS);
 
        return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
 static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
                                       unsigned int stlb_index)
 {
@@ -248,7 +266,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
        KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
 }
 
-void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
        int i;
@@ -269,15 +287,19 @@ void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
  * Caller must ensure that the specified guest TLB entry is safe to insert into
  * the shadow TLB.
  */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
-                    u32 flags, u32 max_bytes, unsigned int gtlb_index)
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+                    unsigned int gtlb_index)
 {
        struct kvmppc_44x_tlbe stlbe;
        struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+       struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
        struct kvmppc_44x_shadow_ref *ref;
        struct page *new_page;
        hpa_t hpaddr;
        gfn_t gfn;
+       u32 asid = gtlbe->tid;
+       u32 flags = gtlbe->word2;
+       u32 max_bytes = get_tlb_bytes(gtlbe);
        unsigned int victim;
 
        /* Select TLB entry to clobber. Indirectly guard against races with the TLB
@@ -448,10 +470,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
        }
 
        if (tlbe_is_host_safe(vcpu, tlbe)) {
-               u64 asid;
                gva_t eaddr;
                gpa_t gpaddr;
-               u32 flags;
                u32 bytes;
 
                eaddr = get_tlb_eaddr(tlbe);
@@ -462,10 +482,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
                eaddr &= ~(bytes - 1);
                gpaddr &= ~(bytes - 1);
 
-               asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-               flags = tlbe->word2 & 0xffff;
-
-               kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+               kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
        }
 
        KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
index 772191f29e62f702c5df56ad5addacbc37b64922..a9ff80e51526785949ec5770f9eeefda85a0d0c6 100644 (file)
@@ -25,8 +25,6 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 
 extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
                                  u8 rc);
@@ -85,11 +83,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
        return (vcpu->arch.mmucr >> 16) & 0x1;
 }
 
-static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
-{
-       unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
-
-       return get_tlb_raddr(tlbe) | (eaddr & pgmask);
-}
-
 #endif /* __KVM_POWERPC_TLB_H__ */
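
Editor's note: the tlb_xlate() inline removed here is replaced by kvmppc_mmu_xlate() in 44x_tlb.c, but the arithmetic is unchanged: the TLB entry supplies the real page address and the mapping size, and the low bits of the effective address are kept as the offset into that page. A small standalone sketch of that calculation, using made-up values, follows.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t raddr = 0x01000000;  /* real address from the TLB entry */
        uint64_t bytes = 0x10000;     /* size of the guest mapping (64 KiB) */
        uint64_t eaddr = 0xc0001234;  /* effective address being translated */

        uint64_t pgmask = bytes - 1;               /* mask for the in-page offset */
        uint64_t paddr  = raddr | (eaddr & pgmask);

        printf("paddr = 0x%llx\n", (unsigned long long)paddr); /* 0x1001234 */
        return 0;
}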
index 6dbdc4817d80c5bb2424236208497b5b3bde1373..5a152a52796f171641c5c8edfe3d1a92724d6cb1 100644 (file)
@@ -2,6 +2,9 @@
 # KVM configuration
 #
 
+config HAVE_KVM_IRQCHIP
+       bool
+
 menuconfig VIRTUALIZATION
        bool "Virtualization"
        ---help---
@@ -43,6 +46,19 @@ config KVM_EXIT_TIMING
 
          If unsure, say N.
 
+config KVM_E500
+       bool "KVM support for PowerPC E500 processors"
+       depends on EXPERIMENTAL && E500
+       select KVM
+       ---help---
+         Support running unmodified E500 guest kernels in virtual machines on
+         E500 host processors.
+
+         This module provides access to the hardware capabilities through
+         a character device node named /dev/kvm.
+
+         If unsure, say N.
+
 config KVM_TRACE
        bool "KVM trace support"
        depends on KVM && MARKERS && SYSFS
index df7ba59e6d53b495c1885497882d6d67a01e77cf..4b2df66c79d853afd54f45a325ebf552c704b7d3 100644 (file)
@@ -16,8 +16,18 @@ AFLAGS_booke_interrupts.o := -I$(obj)
 
 kvm-440-objs := \
        booke.o \
+       booke_emulate.o \
        booke_interrupts.o \
        44x.o \
        44x_tlb.o \
        44x_emulate.o
 obj-$(CONFIG_KVM_440) += kvm-440.o
+
+kvm-e500-objs := \
+       booke.o \
+       booke_emulate.o \
+       booke_interrupts.o \
+       e500.o \
+       e500_tlb.o \
+       e500_emulate.o
+obj-$(CONFIG_KVM_E500) += kvm-e500.o
index 35485dd6927eef520632f727660a2d9499a3699f..642e4204cf25119e2e6c342481dc2ae2f8dd7ecd 100644 (file)
 #include <asm/kvm_ppc.h>
 #include "timing.h"
 #include <asm/cacheflush.h>
-#include <asm/kvm_44x.h>
 
 #include "booke.h"
-#include "44x_tlb.h"
 
 unsigned long kvmppc_booke_handlers;
 
@@ -120,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
        case BOOKE_IRQPRIO_DATA_STORAGE:
        case BOOKE_IRQPRIO_INST_STORAGE:
        case BOOKE_IRQPRIO_FP_UNAVAIL:
+       case BOOKE_IRQPRIO_SPE_UNAVAIL:
+       case BOOKE_IRQPRIO_SPE_FP_DATA:
+       case BOOKE_IRQPRIO_SPE_FP_ROUND:
        case BOOKE_IRQPRIO_AP_UNAVAIL:
        case BOOKE_IRQPRIO_ALIGNMENT:
                allowed = 1;
@@ -165,7 +166,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
        unsigned int priority;
 
        priority = __ffs(*pending);
-       while (priority <= BOOKE_MAX_INTERRUPT) {
+       while (priority <= BOOKE_IRQPRIO_MAX) {
                if (kvmppc_booke_irqprio_deliver(vcpu, priority))
                        break;
 
@@ -263,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                r = RESUME_GUEST;
                break;
 
+       case BOOKE_INTERRUPT_SPE_UNAVAIL:
+               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
+               r = RESUME_GUEST;
+               break;
+
+       case BOOKE_INTERRUPT_SPE_FP_DATA:
+               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+               r = RESUME_GUEST;
+               break;
+
+       case BOOKE_INTERRUPT_SPE_FP_ROUND:
+               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+               r = RESUME_GUEST;
+               break;
+
        case BOOKE_INTERRUPT_DATA_STORAGE:
                vcpu->arch.dear = vcpu->arch.fault_dear;
                vcpu->arch.esr = vcpu->arch.fault_esr;
@@ -284,29 +300,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                r = RESUME_GUEST;
                break;
 
-       /* XXX move to a 440-specific file. */
        case BOOKE_INTERRUPT_DTLB_MISS: {
-               struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-               struct kvmppc_44x_tlbe *gtlbe;
                unsigned long eaddr = vcpu->arch.fault_dear;
                int gtlb_index;
+               gpa_t gpaddr;
                gfn_t gfn;
 
                /* Check the guest TLB. */
-               gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+               gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
                if (gtlb_index < 0) {
                        /* The guest didn't have a mapping for it. */
                        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
                        vcpu->arch.dear = vcpu->arch.fault_dear;
                        vcpu->arch.esr = vcpu->arch.fault_esr;
+                       kvmppc_mmu_dtlb_miss(vcpu);
                        kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
                        r = RESUME_GUEST;
                        break;
                }
 
-               gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-               vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
-               gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+               gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+               gfn = gpaddr >> PAGE_SHIFT;
 
                if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
                        /* The guest TLB had a mapping, but the shadow TLB
@@ -315,13 +329,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         * b) the guest used a large mapping which we're faking
                         * Either way, we need to satisfy the fault without
                         * invoking the guest. */
-                       kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
-                                      gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+                       kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
                        kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
                        r = RESUME_GUEST;
                } else {
                        /* Guest has mapped and accessed a page which is not
                         * actually RAM. */
+                       vcpu->arch.paddr_accessed = gpaddr;
                        r = kvmppc_emulate_mmio(run, vcpu);
                        kvmppc_account_exit(vcpu, MMIO_EXITS);
                }
@@ -329,10 +343,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                break;
        }
 
-       /* XXX move to a 440-specific file. */
        case BOOKE_INTERRUPT_ITLB_MISS: {
-               struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-               struct kvmppc_44x_tlbe *gtlbe;
                unsigned long eaddr = vcpu->arch.pc;
                gpa_t gpaddr;
                gfn_t gfn;
@@ -341,18 +352,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                r = RESUME_GUEST;
 
                /* Check the guest TLB. */
-               gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+               gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
                if (gtlb_index < 0) {
                        /* The guest didn't have a mapping for it. */
                        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+                       kvmppc_mmu_itlb_miss(vcpu);
                        kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
                        break;
                }
 
                kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
 
-               gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-               gpaddr = tlb_xlate(gtlbe, eaddr);
+               gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
                gfn = gpaddr >> PAGE_SHIFT;
 
                if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
@@ -362,8 +373,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         * b) the guest used a large mapping which we're faking
                         * Either way, we need to satisfy the fault without
                         * invoking the guest. */
-                       kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
-                                      gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+                       kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
                } else {
                        /* Guest mapped and leaped at non-RAM! */
                        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
index cf7c94ca24bfacb346dca221e64dcb85496fc9e9..d59bcca1f9d8af1fa962222043a3955c68e2fe06 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
 #include "timing.h"
 
 /* interrupt priority ordering */
 #define BOOKE_IRQPRIO_ALIGNMENT 2
 #define BOOKE_IRQPRIO_PROGRAM 3
 #define BOOKE_IRQPRIO_FP_UNAVAIL 4
-#define BOOKE_IRQPRIO_SYSCALL 5
-#define BOOKE_IRQPRIO_AP_UNAVAIL 6
-#define BOOKE_IRQPRIO_DTLB_MISS 7
-#define BOOKE_IRQPRIO_ITLB_MISS 8
-#define BOOKE_IRQPRIO_MACHINE_CHECK 9
-#define BOOKE_IRQPRIO_DEBUG 10
-#define BOOKE_IRQPRIO_CRITICAL 11
-#define BOOKE_IRQPRIO_WATCHDOG 12
-#define BOOKE_IRQPRIO_EXTERNAL 13
-#define BOOKE_IRQPRIO_FIT 14
-#define BOOKE_IRQPRIO_DECREMENTER 15
+#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
+#define BOOKE_IRQPRIO_SPE_FP_DATA 6
+#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#define BOOKE_IRQPRIO_SYSCALL 8
+#define BOOKE_IRQPRIO_AP_UNAVAIL 9
+#define BOOKE_IRQPRIO_DTLB_MISS 10
+#define BOOKE_IRQPRIO_ITLB_MISS 11
+#define BOOKE_IRQPRIO_MACHINE_CHECK 12
+#define BOOKE_IRQPRIO_DEBUG 13
+#define BOOKE_IRQPRIO_CRITICAL 14
+#define BOOKE_IRQPRIO_WATCHDOG 15
+#define BOOKE_IRQPRIO_EXTERNAL 16
+#define BOOKE_IRQPRIO_FIT 17
+#define BOOKE_IRQPRIO_DECREMENTER 18
+#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
+#define BOOKE_IRQPRIO_MAX 19
+
+extern unsigned long kvmppc_booke_handlers;
 
 /* Helper function for "full" MSR writes. No need to call this if only EE is
  * changing. */
@@ -57,4 +65,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
        };
 }
 
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                            unsigned int inst, int *advance);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+
 #endif /* __KVM_BOOKE_H__ */
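
Editor's note: with the SPE and performance-monitor priorities added, the numbering above now runs up to BOOKE_IRQPRIO_MAX, and kvmppc_core_deliver_interrupts() walks the pending bitmap from the lowest set bit upwards until one interrupt can be delivered. The standalone sketch below shows only that scan pattern; the pending mask and the deliver() policy are invented for illustration.

#include <stdio.h>

#define IRQPRIO_MAX 19

/* Pretend only priority 6 is currently deliverable (the rest stay masked). */
static int deliver(unsigned int priority)
{
        return priority == 6;
}

int main(void)
{
        unsigned long pending = (1UL << 3) | (1UL << 6) | (1UL << 12);
        int delivered = -1;

        while (pending) {
                unsigned int priority = (unsigned int)__builtin_ctzl(pending);

                if (priority > IRQPRIO_MAX)
                        break;
                if (deliver(priority)) {
                        delivered = (int)priority;      /* one interrupt delivered */
                        break;
                }
                pending &= ~(1UL << priority);          /* not deliverable, try next */
        }

        printf("delivered priority: %d\n", delivered);  /* prints 6 */
        return 0;
}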
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644 (file)
index 0000000..aebc65e
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+
+#define OP_19_XOP_RFI     50
+
+#define OP_31_XOP_MFMSR   83
+#define OP_31_XOP_WRTEE   131
+#define OP_31_XOP_MTMSR   146
+#define OP_31_XOP_WRTEEI  163
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.pc = vcpu->arch.srr0;
+       kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                            unsigned int inst, int *advance)
+{
+       int emulated = EMULATE_DONE;
+       int rs;
+       int rt;
+
+       switch (get_op(inst)) {
+       case 19:
+               switch (get_xop(inst)) {
+               case OP_19_XOP_RFI:
+                       kvmppc_emul_rfi(vcpu);
+                       kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+                       *advance = 0;
+                       break;
+
+               default:
+                       emulated = EMULATE_FAIL;
+                       break;
+               }
+               break;
+
+       case 31:
+               switch (get_xop(inst)) {
+
+               case OP_31_XOP_MFMSR:
+                       rt = get_rt(inst);
+                       vcpu->arch.gpr[rt] = vcpu->arch.msr;
+                       kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+                       break;
+
+               case OP_31_XOP_MTMSR:
+                       rs = get_rs(inst);
+                       kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+                       kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+                       break;
+
+               case OP_31_XOP_WRTEE:
+                       rs = get_rs(inst);
+                       vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+                                                        | (vcpu->arch.gpr[rs] & MSR_EE);
+                       kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+                       break;
+
+               case OP_31_XOP_WRTEEI:
+                       vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+                                                        | (inst & MSR_EE);
+                       kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+                       break;
+
+               default:
+                       emulated = EMULATE_FAIL;
+               }
+
+               break;
+
+       default:
+               emulated = EMULATE_FAIL;
+       }
+
+       return emulated;
+}
+
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+       int emulated = EMULATE_DONE;
+
+       switch (sprn) {
+       case SPRN_DEAR:
+               vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+       case SPRN_ESR:
+               vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+       case SPRN_DBCR0:
+               vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+       case SPRN_DBCR1:
+               vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+       case SPRN_DBSR:
+               vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
+       case SPRN_TSR:
+               vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+       case SPRN_TCR:
+               vcpu->arch.tcr = vcpu->arch.gpr[rs];
+               kvmppc_emulate_dec(vcpu);
+               break;
+
+       /* Note: SPRG4-7 are user-readable. These values are
+        * loaded into the real SPRGs when resuming the
+        * guest. */
+       case SPRN_SPRG4:
+               vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+       case SPRN_SPRG5:
+               vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+       case SPRN_SPRG6:
+               vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+       case SPRN_SPRG7:
+               vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+       case SPRN_IVPR:
+               vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR0:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR1:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR2:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR3:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR4:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR5:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR6:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR7:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR8:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR9:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR10:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR11:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR12:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR13:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR14:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR15:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+               break;
+
+       default:
+               emulated = EMULATE_FAIL;
+       }
+
+       return emulated;
+}
+
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+       int emulated = EMULATE_DONE;
+
+       switch (sprn) {
+       case SPRN_IVPR:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+       case SPRN_DEAR:
+               vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+       case SPRN_ESR:
+               vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+       case SPRN_DBCR0:
+               vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+       case SPRN_DBCR1:
+               vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+       case SPRN_DBSR:
+               vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
+
+       case SPRN_IVOR0:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+               break;
+       case SPRN_IVOR1:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+               break;
+       case SPRN_IVOR2:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+               break;
+       case SPRN_IVOR3:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+               break;
+       case SPRN_IVOR4:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+               break;
+       case SPRN_IVOR5:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+               break;
+       case SPRN_IVOR6:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+               break;
+       case SPRN_IVOR7:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+               break;
+       case SPRN_IVOR8:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+               break;
+       case SPRN_IVOR9:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+               break;
+       case SPRN_IVOR10:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+               break;
+       case SPRN_IVOR11:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+               break;
+       case SPRN_IVOR12:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+               break;
+       case SPRN_IVOR13:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+               break;
+       case SPRN_IVOR14:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+               break;
+       case SPRN_IVOR15:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+               break;
+
+       default:
+               emulated = EMULATE_FAIL;
+       }
+
+       return emulated;
+}
index 084ebcd7dd83f416136d323f998beb52cc303709..d0c6f841bbd10ddb4b73ef5c7609ee0726004e18 100644 (file)
@@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
 KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
 KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
 KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
 
 _GLOBAL(kvmppc_handler_len)
        .long kvmppc_handler_1 - kvmppc_handler_0
@@ -347,7 +350,9 @@ lightweight_exit:
        lwz     r3, VCPU_SHADOW_PID(r4)
        mtspr   SPRN_PID, r3
 
+#ifdef CONFIG_44x
        iccci   0, 0 /* XXX hack */
+#endif
 
        /* Load some guest volatiles. */
        lwz     r0, VCPU_GPR(r0)(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644 (file)
index 0000000..d8067fd
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_e500.h>
+#include <asm/kvm_ppc.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+       kvmppc_e500_tlb_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+       kvmppc_e500_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+       int r;
+
+       if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0)
+               r = 0;
+       else
+               r = -ENOTSUPP;
+
+       return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+       kvmppc_e500_tlb_setup(vcpu_e500);
+
+       /* Use the same core version as the host's */
+       vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+       return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR. */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+       int index;
+       gva_t eaddr;
+       u8 pid;
+       u8 as;
+
+       eaddr = tr->linear_address;
+       pid = (tr->linear_address >> 32) & 0xff;
+       as = (tr->linear_address >> 40) & 0x1;
+
+       index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+       if (index < 0) {
+               tr->valid = 0;
+               return 0;
+       }
+
+       tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+       /* XXX what do "writeable" and "usermode" even mean? */
+       tr->valid = 1;
+
+       return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500;
+       struct kvm_vcpu *vcpu;
+       int err;
+
+       vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       if (!vcpu_e500) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       vcpu = &vcpu_e500->vcpu;
+       err = kvm_vcpu_init(vcpu, kvm, id);
+       if (err)
+               goto free_vcpu;
+
+       err = kvmppc_e500_tlb_init(vcpu_e500);
+       if (err)
+               goto uninit_vcpu;
+
+       return vcpu;
+
+uninit_vcpu:
+       kvm_vcpu_uninit(vcpu);
+free_vcpu:
+       kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+       return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+       kvmppc_e500_tlb_uninit(vcpu_e500);
+       kvm_vcpu_uninit(vcpu);
+       kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+static int kvmppc_e500_init(void)
+{
+       int r, i;
+       unsigned long ivor[3];
+       unsigned long max_ivor = 0;
+
+       r = kvmppc_booke_init();
+       if (r)
+               return r;
+
+       /* copy extra E500 exception handlers */
+       ivor[0] = mfspr(SPRN_IVOR32);
+       ivor[1] = mfspr(SPRN_IVOR33);
+       ivor[2] = mfspr(SPRN_IVOR34);
+       for (i = 0; i < 3; i++) {
+               if (ivor[i] > max_ivor)
+                       max_ivor = ivor[i];
+
+               memcpy((void *)kvmppc_booke_handlers + ivor[i],
+                      kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
+                      kvmppc_handler_len);
+       }
+       flush_icache_range(kvmppc_booke_handlers,
+                       kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+       return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
+}
+
+static void kvmppc_e500_exit(void)
+{
+       kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500_init);
+module_exit(kvmppc_e500_exit);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644 (file)
index 0000000..3f76041
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x_emulate.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_e500.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+#define XOP_TLBIVAX 786
+#define XOP_TLBSX   914
+#define XOP_TLBRE   946
+#define XOP_TLBWE   978
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           unsigned int inst, int *advance)
+{
+       int emulated = EMULATE_DONE;
+       int ra;
+       int rb;
+
+       switch (get_op(inst)) {
+       case 31:
+               switch (get_xop(inst)) {
+
+               case XOP_TLBRE:
+                       emulated = kvmppc_e500_emul_tlbre(vcpu);
+                       break;
+
+               case XOP_TLBWE:
+                       emulated = kvmppc_e500_emul_tlbwe(vcpu);
+                       break;
+
+               case XOP_TLBSX:
+                       rb = get_rb(inst);
+                       emulated = kvmppc_e500_emul_tlbsx(vcpu, rb);
+                       break;
+
+               case XOP_TLBIVAX:
+                       ra = get_ra(inst);
+                       rb = get_rb(inst);
+                       emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+                       break;
+
+               default:
+                       emulated = EMULATE_FAIL;
+               }
+
+               break;
+
+       default:
+               emulated = EMULATE_FAIL;
+       }
+
+       if (emulated == EMULATE_FAIL)
+               emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
+       return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int emulated = EMULATE_DONE;
+
+       switch (sprn) {
+       case SPRN_PID:
+               vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+                       vcpu->arch.pid = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_PID1:
+               vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+       case SPRN_PID2:
+               vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS0:
+               vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS1:
+               vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS2:
+               vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS3:
+               vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS4:
+               vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS6:
+               vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+       case SPRN_MAS7:
+               vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+       case SPRN_L1CSR1:
+               vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+       case SPRN_HID0:
+               vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+       case SPRN_HID1:
+               vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+
+       case SPRN_MMUCSR0:
+               emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
+                               vcpu->arch.gpr[rs]);
+               break;
+
+       /* extra exceptions */
+       case SPRN_IVOR32:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR33:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR34:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+               break;
+       case SPRN_IVOR35:
+               vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+               break;
+
+       default:
+               emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+       }
+
+       return emulated;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int emulated = EMULATE_DONE;
+
+       switch (sprn) {
+       case SPRN_PID:
+               vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+       case SPRN_PID1:
+               vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+       case SPRN_PID2:
+               vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+       case SPRN_MAS0:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+       case SPRN_MAS1:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+       case SPRN_MAS2:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+       case SPRN_MAS3:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+       case SPRN_MAS4:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+       case SPRN_MAS6:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+       case SPRN_MAS7:
+               vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+
+       case SPRN_TLB0CFG:
+               vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
+               vcpu->arch.gpr[rt] &= ~0xfffUL;
+               vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
+               break;
+
+       case SPRN_TLB1CFG:
+               vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
+               vcpu->arch.gpr[rt] &= ~0xfffUL;
+               vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
+               break;
+
+       case SPRN_L1CSR1:
+               vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+       case SPRN_HID0:
+               vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+       case SPRN_HID1:
+               vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+
+       case SPRN_MMUCSR0:
+               vcpu->arch.gpr[rt] = 0; break;
+
+       /* extra exceptions */
+       case SPRN_IVOR32:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+               break;
+       case SPRN_IVOR33:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+               break;
+       case SPRN_IVOR34:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+               break;
+       case SPRN_IVOR35:
+               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+               break;
+       default:
+               emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+       }
+
+       return emulated;
+}
+
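
Editor's note: for the SPRN_TLB0CFG/TLB1CFG reads above, the host's configuration register is passed through except for its low 12 bits (the entry count), which are replaced by the size of the guest's software-managed TLB array. A tiny sketch of that bit manipulation, with a made-up host register value, follows.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t host_tlb1cfg   = 0x201fc040;  /* invented host TLB1CFG value */
        uint32_t guest_tlb_size = 16;          /* entries in the guest's TLB1 */

        /* Keep the host's upper bits, substitute the guest's entry count. */
        uint32_t reported = (host_tlb1cfg & ~0xfffu) | guest_tlb_size;

        printf("guest sees TLB1CFG = 0x%08x\n", reported);  /* 0x201fc010 */
        return 0;
}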
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
new file mode 100644 (file)
index 0000000..0e773fc
--- /dev/null
@@ -0,0 +1,757 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_e500.h>
+
+#include "../mm/mmu_decl.h"
+#include "e500_tlb.h"
+
+#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+
+static unsigned int tlb1_entry_num;
+
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       struct tlbe *tlbe;
+       int i, tlbsel;
+
+       printk("| %8s | %8s | %8s | %8s | %8s |\n",
+                       "nr", "mas1", "mas2", "mas3", "mas7");
+
+       for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+               printk("Guest TLB%d:\n", tlbsel);
+               for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+                       tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+                       if (tlbe->mas1 & MAS1_VALID)
+                               printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
+                                       tlbsel, i, tlbe->mas1, tlbe->mas2,
+                                       tlbe->mas3, tlbe->mas7);
+               }
+       }
+
+       for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+               printk("Shadow TLB%d:\n", tlbsel);
+               for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
+                       tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
+                       if (tlbe->mas1 & MAS1_VALID)
+                               printk(" S[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
+                                       tlbsel, i, tlbe->mas1, tlbe->mas2,
+                                       tlbe->mas3, tlbe->mas7);
+               }
+       }
+}
+
+static inline unsigned int tlb0_get_next_victim(
+               struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       unsigned int victim;
+
+       victim = vcpu_e500->guest_tlb_nv[0]++;
+       if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+               vcpu_e500->guest_tlb_nv[0] = 0;
+
+       return victim;
+}
+
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+       return tlb1_entry_num - tlbcam_index;
+}
+
+static inline int tlbe_is_writable(struct tlbe *tlbe)
+{
+       return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+}
+
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+       /* Mask off reserved bits. */
+       mas3 &= MAS3_ATTRIB_MASK;
+
+       if (!usermode) {
+               /* Guest is in supervisor mode,
+                * so we need to translate guest
+                * supervisor permissions into user permissions. */
+               mas3 &= ~E500_TLB_USER_PERM_MASK;
+               mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
+       }
+
+       return mas3 | E500_TLB_SUPER_PERM_MASK;
+}
+
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+{
+#ifdef CONFIG_SMP
+       return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
+#else
+       return mas2 & MAS2_ATTRIB_MASK;
+#endif
+}
+
+/*
+ * writing shadow tlb entry to host TLB
+ */
+static inline void __write_host_tlbe(struct tlbe *stlbe)
+{
+       mtspr(SPRN_MAS1, stlbe->mas1);
+       mtspr(SPRN_MAS2, stlbe->mas2);
+       mtspr(SPRN_MAS3, stlbe->mas3);
+       mtspr(SPRN_MAS7, stlbe->mas7);
+       __asm__ __volatile__ ("tlbwe\n" : : );
+}
+
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+               int tlbsel, int esel)
+{
+       struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+       local_irq_disable();
+       if (tlbsel == 0) {
+               __write_host_tlbe(stlbe);
+       } else {
+               unsigned register mas0;
+
+               mas0 = mfspr(SPRN_MAS0);
+
+               mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
+               __write_host_tlbe(stlbe);
+
+               mtspr(SPRN_MAS0, mas0);
+       }
+       local_irq_enable();
+}
+
+void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int i;
+       unsigned register mas0;
+
+       /* Load all valid TLB1 entries to reduce guest TLB miss faults */
+       local_irq_disable();
+       mas0 = mfspr(SPRN_MAS0);
+       for (i = 0; i < tlb1_max_shadow_size(); i++) {
+               struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+
+               if (get_tlb_v(stlbe)) {
+                       mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
+                                       | MAS0_ESEL(to_htlb1_esel(i)));
+                       __write_host_tlbe(stlbe);
+               }
+       }
+       mtspr(SPRN_MAS0, mas0);
+       local_irq_enable();
+}
+
+void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
+{
+       _tlbil_all();
+}
+
+/* Search the guest TLB for a matching entry. */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+               gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+       int i;
+
+       /* XXX Replace loop with fancy data structures. */
+       for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+               struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+               unsigned int tid;
+
+               if (eaddr < get_tlb_eaddr(tlbe))
+                       continue;
+
+               if (eaddr > get_tlb_end(tlbe))
+                       continue;
+
+               tid = get_tlb_tid(tlbe);
+               if (tid && (tid != pid))
+                       continue;
+
+               if (!get_tlb_v(tlbe))
+                       continue;
+
+               if (get_tlb_ts(tlbe) != as && as != -1)
+                       continue;
+
+               return i;
+       }
+
+       return -1;
+}
+
+static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
+               int tlbsel, int esel)
+{
+       struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+       struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
+
+       if (page) {
+               vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
+
+               if (get_tlb_v(stlbe)) {
+                       if (tlbe_is_writable(stlbe))
+                               kvm_release_page_dirty(page);
+                       else
+                               kvm_release_page_clean(page);
+               }
+       }
+}
+
+static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+               int tlbsel, int esel)
+{
+       struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+       kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+       stlbe->mas1 = 0;
+       KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+                       stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+                       handler);
+}
+
+static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+               gva_t eaddr, gva_t eend, u32 tid)
+{
+       unsigned int pid = tid & 0xff;
+       unsigned int i;
+
+       /* XXX Replace loop with fancy data structures. */
+       for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
+               struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+               unsigned int tid;
+
+               if (!get_tlb_v(stlbe))
+                       continue;
+
+               if (eend < get_tlb_eaddr(stlbe))
+                       continue;
+
+               if (eaddr > get_tlb_end(stlbe))
+                       continue;
+
+               tid = get_tlb_tid(stlbe);
+               if (tid && (tid != pid))
+                       continue;
+
+               kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+               write_host_tlbe(vcpu_e500, 1, i);
+       }
+}
+
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+               unsigned int eaddr, int as)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       unsigned int victim, pidsel, tsized;
+       int tlbsel;
+
+       /* since we only have two TLBs, only lower bit is used. */
+       tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
+       victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+       pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
+       tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+
+       vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+               | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+       vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+               | MAS1_TID(vcpu_e500->pid[pidsel])
+               | MAS1_TSIZE(tsized);
+       vcpu_e500->mas2 = (eaddr & MAS2_EPN)
+               | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
+       vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+       vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
+               | (get_cur_pid(vcpu) << 16)
+               | (as ? MAS6_SAS : 0);
+       vcpu_e500->mas7 = 0;
+}
+
+static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+       u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
+{
+       struct page *new_page;
+       struct tlbe *stlbe;
+       hpa_t hpaddr;
+
+       stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+       /* Get reference to new page. */
+       new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
+       if (is_error_page(new_page)) {
+               printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+               kvm_release_page_clean(new_page);
+               return;
+       }
+       hpaddr = page_to_phys(new_page);
+
+       /* Drop reference to old page. */
+       kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+
+       vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
+
+       /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
+       stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+               | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+       stlbe->mas2 = (gvaddr & MAS2_EPN)
+               | e500_shadow_mas2_attrib(gtlbe->mas2,
+                               vcpu_e500->vcpu.arch.msr & MSR_PR);
+       stlbe->mas3 = (hpaddr & MAS3_RPN)
+               | e500_shadow_mas3_attrib(gtlbe->mas3,
+                               vcpu_e500->vcpu.arch.msr & MSR_PR);
+       stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
+
+       KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+                       stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+                       handler);
+}
+
+/* XXX only map the one-to-one case, for now use TLB0 */
+static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+               int tlbsel, int esel)
+{
+       struct tlbe *gtlbe;
+
+       gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+       kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+                       get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+                       gtlbe, tlbsel, esel);
+
+       return esel;
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+/* XXX for both one-to-one and one-to-many, for now use TLB1 */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+               u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
+{
+       unsigned int victim;
+
+       victim = vcpu_e500->guest_tlb_nv[1]++;
+
+       if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
+               vcpu_e500->guest_tlb_nv[1] = 0;
+
+       kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
+
+       return victim;
+}
+
+/* Invalidate all guest kernel mappings when entering usermode,
+ * so that when they fault back in they will get the
+ * proper permission bits. */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+       if (usermode) {
+               struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+               int i;
+
+               /* XXX Replace loop with fancy data structures. */
+               for (i = 0; i < tlb1_max_shadow_size(); i++)
+                       kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+
+               _tlbil_all();
+       }
+}
+
+static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+               int tlbsel, int esel)
+{
+       struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+       if (unlikely(get_tlb_iprot(gtlbe)))
+               return -1;
+
+       if (tlbsel == 1) {
+               kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
+                               get_tlb_end(gtlbe),
+                               get_tlb_tid(gtlbe));
+       } else {
+               kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
+       }
+
+       gtlbe->mas1 = 0;
+
+       return 0;
+}
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
+{
+       int esel;
+
+       if (value & MMUCSR0_TLB0FI)
+               for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++)
+                       kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
+       if (value & MMUCSR0_TLB1FI)
+               for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++)
+                       kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
+
+       _tlbil_all();
+
+       return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       unsigned int ia;
+       int esel, tlbsel;
+       gva_t ea;
+
+       ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+
+       ia = (ea >> 2) & 0x1;
+
+       /* since we only have two TLBs, only the lower bit is used. */
+       tlbsel = (ea >> 3) & 0x1;
+
+       if (ia) {
+               /* invalidate all entries */
+               for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
+                       kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+       } else {
+               ea &= 0xfffff000;
+               esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+                               get_cur_pid(vcpu), -1);
+               if (esel >= 0)
+                       kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+       }
+
+       _tlbil_all();
+
+       return EMULATE_DONE;
+}
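
For reference, kvmppc_e500_emul_tlbivax() above decodes the guest's tlbivax effective address with the invalidate-all flag in bit 2, the TLB selector in bit 3, and the effective page number in the upper bits. A minimal standalone sketch of that decoding (the struct and helper names are invented for the example and are not part of the patch):

```c
#include <stdint.h>

/* Illustrative sketch of the tlbivax EA decoding done above. */
struct tlbivax_ea {
	unsigned int inv_all;	/* bit 2: flush the whole selected TLB */
	unsigned int tlbsel;	/* bit 3: TLB array (only the low bit is used) */
	uint32_t epn;		/* bits 12 and up: effective page number */
};

static struct tlbivax_ea decode_tlbivax_ea(uint32_t ea)
{
	struct tlbivax_ea d;

	d.inv_all = (ea >> 2) & 0x1;
	d.tlbsel  = (ea >> 3) & 0x1;
	d.epn     = ea & 0xfffff000;
	return d;
}
```
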
+
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int tlbsel, esel;
+       struct tlbe *gtlbe;
+
+       tlbsel = get_tlb_tlbsel(vcpu_e500);
+       esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+       gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+       vcpu_e500->mas0 &= ~MAS0_NV(~0);
+       vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+       vcpu_e500->mas1 = gtlbe->mas1;
+       vcpu_e500->mas2 = gtlbe->mas2;
+       vcpu_e500->mas3 = gtlbe->mas3;
+       vcpu_e500->mas7 = gtlbe->mas7;
+
+       return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int as = !!get_cur_sas(vcpu_e500);
+       unsigned int pid = get_cur_spid(vcpu_e500);
+       int esel, tlbsel;
+       struct tlbe *gtlbe = NULL;
+       gva_t ea;
+
+       ea = vcpu->arch.gpr[rb];
+
+       for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+               esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+               if (esel >= 0) {
+                       gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+                       break;
+               }
+       }
+
+       if (gtlbe) {
+               vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+                       | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+               vcpu_e500->mas1 = gtlbe->mas1;
+               vcpu_e500->mas2 = gtlbe->mas2;
+               vcpu_e500->mas3 = gtlbe->mas3;
+               vcpu_e500->mas7 = gtlbe->mas7;
+       } else {
+               int victim;
+
+               /* since we only have two TLBs, only the lower bit is used. */
+               tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
+               victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+
+               vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+                       | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+               vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
+                       | ((vcpu_e500->mas6 & MAS6_SAS) ? MAS1_TS : 0)
+                       | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
+               vcpu_e500->mas2 &= MAS2_EPN;
+               vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
+               vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+               vcpu_e500->mas7 = 0;
+       }
+
+       return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       u64 eaddr;
+       u64 raddr;
+       u32 tid;
+       struct tlbe *gtlbe;
+       int tlbsel, esel, stlbsel, sesel;
+
+       tlbsel = get_tlb_tlbsel(vcpu_e500);
+       esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+       gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+       if (get_tlb_v(gtlbe) && tlbsel == 1) {
+               eaddr = get_tlb_eaddr(gtlbe);
+               tid = get_tlb_tid(gtlbe);
+               kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
+                               get_tlb_end(gtlbe), tid);
+       }
+
+       gtlbe->mas1 = vcpu_e500->mas1;
+       gtlbe->mas2 = vcpu_e500->mas2;
+       gtlbe->mas3 = vcpu_e500->mas3;
+       gtlbe->mas7 = vcpu_e500->mas7;
+
+       KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
+                       gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
+                       handler);
+
+       /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+       if (tlbe_is_host_safe(vcpu, gtlbe)) {
+               switch (tlbsel) {
+               case 0:
+                       /* TLB0 */
+                       gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+                       gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+
+                       stlbsel = 0;
+                       sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
+
+                       break;
+
+               case 1:
+                       /* TLB1 */
+                       eaddr = get_tlb_eaddr(gtlbe);
+                       raddr = get_tlb_raddr(gtlbe);
+
+                       /* Create a 4KB mapping on the host.
+                        * If the guest wanted a large page,
+                        * only the first 4KB is mapped here and the rest
+                        * are mapped on the fly. */
+                       stlbsel = 1;
+                       sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
+                                       raddr >> PAGE_SHIFT, gtlbe);
+                       break;
+
+               default:
+                       BUG();
+               }
+               write_host_tlbe(vcpu_e500, stlbsel, sesel);
+       }
+
+       return EMULATE_DONE;
+}
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+       unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+       return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+       unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+       return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+       unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+       kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+       unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+       kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
+}
+
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+                       gva_t eaddr)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       struct tlbe *gtlbe =
+               &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
+       u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+
+       return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int tlbsel, i;
+
+       for (tlbsel = 0; tlbsel < 2; tlbsel++)
+               for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
+                       kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
+
+       /* discard all guest mappings */
+       _tlbil_all();
+}
+
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+                       unsigned int index)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int tlbsel = tlbsel_of(index);
+       int esel = esel_of(index);
+       int stlbsel, sesel;
+
+       switch (tlbsel) {
+       case 0:
+               stlbsel = 0;
+               sesel = esel;
+               break;
+
+       case 1: {
+               gfn_t gfn = gpaddr >> PAGE_SHIFT;
+               struct tlbe *gtlbe
+                       = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+               stlbsel = 1;
+               sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
+               break;
+       }
+
+       default:
+               BUG();
+               break;
+       }
+       write_host_tlbe(vcpu_e500, stlbsel, sesel);
+}
+
+int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+                               gva_t eaddr, unsigned int pid, int as)
+{
+       struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+       int esel, tlbsel;
+
+       for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+               esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
+               if (esel >= 0)
+                       return index_of(tlbsel, esel);
+       }
+
+       return -1;
+}
+
+void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       struct tlbe *tlbe;
+
+       /* Insert large initial mapping for guest. */
+       tlbe = &vcpu_e500->guest_tlb[1][0];
+       tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+       tlbe->mas2 = 0;
+       tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
+       tlbe->mas7 = 0;
+
+       /* 4K map for serial output. Used by kernel wrapper. */
+       tlbe = &vcpu_e500->guest_tlb[1][1];
+       tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+       tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+       tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+       tlbe->mas7 = 0;
+}
+
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
+
+       vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
+       vcpu_e500->guest_tlb[0] =
+               kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+       if (vcpu_e500->guest_tlb[0] == NULL)
+               goto err_out;
+
+       vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
+       vcpu_e500->shadow_tlb[0] =
+               kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+       if (vcpu_e500->shadow_tlb[0] == NULL)
+               goto err_out_guest0;
+
+       vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
+       vcpu_e500->guest_tlb[1] =
+               kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
+       if (vcpu_e500->guest_tlb[1] == NULL)
+               goto err_out_shadow0;
+
+       vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
+       vcpu_e500->shadow_tlb[1] =
+               kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
+       if (vcpu_e500->shadow_tlb[1] == NULL)
+               goto err_out_guest1;
+
+       vcpu_e500->shadow_pages[0] = (struct page **)
+               kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+       if (vcpu_e500->shadow_pages[0] == NULL)
+               goto err_out_shadow1;
+
+       vcpu_e500->shadow_pages[1] = (struct page **)
+               kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
+       if (vcpu_e500->shadow_pages[1] == NULL)
+               goto err_out_page0;
+
+       return 0;
+
+err_out_page0:
+       kfree(vcpu_e500->shadow_pages[0]);
+err_out_shadow1:
+       kfree(vcpu_e500->shadow_tlb[1]);
+err_out_guest1:
+       kfree(vcpu_e500->guest_tlb[1]);
+err_out_shadow0:
+       kfree(vcpu_e500->shadow_tlb[0]);
+err_out_guest0:
+       kfree(vcpu_e500->guest_tlb[0]);
+err_out:
+       return -1;
+}
+
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       kfree(vcpu_e500->shadow_pages[1]);
+       kfree(vcpu_e500->shadow_pages[0]);
+       kfree(vcpu_e500->shadow_tlb[1]);
+       kfree(vcpu_e500->guest_tlb[1]);
+       kfree(vcpu_e500->shadow_tlb[0]);
+       kfree(vcpu_e500->guest_tlb[0]);
+}
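
The shadow TLB1 entries above are recycled with a plain round-robin counter: kvmppc_e500_tlb1_map() hands out guest_tlb_nv[1] as the victim and wraps it at tlb1_max_shadow_size(). A minimal sketch of that victim-selection policy in isolation, with a made-up fixed size standing in for tlb1_max_shadow_size():

```c
/* Illustrative sketch of the round-robin victim selection used for
 * shadow TLB1 entries; SHADOW_TLB1_SIZE is a stand-in constant. */
#define SHADOW_TLB1_SIZE 16

static unsigned int tlb1_next_victim;

static unsigned int pick_tlb1_victim(void)
{
	unsigned int victim = tlb1_next_victim++;

	if (tlb1_next_victim >= SHADOW_TLB1_SIZE)
		tlb1_next_victim = 0;	/* wrap around: oldest entries get reused first */
	return victim;
}
```
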
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
new file mode 100644 (file)
index 0000000..45b064b
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __KVM_E500_TLB_H__
+#define __KVM_E500_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-fsl-booke.h>
+#include <asm/tlb.h>
+#include <asm/kvm_e500.h>
+
+#define KVM_E500_TLB0_WAY_SIZE_BIT     7       /* Fixed */
+#define KVM_E500_TLB0_WAY_SIZE         (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
+#define KVM_E500_TLB0_WAY_SIZE_MASK    (KVM_E500_TLB0_WAY_SIZE - 1)
+
+#define KVM_E500_TLB0_WAY_NUM_BIT      1       /* No greater than 7 */
+#define KVM_E500_TLB0_WAY_NUM          (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
+#define KVM_E500_TLB0_WAY_NUM_MASK     (KVM_E500_TLB0_WAY_NUM - 1)
+
+#define KVM_E500_TLB0_SIZE  (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE  16
+
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index)       ((index) >> 16)
+#define esel_of(index)         ((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+         (MAS2_X0 | MAS2_X1)
+#define MAS3_ATTRIB_MASK \
+         (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+          | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
+extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
+extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
+extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
+extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+
+/* TLB helper functions */
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+{
+       return (tlbe->mas1 >> 8) & 0xf;
+}
+
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+{
+       return tlbe->mas2 & 0xfffff000;
+}
+
+static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+{
+       unsigned int pgsize = get_tlb_size(tlbe);
+       return 1ULL << 10 << (pgsize << 1);
+}
+
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+{
+       u64 bytes = get_tlb_bytes(tlbe);
+       return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+{
+       u64 rpn = tlbe->mas7;
+       return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+}
+
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+{
+       return (tlbe->mas1 >> 16) & 0xff;
+}
+
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+{
+       return (tlbe->mas1 >> 12) & 0x1;
+}
+
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+{
+       return (tlbe->mas1 >> 31) & 0x1;
+}
+
+static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+{
+       return (tlbe->mas1 >> 30) & 0x1;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_spid(
+               const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       return (vcpu_e500->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(
+               const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       return vcpu_e500->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(
+               const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       /*
+        * The manual says that tlbsel is 2 bits wide.
+        * Since we only have two TLBs, only the lower bit is used.
+        */
+       return (vcpu_e500->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(
+               const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       return vcpu_e500->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(
+               const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+       return (vcpu_e500->mas0 >> 16) & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel(
+               const struct kvmppc_vcpu_e500 *vcpu_e500,
+               int tlbsel)
+{
+       unsigned int esel = get_tlb_esel_bit(vcpu_e500);
+
+       if (tlbsel == 0) {
+               esel &= KVM_E500_TLB0_WAY_NUM_MASK;
+               esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
+                               << KVM_E500_TLB0_WAY_NUM_BIT;
+       } else {
+               esel &= KVM_E500_TLB1_SIZE - 1;
+       }
+
+       return esel;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+                       const struct tlbe *tlbe)
+{
+       gpa_t gpa;
+
+       if (!get_tlb_v(tlbe))
+               return 0;
+
+       /* Does it match current guest AS? */
+       /* XXX what about IS != DS? */
+       if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+               return 0;
+
+       gpa = get_tlb_raddr(tlbe);
+       if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+               /* Mapping is not for RAM. */
+               return 0;
+
+       return 1;
+}
+
+#endif /* __KVM_E500_TLB_H__ */
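
Two of the helpers above deserve a worked example: index_of() packs the TLB selector into the high 16 bits of an int and the entry index into the low 16 bits (tlbsel_of()/esel_of() undo it), and get_tlb_bytes() turns the 4-bit TSIZE field into 1 KB shifted left by twice TSIZE, i.e. 1 KB * 4^TSIZE. A standalone check of both (plain user-space C, not kernel code):

```c
#include <assert.h>
#include <stdint.h>

/* Same packing as index_of()/tlbsel_of()/esel_of() above. */
#define index_of(tlbsel, esel)	(((tlbsel) << 16) | ((esel) & 0xFFFF))
#define tlbsel_of(index)	((index) >> 16)
#define esel_of(index)		((index) & 0xFFFF)

/* Same arithmetic as get_tlb_bytes(): 1 KB << (2 * tsize). */
static uint64_t tsize_to_bytes(unsigned int tsize)
{
	return 1ULL << 10 << (tsize << 1);
}

int main(void)
{
	int index = index_of(1, 5);

	assert(tlbsel_of(index) == 1 && esel_of(index) == 5);
	assert(tsize_to_bytes(1) == 4096);		/* the forced 4 KB shadow mapping size */
	assert(tsize_to_bytes(9) == 256ULL << 20);	/* the 256 MB initial guest mapping */
	return 0;
}
```
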
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index d1d38daa93fbf723490f09a38b2e5e6eca2dcfd1..a561d6e8da1c19b8c054967333db709f7595a28b 100644 (file)
 #include <asm/disassemble.h>
 #include "timing.h"
 
+#define OP_TRAP 3
+
+#define OP_31_XOP_LWZX      23
+#define OP_31_XOP_LBZX      87
+#define OP_31_XOP_STWX      151
+#define OP_31_XOP_STBX      215
+#define OP_31_XOP_STBUX     247
+#define OP_31_XOP_LHZX      279
+#define OP_31_XOP_LHZUX     311
+#define OP_31_XOP_MFSPR     339
+#define OP_31_XOP_STHX      407
+#define OP_31_XOP_STHUX     439
+#define OP_31_XOP_MTSPR     467
+#define OP_31_XOP_DCBI      470
+#define OP_31_XOP_LWBRX     534
+#define OP_31_XOP_TLBSYNC   566
+#define OP_31_XOP_STWBRX    662
+#define OP_31_XOP_LHBRX     790
+#define OP_31_XOP_STHBRX    918
+
+#define OP_LWZ  32
+#define OP_LWZU 33
+#define OP_LBZ  34
+#define OP_LBZU 35
+#define OP_STW  36
+#define OP_STWU 37
+#define OP_STB  38
+#define OP_STBU 39
+#define OP_LHZ  40
+#define OP_LHZU 41
+#define OP_STH  44
+#define OP_STHU 45
+
 void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
        if (vcpu->arch.tcr & TCR_DIE) {
@@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
        kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
 
        switch (get_op(inst)) {
-       case 3:                                             /* trap */
+       case OP_TRAP:
                vcpu->arch.esr |= ESR_PTR;
                kvmppc_core_queue_program(vcpu);
                advance = 0;
@@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
        case 31:
                switch (get_xop(inst)) {
 
-               case 23:                                        /* lwzx */
+               case OP_31_XOP_LWZX:
                        rt = get_rt(inst);
                        emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
                        break;
 
-               case 87:                                        /* lbzx */
+               case OP_31_XOP_LBZX:
                        rt = get_rt(inst);
                        emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
                        break;
 
-               case 151:                                       /* stwx */
+               case OP_31_XOP_STWX:
                        rs = get_rs(inst);
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       vcpu->arch.gpr[rs],
                                                       4, 1);
                        break;
 
-               case 215:                                       /* stbx */
+               case OP_31_XOP_STBX:
                        rs = get_rs(inst);
                        emulated = kvmppc_handle_store(run, vcpu,
                                                       vcpu->arch.gpr[rs],
                                                       1, 1);
                        break;
 
-               case 247:                                       /* stbux */
+               case OP_31_XOP_STBUX:
                        rs = get_rs(inst);
                        ra = get_ra(inst);
                        rb = get_rb(inst);
@@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        vcpu->arch.gpr[rs] = ea;
                        break;
 
-               case 279:                                       /* lhzx */
+               case OP_31_XOP_LHZX:
                        rt = get_rt(inst);
                        emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
                        break;
 
-               case 311:                                       /* lhzux */
+               case OP_31_XOP_LHZUX:
                        rt = get_rt(inst);
                        ra = get_ra(inst);
                        rb = get_rb(inst);
@@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        vcpu->arch.gpr[ra] = ea;
                        break;
 
-               case 339:                                       /* mfspr */
+               case OP_31_XOP_MFSPR:
                        sprn = get_sprn(inst);
                        rt = get_rt(inst);
 
@@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        }
                        break;
 
-               case 407:                                       /* sthx */
+               case OP_31_XOP_STHX:
                        rs = get_rs(inst);
                        ra = get_ra(inst);
                        rb = get_rb(inst);
@@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                                                       2, 1);
                        break;
 
-               case 439:                                       /* sthux */
+               case OP_31_XOP_STHUX:
                        rs = get_rs(inst);
                        ra = get_ra(inst);
                        rb = get_rb(inst);
@@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        vcpu->arch.gpr[ra] = ea;
                        break;
 
-               case 467:                                       /* mtspr */
+               case OP_31_XOP_MTSPR:
                        sprn = get_sprn(inst);
                        rs = get_rs(inst);
                        switch (sprn) {
@@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        }
                        break;
 
-               case 470:                                       /* dcbi */
+               case OP_31_XOP_DCBI:
                        /* Do nothing. The guest is performing dcbi because
                         * hardware DMA is not snooped by the dcache, but
                         * emulated DMA either goes through the dcache as
@@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                         * coherence. */
                        break;
 
-               case 534:                                       /* lwbrx */
+               case OP_31_XOP_LWBRX:
                        rt = get_rt(inst);
                        emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
                        break;
 
-               case 566:                                       /* tlbsync */
+               case OP_31_XOP_TLBSYNC:
                        break;
 
-               case 662:                                       /* stwbrx */
+               case OP_31_XOP_STWBRX:
                        rs = get_rs(inst);
                        ra = get_ra(inst);
                        rb = get_rb(inst);
@@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                                                       4, 0);
                        break;
 
-               case 790:                                       /* lhbrx */
+               case OP_31_XOP_LHBRX:
                        rt = get_rt(inst);
                        emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
                        break;
 
-               case 918:                                       /* sthbrx */
+               case OP_31_XOP_STHBRX:
                        rs = get_rs(inst);
                        ra = get_ra(inst);
                        rb = get_rb(inst);
@@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                }
                break;
 
-       case 32:                                                /* lwz */
+       case OP_LWZ:
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
                break;
 
-       case 33:                                                /* lwzu */
+       case OP_LWZU:
                ra = get_ra(inst);
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
                vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
                break;
 
-       case 34:                                                /* lbz */
+       case OP_LBZ:
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
                break;
 
-       case 35:                                                /* lbzu */
+       case OP_LBZU:
                ra = get_ra(inst);
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
                vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
                break;
 
-       case 36:                                                /* stw */
+       case OP_STW:
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
                                               4, 1);
                break;
 
-       case 37:                                                /* stwu */
+       case OP_STWU:
                ra = get_ra(inst);
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
                break;
 
-       case 38:                                                /* stb */
+       case OP_STB:
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
                                               1, 1);
                break;
 
-       case 39:                                                /* stbu */
+       case OP_STBU:
                ra = get_ra(inst);
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
                break;
 
-       case 40:                                                /* lhz */
+       case OP_LHZ:
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
                break;
 
-       case 41:                                                /* lhzu */
+       case OP_LHZU:
                ra = get_ra(inst);
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
                vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
                break;
 
-       case 44:                                                /* sth */
+       case OP_STH:
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
                                               2, 1);
                break;
 
-       case 45:                                                /* sthu */
+       case OP_STHU:
                ra = get_ra(inst);
                rs = get_rs(inst);
                emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5f81256287f5967d1480e7abbbac2fbcab40ac9c..9057335fdc616ce607f76b2dc7e099965ce49e5e 100644 (file)
@@ -216,46 +216,23 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-       kvmppc_core_destroy_mmu(vcpu);
+       kvmppc_mmu_destroy(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-       if (vcpu->guest_debug.enabled)
-               kvmppc_core_load_guest_debugstate(vcpu);
-
        kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->guest_debug.enabled)
-               kvmppc_core_load_host_debugstate(vcpu);
-
-       /* Don't leave guest TLB entries resident when being de-scheduled. */
-       /* XXX It would be nice to differentiate between heavyweight exit and
-        * sched_out here, since we could avoid the TLB flush for heavyweight
-        * exits. */
-       _tlbil_all();
        kvmppc_core_vcpu_put(vcpu);
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                        struct kvm_guest_debug *dbg)
 {
-       int i;
-
-       vcpu->guest_debug.enabled = dbg->enabled;
-       if (vcpu->guest_debug.enabled) {
-               for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) {
-                       if (dbg->breakpoints[i].enabled)
-                               vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
-                       else
-                               vcpu->guest_debug.bp[i] = 0;
-               }
-       }
-
-       return 0;
+       return -EINVAL;
 }
 
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index e1f54654e3ae5f9abdbc6b01668a49a7d4b6af26..0b2f829f6d50569ff679394683ea1dbaeee7c385 100644 (file)
@@ -42,4 +42,11 @@ struct kvm_fpu {
        __u64 fprs[16];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3c55e4107dcc1864f232aff79d7e8d6b726ca084..c6e674f5fca9f6eca174785dc599f040f926784b 100644 (file)
@@ -21,9 +21,6 @@
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
-struct kvm_guest_debug {
-};
-
 struct sca_entry {
        atomic_t scn;
        __u64   reserved;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index e051cad1f1e09d9f880b91f0d23b7738e10a5a57..3e260b7e37b2ce58578e0fbf6ce369b738de5df6 100644 (file)
@@ -4,6 +4,9 @@
 config HAVE_KVM
        bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+
 menuconfig VIRTUALIZATION
        bool "Virtualization"
        default y
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 61236102203e8746933ae96dab4876b421d901d6..9d19803111bab26d8021d3aaf2f0f028092f8c8b 100644 (file)
@@ -103,7 +103,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
 static intercept_handler_t instruction_handlers[256] = {
        [0x83] = kvm_s390_handle_diag,
        [0xae] = kvm_s390_handle_sigp,
-       [0xb2] = kvm_s390_handle_priv,
+       [0xb2] = kvm_s390_handle_b2,
        [0xb7] = handle_lctl,
        [0xeb] = handle_lctlg,
 };
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4fe28a2521a10aa7e5d8c99d58c5e83603b631e..0189356fe2098cb3f6ddd61434da623b8a7c9cfa 100644 (file)
@@ -555,9 +555,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
                           s390int->parm);
                break;
+       case KVM_S390_SIGP_SET_PREFIX:
+               inti->prefix.address = s390int->parm;
+               inti->type = s390int->type;
+               VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+                          s390int->parm);
+               break;
        case KVM_S390_SIGP_STOP:
        case KVM_S390_RESTART:
-       case KVM_S390_SIGP_SET_PREFIX:
        case KVM_S390_INT_EMERGENCY:
                VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
                inti->type = s390int->type;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d33893e1e898448c05b7ec2573904a81850180b..cbfe91e101208273d789585595f146a5878b0081 100644 (file)
@@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
        return -EINVAL; /* not implemented yet */
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                   struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                       struct kvm_guest_debug *dbg)
 {
        return -EINVAL; /* not implemented yet */
 }
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3893cf12eacf4a4bc595b87ee632f84e6b8c7f89..00bbe69b78da97757b8da8840d0f55977a7aca8c 100644 (file)
@@ -50,7 +50,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 
 /* implemented in priv.c */
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3605df45dd419fb2082b193c8e7438bf5b815dfe..4b88834b8dd8bd4be94de20d5bf248b8f195bcd6 100644 (file)
@@ -304,12 +304,24 @@ static intercept_handler_t priv_handlers[256] = {
        [0xb1] = handle_stfl,
 };
 
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 {
        intercept_handler_t handler;
 
+       /*
+        * A lot of B2 instructions are privileged. We first check for
+        * the privileged ones that we can handle in the kernel. If the
+        * kernel can handle this instruction, we check for the problem
+        * state bit and either handle the instruction or send a code 2
+        * program check.
+        * Anything else goes to userspace. */
        handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
+       if (handler) {
+               if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+                       return kvm_s390_inject_program_int(vcpu,
+                                                  PGM_PRIVILEGED_OPERATION);
+               else
+                       return handler(vcpu);
+       }
        return -ENOTSUPP;
 }
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 2a01b9e02801f14afd348162f31679237cbd340a..f27dbedf086600280964164b207851c5f2f98dd2 100644 (file)
@@ -153,8 +153,6 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 
        switch (parameter & 0xff) {
        case 0:
-               printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
-                                                       " not supported");
                rc = 3; /* not operational */
                break;
        case 1:
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec4583fd94fc9f6eb44aa8bda088ab3a..dc3f6cf117045ee50b2e92aa87a9b772ce8150bd 100644 (file)
@@ -15,6 +15,7 @@
 #define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
        __s64 count_load_time;
 };
 
+struct kvm_debug_exit_arch {
+       __u32 exception;
+       __u32 pad;
+       __u64 pc;
+       __u64 dr6;
+       __u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP         0x00010000
+#define KVM_GUESTDBG_USE_HW_BP         0x00020000
+#define KVM_GUESTDBG_INJECT_DB         0x00040000
+#define KVM_GUESTDBG_INJECT_BP         0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+       __u64 debugreg[8];
+};
+
 struct kvm_pit_state {
        struct kvm_pit_channel_state channels[3];
 };
+
+struct kvm_reinject_control {
+       __u8 pit_reinject;
+       __u8 reserved[31];
+};
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2fbe5e54fd5d64d7c0873113af94bb4..f0faf58044ff6c6215d1a78739b90a3b9d22336e 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
+#include <asm/msr-index.h>
 
 #define KVM_MAX_VCPUS 16
 #define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
 
 #define KVM_NR_MEM_OBJS 40
 
-struct kvm_guest_debug {
-       int enabled;
-       unsigned long bp[4];
-       int singlestep;
-};
+#define KVM_NR_DB_REGS 4
+
+#define DR6_BD         (1 << 13)
+#define DR6_BS         (1 << 14)
+#define DR6_FIXED_1    0xffff0ff0
+#define DR6_VOLATILE   0x0000e00f
+
+#define DR7_BP_EN_MASK 0x000000ff
+#define DR7_GE         (1 << 9)
+#define DR7_GD         (1 << 13)
+#define DR7_FIXED_1    0x00000400
+#define DR7_VOLATILE   0xffff23ff
 
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
  *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
  *   bits 4:7 - page table level for this shadow (1-4)
  *   bits 8:9 - page table quadrant for 2-level guests
- *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ *   bit   16 - direct mapping of virtual to physical at gfn,
+ *              used for real mode and two-dimensional paging
  *   bits 17:19 - common access permissions for all ptes in this shadow page
  */
 union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
                unsigned level:4;
                unsigned quadrant:2;
                unsigned pad_for_nice_hex_output:6;
-               unsigned metaphysical:1;
+               unsigned direct:1;
                unsigned access:3;
                unsigned invalid:1;
+               unsigned cr4_pge:1;
        };
 };
 
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
        char buf[512] __aligned(sizeof(long));
 };
 
+struct kvm_pio_request {
+       unsigned long count;
+       int cur_count;
+       gva_t guest_gva;
+       int in;
+       int port;
+       int size;
+       int string;
+       int down;
+       int rep;
+};
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
        hpa_t root_hpa;
        int root_level;
        int shadow_root_level;
+       union kvm_mmu_page_role base_role;
 
        u64 *pae_root;
 };
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
        unsigned long cr3;
        unsigned long cr4;
        unsigned long cr8;
+       u32 hflags;
        u64 pdptrs[4]; /* pae */
        u64 shadow_efer;
        u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
 
        struct mtrr_state_type mtrr_state;
        u32 pat;
+
+       int switch_db_regs;
+       unsigned long host_db[KVM_NR_DB_REGS];
+       unsigned long host_dr6;
+       unsigned long host_dr7;
+       unsigned long db[KVM_NR_DB_REGS];
+       unsigned long dr6;
+       unsigned long dr7;
+       unsigned long eff_db[KVM_NR_DB_REGS];
 };
 
 struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
 
        unsigned long irq_sources_bitmap;
        unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+       u64 vm_init_tsc;
 };
 
 struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
        void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
        int (*set_guest_debug)(struct kvm_vcpu *vcpu,
-                              struct kvm_debug_guest *dbg);
-       void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+                              struct kvm_guest_debug *dbg);
        int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
        int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
        u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
                           u32 error_code);
 
-void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
 void fx_init(struct kvm_vcpu *vcpu);
 
-int emulator_read_std(unsigned long addr,
-                     void *val,
-                     unsigned int bytes,
-                     struct kvm_vcpu *vcpu);
 int emulator_write_emulated(unsigned long addr,
                            const void *val,
                            unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
        TASK_SWITCH_GATE = 3,
 };
 
+#define HF_GIF_MASK            (1 << 0)
+#define HF_HIF_MASK            (1 << 1)
+#define HF_VINTR_MASK          (1 << 2)
+
 /*
  * Hardware virtualization extension instructions may fault if a
  * reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 358acc59ae044d421196c3beb8abcc3c6e58efc0..f4e505f286bc85cf828679dd91b4a743c582edde 100644 (file)
 #define _EFER_LME              8  /* Long mode enable */
 #define _EFER_LMA              10 /* Long mode active (read-only) */
 #define _EFER_NX               11 /* No execute enable */
+#define _EFER_SVME             12 /* Enable virtualization */
+#define _EFER_FFXSR            14 /* Enable Fast FXSAVE/FXRSTOR */
 
 #define EFER_SCE               (1<<_EFER_SCE)
 #define EFER_LME               (1<<_EFER_LME)
 #define EFER_LMA               (1<<_EFER_LMA)
 #define EFER_NX                        (1<<_EFER_NX)
+#define EFER_SVME              (1<<_EFER_SVME)
+#define EFER_FFXSR             (1<<_EFER_FFXSR)
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0              0x000000c1
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
 
+/* AMD-V MSRs */
+
+#define MSR_VM_CR                       0xc0010114
+#define MSR_VM_HSAVE_PA                 0xc0010117
+
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869449f3c66bfa7c31929b429d9513a..82ada75f3ebf142678325968ea6fa0952308e9a0 100644 (file)
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
 #define SVM_CPUID_FEATURE_SHIFT 2
 #define SVM_CPUID_FUNC 0x8000000a
 
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR       0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
 #define SVM_VM_CR_SVM_DISABLE 4
 
 #define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 59363627523844adf3f5802ae9e8521e521e1f98..e0f9aa16358bc157c71f1a31ccc5969d96b23584 100644 (file)
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
 
        wrmsrl(MSR_VM_HSAVE_PA, 0);
        rdmsrl(MSR_EFER, efer);
-       wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+       wrmsrl(MSR_EFER, efer & ~EFER_SVME);
 }
 
 /** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d86dbbdf37082161ff4c3f7338d21c..498f944010b9a112a013a5007a58117b66d6f6db 100644 (file)
@@ -270,8 +270,9 @@ enum vmcs_field {
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
 #define INTR_TYPE_NMI_INTR             (2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+#define INTR_TYPE_HARD_EXCEPTION       (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
+#define INTR_TYPE_SOFT_EXCEPTION       (6 << 8) /* software exception */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI           0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
 #define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
 #define TYPE_MOV_TO_DR                  (0 << 4)
 #define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG(eq)        (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
 
 
 /* segment AR */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee21be384aeb058f3bb96d7fe767b7..0a303c3ed11fa991902096d845a9a0c6efe7aafa 100644 (file)
@@ -4,6 +4,10 @@
 config HAVE_KVM
        bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+       default y
+
 menuconfig VIRTUALIZATION
        bool "Virtualization"
        depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5cee632b393448e325f371610f860b..c13bb92d3157708a52e211b1992a71f45554ffbb 100644 (file)
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
        if (!atomic_inc_and_test(&pt->pending))
                set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
 
+       if (!pt->reinject)
+               atomic_set(&pt->pending, 1);
+
        if (vcpu0 && waitqueue_active(&vcpu0->wq))
                wake_up_interruptible(&vcpu0->wq);
 
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
        pit->pit_state.irq_ack = 1;
 }
 
+static void pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+       struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+       if (!mask) {
+               atomic_set(&pit->pit_state.pit_timer.pending, 0);
+               pit->pit_state.irq_ack = 1;
+       }
+}
+
 struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 {
        struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
        if (!pit)
                return NULL;
 
-       mutex_lock(&kvm->lock);
        pit->irq_source_id = kvm_request_irq_source_id(kvm);
-       mutex_unlock(&kvm->lock);
        if (pit->irq_source_id < 0) {
                kfree(pit);
                return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
        pit_state->irq_ack_notifier.gsi = 0;
        pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
        kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+       pit_state->pit_timer.reinject = true;
        mutex_unlock(&pit->pit_state.lock);
 
        kvm_pit_reset(pit);
 
+       pit->mask_notifier.func = pit_mask_notifier;
+       kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+
        return pit;
 }
 
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
        struct hrtimer *timer;
 
        if (kvm->arch.vpit) {
+               kvm_unregister_irq_mask_notifier(kvm, 0,
+                                              &kvm->arch.vpit->mask_notifier);
                mutex_lock(&kvm->arch.vpit->pit_state.lock);
                timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
                hrtimer_cancel(timer);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aac8d66b4f3cfb535102b8bd47140d..6acbe4b505d5faad09d6b7d69e2cc64e870965cc 100644 (file)
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
        s64 period; /* unit: ns */
        s64 scheduled;
        atomic_t pending;
+       bool reinject;
 };
 
 struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
        struct kvm *kvm;
        struct kvm_kpit_state pit_state;
        int irq_source_id;
+       struct kvm_irq_mask_notifier mask_notifier;
 };
 
 #define KVM_PIT_BASE_ADDRESS       0x40
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103fdcf7b2a59c27ee399fdd4302fe67a..1ccb50c74f186d779e8f67b9b833e896b812b3d9 100644 (file)
 #include <linux/kvm_host.h>
 
 static void pic_lock(struct kvm_pic *s)
+       __acquires(&s->lock)
 {
        spin_lock(&s->lock);
 }
 
 static void pic_unlock(struct kvm_pic *s)
+       __releases(&s->lock)
 {
        struct kvm *kvm = s->kvm;
        unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
        spin_unlock(&s->lock);
 
        while (acks) {
-               kvm_notify_acked_irq(kvm, __ffs(acks));
+               kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+                                    __ffs(acks));
                acks &= acks - 1;
        }
 
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
 /*
  * set irq level. If an edge is detected, then the IRR is set to 1
  */
-static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
 {
-       int mask;
+       int mask, ret = 1;
        mask = 1 << irq;
        if (s->elcr & mask)     /* level triggered */
                if (level) {
+                       ret = !(s->irr & mask);
                        s->irr |= mask;
                        s->last_irr |= mask;
                } else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
                }
        else    /* edge triggered */
                if (level) {
-                       if ((s->last_irr & mask) == 0)
+                       if ((s->last_irr & mask) == 0) {
+                               ret = !(s->irr & mask);
                                s->irr |= mask;
+                       }
                        s->last_irr |= mask;
                } else
                        s->last_irr &= ~mask;
+
+       return (s->imr & mask) ? -1 : ret;
 }
 
 /*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
        pic_unlock(s);
 }
 
-void kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level)
 {
        struct kvm_pic *s = opaque;
+       int ret = -1;
 
        pic_lock(s);
        if (irq >= 0 && irq < PIC_NUM_PINS) {
-               pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+               ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
                pic_update_irq(s);
        }
        pic_unlock(s);
+
+       return ret;
 }
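
With this change kvm_pic_set_irq() reports what happened to the injection: per pic_set_irq1() above it returns -1 when the line is masked in the IMR, 0 when the interrupt was already pending in the IRR (i.e. it was coalesced), and 1 otherwise. A sketch of how a caller might consume that status (the counter is hypothetical and not part of the patch):

```c
/* Illustrative only: interpreting the new return value of kvm_pic_set_irq(). */
static unsigned long coalesced_irqs;	/* hypothetical statistic */

static void raise_pic_line(void *pic, int irq)
{
	int status = kvm_pic_set_irq(pic, irq, 1);

	if (status < 0)
		return;			/* masked by the IMR, nothing injected */
	if (status == 0)
		coalesced_irqs++;	/* already pending, the new event was merged */
}
```
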
 
 /*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
        }
        pic_update_irq(s);
        pic_unlock(s);
-       kvm_notify_acked_irq(kvm, irq);
+       kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
 
        return intno;
 }
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d08b257f2a53a230cfb9cbf1a51986..9f593188129ea83cf97e34eb77e7777aba7d1743 100644 (file)
@@ -32,6 +32,8 @@
 #include "lapic.h"
 
 #define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+       ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
 
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f6e75bf5c2a2ea64627abe98b12607..ed66e4c078dc64229d5684d5bcf0ed5373098d7b 100644 (file)
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
 };
 
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define NUM_DB_REGS 4
 
 struct kvm_vcpu;
 
@@ -29,18 +28,23 @@ struct vcpu_svm {
        struct svm_cpu_data *svm_data;
        uint64_t asid_generation;
 
-       unsigned long db_regs[NUM_DB_REGS];
-
        u64 next_rip;
 
        u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
        u64 host_gs_base;
        unsigned long host_cr2;
-       unsigned long host_db_regs[NUM_DB_REGS];
-       unsigned long host_dr6;
-       unsigned long host_dr7;
 
        u32 *msrpm;
+       struct vmcb *hsave;
+       u64 hsave_msr;
+
+       u64 nested_vmcb;
+
+       /* These are the merged vectors */
+       u32 *nested_msrpm;
+
+       /* gpa pointers to the real vectors */
+       u64 nested_vmcb_msrpm;
 };
 
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c7147372c28ed84f5a513ca553dc46979c..2a36f7f7c4c74918e31c3854f5b91da86a503168 100644 (file)
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
        struct kvm_rmap_desc *more;
 };
 
-struct kvm_shadow_walk {
-       int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
-                    u64 addr, u64 *spte, int level);
+struct kvm_shadow_walk_iterator {
+       u64 addr;
+       hpa_t shadow_addr;
+       int level;
+       u64 *sptep;
+       unsigned index;
 };
 
+#define for_each_shadow_entry(_vcpu, _addr, _walker)    \
+       for (shadow_walk_init(&(_walker), _vcpu, _addr);        \
+            shadow_walk_okay(&(_walker));                      \
+            shadow_walk_next(&(_walker)))
+
+
 struct kvm_unsync_walk {
        int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
 };
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
 
        BUG_ON(!mc->nobjs);
        p = mc->objects[--mc->nobjs];
-       memset(p, 0, size);
        return p;
 }
 
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
        set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&sp->oos_link);
-       ASSERT(is_empty_shadow_page(sp->spt));
        bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
        sp->multimapped = 0;
-       sp->global = 1;
        sp->parent_pte = parent_pte;
        --vcpu->kvm->arch.n_free_mmu_pages;
        return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
             idx < 512;                                 \
             idx = find_next_bit(bitmap, 512, idx+1))
 
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
-                  int idx)
+static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+                        int idx)
 {
        int i;
 
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
        index = kvm_page_table_hashfn(gfn);
        bucket = &kvm->arch.mmu_page_hash[index];
        hlist_for_each_entry(sp, node, bucket, hash_link)
-               if (sp->gfn == gfn && !sp->role.metaphysical
+               if (sp->gfn == gfn && !sp->role.direct
                    && !sp->role.invalid) {
                        pgprintk("%s: found role %x\n",
                                 __func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
                        i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});   \
                        i = mmu_pages_next(&pvec, &parents, i))
 
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
-                  int i)
+static int mmu_pages_next(struct kvm_mmu_pages *pvec,
+                         struct mmu_page_path *parents,
+                         int i)
 {
        int n;
 
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
        return n;
 }
 
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
+static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 {
        struct kvm_mmu_page *sp;
        unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                             gfn_t gfn,
                                             gva_t gaddr,
                                             unsigned level,
-                                            int metaphysical,
+                                            int direct,
                                             unsigned access,
                                             u64 *parent_pte)
 {
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        struct kvm_mmu_page *sp;
        struct hlist_node *node, *tmp;
 
-       role.word = 0;
-       role.glevels = vcpu->arch.mmu.root_level;
+       role = vcpu->arch.mmu.base_role;
        role.level = level;
-       role.metaphysical = metaphysical;
+       role.direct = direct;
        role.access = access;
        if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
                quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
        sp->gfn = gfn;
        sp->role = role;
+       sp->global = role.cr4_pge;
        hlist_add_head(&sp->hash_link, bucket);
-       if (!metaphysical) {
+       if (!direct) {
                if (rmap_write_protect(vcpu->kvm, gfn))
                        kvm_flush_remote_tlbs(vcpu->kvm);
                account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        return sp;
 }
 
-static int walk_shadow(struct kvm_shadow_walk *walker,
-                      struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+                            struct kvm_vcpu *vcpu, u64 addr)
 {
-       hpa_t shadow_addr;
-       int level;
-       int r;
-       u64 *sptep;
-       unsigned index;
-
-       shadow_addr = vcpu->arch.mmu.root_hpa;
-       level = vcpu->arch.mmu.shadow_root_level;
-       if (level == PT32E_ROOT_LEVEL) {
-               shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
-               shadow_addr &= PT64_BASE_ADDR_MASK;
-               if (!shadow_addr)
-                       return 1;
-               --level;
+       iterator->addr = addr;
+       iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+       iterator->level = vcpu->arch.mmu.shadow_root_level;
+       if (iterator->level == PT32E_ROOT_LEVEL) {
+               iterator->shadow_addr
+                       = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+               iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
+               --iterator->level;
+               if (!iterator->shadow_addr)
+                       iterator->level = 0;
        }
+}
 
-       while (level >= PT_PAGE_TABLE_LEVEL) {
-               index = SHADOW_PT_INDEX(addr, level);
-               sptep = ((u64 *)__va(shadow_addr)) + index;
-               r = walker->entry(walker, vcpu, addr, sptep, level);
-               if (r)
-                       return r;
-               shadow_addr = *sptep & PT64_BASE_ADDR_MASK;
-               --level;
-       }
-       return 0;
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+       if (iterator->level < PT_PAGE_TABLE_LEVEL)
+               return false;
+       iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
+       iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
+       return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+       iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+       --iterator->level;
 }
 
 static void kvm_mmu_page_unlink_children(struct kvm *kvm,
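
The three helpers above are what the new for_each_shadow_entry() iterator is built from; together they replace the callback-based walk_shadow()/struct kvm_shadow_walk scheme removed in this hunk. A minimal sketch of a caller (the loop is what the macro boils down to; the body is illustrative only):

static void example_walk(struct kvm_vcpu *vcpu, u64 addr)
{
	struct kvm_shadow_walk_iterator it;

	for (shadow_walk_init(&it, vcpu, addr);
	     shadow_walk_okay(&it);
	     shadow_walk_next(&it)) {
		/* it.level runs from the root level down to PT_PAGE_TABLE_LEVEL;
		 * it.sptep points at the shadow PTE for addr at that level. */
		if (it.level == PT_PAGE_TABLE_LEVEL)
			break;	/* reached the leaf entry */
	}
}
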
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
        kvm_mmu_page_unlink_children(kvm, sp);
        kvm_mmu_unlink_parents(kvm, sp);
        kvm_flush_remote_tlbs(kvm);
-       if (!sp->role.invalid && !sp->role.metaphysical)
+       if (!sp->role.invalid && !sp->role.direct)
                unaccount_shadowed(kvm, sp->gfn);
        if (sp->unsync)
                kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
        index = kvm_page_table_hashfn(gfn);
        bucket = &kvm->arch.mmu_page_hash[index];
        hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
-               if (sp->gfn == gfn && !sp->role.metaphysical) {
+               if (sp->gfn == gfn && !sp->role.direct) {
                        pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
                                 sp->role.word);
                        r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
+       unsigned index;
+       struct hlist_head *bucket;
        struct kvm_mmu_page *sp;
+       struct hlist_node *node, *nn;
 
-       while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
-               pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
-               kvm_mmu_zap_page(kvm, sp);
+       index = kvm_page_table_hashfn(gfn);
+       bucket = &kvm->arch.mmu_page_hash[index];
+       hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
+               if (sp->gfn == gfn && !sp->role.direct
+                   && !sp->role.invalid) {
+                       pgprintk("%s: zap %lx %x\n",
+                                __func__, gfn, sp->role.word);
+                       kvm_mmu_zap_page(kvm, sp);
+               }
        }
 }
 
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
        bucket = &vcpu->kvm->arch.mmu_page_hash[index];
        /* don't unsync if pagetable is shadowed with multiple roles */
        hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
-               if (s->gfn != sp->gfn || s->role.metaphysical)
+               if (s->gfn != sp->gfn || s->role.direct)
                        continue;
                if (s->role.word != sp->role.word)
                        return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
        u64 mt_mask = shadow_mt_mask;
        struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
 
-       if (!(vcpu->arch.cr4 & X86_CR4_PGE))
-               global = 0;
        if (!global && sp->global) {
                sp->global = 0;
                if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                        pgprintk("hfn old %lx new %lx\n",
                                 spte_to_pfn(*shadow_pte), pfn);
                        rmap_remove(vcpu->kvm, shadow_pte);
-               } else {
-                       if (largepage)
-                               was_rmapped = is_large_pte(*shadow_pte);
-                       else
-                               was_rmapped = 1;
-               }
+               } else
+                       was_rmapped = 1;
        }
        if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
                      dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-struct direct_shadow_walk {
-       struct kvm_shadow_walk walker;
-       pfn_t pfn;
-       int write;
-       int largepage;
-       int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
-                           struct kvm_vcpu *vcpu,
-                           u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+                       int largepage, gfn_t gfn, pfn_t pfn)
 {
-       struct direct_shadow_walk *walk =
-               container_of(_walk, struct direct_shadow_walk, walker);
+       struct kvm_shadow_walk_iterator iterator;
        struct kvm_mmu_page *sp;
+       int pt_write = 0;
        gfn_t pseudo_gfn;
-       gfn_t gfn = addr >> PAGE_SHIFT;
-
-       if (level == PT_PAGE_TABLE_LEVEL
-           || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
-               mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
-                            0, walk->write, 1, &walk->pt_write,
-                            walk->largepage, 0, gfn, walk->pfn, false);
-               ++vcpu->stat.pf_fixed;
-               return 1;
-       }
 
-       if (*sptep == shadow_trap_nonpresent_pte) {
-               pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
-               sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
-                                     1, ACC_ALL, sptep);
-               if (!sp) {
-                       pgprintk("nonpaging_map: ENOMEM\n");
-                       kvm_release_pfn_clean(walk->pfn);
-                       return -ENOMEM;
+       for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+               if (iterator.level == PT_PAGE_TABLE_LEVEL
+                   || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+                       mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+                                    0, write, 1, &pt_write,
+                                    largepage, 0, gfn, pfn, false);
+                       ++vcpu->stat.pf_fixed;
+                       break;
                }
 
-               set_shadow_pte(sptep,
-                              __pa(sp->spt)
-                              | PT_PRESENT_MASK | PT_WRITABLE_MASK
-                              | shadow_user_mask | shadow_x_mask);
-       }
-       return 0;
-}
+               if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+                       pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+                       sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+                                             iterator.level - 1,
+                                             1, ACC_ALL, iterator.sptep);
+                       if (!sp) {
+                               pgprintk("nonpaging_map: ENOMEM\n");
+                               kvm_release_pfn_clean(pfn);
+                               return -ENOMEM;
+                       }
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-                       int largepage, gfn_t gfn, pfn_t pfn)
-{
-       int r;
-       struct direct_shadow_walk walker = {
-               .walker = { .entry = direct_map_entry, },
-               .pfn = pfn,
-               .largepage = largepage,
-               .write = write,
-               .pt_write = 0,
-       };
-
-       r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
-       if (r < 0)
-               return r;
-       return walker.pt_write;
+                       set_shadow_pte(iterator.sptep,
+                                      __pa(sp->spt)
+                                      | PT_PRESENT_MASK | PT_WRITABLE_MASK
+                                      | shadow_user_mask | shadow_x_mask);
+               }
+       }
+       return pt_write;
 }
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
        int i;
        gfn_t root_gfn;
        struct kvm_mmu_page *sp;
-       int metaphysical = 0;
+       int direct = 0;
 
        root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
                ASSERT(!VALID_PAGE(root));
                if (tdp_enabled)
-                       metaphysical = 1;
+                       direct = 1;
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-                                     PT64_ROOT_LEVEL, metaphysical,
+                                     PT64_ROOT_LEVEL, direct,
                                      ACC_ALL, NULL);
                root = __pa(sp->spt);
                ++sp->root_count;
                vcpu->arch.mmu.root_hpa = root;
                return;
        }
-       metaphysical = !is_paging(vcpu);
+       direct = !is_paging(vcpu);
        if (tdp_enabled)
-               metaphysical = 1;
+               direct = 1;
        for (i = 0; i < 4; ++i) {
                hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
                } else if (vcpu->arch.mmu.root_level == 0)
                        root_gfn = 0;
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
-                                     PT32_ROOT_LEVEL, metaphysical,
+                                     PT32_ROOT_LEVEL, direct,
                                      ACC_ALL, NULL);
                root = __pa(sp->spt);
                ++sp->root_count;
@@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
+       int r;
+
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
        if (!is_paging(vcpu))
-               return nonpaging_init_context(vcpu);
+               r = nonpaging_init_context(vcpu);
        else if (is_long_mode(vcpu))
-               return paging64_init_context(vcpu);
+               r = paging64_init_context(vcpu);
        else if (is_pae(vcpu))
-               return paging32E_init_context(vcpu);
+               r = paging32E_init_context(vcpu);
        else
-               return paging32_init_context(vcpu);
+               r = paging32_init_context(vcpu);
+
+       vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
+
+       return r;
 }
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        index = kvm_page_table_hashfn(gfn);
        bucket = &vcpu->kvm->arch.mmu_page_hash[index];
        hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
-               if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid)
+               if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
                        continue;
                pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
                misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
        gfn_t gfn;
 
        list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
-               if (sp->role.metaphysical)
+               if (sp->role.direct)
                        continue;
 
                gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298ee953b645bef4f36f1a11594771e1..eaab2145f62b21d31558372d511b16d8f1bc57d4 100644 (file)
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 static inline int is_long_mode(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
-       return vcpu->arch.shadow_efer & EFER_LME;
+       return vcpu->arch.shadow_efer & EFER_LMA;
 #else
        return 0;
 #endif
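
The one-bit change above is easy to misread without the EFER semantics in mind: EFER.LME only enables long mode, while EFER.LMA is set by the processor once long mode is actually active (LME plus paging), so a guest that has set LME but not yet enabled paging must not be treated as a long-mode guest. For reference, a sketch of the relevant EFER bits, using illustrative EXAMPLE_ names rather than the kernel's msr-index.h spellings:

/* EFER is MSR 0xc0000080; bit numbers per the AMD64 architecture manuals */
#define EXAMPLE_EFER_SCE	(1 <<  0)	/* SYSCALL/SYSRET enable */
#define EXAMPLE_EFER_LME	(1 <<  8)	/* long mode enable (set by software) */
#define EXAMPLE_EFER_LMA	(1 << 10)	/* long mode active (set by hardware) */
#define EXAMPLE_EFER_NXE	(1 << 11)	/* no-execute enable */
#define EXAMPLE_EFER_SVME	(1 << 12)	/* SVM enable, relevant to the nested SVM code below */
#define EXAMPLE_EFER_FFXSR	(1 << 14)	/* fast FXSAVE/FXRSTOR */
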
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17ad20fab48e6901c131de0658b6df5d..6bd70206c56130ce95398c2c91a471ab471e9907 100644 (file)
@@ -25,7 +25,6 @@
 #if PTTYPE == 64
        #define pt_element_t u64
        #define guest_walker guest_walker64
-       #define shadow_walker shadow_walker64
        #define FNAME(name) paging##64_##name
        #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
        #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
 #elif PTTYPE == 32
        #define pt_element_t u32
        #define guest_walker guest_walker32
-       #define shadow_walker shadow_walker32
        #define FNAME(name) paging##32_##name
        #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
        #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
        u32 error_code;
 };
 
-struct shadow_walker {
-       struct kvm_shadow_walk walker;
-       struct guest_walker *guest_walker;
-       int user_fault;
-       int write_fault;
-       int largepage;
-       int *ptwrite;
-       pfn_t pfn;
-       u64 *sptep;
-       gpa_t pte_gpa;
-};
-
 static gfn_t gpte_to_gfn(pt_element_t gpte)
 {
        return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
-static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
-                                   struct kvm_vcpu *vcpu, u64 addr,
-                                   u64 *sptep, int level)
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+                        struct guest_walker *gw,
+                        int user_fault, int write_fault, int largepage,
+                        int *ptwrite, pfn_t pfn)
 {
-       struct shadow_walker *sw =
-               container_of(_sw, struct shadow_walker, walker);
-       struct guest_walker *gw = sw->guest_walker;
        unsigned access = gw->pt_access;
        struct kvm_mmu_page *shadow_page;
-       u64 spte;
-       int metaphysical;
+       u64 spte, *sptep;
+       int direct;
        gfn_t table_gfn;
        int r;
+       int level;
        pt_element_t curr_pte;
+       struct kvm_shadow_walk_iterator iterator;
 
-       if (level == PT_PAGE_TABLE_LEVEL
-           || (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
-               mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
-                            sw->user_fault, sw->write_fault,
-                            gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-                            sw->ptwrite, sw->largepage,
-                            gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
-                            gw->gfn, sw->pfn, false);
-               sw->sptep = sptep;
-               return 1;
-       }
+       if (!is_present_pte(gw->ptes[gw->level - 1]))
+               return NULL;
 
-       if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
-               return 0;
+       for_each_shadow_entry(vcpu, addr, iterator) {
+               level = iterator.level;
+               sptep = iterator.sptep;
+               if (level == PT_PAGE_TABLE_LEVEL
+                   || (largepage && level == PT_DIRECTORY_LEVEL)) {
+                       mmu_set_spte(vcpu, sptep, access,
+                                    gw->pte_access & access,
+                                    user_fault, write_fault,
+                                    gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+                                    ptwrite, largepage,
+                                    gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+                                    gw->gfn, pfn, false);
+                       break;
+               }
 
-       if (is_large_pte(*sptep)) {
-               set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
-               kvm_flush_remote_tlbs(vcpu->kvm);
-               rmap_remove(vcpu->kvm, sptep);
-       }
+               if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
+                       continue;
 
-       if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
-               metaphysical = 1;
-               if (!is_dirty_pte(gw->ptes[level - 1]))
-                       access &= ~ACC_WRITE_MASK;
-               table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
-       } else {
-               metaphysical = 0;
-               table_gfn = gw->table_gfn[level - 2];
-       }
-       shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
-                                      metaphysical, access, sptep);
-       if (!metaphysical) {
-               r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
-                                         &curr_pte, sizeof(curr_pte));
-               if (r || curr_pte != gw->ptes[level - 2]) {
-                       kvm_mmu_put_page(shadow_page, sptep);
-                       kvm_release_pfn_clean(sw->pfn);
-                       sw->sptep = NULL;
-                       return 1;
+               if (is_large_pte(*sptep)) {
+                       rmap_remove(vcpu->kvm, sptep);
+                       set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+                       kvm_flush_remote_tlbs(vcpu->kvm);
                }
-       }
 
-       spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
-               | PT_WRITABLE_MASK | PT_USER_MASK;
-       *sptep = spte;
-       return 0;
-}
-
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-                        struct guest_walker *guest_walker,
-                        int user_fault, int write_fault, int largepage,
-                        int *ptwrite, pfn_t pfn)
-{
-       struct shadow_walker walker = {
-               .walker = { .entry = FNAME(shadow_walk_entry), },
-               .guest_walker = guest_walker,
-               .user_fault = user_fault,
-               .write_fault = write_fault,
-               .largepage = largepage,
-               .ptwrite = ptwrite,
-               .pfn = pfn,
-       };
-
-       if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
-               return NULL;
+               if (level == PT_DIRECTORY_LEVEL
+                   && gw->level == PT_DIRECTORY_LEVEL) {
+                       direct = 1;
+                       if (!is_dirty_pte(gw->ptes[level - 1]))
+                               access &= ~ACC_WRITE_MASK;
+                       table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+               } else {
+                       direct = 0;
+                       table_gfn = gw->table_gfn[level - 2];
+               }
+               shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+                                              direct, access, sptep);
+               if (!direct) {
+                       r = kvm_read_guest_atomic(vcpu->kvm,
+                                                 gw->pte_gpa[level - 2],
+                                                 &curr_pte, sizeof(curr_pte));
+                       if (r || curr_pte != gw->ptes[level - 2]) {
+                               kvm_mmu_put_page(shadow_page, sptep);
+                               kvm_release_pfn_clean(pfn);
+                               sptep = NULL;
+                               break;
+                       }
+               }
 
-       walk_shadow(&walker.walker, vcpu, addr);
+               spte = __pa(shadow_page->spt)
+                       | PT_PRESENT_MASK | PT_ACCESSED_MASK
+                       | PT_WRITABLE_MASK | PT_USER_MASK;
+               *sptep = spte;
+       }
 
-       return walker.sptep;
+       return sptep;
 }
 
 /*
@@ -465,54 +439,56 @@ out_unlock:
        return 0;
 }
 
-static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
-                                     struct kvm_vcpu *vcpu, u64 addr,
-                                     u64 *sptep, int level)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
-       struct shadow_walker *sw =
-               container_of(_sw, struct shadow_walker, walker);
-
-       /* FIXME: properly handle invlpg on large guest pages */
-       if (level == PT_PAGE_TABLE_LEVEL ||
-           ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
-               struct kvm_mmu_page *sp = page_header(__pa(sptep));
+       struct kvm_shadow_walk_iterator iterator;
+       pt_element_t gpte;
+       gpa_t pte_gpa = -1;
+       int level;
+       u64 *sptep;
+       int need_flush = 0;
 
-               sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
-               sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+       spin_lock(&vcpu->kvm->mmu_lock);
 
-               if (is_shadow_present_pte(*sptep)) {
-                       rmap_remove(vcpu->kvm, sptep);
-                       if (is_large_pte(*sptep))
-                               --vcpu->kvm->stat.lpages;
+       for_each_shadow_entry(vcpu, gva, iterator) {
+               level = iterator.level;
+               sptep = iterator.sptep;
+
+               /* FIXME: properly handle invlpg on large guest pages */
+               if (level == PT_PAGE_TABLE_LEVEL ||
+                   ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+                       struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+                       pte_gpa = (sp->gfn << PAGE_SHIFT);
+                       pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+                       if (is_shadow_present_pte(*sptep)) {
+                               rmap_remove(vcpu->kvm, sptep);
+                               if (is_large_pte(*sptep))
+                                       --vcpu->kvm->stat.lpages;
+                               need_flush = 1;
+                       }
+                       set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+                       break;
                }
-               set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
-               return 1;
-       }
-       if (!is_shadow_present_pte(*sptep))
-               return 1;
-       return 0;
-}
 
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
-{
-       pt_element_t gpte;
-       struct shadow_walker walker = {
-               .walker = { .entry = FNAME(shadow_invlpg_entry), },
-               .pte_gpa = -1,
-       };
+               if (!is_shadow_present_pte(*sptep))
+                       break;
+       }
 
-       spin_lock(&vcpu->kvm->mmu_lock);
-       walk_shadow(&walker.walker, vcpu, gva);
+       if (need_flush)
+               kvm_flush_remote_tlbs(vcpu->kvm);
        spin_unlock(&vcpu->kvm->mmu_lock);
-       if (walker.pte_gpa == -1)
+
+       if (pte_gpa == -1)
                return;
-       if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+       if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
                                  sizeof(pt_element_t)))
                return;
        if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
                if (mmu_topup_memory_caches(vcpu))
                        return;
-               kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+               kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
                                  sizeof(pt_element_t), 0);
        }
 }
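
The rewritten invlpg handler above drops the leaf spte for the faulting gva under mmu_lock, flushes remote TLBs only if a present mapping was actually removed, and then, after dropping the lock, re-reads the guest PTE and feeds it to kvm_mmu_pte_write() so a still-valid translation is re-shadowed promptly. The guest-side trigger is simply the INVLPG instruction; a hedged sketch of the helper a guest kernel would use:

/* illustrative guest-side snippet: invalidate the TLB entry covering one page */
static inline void example_guest_invlpg(void *va)
{
	asm volatile("invlpg (%0)" : : "r" (va) : "memory");
}
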
@@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
        pt_element_t pt[256 / sizeof(pt_element_t)];
        gpa_t pte_gpa;
 
-       if (sp->role.metaphysical
+       if (sp->role.direct
            || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
                nonpaging_prefetch_page(vcpu, sp);
                return;
@@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 #undef pt_element_t
 #undef guest_walker
-#undef shadow_walker
 #undef FNAME
 #undef PT_BASE_ADDR_MASK
 #undef PT_INDEX
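
For context on the #define/#undef pairs above: paging_tmpl.h is a template that mmu.c includes once per guest page-table format, so removing the shadow_walker name here removes it from both instantiations at once. Roughly, the inclusion pattern in mmu.c looks like this:

/* in mmu.c: each inclusion stamps out a paging64_* or paging32_* set of functions */
#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE
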
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e2513b552d1a035884959a8a5280c84d..1821c2078199270cddc130856588650d81b7857c 100644 (file)
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
 #define IOPM_ALLOC_ORDER 2
 #define MSRPM_ALLOC_ORDER 1
 
-#define DR7_GD_MASK (1 << 13)
-#define DR6_BD_MASK (1 << 13)
-
 #define SEG_TYPE_LDT 2
 #define SEG_TYPE_BUSY_TSS16 3
 
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
+/* Turn on to get debugging output */
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while(0)
+#endif
+
 /* enable NPT for AMD64 and X86 with PAE */
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
+static int nested = 0;
+module_param(nested, int, S_IRUGO);
+
 static void kvm_reput_irq(struct vcpu_svm *svm);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_vmexit(struct vcpu_svm *svm);
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+                            void *arg2, void *opaque);
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+                                     bool has_error_code, u32 error_code);
+
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
        return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
+static inline bool is_nested(struct vcpu_svm *svm)
+{
+       return svm->nested_vmcb;
+}
+
 static unsigned long iopm_base;
 
 struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
        asm volatile ("mov %0, %%cr2" :: "r" (val));
 }
 
-static inline unsigned long read_dr6(void)
-{
-       unsigned long dr6;
-
-       asm volatile ("mov %%dr6, %0" : "=r" (dr6));
-       return dr6;
-}
-
-static inline void write_dr6(unsigned long val)
-{
-       asm volatile ("mov %0, %%dr6" :: "r" (val));
-}
-
-static inline unsigned long read_dr7(void)
-{
-       unsigned long dr7;
-
-       asm volatile ("mov %%dr7, %0" : "=r" (dr7));
-       return dr7;
-}
-
-static inline void write_dr7(unsigned long val)
-{
-       asm volatile ("mov %0, %%dr7" :: "r" (val));
-}
-
 static inline void force_new_asid(struct kvm_vcpu *vcpu)
 {
        to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
        if (!npt_enabled && !(efer & EFER_LMA))
                efer &= ~EFER_LME;
 
-       to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+       to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
        vcpu->arch.shadow_efer = efer;
 }
 
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /* If we are within a nested VM we'd better #VMEXIT and let the
+          guest handle the exception */
+       if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
+               return;
+
        svm->vmcb->control.event_inj = nr
                | SVM_EVTINJ_VALID
                | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        kvm_rip_write(vcpu, svm->next_rip);
        svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
 
-       vcpu->arch.interrupt_window_open = 1;
+       vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
        const char *msg;
 
        if (!cpu_has_svm(&msg)) {
-               printk(KERN_INFO "has_svn: %s\n", msg);
+               printk(KERN_INFO "has_svm: %s\n", msg);
                return 0;
        }
 
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
        svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
        rdmsrl(MSR_EFER, efer);
-       wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+       wrmsrl(MSR_EFER, efer | EFER_SVME);
 
        wrmsrl(MSR_VM_HSAVE_PA,
               page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void)
        if (boot_cpu_has(X86_FEATURE_NX))
                kvm_enable_efer_bits(EFER_NX);
 
+       if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+               kvm_enable_efer_bits(EFER_FFXSR);
+
+       if (nested) {
+               printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+               kvm_enable_efer_bits(EFER_SVME);
+       }
+
        for_each_online_cpu(cpu) {
                r = svm_cpu_init(cpu);
                if (r)
@@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-       save->efer = MSR_EFER_SVME_MASK;
+       save->efer = EFER_SVME;
        save->dr6 = 0xffff0ff0;
        save->dr7 = 0x400;
        save->rflags = 2;
@@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm)
                save->cr4 = 0;
        }
        force_new_asid(&svm->vcpu);
+
+       svm->nested_vmcb = 0;
+       svm->vcpu.arch.hflags = HF_GIF_MASK;
 }
 
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        struct vcpu_svm *svm;
        struct page *page;
        struct page *msrpm_pages;
+       struct page *hsave_page;
+       struct page *nested_msrpm_pages;
        int err;
 
        svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
        if (!msrpm_pages)
                goto uninit;
+
+       nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+       if (!nested_msrpm_pages)
+               goto uninit;
+
        svm->msrpm = page_address(msrpm_pages);
        svm_vcpu_init_msrpm(svm->msrpm);
 
+       hsave_page = alloc_page(GFP_KERNEL);
+       if (!hsave_page)
+               goto uninit;
+       svm->hsave = page_address(hsave_page);
+
+       svm->nested_msrpm = page_address(nested_msrpm_pages);
+
        svm->vmcb = page_address(page);
        clear_page(svm->vmcb);
        svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
        svm->asid_generation = 0;
-       memset(svm->db_regs, 0, sizeof(svm->db_regs));
        init_vmcb(svm);
 
        fx_init(&svm->vcpu);
@@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
        __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+       __free_page(virt_to_page(svm->hsave));
+       __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
        to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+       svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
 {
        struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
        var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
        var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
 
-       /*
-        * SVM always stores 0 for the 'G' bit in the CS selector in
-        * the VMCB on a VMEXIT. This hurts cross-vendor migration:
-        * Intel's VMENTRY has a check on the 'G' bit.
-        */
-       if (seg == VCPU_SREG_CS)
+       switch (seg) {
+       case VCPU_SREG_CS:
+               /*
+                * SVM always stores 0 for the 'G' bit in the CS selector in
+                * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+                * Intel's VMENTRY has a check on the 'G' bit.
+                */
                var->g = s->limit > 0xfffff;
-
-       /*
-        * Work around a bug where the busy flag in the tr selector
-        * isn't exposed
-        */
-       if (seg == VCPU_SREG_TR)
+               break;
+       case VCPU_SREG_TR:
+               /*
+                * Work around a bug where the busy flag in the tr selector
+                * isn't exposed
+                */
                var->type |= 0x2;
+               break;
+       case VCPU_SREG_DS:
+       case VCPU_SREG_ES:
+       case VCPU_SREG_FS:
+       case VCPU_SREG_GS:
+               /*
+                * The accessed bit must always be set in the segment
+                * descriptor cache, although it can be cleared in the
+                * descriptor, the cached bit always remains at 1. Since
+                * Intel has a check on this, set it here to support
+                * cross-vendor migration.
+                */
+               if (!var->unusable)
+                       var->type |= 0x1;
+               break;
+       }
 
        var->unusable = !var->present;
 }
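
All three fixups in this hunk (forcing CS.G from the limit, restoring the TR busy bit, and now setting the accessed bit on the data segments) compensate for information the VMCB's packed attribute field loses across a VMEXIT, which otherwise trips Intel's stricter VMENTRY checks after a cross-vendor migration. As a rough reference, assuming the SVM_SELECTOR_*_SHIFT layout from asm/svm.h, the attrib field packs the access-rights bits like this:

/* Illustrative only: bit positions as used by the SVM_SELECTOR_* macros above */
#define EXAMPLE_ATTRIB_TYPE(a)	((a) & 0xf)		/* bits 0-3; bit 0 is the accessed bit */
#define EXAMPLE_ATTRIB_S(a)	(((a) >> 4) & 1)	/* code/data vs. system segment */
#define EXAMPLE_ATTRIB_DPL(a)	(((a) >> 5) & 3)
#define EXAMPLE_ATTRIB_P(a)	(((a) >> 7) & 1)	/* present */
#define EXAMPLE_ATTRIB_AVL(a)	(((a) >> 8) & 1)
#define EXAMPLE_ATTRIB_L(a)	(((a) >> 9) & 1)	/* 64-bit code segment */
#define EXAMPLE_ATTRIB_DB(a)	(((a) >> 10) & 1)
#define EXAMPLE_ATTRIB_G(a)	(((a) >> 11) & 1)	/* granularity */

For a TSS the busy flag is bit 1 of the type field, which is why the TR case above ORs in 0x2.
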
@@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-       return -EOPNOTSUPP;
+       int old_debug = vcpu->guest_debug;
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       vcpu->guest_debug = dbg->control;
+
+       svm->vmcb->control.intercept_exceptions &=
+               ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+       if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+               if (vcpu->guest_debug &
+                   (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+                       svm->vmcb->control.intercept_exceptions |=
+                               1 << DB_VECTOR;
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+                       svm->vmcb->control.intercept_exceptions |=
+                               1 << BP_VECTOR;
+       } else
+               vcpu->guest_debug = 0;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+               svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
+       else
+               svm->vmcb->save.dr7 = vcpu->arch.dr7;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+       else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+               svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+       return 0;
 }
 
 static int svm_get_irq(struct kvm_vcpu *vcpu)
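
svm_guest_debug() above is reached from the KVM_SET_GUEST_DEBUG vcpu ioctl, the new guest-debug interface this series introduces. A hedged userspace sketch of arming a single hardware breakpoint, assuming the struct kvm_guest_debug layout from include/linux/kvm.h (vcpu_fd and the DR7 value are illustrative):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* vcpu_fd: an open KVM vcpu file descriptor; addr: guest-virtual address to trap */
static int example_set_hw_breakpoint(int vcpu_fd, unsigned long long addr)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
	dbg.arch.debugreg[0] = addr;	/* DR0 holds the breakpoint address */
	dbg.arch.debugreg[7] = 0x1;	/* DR7: local-enable breakpoint 0, execute, 1 byte */

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
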
@@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
 
 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
 {
-       unsigned long val = to_svm(vcpu)->db_regs[dr];
+       struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long val;
+
+       switch (dr) {
+       case 0 ... 3:
+               val = vcpu->arch.db[dr];
+               break;
+       case 6:
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+                       val = vcpu->arch.dr6;
+               else
+                       val = svm->vmcb->save.dr6;
+               break;
+       case 7:
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+                       val = vcpu->arch.dr7;
+               else
+                       val = svm->vmcb->save.dr7;
+               break;
+       default:
+               val = 0;
+       }
+
        KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
        return val;
 }
@@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       *exception = 0;
+       KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
 
-       if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
-               svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
-               svm->vmcb->save.dr6 |= DR6_BD_MASK;
-               *exception = DB_VECTOR;
-               return;
-       }
+       *exception = 0;
 
        switch (dr) {
        case 0 ... 3:
-               svm->db_regs[dr] = value;
+               vcpu->arch.db[dr] = value;
+               if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+                       vcpu->arch.eff_db[dr] = value;
                return;
        case 4 ... 5:
-               if (vcpu->arch.cr4 & X86_CR4_DE) {
+               if (vcpu->arch.cr4 & X86_CR4_DE)
                        *exception = UD_VECTOR;
+               return;
+       case 6:
+               if (value & 0xffffffff00000000ULL) {
+                       *exception = GP_VECTOR;
                        return;
                }
-       case 7: {
-               if (value & ~((1ULL << 32) - 1)) {
+               vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
+               return;
+       case 7:
+               if (value & 0xffffffff00000000ULL) {
                        *exception = GP_VECTOR;
                        return;
                }
-               svm->vmcb->save.dr7 = value;
+               vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
+               if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+                       svm->vmcb->save.dr7 = vcpu->arch.dr7;
+                       vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
+               }
                return;
-       }
        default:
+               /* FIXME: Possible case? */
                printk(KERN_DEBUG "%s: unexpected dr %u\n",
                       __func__, dr);
                *exception = UD_VECTOR;
@@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
+static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (!(svm->vcpu.guest_debug &
+             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+               kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+               return 1;
+       }
+       kvm_run->exit_reason = KVM_EXIT_DEBUG;
+       kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+       kvm_run->debug.arch.exception = DB_VECTOR;
+       return 0;
+}
+
+static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       kvm_run->exit_reason = KVM_EXIT_DEBUG;
+       kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+       kvm_run->debug.arch.exception = BP_VECTOR;
+       return 0;
+}
+
 static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
        int er;
@@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
        u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-       int size, down, in, string, rep;
+       int size, in, string;
        unsigned port;
 
        ++svm->vcpu.stat.io_exits;
@@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
-       rep = (io_info & SVM_IOIO_REP_MASK) != 0;
-       down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
        skip_emulated_instruction(&svm->vcpu);
        return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
@@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+       if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+           || !is_paging(&svm->vcpu)) {
+               kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       if (svm->vmcb->save.cpl) {
+               kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
+       }
+
+       return 0;
+}
+
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+                                     bool has_error_code, u32 error_code)
+{
+       if (is_nested(svm)) {
+               svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+               svm->vmcb->control.exit_code_hi = 0;
+               svm->vmcb->control.exit_info_1 = error_code;
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+               if (nested_svm_exit_handled(svm, false)) {
+                       nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
+
+                       nested_svm_vmexit(svm);
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+static inline int nested_svm_intr(struct vcpu_svm *svm)
+{
+       if (is_nested(svm)) {
+               if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+                       return 0;
+
+               if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+                       return 0;
+
+               svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+               if (nested_svm_exit_handled(svm, false)) {
+                       nsvm_printk("VMexit -> INTR\n");
+                       nested_svm_vmexit(svm);
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+       struct page *page;
+
+       down_read(&current->mm->mmap_sem);
+       page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+       up_read(&current->mm->mmap_sem);
+
+       if (is_error_page(page)) {
+               printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+                      __func__, gpa);
+               kvm_release_page_clean(page);
+               kvm_inject_gp(&svm->vcpu, 0);
+               return NULL;
+       }
+       return page;
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+                        u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+                        int (*handler)(struct vcpu_svm *svm,
+                                       void *arg1,
+                                       void *arg2,
+                                       void *opaque))
+{
+       struct page *arg1_page;
+       struct page *arg2_page = NULL;
+       void *arg1;
+       void *arg2 = NULL;
+       int retval;
+
+       arg1_page = nested_svm_get_page(svm, arg1_gpa);
+       if(arg1_page == NULL)
+               return 1;
+
+       if (arg2_gpa) {
+               arg2_page = nested_svm_get_page(svm, arg2_gpa);
+               if(arg2_page == NULL) {
+                       kvm_release_page_clean(arg1_page);
+                       return 1;
+               }
+       }
+
+       arg1 = kmap_atomic(arg1_page, KM_USER0);
+       if (arg2_gpa)
+               arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+       retval = handler(svm, arg1, arg2, opaque);
+
+       kunmap_atomic(arg1, KM_USER0);
+       if (arg2_gpa)
+               kunmap_atomic(arg2, KM_USER1);
+
+       kvm_release_page_dirty(arg1_page);
+       if (arg2_gpa)
+               kvm_release_page_dirty(arg2_page);
+
+       return retval;
+}
+
+static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
+                                       void *arg1,
+                                       void *arg2,
+                                       void *opaque)
+{
+       struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+       bool kvm_overrides = *(bool *)opaque;
+       u32 exit_code = svm->vmcb->control.exit_code;
+
+       if (kvm_overrides) {
+               switch (exit_code) {
+               case SVM_EXIT_INTR:
+               case SVM_EXIT_NMI:
+                       return 0;
+               /* For now we are always handling NPFs when using them */
+               case SVM_EXIT_NPF:
+                       if (npt_enabled)
+                               return 0;
+                       break;
+               /* When we're shadowing, trap PFs */
+               case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+                       if (!npt_enabled)
+                               return 0;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       switch (exit_code) {
+       case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
+               u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
+               if (nested_vmcb->control.intercept_cr_read & cr_bits)
+                       return 1;
+               break;
+       }
+       case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
+               u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
+               if (nested_vmcb->control.intercept_cr_write & cr_bits)
+                       return 1;
+               break;
+       }
+       case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
+               u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
+               if (nested_vmcb->control.intercept_dr_read & dr_bits)
+                       return 1;
+               break;
+       }
+       case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
+               u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
+               if (nested_vmcb->control.intercept_dr_write & dr_bits)
+                       return 1;
+               break;
+       }
+       case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+               u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+               if (nested_vmcb->control.intercept_exceptions & excp_bits)
+                       return 1;
+               break;
+       }
+       default: {
+               u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
+               nsvm_printk("exit code: 0x%x\n", exit_code);
+               if (nested_vmcb->control.intercept & exit_bits)
+                       return 1;
+       }
+       }
+
+       return 0;
+}
+
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
+                                      void *arg1, void *arg2,
+                                      void *opaque)
+{
+       struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+       u8 *msrpm = (u8 *)arg2;
+       u32 t0, t1;
+       u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+       u32 param = svm->vmcb->control.exit_info_1 & 1;
+
+       if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+               return 0;
+
+       switch(msr) {
+       case 0 ... 0x1fff:
+               t0 = (msr * 2) % 8;
+               t1 = msr / 8;
+               break;
+       case 0xc0000000 ... 0xc0001fff:
+               t0 = (8192 + msr - 0xc0000000) * 2;
+               t1 = (t0 / 8);
+               t0 %= 8;
+               break;
+       case 0xc0010000 ... 0xc0011fff:
+               t0 = (16384 + msr - 0xc0010000) * 2;
+               t1 = (t0 / 8);
+               t0 %= 8;
+               break;
+       default:
+               return 1;
+               break;
+       }
+       if (msrpm[t1] & ((1 << param) << t0))
+               return 1;
+
+       return 0;
+}
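
To make the bitmap arithmetic above concrete, a worked example for EFER (MSR 0xc0000080), which lands in the 0xc0000000 range handled by the second case; the numbers simply mirror the calculation in the code:

/*
 * msr = 0xc0000080:
 *   t0 = (8192 + 0x80) * 2 = 16640   bit position within the whole msrpm
 *   t1 = 16640 / 8        = 2080     byte index into the bitmap
 *   t0 = 16640 % 8        = 0        bit offset within that byte
 * param is 0 for RDMSR and 1 for WRMSR, so the intercept test becomes
 *   msrpm[2080] & ((1 << param) << 0)
 */
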
+
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+{
+       bool k = kvm_override;
+
+       switch (svm->vmcb->control.exit_code) {
+       case SVM_EXIT_MSR:
+               return nested_svm_do(svm, svm->nested_vmcb,
+                                    svm->nested_vmcb_msrpm, NULL,
+                                    nested_svm_exit_handled_msr);
+       default: break;
+       }
+
+       return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
+                            nested_svm_exit_handled_real);
+}
+
+static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
+                                 void *arg2, void *opaque)
+{
+       struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+       struct vmcb *hsave = svm->hsave;
+       u64 nested_save[] = { nested_vmcb->save.cr0,
+                             nested_vmcb->save.cr3,
+                             nested_vmcb->save.cr4,
+                             nested_vmcb->save.efer,
+                             nested_vmcb->control.intercept_cr_read,
+                             nested_vmcb->control.intercept_cr_write,
+                             nested_vmcb->control.intercept_dr_read,
+                             nested_vmcb->control.intercept_dr_write,
+                             nested_vmcb->control.intercept_exceptions,
+                             nested_vmcb->control.intercept,
+                             nested_vmcb->control.msrpm_base_pa,
+                             nested_vmcb->control.iopm_base_pa,
+                             nested_vmcb->control.tsc_offset };
+
+       /* Give the current vmcb to the guest */
+       memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
+       nested_vmcb->save.cr0 = nested_save[0];
+       if (!npt_enabled)
+               nested_vmcb->save.cr3 = nested_save[1];
+       nested_vmcb->save.cr4 = nested_save[2];
+       nested_vmcb->save.efer = nested_save[3];
+       nested_vmcb->control.intercept_cr_read = nested_save[4];
+       nested_vmcb->control.intercept_cr_write = nested_save[5];
+       nested_vmcb->control.intercept_dr_read = nested_save[6];
+       nested_vmcb->control.intercept_dr_write = nested_save[7];
+       nested_vmcb->control.intercept_exceptions = nested_save[8];
+       nested_vmcb->control.intercept = nested_save[9];
+       nested_vmcb->control.msrpm_base_pa = nested_save[10];
+       nested_vmcb->control.iopm_base_pa = nested_save[11];
+       nested_vmcb->control.tsc_offset = nested_save[12];
+
+       /* We always set V_INTR_MASKING and remember the old value in hflags */
+       if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+               nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+       if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
+           (nested_vmcb->control.int_vector)) {
+               nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
+                               nested_vmcb->control.int_vector);
+       }
+
+       /* Restore the original control entries */
+       svm->vmcb->control = hsave->control;
+
+       /* Kill any pending exceptions */
+       if (svm->vcpu.arch.exception.pending == true)
+               nsvm_printk("WARNING: Pending Exception\n");
+       svm->vcpu.arch.exception.pending = false;
+
+       /* Restore selected save entries */
+       svm->vmcb->save.es = hsave->save.es;
+       svm->vmcb->save.cs = hsave->save.cs;
+       svm->vmcb->save.ss = hsave->save.ss;
+       svm->vmcb->save.ds = hsave->save.ds;
+       svm->vmcb->save.gdtr = hsave->save.gdtr;
+       svm->vmcb->save.idtr = hsave->save.idtr;
+       svm->vmcb->save.rflags = hsave->save.rflags;
+       svm_set_efer(&svm->vcpu, hsave->save.efer);
+       svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
+       svm_set_cr4(&svm->vcpu, hsave->save.cr4);
+       if (npt_enabled) {
+               svm->vmcb->save.cr3 = hsave->save.cr3;
+               svm->vcpu.arch.cr3 = hsave->save.cr3;
+       } else {
+               kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
+       }
+       kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
+       kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
+       kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+       svm->vmcb->save.dr7 = 0;
+       svm->vmcb->save.cpl = 0;
+       svm->vmcb->control.exit_int_info = 0;
+
+       svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+       /* Exit nested SVM mode */
+       svm->nested_vmcb = 0;
+
+       return 0;
+}
+
+static int nested_svm_vmexit(struct vcpu_svm *svm)
+{
+       nsvm_printk("VMexit\n");
+       if (nested_svm_do(svm, svm->nested_vmcb, 0,
+                         NULL, nested_svm_vmexit_real))
+               return 1;
+
+       kvm_mmu_reset_context(&svm->vcpu);
+       kvm_mmu_load(&svm->vcpu);
+
+       return 0;
+}
+
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+                                 void *arg2, void *opaque)
+{
+       int i;
+       u32 *nested_msrpm = (u32*)arg1;
+       for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
+               svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+       svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+
+       return 0;
+}
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+                           void *arg2, void *opaque)
+{
+       struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+       struct vmcb *hsave = svm->hsave;
+
+       /* nested_vmcb is our indicator if nested SVM is activated */
+       svm->nested_vmcb = svm->vmcb->save.rax;
+
+       /* Clear internal status */
+       svm->vcpu.arch.exception.pending = false;
+
+       /* Save the old vmcb, so we don't need to pick what we save, but
+          can restore everything when a VMEXIT occurs */
+       memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
+       /* We need to remember the original CR3 in the SPT case */
+       if (!npt_enabled)
+               hsave->save.cr3 = svm->vcpu.arch.cr3;
+       hsave->save.cr4 = svm->vcpu.arch.cr4;
+       hsave->save.rip = svm->next_rip;
+
+       if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+               svm->vcpu.arch.hflags |= HF_HIF_MASK;
+       else
+               svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+       /* Load the nested guest state */
+       svm->vmcb->save.es = nested_vmcb->save.es;
+       svm->vmcb->save.cs = nested_vmcb->save.cs;
+       svm->vmcb->save.ss = nested_vmcb->save.ss;
+       svm->vmcb->save.ds = nested_vmcb->save.ds;
+       svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+       svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+       svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+       svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+       svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+       svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+       if (npt_enabled) {
+               svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+               svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+       } else {
+               kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+               kvm_mmu_reset_context(&svm->vcpu);
+       }
+       svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+       kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+       kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
+       kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+       /* In case we don't even reach vcpu_run, the fields are not updated */
+       svm->vmcb->save.rax = nested_vmcb->save.rax;
+       svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+       svm->vmcb->save.rip = nested_vmcb->save.rip;
+       svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+       svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+       svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+       /* We don't want a nested guest to be more powerful than the guest,
+          so all intercepts are ORed */
+       svm->vmcb->control.intercept_cr_read |=
+               nested_vmcb->control.intercept_cr_read;
+       svm->vmcb->control.intercept_cr_write |=
+               nested_vmcb->control.intercept_cr_write;
+       svm->vmcb->control.intercept_dr_read |=
+               nested_vmcb->control.intercept_dr_read;
+       svm->vmcb->control.intercept_dr_write |=
+               nested_vmcb->control.intercept_dr_write;
+       svm->vmcb->control.intercept_exceptions |=
+               nested_vmcb->control.intercept_exceptions;
+
+       svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+       svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+       force_new_asid(&svm->vcpu);
+       svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
+       svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
+       svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+       if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
+               nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
+                               nested_vmcb->control.int_ctl);
+       }
+       if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+               svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+       else
+               svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+       nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
+                       nested_vmcb->control.exit_int_info,
+                       nested_vmcb->control.int_state);
+
+       svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+       svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+       svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+       if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
+               nsvm_printk("Injecting Event: 0x%x\n",
+                               nested_vmcb->control.event_inj);
+       svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+       svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+       svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+       return 0;
+}
+
+static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+       to_vmcb->save.fs = from_vmcb->save.fs;
+       to_vmcb->save.gs = from_vmcb->save.gs;
+       to_vmcb->save.tr = from_vmcb->save.tr;
+       to_vmcb->save.ldtr = from_vmcb->save.ldtr;
+       to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
+       to_vmcb->save.star = from_vmcb->save.star;
+       to_vmcb->save.lstar = from_vmcb->save.lstar;
+       to_vmcb->save.cstar = from_vmcb->save.cstar;
+       to_vmcb->save.sfmask = from_vmcb->save.sfmask;
+       to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
+       to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
+       to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+
+       return 1;
+}
+
+static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
+                            void *arg2, void *opaque)
+{
+       return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
+}
+
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+                            void *arg2, void *opaque)
+{
+       return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
+}
+
+static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+
+       return 1;
+}
+
+static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+
+       return 1;
+}
+
+static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       nsvm_printk("VMrun\n");
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
+                         NULL, nested_svm_vmrun))
+               return 1;
+
+       if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
+                     NULL, nested_svm_vmrun_msrpm))
+               return 1;
+
+       return 1;
+}
+
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+       return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+       /* After a CLGI no interrupts should come */
+       svm_clear_vintr(svm);
+       svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+       return 1;
+}
+
 static int invalid_op_interception(struct vcpu_svm *svm,
                                   struct kvm_run *kvm_run)
 {
@@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
        case MSR_IA32_LASTINTTOIP:
                *data = svm->vmcb->save.last_excp_to;
                break;
+       case MSR_VM_HSAVE_PA:
+               *data = svm->hsave_msr;
+               break;
+       case MSR_VM_CR:
+               *data = 0;
+               break;
+       case MSR_IA32_UCODE_REV:
+               *data = 0x01000065;
+               break;
        default:
                return kvm_get_msr_common(vcpu, ecx, data);
        }
@@ -1343,6 +2042,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                 */
                pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
 
+               break;
+       case MSR_VM_HSAVE_PA:
+               svm->hsave_msr = data;
                break;
        default:
                return kvm_set_msr_common(vcpu, ecx, data);
@@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 {
        KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
 
-       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+       svm_clear_vintr(svm);
        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
        /*
         * If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
        [SVM_EXIT_WRITE_DR3]                    = emulate_on_interception,
        [SVM_EXIT_WRITE_DR5]                    = emulate_on_interception,
        [SVM_EXIT_WRITE_DR7]                    = emulate_on_interception,
+       [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
+       [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
        [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
        [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
        [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
@@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
        [SVM_EXIT_MSR]                          = msr_interception,
        [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
        [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
-       [SVM_EXIT_VMRUN]                        = invalid_op_interception,
+       [SVM_EXIT_VMRUN]                        = vmrun_interception,
        [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
-       [SVM_EXIT_VMLOAD]                       = invalid_op_interception,
-       [SVM_EXIT_VMSAVE]                       = invalid_op_interception,
-       [SVM_EXIT_STGI]                         = invalid_op_interception,
-       [SVM_EXIT_CLGI]                         = invalid_op_interception,
+       [SVM_EXIT_VMLOAD]                       = vmload_interception,
+       [SVM_EXIT_VMSAVE]                       = vmsave_interception,
+       [SVM_EXIT_STGI]                         = stgi_interception,
+       [SVM_EXIT_CLGI]                         = clgi_interception,
        [SVM_EXIT_SKINIT]                       = invalid_op_interception,
        [SVM_EXIT_WBINVD]                       = emulate_on_interception,
        [SVM_EXIT_MONITOR]                      = invalid_op_interception,
@@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
                    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
 
+       if (is_nested(svm)) {
+               nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
+                           exit_code, svm->vmcb->control.exit_info_1,
+                           svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+               if (nested_svm_exit_handled(svm, true)) {
+                       nested_svm_vmexit(svm);
+                       nsvm_printk("-> #VMEXIT\n");
+                       return 1;
+               }
+       }
+
        if (npt_enabled) {
                int mmu_reload = 0;
                if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       nested_svm_intr(svm);
+
        svm_inject_irq(svm, irq);
 }
 
@@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
        if (!kvm_cpu_has_interrupt(vcpu))
                goto out;
 
+       if (nested_svm_intr(svm))
+               goto out;
+
+       if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+               goto out;
+
        if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
            (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
            (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
                /* unable to deliver irq, set pending irq */
-               vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+               svm_set_vintr(svm);
                svm_inject_irq(svm, 0x0);
                goto out;
        }
@@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
        }
 
        svm->vcpu.arch.interrupt_window_open =
-               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+                (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb_control_area *control = &svm->vmcb->control;
 
+       if (nested_svm_intr(svm))
+               return;
+
        svm->vcpu.arch.interrupt_window_open =
                (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-                (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+                (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+                (svm->vcpu.arch.hflags & HF_GIF_MASK));
 
        if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
                /*
@@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
         */
        if (!svm->vcpu.arch.interrupt_window_open &&
            (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
-               control->intercept |= 1ULL << INTERCEPT_VINTR;
-        else
-               control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+               svm_set_vintr(svm);
+       else
+               svm_clear_vintr(svm);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
        return 0;
 }
 
-static void save_db_regs(unsigned long *db_regs)
-{
-       asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
-       asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
-       asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
-       asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
-}
-
-static void load_db_regs(unsigned long *db_regs)
-{
-       asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
-       asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
-       asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
-       asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
-}
-
 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 {
        force_new_asid(vcpu);
@@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        gs_selector = kvm_read_gs();
        ldt_selector = kvm_read_ldt();
        svm->host_cr2 = kvm_read_cr2();
-       svm->host_dr6 = read_dr6();
-       svm->host_dr7 = read_dr7();
-       svm->vmcb->save.cr2 = vcpu->arch.cr2;
+       if (!is_nested(svm))
+               svm->vmcb->save.cr2 = vcpu->arch.cr2;
        /* required for live migration with NPT */
        if (npt_enabled)
                svm->vmcb->save.cr3 = vcpu->arch.cr3;
 
-       if (svm->vmcb->save.dr7 & 0xff) {
-               write_dr7(0);
-               save_db_regs(svm->host_db_regs);
-               load_db_regs(svm->db_regs);
-       }
-
        clgi();
 
        local_irq_enable();
@@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 #endif
                );
 
-       if ((svm->vmcb->save.dr7 & 0xff))
-               load_db_regs(svm->host_db_regs);
-
        vcpu->arch.cr2 = svm->vmcb->save.cr2;
        vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
        vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
        vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
-       write_dr6(svm->host_dr6);
-       write_dr7(svm->host_dr7);
        kvm_write_cr2(svm->host_cr2);
 
        kvm_load_fs(fs_selector);
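The nested-SVM VMRUN path above never copies the guest hypervisor's intercept masks verbatim; it ORs them into the host's, so the nested (L2) guest can only gain VM exits, never drop ones the host relies on. Below is a minimal, self-contained sketch of that merge rule — illustrative only, with simplified stand-in struct, field, and function names rather than the kernel's real vmcb layout:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the VMCB control area (hypothetical layout). */
struct ctl_area {
	uint32_t intercept_cr_read;
	uint32_t intercept_exceptions;
	uint64_t intercept;
};

/* OR, never assign: exits the host mandates must survive the merge. */
static void merge_intercepts(struct ctl_area *active, const struct ctl_area *nested)
{
	active->intercept_cr_read    |= nested->intercept_cr_read;
	active->intercept_exceptions |= nested->intercept_exceptions;
	active->intercept            |= nested->intercept;
}

int main(void)
{
	struct ctl_area host = { .intercept = 1ULL << 0 };  /* host needs exit 0 */
	struct ctl_area l1   = { .intercept = 1ULL << 3 };  /* L1 asks for exit 3 */

	merge_intercepts(&host, &l1);
	printf("merged intercept mask: %#llx\n",
	       (unsigned long long)host.intercept);         /* 0x9: both exits kept */
	return 0;
}

The same per-bit ORing is applied to every intercept class in nested_svm_vmrun() above; only the MSR permission bitmap is merged separately, in nested_svm_vmrun_msrpm(), again with a per-word OR.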
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af576829d820eba8de4a79d389e8baef7759..bb481330716f5eb92108aa4fbc4b93549211ceec 100644 (file)
@@ -91,6 +91,7 @@ struct vcpu_vmx {
        } rmode;
        int vpid;
        bool emulation_required;
+       enum emulation_result invalid_state_emulation_result;
 
        /* Support for vnmi-less CPUs */
        int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
-               (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+               (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_no_device(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
-               (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+               (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_invalid_opcode(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
-               (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+               (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
        eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
        if (!vcpu->fpu_active)
                eb |= 1u << NM_VECTOR;
-       if (vcpu->guest_debug.enabled)
-               eb |= 1u << DB_VECTOR;
+       if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+               if (vcpu->guest_debug &
+                   (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+                       eb |= 1u << DB_VECTOR;
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+                       eb |= 1u << BP_VECTOR;
+       }
        if (vcpu->arch.rmode.active)
                eb = ~0;
        if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                                bool has_error_code, u32 error_code)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-       if (has_error_code)
+       if (has_error_code) {
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+               intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+       }
 
        if (vcpu->arch.rmode.active) {
                vmx->rmode.irq.pending = true;
                vmx->rmode.irq.vector = nr;
                vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-               if (nr == BP_VECTOR)
+               if (nr == BP_VECTOR || nr == OF_VECTOR)
                        vmx->rmode.irq.rip++;
-               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                            nr | INTR_TYPE_SOFT_INTR
-                            | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
-                            | INTR_INFO_VALID_MASK);
+               intr_info |= INTR_TYPE_SOFT_INTR;
+               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
                kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
                return;
        }
 
-       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                    nr | INTR_TYPE_EXCEPTION
-                    | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
-                    | INTR_INFO_VALID_MASK);
+       if (nr == BP_VECTOR || nr == OF_VECTOR) {
+               vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+               intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+       } else
+               intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
 static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
  * writes 'guest_tsc' into guest's timestamp counter "register"
  * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
  */
-static void guest_write_tsc(u64 guest_tsc)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 {
-       u64 host_tsc;
-
-       rdtscll(host_tsc);
        vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
 }
 
@@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct kvm_msr_entry *msr;
+       u64 host_tsc;
        int ret = 0;
 
        switch (msr_index) {
-#ifdef CONFIG_X86_64
        case MSR_EFER:
                vmx_load_host_state(vmx);
                ret = kvm_set_msr_common(vcpu, msr_index, data);
                break;
+#ifdef CONFIG_X86_64
        case MSR_FS_BASE:
                vmcs_writel(GUEST_FS_BASE, data);
                break;
@@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
                vmcs_writel(GUEST_SYSENTER_ESP, data);
                break;
        case MSR_IA32_TIME_STAMP_COUNTER:
-               guest_write_tsc(data);
+               rdtscll(host_tsc);
+               guest_write_tsc(data, host_tsc);
                break;
        case MSR_P6_PERFCTR0:
        case MSR_P6_PERFCTR1:
@@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
        }
 }
 
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-       unsigned long dr7 = 0x400;
-       int old_singlestep;
-
-       old_singlestep = vcpu->guest_debug.singlestep;
-
-       vcpu->guest_debug.enabled = dbg->enabled;
-       if (vcpu->guest_debug.enabled) {
-               int i;
+       int old_debug = vcpu->guest_debug;
+       unsigned long flags;
 
-               dr7 |= 0x200;  /* exact */
-               for (i = 0; i < 4; ++i) {
-                       if (!dbg->breakpoints[i].enabled)
-                               continue;
-                       vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
-                       dr7 |= 2 << (i*2);    /* global enable */
-                       dr7 |= 0 << (i*4+16); /* execution breakpoint */
-               }
+       vcpu->guest_debug = dbg->control;
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+               vcpu->guest_debug = 0;
 
-               vcpu->guest_debug.singlestep = dbg->singlestep;
-       } else
-               vcpu->guest_debug.singlestep = 0;
-
-       if (old_singlestep && !vcpu->guest_debug.singlestep) {
-               unsigned long flags;
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+               vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
+       else
+               vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
 
-               flags = vmcs_readl(GUEST_RFLAGS);
+       flags = vmcs_readl(GUEST_RFLAGS);
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+       else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
                flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-               vmcs_writel(GUEST_RFLAGS, flags);
-       }
+       vmcs_writel(GUEST_RFLAGS, flags);
 
        update_exception_bitmap(vcpu);
-       vmcs_writel(GUEST_DR7, dr7);
 
        return 0;
 }
@@ -1433,6 +1430,29 @@ continue_rmode:
        init_rmode(vcpu->kvm);
 }
 
+static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+       vcpu->arch.shadow_efer = efer;
+       if (!msr)
+               return;
+       if (efer & EFER_LMA) {
+               vmcs_write32(VM_ENTRY_CONTROLS,
+                            vmcs_read32(VM_ENTRY_CONTROLS) |
+                            VM_ENTRY_IA32E_MODE);
+               msr->data = efer;
+       } else {
+               vmcs_write32(VM_ENTRY_CONTROLS,
+                            vmcs_read32(VM_ENTRY_CONTROLS) &
+                            ~VM_ENTRY_IA32E_MODE);
+
+               msr->data = efer & ~EFER_LME;
+       }
+       setup_msrs(vmx);
+}
+
 #ifdef CONFIG_X86_64
 
 static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
                             (guest_tr_ar & ~AR_TYPE_MASK)
                             | AR_TYPE_BUSY_64_TSS);
        }
-
        vcpu->arch.shadow_efer |= EFER_LMA;
-
-       find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
-       vmcs_write32(VM_ENTRY_CONTROLS,
-                    vmcs_read32(VM_ENTRY_CONTROLS)
-                    | VM_ENTRY_IA32E_MODE);
+       vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
-static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-       struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
-       vcpu->arch.shadow_efer = efer;
-       if (!msr)
-               return;
-       if (efer & EFER_LMA) {
-               vmcs_write32(VM_ENTRY_CONTROLS,
-                                    vmcs_read32(VM_ENTRY_CONTROLS) |
-                                    VM_ENTRY_IA32E_MODE);
-               msr->data = efer;
-
-       } else {
-               vmcs_write32(VM_ENTRY_CONTROLS,
-                                    vmcs_read32(VM_ENTRY_CONTROLS) &
-                                    ~VM_ENTRY_IA32E_MODE);
-
-               msr->data = efer & ~EFER_LME;
-       }
-       setup_msrs(vmx);
-}
-
 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
        var->limit = vmcs_read32(sf->limit);
        var->selector = vmcs_read16(sf->selector);
        ar = vmcs_read32(sf->ar_bytes);
-       if (ar & AR_UNUSABLE_MASK)
+       if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
                ar = 0;
        var->type = ar & 15;
        var->s = (ar >> 4) & 1;
@@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
        vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
        cs_rpl = cs.selector & SELECTOR_RPL_MASK;
 
+       if (cs.unusable)
+               return false;
        if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
                return false;
        if (!cs.s)
                return false;
-       if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+       if (cs.type & AR_TYPE_WRITEABLE_MASK) {
                if (cs.dpl > cs_rpl)
                        return false;
-       } else if (cs.type & AR_TYPE_CODE_MASK) {
+       } else {
                if (cs.dpl != cs_rpl)
                        return false;
        }
@@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
        vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
        ss_rpl = ss.selector & SELECTOR_RPL_MASK;
 
-       if ((ss.type != 3) || (ss.type != 7))
+       if (ss.unusable)
+               return true;
+       if (ss.type != 3 && ss.type != 7)
                return false;
        if (!ss.s)
                return false;
@@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
        vmx_get_segment(vcpu, &var, seg);
        rpl = var.selector & SELECTOR_RPL_MASK;
 
+       if (var.unusable)
+               return true;
        if (!var.s)
                return false;
        if (!var.present)
@@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
 
        vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
 
+       if (tr.unusable)
+               return false;
        if (tr.selector & SELECTOR_TI_MASK)     /* TI = 1 */
                return false;
-       if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+       if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
                return false;
        if (!tr.present)
                return false;
@@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
 
        vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
 
+       if (ldtr.unusable)
+               return true;
        if (ldtr.selector & SELECTOR_TI_MASK)   /* TI = 1 */
                return false;
        if (ldtr.type != 2)
@@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
        u32 host_sysenter_cs, msr_low, msr_high;
        u32 junk;
-       u64 host_pat;
+       u64 host_pat, tsc_this, tsc_base;
        unsigned long a;
        struct descriptor_table dt;
        int i;
@@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
        vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
+       tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+       rdtscll(tsc_this);
+       if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+               tsc_base = tsc_this;
+
+       guest_write_tsc(0, tsc_base);
 
        return 0;
 }
@@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                kvm_rip_write(vcpu, 0);
        kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-       /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
        vmcs_writel(GUEST_DR7, 0x400);
 
        vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
        vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
-       guest_write_tsc(0);
-
        /* Special registers */
        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
@@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 {
        vmx_update_window_states(vcpu);
 
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                               GUEST_INTR_STATE_STI |
+                               GUEST_INTR_STATE_MOV_SS);
+
        if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
                if (vcpu->arch.interrupt.pending) {
                        enable_nmi_window(vcpu);
@@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
        return 0;
 }
 
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
-       struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
-       set_debugreg(dbg->bp[0], 0);
-       set_debugreg(dbg->bp[1], 1);
-       set_debugreg(dbg->bp[2], 2);
-       set_debugreg(dbg->bp[3], 3);
-
-       if (dbg->singlestep) {
-               unsigned long flags;
-
-               flags = vmcs_readl(GUEST_RFLAGS);
-               flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-               vmcs_writel(GUEST_RFLAGS, flags);
-       }
-}
-
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
                                  int vec, u32 err_code)
 {
@@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
         *        the required debugging infrastructure rework.
         */
        switch (vec) {
-       case DE_VECTOR:
        case DB_VECTOR:
+               if (vcpu->guest_debug &
+                   (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+                       return 0;
+               kvm_queue_exception(vcpu, vec);
+               return 1;
        case BP_VECTOR:
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+                       return 0;
+               /* fall through */
+       case DE_VECTOR:
        case OF_VECTOR:
        case BR_VECTOR:
        case UD_VECTOR:
@@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u32 intr_info, error_code;
-       unsigned long cr2, rip;
+       u32 intr_info, ex_no, error_code;
+       unsigned long cr2, rip, dr6;
        u32 vect_info;
        enum emulation_result er;
 
@@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                return 1;
        }
 
-       if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
-           (INTR_TYPE_EXCEPTION | 1)) {
+       ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+       switch (ex_no) {
+       case DB_VECTOR:
+               dr6 = vmcs_readl(EXIT_QUALIFICATION);
+               if (!(vcpu->guest_debug &
+                     (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+                       vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+                       kvm_queue_exception(vcpu, DB_VECTOR);
+                       return 1;
+               }
+               kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+               kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+               /* fall through */
+       case BP_VECTOR:
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
-               return 0;
+               kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+               kvm_run->debug.arch.exception = ex_no;
+               break;
+       default:
+               kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+               kvm_run->ex.exception = ex_no;
+               kvm_run->ex.error_code = error_code;
+               break;
        }
-       kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
-       kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
-       kvm_run->ex.error_code = error_code;
        return 0;
 }
 
@@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        unsigned long exit_qualification;
-       int size, down, in, string, rep;
+       int size, in, string;
        unsigned port;
 
        ++vcpu->stat.io_exits;
@@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        size = (exit_qualification & 7) + 1;
        in = (exit_qualification & 8) != 0;
-       down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-       rep = (exit_qualification & 32) != 0;
        port = exit_qualification >> 16;
 
        skip_emulated_instruction(vcpu);
@@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        unsigned long val;
        int dr, reg;
 
-       /*
-        * FIXME: this code assumes the host is debugging the guest.
-        *        need to deal with guest debugging itself too.
-        */
+       dr = vmcs_readl(GUEST_DR7);
+       if (dr & DR7_GD) {
+               /*
+                * As the vm-exit takes precedence over the debug trap, we
+                * need to emulate the latter, either for the host or the
+                * guest debugging itself.
+                */
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+                       kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
+                       kvm_run->debug.arch.dr7 = dr;
+                       kvm_run->debug.arch.pc =
+                               vmcs_readl(GUEST_CS_BASE) +
+                               vmcs_readl(GUEST_RIP);
+                       kvm_run->debug.arch.exception = DB_VECTOR;
+                       kvm_run->exit_reason = KVM_EXIT_DEBUG;
+                       return 0;
+               } else {
+                       vcpu->arch.dr7 &= ~DR7_GD;
+                       vcpu->arch.dr6 |= DR6_BD;
+                       vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+                       kvm_queue_exception(vcpu, DB_VECTOR);
+                       return 1;
+               }
+       }
+
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-       dr = exit_qualification & 7;
-       reg = (exit_qualification >> 8) & 15;
-       if (exit_qualification & 16) {
-               /* mov from dr */
+       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+       reg = DEBUG_REG_ACCESS_REG(exit_qualification);
+       if (exit_qualification & TYPE_MOV_FROM_DR) {
                switch (dr) {
+               case 0 ... 3:
+                       val = vcpu->arch.db[dr];
+                       break;
                case 6:
-                       val = 0xffff0ff0;
+                       val = vcpu->arch.dr6;
                        break;
                case 7:
-                       val = 0x400;
+                       val = vcpu->arch.dr7;
                        break;
                default:
                        val = 0;
@@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                kvm_register_write(vcpu, reg, val);
                KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
        } else {
-               /* mov to dr */
+               val = vcpu->arch.regs[reg];
+               switch (dr) {
+               case 0 ... 3:
+                       vcpu->arch.db[dr] = val;
+                       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+                               vcpu->arch.eff_db[dr] = val;
+                       break;
+               case 4 ... 5:
+                       if (vcpu->arch.cr4 & X86_CR4_DE)
+                               kvm_queue_exception(vcpu, UD_VECTOR);
+                       break;
+               case 6:
+                       if (val & 0xffffffff00000000ULL) {
+                               kvm_queue_exception(vcpu, GP_VECTOR);
+                               break;
+                       }
+                       vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+                       break;
+               case 7:
+                       if (val & 0xffffffff00000000ULL) {
+                               kvm_queue_exception(vcpu, GP_VECTOR);
+                               break;
+                       }
+                       vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+                       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+                               vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+                               vcpu->arch.switch_db_regs =
+                                       (val & DR7_BP_EN_MASK);
+                       }
+                       break;
+               }
+               KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
        }
        skip_emulated_instruction(vcpu);
        return 1;
@@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
        tss_selector = exit_qualification;
 
-       return kvm_task_switch(vcpu, tss_selector, reason);
+       if (!kvm_task_switch(vcpu, tss_selector, reason))
+               return 0;
+
+       /* clear all local breakpoint enable flags */
+       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+
+       /*
+        * TODO: What about debug traps on tss switch?
+        *       Are we supposed to inject them and update dr6?
+        */
+
+       return 1;
 }
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        u64 exit_qualification;
-       enum emulation_result er;
        gpa_t gpa;
-       unsigned long hva;
        int gla_validity;
-       int r;
 
        exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
 
@@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-       hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
-       if (!kvm_is_error_hva(hva)) {
-               r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
-               if (r < 0) {
-                       printk(KERN_ERR "EPT: Not enough memory!\n");
-                       return -ENOMEM;
-               }
-               return 1;
-       } else {
-               /* must be MMIO */
-               er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
-
-               if (er == EMULATE_FAIL) {
-                       printk(KERN_ERR
-                        "EPT: Fail to handle EPT violation vmexit!er is %d\n",
-                        er);
-                       printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
-                        (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
-                        (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
-                       printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
-                               (long unsigned int)exit_qualification);
-                       return -ENOTSUPP;
-               } else if (er == EMULATE_DO_MMIO)
-                       return 0;
-       }
-       return 1;
+       return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
                                struct kvm_run *kvm_run)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int err;
+       enum emulation_result err = EMULATE_DONE;
 
        preempt_enable();
        local_irq_enable();
@@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
        local_irq_disable();
        preempt_disable();
 
-       /* Guest state should be valid now except if we need to
-        * emulate an MMIO */
-       if (guest_state_valid(vcpu))
-               vmx->emulation_required = 0;
+       vmx->invalid_state_emulation_result = err;
 }
 
 /*
@@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        /* If we need to emulate an MMIO from handle_invalid_guest_state
         * we just return 0 */
-       if (vmx->emulation_required && emulate_invalid_guest_state)
-               return 0;
+       if (vmx->emulation_required && emulate_invalid_guest_state) {
+               if (guest_state_valid(vcpu))
+                       vmx->emulation_required = 0;
+               return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
+       }
 
        /* Access CR3 don't cause VMExit in paging mode, so we need
         * to sync with guest real CR3. */
@@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
                        vmx->vcpu.arch.nmi_injected = false;
        }
        kvm_clear_exception_queue(&vmx->vcpu);
-       if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
+       if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
+                               type == INTR_TYPE_SOFT_EXCEPTION)) {
                if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
                        error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
                        kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
        vmx_update_window_states(vcpu);
 
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                               GUEST_INTR_STATE_STI |
+                               GUEST_INTR_STATE_MOV_SS);
+
        if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
                if (vcpu->arch.interrupt.pending) {
                        enable_nmi_window(vcpu);
@@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         */
        vmcs_writel(HOST_CR0, read_cr0());
 
+       set_debugreg(vcpu->arch.dr6, 6);
+
        asm(
                /* Store host registers */
                "push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
        vcpu->arch.regs_dirty = 0;
 
+       get_debugreg(vcpu->arch.dr6, 6);
+
        vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
        if (vmx->rmode.irq.pending)
                fixup_rmode_irq(vmx);
@@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .vcpu_put = vmx_vcpu_put,
 
        .set_guest_debug = set_guest_debug,
-       .guest_debug_pre = kvm_guest_debug_pre,
        .get_msr = vmx_get_msr,
        .set_msr = vmx_set_msr,
        .get_segment_base = vmx_get_segment_base,
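In the vmx.c hunks above, guest_write_tsc() is refactored to take the host TSC explicitly: TSC_OFFSET is programmed from the relation guest_tsc = host_tsc + offset, and vcpu setup clamps the base to the current host TSC when that has fallen behind the recorded vm_init_tsc, so a new vcpu never starts with a guest TSC in the future. A small stand-alone model of that arithmetic — illustrative only, with made-up sample values:

#include <stdint.h>
#include <stdio.h>

/* guest_tsc = host_tsc + offset  =>  offset = guest_tsc - host_tsc.
 * The hardware field is 64 bits wide and wraps, i.e. it acts as a signed offset. */
static int64_t tsc_offset_for(uint64_t guest_tsc, uint64_t host_tsc)
{
	return (int64_t)(guest_tsc - host_tsc);
}

int main(void)
{
	/* Made-up sample values, not real TSC readings. */
	uint64_t vm_init_tsc = 1000000;   /* TSC recorded when the VM was created */
	uint64_t tsc_this    = 999500;    /* TSC read on the CPU doing vcpu setup */
	uint64_t tsc_base    = vm_init_tsc;

	if (tsc_this < vm_init_tsc)       /* host TSC is behind: use it as the base */
		tsc_base = tsc_this;

	/* The guest starts at TSC 0 relative to the chosen base. */
	printf("TSC_OFFSET = %lld\n", (long long)tsc_offset_for(0, tsc_base));
	return 0;
}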
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae9113559b5993b38abfc3cf603fe70..8ca100a9ecac57db73e4157b291be70373a94573 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/highmem.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                                    struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+                                             u32 function, u32 index);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           u32 error_code)
 {
        ++vcpu->stat.pf_guest;
+
        if (vcpu->arch.exception.pending) {
                if (vcpu->arch.exception.nr == PF_VECTOR) {
                        printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        }
        kvm_x86_ops->set_cr4(vcpu, cr4);
        vcpu->arch.cr4 = cr4;
+       vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
        kvm_mmu_sync_global(vcpu);
        kvm_mmu_reset_context(vcpu);
 }
@@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static inline u32 bit(int bitno)
+{
+       return 1 << (bitno & 31);
+}
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = {
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
        MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-       MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
+       MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
 static unsigned num_msrs_to_save;
@@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
                return;
        }
 
+       if (efer & EFER_FFXSR) {
+               struct kvm_cpuid_entry2 *feat;
+
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+                       printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       }
+
+       if (efer & EFER_SVME) {
+               struct kvm_cpuid_entry2 *feat;
+
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+               if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+                       printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+                       kvm_inject_gp(vcpu, 0);
+                       return;
+               }
+       }
+
        kvm_x86_ops->set_efer(vcpu, efer);
 
        efer &= ~EFER_LMA;
@@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
                 hv_clock->tsc_to_system_mul);
 }
 
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
        struct timespec ts;
@@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
        if ((!vcpu->time_page))
                return;
 
-       if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
-               kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
-               vcpu->hv_clock_tsc_khz = tsc_khz;
+       if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+               kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+               vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
        }
 
        /* Keep irq disabled to prevent changes to the clock */
@@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
        mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_arch *vcpu = &v->arch;
+
+       if (!vcpu->time_page)
+               return 0;
+       set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+       return 1;
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
        switch (msr) {
@@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                break;
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_UCODE_WRITE:
+       case MSR_VM_HSAVE_PA:
                break;
        case 0x200 ... 0x2ff:
                return set_msr_mtrr(vcpu, msr, data);
@@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                        vcpu->arch.time_page = NULL;
                }
 
-               kvm_write_guest_time(vcpu);
+               kvm_request_guest_time_update(vcpu);
                break;
        }
        default:
@@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_LASTBRANCHTOIP:
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
+       case MSR_VM_HSAVE_PA:
                data = 0;
                break;
        case MSR_MTRRcap:
@@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
        case KVM_CAP_SET_TSS_ADDR:
        case KVM_CAP_EXT_CPUID:
+       case KVM_CAP_CLOCKSOURCE:
        case KVM_CAP_PIT:
        case KVM_CAP_NOP_IO_DELAY:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_SYNC_MMU:
+       case KVM_CAP_REINJECT_CONTROL:
+       case KVM_CAP_IRQ_INJECT_STATUS:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IOMMU:
                r = iommu_found();
                break;
-       case KVM_CAP_CLOCKSOURCE:
-               r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
-               break;
        default:
                r = 0;
                break;
@@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
                r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-                       cpuid_arg->entries);
+                                                     cpuid_arg->entries);
                if (r)
                        goto out;
 
@@ -1064,7 +1110,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        kvm_x86_ops->vcpu_load(vcpu, cpu);
-       kvm_write_guest_time(vcpu);
+       kvm_request_guest_time_update(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1142,8 +1188,8 @@ out:
 }
 
 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
-                                   struct kvm_cpuid2 *cpuid,
-                                   struct kvm_cpuid_entry2 __user *entries)
+                                    struct kvm_cpuid2 *cpuid,
+                                    struct kvm_cpuid_entry2 __user *entries)
 {
        int r;
 
@@ -1162,8 +1208,8 @@ out:
 }
 
 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
-                                   struct kvm_cpuid2 *cpuid,
-                                   struct kvm_cpuid_entry2 __user *entries)
+                                    struct kvm_cpuid2 *cpuid,
+                                    struct kvm_cpuid_entry2 __user *entries)
 {
        int r;
 
@@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
                goto out;
        r = -EFAULT;
        if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
-                          vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+                        vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
                goto out;
        return 0;
 
@@ -1181,18 +1227,13 @@ out:
        return r;
 }
 
-static inline u32 bit(int bitno)
-{
-       return 1 << (bitno & 31);
-}
-
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-                         u32 index)
+                          u32 index)
 {
        entry->function = function;
        entry->index = index;
        cpuid_count(entry->function, entry->index,
-               &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+                   &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
        entry->flags = 0;
 }
 
@@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 #ifdef CONFIG_X86_64
                bit(X86_FEATURE_LM) |
 #endif
+               bit(X86_FEATURE_FXSR_OPT) |
                bit(X86_FEATURE_MMXEXT) |
                bit(X86_FEATURE_3DNOWEXT) |
                bit(X86_FEATURE_3DNOW);
        const u32 kvm_supported_word3_x86_features =
                bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
        const u32 kvm_supported_word6_x86_features =
-               bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+               bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+               bit(X86_FEATURE_SVM);
 
-       /* all func 2 cpuid_count() should be called on the same cpu */
+       /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
        do_cpuid_1_ent(entry, function, index);
        ++*nent;
@@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 }
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-                                   struct kvm_cpuid_entry2 __user *entries)
+                                    struct kvm_cpuid_entry2 __user *entries)
 {
        struct kvm_cpuid_entry2 *cpuid_entries;
        int limit, nent = 0, r = -E2BIG;
@@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
        limit = cpuid_entries[0].eax;
        for (func = 1; func <= limit && nent < cpuid->nent; ++func)
                do_cpuid_ent(&cpuid_entries[nent], func, 0,
-                               &nent, cpuid->nent);
+                            &nent, cpuid->nent);
        r = -E2BIG;
        if (nent >= cpuid->nent)
                goto out_free;
@@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
        limit = cpuid_entries[nent - 1].eax;
        for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
                do_cpuid_ent(&cpuid_entries[nent], func, 0,
-                              &nent, cpuid->nent);
+                            &nent, cpuid->nent);
        r = -EFAULT;
        if (copy_to_user(entries, cpuid_entries,
-                       nent * sizeof(struct kvm_cpuid_entry2)))
+                        nent * sizeof(struct kvm_cpuid_entry2)))
                goto out_free;
        cpuid->nent = nent;
        r = 0;
@@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
                r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
-                               cpuid_arg->entries);
+                                             cpuid_arg->entries);
                if (r)
                        goto out;
                break;
@@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
                r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
-                               cpuid_arg->entries);
+                                             cpuid_arg->entries);
                if (r)
                        goto out;
                r = -EFAULT;
@@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
        return r;
 }
 
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+                                struct kvm_reinject_control *control)
+{
+       if (!kvm->arch.vpit)
+               return -ENXIO;
+       kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+       return 0;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        }
                } else
                        goto out;
+               r = kvm_setup_default_irq_routing(kvm);
+               if (r) {
+                       kfree(kvm->arch.vpic);
+                       kfree(kvm->arch.vioapic);
+                       goto out;
+               }
                break;
        case KVM_CREATE_PIT:
+               mutex_lock(&kvm->lock);
+               r = -EEXIST;
+               if (kvm->arch.vpit)
+                       goto create_pit_unlock;
                r = -ENOMEM;
                kvm->arch.vpit = kvm_create_pit(kvm);
                if (kvm->arch.vpit)
                        r = 0;
+       create_pit_unlock:
+               mutex_unlock(&kvm->lock);
                break;
+       case KVM_IRQ_LINE_STATUS:
        case KVM_IRQ_LINE: {
                struct kvm_irq_level irq_event;
 
@@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
                        goto out;
                if (irqchip_in_kernel(kvm)) {
+                       __s32 status;
                        mutex_lock(&kvm->lock);
-                       kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-                                   irq_event.irq, irq_event.level);
+                       status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                       irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
+                       if (ioctl == KVM_IRQ_LINE_STATUS) {
+                               irq_event.status = status;
+                               if (copy_to_user(argp, &irq_event,
+                                                       sizeof irq_event))
+                                       goto out;
+                       }
                        r = 0;
                }
                break;
@@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_REINJECT_CONTROL: {
+               struct kvm_reinject_control control;
+               r =  -EFAULT;
+               if (copy_from_user(&control, argp, sizeof(control)))
+                       goto out;
+               r = kvm_vm_ioctl_reinject(kvm, &control);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        default:
                ;
        }
@@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
        return dev;
 }
 
-int emulator_read_std(unsigned long addr,
-                            void *val,
-                            unsigned int bytes,
-                            struct kvm_vcpu *vcpu)
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+                              struct kvm_vcpu *vcpu)
+{
+       void *data = val;
+       int r = X86EMUL_CONTINUE;
+
+       while (bytes) {
+               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               unsigned offset = addr & (PAGE_SIZE-1);
+               unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
+               int ret;
+
+               if (gpa == UNMAPPED_GVA) {
+                       r = X86EMUL_PROPAGATE_FAULT;
+                       goto out;
+               }
+               ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+               if (ret < 0) {
+                       r = X86EMUL_UNHANDLEABLE;
+                       goto out;
+               }
+
+               bytes -= toread;
+               data += toread;
+               addr += toread;
+       }
+out:
+       return r;
+}
+
+static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+                               struct kvm_vcpu *vcpu)
 {
        void *data = val;
        int r = X86EMUL_CONTINUE;
@@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr,
        while (bytes) {
                gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
                unsigned offset = addr & (PAGE_SIZE-1);
-               unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+               unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
 
                if (gpa == UNMAPPED_GVA) {
                        r = X86EMUL_PROPAGATE_FAULT;
                        goto out;
                }
-               ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+               ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
                if (ret < 0) {
                        r = X86EMUL_UNHANDLEABLE;
                        goto out;
                }
 
-               bytes -= tocopy;
-               data += tocopy;
-               addr += tocopy;
+               bytes -= towrite;
+               data += towrite;
+               addr += towrite;
        }
 out:
        return r;
 }
-EXPORT_SYMBOL_GPL(emulator_read_std);
+
 
 static int emulator_read_emulated(unsigned long addr,
                                  void *val,
@@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr,
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
                goto mmio;
 
-       if (emulator_read_std(addr, val, bytes, vcpu)
-                       == X86EMUL_CONTINUE)
+       if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+                               == X86EMUL_CONTINUE)
                return X86EMUL_CONTINUE;
        if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
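The new kvm_read_guest_virt()/kvm_write_guest_virt() helpers walk a guest virtual range one page at a time, so a transfer that straddles a page boundary still works without pinning pages. A standalone userspace sketch of the same splitting loop (not kernel code), with toy translate()/copy_from_phys() helpers standing in for gva_to_gpa() and kvm_read_guest():

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096u
#define UNMAPPED  ((uint64_t)-1)

/* Toy guest memory with identity translation, so the sketch is self-contained. */
static uint8_t guest_mem[4 * PAGE_SIZE];

static uint64_t translate(uint64_t gva)          /* stand-in for gva_to_gpa() */
{
        return gva < sizeof(guest_mem) ? gva : UNMAPPED;
}

static int copy_from_phys(uint64_t gpa, void *dst, size_t n)  /* kvm_read_guest() */
{
        if (gpa + n > sizeof(guest_mem))
                return -1;
        memcpy(dst, &guest_mem[gpa], n);
        return 0;
}

/* Same shape as kvm_read_guest_virt(): split the copy at page boundaries. */
static int read_guest_virt(uint64_t addr, void *val, size_t bytes)
{
        uint8_t *data = val;

        while (bytes) {
                uint64_t gpa  = translate(addr);
                size_t offset = addr & (PAGE_SIZE - 1);
                size_t toread = bytes < PAGE_SIZE - offset ? bytes
                                                           : PAGE_SIZE - offset;

                if (gpa == UNMAPPED)
                        return -1;                    /* propagate the fault */
                if (copy_from_phys(gpa, data, toread) < 0)
                        return -1;                    /* unhandleable */
                bytes -= toread;
                data  += toread;
                addr  += toread;
        }
        return 0;
}

int main(void)
{
        char buf[32];

        memcpy(&guest_mem[PAGE_SIZE - 4], "crosses a page boundary", 24);
        if (read_guest_virt(PAGE_SIZE - 4, buf, 24) == 0)
                printf("%s\n", buf);   /* the read spans two pages transparently */
        return 0;
}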
@@ -2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 
        rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
-       emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+       kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
 
        printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
               context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 static struct x86_emulate_ops emulate_ops = {
-       .read_std            = emulator_read_std,
+       .read_std            = kvm_read_guest_virt,
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
@@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
-               if (vcpu->arch.pio.guest_pages[i]) {
-                       kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
-                       vcpu->arch.pio.guest_pages[i] = NULL;
-               }
-}
-
 static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
        void *p = vcpu->arch.pio_data;
-       void *q;
+       gva_t q = vcpu->arch.pio.guest_gva;
        unsigned bytes;
-       int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+       int ret;
 
-       q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
-                PAGE_KERNEL);
-       if (!q) {
-               free_pio_guest_pages(vcpu);
-               return -ENOMEM;
-       }
-       q += vcpu->arch.pio.guest_page_offset;
        bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
        if (vcpu->arch.pio.in)
-               memcpy(q, p, bytes);
+               ret = kvm_write_guest_virt(q, p, bytes, vcpu);
        else
-               memcpy(p, q, bytes);
-       q -= vcpu->arch.pio.guest_page_offset;
-       vunmap(q);
-       free_pio_guest_pages(vcpu);
-       return 0;
+               ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+       return ret;
 }
 
 int complete_pio(struct kvm_vcpu *vcpu)
@@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        vcpu->arch.pio.in = in;
        vcpu->arch.pio.string = 0;
        vcpu->arch.pio.down = 0;
-       vcpu->arch.pio.guest_page_offset = 0;
        vcpu->arch.pio.rep = 0;
 
        if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                  gva_t address, int rep, unsigned port)
 {
        unsigned now, in_page;
-       int i, ret = 0;
-       int nr_pages = 1;
-       struct page *page;
+       int ret = 0;
        struct kvm_io_device *pio_dev;
 
        vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        vcpu->arch.pio.in = in;
        vcpu->arch.pio.string = 1;
        vcpu->arch.pio.down = down;
-       vcpu->arch.pio.guest_page_offset = offset_in_page(address);
        vcpu->arch.pio.rep = rep;
 
        if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        else
                in_page = offset_in_page(address) + size;
        now = min(count, (unsigned long)in_page / size);
-       if (!now) {
-               /*
-                * String I/O straddles page boundary.  Pin two guest pages
-                * so that we satisfy atomicity constraints.  Do just one
-                * transaction to avoid complexity.
-                */
-               nr_pages = 2;
+       if (!now)
                now = 1;
-       }
        if (down) {
                /*
                 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
                kvm_x86_ops->skip_emulated_instruction(vcpu);
 
-       for (i = 0; i < nr_pages; ++i) {
-               page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-               vcpu->arch.pio.guest_pages[i] = page;
-               if (!page) {
-                       kvm_inject_gp(vcpu, 0);
-                       free_pio_guest_pages(vcpu);
-                       return 1;
-               }
-       }
+       vcpu->arch.pio.guest_gva = address;
 
        pio_dev = vcpu_find_pio_dev(vcpu, port,
                                    vcpu->arch.pio.cur_count,
@@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
        if (!vcpu->arch.pio.in) {
                /* string PIO write */
                ret = pio_copy_data(vcpu);
-               if (ret >= 0 && pio_dev) {
+               if (ret == X86EMUL_PROPAGATE_FAULT) {
+                       kvm_inject_gp(vcpu, 0);
+                       return 1;
+               }
+               if (ret == 0 && pio_dev) {
                        pio_string_write(pio_dev, vcpu);
                        complete_pio(vcpu);
                        if (vcpu->arch.pio.count == 0)
@@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
+static void bounce_off(void *info)
+{
+       /* nothing */
+}
+
+static unsigned int  ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+                                    void *data)
+{
+       struct cpufreq_freqs *freq = data;
+       struct kvm *kvm;
+       struct kvm_vcpu *vcpu;
+       int i, send_ipi = 0;
+
+       if (!ref_freq)
+               ref_freq = freq->old;
+
+       if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+               return 0;
+       if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+               return 0;
+       per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+       spin_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+                       vcpu = kvm->vcpus[i];
+                       if (!vcpu)
+                               continue;
+                       if (vcpu->cpu != freq->cpu)
+                               continue;
+                       if (!kvm_request_guest_time_update(vcpu))
+                               continue;
+                       if (vcpu->cpu != smp_processor_id())
+                               send_ipi++;
+               }
+       }
+       spin_unlock(&kvm_lock);
+
+       if (freq->old < freq->new && send_ipi) {
+               /*
+                * We upscale the frequency.  Must make sure the guest
+                * doesn't see old kvmclock values while running with
+                * the new frequency, otherwise we risk that the guest
+                * sees time go backwards.
+                *
+                * In case we update the frequency for another cpu
+                * (which might be in guest context) send an interrupt
+                * to kick the cpu out of guest context.  Next time
+                * guest context is entered kvmclock will be updated,
+                * so the guest will not see stale values.
+                */
+               smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+       }
+       return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+        .notifier_call  = kvmclock_cpufreq_notifier
+};
+
 int kvm_arch_init(void *opaque)
 {
-       int r;
+       int r, cpu;
        struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
        if (kvm_x86_ops) {
@@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque)
        kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+       for_each_possible_cpu(cpu)
+               per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               tsc_khz_ref = tsc_khz;
+               cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+                                         CPUFREQ_TRANSITION_NOTIFIER);
+       }
+
        return 0;
 
 out:
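On CPUs without a constant TSC the notifier registered above rescales the per-cpu TSC rate with cpufreq_scale(tsc_khz_ref, ref_freq, freq->new), which is simply old_rate * new_freq / old_freq. A quick arithmetic sketch in plain userspace C (not the kernel helper):

#include <stdio.h>

/* old_khz was measured while the core ran at ref_freq; rescale for new_freq. */
static unsigned long scale_tsc_khz(unsigned long old_khz,
                                   unsigned int ref_freq,
                                   unsigned int new_freq)
{
        return (unsigned long long)old_khz * new_freq / ref_freq;
}

int main(void)
{
        /* TSC measured at 2,400,000 kHz with the core at 2.4 GHz; after a
         * cpufreq transition to 1.6 GHz the rate used for kvmclock becomes
         * 1,600,000 kHz. */
        printf("%lu kHz\n", scale_tsc_khz(2400000, 2400000, 1600000));
        return 0;
}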
@@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
        if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
                return 0;
        if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
-               !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+           !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
                return 0;
        return 1;
 }
 
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+                                             u32 function, u32 index)
 {
        int i;
-       u32 function, index;
-       struct kvm_cpuid_entry2 *e, *best;
+       struct kvm_cpuid_entry2 *best = NULL;
 
-       function = kvm_register_read(vcpu, VCPU_REGS_RAX);
-       index = kvm_register_read(vcpu, VCPU_REGS_RCX);
-       kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
-       best = NULL;
        for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+               struct kvm_cpuid_entry2 *e;
+
                e = &vcpu->arch.cpuid_entries[i];
                if (is_matching_cpuid_entry(e, function, index)) {
                        if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
                        if (!best || e->function > best->function)
                                best = e;
        }
+       return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+       u32 function, index;
+       struct kvm_cpuid_entry2 *best;
+
+       function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+       index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+       kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+       kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+       best = kvm_find_cpuid_entry(vcpu, function, index);
        if (best) {
                kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
                kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        if (vcpu->requests) {
                if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
                        __kvm_migrate_timers(vcpu);
+               if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+                       kvm_write_guest_time(vcpu);
                if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
                        kvm_mmu_sync_roots(vcpu);
                if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
@@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                goto out;
        }
 
-       if (vcpu->guest_debug.enabled)
-               kvm_x86_ops->guest_debug_pre(vcpu);
-
        vcpu->guest_mode = 1;
        /*
         * Make sure that guest_mode assignment won't happen after
@@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        kvm_guest_enter();
 
+       get_debugreg(vcpu->arch.host_dr6, 6);
+       get_debugreg(vcpu->arch.host_dr7, 7);
+       if (unlikely(vcpu->arch.switch_db_regs)) {
+               get_debugreg(vcpu->arch.host_db[0], 0);
+               get_debugreg(vcpu->arch.host_db[1], 1);
+               get_debugreg(vcpu->arch.host_db[2], 2);
+               get_debugreg(vcpu->arch.host_db[3], 3);
+
+               set_debugreg(0, 7);
+               set_debugreg(vcpu->arch.eff_db[0], 0);
+               set_debugreg(vcpu->arch.eff_db[1], 1);
+               set_debugreg(vcpu->arch.eff_db[2], 2);
+               set_debugreg(vcpu->arch.eff_db[3], 3);
+       }
 
        KVMTRACE_0D(VMENTRY, vcpu, entryexit);
        kvm_x86_ops->run(vcpu, kvm_run);
 
+       if (unlikely(vcpu->arch.switch_db_regs)) {
+               set_debugreg(0, 7);
+               set_debugreg(vcpu->arch.host_db[0], 0);
+               set_debugreg(vcpu->arch.host_db[1], 1);
+               set_debugreg(vcpu->arch.host_db[2], 2);
+               set_debugreg(vcpu->arch.host_db[3], 3);
+       }
+       set_debugreg(vcpu->arch.host_dr6, 6);
+       set_debugreg(vcpu->arch.host_dr7, 7);
+
        vcpu->guest_mode = 0;
        local_irq_enable();
 
@@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        /*
         * Don't leak debug flags in case they were set for guest debugging
         */
-       if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
 
        vcpu_put(vcpu);
@@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                   struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                       struct kvm_guest_debug *dbg)
 {
-       int r;
+       int i, r;
 
        vcpu_load(vcpu);
 
+       if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+               for (i = 0; i < KVM_NR_DB_REGS; ++i)
+                       vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+               vcpu->arch.switch_db_regs =
+                       (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+       } else {
+               for (i = 0; i < KVM_NR_DB_REGS; i++)
+                       vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+               vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+       }
+
        r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
 
+       if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+               kvm_queue_exception(vcpu, DB_VECTOR);
+       else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+               kvm_queue_exception(vcpu, BP_VECTOR);
+
        vcpu_put(vcpu);
 
        return r;
@@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.nmi_pending = false;
        vcpu->arch.nmi_injected = false;
 
+       vcpu->arch.switch_db_regs = 0;
+       memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+       vcpu->arch.dr6 = DR6_FIXED_1;
+       vcpu->arch.dr7 = DR7_FIXED_1;
+
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -4100,6 +4302,8 @@ struct  kvm *kvm_arch_create_vm(void)
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
        set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
+       rdtscll(kvm->arch.vm_init_tsc);
+
        return kvm;
 }
 
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370d9e80f6c1b93f2960a40b5b1cc43..ca91749d2083ff71a44b66f4253bda00dff7f4b3 100644
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
        0, ImplicitOps | Stack, 0, 0,
        ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
        /* 0xC8 - 0xCF */
-       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
        /* 0xD0 - 0xD7 */
        ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
        ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
 }
 
 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
-                      struct x86_emulate_ops *ops)
+                      struct x86_emulate_ops *ops,
+                      void *dest, int len)
 {
        struct decode_cache *c = &ctxt->decode;
        int rc;
 
        rc = ops->read_emulated(register_address(c, ss_base(ctxt),
                                                 c->regs[VCPU_REGS_RSP]),
-                               &c->src.val, c->src.bytes, ctxt->vcpu);
+                               dest, len, ctxt->vcpu);
        if (rc != 0)
                return rc;
 
-       register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+       register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
        return rc;
 }
 
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
        struct decode_cache *c = &ctxt->decode;
        int rc;
 
-       c->src.bytes = c->dst.bytes;
-       rc = emulate_pop(ctxt, ops);
+       rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
        if (rc != 0)
                return rc;
-       c->dst.val = c->src.val;
        return 0;
 }
 
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
        return 0;
 }
 
+static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
+                          struct x86_emulate_ops *ops)
+{
+       struct decode_cache *c = &ctxt->decode;
+       int rc;
+       unsigned long cs;
+
+       rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+       if (rc)
+               return rc;
+       if (c->op_bytes == 4)
+               c->eip = (u32)c->eip;
+       rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+       if (rc)
+               return rc;
+       rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+       return rc;
+}
+
 static inline int writeback(struct x86_emulate_ctxt *ctxt,
                            struct x86_emulate_ops *ops)
 {
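In case the double pop in emulate_ret_far() above looks opaque: a far return pops the new EIP and then the CS selector, each c->op_bytes wide, and truncates EIP when the operand size is 32 bits. A little-endian toy model of those two pops (plain userspace, not the emulator):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        uint8_t stack[8];
        unsigned long rsp = 0, eip = 0, cs = 0;
        int op_bytes = 4;                         /* 32-bit operand size */
        uint32_t pushed[2] = { 0x1000, 0x0008 };  /* EIP first, then CS selector */

        memcpy(stack, pushed, sizeof(pushed));

        memcpy(&eip, &stack[rsp], op_bytes);      /* emulate_pop(&c->eip, op_bytes) */
        rsp += op_bytes;
        eip = (uint32_t)eip;                      /* mirror the op_bytes == 4 mask */

        memcpy(&cs, &stack[rsp], op_bytes);       /* emulate_pop(&cs, op_bytes) */
        rsp += op_bytes;

        printf("ret far -> cs=%#lx eip=%#lx\n",
               (unsigned long)(uint16_t)cs, eip);
        return 0;
}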
@@ -1467,11 +1485,9 @@ special_insn:
                break;
        case 0x58 ... 0x5f: /* pop reg */
        pop_instruction:
-               c->src.bytes = c->op_bytes;
-               rc = emulate_pop(ctxt, ops);
+               rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
                if (rc != 0)
                        goto done;
-               c->dst.val = c->src.val;
                break;
        case 0x63:              /* movsxd */
                if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
        mov:
                c->dst.val = c->src.val;
                break;
+       case 0xcb:              /* ret far */
+               rc = emulate_ret_far(ctxt, ops);
+               if (rc)
+                       goto done;
+               break;
        case 0xd0 ... 0xd1:     /* Grp2 */
                c->src.val = 1;
                emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
                        c->dst.type = OP_NONE;
                        break;
                case 3: /* lidt/vmmcall */
-                       if (c->modrm_mod == 3 && c->modrm_rm == 1) {
-                               rc = kvm_fix_hypercall(ctxt->vcpu);
-                               if (rc)
-                                       goto done;
-                               kvm_emulate_hypercall(ctxt->vcpu);
+                       if (c->modrm_mod == 3) {
+                               switch (c->modrm_rm) {
+                               case 1:
+                                       rc = kvm_fix_hypercall(ctxt->vcpu);
+                                       if (rc)
+                                               goto done;
+                                       break;
+                               default:
+                                       goto cannot_emulate;
+                               }
                        } else {
                                rc = read_descriptor(ctxt, ops, c->src.ptr,
                                                     &size, &address,
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0424326f167963d5b764554ebec95029b8b99439..311a073afe8a3e79e04b7658bc551509adcf6b2b 100644
@@ -48,7 +48,10 @@ struct kvm_irq_level {
         * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
         * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
         */
-       __u32 irq;
+       union {
+               __u32 irq;
+               __s32 status;
+       };
        __u32 level;
 };
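With irq now in a union with status, userspace can use the new KVM_IRQ_LINE_STATUS ioctl (defined further down in this file) to learn whether an injection was delivered or coalesced. A minimal sketch, assuming vm_fd is a VM with an in-kernel irqchip and KVM_CAP_IRQ_INJECT_STATUS has been checked; error handling trimmed:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int pulse_irq(int vm_fd, unsigned gsi)
{
        struct kvm_irq_level irq = { .irq = gsi, .level = 1 };

        if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) < 0)
                return -1;
        /* status: < 0 ignored, 0 coalesced, > 0 number of CPUs reached */
        printf("gsi %u: injection status %d\n", gsi, irq.status);

        irq.level = 0;                   /* de-assert the line again */
        return ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq);
}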
 
@@ -126,6 +129,7 @@ struct kvm_run {
                        __u64 data_offset; /* relative to kvm_run start */
                } io;
                struct {
+                       struct kvm_debug_exit_arch arch;
                } debug;
                /* KVM_EXIT_MMIO */
                struct {
@@ -217,21 +221,6 @@ struct kvm_interrupt {
        __u32 irq;
 };
 
-struct kvm_breakpoint {
-       __u32 enabled;
-       __u32 padding;
-       __u64 address;
-};
-
-/* for KVM_DEBUG_GUEST */
-struct kvm_debug_guest {
-       /* int */
-       __u32 enabled;
-       __u32 pad;
-       struct kvm_breakpoint breakpoints[4];
-       __u32 singlestep;
-};
-
 /* for KVM_GET_DIRTY_LOG */
 struct kvm_dirty_log {
        __u32 slot;
@@ -292,6 +281,17 @@ struct kvm_s390_interrupt {
        __u64 parm64;
 };
 
+/* for KVM_SET_GUEST_DEBUG */
+
+#define KVM_GUESTDBG_ENABLE            0x00000001
+#define KVM_GUESTDBG_SINGLESTEP                0x00000002
+
+struct kvm_guest_debug {
+       __u32 control;
+       __u32 pad;
+       struct kvm_guest_debug_arch arch;
+};
+
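The replacement interface is exercised per vcpu. A minimal sketch of enabling single-stepping through the new struct, assuming vcpu_fd is a vcpu file descriptor on an architecture that defines __KVM_HAVE_GUEST_DEBUG; the arch-specific part is left zeroed:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_singlestep(int vcpu_fd)
{
        struct kvm_guest_debug dbg;

        memset(&dbg, 0, sizeof(dbg));      /* leave dbg.arch untouched */
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
        return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}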
 #define KVM_TRC_SHIFT           16
 /*
  * kvm trace categories
@@ -395,6 +395,57 @@ struct kvm_trace_rec {
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 #ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
+#endif
+#ifdef __KVM_HAVE_GUEST_DEBUG
+#define KVM_CAP_SET_GUEST_DEBUG 23
+#endif
+#ifdef __KVM_HAVE_PIT
+#define KVM_CAP_REINJECT_CONTROL 24
+#endif
+#ifdef __KVM_HAVE_IOAPIC
+#define KVM_CAP_IRQ_ROUTING 25
+#endif
+#define KVM_CAP_IRQ_INJECT_STATUS 26
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+#define KVM_CAP_DEVICE_DEASSIGNMENT 27
+#endif
+
+#ifdef KVM_CAP_IRQ_ROUTING
+
+struct kvm_irq_routing_irqchip {
+       __u32 irqchip;
+       __u32 pin;
+};
+
+struct kvm_irq_routing_msi {
+       __u32 address_lo;
+       __u32 address_hi;
+       __u32 data;
+       __u32 pad;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
+
+struct kvm_irq_routing_entry {
+       __u32 gsi;
+       __u32 type;
+       __u32 flags;
+       __u32 pad;
+       union {
+               struct kvm_irq_routing_irqchip irqchip;
+               struct kvm_irq_routing_msi msi;
+               __u32 pad[8];
+       } u;
+};
+
+struct kvm_irq_routing {
+       __u32 nr;
+       __u32 flags;
+       struct kvm_irq_routing_entry entries[0];
+};
+
 #endif
 
 /*
@@ -421,14 +472,19 @@ struct kvm_trace_rec {
 #define KVM_CREATE_PIT           _IO(KVMIO,  0x64)
 #define KVM_GET_PIT              _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT              _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_IRQ_LINE_STATUS      _IOWR(KVMIO, 0x67, struct kvm_irq_level)
 #define KVM_REGISTER_COALESCED_MMIO \
                        _IOW(KVMIO,  0x67, struct kvm_coalesced_mmio_zone)
 #define KVM_UNREGISTER_COALESCED_MMIO \
                        _IOW(KVMIO,  0x68, struct kvm_coalesced_mmio_zone)
 #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
                                   struct kvm_assigned_pci_dev)
+#define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
                            struct kvm_assigned_irq)
+#define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
+#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
+                                    struct kvm_assigned_pci_dev)
 
 /*
  * ioctls for vcpu fds
@@ -440,7 +496,8 @@ struct kvm_trace_rec {
 #define KVM_SET_SREGS             _IOW(KVMIO,  0x84, struct kvm_sregs)
 #define KVM_TRANSLATE             _IOWR(KVMIO, 0x85, struct kvm_translation)
 #define KVM_INTERRUPT             _IOW(KVMIO,  0x86, struct kvm_interrupt)
-#define KVM_DEBUG_GUEST           _IOW(KVMIO,  0x87, struct kvm_debug_guest)
+/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
+#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_DEBUG_GUEST
 #define KVM_GET_MSRS              _IOWR(KVMIO, 0x88, struct kvm_msrs)
 #define KVM_SET_MSRS              _IOW(KVMIO,  0x89, struct kvm_msrs)
 #define KVM_SET_CPUID             _IOW(KVMIO,  0x8a, struct kvm_cpuid)
@@ -469,6 +526,29 @@ struct kvm_trace_rec {
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
 /* Available with KVM_CAP_NMI */
 #define KVM_NMI                   _IO(KVMIO,  0x9a)
+/* Available with KVM_CAP_SET_GUEST_DEBUG */
+#define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
+
+/*
+ * Deprecated interfaces
+ */
+struct kvm_breakpoint {
+       __u32 enabled;
+       __u32 padding;
+       __u64 address;
+};
+
+struct kvm_debug_guest {
+       __u32 enabled;
+       __u32 pad;
+       struct kvm_breakpoint breakpoints[4];
+       __u32 singlestep;
+};
+
+#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO,  0x87, struct kvm_debug_guest)
+
+#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
+#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
@@ -522,6 +602,7 @@ struct kvm_assigned_irq {
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION  KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI  (1 << 0)
 
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bf6f703642fc81c21b33608cb48aff5477a0605b..894a56e365e855e549b12563facda8f24405c15a 100644
@@ -37,6 +37,7 @@
 #define KVM_REQ_PENDING_TIMER      5
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
+#define KVM_REQ_KVMCLOCK_UPDATE    8
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID    0
 
@@ -73,7 +74,7 @@ struct kvm_vcpu {
        struct kvm_run *run;
        int guest_mode;
        unsigned long requests;
-       struct kvm_guest_debug guest_debug;
+       unsigned long guest_debug;
        int fpu_active;
        int guest_fpu_loaded;
        wait_queue_head_t wq;
@@ -107,6 +108,20 @@ struct kvm_memory_slot {
        int user_alloc;
 };
 
+struct kvm_kernel_irq_routing_entry {
+       u32 gsi;
+       int (*set)(struct kvm_kernel_irq_routing_entry *e,
+                   struct kvm *kvm, int level);
+       union {
+               struct {
+                       unsigned irqchip;
+                       unsigned pin;
+               } irqchip;
+               struct msi_msg msi;
+       };
+       struct list_head link;
+};
+
 struct kvm {
        struct mutex lock; /* protects the vcpus array and APIC accesses */
        spinlock_t mmu_lock;
@@ -127,6 +142,11 @@ struct kvm {
        struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+       struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+       struct hlist_head mask_notifier_list;
+#endif
+
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
        struct mmu_notifier mmu_notifier;
        unsigned long mmu_notifier_seq;
@@ -237,7 +257,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
                                   int user_alloc);
 long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg);
-void kvm_arch_destroy_vm(struct kvm *kvm);
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
@@ -255,8 +274,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state);
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state);
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                   struct kvm_debug_guest *dbg);
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                       struct kvm_guest_debug *dbg);
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 
 int kvm_arch_init(void *opaque);
@@ -310,7 +329,6 @@ struct kvm_assigned_dev_kernel {
        int host_irq;
        bool host_irq_disabled;
        int guest_irq;
-       struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX    (1 << 0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI     (1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX     (1 << 8)
@@ -321,8 +339,21 @@ struct kvm_assigned_dev_kernel {
        struct pci_dev *dev;
        struct kvm *kvm;
 };
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
+
+struct kvm_irq_mask_notifier {
+       void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+       int irq;
+       struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+                                   struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+                                     struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
@@ -464,4 +495,21 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 }
 #endif
 
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+
+#define KVM_MAX_IRQ_ROUTES 1024
+
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_set_irq_routing(struct kvm *kvm,
+                       const struct kvm_irq_routing_entry *entries,
+                       unsigned nr,
+                       unsigned flags);
+void kvm_free_irq_routing(struct kvm *kvm);
+
+#else
+
+static inline void kvm_free_irq_routing(struct kvm *kvm) {}
+
+#endif
+
 #endif
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 9b6f395c9625e13237abd15a9cb41150a34f48d0..2b8318c83e531d8dcf5cd6e96c09f994f12eef2a 100644
@@ -40,17 +40,4 @@ typedef unsigned long  hfn_t;
 
 typedef hfn_t pfn_t;
 
-struct kvm_pio_request {
-       unsigned long count;
-       int cur_count;
-       struct page *guest_pages[2];
-       unsigned guest_page_offset;
-       int in;
-       int port;
-       int size;
-       int string;
-       int down;
-       int rep;
-};
-
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf242af739675f01303eabb3cebb71b09a7..c3b99def9cbc3c1eff063248a064d7ee3fc8c84e 100644
@@ -83,24 +83,28 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
        return result;
 }
 
-static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
        union ioapic_redir_entry *pent;
+       int injected = -1;
 
        pent = &ioapic->redirtbl[idx];
 
        if (!pent->fields.mask) {
-               int injected = ioapic_deliver(ioapic, idx);
+               injected = ioapic_deliver(ioapic, idx);
                if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
                        pent->fields.remote_irr = 1;
        }
        if (!pent->fields.trig_mode)
                ioapic->irr &= ~(1 << idx);
+
+       return injected;
 }
 
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
        unsigned index;
+       bool mask_before, mask_after;
 
        switch (ioapic->ioregsel) {
        case IOAPIC_REG_VERSION:
@@ -120,6 +124,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                ioapic_debug("change redir index %x val %x\n", index, val);
                if (index >= IOAPIC_NUM_PINS)
                        return;
+               mask_before = ioapic->redirtbl[index].fields.mask;
                if (ioapic->ioregsel & 1) {
                        ioapic->redirtbl[index].bits &= 0xffffffff;
                        ioapic->redirtbl[index].bits |= (u64) val << 32;
@@ -128,6 +133,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                        ioapic->redirtbl[index].bits |= (u32) val;
                        ioapic->redirtbl[index].fields.remote_irr = 0;
                }
+               mask_after = ioapic->redirtbl[index].fields.mask;
+               if (mask_before != mask_after)
+                       kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
                if (ioapic->irr & (1 << index))
                        ioapic_service(ioapic, index);
                break;
@@ -202,7 +210,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
        u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
        u32 deliver_bitmask;
        struct kvm_vcpu *vcpu;
-       int vcpu_id, r = 0;
+       int vcpu_id, r = -1;
 
        ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
                     "vector=%x trig_mode=%x\n",
@@ -242,7 +250,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
                        deliver_bitmask &= ~(1 << vcpu_id);
                        vcpu = ioapic->kvm->vcpus[vcpu_id];
                        if (vcpu) {
-                               r = ioapic_inj_irq(ioapic, vcpu, vector,
+                               if (r < 0)
+                                       r = 0;
+                               r += ioapic_inj_irq(ioapic, vcpu, vector,
                                               trig_mode, delivery_mode);
                        }
                }
@@ -253,8 +263,10 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
                                continue;
                        deliver_bitmask &= ~(1 << vcpu_id);
                        vcpu = ioapic->kvm->vcpus[vcpu_id];
-                       if (vcpu)
+                       if (vcpu) {
                                ioapic_inj_nmi(vcpu);
+                               r = 1;
+                       }
                        else
                                ioapic_debug("NMI to vcpu %d failed\n",
                                                vcpu->vcpu_id);
@@ -268,11 +280,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
        return r;
 }
 
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
        u32 old_irr = ioapic->irr;
        u32 mask = 1 << irq;
        union ioapic_redir_entry entry;
+       int ret = 1;
 
        if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
                entry = ioapic->redirtbl[irq];
@@ -283,25 +296,26 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
                        ioapic->irr |= mask;
                        if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
                            || !entry.fields.remote_irr)
-                               ioapic_service(ioapic, irq);
+                               ret = ioapic_service(ioapic, irq);
                }
        }
+       return ret;
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
                                    int trigger_mode)
 {
        union ioapic_redir_entry *ent;
 
-       ent = &ioapic->redirtbl[gsi];
+       ent = &ioapic->redirtbl[pin];
 
-       kvm_notify_acked_irq(ioapic->kvm, gsi);
+       kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
 
        if (trigger_mode == IOAPIC_LEVEL_TRIG) {
                ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
                ent->fields.remote_irr = 0;
-               if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-                       ioapic_service(ioapic, gsi);
+               if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
+                       ioapic_service(ioapic, pin);
        }
 }
 
@@ -426,3 +440,4 @@ int kvm_ioapic_init(struct kvm *kvm)
        kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
        return 0;
 }
+
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 49c9581d25860edd3ad76ba2991a05b9dc9ba6c8..a34bd5e6436bddf086f4e7dca13bdce09776fa64 100644
@@ -83,7 +83,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
                                       unsigned long bitmap);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
                                u8 dest_mode);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aa5d1e5c497ef12e3fe9aa12b79865a2081e4b43..864ac5483baade3fce7085c3c51f23e493426914 100644
  */
 
 #include <linux/kvm_host.h>
+
+#include <asm/msidef.h>
+
 #include "irq.h"
 
 #include "ioapic.h"
 
-/* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+                          struct kvm *kvm, int level)
+{
+#ifdef CONFIG_X86
+       return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+#else
+       return -1;
+#endif
+}
+
+static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
+                             struct kvm *kvm, int level)
+{
+       return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+}
+
+static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+                      struct kvm *kvm, int level)
+{
+       int vcpu_id, r = -1;
+       struct kvm_vcpu *vcpu;
+       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+       int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+                       >> MSI_ADDR_DEST_ID_SHIFT;
+       int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
+                       >> MSI_DATA_VECTOR_SHIFT;
+       int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+                               (unsigned long *)&e->msi.address_lo);
+       int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+                               (unsigned long *)&e->msi.data);
+       int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+                               (unsigned long *)&e->msi.data);
+       u32 deliver_bitmask;
+
+       BUG_ON(!ioapic);
+
+       deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+                               dest_id, dest_mode);
+       /* IOAPIC delivery mode value is the same as MSI here */
+       switch (delivery_mode) {
+       case IOAPIC_LOWEST_PRIORITY:
+               vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+                               deliver_bitmask);
+               if (vcpu != NULL)
+                       r = kvm_apic_set_irq(vcpu, vector, trig_mode);
+               else
+                       printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+               break;
+       case IOAPIC_FIXED:
+               for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+                       if (!(deliver_bitmask & (1 << vcpu_id)))
+                               continue;
+                       deliver_bitmask &= ~(1 << vcpu_id);
+                       vcpu = ioapic->kvm->vcpus[vcpu_id];
+                       if (vcpu) {
+                               if (r < 0)
+                                       r = 0;
+                               r += kvm_apic_set_irq(vcpu, vector, trig_mode);
+                       }
+               }
+               break;
+       default:
+               break;
+       }
+       return r;
+}
+
+/* This should be called with the kvm->lock mutex held
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
-       unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
+       struct kvm_kernel_irq_routing_entry *e;
+       unsigned long *irq_state, sig_level;
+       int ret = -1;
+
+       if (irq < KVM_IOAPIC_NUM_PINS) {
+               irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
-       /* Logical OR for level trig interrupt */
-       if (level)
-               set_bit(irq_source_id, irq_state);
-       else
-               clear_bit(irq_source_id, irq_state);
+               /* Logical OR for level trig interrupt */
+               if (level)
+                       set_bit(irq_source_id, irq_state);
+               else
+                       clear_bit(irq_source_id, irq_state);
+               sig_level = !!(*irq_state);
+       } else /* Deal with MSI/MSI-X */
+               sig_level = 1;
 
        /* Not possible to detect if the guest uses the PIC or the
         * IOAPIC.  So set the bit in both. The guest will ignore
         * writes to the unused one.
         */
-       kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
-#ifdef CONFIG_X86
-       kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
-#endif
+       list_for_each_entry(e, &kvm->irq_routing, link)
+               if (e->gsi == irq) {
+                       int r = e->set(e, kvm, sig_level);
+                       if (r < 0)
+                               continue;
+
+                       ret = r + ((ret < 0) ? 0 : ret);
+               }
+       return ret;
 }
 
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+       struct kvm_kernel_irq_routing_entry *e;
        struct kvm_irq_ack_notifier *kian;
        struct hlist_node *n;
+       unsigned gsi = pin;
+
+       list_for_each_entry(e, &kvm->irq_routing, link)
+               if (e->irqchip.irqchip == irqchip &&
+                   e->irqchip.pin == pin) {
+                       gsi = e->gsi;
+                       break;
+               }
 
        hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
                if (kian->gsi == gsi)
@@ -99,3 +196,177 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
                clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
        clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
 }
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+                                   struct kvm_irq_mask_notifier *kimn)
+{
+       kimn->irq = irq;
+       hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+                                     struct kvm_irq_mask_notifier *kimn)
+{
+       hlist_del(&kimn->link);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+{
+       struct kvm_irq_mask_notifier *kimn;
+       struct hlist_node *n;
+
+       hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+               if (kimn->irq == irq)
+                       kimn->func(kimn, mask);
+}
+
+static void __kvm_free_irq_routing(struct list_head *irq_routing)
+{
+       struct kvm_kernel_irq_routing_entry *e, *n;
+
+       list_for_each_entry_safe(e, n, irq_routing, link)
+               kfree(e);
+}
+
+void kvm_free_irq_routing(struct kvm *kvm)
+{
+       __kvm_free_irq_routing(&kvm->irq_routing);
+}
+
+static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+                              const struct kvm_irq_routing_entry *ue)
+{
+       int r = -EINVAL;
+       int delta;
+
+       e->gsi = ue->gsi;
+       switch (ue->type) {
+       case KVM_IRQ_ROUTING_IRQCHIP:
+               delta = 0;
+               switch (ue->u.irqchip.irqchip) {
+               case KVM_IRQCHIP_PIC_MASTER:
+                       e->set = kvm_set_pic_irq;
+                       break;
+               case KVM_IRQCHIP_PIC_SLAVE:
+                       e->set = kvm_set_pic_irq;
+                       delta = 8;
+                       break;
+               case KVM_IRQCHIP_IOAPIC:
+                       e->set = kvm_set_ioapic_irq;
+                       break;
+               default:
+                       goto out;
+               }
+               e->irqchip.irqchip = ue->u.irqchip.irqchip;
+               e->irqchip.pin = ue->u.irqchip.pin + delta;
+               break;
+       case KVM_IRQ_ROUTING_MSI:
+               e->set = kvm_set_msi;
+               e->msi.address_lo = ue->u.msi.address_lo;
+               e->msi.address_hi = ue->u.msi.address_hi;
+               e->msi.data = ue->u.msi.data;
+               break;
+       default:
+               goto out;
+       }
+       r = 0;
+out:
+       return r;
+}
+
+
+int kvm_set_irq_routing(struct kvm *kvm,
+                       const struct kvm_irq_routing_entry *ue,
+                       unsigned nr,
+                       unsigned flags)
+{
+       struct list_head irq_list = LIST_HEAD_INIT(irq_list);
+       struct list_head tmp = LIST_HEAD_INIT(tmp);
+       struct kvm_kernel_irq_routing_entry *e = NULL;
+       unsigned i;
+       int r;
+
+       for (i = 0; i < nr; ++i) {
+               r = -EINVAL;
+               if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
+                       goto out;
+               if (ue->flags)
+                       goto out;
+               r = -ENOMEM;
+               e = kzalloc(sizeof(*e), GFP_KERNEL);
+               if (!e)
+                       goto out;
+               r = setup_routing_entry(e, ue);
+               if (r)
+                       goto out;
+               ++ue;
+               list_add(&e->link, &irq_list);
+               e = NULL;
+       }
+
+       mutex_lock(&kvm->lock);
+       list_splice(&kvm->irq_routing, &tmp);
+       INIT_LIST_HEAD(&kvm->irq_routing);
+       list_splice(&irq_list, &kvm->irq_routing);
+       INIT_LIST_HEAD(&irq_list);
+       list_splice(&tmp, &irq_list);
+       mutex_unlock(&kvm->lock);
+
+       r = 0;
+
+out:
+       kfree(e);
+       __kvm_free_irq_routing(&irq_list);
+       return r;
+}
+
+#define IOAPIC_ROUTING_ENTRY(irq) \
+       { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
+         .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
+
+#ifdef CONFIG_X86
+#  define PIC_ROUTING_ENTRY(irq) \
+       { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
+         .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+#  define ROUTING_ENTRY2(irq) \
+       IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
+#else
+#  define ROUTING_ENTRY2(irq) \
+       IOAPIC_ROUTING_ENTRY(irq)
+#endif
+
+static const struct kvm_irq_routing_entry default_routing[] = {
+       ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
+       ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
+       ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
+       ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
+       ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
+       ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
+       ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
+       ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
+       ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
+       ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
+       ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
+       ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
+#ifdef CONFIG_IA64
+       ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
+       ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
+       ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
+       ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
+       ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
+       ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
+       ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
+       ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
+       ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
+       ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
+       ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
+       ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
+#endif
+};
+
+int kvm_setup_default_irq_routing(struct kvm *kvm)
+{
+       return kvm_set_irq_routing(kvm, default_routing,
+                                  ARRAY_SIZE(default_routing), 0);
+}
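Userspace can replace the default table built above through KVM_SET_GSI_ROUTING; note the ioctl installs a complete new table, so real callers resend the default IRQCHIP entries alongside anything they add. A minimal sketch (not part of the patch) that routes one GSI to an MSI message, assuming vm_fd is a VM with an in-kernel irqchip and error handling is trimmed:

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Route 'gsi' to an MSI message (address/data as the guest programmed them). */
static int route_msi(int vm_fd, unsigned gsi, unsigned addr_lo, unsigned data)
{
        struct kvm_irq_routing *table;
        int r;

        /* One variable-length table with a single entry; calloc zeroes flags. */
        table = calloc(1, sizeof(*table) + sizeof(table->entries[0]));
        if (!table)
                return -1;
        table->nr = 1;
        table->entries[0].gsi  = gsi;
        table->entries[0].type = KVM_IRQ_ROUTING_MSI;
        table->entries[0].u.msi.address_lo = addr_lo;
        table->entries[0].u.msi.address_hi = 0;
        table->entries[0].u.msi.data       = data;

        r = ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
        free(table);
        return r;
}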
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 29a667ce35b0ebc1840f1fcbb6ae3cf3313fa3b8..605697e9c4dd50570b04cba68d80219450055828 100644
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-#ifdef CONFIG_X86
-#include <asm/msidef.h>
-#endif
-
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
 #endif
@@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static bool kvm_rebooting;
 
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
-
-#ifdef CONFIG_X86
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
-{
-       int vcpu_id;
-       struct kvm_vcpu *vcpu;
-       struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
-       int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
-                       >> MSI_ADDR_DEST_ID_SHIFT;
-       int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
-                       >> MSI_DATA_VECTOR_SHIFT;
-       int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-                               (unsigned long *)&dev->guest_msi.address_lo);
-       int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-                               (unsigned long *)&dev->guest_msi.data);
-       int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
-                               (unsigned long *)&dev->guest_msi.data);
-       u32 deliver_bitmask;
-
-       BUG_ON(!ioapic);
-
-       deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-                               dest_id, dest_mode);
-       /* IOAPIC delivery mode value is the same as MSI here */
-       switch (delivery_mode) {
-       case IOAPIC_LOWEST_PRIORITY:
-               vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-                               deliver_bitmask);
-               if (vcpu != NULL)
-                       kvm_apic_set_irq(vcpu, vector, trig_mode);
-               else
-                       printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
-               break;
-       case IOAPIC_FIXED:
-               for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-                       if (!(deliver_bitmask & (1 << vcpu_id)))
-                               continue;
-                       deliver_bitmask &= ~(1 << vcpu_id);
-                       vcpu = ioapic->kvm->vcpus[vcpu_id];
-                       if (vcpu)
-                               kvm_apic_set_irq(vcpu, vector, trig_mode);
-               }
-               break;
-       default:
-               printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
-       }
-}
-#else
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
-#endif
-
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                      int assigned_dev_id)
 {
@@ -162,13 +107,10 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
         * finer-grained lock, update this
         */
        mutex_lock(&assigned_dev->kvm->lock);
-       if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
-               kvm_set_irq(assigned_dev->kvm,
-                           assigned_dev->irq_source_id,
-                           assigned_dev->guest_irq, 1);
-       else if (assigned_dev->irq_requested_type &
-                               KVM_ASSIGNED_DEV_GUEST_MSI) {
-               assigned_device_msi_dispatch(assigned_dev);
+       kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+                   assigned_dev->guest_irq, 1);
+
+       if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
                enable_irq(assigned_dev->host_irq);
                assigned_dev->host_irq_disabled = false;
        }
@@ -331,18 +273,24 @@ static int assigned_device_update_msi(struct kvm *kvm,
 {
        int r;
 
+       adev->guest_irq = airq->guest_irq;
        if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
                /* x86 doesn't care about the upper address of the guest MSI message addr */
                adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
                adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-               adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
-               adev->guest_msi.data = airq->guest_msi.data;
                adev->ack_notifier.gsi = -1;
        } else if (msi2intx) {
                adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
                adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-               adev->guest_irq = airq->guest_irq;
                adev->ack_notifier.gsi = airq->guest_irq;
+       } else {
+               /*
+                * The guest asked for device MSI to be disabled, so disable
+                * MSI and fall back to INTx by default. Note this path is
+                * only taken in the non-msi2intx case.
+                */
+               assigned_device_update_intx(kvm, adev, airq);
+               return 0;
        }
 
        if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
@@ -379,6 +327,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 {
        int r = 0;
        struct kvm_assigned_dev_kernel *match;
+       u32 current_flags = 0, changed_flags;
 
        mutex_lock(&kvm->lock);
 
@@ -416,8 +365,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                }
        }
 
-       if ((!msi2intx &&
-            (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+       if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+                (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
+               current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+
+       changed_flags = assigned_irq->flags ^ current_flags;
+
+       if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
            (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
                r = assigned_device_update_msi(kvm, match, assigned_irq);
@@ -563,7 +517,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                goto out;
        }
 
-       if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+       if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
                kvm_deassign_device(kvm, match);
 
        kvm_free_assigned_device(kvm, match);
@@ -581,8 +535,10 @@ static inline int valid_vcpu(int n)
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
-       if (pfn_valid(pfn))
-               return PageReserved(pfn_to_page(pfn));
+       if (pfn_valid(pfn)) {
+               struct page *page = compound_head(pfn_to_page(pfn));
+               return PageReserved(page);
+       }
 
        return true;
 }
@@ -828,6 +784,10 @@ static struct kvm *kvm_create_vm(void)
 
        if (IS_ERR(kvm))
                goto out;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+       INIT_LIST_HEAD(&kvm->irq_routing);
+       INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -909,6 +869,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
        spin_unlock(&kvm_lock);
+       kvm_free_irq_routing(kvm);
        kvm_io_bus_destroy(&kvm->pio_bus);
        kvm_io_bus_destroy(&kvm->mmio_bus);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -1755,13 +1716,13 @@ out_free2:
                r = 0;
                break;
        }
-       case KVM_DEBUG_GUEST: {
-               struct kvm_debug_guest dbg;
+       case KVM_SET_GUEST_DEBUG: {
+               struct kvm_guest_debug dbg;
 
                r = -EFAULT;
                if (copy_from_user(&dbg, argp, sizeof dbg))
                        goto out;
-               r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
+               r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
                if (r)
                        goto out;
                r = 0;
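
The old KVM_DEBUG_GUEST ioctl is replaced here by KVM_SET_GUEST_DEBUG and struct kvm_guest_debug. A minimal userspace sketch of the renamed call, assuming a vcpu fd obtained via KVM_CREATE_VCPU and the KVM_GUESTDBG_* control flags from the updated <linux/kvm.h>:

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Enable single-step debugging on an already-created vcpu fd (sketch). */
	static int enable_singlestep(int vcpu_fd)
	{
		struct kvm_guest_debug dbg;

		memset(&dbg, 0, sizeof(dbg));
		dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

		return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	}
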
@@ -1928,6 +1889,36 @@ static long kvm_vm_ioctl(struct file *filp,
                        goto out;
                break;
        }
+#endif
+#ifdef KVM_CAP_IRQ_ROUTING
+       case KVM_SET_GSI_ROUTING: {
+               struct kvm_irq_routing routing;
+               struct kvm_irq_routing __user *urouting;
+               struct kvm_irq_routing_entry *entries;
+
+               r = -EFAULT;
+               if (copy_from_user(&routing, argp, sizeof(routing)))
+                       goto out;
+               r = -EINVAL;
+               if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+                       goto out;
+               if (routing.flags)
+                       goto out;
+               r = -ENOMEM;
+               entries = vmalloc(routing.nr * sizeof(*entries));
+               if (!entries)
+                       goto out;
+               r = -EFAULT;
+               urouting = argp;
+               if (copy_from_user(entries, urouting->entries,
+                                  routing.nr * sizeof(*entries)))
+                       goto out_free_irq_routing;
+               r = kvm_set_irq_routing(kvm, entries, routing.nr,
+                                       routing.flags);
+       out_free_irq_routing:
+               vfree(entries);
+               break;
+       }
 #endif
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
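
From userspace, the new KVM_SET_GSI_ROUTING ioctl takes a variable-length struct kvm_irq_routing on the VM fd. A minimal sketch that routes one GSI to an MSI message, assuming a VM fd from KVM_CREATE_VM with the in-kernel irqchip enabled; the address and data values are placeholders:

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Route GSI 24 to an MSI message (sketch; address/data are placeholders). */
	static int set_one_msi_route(int vm_fd)
	{
		struct kvm_irq_routing *routing;
		int r;

		routing = calloc(1, sizeof(*routing) + sizeof(routing->entries[0]));
		if (!routing)
			return -1;

		routing->nr = 1;
		routing->entries[0].gsi = 24;
		routing->entries[0].type = KVM_IRQ_ROUTING_MSI;
		routing->entries[0].u.msi.address_lo = 0xfee00000;	/* example dest */
		routing->entries[0].u.msi.data = 0x4041;		/* example vector */

		r = ioctl(vm_fd, KVM_SET_GSI_ROUTING, routing);
		free(routing);
		return r;
	}

As the handler above suggests, kvm_set_irq_routing() installs the table it is given wholesale, so a real caller would pass the complete set of routes rather than only the changed entry.
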
@@ -1995,6 +1986,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
        case KVM_CAP_USER_MEMORY:
        case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
                return 1;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+       case KVM_CAP_IRQ_ROUTING:
+               return KVM_MAX_IRQ_ROUTES;
+#endif
        default:
                break;
        }
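
With the capability hooked up here, userspace can probe for routing support before using the ioctl above; a short sketch, assuming an open /dev/kvm fd (KVM_CHECK_EXTENSION reports the route-table capacity rather than just 0/1):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Returns the maximum number of routing entries, or 0 if unsupported. */
	static int irq_routing_capacity(int kvm_fd)
	{
		int r = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQ_ROUTING);

		return r > 0 ? r : 0;
	}
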