Currently, kvm disables preemption while the new virtualization registers are
in use. This is, of course, not very good for latency-sensitive workloads (one
use of virtualization is to offload the user interface and other latency-insensitive
stuff to a container, so that it is easier to analyze the
remaining workload). This patch re-enables preemption for kvm; preemption
is now disabled only when switching the registers in and out, and during
the switch to guest mode and back.
Contains fixes from Shaohua Li <shaohua.li@intel.com>.
Signed-off-by: Avi Kivity <avi@qumranet.com>
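
As background, the patch relies on the scheduler's preempt notifier hooks: a
notifier is registered while the vcpu state is loaded, and its sched_out/sched_in
callbacks save and restore the virtualization registers around every context
switch. Below is a minimal sketch of that pattern, assuming
CONFIG_PREEMPT_NOTIFIERS is available; struct my_state and the my_* names are
illustrative only and are not part of this patch:

	/* Illustrative sketch only; mirrors the pattern the patch uses for kvm_vcpu. */
	#include <linux/kernel.h>
	#include <linux/preempt.h>
	#include <linux/sched.h>

	struct my_state {
		struct preempt_notifier notifier;
		int loaded;	/* are our registers currently live on a cpu? */
	};

	/* Runs when the registering task is scheduled back in on 'cpu'. */
	static void my_sched_in(struct preempt_notifier *pn, int cpu)
	{
		struct my_state *s = container_of(pn, struct my_state, notifier);

		/* reload the expensive hardware state for this task on 'cpu' */
		s->loaded = 1;
	}

	/* Runs just before the registering task is switched out in favour of 'next'. */
	static void my_sched_out(struct preempt_notifier *pn,
				 struct task_struct *next)
	{
		struct my_state *s = container_of(pn, struct my_state, notifier);

		/* save the expensive hardware state before 'next' runs */
		s->loaded = 0;
	}

	static struct preempt_ops my_preempt_ops = {
		.sched_in  = my_sched_in,
		.sched_out = my_sched_out,
	};

	static void my_enter(struct my_state *s)
	{
		preempt_notifier_init(&s->notifier, &my_preempt_ops);
		preempt_notifier_register(&s->notifier);
		/* from here on, the callbacks fire on every context switch */
	}

	static void my_exit(struct my_state *s)
	{
		preempt_notifier_unregister(&s->notifier);
	}

With this in place the expensive state only has to be touched when the task
actually loses or regains the cpu, so the long-running region between
my_enter() and my_exit() can stay fully preemptible.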
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
Support hosting fully virtualized guest machines using hardware
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/preempt.h>
#include <asm/signal.h>
#include <linux/kvm.h>
struct kvm_vcpu {
struct kvm *kvm;
+ struct preempt_notifier preempt_notifier;
int vcpu_id;
struct mutex mutex;
int cpu;
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
- void (*vcpu_load)(struct kvm_vcpu *vcpu);
+ void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*vcpu_decache)(struct kvm_vcpu *vcpu);
struct kvm_arch_ops *kvm_arch_ops;
+static __read_mostly struct preempt_ops kvm_preempt_ops;
+
#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
static struct kvm_stats_debugfs_item {
*/
static void vcpu_load(struct kvm_vcpu *vcpu)
{
+ int cpu;
+
mutex_lock(&vcpu->mutex);
- kvm_arch_ops->vcpu_load(vcpu);
+ cpu = get_cpu();
+ preempt_notifier_register(&vcpu->preempt_notifier);
+ kvm_arch_ops->vcpu_load(vcpu, cpu);
+ put_cpu();
}
static void vcpu_put(struct kvm_vcpu *vcpu)
{
+ preempt_disable();
kvm_arch_ops->vcpu_put(vcpu);
+ preempt_notifier_unregister(&vcpu->preempt_notifier);
+ preempt_enable();
mutex_unlock(&vcpu->mutex);
}
{
if (!need_resched())
return;
}
EXPORT_SYMBOL_GPL(kvm_resched);
unsigned bytes;
int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
- kvm_arch_ops->vcpu_put(vcpu);
q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
PAGE_KERNEL);
if (!q) {
- kvm_arch_ops->vcpu_load(vcpu);
free_pio_guest_pages(vcpu);
return -ENOMEM;
}
memcpy(p, q, bytes);
q -= vcpu->pio.guest_page_offset;
vunmap(q);
- kvm_arch_ops->vcpu_load(vcpu);
free_pio_guest_pages(vcpu);
return 0;
}
if (IS_ERR(vcpu))
return PTR_ERR(vcpu);
+ preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+
vcpu_load(vcpu);
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
+static inline
+struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
+{
+ return container_of(pn, struct kvm_vcpu, preempt_notifier);
+}
+
+static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
+{
+ struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+ kvm_arch_ops->vcpu_load(vcpu, cpu);
+}
+
+static void kvm_sched_out(struct preempt_notifier *pn,
+ struct task_struct *next)
+{
+ struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+ kvm_arch_ops->vcpu_put(vcpu);
+}
+
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
{
int r;
+ kvm_preempt_ops.sched_in = kvm_sched_in;
+ kvm_preempt_ops.sched_out = kvm_sched_out;
+
kvm_mmu_free_some_pages(vcpu);
if (r < 0) {
spin_unlock(&vcpu->kvm->lock);
- kvm_arch_ops->vcpu_put(vcpu);
r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
- kvm_arch_ops->vcpu_load(vcpu);
spin_lock(&vcpu->kvm->lock);
kvm_mmu_free_some_pages(vcpu);
}
-static void svm_vcpu_load(struct kvm_vcpu *vcpu)
+static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (unlikely(cpu != vcpu->cpu)) {
u64 tsc_this, delta;
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
rdtscll(vcpu->host_tsc);
}
static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
static void vmx_load_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long flags;
if (!vmx->host_state.loaded)
return;
* If we have to reload gs, we must take care to
* preserve our gs base.
*/
+ local_irq_save(flags);
load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
#endif
+ local_irq_restore(flags);
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
*/
-static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 phys_addr = __pa(vmx->vmcs);
if (vcpu->cpu != cpu)
vcpu_clear(vcpu);
{
vmx_load_host_state(vcpu);
kvm_put_guest_fpu(vcpu);
}
static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
if (unlikely(r))
goto out;
+ preempt_disable();
+
if (!vcpu->mmio_read_completed)
do_interrupt_requests(vcpu, kvm_run);
vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
+ vmx->launched = 1;
+
+ preempt_enable();
if (unlikely(fail)) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
if (unlikely(prof_on == KVM_PROFILING))
profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
r = kvm_handle_exit(kvm_run, vcpu);
if (r > 0) {
/* Give scheduler a change to reschedule. */
{
int err;
struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL);
+ int cpu;
if (!vmx)
return ERR_PTR(-ENOMEM);
- vmx_vcpu_load(&vmx->vcpu);
+ cpu = get_cpu();
+ vmx_vcpu_load(&vmx->vcpu, cpu);
err = vmx_vcpu_setup(&vmx->vcpu);
vmx_vcpu_put(&vmx->vcpu);
+ put_cpu();