2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/*
 * Statistics exported through debugfs.  VCPU_STAT(x) expands to the
 * byte offset of counter 'x' inside struct kvm_vcpu plus the
 * KVM_STAT_VCPU tag, so generic KVM code can locate each per-vcpu
 * counter by offset.
 *
 * NOTE(review): the embedded original line numbers in this extract are
 * non-contiguous; the array's terminating sentinel entry and closing
 * brace are apparently missing from this view — confirm upstream.
 */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_validity", VCPU_STAT(exit_validity) },
35 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
36 { "exit_external_request", VCPU_STAT(exit_external_request) },
37 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
38 { "exit_instruction", VCPU_STAT(exit_instruction) },
39 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
40 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
41 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
42 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
43 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
44 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
45 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
46 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
47 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
48 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
49 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
50 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
51 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
52 { "instruction_spx", VCPU_STAT(instruction_spx) },
53 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
54 { "instruction_stap", VCPU_STAT(instruction_stap) },
55 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
56 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
57 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
58 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
59 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
60 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
61 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
62 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
63 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
64 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
65 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
70 /* Section: not file related */
/*
 * Architecture hook stubs.  Function bodies (braces, return
 * statements) are missing from this extract — only signatures and a
 * few interior lines survived.  The surviving comment at original line
 * 73 indicates the hardware enable/disable hooks are intentionally
 * empty: SIE virtualization is always available on s390.
 */
71 void kvm_arch_hardware_enable(void *garbage)
73 /* every s390 is virtualization enabled ;-) */
76 void kvm_arch_hardware_disable(void *garbage)
80 void decache_vcpus_on_cpu(int cpu)
84 int kvm_arch_hardware_setup(void)
89 void kvm_arch_hardware_unsetup(void)
93 void kvm_arch_check_processor_compat(void *rtn)
97 int kvm_arch_init(void *opaque)
102 void kvm_arch_exit(void)
106 /* Section: device related */
/*
 * Device ioctl: the only request handled here is KVM_S390_ENABLE_SIE,
 * which is delegated to s390_enable_sie().  The fallthrough return for
 * other ioctls is not visible in this extract.
 */
107 long kvm_arch_dev_ioctl(struct file *filp,
108 unsigned int ioctl, unsigned long arg)
110 if (ioctl == KVM_S390_ENABLE_SIE)
111 return s390_enable_sie();
/* Body missing from this view — presumably reports supported extensions. */
115 int kvm_dev_ioctl_check_extension(long ext)
120 /* Section: vm related */
122 * Get (and clear) the dirty memory log for a memory slot.
/* Body missing from this extract. */
124 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
125 struct kvm_dirty_log *log)
/*
 * VM-scope ioctl dispatcher.  Only the KVM_S390_INTERRUPT case is
 * visible here: it copies a struct kvm_s390_interrupt from userspace
 * and injects it as a floating interrupt via kvm_s390_inject_vm().
 * The switch scaffolding, error paths and default case are missing
 * from this extract.
 */
130 long kvm_arch_vm_ioctl(struct file *filp,
131 unsigned int ioctl, unsigned long arg)
133 struct kvm *kvm = filp->private_data;
134 void __user *argp = (void __user *)arg;
138 case KVM_S390_INTERRUPT: {
139 struct kvm_s390_interrupt s390int;
142 if (copy_from_user(&s390int, argp, sizeof(s390int)))
144 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Create a VM: enable SIE for the host, allocate the struct kvm and
 * the SCA (system control area) page, register an s390dbf debug
 * feature named "kvm-<pid>", and initialise the floating-interrupt
 * lock/list.  Error-path labels and most cleanup lines are missing
 * from this extract; the surviving free_page() at original line 189
 * looks like part of that unwinding.
 */
154 struct kvm *kvm_arch_create_vm(void)
160 rc = s390_enable_sie();
165 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
169 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
173 sprintf(debug_name, "kvm-%u", current->pid);
175 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
179 spin_lock_init(&kvm->arch.float_int.lock);
180 INIT_LIST_HEAD(&kvm->arch.float_int.list);
182 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
183 VM_EVENT(kvm, 3, "%s", "vm created");
/* Balances the module_put() in kvm_arch_destroy_vm(). */
185 try_module_get(THIS_MODULE);
189 free_page((unsigned long)(kvm->arch.sca));
/* Tear down what kvm_arch_create_vm() set up: debug feature, SCA page, module ref. */
196 void kvm_arch_destroy_vm(struct kvm *kvm)
198 debug_unregister(kvm->arch.dbf);
199 free_page((unsigned long)(kvm->arch.sca))
201 module_put(THIS_MODULE);
204 /* Section: vcpu related */
205 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
210 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
212 /* kvm common code refers to this, but doesn't call it */
/*
 * Scheduled in: stash the host's FP and access registers, sanitise the
 * guest fpc against FPC_VALID_MASK, then load the guest FP/access
 * register state.  If a signal is already pending, request a stop
 * intercept so SIE exits promptly.
 */
216 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
218 save_fp_regs(&vcpu->arch.host_fpregs);
219 save_access_regs(vcpu->arch.host_acrs);
220 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
221 restore_fp_regs(&vcpu->arch.guest_fpregs);
222 restore_access_regs(vcpu->arch.guest_acrs);
224 if (signal_pending(current))
225 atomic_set_mask(CPUSTAT_STOP_INT,
226 &vcpu->arch.sie_block->cpuflags);
/* Scheduled out: mirror image of kvm_arch_vcpu_load(). */
229 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
231 save_fp_regs(&vcpu->arch.guest_fpregs);
232 save_access_regs(vcpu->arch.guest_acrs);
233 restore_fp_regs(&vcpu->arch.host_fpregs);
234 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Reset the vcpu to the architected initial-CPU-reset state (see the
 * comment at original line 239), except that the guest is left in
 * z/Architecture mode rather than switched to ESA.
 */
237 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
239 /* this equals initial cpu reset in pop, but we don't switch to ESA */
240 vcpu->arch.sie_block->gpsw.mask = 0UL;
241 vcpu->arch.sie_block->gpsw.addr = 0UL;
242 vcpu->arch.sie_block->prefix = 0UL;
243 vcpu->arch.sie_block->ihcpu = 0xffff;
244 vcpu->arch.sie_block->cputm = 0UL;
245 vcpu->arch.sie_block->ckc = 0UL;
246 vcpu->arch.sie_block->todpr = 0;
/* NOTE(review): 0xE0 / 0xC2000000 look like the architected reset
 * values for control registers 0 and 14 — confirm against the s390
 * Principles of Operation. */
247 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
248 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
249 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
250 vcpu->arch.guest_fpregs.fpc = 0;
/* lfpc loads the just-zeroed fpc into the real FP control register. */
251 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
252 vcpu->arch.sie_block->gbea = 1;
/*
 * One-time SIE control block setup: z/Architecture mode flag, magic
 * gmslm/gmsor values (presumably guest memory limit/origin — TODO
 * confirm), ecb/eca feature bits, the clock-comparator wakeup timer
 * (kvm_s390_idle_wakeup), and a host-derived CPU id with version 0xfe.
 */
255 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
257 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
258 vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
259 vcpu->arch.sie_block->gmsor = 0x000000000000;
260 vcpu->arch.sie_block->ecb = 2;
261 vcpu->arch.sie_block->eca = 0xC1002001U;
262 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
263 (unsigned long) vcpu);
264 get_cpu_id(&vcpu->arch.cpu_id);
265 vcpu->arch.cpu_id.version = 0xfe;
/*
 * Allocate and wire up a vcpu: kzalloc the struct, get a zeroed page
 * for the SIE control block, register the block in the VM's SCA slot
 * for this id (high/low halves of the SCA address go into scaoh/
 * scaol), and link the vcpu's local interrupt structure into the VM's
 * floating-interrupt table under the float_int lock.  Error labels and
 * the final return are missing from this extract.
 */
269 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
272 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
278 vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
280 if (!vcpu->arch.sie_block)
283 vcpu->arch.sie_block->icpua = id;
284 BUG_ON(!kvm->arch.sca);
/* The SCA slot for this id must be free — one sie_block per cpu id. */
285 BUG_ON(kvm->arch.sca->cpu[id].sda);
286 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
287 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
288 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
290 spin_lock_init(&vcpu->arch.local_int.lock);
291 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
292 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
293 spin_lock_bh(&kvm->arch.float_int.lock);
294 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
295 init_waitqueue_head(&vcpu->arch.local_int.wq);
296 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
297 spin_unlock_bh(&kvm->arch.float_int.lock);
299 rc = kvm_vcpu_init(vcpu, kvm, id);
302 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
303 vcpu->arch.sie_block);
305 try_module_get(THIS_MODULE);
/* Release what kvm_arch_vcpu_create() took: sie_block page + module ref. */
314 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
316 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
317 free_page((unsigned long)(vcpu->arch.sie_block));
319 module_put(THIS_MODULE);
322 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
324 /* kvm common code refers to this, but never calls it */
/* KVM_S390_INITIAL_RESET backend — body beyond the reset call is missing here. */
329 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
332 kvm_s390_vcpu_initial_reset(vcpu);
337 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
340 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
345 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
348 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS: access registers and control registers in, nothing else visible here. */
353 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
354 struct kvm_sregs *sregs)
357 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
358 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS: mirror of the setter above. */
363 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
364 struct kvm_sregs *sregs)
367 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
368 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/* KVM_SET_FPU: floating point registers and FP control word in. */
373 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
376 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
377 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/* KVM_GET_FPU: mirror of the setter above. */
382 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
385 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
386 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Install an initial PSW.  Refused (error path not visible in this
 * extract) while the vcpu is marked CPUSTAT_RUNNING, since the PSW
 * would race with SIE.
 */
391 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
396 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
399 vcpu->arch.sie_block->gpsw = psw;
404 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
405 struct kvm_translation *tr)
407 return -EINVAL; /* not implemented yet */
410 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
411 struct kvm_debug_guest *dbg)
413 return -EINVAL; /* not implemented yet */
/*
 * One trip through SIE: seed the control block's gg14 area with guest
 * r14/r15 (16 bytes = two 64-bit gprs), clear the previous intercept
 * code, enter the guest via sie64a(), and copy r14/r15 back out.
 * Lines between these fragments are missing from this extract.
 */
416 static void __vcpu_run(struct kvm_vcpu *vcpu)
418 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
423 vcpu->arch.sie_block->icptcode = 0;
427 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
428 atomic_read(&vcpu->arch.sie_block->cpuflags));
429 sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
430 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
431 vcpu->arch.sie_block->icptcode);
436 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * KVM_RUN: apply the vcpu's signal mask, mark the vcpu running, replay
 * the PSW userspace handed back after a SIEIC exit, then loop:
 * deliver pending interrupts, run the guest, handle the intercept —
 * until a signal arrives or a handler returns nonzero.  -ENOTSUPP from
 * the handler means "kernel can't deal with this intercept": the raw
 * SIE state is exported in kvm_run for userspace.  -EREMOTE means the
 * handler dealt with it but still needs userspace (kvm_run already
 * prepared).  Several lines (do {, the __vcpu_run() call, rc
 * assignments, final return) are missing from this extract.
 */
439 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
446 if (vcpu->sigset_active)
447 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
449 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
451 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
453 switch (kvm_run->exit_reason) {
454 case KVM_EXIT_S390_SIEIC:
455 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
456 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
458 case KVM_EXIT_UNKNOWN:
459 case KVM_EXIT_S390_RESET:
468 kvm_s390_deliver_pending_interrupts(vcpu);
470 rc = kvm_handle_sie_intercept(vcpu);
471 } while (!signal_pending(current) && !rc);
473 if (signal_pending(current) && !rc)
476 if (rc == -ENOTSUPP) {
477 /* intercept cannot be handled in-kernel, prepare kvm-run */
478 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
479 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
480 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
481 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
482 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
483 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
487 if (rc == -EREMOTE) {
488 /* intercept was handled, but userspace support is needed
489 * kvm_run has been prepared by the handler */
493 if (vcpu->sigset_active)
494 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
498 vcpu->stat.exit_userspace++;
/*
 * Copy 'n' bytes to a guest address: through the prefix area
 * (copy_to_guest) when 'prefix' is set, otherwise to the absolute
 * address.  The if/else scaffolding is missing from this extract.
 */
502 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
503 unsigned long n, int prefix)
506 return copy_to_guest(vcpu, guestdest, from, n);
508 return copy_to_guest_absolute(vcpu, guestdest, from, n);
512 * store status at address
513 * we have two special cases:
514 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
515 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the architected save area (fp regs, gprs, psw, prefix, fpc,
 * TOD programmable reg, CPU timer, clock comparator, access regs,
 * control regs) at 'addr', field by field via __guestcopy().  The two
 * sentinel addresses first store the archmode byte at absolute/
 * prefixed offset 163 and then redirect to SAVE_AREA_BASE.  Error
 * returns between the copies are missing from this extract.
 */
517 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
519 const unsigned char archmode = 1;
522 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
523 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
525 addr = SAVE_AREA_BASE;
527 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
528 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
530 addr = SAVE_AREA_BASE;
535 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
536 vcpu->arch.guest_fpregs.fprs, 128, prefix))
539 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
540 vcpu->arch.guest_gprs, 128, prefix))
543 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
544 &vcpu->arch.sie_block->gpsw, 16, prefix))
547 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
548 &vcpu->arch.sie_block->prefix, 4, prefix))
551 if (__guestcopy(vcpu,
552 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
553 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
556 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
557 &vcpu->arch.sie_block->todpr, 4, prefix))
560 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
561 &vcpu->arch.sie_block->cputm, 8, prefix))
564 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
565 &vcpu->arch.sie_block->ckc, 8, prefix))
568 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
569 &vcpu->arch.guest_acrs, 64, prefix))
572 if (__guestcopy(vcpu,
573 addr + offsetof(struct save_area_s390x, ctrl_regs),
574 &vcpu->arch.sie_block->gcr, 128, prefix))
/* Locking/vcpu_load wrapper around the worker above — body mostly missing here. */
579 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
584 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * Per-vcpu ioctl dispatcher: interrupt injection, store-status,
 * initial PSW install, and initial reset.  The switch scaffolding,
 * the psw declaration/copy error paths and the default case are
 * missing from this extract.
 */
589 long kvm_arch_vcpu_ioctl(struct file *filp,
590 unsigned int ioctl, unsigned long arg)
592 struct kvm_vcpu *vcpu = filp->private_data;
593 void __user *argp = (void __user *)arg;
596 case KVM_S390_INTERRUPT: {
597 struct kvm_s390_interrupt s390int;
599 if (copy_from_user(&s390int, argp, sizeof(s390int)))
601 return kvm_s390_inject_vcpu(vcpu, &s390int);
/* 'arg' is the guest address directly, not a userspace pointer. */
603 case KVM_S390_STORE_STATUS:
604 return kvm_s390_vcpu_store_status(vcpu, arg);
605 case KVM_S390_SET_INITIAL_PSW: {
608 if (copy_from_user(&psw, argp, sizeof(psw)))
610 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
612 case KVM_S390_INITIAL_RESET:
613 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
620 /* Section: memory related */
/*
 * Install the single guest memory slot: must start at guest physical
 * zero and be page-aligned in both userspace address and size (the
 * rejected-case returns are missing from this extract).  On success
 * the origin/size are recorded in the arch struct for SIE.
 */
621 int kvm_arch_set_memory_region(struct kvm *kvm,
622 struct kvm_userspace_memory_region *mem,
623 struct kvm_memory_slot old,
626 /* A few sanity checks. We can have exactly one memory slot which has
627 to start at guest virtual zero and which has to be located at a
628 page boundary in userland and which has to end at a page boundary.
629 The memory in userland is ok to be fragmented into various different
630 vmas. It is okay to mmap() and munmap() stuff in this slot after
631 doing this call at any time */
636 if (mem->guest_phys_addr)
639 if (mem->userspace_addr & (PAGE_SIZE - 1))
642 if (mem->memory_size & (PAGE_SIZE - 1))
645 kvm->arch.guest_origin = mem->userspace_addr;
646 kvm->arch.guest_memsize = mem->memory_size;
648 /* FIXME: we do want to interrupt running CPUs and update their memory
649 configuration now to avoid race conditions. But hey, changing the
650 memory layout while virtual CPUs are running is usually bad
651 programming practice. */
/* No gfn aliasing on s390 — body (identity return) missing from this extract. */
656 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/* Module entry/exit: hand off to common KVM with our vcpu size. */
661 static int __init kvm_s390_init(void)
663 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
666 static void __exit kvm_s390_exit(void)
671 module_init(kvm_s390_init);
672 module_exit(kvm_s390_exit);