2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/* Describe one per-vcpu statistics counter as an (offset, type) pair. */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Statistics counters exported through debugfs: each entry maps a
 * user-visible name to a counter inside struct kvm_vcpu's stat member
 * (see VCPU_STAT above).
 * NOTE(review): the array's terminating entry and closing brace are not
 * visible in this excerpt.
 */
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_null", VCPU_STAT(exit_null) },
35 { "exit_validity", VCPU_STAT(exit_validity) },
36 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
37 { "exit_external_request", VCPU_STAT(exit_external_request) },
38 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
39 { "exit_instruction", VCPU_STAT(exit_instruction) },
40 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
41 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
42 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
43 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
44 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
45 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
46 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
47 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
48 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
49 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
50 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
51 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
52 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
53 { "instruction_spx", VCPU_STAT(instruction_spx) },
54 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
55 { "instruction_stap", VCPU_STAT(instruction_stap) },
56 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
57 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
58 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
59 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
60 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
61 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
62 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
63 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
64 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
65 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
66 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
67 { "diagnose_44", VCPU_STAT(diagnose_44) },
56 /* Section: not file related */
/*
 * Architecture hooks called by common KVM code.  On s390 the SIE
 * instruction needs no per-cpu enable/disable or compatibility setup,
 * so these hooks are empty stubs (bodies elided in this excerpt).
 */
73 void kvm_arch_hardware_enable(void *garbage)
75 /* every s390 is virtualization enabled ;-) */
78 void kvm_arch_hardware_disable(void *garbage)
82 int kvm_arch_hardware_setup(void)
87 void kvm_arch_hardware_unsetup(void)
91 void kvm_arch_check_processor_compat(void *rtn)
95 int kvm_arch_init(void *opaque)
100 void kvm_arch_exit(void)
104 /* Section: device related */
/*
 * Device ioctls on the /dev/kvm fd.  The only s390-specific request
 * visible here is KVM_S390_ENABLE_SIE, which switches the calling
 * process's address space layout so SIE can be used.
 */
105 long kvm_arch_dev_ioctl(struct file *filp,
106 unsigned int ioctl, unsigned long arg)
108 if (ioctl == KVM_S390_ENABLE_SIE)
109 return s390_enable_sie();
/* Capability query; return value for each extension is elided here. */
113 int kvm_dev_ioctl_check_extension(long ext)
118 /* Section: vm related */
120 * Get (and clear) the dirty memory log for a memory slot.
122 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
123 struct kvm_dirty_log *log)
/*
 * VM-scope ioctls.  KVM_S390_INTERRUPT copies a kvm_s390_interrupt
 * descriptor from userspace and injects it as a floating interrupt
 * into the VM via kvm_s390_inject_vm().
 */
128 long kvm_arch_vm_ioctl(struct file *filp,
129 unsigned int ioctl, unsigned long arg)
131 struct kvm *kvm = filp->private_data;
132 void __user *argp = (void __user *)arg;
136 case KVM_S390_INTERRUPT: {
137 struct kvm_s390_interrupt s390int;
/* -EFAULT path on failed copy is elided in this excerpt */
140 if (copy_from_user(&s390int, argp, sizeof(s390int)))
142 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialize an s390 VM: enable SIE for the current
 * process, allocate the kvm structure and the system control area
 * (SCA) page, register a per-VM s390 debug feature log, and set up
 * the floating-interrupt list.  Error paths (mostly elided in this
 * excerpt) free the SCA page.
 */
152 struct kvm *kvm_arch_create_vm(void)
158 rc = s390_enable_sie();
163 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
/* SCA must be a single zeroed page; SIE blocks of vcpus are hooked in here */
167 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
/* debug log name is unique per creating process */
171 sprintf(debug_name, "kvm-%u", current->pid);
173 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
177 spin_lock_init(&kvm->arch.float_int.lock);
178 INIT_LIST_HEAD(&kvm->arch.float_int.list);
180 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
181 VM_EVENT(kvm, 3, "%s", "vm created");
183 try_module_get(THIS_MODULE);
187 free_page((unsigned long)(kvm->arch.sca));
/* Tear down what kvm_arch_create_vm() set up: debug log, SCA page, modref. */
194 void kvm_arch_destroy_vm(struct kvm *kvm)
196 debug_unregister(kvm->arch.dbf);
197 free_page((unsigned long)(kvm->arch.sca));
199 module_put(THIS_MODULE);
99 /* Section: vcpu related */
/* Per-vcpu init/uninit hooks required by common KVM code; no-ops on s390. */
203 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
208 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
210 /* kvm common code refers to this, but doesn't call it */
/*
 * Called when the vcpu is scheduled onto a host cpu: stash the host's
 * floating point and access registers, then install the guest's copies.
 * The guest FPC is masked to the valid bit set first.
 */
214 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
216 save_fp_regs(&vcpu->arch.host_fpregs);
217 save_access_regs(vcpu->arch.host_acrs);
218 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
219 restore_fp_regs(&vcpu->arch.guest_fpregs);
220 restore_access_regs(vcpu->arch.guest_acrs);
/* Inverse of kvm_arch_vcpu_load(): save guest fp/access regs, restore host's. */
223 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
225 save_fp_regs(&vcpu->arch.guest_fpregs);
226 save_access_regs(vcpu->arch.guest_acrs);
227 restore_fp_regs(&vcpu->arch.host_fpregs);
228 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Put the vcpu into the architected initial-cpu-reset state: zeroed PSW,
 * prefix, timers and control registers (except the architected non-zero
 * defaults loaded into cr0 and cr14 below), invalidated ihcpu, and a
 * cleared guest FPC which is also loaded into the hardware fpc register.
 */
231 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
233 /* this equals initial cpu reset in pop, but we don't switch to ESA */
234 vcpu->arch.sie_block->gpsw.mask = 0UL;
235 vcpu->arch.sie_block->gpsw.addr = 0UL;
236 vcpu->arch.sie_block->prefix = 0UL;
/* 0xffff marks the interception cpu field as invalid */
237 vcpu->arch.sie_block->ihcpu = 0xffff;
238 vcpu->arch.sie_block->cputm = 0UL;
239 vcpu->arch.sie_block->ckc = 0UL;
240 vcpu->arch.sie_block->todpr = 0;
241 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
/* architected reset values for control registers 0 and 14 */
242 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
243 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
244 vcpu->arch.guest_fpregs.fpc = 0;
245 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
246 vcpu->arch.sie_block->gbea = 1;
/*
 * One-time setup of the vcpu's SIE control block: run in z/Architecture
 * mode, set guest memory limit/origin, enable the execution controls in
 * ecb/eca, arm the clock-comparator wakeup timer, and advertise the host
 * cpu id with the version byte overridden to 0xfe.
 */
249 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
251 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
252 vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
253 vcpu->arch.sie_block->gmsor = 0x000000000000;
254 vcpu->arch.sie_block->ecb = 2;
255 vcpu->arch.sie_block->eca = 0xC1002001U;
256 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
257 (unsigned long) vcpu);
258 get_cpu_id(&vcpu->arch.cpu_id);
259 vcpu->arch.cpu_id.version = 0xfe;
/*
 * Allocate a vcpu and its SIE control block (one zeroed page), wire the
 * block into the VM's system control area (SCA), and initialize the
 * vcpu's local interrupt state under the floating-interrupt lock.
 * Error unwinding paths are elided in this excerpt.
 */
263 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
266 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
272 vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
274 if (!vcpu->arch.sie_block)
277 vcpu->arch.sie_block->icpua = id;
278 BUG_ON(!kvm->arch.sca);
/* a vcpu with this id must not already own the SCA slot */
279 BUG_ON(kvm->arch.sca->cpu[id].sda);
280 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/* SCA address is split into high and low 32-bit halves in the SIE block */
281 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
282 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
284 spin_lock_init(&vcpu->arch.local_int.lock);
285 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
286 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
287 spin_lock_bh(&kvm->arch.float_int.lock);
288 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
289 init_waitqueue_head(&vcpu->arch.local_int.wq);
290 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
291 spin_unlock_bh(&kvm->arch.float_int.lock);
293 rc = kvm_vcpu_init(vcpu, kvm, id);
296 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
297 vcpu->arch.sie_block);
299 try_module_get(THIS_MODULE);
/* Free the vcpu's SIE block page and drop the module reference. */
308 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
310 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
311 free_page((unsigned long)(vcpu->arch.sie_block));
313 module_put(THIS_MODULE);
316 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
318 /* kvm common code refers to this, but never calls it */
/* KVM_S390_INITIAL_RESET ioctl backend: perform the initial cpu reset. */
323 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
326 kvm_s390_vcpu_initial_reset(vcpu);
331 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
334 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
339 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
342 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS backend: install access and control registers. */
347 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
348 struct kvm_sregs *sregs)
351 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
352 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS backend: read back access and control registers. */
357 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
358 struct kvm_sregs *sregs)
361 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
362 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs))
/* KVM_SET_FPU backend: install floating point registers and control. */
367 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
370 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
371 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/* KVM_GET_FPU backend: read back floating point registers and control. */
376 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
379 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
380 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * KVM_S390_SET_INITIAL_PSW backend: install a new guest PSW, but refuse
 * (path elided in this excerpt) while the vcpu is running.
 */
385 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
390 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
393 vcpu->arch.sie_block->gpsw = psw;
/*
 * The following four vcpu ioctl backends are unimplemented on s390 and
 * uniformly report -EINVAL to the caller.
 */
398 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
399 struct kvm_translation *tr)
401 return -EINVAL; /* not implemented yet */
404 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
405 struct kvm_debug_guest *dbg)
407 return -EINVAL; /* not implemented yet */
410 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
411 struct kvm_mp_state *mp_state)
413 return -EINVAL; /* not implemented yet */
416 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
417 struct kvm_mp_state *mp_state)
419 return -EINVAL; /* not implemented yet */
422 extern void s390_handle_mcck(void);
/*
 * Enter SIE once: mirror guest gprs 14/15 into the SIE block (16 bytes
 * starting at guest_gprs[14]), handle a pending host machine check,
 * deliver pending guest interrupts, run the guest via sie64a(), and on
 * a fault during SIE inject an addressing exception into the guest.
 * The gprs are copied back out afterwards.
 */
424 static void __vcpu_run(struct kvm_vcpu *vcpu)
426 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
431 if (test_thread_flag(TIF_MCCK_PENDING))
434 kvm_s390_deliver_pending_interrupts(vcpu);
/* clear stale interception code before (re)entering SIE */
436 vcpu->arch.sie_block->icptcode = 0;
440 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
441 atomic_read(&vcpu->arch.sie_block->cpuflags));
442 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
443 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
444 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
446 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
447 vcpu->arch.sie_block->icptcode);
452 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * KVM_RUN backend: optionally switch to the vcpu's signal mask, restore
 * the PSW userspace may have edited in kvm_run, then loop running the
 * guest and handling intercepts until a signal arrives or a handler
 * reports an error.  -ENOTSUPP means the intercept must be completed in
 * userspace, so kvm_run is filled with the SIE interception state;
 * -EREMOTE means the handler already prepared kvm_run itself.
 */
455 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
462 if (vcpu->sigset_active)
463 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
465 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
467 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* re-import the guest PSW that userspace may have modified */
469 switch (kvm_run->exit_reason) {
470 case KVM_EXIT_S390_SIEIC:
471 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
472 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
474 case KVM_EXIT_UNKNOWN:
475 case KVM_EXIT_S390_RESET:
485 rc = kvm_handle_sie_intercept(vcpu);
486 } while (!signal_pending(current) && !rc);
488 if (signal_pending(current) && !rc)
491 if (rc == -ENOTSUPP) {
492 /* intercept cannot be handled in-kernel, prepare kvm-run */
493 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
494 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
495 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
496 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
497 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
498 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
502 if (rc == -EREMOTE) {
503 /* intercept was handled, but userspace support is needed
504 * kvm_run has been prepared by the handler */
508 if (vcpu->sigset_active)
509 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
513 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes to the guest: prefix-relative via copy_to_guest() when
 * 'prefix' is set, otherwise to the absolute guest address.
 */
517 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
518 unsigned long n, int prefix)
521 return copy_to_guest(vcpu, guestdest, from, n);
523 return copy_to_guest_absolute(vcpu, guestdest, from, n);
527 * store status at address
528 * we have two special cases:
529 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
530 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vcpu's architected store-status save area (fp regs, gprs,
 * PSW, prefix, fpc, TOD programmable reg, cpu timer, clock comparator,
 * access and control registers) to guest memory at 'addr', after first
 * storing the architecture-mode byte at absolute/prefixed address 163.
 * Each failed __guestcopy() bails out (error paths elided here).
 */
532 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
534 const unsigned char archmode = 1;
537 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
538 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
540 addr = SAVE_AREA_BASE;
542 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
543 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
545 addr = SAVE_AREA_BASE;
550 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
551 vcpu->arch.guest_fpregs.fprs, 128, prefix))
554 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
555 vcpu->arch.guest_gprs, 128, prefix))
558 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
559 &vcpu->arch.sie_block->gpsw, 16, prefix))
562 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
563 &vcpu->arch.sie_block->prefix, 4, prefix))
566 if (__guestcopy(vcpu,
567 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
568 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
571 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
572 &vcpu->arch.sie_block->todpr, 4, prefix))
575 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
576 &vcpu->arch.sie_block->cputm, 8, prefix))
579 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
580 &vcpu->arch.sie_block->ckc, 8, prefix))
583 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
584 &vcpu->arch.guest_acrs, 64, prefix))
587 if (__guestcopy(vcpu,
588 addr + offsetof(struct save_area_s390x, ctrl_regs),
589 &vcpu->arch.sie_block->gcr, 128, prefix))
/* Locked/loaded wrapper around __kvm_s390_vcpu_store_status(). */
594 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
599 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * Dispatch the s390-specific vcpu ioctls: interrupt injection, store
 * status, setting the initial PSW, and the initial cpu reset.
 */
604 long kvm_arch_vcpu_ioctl(struct file *filp,
605 unsigned int ioctl, unsigned long arg)
607 struct kvm_vcpu *vcpu = filp->private_data;
608 void __user *argp = (void __user *)arg;
611 case KVM_S390_INTERRUPT: {
612 struct kvm_s390_interrupt s390int;
/* -EFAULT path on failed copy is elided in this excerpt */
614 if (copy_from_user(&s390int, argp, sizeof(s390int)))
616 return kvm_s390_inject_vcpu(vcpu, &s390int);
618 case KVM_S390_STORE_STATUS:
619 return kvm_s390_vcpu_store_status(vcpu, arg);
620 case KVM_S390_SET_INITIAL_PSW: {
623 if (copy_from_user(&psw, argp, sizeof(psw)))
625 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
627 case KVM_S390_INITIAL_RESET:
628 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
299 /* Section: memory related */
636 int kvm_arch_set_memory_region(struct kvm *kvm,
637 struct kvm_userspace_memory_region *mem,
638 struct kvm_memory_slot old,
641 /* A few sanity checks. We can have exactly one memory slot which has
642 to start at guest virtual zero and which has to be located at a
643 page boundary in userland and which has to end at a page boundary.
644 The memory in userland is ok to be fragmented into various different
645 vmas. It is okay to mmap() and munmap() stuff in this slot after
646 doing this call at any time */
/* guest memory must start at guest physical address 0 */
651 if (mem->guest_phys_addr)
/* userspace address and size must both be page aligned */
654 if (mem->userspace_addr & (PAGE_SIZE - 1))
657 if (mem->memory_size & (PAGE_SIZE - 1))
660 kvm->arch.guest_origin = mem->userspace_addr;
661 kvm->arch.guest_memsize = mem->memory_size;
663 /* FIXME: we do want to interrupt running CPUs and update their memory
664 configuration now to avoid race conditions. But hey, changing the
665 memory layout while virtual CPUs are running is usually bad
666 programming practice. */
/* s390 has no gfn aliasing; common code hook (identity, body elided). */
671 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/* Module entry/exit: register with common KVM, sizing per-vcpu allocation. */
676 static int __init kvm_s390_init(void)
678 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
681 static void __exit kvm_s390_exit(void)
686 module_init(kvm_s390_init);
687 module_exit(kvm_s390_exit);