/*
 * s390host.c -- hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 */
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/*
 * Per-vcpu event counters exported through debugfs.  VCPU_STAT maps a
 * field of struct kvm_vcpu's embedded stat struct to its byte offset
 * plus the KVM_STAT_VCPU tag expected by the common KVM debugfs code.
 */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/* NOTE(review): the terminating { NULL } entry and closing "};" of this
 * table are not visible in this extract -- confirm against the full file. */
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_null", VCPU_STAT(exit_null) },
35 { "exit_validity", VCPU_STAT(exit_validity) },
36 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
37 { "exit_external_request", VCPU_STAT(exit_external_request) },
38 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
39 { "exit_instruction", VCPU_STAT(exit_instruction) },
40 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
41 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
42 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
43 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
44 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
45 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
46 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
47 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
48 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
49 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
50 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
51 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
52 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
53 { "instruction_spx", VCPU_STAT(instruction_spx) },
54 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
55 { "instruction_stap", VCPU_STAT(instruction_stap) },
56 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
57 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
58 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
59 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
60 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
61 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
62 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
63 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
64 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
65 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
66 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
67 { "diagnose_44", VCPU_STAT(diagnose_44) },
72 /* Section: not file related */
/*
 * Architecture hooks required by common KVM code.  On s390 most of
 * these are no-ops because SIE virtualization is always available.
 * NOTE(review): the bodies of these functions (braces, statements and
 * return values) are missing from this extract; only the signatures
 * and one comment survive.  Confirm against the full file.
 */
73 void kvm_arch_hardware_enable(void *garbage)
75 /* every s390 is virtualization enabled ;-) */
78 void kvm_arch_hardware_disable(void *garbage)
82 int kvm_arch_hardware_setup(void)
87 void kvm_arch_hardware_unsetup(void)
91 void kvm_arch_check_processor_compat(void *rtn)
95 int kvm_arch_init(void *opaque)
100 void kvm_arch_exit(void)
104 /* Section: device related */
105 long kvm_arch_dev_ioctl(struct file *filp,
106 unsigned int ioctl, unsigned long arg)
108 if (ioctl == KVM_S390_ENABLE_SIE)
109 return s390_enable_sie();
113 int kvm_dev_ioctl_check_extension(long ext)
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 * Dirty-page logging is not implemented on s390 in this version;
 * the call succeeds without reporting any dirty pages.
 * NOTE(review): body reconstructed -- the original statements were
 * elided in this extract; confirm the return value against the full file.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}
/*
 * ioctl handler for a VM file descriptor.
 * NOTE(review): the switch framing, error handling and return paths are
 * missing from this extract; only the KVM_S390_INTERRUPT case body is
 * visible.  Comments describe only what remains.
 */
128 long kvm_arch_vm_ioctl(struct file *filp,
129 unsigned int ioctl, unsigned long arg)
131 struct kvm *kvm = filp->private_data;
132 void __user *argp = (void __user *)arg;
136 case KVM_S390_INTERRUPT: {
137 struct kvm_s390_interrupt s390int;
/* copy the interrupt description from userspace, then inject it as a
 * floating (VM-wide) interrupt */
140 if (copy_from_user(&s390int, argp, sizeof(s390int)))
142 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialize the per-VM state.
 * NOTE(review): error-handling branches, goto labels and return
 * statements are missing from this extract; the final free_page() below
 * is evidently part of an elided error path.
 */
152 struct kvm *kvm_arch_create_vm(void)
/* make sure this address space can run SIE before anything else */
158 rc = s390_enable_sie();
163 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
/* the SCA (system control area) shared with the hardware must be one
 * zeroed page */
167 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
/* per-VM s390 debug-feature log, named after the creating task's pid */
171 sprintf(debug_name, "kvm-%u", current->pid);
173 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
/* floating (VM-wide) interrupt bookkeeping */
177 spin_lock_init(&kvm->arch.float_int.lock);
178 INIT_LIST_HEAD(&kvm->arch.float_int.list);
180 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
181 VM_EVENT(kvm, 3, "%s", "vm created");
183 try_module_get(THIS_MODULE);
/* error path: release the SCA page (labels/returns elided) */
187 free_page((unsigned long)(kvm->arch.sca));
194 void kvm_arch_destroy_vm(struct kvm *kvm)
196 debug_unregister(kvm->arch.dbf);
197 kvm_free_physmem(kvm);
198 free_page((unsigned long)(kvm->arch.sca));
200 module_put(THIS_MODULE);
203 /* Section: vcpu related */
/* NOTE(review): the bodies of these two hooks (braces/returns) are
 * missing from this extract; both appear to be no-ops required only to
 * satisfy the common-code interface. */
204 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
209 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
211 /* kvm common code refers to this, but doesn't call it */
215 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
217 save_fp_regs(&vcpu->arch.host_fpregs);
218 save_access_regs(vcpu->arch.host_acrs);
219 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
220 restore_fp_regs(&vcpu->arch.guest_fpregs);
221 restore_access_regs(vcpu->arch.guest_acrs);
224 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
226 save_fp_regs(&vcpu->arch.guest_fpregs);
227 save_access_regs(vcpu->arch.guest_acrs);
228 restore_fp_regs(&vcpu->arch.host_fpregs);
229 restore_access_regs(vcpu->arch.host_acrs);
232 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
234 /* this equals initial cpu reset in pop, but we don't switch to ESA */
235 vcpu->arch.sie_block->gpsw.mask = 0UL;
236 vcpu->arch.sie_block->gpsw.addr = 0UL;
237 vcpu->arch.sie_block->prefix = 0UL;
238 vcpu->arch.sie_block->ihcpu = 0xffff;
239 vcpu->arch.sie_block->cputm = 0UL;
240 vcpu->arch.sie_block->ckc = 0UL;
241 vcpu->arch.sie_block->todpr = 0;
242 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
243 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
244 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
245 vcpu->arch.guest_fpregs.fpc = 0;
246 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
247 vcpu->arch.sie_block->gbea = 1;
250 /* The current code can have up to 256 pages for virtio */
251 #define VIRTIODESCSPACE (256ul * 4096ul)
253 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
255 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
256 vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
257 vcpu->kvm->arch.guest_origin +
258 VIRTIODESCSPACE - 1ul;
259 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
260 vcpu->arch.sie_block->ecb = 2;
261 vcpu->arch.sie_block->eca = 0xC1002001U;
262 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
263 (unsigned long) vcpu);
264 get_cpu_id(&vcpu->arch.cpu_id);
265 vcpu->arch.cpu_id.version = 0xfe;
/*
 * Allocate a vcpu, wire it into the VM's SCA and floating-interrupt
 * bookkeeping, and register it with common KVM code.
 * NOTE(review): allocation-failure branches, the rc error path after
 * kvm_vcpu_init(), goto labels and return statements are missing from
 * this extract; comments describe only the visible statements.
 */
269 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
272 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
/* the SIE control block must be a zeroed page of its own */
278 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
279 get_zeroed_page(GFP_KERNEL);
281 if (!vcpu->arch.sie_block)
284 vcpu->arch.sie_block->icpua = id;
285 BUG_ON(!kvm->arch.sca);
286 BUG_ON(kvm->arch.sca->cpu[id].sda);
/* publish this vcpu's SIE block in the VM-wide system control area */
287 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
288 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
289 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
/* local-interrupt state, linked to the VM's floating-interrupt state
 * under the float_int lock */
291 spin_lock_init(&vcpu->arch.local_int.lock);
292 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
293 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
294 spin_lock_bh(&kvm->arch.float_int.lock);
295 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
296 init_waitqueue_head(&vcpu->arch.local_int.wq);
297 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
298 spin_unlock_bh(&kvm->arch.float_int.lock);
300 rc = kvm_vcpu_init(vcpu, kvm, id);
303 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
304 vcpu->arch.sie_block);
306 try_module_get(THIS_MODULE);
315 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
317 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
318 free_page((unsigned long)(vcpu->arch.sie_block));
320 module_put(THIS_MODULE);
/* NOTE(review): the bodies of both functions below (braces, the call
 * framing around kvm_s390_vcpu_initial_reset and the return statements)
 * are missing from this extract. */
323 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
325 /* kvm common code refers to this, but never calls it */
/* ioctl backend that puts the vcpu into its architected initial-reset
 * state; delegates to kvm_s390_vcpu_initial_reset() */
330 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
333 kvm_s390_vcpu_initial_reset(vcpu);
338 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
341 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
346 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
349 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Exchange "special" registers (access registers and control registers)
 * between userspace's kvm_sregs buffer and the vcpu state.
 * NOTE(review): braces, locking framing and return statements are
 * missing from this extract; only the memcpy pairs are visible.
 */
354 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
355 struct kvm_sregs *sregs)
358 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
359 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* reverse direction: read acrs/crs out of the vcpu state */
364 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
365 struct kvm_sregs *sregs)
368 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
369 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Exchange floating-point state (fprs and the FP control register)
 * between userspace's kvm_fpu buffer and the vcpu's guest FP save area.
 * NOTE(review): braces, locking framing and return statements are
 * missing from this extract.
 */
374 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
377 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs))
378 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/* reverse direction: read fprs/fpc out of the vcpu state */
383 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
386 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
387 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Install an initial PSW supplied by userspace.  Refused while the vcpu
 * is running (CPUSTAT_RUNNING set in the SIE cpuflags).
 * NOTE(review): braces, the rejection return and the success return are
 * missing from this extract.
 */
392 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
397 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
400 vcpu->arch.sie_block->gpsw = psw;
/*
 * Unimplemented common-KVM vcpu ioctls: each simply reports -EINVAL.
 * NOTE(review): the surrounding braces were elided in this extract.
 */
405 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
406 struct kvm_translation *tr)
408 return -EINVAL; /* not implemented yet */
411 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
412 struct kvm_debug_guest *dbg)
414 return -EINVAL; /* not implemented yet */
417 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
418 struct kvm_mp_state *mp_state)
420 return -EINVAL; /* not implemented yet */
423 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
424 struct kvm_mp_state *mp_state)
426 return -EINVAL; /* not implemented yet */
429 extern void s390_handle_mcck(void);

/*
 * Enter the SIE instruction once for this vcpu: sync gprs 14/15 into
 * the SIE block, deliver pending interrupts, run the guest, and sync
 * gprs 14/15 back out.
 * NOTE(review): several statements are missing from this extract
 * (braces, the machine-check handling after the TIF_MCCK_PENDING test,
 * guest enter/exit bookkeeping); comments cover only what is visible.
 */
431 static void __vcpu_run(struct kvm_vcpu *vcpu)
/* gg14/gg15 in the SIE block shadow guest gprs 14 and 15 (16 bytes) */
433 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
/* presumably followed by s390_handle_mcck() -- the branch body is elided */
438 if (test_thread_flag(TIF_MCCK_PENDING))
441 kvm_s390_deliver_pending_interrupts(vcpu);
443 vcpu->arch.sie_block->icptcode = 0;
447 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
448 atomic_read(&vcpu->arch.sie_block->cpuflags));
/* a nonzero return from sie64a means the SIE instruction itself
 * faulted; surface that to the guest as an addressing exception */
449 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
450 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
451 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
453 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
454 vcpu->arch.sie_block->icptcode);
459 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * Main vcpu run loop (KVM_RUN ioctl): restore state from kvm_run,
 * repeatedly enter SIE and handle intercepts until userspace help is
 * needed or a signal arrives, then prepare kvm_run for userspace.
 * NOTE(review): the loop opening (do {), __vcpu_run() call, several
 * case bodies, the -EINTR signal path and the final return are missing
 * from this extract; comments cover only the visible statements.
 */
462 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* honor the sigmask userspace installed for the duration of the run */
469 if (vcpu->sigset_active)
470 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
472 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
474 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* re-import PSW state that userspace may have modified */
476 switch (kvm_run->exit_reason) {
477 case KVM_EXIT_S390_SIEIC:
478 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
479 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
481 case KVM_EXIT_UNKNOWN:
482 case KVM_EXIT_S390_RESET:
/* run until the intercept handler reports a reason to stop or a
 * signal is pending */
492 rc = kvm_handle_sie_intercept(vcpu);
493 } while (!signal_pending(current) && !rc);
495 if (signal_pending(current) && !rc)
/* intercept cannot be handled in-kernel: export the SIE intercept
 * details so userspace can act on them */
498 if (rc == -ENOTSUPP) {
499 /* intercept cannot be handled in-kernel, prepare kvm-run */
500 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
501 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
502 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
503 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
504 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
505 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
509 if (rc == -EREMOTE) {
510 /* intercept was handled, but userspace support is needed
511 * kvm_run has been prepared by the handler */
515 if (vcpu->sigset_active)
516 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
520 vcpu->stat.exit_userspace++;
524 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
525 unsigned long n, int prefix)
528 return copy_to_guest(vcpu, guestdest, from, n);
530 return copy_to_guest_absolute(vcpu, guestdest, from, n);
/*
534 * store status at address
535 * we have two special cases:
536 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
537 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 *
 * NOTE(review): the error returns (-EFAULT paths), the 'prefix' local
 * setup and the final success return are missing from this extract;
 * each __guestcopy failure presumably bails out with an error.
 */
539 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
/* byte 163 is the architected-mode flag in the save area */
541 const unsigned char archmode = 1;
544 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
545 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
547 addr = SAVE_AREA_BASE;
549 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
550 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
552 addr = SAVE_AREA_BASE;
/* dump each register group to its architected slot in the save area */
557 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
558 vcpu->arch.guest_fpregs.fprs, 128, prefix))
561 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
562 vcpu->arch.guest_gprs, 128, prefix))
565 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
566 &vcpu->arch.sie_block->gpsw, 16, prefix))
569 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
570 &vcpu->arch.sie_block->prefix, 4, prefix))
573 if (__guestcopy(vcpu,
574 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
575 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
578 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
579 &vcpu->arch.sie_block->todpr, 4, prefix))
582 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
583 &vcpu->arch.sie_block->cputm, 8, prefix))
586 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
587 &vcpu->arch.sie_block->ckc, 8, prefix))
590 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
591 &vcpu->arch.guest_acrs, 64, prefix))
594 if (__guestcopy(vcpu,
595 addr + offsetof(struct save_area_s390x, ctrl_regs),
596 &vcpu->arch.sie_block->gcr, 128, prefix))
/* locked wrapper around __kvm_s390_vcpu_store_status; framing
 * (vcpu_load/vcpu_put and return) elided in this extract */
601 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
606 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * ioctl handler for a vcpu file descriptor: interrupt injection, store
 * status, set initial PSW, and initial reset.
 * NOTE(review): the switch opening, -EFAULT returns after the failed
 * copy_from_user calls, the psw local declaration, the default case and
 * closing braces are missing from this extract.
 */
611 long kvm_arch_vcpu_ioctl(struct file *filp,
612 unsigned int ioctl, unsigned long arg)
614 struct kvm_vcpu *vcpu = filp->private_data;
615 void __user *argp = (void __user *)arg;
/* inject an interrupt described by userspace into this vcpu */
618 case KVM_S390_INTERRUPT: {
619 struct kvm_s390_interrupt s390int;
621 if (copy_from_user(&s390int, argp, sizeof(s390int)))
623 return kvm_s390_inject_vcpu(vcpu, &s390int);
625 case KVM_S390_STORE_STATUS:
626 return kvm_s390_vcpu_store_status(vcpu, arg);
627 case KVM_S390_SET_INITIAL_PSW: {
630 if (copy_from_user(&psw, argp, sizeof(psw)))
632 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
634 case KVM_S390_INITIAL_RESET:
635 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
642 /* Section: memory related */
/*
 * Register the single guest memory slot.
 * NOTE(review): the parameter list is truncated, and the -EINVAL
 * returns after each sanity check plus the final success return are
 * missing from this extract.
 */
643 int kvm_arch_set_memory_region(struct kvm *kvm,
644 struct kvm_userspace_memory_region *mem,
645 struct kvm_memory_slot old,
648 /* A few sanity checks. We can have exactly one memory slot which has
649 to start at guest virtual zero and which has to be located at a
650 page boundary in userland and which has to end at a page boundary.
651 The memory in userland is ok to be fragmented into various different
652 vmas. It is okay to mmap() and munmap() stuff in this slot after
653 doing this call at any time */
/* slot must start at guest address zero */
658 if (mem->guest_phys_addr)
/* userspace backing must be page aligned in start and size */
661 if (mem->userspace_addr & (PAGE_SIZE - 1))
664 if (mem->memory_size & (PAGE_SIZE - 1))
667 kvm->arch.guest_origin = mem->userspace_addr;
668 kvm->arch.guest_memsize = mem->memory_size;
670 /* FIXME: we do want to interrupt running CPUs and update their memory
671 configuration now to avoid race conditions. But hey, changing the
672 memory layout while virtual CPUs are running is usually bad
673 programming practice. */
/* NOTE(review): bodies elided in this extract.  kvm_arch_flush_shadow
 * appears to be a no-op on s390; unalias_gfn presumably returns gfn
 * unchanged (no gfn aliasing on this architecture) -- confirm against
 * the full file. */
678 void kvm_arch_flush_shadow(struct kvm *kvm)
682 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
687 static int __init kvm_s390_init(void)
689 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
692 static void __exit kvm_s390_exit(void)
697 module_init(kvm_s390_init);
698 module_exit(kvm_s390_exit);