X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=arch%2Fx86%2Flguest%2Fboot.c;h=a104c532ff706cb9845140421371fc208a90605b;hb=4357bd9453b81e0a41db1dec16e06d74256b7560;hp=92c56117eae5ed2af3cbdc37c6086d7a7dd25120;hpb=221d46841b931d0e6b11e6251e482f2afe3974dd;p=linux-2.6-omap-h63xx.git diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92c56117eae..a104c532ff7 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,7 @@ #include #include #include +#include /* for struct machine_ops */ /*G:010 Welcome to the Guest! * @@ -74,15 +76,6 @@ * behaving in simplified but equivalent ways. In particular, the Guest is the * same kernel as the Host (or at least, built from the same source code). :*/ -/* Declarations for definitions in lguest_guest.S */ -extern char lguest_noirq_start[], lguest_noirq_end[]; -extern const char lgstart_cli[], lgend_cli[]; -extern const char lgstart_sti[], lgend_sti[]; -extern const char lgstart_popf[], lgend_popf[]; -extern const char lgstart_pushf[], lgend_pushf[]; -extern const char lgstart_iret[], lgend_iret[]; -extern void lguest_iret(void); - struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, .noirq_start = (u32)lguest_noirq_start, @@ -91,7 +84,6 @@ struct lguest_data lguest_data = { .blocked_interrupts = { 1 }, /* Block timer interrupts */ .syscall_vec = SYSCALL_VECTOR, }; -static cycle_t clock_base; /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we @@ -175,8 +167,8 @@ static void lguest_leave_lazy_mode(void) * check there when it wants to deliver an interrupt. */ -/* save_flags() is expected to return the processor state (ie. "eflags"). The - * eflags word contains all kind of stuff, but in practice Linux only cares +/* save_flags() is expected to return the processor state (ie. "flags"). The + * flags word contains all kind of stuff, but in practice Linux only cares * about the interrupt flag. Our "save_flags()" just returns that. */ static unsigned long save_fl(void) { @@ -217,19 +209,20 @@ static void irq_enable(void) * address of the handler, and... well, who cares? The Guest just asks the * Host to make the change anyway, because the Host controls the real IDT. */ -static void lguest_write_idt_entry(struct desc_struct *dt, - int entrynum, u32 low, u32 high) +static void lguest_write_idt_entry(gate_desc *dt, + int entrynum, const gate_desc *g) { + u32 *desc = (u32 *)g; /* Keep the local copy up to date. */ - write_dt_entry(dt, entrynum, low, high); + native_write_idt_entry(dt, entrynum, g); /* Tell Host about this new entry. */ - hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high); + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); } /* Changing to a different IDT is very rare: we keep the IDT up-to-date every * time it is written, so we can simply loop through all entries and tell the * Host about them. */ -static void lguest_load_idt(const struct Xgt_desc_struct *desc) +static void lguest_load_idt(const struct desc_ptr *desc) { unsigned int i; struct desc_struct *idt = (void *)desc->address; @@ -252,7 +245,7 @@ static void lguest_load_idt(const struct Xgt_desc_struct *desc) * hypercall and use that repeatedly to load a new IDT. I don't think it * really matters, but wouldn't it be nice if they were the same? */ -static void lguest_load_gdt(const struct Xgt_desc_struct *desc) +static void lguest_load_gdt(const struct desc_ptr *desc) { BUG_ON((desc->size+1)/8 != GDT_ENTRIES); hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); @@ -261,10 +254,10 @@ static void lguest_load_gdt(const struct Xgt_desc_struct *desc) /* For a single GDT entry which changes, we do the lazy thing: alter our GDT, * then tell the Host to reload the entire thing. This operation is so rare * that this naive implementation is reasonable. */ -static void lguest_write_gdt_entry(struct desc_struct *dt, - int entrynum, u32 low, u32 high) +static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, + const void *desc, int type) { - write_dt_entry(dt, entrynum, low, high); + native_write_gdt_entry(dt, entrynum, desc, type); hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); } @@ -323,30 +316,30 @@ static void lguest_load_tr_desc(void) * anyone (including userspace) can just use the raw "cpuid" instruction and * the Host won't even notice since it isn't privileged. So we try not to get * too worked up about it. */ -static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) +static void lguest_cpuid(unsigned int *ax, unsigned int *bx, + unsigned int *cx, unsigned int *dx) { - int function = *eax; + int function = *ax; - native_cpuid(eax, ebx, ecx, edx); + native_cpuid(ax, bx, cx, dx); switch (function) { case 1: /* Basic feature request. */ /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ - *ecx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ - *edx &= 0x07808101; + *cx &= 0x00002201; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ + *dx &= 0x07808111; /* The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls * flush_tlb_user() for both user and kernel mappings unless * the Page Global Enable (PGE) feature bit is set. */ - *edx |= 0x00002000; + *dx |= 0x00002000; break; case 0x80000000: /* Futureproof this a little: if they ask how much extended * processor information there is, limit it to known fields. */ - if (*eax > 0x80000008) - *eax = 0x80000008; + if (*ax > 0x80000008) + *ax = 0x80000008; break; } } @@ -601,19 +594,25 @@ static unsigned long lguest_get_wallclock(void) return lguest_data.time.tv_sec; } +/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, + * or 0 if it's unusable as a reliable clock source. This matches what we want + * here: if we return 0 from this function, the x86 TSC clock will not register + * itself. */ +static unsigned long lguest_cpu_khz(void) +{ + return lguest_data.tsc_khz; +} + +/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where + * we read the time value given to us by the Host. */ static cycle_t lguest_clock_read(void) { unsigned long sec, nsec; - /* If the Host tells the TSC speed, we can trust that. */ - if (lguest_data.tsc_khz) - return native_read_tsc(); - - /* If we can't use the TSC, we read the time value written by the Host. - * Since it's in two parts (seconds and nanoseconds), we risk reading - * it just as it's changing from 99 & 0.999999999 to 100 and 0, and - * getting 99 and 0. As Linux tends to come apart under the stress of - * time travel, we must be careful: */ + /* Since the time is in two parts (seconds and nanoseconds), we risk + * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, + * and getting 99 and 0. As Linux tends to come apart under the stress + * of time travel, we must be careful: */ do { /* First we read the seconds part. */ sec = lguest_data.time.tv_sec; @@ -628,14 +627,14 @@ static cycle_t lguest_clock_read(void) /* Now if the seconds part has changed, try again. */ } while (unlikely(lguest_data.time.tv_sec != sec)); - /* Our non-TSC clock is in real nanoseconds. */ + /* Our lguest clock is in real nanoseconds. */ return sec*1000000000ULL + nsec; } -/* This is what we tell the kernel is our clocksource. */ +/* This is the fallback clocksource: lower priority than the TSC clocksource. */ static struct clocksource lguest_clock = { .name = "lguest", - .rating = 400, + .rating = 200, .read = lguest_clock_read, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << 22, @@ -643,12 +642,6 @@ static struct clocksource lguest_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -/* The "scheduler clock" is just our real clock, adjusted to start at zero */ -static unsigned long long lguest_sched_clock(void) -{ - return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); -} - /* We also need a "struct clock_event_device": Linux asks us to set it to go * off some time in the future. Actually, James Morris figured all this out, I * just applied the patch. */ @@ -718,19 +711,8 @@ static void lguest_time_init(void) /* Set up the timer interrupt (0) to go to our simple timer routine */ set_irq_handler(0, lguest_time_irq); - /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can - * use the TSC, otherwise it's a dumb nanosecond-resolution clock. - * Either way, the "rating" is set so high that it's always chosen over - * any other clocksource. */ - if (lguest_data.tsc_khz) - lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, - lguest_clock.shift); - clock_base = lguest_clock_read(); clocksource_register(&lguest_clock); - /* Now we've set up our clock, we can use it as the scheduler clock */ - pv_time_ops.sched_clock = lguest_sched_clock; - /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ lguest_clockevent.cpumask = cpumask_of_cpu(0); @@ -755,10 +737,10 @@ static void lguest_time_init(void) * segment), the privilege level (we're privilege level 1, the Host is 0 and * will not tolerate us trying to use that), the stack pointer, and the number * of pages in the stack. */ -static void lguest_load_esp0(struct tss_struct *tss, +static void lguest_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { - lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0, + lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0, THREAD_SIZE/PAGE_SIZE); } @@ -788,11 +770,11 @@ static void lguest_wbinvd(void) * code qualifies for Advanced. It will also never interrupt anything. It * does, however, allow us to get through the Linux boot code. */ #ifdef CONFIG_X86_LOCAL_APIC -static void lguest_apic_write(unsigned long reg, unsigned long v) +static void lguest_apic_write(unsigned long reg, u32 v) { } -static unsigned long lguest_apic_read(unsigned long reg) +static u32 lguest_apic_read(unsigned long reg) { return 0; } @@ -812,7 +794,7 @@ static void lguest_safe_halt(void) * rather than virtual addresses, so we use __pa() here. */ static void lguest_power_off(void) { - hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); + hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0); } /* @@ -822,7 +804,7 @@ static void lguest_power_off(void) */ static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) { - hcall(LHCALL_CRASH, __pa(p), 0, 0); + hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0); /* The hcall won't return, but to keep gcc happy, we're "done". */ return NOTIFY_DONE; } @@ -926,6 +908,11 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } +static void lguest_restart(char *reason) +{ + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); +} + /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops * structures in the kernel provide points for (almost) every routine we have * to override to avoid privileged instructions. */ @@ -957,7 +944,7 @@ __init void lguest_init(void) pv_cpu_ops.cpuid = lguest_cpuid; pv_cpu_ops.load_idt = lguest_load_idt; pv_cpu_ops.iret = lguest_iret; - pv_cpu_ops.load_esp0 = lguest_load_esp0; + pv_cpu_ops.load_sp0 = lguest_load_sp0; pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; pv_cpu_ops.set_ldt = lguest_set_ldt; pv_cpu_ops.load_tls = lguest_load_tls; @@ -996,6 +983,7 @@ __init void lguest_init(void) /* time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; + pv_time_ops.get_cpu_khz = lguest_cpu_khz; /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ @@ -1059,6 +1047,7 @@ __init void lguest_init(void) * the Guest routine to power off. */ pm_power_off = lguest_power_off; + machine_ops.restart = lguest_restart; /* Now we're set up, call start_kernel() in init/main.c and we proceed * to boot as normal. It never returns. */ start_kernel();