2 Asm versions of Xen pv-ops, suitable for either direct use or inlining.
3 The inline versions are the same as the direct-use versions, with the
4 pre- and post-amble chopped off.
6 This code is encoded for size rather than absolute efficiency,
7 with a view to being able to inline as much as possible.
9 We only bother with direct forms (ie, vcpu in pda) of the operations
10 here; the indirect forms are better handled in C, since they're
11 generally too large to inline anyway.
14 //#include <asm/asm-offsets.h>
15 #include <asm/thread_info.h>
16 #include <asm/processor-flags.h>
17 #include <asm/segment.h>
19 #include <xen/interface/xen.h>
24 Force an event check by making a hypercall,
25 but preserve regs before making the call.
31 call xen_force_evtchn_callback
38 We can't use sysexit directly, because we're not running in ring0.
39 But we can easily fake it up using iret. Assuming xen_sysexit
40 is jumped to with a standard stack frame, we can just strip it
41 back to a standard iret frame and use iret.
44 movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
45 orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
46 lea PT_EIP(%esp), %esp
52 This is run where a normal iret would be run, with the same stack setup:
57 This attempts to make sure that any pending events are dealt
58 with on return to usermode, but there is a small window in
59 which an event can happen just before entering usermode. If
60 the nested interrupt ends up setting one of the TIF_WORK_MASK
61 pending work flags, they will not be tested again before
62 returning to usermode. This means that a process can end up
63 with pending work, which will be unprocessed until the process
64 enters and leaves the kernel again, which could be an
65 unbounded amount of time. This means that a pending signal or
66 reschedule event could be indefinitely delayed.
68 The fix is to notice a nested interrupt in the critical
69 window, and if one occurs, then fold the nested interrupt into
70 the current interrupt stack frame, and re-process it
71 iteratively rather than recursively. This means that it will
72 exit via the normal path, and all pending work will be dealt with before returning to usermode.
75 Because the nested interrupt handler needs to deal with the
76 current stack state in whatever form it's in, we keep things
77 simple by only using a single register which is pushed/popped on its way through the fixup code
81 /* test eflags for special cases */
82 testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
86 ESP_OFFSET=4 # bytes pushed onto stack
88 /* Store vcpu_info pointer for easy access. Do it this
89 way to avoid having to reload %fs */
92 movl TI_cpu(%eax),%eax
93 movl __per_cpu_offset(,%eax,4),%eax
94 mov per_cpu__xen_vcpu(%eax),%eax
96 movl per_cpu__xen_vcpu, %eax
99 /* check IF state we're restoring */
100 testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
102 /* Maybe enable events. Once this happens we could get a
103 recursive event, so the critical region starts immediately
104 afterwards. However, if that happens we don't end up
105 resuming the code, so we don't have to be worried about
106 being preempted to another CPU. */
107 setz XEN_vcpu_info_mask(%eax)
110 /* check for unmasked and pending */
111 cmpw $0x0001, XEN_vcpu_info_pending(%eax)
113 /* If there's something pending, mask events again so we
114 can jump back into xen_hypervisor_callback */
115 sete XEN_vcpu_info_mask(%eax)
119 /* From this point on the registers are restored and the stack
120 updated, so we don't need to worry about it if we're preempted */
123 /* Jump to hypervisor_callback after fixing up the stack.
124 Events are masked, so jumping out of the critical region is OK.
126 je xen_hypervisor_callback
130 .section __ex_table,"a"
136 /* put this out of line since it's very rarely used */
137 jmp hypercall_page + __HYPERVISOR_iret * 32
139 .globl xen_iret_start_crit, xen_iret_end_crit
142 This is called by xen_hypervisor_callback in entry.S when it sees
143 that the EIP at the time of interrupt was between xen_iret_start_crit
144 and xen_iret_end_crit. We're passed the EIP in %eax so we can do
145 a more refined determination of what to do.
147 The stack format at this point is:
149 ss : (ss/esp may be present if we came from usermode)
151 eflags } outer exception info
154 ---------------- <- edi (copy dest)
155 eax : outer eax if it hasn't been restored
157 eflags } nested exception info
158 cs } (no ss/esp because we're nested
159 eip } from the same ring)
160 orig_eax }<- esi (copy src)
170 In order to deliver the nested exception properly, we need to shift
171 everything from the return addr up to the error code so it
172 sits just under the outer exception info. This means that when we
173 handle the exception, we do it in the context of the outer exception
174 rather than starting a new one.
176 The only caveat is that if the outer eax hasn't been
177 restored yet (ie, it's still on stack), we need to insert
178 its value into the SAVE_ALL state before going on, since
179 it's usermode state which we eventually need to restore.
181 ENTRY(xen_iret_crit_fixup)
183 Paranoia: Make sure we're really coming from kernel space.
184 One could imagine a case where userspace jumps into the
185 critical range address, but just before the CPU delivers a GP,
186 it decides to deliver an interrupt instead. Unlikely?
187 Definitely. Easy to avoid? Yes. The Intel documents
188 explicitly say that the reported EIP for a bad jump is the
189 jump instruction itself, not the destination, but some virtual
190 environments get this wrong.
192 movl PT_CS(%esp), %ecx
193 andl $SEGMENT_RPL_MASK, %ecx
197 lea PT_ORIG_EAX(%esp), %esi
198 lea PT_EFLAGS(%esp), %edi
200 /* If eip is before iret_restore_end then stack
201 hasn't been restored yet. */
202 cmp $iret_restore_end, %eax
205 movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */
206 movl %eax, PT_EAX(%esp)
208 lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
210 /* set up the copy */
212 mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
216 lea 4(%edi),%esp /* point esp to new frame */