2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
10 * entry.S contains the system-call and fault low-level handling routines.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et al.
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers
30 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
58 #ifdef CONFIG_DYNAMIC_FTRACE
/*
 * mcount trampolines for function tracing.  The traced function's
 * return address is loaded from the saved frame at 0x38(%rsp) and
 * passed to the tracer as arg1 in %rdi (SysV AMD64 convention).
 * NOTE(review): only fragments of both ftrace variants are visible
 * here — the register save/restore around these lines is elided.
 */
90 /* taken from glibc */
100 movq 0x38(%rsp), %rdi
121 #else /* ! CONFIG_DYNAMIC_FTRACE */
/* Static ftrace: compare the tracer pointer against the stub so the
   indirect call below is skipped when no tracer is installed. */
123 cmpq $ftrace_stub, ftrace_trace_function
130 /* taken from glibc */
140 movq 0x38(%rsp), %rdi
143 call *ftrace_trace_function
156 #endif /* CONFIG_DYNAMIC_FTRACE */
157 #endif /* CONFIG_FTRACE */
/* On non-preemptible kernels the kernel-return path needs no preemption
   check, so it aliases directly to restoring the saved args. */
159 #ifndef CONFIG_PREEMPT
160 #define retint_kernel retint_restore_args
163 #ifdef CONFIG_PARAVIRT
/* Native (non-paravirt) syscall return: switch back to the user stack
   pointer saved in the per-CPU PDA.  (sysretq itself is elided here.) */
164 ENTRY(native_irq_enable_syscall_ret)
165 movq %gs:pda_oldrsp,%rsp
168 #endif /* CONFIG_PARAVIRT */
/* Trace hardirq state before iretq: bit 9 of the saved EFLAGS is IF,
   i.e. whether interrupts will be re-enabled by the return. */
171 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
172 #ifdef CONFIG_TRACE_IRQFLAGS
173 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
181 * C code is not supposed to know about undefined top of stack. Every time
182 * a C function with a pt_regs argument is called from the SYSCALL based
183 * fast path FIXUP_TOP_OF_STACK is needed.
184 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
188 /* %rsp:at FRAMEEND */
/* Materialize the hardware frame fields (SS/CS/EFLAGS) that SYSCALL did
   not push, using the user rsp stashed in the PDA and the eflags SYSCALL
   left in the r11 slot.  \tmp is a scratch register chosen by the caller. */
189 .macro FIXUP_TOP_OF_STACK tmp
190 movq %gs:pda_oldrsp,\tmp
192 movq $__USER_DS,SS(%rsp)
193 movq $__USER_CS,CS(%rsp)
195 movq R11(%rsp),\tmp /* get eflags */
196 movq \tmp,EFLAGS(%rsp)
/* Inverse of the above: copy possibly-ptrace-modified RSP/EFLAGS back
   into the places (pda_oldrsp, r11 slot) that the sysret path reads. */
199 .macro RESTORE_TOP_OF_STACK tmp,offset=0
200 movq RSP-\offset(%rsp),\tmp
201 movq \tmp,%gs:pda_oldrsp
202 movq EFLAGS-\offset(%rsp),\tmp
203 movq \tmp,R11-\offset(%rsp)
/* Build a fake interrupt frame (ss, rsp, eflags, cs, rip) plus orig_rax
   so kernel threads can reuse the normal exit paths.  Each push is
   paired with a CFI annotation so the dwarf2 unwinder stays accurate.
   NOTE(review): the ss/rsp pushes themselves are elided in this view. */
206 .macro FAKE_STACK_FRAME child_rip
207 /* push in order ss, rsp, eflags, cs, rip */
210 CFI_ADJUST_CFA_OFFSET 8
211 /*CFI_REL_OFFSET ss,0*/
213 CFI_ADJUST_CFA_OFFSET 8
215 pushq $(1<<9) /* eflags - interrupts on */
216 CFI_ADJUST_CFA_OFFSET 8
217 /*CFI_REL_OFFSET rflags,0*/
218 pushq $__KERNEL_CS /* cs */
219 CFI_ADJUST_CFA_OFFSET 8
220 /*CFI_REL_OFFSET cs,0*/
221 pushq \child_rip /* rip */
222 CFI_ADJUST_CFA_OFFSET 8
224 pushq %rax /* orig rax */
225 CFI_ADJUST_CFA_OFFSET 8
/* Undo the 6 quadword pushes made by FAKE_STACK_FRAME. */
228 .macro UNFAKE_STACK_FRAME
230 CFI_ADJUST_CFA_OFFSET -(6*8)
/* Emit the default dwarf2 frame description for a full pt_regs frame:
   CFA at SS+8 and every saved GP register annotated at its pt_regs slot.
   The commented-out segment/flag entries match the file-wide convention
   of not describing cs/ss/rflags to the unwinder. */
233 .macro CFI_DEFAULT_STACK start=1
239 CFI_DEF_CFA_OFFSET SS+8
241 CFI_REL_OFFSET r15,R15
242 CFI_REL_OFFSET r14,R14
243 CFI_REL_OFFSET r13,R13
244 CFI_REL_OFFSET r12,R12
245 CFI_REL_OFFSET rbp,RBP
246 CFI_REL_OFFSET rbx,RBX
247 CFI_REL_OFFSET r11,R11
248 CFI_REL_OFFSET r10,R10
251 CFI_REL_OFFSET rax,RAX
252 CFI_REL_OFFSET rcx,RCX
253 CFI_REL_OFFSET rdx,RDX
254 CFI_REL_OFFSET rsi,RSI
255 CFI_REL_OFFSET rdi,RDI
256 CFI_REL_OFFSET rip,RIP
257 /*CFI_REL_OFFSET cs,CS*/
258 /*CFI_REL_OFFSET rflags,EFLAGS*/
259 CFI_REL_OFFSET rsp,RSP
260 /*CFI_REL_OFFSET ss,SS*/
263 * A newly forked process directly context switches into this.
/* Reset eflags to the saved kernel baseline before running the child.
   NOTE(review): the push is 8 bytes on x86-64 but the CFI adjustment
   below says 4 — looks like a latent unwind-annotation bug; confirm
   against the upstream history before relying on it. */
268 push kernel_eflags(%rip)
269 CFI_ADJUST_CFA_OFFSET 4
270 popf # reset kernel eflags
271 CFI_ADJUST_CFA_OFFSET -4
273 GET_THREAD_INFO(%rcx)
274 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
/* CS RPL bits == 0 means we are resuming a kernel thread, which must
   take the slow IRET return path; 32-bit (TIF_IA32) tasks likewise. */
278 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
279 je int_ret_from_sys_call
280 testl $_TIF_IA32,threadinfo_flags(%rcx)
281 jnz int_ret_from_sys_call
282 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
283 jmp ret_from_sys_call
286 call syscall_trace_leave
287 GET_THREAD_INFO(%rcx)
293 * System call entry. Up to 6 arguments in registers are supported.
295 * SYSCALL does not save anything on the stack and does not change the
301 * rax system call number
303 * rcx return address for syscall/sysret, C arg3
306 * r10 arg3 (--> moved to rcx for C)
309 * r11 eflags for syscall/sysret, temporary for C
310 * r12-r15,rbp,rbx saved by C code, not touched.
312 * Interrupts are off on entry.
313 * Only called from user space.
315 * XXX if we had a free scratch register we could save the RSP into the stack frame
316 * and report it properly in ps. Unfortunately we haven't.
318 * When user can change the frames always force IRET. That is because
319 * it deals with uncanonical addresses better. SYSRET has trouble
320 * with them due to bugs in both AMD and Intel CPUs.
326 CFI_DEF_CFA rsp,PDA_STACKOFFSET
328 /*CFI_REGISTER rflags,r11*/
331 * A hypervisor implementation might want to use a label
332 * after the swapgs, so that it can do the swapgs
333 * for the guest and jump here on syscall.
335 ENTRY(system_call_after_swapgs)
/* Stash the user stack pointer in the PDA and switch to the per-task
   kernel stack — SYSCALL itself does not switch stacks. */
337 movq %rsp,%gs:pda_oldrsp
338 movq %gs:pda_kernelstack,%rsp
340 * No need to follow this irqs off/on section - it's straight
343 ENABLE_INTERRUPTS(CLBR_NONE)
/* Record syscall number and user return address in the partial frame. */
345 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
346 movq %rcx,RIP-ARGOFFSET(%rsp)
347 CFI_REL_OFFSET rip,RIP-ARGOFFSET
348 GET_THREAD_INFO(%rcx)
/* Any tracing/audit/seccomp work diverts to the slow (tracesys) path. */
349 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
351 cmpq $__NR_syscall_max,%rax
354 call *sys_call_table(,%rax,8) # XXX: rip relative
355 movq %rax,RAX-ARGOFFSET(%rsp)
357 * Syscall return path ending with SYSRET (fast path)
358 * Has incomplete stack frame and undefined top of stack.
361 movl $_TIF_ALLWORK_MASK,%edi
365 GET_THREAD_INFO(%rcx)
366 DISABLE_INTERRUPTS(CLBR_NONE)
368 movl threadinfo_flags(%rcx),%edx
373 * sysretq will re-enable interrupts:
/* Reload the user rip into %rcx for sysretq, restore the arg regs. */
376 movq RIP-ARGOFFSET(%rsp),%rcx
378 RESTORE_ARGS 0,-ARG_SKIP,1
379 /*CFI_REGISTER rflags,r11*/
380 ENABLE_INTERRUPTS_SYSCALL_RET
383 /* Handle reschedules */
384 /* edx: work, edi: workmask */
386 bt $TIF_NEED_RESCHED,%edx
389 ENABLE_INTERRUPTS(CLBR_NONE)
391 CFI_ADJUST_CFA_OFFSET 8
394 CFI_ADJUST_CFA_OFFSET -8
397 /* Handle a signal */
400 ENABLE_INTERRUPTS(CLBR_NONE)
401 testl $_TIF_DO_NOTIFY_MASK,%edx
404 /* Really a signal */
405 /* edx: work flags (arg3) */
/* Deliver the signal via ptregscall_common so the full frame is fixed
   up before do_notify_resume sees it. */
406 leaq do_notify_resume(%rip),%rax
407 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
408 xorl %esi,%esi # oldset -> arg2
409 call ptregscall_common
410 1: movl $_TIF_NEED_RESCHED,%edi
411 /* Use IRET because user could have changed frame. This
412 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
413 DISABLE_INTERRUPTS(CLBR_NONE)
/* Bad syscall number: fail with -ENOSYS through the normal return. */
418 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
419 jmp ret_from_sys_call
421 /* Do syscall tracing */
424 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
425 FIXUP_TOP_OF_STACK %rdi
427 call syscall_trace_enter
428 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
430 cmpq $__NR_syscall_max,%rax
431 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
432 movq %r10,%rcx /* fixup for C */
433 call *sys_call_table(,%rax,8)
434 movq %rax,RAX-ARGOFFSET(%rsp)
435 /* Use IRET because user could have changed frame */
438 * Syscall return path ending with IRET.
439 * Has correct top of stack, but partial stack frame.
441 .globl int_ret_from_sys_call
442 int_ret_from_sys_call:
443 DISABLE_INTERRUPTS(CLBR_NONE)
/* CS RPL == 0: returning to the kernel, skip all user-return work. */
445 testl $3,CS-ARGOFFSET(%rsp)
446 je retint_restore_args
447 movl $_TIF_ALLWORK_MASK,%edi
448 /* edi: mask to check */
451 GET_THREAD_INFO(%rcx)
452 movl threadinfo_flags(%rcx),%edx
/* Leaving the kernel: clear the compat-syscall status bit. */
455 andl $~TS_COMPAT,threadinfo_status(%rcx)
458 /* Either reschedule or signal or syscall exit tracking needed. */
459 /* First do a reschedule test. */
460 /* edx: work, edi: workmask */
462 bt $TIF_NEED_RESCHED,%edx
465 ENABLE_INTERRUPTS(CLBR_NONE)
467 CFI_ADJUST_CFA_OFFSET 8
470 CFI_ADJUST_CFA_OFFSET -8
471 DISABLE_INTERRUPTS(CLBR_NONE)
475 /* handle signals and tracing -- both require a full stack frame */
478 ENABLE_INTERRUPTS(CLBR_NONE)
480 /* Check for syscall exit trace */
481 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
484 CFI_ADJUST_CFA_OFFSET 8
485 leaq 8(%rsp),%rdi # &ptregs -> arg1
486 call syscall_trace_leave
488 CFI_ADJUST_CFA_OFFSET -8
/* Trace bits are now handled; drop them from the work mask and loop. */
489 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
493 testl $_TIF_DO_NOTIFY_MASK,%edx
495 movq %rsp,%rdi # &ptregs -> arg1
496 xorl %esi,%esi # oldset -> arg2
497 call do_notify_resume
498 1: movl $_TIF_NEED_RESCHED,%edi
501 DISABLE_INTERRUPTS(CLBR_NONE)
508 * Certain special system calls that need to save a complete full stack frame.
/* Generate a stub that loads the real handler into %rax, points \arg at
   the pt_regs frame, and funnels through ptregscall_common below. */
511 .macro PTREGSCALL label,func,arg
514 leaq \func(%rip),%rax
515 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
516 jmp ptregscall_common
522 PTREGSCALL stub_clone, sys_clone, %r8
523 PTREGSCALL stub_fork, sys_fork, %rdi
524 PTREGSCALL stub_vfork, sys_vfork, %rdi
525 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
526 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
527 PTREGSCALL stub_iopl, sys_iopl, %rsi
/* Common tail for the PTREGSCALL stubs: complete the top of stack,
   call the handler in %rax (call itself elided in this view), then
   resync the syscall state.  %r11/%r15 track the saved return rip for
   the unwinder. */
529 ENTRY(ptregscall_common)
531 CFI_ADJUST_CFA_OFFSET -8
532 CFI_REGISTER rip, r11
535 CFI_REGISTER rip, r15
536 FIXUP_TOP_OF_STACK %r11
538 RESTORE_TOP_OF_STACK %r11
540 CFI_REGISTER rip, r11
543 CFI_ADJUST_CFA_OFFSET 8
544 CFI_REL_OFFSET rip, 0
547 END(ptregscall_common)
552 CFI_ADJUST_CFA_OFFSET -8
553 CFI_REGISTER rip, r11
555 FIXUP_TOP_OF_STACK %r11
558 RESTORE_TOP_OF_STACK %r11
561 jmp int_ret_from_sys_call
566 * sigreturn is special because it needs to restore all registers on return.
567 * This cannot be done with SYSRET, so use the IRET return path instead.
569 ENTRY(stub_rt_sigreturn)
572 CFI_ADJUST_CFA_OFFSET -8
575 FIXUP_TOP_OF_STACK %r11
576 call sys_rt_sigreturn
577 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
579 jmp int_ret_from_sys_call
581 END(stub_rt_sigreturn)
584 * initial frame state for interrupts and exceptions
/* Describe the hardware-pushed frame to dwarf2 relative to \ref, the
   lowest slot already on the stack at the entry point. */
589 CFI_DEF_CFA rsp,SS+8-\ref
590 /*CFI_REL_OFFSET ss,SS-\ref*/
591 CFI_REL_OFFSET rsp,RSP-\ref
592 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
593 /*CFI_REL_OFFSET cs,CS-\ref*/
594 CFI_REL_OFFSET rip,RIP-\ref
597 /* initial frame state for interrupts (and exceptions without error code) */
598 #define INTR_FRAME _frame RIP
599 /* initial frame state for exceptions with error code (and interrupts with
600 vector already pushed) */
601 #define XCPT_FRAME _frame ORIG_RAX
604 * Interrupt entry/exit.
606 * Interrupt entry points save only callee clobbered registers in fast path.
608 * Entry runs with interrupts off.
611 /* 0(%rsp): interrupt number */
/* Shared interrupt body: build arg1, switch to the per-CPU IRQ stack
   (only when not already on it — pda_irqcount tracks nesting), and
   call \func.  rbp is saved both for CFI and as the old-unwinder
   backlink. */
612 .macro interrupt func
615 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
617 CFI_ADJUST_CFA_OFFSET 8
618 CFI_REL_OFFSET rbp, 0
620 CFI_DEF_CFA_REGISTER rbp
624 /* irqcount is used to check if a CPU is already on an interrupt
625 stack or not. While this is essentially redundant with preempt_count
626 it is a little cheaper to use a separate counter in the PDA
627 (short of moving irq_enter into assembly, which would be too
629 1: incl %gs:pda_irqcount
630 cmoveq %gs:pda_irqstackptr,%rsp
631 push %rbp # backlink for old unwinder
633 * We entered an interrupt context - irqs are off:
639 ENTRY(common_interrupt)
642 /* 0(%rsp): oldrsp-ARGOFFSET */
644 DISABLE_INTERRUPTS(CLBR_NONE)
/* Leave the IRQ stack: drop the nesting count and restore %rsp CFI. */
646 decl %gs:pda_irqcount
648 CFI_DEF_CFA_REGISTER rsp
649 CFI_ADJUST_CFA_OFFSET -8
651 GET_THREAD_INFO(%rcx)
652 testl $3,CS-ARGOFFSET(%rsp)
655 /* Interrupt came from user space */
657 * Has a correct top of stack, but a partial stack frame
658 * %rcx: thread info. Interrupts off.
660 retint_with_reschedule:
661 movl $_TIF_WORK_MASK,%edi
664 movl threadinfo_flags(%rcx),%edx
669 retint_swapgs: /* return to user-space */
671 * The iretq could re-enable interrupts:
673 DISABLE_INTERRUPTS(CLBR_ANY)
678 retint_restore_args: /* return to kernel space */
679 DISABLE_INTERRUPTS(CLBR_ANY)
681 * The iretq could re-enable interrupts:
/* Exception-table entry: a faulting iretq is redirected to bad_iret. */
690 .section __ex_table, "a"
691 .quad irq_return, bad_iret
694 #ifdef CONFIG_PARAVIRT
698 .section __ex_table,"a"
699 .quad native_iret, bad_iret
706 * The iret traps when the %cs or %ss being restored is bogus.
707 * We've lost the original trap vector and error code.
708 * #GPF is the most likely one to get for an invalid selector.
709 * So pretend we completed the iret and took the #GPF in user mode.
711 * We are now running with the kernel GS after exception recovery.
712 * But error_entry expects us to have user GS to match the user %cs,
718 jmp general_protection
722 /* edi: workmask, edx: work */
725 bt $TIF_NEED_RESCHED,%edx
728 ENABLE_INTERRUPTS(CLBR_NONE)
730 CFI_ADJUST_CFA_OFFSET 8
733 CFI_ADJUST_CFA_OFFSET -8
734 GET_THREAD_INFO(%rcx)
735 DISABLE_INTERRUPTS(CLBR_NONE)
740 testl $_TIF_DO_NOTIFY_MASK,%edx
743 ENABLE_INTERRUPTS(CLBR_NONE)
/* Mark the frame as not-a-syscall before signal delivery. */
745 movq $-1,ORIG_RAX(%rsp)
746 xorl %esi,%esi # oldset
747 movq %rsp,%rdi # &pt_regs
748 call do_notify_resume
750 DISABLE_INTERRUPTS(CLBR_NONE)
752 movl $_TIF_NEED_RESCHED,%edi
753 GET_THREAD_INFO(%rcx)
756 #ifdef CONFIG_PREEMPT
757 /* Returning to kernel space. Check if we need preemption */
758 /* rcx: threadinfo. interrupts off. */
/* Preempt only when: preempt_count == 0, NEED_RESCHED is set, and the
   interrupted context had interrupts enabled (saved IF, bit 9). */
760 cmpl $0,threadinfo_preempt_count(%rcx)
761 jnz retint_restore_args
762 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
763 jnc retint_restore_args
764 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
765 jnc retint_restore_args
766 call preempt_schedule_irq
771 END(common_interrupt)
/* APIC interrupt entry: pushes the (negated-by-convention elsewhere)
   vector number and dispatches to \func via the common interrupt body.
   NOTE(review): the push of \num is elided in this view. */
776 .macro apicinterrupt num,func
779 CFI_ADJUST_CFA_OFFSET 8
/* One ENTRY per local-APIC / SMP IPI vector, all thin wrappers. */
785 ENTRY(thermal_interrupt)
786 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
787 END(thermal_interrupt)
789 ENTRY(threshold_interrupt)
790 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
791 END(threshold_interrupt)
794 ENTRY(reschedule_interrupt)
795 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
796 END(reschedule_interrupt)
/* Stamps out one entry per TLB-invalidate IPI vector. */
798 .macro INVALIDATE_ENTRY num
799 ENTRY(invalidate_interrupt\num)
800 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
801 END(invalidate_interrupt\num)
813 ENTRY(call_function_interrupt)
814 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
815 END(call_function_interrupt)
816 ENTRY(irq_move_cleanup_interrupt)
817 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
818 END(irq_move_cleanup_interrupt)
821 ENTRY(apic_timer_interrupt)
822 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
823 END(apic_timer_interrupt)
825 ENTRY(error_interrupt)
826 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
829 ENTRY(spurious_interrupt)
830 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
831 END(spurious_interrupt)
834 * Exception entry points.
/* zeroentry-style prologue: fake a zero error code so all exceptions
   share one frame layout, and park the real rax in the rdi slot. */
838 pushq $0 /* push error code/oldrax */
839 CFI_ADJUST_CFA_OFFSET 8
840 pushq %rax /* push real oldrax to the rdi slot */
841 CFI_ADJUST_CFA_OFFSET 8
848 .macro errorentry sym
851 CFI_ADJUST_CFA_OFFSET 8
858 /* error code is on the stack already */
859 /* handle NMI like exceptions that can happen everywhere */
/* \ist selects an IST exception stack (0 = none); \irqtrace controls
   lockdep irq-state tracing (off for NMI, which is irq-trace-unsafe). */
860 .macro paranoidentry sym, ist=0, irqtrace=1
/* Read MSR_GS_BASE to decide whether swapgs already happened. */
864 movl $MSR_GS_BASE,%ecx
872 movq %gs:pda_data_offset, %rbp
875 movq ORIG_RAX(%rsp),%rsi
876 movq $-1,ORIG_RAX(%rsp)
/* Temporarily shrink the IST slot so a nested same-vector exception
   gets a fresh stack region, then restore it after the handler. */
878 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
882 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
884 DISABLE_INTERRUPTS(CLBR_NONE)
891 * "Paranoid" exit path from exception stack.
892 * Paranoid because this is used by NMIs and cannot take
893 * any kernel state for granted.
894 * We don't do kernel preemption checks here, because only
895 * NMI should be common and it does not enable IRQs and
896 * cannot get reschedule ticks.
898 * "trace" is 0 for the NMI handler only, because irq-tracing
899 * is fundamentally NMI-unsafe. (we cannot change the soft and
900 * hard flags at once, atomically)
902 .macro paranoidexit trace=1
903 /* ebx: no swapgs flag */
905 testl %ebx,%ebx /* swapgs needed? */
906 jnz paranoid_restore\trace
908 jnz paranoid_userspace\trace
909 paranoid_swapgs\trace:
914 paranoid_restore\trace:
917 paranoid_userspace\trace:
918 GET_THREAD_INFO(%rcx)
919 movl threadinfo_flags(%rcx),%ebx
920 andl $_TIF_WORK_MASK,%ebx
921 jz paranoid_swapgs\trace
922 movq %rsp,%rdi /* &pt_regs */
/* Can't schedule or take signals on the exception stack: switch back
   to the process stack first. */
924 movq %rax,%rsp /* switch stack for scheduling */
925 testl $_TIF_NEED_RESCHED,%ebx
926 jnz paranoid_schedule\trace
927 movl %ebx,%edx /* arg3: thread flags */
931 ENABLE_INTERRUPTS(CLBR_NONE)
932 xorl %esi,%esi /* arg2: oldset */
933 movq %rsp,%rdi /* arg1: &pt_regs */
934 call do_notify_resume
935 DISABLE_INTERRUPTS(CLBR_NONE)
939 jmp paranoid_userspace\trace
940 paranoid_schedule\trace:
944 ENABLE_INTERRUPTS(CLBR_ANY)
946 DISABLE_INTERRUPTS(CLBR_ANY)
950 jmp paranoid_userspace\trace
955 * Exception entry point. This expects an error code/orig_rax on the stack
956 * and the exception handler in %rax.
958 KPROBE_ENTRY(error_entry)
961 /* rdi slot contains rax, oldrax contains error code */
/* Reserve the full pt_regs area in one go, then fill in each register
   slot with matching CFI annotations. */
964 CFI_ADJUST_CFA_OFFSET (14*8)
966 CFI_REL_OFFSET rsi,RSI
967 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
970 CFI_REL_OFFSET rdx,RDX
972 CFI_REL_OFFSET rcx,RCX
973 movq %rsi,10*8(%rsp) /* store rax */
974 CFI_REL_OFFSET rax,RAX
980 CFI_REL_OFFSET r10,R10
982 CFI_REL_OFFSET r11,R11
984 CFI_REL_OFFSET rbx,RBX
986 CFI_REL_OFFSET rbp,RBP
988 CFI_REL_OFFSET r12,R12
990 CFI_REL_OFFSET r13,R13
992 CFI_REL_OFFSET r14,R14
994 CFI_REL_OFFSET r15,R15
1002 CFI_REL_OFFSET rdi,RDI
/* Pass the error code as arg2 and mark the frame as not-a-syscall. */
1004 movq ORIG_RAX(%rsp),%rsi /* get error code */
1005 movq $-1,ORIG_RAX(%rsp)
1007 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1011 DISABLE_INTERRUPTS(CLBR_NONE)
1013 GET_THREAD_INFO(%rcx)
1016 LOCKDEP_SYS_EXIT_IRQ
1017 movl threadinfo_flags(%rcx),%edx
1018 movl $_TIF_WORK_MASK,%edi
1026 /* There are two places in the kernel that can potentially fault with
1027 usergs. Handle them here. The exception handlers after
1028 iret run with kernel gs again, so don't set the user space flag.
1029 B stepping K8s sometimes report a truncated RIP for IRET
1030 exceptions returning to compat mode. Check for these here too. */
1031 leaq irq_return(%rip),%rbp
1034 movl %ebp,%ebp /* zero extend */
1037 cmpq $gs_change,RIP(%rsp)
1040 KPROBE_END(error_entry)
1042 /* Reload gs selector with exception handling */
1043 /* edi: new selector */
/* void load_gs_index(unsigned gs): write %gs with a fault-tolerant
   mov; a faulting load is redirected via __ex_table to bad_gs below. */
1044 ENTRY(load_gs_index)
1047 CFI_ADJUST_CFA_OFFSET 8
1048 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1052 2: mfence /* workaround */
1055 CFI_ADJUST_CFA_OFFSET -8
1058 ENDPROC(load_gs_index)
1060 .section __ex_table,"a"
1062 .quad gs_change,bad_gs
1064 .section .fixup,"ax"
1065 /* running with kernelgs */
/* Fixup path: the bad selector faulted after swapgs, so swap back. */
1067 SWAPGS /* switch back to user gs */
1074 * Create a kernel thread.
1076 * C extern interface:
1077 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1079 * asm input arguments:
1080 * rdi: fn, rsi: arg, rdx: flags
1082 ENTRY(kernel_thread)
/* Build a fake interrupt frame so the child can exit through the
   normal return paths; child_rip is the child's starting rip. */
1084 FAKE_STACK_FRAME $child_rip
1087 # rdi: flags, rsi: usp, rdx: will be &pt_regs
/* OR in the mandatory kernel-thread clone flags before calling clone. */
1089 orq kernel_thread_flags(%rip),%rdi
1102 * It isn't worth to check for reschedule here,
1103 * so internally to the x86_64 port you can rely on kernel_thread()
1104 * not to reschedule the child before returning, this avoids the need
1105 * of hacks for example to fork off the per-CPU idle tasks.
1106 * [Hopefully no generic code relies on the reschedule -AK]
1112 ENDPROC(kernel_thread)
/* child_rip: where the new kernel thread starts executing. */
1115 pushq $0 # fake return address
1118 * Here we are in the child and the registers are set as they were
1119 * at kernel_thread() invocation in the parent.
1131 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1133 * C extern interface:
1134 * extern long execve(char *name, char **argv, char **envp)
1136 * asm input arguments:
1137 * rdi: name, rsi: argv, rdx: envp
1139 * We want to fallback into:
1140 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1142 * do_sys_execve asm fallback arguments:
1143 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1145 ENTRY(kernel_execve)
1151 movq %rax, RAX(%rsp)
1154 je int_ret_from_sys_call
1159 ENDPROC(kernel_execve)
/* Exception vector entry points.  Each is a thin wrapper that selects
   the appropriate entry macro: zeroentry (no error code), errorentry
   (hardware-pushed error code), or paranoidentry (runs on an IST
   exception stack — NMI, #DB, #BP, #DF, #SS, #MC). */
1161 KPROBE_ENTRY(page_fault)
1162 errorentry do_page_fault
1163 KPROBE_END(page_fault)
1165 ENTRY(coprocessor_error)
1166 zeroentry do_coprocessor_error
1167 END(coprocessor_error)
1169 ENTRY(simd_coprocessor_error)
1170 zeroentry do_simd_coprocessor_error
1171 END(simd_coprocessor_error)
1173 ENTRY(device_not_available)
1174 zeroentry math_state_restore
1175 END(device_not_available)
1177 /* runs on exception stack */
1181 CFI_ADJUST_CFA_OFFSET 8
1182 paranoidentry do_debug, DEBUG_STACK
1186 /* runs on exception stack */
1190 CFI_ADJUST_CFA_OFFSET 8
/* NMI: ist=0 here and irqtrace=0 because irq tracing is NMI-unsafe. */
1191 paranoidentry do_nmi, 0, 0
1192 #ifdef CONFIG_TRACE_IRQFLAGS
1203 CFI_ADJUST_CFA_OFFSET 8
1204 paranoidentry do_int3, DEBUG_STACK
1210 zeroentry do_overflow
1218 zeroentry do_invalid_op
1221 ENTRY(coprocessor_segment_overrun)
1222 zeroentry do_coprocessor_segment_overrun
1223 END(coprocessor_segment_overrun)
1226 zeroentry do_reserved
1229 /* runs on exception stack */
1232 paranoidentry do_double_fault
1238 errorentry do_invalid_TSS
1241 ENTRY(segment_not_present)
1242 errorentry do_segment_not_present
1243 END(segment_not_present)
1245 /* runs on exception stack */
1246 ENTRY(stack_segment)
1248 paranoidentry do_stack_segment
1253 KPROBE_ENTRY(general_protection)
1254 errorentry do_general_protection
1255 KPROBE_END(general_protection)
1257 ENTRY(alignment_check)
1258 errorentry do_alignment_check
1259 END(alignment_check)
1262 zeroentry do_divide_error
1265 ENTRY(spurious_interrupt_bug)
1266 zeroentry do_spurious_interrupt_bug
1267 END(spurious_interrupt_bug)
1269 #ifdef CONFIG_X86_MCE
1270 /* runs on exception stack */
1271 ENTRY(machine_check)
1274 CFI_ADJUST_CFA_OFFSET 8
1275 paranoidentry do_machine_check
1281 /* Call softirq on interrupt stack. Interrupts are off. */
/* Same IRQ-stack switch dance as the interrupt macro: save rbp as the
   CFA register and backlink, bump pda_irqcount, and conditionally move
   onto the per-CPU IRQ stack before invoking __do_softirq. */
1285 CFI_ADJUST_CFA_OFFSET 8
1286 CFI_REL_OFFSET rbp,0
1288 CFI_DEF_CFA_REGISTER rbp
1289 incl %gs:pda_irqcount
1290 cmove %gs:pda_irqstackptr,%rsp
1291 push %rbp # backlink for old unwinder
1294 CFI_DEF_CFA_REGISTER rsp
1295 CFI_ADJUST_CFA_OFFSET -8
1296 decl %gs:pda_irqcount
1299 ENDPROC(call_softirq)
/* Landing pad for SYSCALL issued from unsupported contexts. */
1301 KPROBE_ENTRY(ignore_sysret)
1306 ENDPROC(ignore_sysret)