/*
  * Invalid mode handlers
  */
-       .macro  inv_entry, sym, reason
-       sub     sp, sp, #S_FRAME_SIZE           @ Allocate frame size in one go
-       stmia   sp, {r0 - lr}                   @ Save XXX r0 - lr
-       ldr     r4, .LC\sym
+       .macro  inv_entry, reason
+       sub     sp, sp, #S_FRAME_SIZE           @ allocate the whole register frame
+       stmib   sp, {r1 - lr}                   @ save r1 - lr above the r0 slot
        mov     r1, #\reason                    @ r1 = reason code for this entry
        .endm
 
 __pabt_invalid:
-       inv_entry abt, BAD_PREFETCH
-       b       1f
+       inv_entry BAD_PREFETCH
+       b       common_invalid
 
 __dabt_invalid:
-       inv_entry abt, BAD_DATA
-       b       1f
+       inv_entry BAD_DATA
+       b       common_invalid
 
 __irq_invalid:
-       inv_entry irq, BAD_IRQ
-       b       1f
+       inv_entry BAD_IRQ
+       b       common_invalid
 
 __und_invalid:
+       inv_entry BAD_UNDEFINSTR
-       inv_entry und, BAD_UNDEFINSTR
+
+       @
+       @ XXX fall through to common_invalid
+       @
+
+@
+@ common_invalid - generic code for failed exception (re-entrant version of handlers)
+@
+common_invalid:
+       zero_fp
+
+       ldmia   r0, {r4 - r6}           @ saved r0, lr_<exception>, spsr_<exception>
+       add     r0, sp, #S_PC           @ here for interlock avoidance
+       mov     r7, #-1                 @  ""   ""    ""        ""
+       str     r4, [sp]                @ save preserved r0
+       stmia   r0, {r5 - r7}           @ lr_<exception>,
+                                       @ cpsr_<exception>, "old_r0"
 
-1:     zero_fp
-       ldmia   r4, {r5 - r7}                   @ Get XXX pc, cpsr, old_r0
-       add     r4, sp, #S_PC
-       stmia   r4, {r5 - r7}                   @ Save XXX pc, cpsr, old_r0
        mov     r0, sp                          @ r0 = base of the saved register frame
-       and     r2, r6, #31                     @ int mode
+       and     r2, r6, #0x1f                   @ r2 = mode field of the saved psr
        b       bad_mode
 
 /*
  * SVC mode handlers
  */
-       .macro  svc_entry, sym
+       .macro  svc_entry
        sub     sp, sp, #S_FRAME_SIZE
-       stmia   sp, {r0 - r12}                  @ save r0 - r12
-       ldr     r2, .LC\sym
-       add     r0, sp, #S_FRAME_SIZE
-       ldmia   r2, {r2 - r4}                   @ get pc, cpsr
-       add     r5, sp, #S_SP
+       stmib   sp, {r1 - r12}
+
+       ldmia   r0, {r1 - r3}
+       add     r5, sp, #S_SP           @ here for interlock avoidance
+       mov     r4, #-1                 @  ""  ""      ""       ""
+       add     r0, sp, #S_FRAME_SIZE   @  ""  ""      ""       ""
+       str     r1, [sp]                @ save the "real" r0 copied
+                                       @ from the exception stack
+
        mov     r1, lr
 
        @
 
        .align  5
 __dabt_svc:
-       svc_entry abt
+       svc_entry
 
        @
        @ get ready to re-enable interrupts if appropriate
 
        .align  5
 __irq_svc:
-       svc_entry irq
+       svc_entry
+
 #ifdef CONFIG_PREEMPT
        get_thread_info tsk
        ldr     r8, [tsk, #TI_PREEMPT]          @ get preempt count
        add     r7, r8, #1                      @ increment it
        str     r7, [tsk, #TI_PREEMPT]
 #endif
+
        irq_handler
 #ifdef CONFIG_PREEMPT
        ldr     r0, [tsk, #TI_FLAGS]            @ get flags
 
        .align  5
 __und_svc:
-       svc_entry und
+       svc_entry
 
        @
        @ call emulation code, which returns using r9 if it has emulated
 
        .align  5
 __pabt_svc:
-       svc_entry abt
+       svc_entry
 
        @
        @ re-enable interrupts if appropriate
        ldmia   sp, {r0 - pc}^                  @ load r0 - pc, cpsr
 
        .align  5
-.LCirq:
-       .word   __temp_irq
-.LCund:
-       .word   __temp_und
-.LCabt:
-       .word   __temp_abt
 .LCcralign:
        .word   cr_alignment
 #ifdef MULTI_ABORT
 /*
  * User mode handlers
  */
-       .macro  usr_entry, sym
-       sub     sp, sp, #S_FRAME_SIZE           @ Allocate frame size in one go
-       stmia   sp, {r0 - r12}                  @ save r0 - r12
-       ldr     r7, .LC\sym
-       add     r5, sp, #S_PC
-       ldmia   r7, {r2 - r4}                   @ Get USR pc, cpsr
+       .macro  usr_entry
+       sub     sp, sp, #S_FRAME_SIZE
+       stmib   sp, {r1 - r12}
+
+       ldmia   r0, {r1 - r3}
+       add     r0, sp, #S_PC           @ here for interlock avoidance
+       mov     r4, #-1                 @  ""  ""     ""        ""
+
+       str     r1, [sp]                @ save the "real" r0 copied
+                                       @ from the exception stack
 
 #if __LINUX_ARM_ARCH__ < 6
        @ make sure our user space atomic helper is aborted
        @
        @ Also, separately save sp_usr and lr_usr
        @
-       stmia   r5, {r2 - r4}
-       stmdb   r5, {sp, lr}^
+       stmia   r0, {r2 - r4}
+       stmdb   r0, {sp, lr}^
 
        @
        @ Enable the alignment trap while in kernel mode
 
        .align  5
 __dabt_usr:
-       usr_entry abt
+       usr_entry
 
        @
        @ Call the processor-specific abort handler:
 
        .align  5
 __irq_usr:
-       usr_entry irq
+       usr_entry
 
        get_thread_info tsk
 #ifdef CONFIG_PREEMPT
        add     r7, r8, #1                      @ increment it
        str     r7, [tsk, #TI_PREEMPT]
 #endif
+
        irq_handler
 #ifdef CONFIG_PREEMPT
        ldr     r0, [tsk, #TI_PREEMPT]
        teq     r0, r7
        strne   r0, [r0, -r0]
 #endif
+
        mov     why, #0
        b       ret_to_user
 
 
        .align  5
 __und_usr:
-       usr_entry und
+       usr_entry
 
        tst     r3, #PSR_T_BIT                  @ Thumb mode?
        bne     fpundefinstr                    @ ignore FP
 
        .align  5
 __pabt_usr:
-       usr_entry abt
+       usr_entry
 
        enable_irq                              @ Enable interrupts
        mov     r0, r2                          @ address (pc)
  *
  * Common stub entry macro:
  *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
+ *
+ * SP points to a minimal amount of processor-private memory, the address
+ * of which is copied into r0 for the mode specific abort handler.
  */
-       .macro  vector_stub, name, sym, correction=0
+       .macro  vector_stub, name, correction=0
        .align  5
 
 vector_\name:
-       ldr     r13, .LCs\sym
        .if \correction
        sub     lr, lr, #\correction            @ apply the per-vector lr correction
        .endif
-       str     lr, [r13]                       @ save lr_IRQ
+
+       @
+       @ Save r0, lr_<exception> (parent PC) and spsr_<exception>
+       @ (parent CPSR)
+       @
+       stmia   sp, {r0, lr}            @ save r0, lr
        mrs     lr, spsr                        @ lr = spsr_<exception>
-       str     lr, [r13, #4]                   @ save spsr_IRQ
+       str     lr, [sp, #8]            @ save spsr
+
        @
-       @ now branch to the relevant MODE handling routine
+       @ Prepare for SVC32 mode.  IRQs remain disabled.
        @
-       mrs     r13, cpsr
-       bic     r13, r13, #MODE_MASK
-       orr     r13, r13, #SVC_MODE
-       msr     spsr_cxsf, r13                  @ switch to SVC_32 mode
+       mrs     r0, cpsr                @ start from the current cpsr
+       bic     r0, r0, #MODE_MASK      @ clear the mode field
+       orr     r0, r0, #SVC_MODE       @ select SVC mode
+       msr     spsr_cxsf, r0           @ becomes the cpsr at the movs below
 
-       and     lr, lr, #15
+       @
+       @ the branch table must immediately follow this code
+       @
+       mov     r0, sp                  @ r0 = address of the saved r0, lr, spsr
+       and     lr, lr, #0x0f           @ lr = low four bits of the parent psr
+       ldr     lr, [pc, lr, lsl #2]    @ lr = handler address from the table
-       movs    pc, lr                          @ Changes mode and branches
+       movs    pc, lr                  @ branch to handler in SVC mode
        .endm
 
        .globl  __stubs_start
 /*
  * Interrupt dispatcher
  */
-       vector_stub     irq, irq, 4
+       vector_stub     irq, 4
 
        .long   __irq_usr                       @  0  (USR_26 / USR_32)
        .long   __irq_invalid                   @  1  (FIQ_26 / FIQ_32)
  * Data abort dispatcher
  * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
  */
-       vector_stub     dabt, abt, 8
+       vector_stub     dabt, 8
 
        .long   __dabt_usr                      @  0  (USR_26 / USR_32)
        .long   __dabt_invalid                  @  1  (FIQ_26 / FIQ_32)
  * Prefetch abort dispatcher
  * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
  */
-       vector_stub     pabt, abt, 4
+       vector_stub     pabt, 4
 
        .long   __pabt_usr                      @  0 (USR_26 / USR_32)
        .long   __pabt_invalid                  @  1 (FIQ_26 / FIQ_32)
  * Undef instr entry dispatcher
  * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
  */
-       vector_stub     und, und
+       vector_stub     und
 
        .long   __und_usr                       @  0 (USR_26 / USR_32)
        .long   __und_invalid                   @  1 (FIQ_26 / FIQ_32)
 .LCvswi:
        .word   vector_swi
 
-.LCsirq:
-       .word   __temp_irq
-.LCsund:
-       .word   __temp_und
-.LCsabt:
-       .word   __temp_abt
-
        .globl  __stubs_end
 __stubs_end:
 
 
        .data
 
-/*
- * Do not reorder these, and do not insert extra data between...
- */
-
-__temp_irq:
-       .word   0                               @ saved lr_irq
-       .word   0                               @ saved spsr_irq
-       .word   -1                              @ old_r0
-__temp_und:
-       .word   0                               @ Saved lr_und
-       .word   0                               @ Saved spsr_und
-       .word   -1                              @ old_r0
-__temp_abt:
-       .word   0                               @ Saved lr_abt
-       .word   0                               @ Saved spsr_abt
-       .word   -1                              @ old_r0
-
        .globl  cr_alignment
        .globl  cr_no_alignment
 cr_alignment:
 
 struct cpu_cache_fns cpu_cache;
 #endif
 
+struct stack {
+       u32 irq[3];     /* IRQ mode save area: three words (r0, lr, spsr) */
+       u32 abt[3];     /* ABT mode save area, same three-word layout */
+       u32 und[3];     /* UND mode save area, same three-word layout */
+} ____cacheline_aligned;
+
+static struct stack stacks[NR_CPUS];   /* one set of save areas per CPU */
+
 char elf_platform[ELF_PLATFORM_SIZE];
 EXPORT_SYMBOL(elf_platform);
 
               cpu_name, processor_id, (int)processor_id & 15,
               proc_arch[cpu_architecture()]);
 
-       dump_cpu_info(smp_processor_id());
-
        sprintf(system_utsname.machine, "%s%c", list->arch_name, ENDIANNESS);
        sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS);
        elf_hwcap = list->elf_hwcap;
        cpu_proc_init();
 }
 
+/*
+ * cpu_init - initialise one CPU.
+ *
+ * cpu_init dumps the cache information, initialises SMP specific
+ * information, and sets up the per-CPU stacks.
+ *
+ * The "stacks" are the three-word irq/abt/und save areas of this CPU's
+ * struct stack entry; the sp of each of those modes is pointed at the
+ * start of its own area.
+ */
+void __init cpu_init(void)
+{
+       unsigned int cpu = smp_processor_id();
+       struct stack *stk;
+
+       if (cpu >= NR_CPUS) {
+               printk(KERN_CRIT "CPU%u: bad primary CPU number\n", cpu);
+               BUG();
+       }
+
+       /* only index stacks[] once the CPU number is known to be in range */
+       stk = &stacks[cpu];
+
+       dump_cpu_info(cpu);
+
+       /*
+        * setup stacks for re-entrant exception handlers
+        *
+        * Step through IRQ, ABT and UND modes with IRQs and FIQs masked,
+        * load each mode's sp, then return to SVC mode.  r14 is banked in
+        * those modes, so it is named as a clobber to stop the compiler
+        * from keeping stk in it while the mode is switched.
+        */
+       __asm__ (
+       "msr    cpsr_c, %1\n\t"
+       "add    sp, %0, %2\n\t"
+       "msr    cpsr_c, %3\n\t"
+       "add    sp, %0, %4\n\t"
+       "msr    cpsr_c, %5\n\t"
+       "add    sp, %0, %6\n\t"
+       "msr    cpsr_c, %7"
+           :
+           : "r" (stk),
+             "I" (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+             "I" (offsetof(struct stack, irq[0])),
+             "I" (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+             "I" (offsetof(struct stack, abt[0])),
+             "I" (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+             "I" (offsetof(struct stack, und[0])),
+             "I" (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+           : "r14");
+}
+
 static struct machine_desc * __init setup_machine(unsigned int nr)
 {
        struct machine_desc *list;
        paging_init(&meminfo, mdesc);
        request_standard_resources(&meminfo, mdesc);
 
+       cpu_init();
+
        /*
         * Set up various architecture-specific pointers
         */