DBG_FAULT(15)
        FAULT(15)
 
-       /*
-        * Squatting in this space ...
-        *
-        * This special case dispatcher for illegal operation faults allows preserved
-        * registers to be modified through a callback function (asm only) that is handed
-        * back from the fault handler in r8. Up to three arguments can be passed to the
-        * callback function by returning an aggregate with the callback as its first
-        * element, followed by the arguments.
-        */
-ENTRY(dispatch_illegal_op_fault)
-       .prologue
-       .body
-       SAVE_MIN_WITH_COVER
-       ssm psr.ic | PSR_DEFAULT_BITS
-       ;;
-       srlz.i          // guarantee that interruption collection is on
-       ;;
-(p15)  ssm psr.i       // restore psr.i
-       adds r3=8,r2    // set up second base pointer for SAVE_REST
-       ;;
-       alloc r14=ar.pfs,0,0,1,0        // must be first in insn group
-       mov out0=ar.ec
-       ;;
-       SAVE_REST
-       PT_REGS_UNWIND_INFO(0)
-       ;;
-       br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0: ;;
-       alloc r14=ar.pfs,0,0,3,0        // must be first in insn group
-       mov out0=r9
-       mov out1=r10
-       mov out2=r11
-       movl r15=ia64_leave_kernel
-       ;;
-       mov rp=r15
-       mov b6=r8
-       ;;
-       cmp.ne p6,p0=0,r8
-(p6)   br.call.dpnt.many b6=b6         // call returns to ia64_leave_kernel
-       br.sptk.many ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
        .org ia64_ivt+0x4000
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4000 Entry 16 (size 64 bundles) Reserved
	DBG_FAULT(16)
	FAULT(16)
 
+       /*
+        * Squatting in this space ...
+        *
+        * This special case dispatcher for illegal operation faults allows preserved
+        * registers to be modified through a callback function (asm only) that is handed
+        * back from the fault handler in r8. Up to three arguments can be passed to the
+        * callback function by returning an aggregate with the callback as its first
+        * element, followed by the arguments.
+        */
+ENTRY(dispatch_illegal_op_fault)
+       .prologue
+       .body
+       SAVE_MIN_WITH_COVER
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i          // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i       // restore psr.i
+       adds r3=8,r2    // set up second base pointer for SAVE_REST
+       ;;
+       alloc r14=ar.pfs,0,0,1,0        // must be first in insn group
+       mov out0=ar.ec
+       ;;
+       SAVE_REST
+       PT_REGS_UNWIND_INFO(0)
+       ;;
+       br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+       alloc r14=ar.pfs,0,0,3,0        // must be first in insn group
+       mov out0=r9
+       mov out1=r10
+       mov out2=r11
+       movl r15=ia64_leave_kernel
+       ;;
+       mov rp=r15
+       mov b6=r8
+       ;;
+       cmp.ne p6,p0=0,r8
+(p6)   br.call.dpnt.many b6=b6         // call returns to ia64_leave_kernel
+       br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
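For the C side of the contract described in the comment above: the fault handler returns a four-word aggregate, which the ia64 calling convention hands back in r8-r11, and the dispatcher branches to r8 (when non-zero) with r9-r11 as its arguments. A minimal standalone sketch of that contract; the struct layout mirrors the dispatcher's register use, but the handler and callback below are illustrative, not the kernel's:

#include <stdio.h>

/* First element: the callback (branched to via b6=r8); the remaining
 * words become out0..out2 (r9..r11) when the dispatcher calls it. */
struct illegal_op_return {
	unsigned long fkt, arg1, arg2, arg3;
};

static void example_callback(unsigned long a1, unsigned long a2, unsigned long a3)
{
	printf("callback(%lu, %lu, %lu)\n", a1, a2, a3);
}

static struct illegal_op_return fault_handler(void)
{
	/* returning fkt == 0 would mean "no callback", as tested by
	 * the cmp.ne p6,p0=0,r8 above */
	return (struct illegal_op_return){ (unsigned long) example_callback, 1, 2, 3 };
}

int main(void)
{
	struct illegal_op_return r = fault_handler();

	if (r.fkt)	/* mirrors the (p6) br.call.dpnt.many b6=b6 */
		((void (*)(unsigned long, unsigned long, unsigned long)) r.fkt)(r.arg1, r.arg2, r.arg3);
	return 0;
}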
 #ifdef CONFIG_IA32_SUPPORT
 
        /*
 
 #define ACCOUNT_SYS_ENTER
 #endif
 
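+/* Create the RSE patch-list section up front with the "a" (allocatable)
+ * flag, so the .xdata4 tags emitted by RSE_WORKAROUND below land in a
+ * section with the right attributes. */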
+.section ".data.patch.rse", "a"
+.previous
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
 * the minimum state necessary that allows us to turn psr.ic back on.
  * Note that psr.ic is NOT turned on by this macro.  This is so that
  * we can pass interruption state as arguments to a handler.
  */
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)                                                      \
+#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA,WORKAROUND)                                           \
        mov r16=IA64_KR(CURRENT);       /* M */                                                 \
        mov r27=ar.rsc;                 /* M */                                                 \
        mov r20=r1;                     /* A */                                                 \
        tbit.nz p15,p0=r29,IA64_PSR_I_BIT;                                                      \
        mov r29=b0                                                                              \
        ;;                                                                                      \
+       WORKAROUND;                                                                             \
        adds r16=PT(R8),r1;     /* initialize first base pointer */                             \
        adds r17=PT(R9),r1;     /* initialize second base pointer */                            \
 (pKStk)        mov r18=r0;             /* make sure r18 isn't NaT */                                   \
        st8 [r25]=r10;          /* ar.ssd */    \
        ;;
 
-#define SAVE_MIN_WITH_COVER    DO_SAVE_MIN(cover, mov r30=cr.ifs,)
-#define SAVE_MIN_WITH_COVER_R19        DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
-#define SAVE_MIN               DO_SAVE_MIN(     , mov r30=r0, )
+#define RSE_WORKAROUND                         \
+(pUStk) extr.u r17=r18,3,6;                    \
+(pUStk)        sub r16=r18,r22;                        \
+[1:](pKStk)    br.cond.sptk.many 1f;           \
+       .xdata4 ".data.patch.rse",1b-.          \
+       ;;                                      \
+       cmp.ge p6,p7 = 33,r17;                  \
+       ;;                                      \
+(p6)   mov r17=0x310;                          \
+(p7)   mov r17=0x308;                          \
+       ;;                                      \
+       cmp.leu p1,p0=r16,r17;                  \
+(p1)   br.cond.sptk.many 1f;                   \
+       dep.z r17=r26,0,62;                     \
+       movl r16=2f;                            \
+       ;;                                      \
+       mov ar.pfs=r17;                         \
+       dep r27=r0,r27,16,14;                   \
+       mov b0=r16;                             \
+       ;;                                      \
+       br.ret.sptk b0;                         \
+       ;;                                      \
+2:						\
+	mov ar.rsc=r0		/* enforced-lazy mode */	\
+	;;					\
+	flushrs;		/* spill all dirty regs to the backing store */	\
+	;;					\
+	mov ar.bspstore=r22	/* reset bspstore to the kernel RBS base */	\
+	;;					\
+	mov r18=ar.bsp;		/* reload ar.bsp after the flush */	\
+	;;					\
+1:                                             \
+       .pred.rel "mutex", pKStk, pUStk
+
+#define SAVE_MIN_WITH_COVER    DO_SAVE_MIN(cover, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19        DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN                       DO_SAVE_MIN(     , mov r30=r0, , )
 
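Loosely, the guard at the top of RSE_WORKAROUND measures how many dirty bytes sit on the kernel backing store (r18 holds ar.bsp and r22 the kernel RBS base at this point in DO_SAVE_MIN) and compares them against what a 96-register physical stacked file can hold: 0x300 bytes of registers plus one or two RNaT collection slots, depending on where ar.bsp falls within its 64-slot group. A standalone C sketch of that arithmetic; the helper name is invented:

#include <stdio.h>

/* Mirrors: extr.u r17=r18,3,6;  sub r16=r18,r22;
 *          cmp.ge p6,p7=33,r17; mov r17=0x310/0x308;
 *          cmp.leu p1,p0=r16,r17 */
static int rse_needs_flush(unsigned long bsp, unsigned long rbs_base)
{
	unsigned long dirty = bsp - rbs_base;      /* unspilled bytes */
	unsigned long slot  = (bsp >> 3) & 0x3f;   /* position within the 64-slot group */
	unsigned long limit = (slot <= 33) ? 0x310 : 0x308;  /* 96 regs + RNaT slot(s) */

	return dirty > limit;  /* workaround path: flushrs, then reset ar.bspstore */
}

int main(void)
{
	/* 0x400 dirty bytes exceed either limit, so the flush runs */
	printf("%d\n", rse_needs_flush(0xa000000000001000UL, 0xa000000000000c00UL));
	return 0;
}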
        ia64_srlz_i();
 }
 
+/*
+ * Disable the RSE workaround by turning the conditional branch
+ * that we tagged in each place the workaround was used into an
+ * unconditional branch.
+ */
+void __init
+ia64_patch_rse (unsigned long start, unsigned long end)
+{
+       s32 *offp = (s32 *) start;
+       u64 ip, *b;
+
+       while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;	/* self-relative tag from .xdata4 */
+
+		b = (u64 *)(ip & -16);		/* bundle holding the tagged branch */
+		b[1] &= ~0xf800000L;		/* clear qp bits: (pKStk) br.cond -> br */
+		ia64_fc((void *) ip);		/* flush the patched bundle */
+               ++offp;
+       }
+       ia64_sync_i();
+       ia64_srlz_i();
+}
+
 void __init
 ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 {
 
        /* process SAL system table: */
        ia64_sal_init(__va(efi.sal_systab));
 
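+	/*
+	 * Patch out the workaround wherever it is not needed: always
+	 * on Merced (CONFIG_ITANIUM) kernels, and elsewhere whenever
+	 * PAL_RSE_INFO reports more than 96 physical stacked registers.
+	 */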
+#ifdef CONFIG_ITANIUM
+       ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+#else
+       {
+               u64 num_phys_stacked;
+
+               if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
+                       ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+       }
+#endif
+
 #ifdef CONFIG_SMP
        cpu_physical_id(0) = hard_smp_processor_id();
 #endif
 
          __end___vtop_patchlist = .;
        }
 
+  .data.patch.rse : AT(ADDR(.data.patch.rse) - LOAD_OFFSET)
+       {
+         __start___rse_patchlist = .;
+         *(.data.patch.rse)
+         __end___rse_patchlist = .;
+       }
+
   .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
        {
          __start___mckinley_e9_bundles = .;
 
 extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
 extern void ia64_patch_vtop (unsigned long start, unsigned long end);
 extern void ia64_patch_phys_stack_reg(unsigned long val);
+extern void ia64_patch_rse (unsigned long start, unsigned long end);
 extern void ia64_patch_gate (void);
 
 #endif /* _ASM_IA64_PATCH_H */
 
 # define KERNEL_STACK_SIZE_ORDER               0
 #endif
 
-#define IA64_RBS_OFFSET                        ((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 15) & ~15)
+#define IA64_RBS_OFFSET                        ((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 31) & ~31)
 #define IA64_STK_OFFSET                        ((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE)
 
 #define KERNEL_STACK_SIZE              IA64_STK_OFFSET
 
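The RBS base rounding above tightens from a 16-byte to a 32-byte boundary. A quick illustration of the two expressions; the 0x1d90 input is an arbitrary example, not the real IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE:

#include <stdio.h>

int main(void)
{
	unsigned long sz = 0x1d90;	/* example value only */

	printf("old: %#lx\n", (sz + 15) & ~15UL);	/* -> 0x1d90, 16-byte aligned */
	printf("new: %#lx\n", (sz + 31) & ~31UL);	/* -> 0x1da0, 32-byte aligned */
	return 0;
}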
 
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
+extern char __start___rse_patchlist[], __end___rse_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
 extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
 extern char __start_gate_section[];