-#define TRAP_PER_CPU_THREAD 0x00
-#define TRAP_PER_CPU_PGD_PADDR 0x08
-
-#define TRAP_BLOCK_SZ_SHIFT 6
-
-/* Clobbers %g1, loads %g6 with local processor's cpuid */
-#define __GET_CPUID \
- ba,pt %xcc, __get_cpu_id; \
- rd %pc, %g1;
-
-/* Clobbers %g1, current address space PGD phys address into %g7. */
-#define TRAP_LOAD_PGD_PHYS \
- __GET_CPUID \
- sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \
- sethi %hi(trap_block), %g7; \
- or %g7, %lo(trap_block), %g7; \
- add %g7, %g6, %g7; \
- ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
-
-/* Clobbers %g1, loads local processor's IRQ work area into %g6. */
-#define TRAP_LOAD_IRQ_WORK \
- __GET_CPUID \
- sethi %hi(__irq_work), %g1; \
- sllx %g6, 6, %g6; \
- or %g1, %lo(__irq_work), %g1; \
- add %g1, %g6, %g6;
-
-/* Clobbers %g1, loads %g6 with current thread info pointer. */
-#define TRAP_LOAD_THREAD_REG \
- __GET_CPUID \
- sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \
- sethi %hi(trap_block), %g1; \
- or %g1, %lo(trap_block), %g1; \
- ldx [%g1 + %g6], %g6;
-
-/* Given the current thread info pointer in %g6, load the per-cpu
- * area base of the current processor into %g5. REG1 and REG2 are
+#define TRAP_PER_CPU_THREAD 0x00
+#define TRAP_PER_CPU_PGD_PADDR 0x08
+#define TRAP_PER_CPU_CPU_MONDO_PA 0x10
+#define TRAP_PER_CPU_DEV_MONDO_PA 0x18
+#define TRAP_PER_CPU_RESUM_MONDO_PA 0x20
+#define TRAP_PER_CPU_RESUM_KBUF_PA 0x28
+#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30
+#define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38
+#define TRAP_PER_CPU_FAULT_INFO 0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA 0xc8
+#define TRAP_PER_CPU_TSB_HUGE 0xd0
+#define TRAP_PER_CPU_TSB_HUGE_TEMP 0xd8
+
+#define TRAP_BLOCK_SZ_SHIFT 8
+
+#include <asm/scratchpad.h>
+
+#define __GET_CPUID(REG) \
+ /* Spitfire implementation (default). */ \
+661: ldxa [%g0] ASI_UPA_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x1f, REG; \
+ nop; \
+ .section .cpuid_patch, "ax"; \
+ /* Instruction location. */ \
+ .word 661b; \
+ /* Cheetah Safari implementation. */ \
+ ldxa [%g0] ASI_SAFARI_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x3ff, REG; \
+ nop; \
+ /* Cheetah JBUS implementation. */ \
+ ldxa [%g0] ASI_JBUS_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x1f, REG; \
+ nop; \
+ /* Starfire implementation. */ \
+ sethi %hi(0x1fff40000d0 >> 9), REG; \
+ sllx REG, 9, REG; \
+ or REG, 0xd0, REG; \
+ lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\
+ /* sun4v implementation. */ \
+ mov SCRATCHPAD_CPUID, REG; \
+ ldxa [REG] ASI_SCRATCHPAD, REG; \
+ nop; \
+ nop; \
+ .previous;
+
+#ifdef CONFIG_SMP
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ __GET_CPUID(TMP) \
+ sethi %hi(trap_block), DEST; \
+ sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \
+ or DEST, %lo(trap_block), DEST; \
+ add DEST, TMP, DEST; \
+
+/* Clobbers TMP, current address space PGD phys address into DEST. */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST. */
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \
+ __GET_CPUID(TMP) \
+ sethi %hi(__irq_work), DEST; \
+ sllx TMP, 6, TMP; \
+ or DEST, %lo(__irq_work), DEST; \
+ add DEST, TMP, DEST;
+
+/* Clobbers TMP, loads DEST with current thread info pointer. */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST. REG1, REG2, and REG3 are