#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
 #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
 
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
 #define STACKFAULT_STACK 1
 #define DOUBLEFAULT_STACK 2
 
                                        /* gcc-ABI: this canary MUST be at
                                           offset 40!!! */
 #endif
-       char *irqstackptr;
        short nodenumber;               /* number of current node (32k max) */
        short in_bootmem;               /* pda lives in bootmem */
        short isidle;
 
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
+
+DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+DECLARE_PER_CPU(char *, irq_stack_ptr);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
 
        ENTRY(pcurrent); 
        ENTRY(irqcount);
        ENTRY(cpunumber);
-       ENTRY(irqstackptr);
        DEFINE(pda_size, sizeof(struct x8664_pda));
        BLANK();
 #undef ENTRY
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
+#else
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+       per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+#endif
 
 void __cpuinit pda_init(int cpu)
 {
        if (cpu == 0) {
                /* others are initialized in smpboot.c */
                pda->pcurrent = &init_task;
-               pda->irqstackptr = boot_cpu_stack;
-               pda->irqstackptr += IRQSTACKSIZE - 64;
        } else {
-               if (!pda->irqstackptr) {
-                       pda->irqstackptr = (char *)
-                               __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-                       if (!pda->irqstackptr)
-                               panic("cannot allocate irqstack for cpu %d",
-                                     cpu);
-                       pda->irqstackptr += IRQSTACKSIZE - 64;
-               }
-
                if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
                        pda->nodenumber = cpu_to_node(cpu);
        }
 
                const struct stacktrace_ops *ops, void *data)
 {
        const unsigned cpu = get_cpu();
-       unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+       unsigned long *irq_stack_end =
+               (unsigned long *)per_cpu(irq_stack_ptr, cpu);
        unsigned used = 0;
        struct thread_info *tinfo;
        int graph = 0;
                        stack = (unsigned long *) estack_end[-2];
                        continue;
                }
-               if (irqstack_end) {
-                       unsigned long *irqstack;
-                       irqstack = irqstack_end -
-                               (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+               if (irq_stack_end) {
+                       unsigned long *irq_stack;
+                       irq_stack = irq_stack_end -
+                               (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
 
-                       if (stack >= irqstack && stack < irqstack_end) {
+                       if (stack >= irq_stack && stack < irq_stack_end) {
                                if (ops->stack(data, "IRQ") < 0)
                                        break;
                                bp = print_context_stack(tinfo, stack, bp,
-                                       ops, data, irqstack_end, &graph);
+                                       ops, data, irq_stack_end, &graph);
                                /*
                                 * We link to the next stack (which would be
                                 * the process stack normally) the last
                                 * pointer (index -1 to end) in the IRQ stack:
                                 */
-                               stack = (unsigned long *) (irqstack_end[-1]);
-                               irqstack_end = NULL;
+                               stack = (unsigned long *) (irq_stack_end[-1]);
+                               irq_stack_end = NULL;
                                ops->stack(data, "EOI");
                                continue;
                        }
        unsigned long *stack;
        int i;
        const int cpu = smp_processor_id();
-       unsigned long *irqstack_end =
-               (unsigned long *) (cpu_pda(cpu)->irqstackptr);
-       unsigned long *irqstack =
-               (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+       unsigned long *irq_stack_end =
+               (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+       unsigned long *irq_stack =
+               (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
 
        /*
         * debugging aid: "show_stack(NULL, NULL);" prints the
 
        stack = sp;
        for (i = 0; i < kstack_depth_to_print; i++) {
-               if (stack >= irqstack && stack <= irqstack_end) {
-                       if (stack == irqstack_end) {
-                               stack = (unsigned long *) (irqstack_end[-1]);
+               if (stack >= irq_stack && stack <= irq_stack_end) {
+                       if (stack == irq_stack_end) {
+                               stack = (unsigned long *) (irq_stack_end[-1]);
                                printk(" <EOI> ");
                        }
                } else {
 
 1:     incl %gs:pda_irqcount
        jne 2f
        popq_cfi %rax                   /* move return address... */
-       mov %gs:pda_irqstackptr,%rsp
+       mov PER_CPU_VAR(irq_stack_ptr),%rsp
        EMPTY_FRAME 0
        pushq_cfi %rax                  /* ... to the new stack */
        /*
        mov  %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
        incl %gs:pda_irqcount
-       cmove %gs:pda_irqstackptr,%rsp
+       cmove PER_CPU_VAR(irq_stack_ptr),%rsp
        push  %rbp                      # backlink for old unwinder
        call __do_softirq
        leaveq
 11:    incl %gs:pda_irqcount
        movq %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
-       cmovzq %gs:pda_irqstackptr,%rsp
+       cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
        pushq %rbp                      # backlink for old unwinder
        call xen_evtchn_do_upcall
        popq %rsp
 
 
                memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
                per_cpu_offset(cpu) = ptr - __per_cpu_start;
+               per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 #ifdef CONFIG_X86_64
+               per_cpu(irq_stack_ptr, cpu) =
+                       (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
                /*
                 * CPU0 modified pda in the init data area, reload pda
                 * offset for CPU0 and clear the area for others.
                else
                        memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
 #endif
-               per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 
                DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
        }