x86: remove ioapic_force
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18fcc65ce60b965ee3a6e0456622dc4..b70fd82321856bcc6e7bf8ae6b938ff12a9a29c3 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
-#include <linux/dmar.h>
-#include <linux/jiffies.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>     /* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
+
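+/* Debug helpers marked with this are only used at boot, so make them static __init */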
+#define __apicdebuginit(type) static type __init
+
+/*
+ *      Is the SiS APIC rmw bug present?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int first_free_entry;
+/*
+ * A rough estimate of how many shared IRQs there are; it can
+ * be changed at any time.
+ */
+int pin_map_size;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/* I/O APIC entries */
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+       disable_ioapic_setup();
+       return 0;
+}
+early_param("noapic", parse_noapic);
+
+
+struct irq_cfg;
+struct irq_pin_list;
 struct irq_cfg {
+       unsigned int irq;
+       struct irq_cfg *next;
+       struct irq_pin_list *irq_2_pin;
        cpumask_t domain;
        cpumask_t old_domain;
        unsigned move_cleanup_count;
@@ -62,70 +120,162 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-static int assign_irq_vector(int irq, cpumask_t mask);
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* needs to be bigger than the size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
 
-int first_system_vector = 0xfe;
+static int __init parse_nr_irq_cfg(char *arg)
+{
+       if (arg) {
+               nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+               if (nr_irq_cfg < 32)
+                       nr_irq_cfg = 32;
+       }
+       return 0;
+}
 
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
 
-#define __apicdebuginit  __init
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+       memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
 
-int sis_apic_bug; /* not actually supported, dummy for compile */
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
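+/* dyn_array init: seed the 16 legacy irq_cfg entries and chain the rest into a free list */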
+static void __init init_work(void *data)
+{
+       struct dyn_array *da = data;
+       struct irq_cfg *cfg;
+       int legacy_count;
+       int i;
 
-static int no_timer_check;
+       cfg = *da->name;
 
-static int disable_timer_pin_1 __initdata;
+       memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
 
-int timer_through_8259 __initdata;
+       legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+       for (i = legacy_count; i < *da->nr; i++)
+               init_one_irq_cfg(&cfg[i]);
 
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+       for (i = 1; i < *da->nr; i++)
+               cfg[i-1].next = &cfg[i];
 
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+       irq_cfgx_free = &irq_cfgx[legacy_count];
+       irq_cfgx[legacy_count - 1].next = NULL;
+}
 
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
+#define for_each_irq_cfg(cfg)          \
+       for (cfg = irq_cfgx; cfg; cfg = cfg->next)
 
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
 
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       struct irq_cfg *cfg;
 
-/* # of MP IRQ source entries */
-int mp_irq_entries;
+       cfg = irq_cfgx;
+       while (cfg) {
+               if (cfg->irq == irq)
+                       return cfg;
 
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+               cfg = cfg->next;
+       }
 
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+       return NULL;
+}
+
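+/* Find the irq_cfg for this irq, allocating and linking a new one if none exists yet */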
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+       struct irq_cfg *cfg, *cfg_pri;
+       int i;
+       int count = 0;
+
+       cfg_pri = cfg = irq_cfgx;
+       while (cfg) {
+               if (cfg->irq == irq)
+                       return cfg;
+
+               cfg_pri = cfg;
+               cfg = cfg->next;
+               count++;
+       }
+
+       if (!irq_cfgx_free) {
+               unsigned long phys;
+               unsigned long total_bytes;
+               /*
+                *  we ran out of pre-allocated ones, allocate more
+                */
+               printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+               total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+               if (after_bootmem)
+                       cfg = kzalloc(total_bytes, GFP_ATOMIC);
+               else
+                       cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+               if (!cfg)
+                       panic("please boot with nr_irq_cfg=%d\n", count * 2);
+
+               phys = __pa(cfg);
+               printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+               for (i = 0; i < nr_irq_cfg; i++)
+                       init_one_irq_cfg(&cfg[i]);
+
+               for (i = 1; i < nr_irq_cfg; i++)
+                       cfg[i-1].next = &cfg[i];
+
+               irq_cfgx_free = cfg;
+       }
+
+       cfg = irq_cfgx_free;
+       irq_cfgx_free = irq_cfgx_free->next;
+       cfg->next = NULL;
+       if (cfg_pri)
+               cfg_pri->next = cfg;
+       else
+               irq_cfgx = cfg;
+       cfg->irq = irq;
+       printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+       {
+               /* dump the results */
+               struct irq_cfg *cfg;
+               unsigned long phys;
+               unsigned long bytes = sizeof(struct irq_cfg);
+
+               printk(KERN_DEBUG "=========================== %d\n", irq);
+               printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+               for_each_irq_cfg(cfg) {
+                       phys = __pa(cfg);
+                       printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+               }
+               printk(KERN_DEBUG "===========================\n");
+       }
+#endif
+       return cfg;
+}
 
 /*
  * This is performance-critical, we want to do it O(1)
@@ -134,9 +284,66 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-       short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+       struct dyn_array *da = data;
+       struct irq_pin_list *pin;
+       int i;
+
+       pin = *da->name;
+
+       for (i = 1; i < *da->nr; i++)
+               pin[i-1].next = &pin[i];
+
+       irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
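+/* Take an irq_pin_list entry from the free pool, refilling the pool when it runs out */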
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+       struct irq_pin_list *pin;
+       int i;
+
+       pin = irq_2_pin_ptr;
+
+       if (pin) {
+               irq_2_pin_ptr = pin->next;
+               pin->next = NULL;
+               return pin;
+       }
+
+       /*
+        *  we ran out of pre-allocated ones, allocate more
+        */
+       printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+       if (after_bootmem)
+               pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+                                GFP_ATOMIC);
+       else
+               pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+                               nr_irq_2_pin, PAGE_SIZE, 0);
+
+       if (!pin)
+               panic("can not get more irq_2_pin\n");
+
+       for (i = 1; i < nr_irq_2_pin; i++)
+               pin[i-1].next = &pin[i];
+
+       irq_2_pin_ptr = pin->next;
+       pin->next = NULL;
+
+       return pin;
+}
 
 struct io_apic {
        unsigned int index;
@@ -168,9 +375,11 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
  */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
        struct io_apic __iomem *io_apic = io_apic_base(apic);
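+       /* Work around the SiS APIC bug: re-select the register before writing the data */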
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
        writel(value, &io_apic->data);
 }
 
@@ -178,16 +387,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 {
        struct irq_pin_list *entry;
        unsigned long flags;
+       struct irq_cfg *cfg = irq_cfg(irq);
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       entry = irq_2_pin + irq;
+       entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
                int pin;
 
-               pin = entry->pin;
-               if (pin == -1)
+               if (!entry)
                        break;
+               pin = entry->pin;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -196,45 +406,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
                }
                if (!entry->next)
                        break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return false;
 }
 
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)                                  \
-                                                                       \
-{                                                                      \
-       int pin;                                                        \
-       struct irq_pin_list *entry = irq_2_pin + irq;                   \
-                                                                       \
-       BUG_ON(irq >= NR_IRQS);                                         \
-       for (;;) {                                                      \
-               unsigned int reg;                                       \
-               pin = entry->pin;                                       \
-               if (pin == -1)                                          \
-                       break;                                          \
-               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
-               reg ACTION;                                             \
-               io_apic_modify(entry->apic, reg);                       \
-               FINAL;                                                  \
-               if (!entry->next)                                       \
-                       break;                                          \
-               entry = irq_2_pin + entry->next;                        \
-       }                                                               \
-}
-
 union entry_union {
        struct { u32 w1, w2; };
        struct IO_APIC_route_entry entry;
@@ -294,32 +472,48 @@ static void ioapic_mask_entry(int apic, int pin)
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
        int apic, pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
 
-       BUG_ON(irq >= NR_IRQS);
+       cfg = irq_cfg(irq);
+       entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
+
+               if (!entry)
+                       break;
+
                apic = entry->apic;
                pin = entry->pin;
-               if (pin == -1)
-                       break;
+#ifdef CONFIG_INTR_REMAP
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(apic, 0x11 + pin*2, dest);
+#else
                io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
-               io_apic_modify(apic, reg);
+               io_apic_modify(apic, 0x10 + pin*2, reg);
                if (!entry->next)
                        break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
        }
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -336,9 +530,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
         */
        dest = SET_APIC_LOGICAL_ID(dest);
 
+       desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
        __target_IO_APIC_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif
@@ -350,21 +545,34 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-       static int first_free_entry = NR_IRQS;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
+
+       /* first time this irq is referenced, so allocate a new irq_cfg */
+       cfg = irq_cfg_alloc(irq);
+       entry = cfg->irq_2_pin;
+       if (!entry) {
+               entry = get_one_free_irq_2_pin();
+               cfg->irq_2_pin = entry;
+               entry->apic = apic;
+               entry->pin = pin;
+               printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+               return;
+       }
 
-       BUG_ON(irq >= NR_IRQS);
-       while (entry->next)
-               entry = irq_2_pin + entry->next;
+       while (entry->next) {
+               /* not again, please */
+               if (entry->apic == apic && entry->pin == pin)
+                       return;
 
-       if (entry->pin != -1) {
-               entry->next = first_free_entry;
-               entry = irq_2_pin + entry->next;
-               if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: ran out of irq_2_pin entries!");
+               entry = entry->next;
        }
+
+       entry->next = get_one_free_irq_2_pin();
+       entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
+       printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
 }
 
 /*
@@ -374,19 +582,59 @@ static void __init replace_pin_at_irq(unsigned int irq,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
 {
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_pin_list *entry = cfg->irq_2_pin;
+       int replaced = 0;
 
-       while (1) {
+       while (entry) {
                if (entry->apic == oldapic && entry->pin == oldpin) {
                        entry->apic = newapic;
                        entry->pin = newpin;
-               }
-               if (!entry->next)
+                       replaced = 1;
+                       /* every one is different, right? */
                        break;
-               entry = irq_2_pin + entry->next;
+               }
+               entry = entry->next;
        }
+
+       /* why? call replace before add? */
+       if (!replaced)
+               add_pin_to_irq(irq, newapic, newpin);
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       readl(&io_apic->data);
 }
 
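+/* For every (apic, pin) this irq is routed to, apply ACTION to its redirection entry */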
+#define __DO_ACTION(R, ACTION, FINAL)                                  \
+                                                                       \
+{                                                                      \
+       int pin;                                                        \
+       struct irq_cfg *cfg;                                            \
+       struct irq_pin_list *entry;                                     \
+                                                                       \
+       cfg = irq_cfg(irq);                                             \
+       entry = cfg->irq_2_pin;                                         \
+       for (;;) {                                                      \
+               unsigned int reg;                                       \
+               if (!entry)                                             \
+                       break;                                          \
+               pin = entry->pin;                                       \
+               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
+               reg ACTION;                                             \
+               io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);     \
+               FINAL;                                                  \
+               if (!entry->next)                                       \
+                       break;                                          \
+               entry = entry->next;                                    \
+       }                                                               \
+}
 
 #define DO_ACTION(name,R,ACTION, FINAL)                                        \
                                                                        \
@@ -440,24 +688,73 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
-int skip_ioapic_setup;
-int ioapic_force;
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
 
-static int __init parse_noapic(char *str)
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
 {
-       disable_ioapic_setup();
+       union IO_APIC_reg_01 reg_01;
+       unsigned long flags;
+       int apic, pin;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               early_ioapic_entries[apic] =
+                       kzalloc(sizeof(struct IO_APIC_route_entry) *
+                               nr_ioapic_registers[apic], GFP_KERNEL);
+               if (!early_ioapic_entries[apic])
+                       return -ENOMEM;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+
+                       entry = early_ioapic_entries[apic][pin] =
+                               ioapic_read_entry(apic, pin);
+                       if (!entry.mask) {
+                               entry.mask = 1;
+                               ioapic_write_entry(apic, pin, entry);
+                       }
+               }
        return 0;
 }
-early_param("noapic", parse_noapic);
 
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
+void restore_IO_APIC_setup(void)
 {
-       disable_timer_pin_1 = 1;
-       return 1;
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       ioapic_write_entry(apic, pin,
+                                          early_ioapic_entries[apic][pin]);
 }
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
 
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+       /*
+        * For now this is a plain restore of the previous settings.
+        * TBD: when the OS enables interrupt-remapping, the IO-APIC RTEs
+        * need to be set up to point to interrupt-remapping table
+        * entries. For now, do a plain restore and wait for
+        * setup_IO_APIC_irqs() to do the proper initialization.
+        */
+       restore_IO_APIC_setup();
+}
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -561,7 +858,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                best_guess = irq;
                }
        }
-       BUG_ON(best_guess >= NR_IRQS);
        return best_guess;
 }
 
@@ -693,7 +989,6 @@ static int pin_2_irq(int idx, int apic, int pin)
                        irq += nr_ioapic_registers[i++];
                irq += pin;
        }
-       BUG_ON(irq >= NR_IRQS);
        return irq;
 }
 
@@ -728,8 +1023,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
        int cpu;
        struct irq_cfg *cfg;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
 
        /* Only try and allocate irqs on cpus that are present */
        cpus_and(mask, mask, cpu_online_map);
@@ -802,8 +1096,7 @@ static void __clear_irq_vector(int irq)
        cpumask_t mask;
        int cpu, vector;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
 
        vector = cfg->vector;
@@ -820,12 +1113,14 @@ void __setup_vector_irq(int cpu)
        /* Initialize vector_irq on a new cpu */
        /* This function must be called with vector_lock held */
        int irq, vector;
+       struct irq_cfg *cfg;
 
        /* Mark the inuse vectors */
-       for (irq = 0; irq < NR_IRQS; ++irq) {
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+       for_each_irq_cfg(cfg) {
+               if (!cpu_isset(cpu, cfg->domain))
                        continue;
-               vector = irq_cfg[irq].vector;
+               vector = cfg->vector;
+               irq = cfg->irq;
                per_cpu(vector_irq, cpu)[vector] = irq;
        }
        /* Mark the free vectors */
@@ -833,36 +1128,128 @@ void __setup_vector_irq(int cpu)
                irq = per_cpu(vector_irq, cpu)[vector];
                if (irq < 0)
                        continue;
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+               cfg = irq_cfg(irq);
+               if (!cpu_isset(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
 }
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-       if (trigger) {
-               irq_desc[irq].status |= IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       /* first time to use this irq_desc */
+       if (irq < 16)
+               desc = irq_to_desc(irq);
+       else
+               desc = irq_to_desc_alloc(irq);
+
+       if (trigger)
+               desc->status |= IRQ_LEVEL;
+       else
+               desc->status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               desc->status |= IRQ_MOVE_PCNTXT;
+               if (trigger)
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_fasteoi_irq,
+                                                    "fasteoi");
+               else
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_edge_irq, "edge");
+               return;
+       }
+#endif
+       if (trigger)
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                             handle_fasteoi_irq, "fasteoi");
-       } else {
-               irq_desc[irq].status &= ~IRQ_LEVEL;
+                                             handle_fasteoi_irq,
+                                             "fasteoi");
+       else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+                             struct IO_APIC_route_entry *entry,
+                             unsigned int destination, int trigger,
+                             int polarity, int vector)
+{
+       /*
+        * add it to the IO-APIC irq-routing table:
+        */
+       memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+               struct irte irte;
+               struct IR_IO_APIC_route_entry *ir_entry =
+                       (struct IR_IO_APIC_route_entry *) entry;
+               int index;
+
+               if (!iommu)
+                       panic("No mapping iommu for ioapic %d\n", apic);
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+               memset(&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = trigger;
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = vector;
+               irte.dest_id = IRTE_DEST(destination);
+
+               modify_irte(irq, &irte);
+
+               ir_entry->index2 = (index >> 15) & 0x1;
+               ir_entry->zero = 0;
+               ir_entry->format = 1;
+               ir_entry->index = (index & 0x7fff);
+       } else
+#endif
+       {
+               entry->delivery_mode = INT_DELIVERY_MODE;
+               entry->dest_mode = INT_DEST_MODE;
+               entry->dest = destination;
        }
+
+       entry->mask = 0;                                /* enable IRQ */
+       entry->trigger = trigger;
+       entry->polarity = polarity;
+       entry->vector = vector;
+
+       /* Mask level triggered irqs.
+        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+        */
+       if (trigger)
+               entry->mask = 1;
+       return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                              int trigger, int polarity)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct IO_APIC_route_entry entry;
        cpumask_t mask;
 
        if (!IO_APIC_IRQ(irq))
                return;
 
+       cfg = irq_cfg(irq);
+
        mask = TARGET_CPUS;
        if (assign_irq_vector(irq, mask))
                return;
@@ -875,24 +1262,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
 
-       /*
-        * add it to the IO-APIC irq-routing table:
-        */
-       memset(&entry,0,sizeof(entry));
-
-       entry.delivery_mode = INT_DELIVERY_MODE;
-       entry.dest_mode = INT_DEST_MODE;
-       entry.dest = cpu_mask_to_apicid(mask);
-       entry.mask = 0;                         /* enable IRQ */
-       entry.trigger = trigger;
-       entry.polarity = polarity;
-       entry.vector = cfg->vector;
 
-       /* Mask level triggered irqs.
-        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-        */
-       if (trigger)
-               entry.mask = 1;
+       if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+                              cpu_mask_to_apicid(mask), trigger, polarity,
+                              cfg->vector)) {
+               printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+                      mp_ioapics[apic].mp_apicid, pin);
+               __clear_irq_vector(irq);
+               return;
+       }
 
        ioapic_register_intr(irq, trigger);
        if (irq < 16)
@@ -944,6 +1322,11 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
        struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled)
+               return;
+#endif
+
        memset(&entry, 0, sizeof(entry));
 
        /*
@@ -970,13 +1353,15 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
        ioapic_write_entry(apic, pin, entry);
 }
 
-void __apicdebuginit print_IO_APIC(void)
+
+__apicdebuginit(void) print_IO_APIC(void)
 {
        int apic, i;
        union IO_APIC_reg_00 reg_00;
        union IO_APIC_reg_01 reg_01;
        union IO_APIC_reg_02 reg_02;
        unsigned long flags;
+       struct irq_cfg *cfg;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1005,6 +1390,8 @@ void __apicdebuginit print_IO_APIC(void)
        printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
        printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
        printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1045,16 +1432,16 @@ void __apicdebuginit print_IO_APIC(void)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_pin_list *entry = irq_2_pin + i;
-               if (entry->pin < 0)
+       for_each_irq_cfg(cfg) {
+               struct irq_pin_list *entry = cfg->irq_2_pin;
+               if (!entry)
                        continue;
-               printk(KERN_DEBUG "IRQ%d ", i);
+               printk(KERN_DEBUG "IRQ%d ", cfg->irq);
                for (;;) {
                        printk("-> %d:%d", entry->apic, entry->pin);
                        if (!entry->next)
                                break;
-                       entry = irq_2_pin + entry->next;
+                       entry = entry->next;
                }
                printk("\n");
        }
@@ -1064,9 +1451,7 @@ void __apicdebuginit print_IO_APIC(void)
        return;
 }
 
-#if 0
-
-static __apicdebuginit void print_APIC_bitfield (int base)
+__apicdebuginit(void) print_APIC_bitfield(int base)
 {
        unsigned int v;
        int i, j;
@@ -1087,9 +1472,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
        }
 }
 
-void __apicdebuginit print_local_APIC(void * dummy)
+__apicdebuginit(void) print_local_APIC(void *dummy)
 {
        unsigned int v, ver, maxlvt;
+       unsigned long icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1097,7 +1483,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
-       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
@@ -1133,10 +1519,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
        v = apic_read(APIC_ESR);
        printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-       v = apic_read(APIC_ICR);
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-       v = apic_read(APIC_ICR2);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+       icr = apic_icr_read();
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
 
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1549,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
        printk("\n");
 }
 
-void print_all_local_APICs (void)
+__apicdebuginit(void) print_all_local_APICs(void)
 {
        on_each_cpu(print_local_APIC, NULL, 1);
 }
 
-void __apicdebuginit print_PIC(void)
+__apicdebuginit(void) print_PIC(void)
 {
        unsigned int v;
        unsigned long flags;
@@ -1201,20 +1586,28 @@ void __apicdebuginit print_PIC(void)
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
-#endif  /*  0  */
+__apicdebuginit(int) print_all_ICs(void)
+{
+       print_PIC();
+       print_all_local_APICs();
+       print_IO_APIC();
+
+       return 0;
+}
+
+fs_initcall(print_all_ICs);
+
+
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 void __init enable_IO_APIC(void)
 {
        union IO_APIC_reg_01 reg_01;
        int i8259_apic, i8259_pin;
-       int i, apic;
+       int apic;
        unsigned long flags;
 
-       for (i = 0; i < PIN_MAP_SIZE; i++) {
-               irq_2_pin[i].pin = -1;
-               irq_2_pin[i].next = 0;
-       }
-
        /*
         * The number of IO-APIC IRQ registers (== #pins):
         */
@@ -1291,7 +1684,7 @@ void disable_IO_APIC(void)
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
-               entry.dest          = GET_APIC_ID(read_apic_id());
+               entry.dest            = read_apic_id();
 
                /*
                 * Add it to the IO-APIC irq-routing table:
@@ -1302,6 +1695,15 @@ void disable_IO_APIC(void)
        disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1315,6 +1717,9 @@ static int __init timer_irq_works(void)
        unsigned long t1 = jiffies;
        unsigned long flags;
 
+       if (no_timer_check)
+               return 1;
+
        local_save_flags(flags);
        local_irq_enable();
        /* Let ten ticks pass... */
@@ -1335,68 +1740,214 @@ static int __init timer_irq_works(void)
        return 0;
 }
 
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < 16) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+       struct irq_cfg *cfg = irq_cfg(irq);
+       unsigned long flags;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return 1;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update (of vector
+ * and cpu destination) of the IRTE, followed by a flush of the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE as well with the updated
+ * vector information, along with modifying the IRTE with the vector and destination.
+ * So irq migration for level triggered is a little bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, the only difference being that
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+       int modify_ioapic_rte;
+       unsigned int dest;
+       unsigned long flags;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       desc = irq_to_desc(irq);
+       modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       if (modify_ioapic_rte) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * Modify the IRTE and flush the interrupt entry cache.
+        */
+       modify_irte(irq, &irte);
 
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
 
-static unsigned int startup_ioapic_irq(unsigned int irq)
+       desc->affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
 {
-       int was_pending = 0;
-       unsigned long flags;
+       int ret = -1;
+       struct irq_desc *desc = irq_to_desc(irq);
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < 16) {
-               disable_8259A_irq(irq);
-               if (i8259A_irq_pending(irq))
-                       was_pending = 1;
+       mask_IO_APIC_irq(irq);
+
+       if (io_apic_level_ack_pending(irq)) {
+               /*
+                * Interrupt in progress. Migrating irq now will change the
+                * vector information in the IO-APIC RTE and that will confuse
+                * the EOI broadcast performed by cpu.
+                * So, delay the irq migration to the next instance.
+                */
+               schedule_delayed_work(&ir_migration_work, 1);
+               goto unmask;
        }
-       __unmask_IO_APIC_irq(irq);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       return was_pending;
+       /* everything is clear. we have right of way */
+       migrate_ioapic_irq(irq, desc->pending_mask);
+
+       ret = 0;
+       desc->status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(desc->pending_mask);
+
+unmask:
+       unmask_IO_APIC_irq(irq);
+       return ret;
 }
 
-static int ioapic_retrigger_irq(unsigned int irq)
+static void ir_irq_migration(struct work_struct *work)
 {
-       struct irq_cfg *cfg = &irq_cfg[irq];
-       unsigned long flags;
+       unsigned int irq;
+       struct irq_desc *desc;
 
-       spin_lock_irqsave(&vector_lock, flags);
-       send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       for_each_irq_desc(irq, desc) {
+               if (desc->status & IRQ_MOVE_PENDING) {
+                       unsigned long flags;
 
-       return 1;
+                       spin_lock_irqsave(&desc->lock, flags);
+                       if (!desc->chip->set_affinity ||
+                           !(desc->status & IRQ_MOVE_PENDING)) {
+                               desc->status &= ~IRQ_MOVE_PENDING;
+                               spin_unlock_irqrestore(&desc->lock, flags);
+                               continue;
+                       }
+
+                       desc->chip->set_affinity(irq, desc->pending_mask);
+                       spin_unlock_irqrestore(&desc->lock, flags);
+               }
+       }
 }
 
 /*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
+ * Migrates the IRQ destination in the process context.
  */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc->status & IRQ_LEVEL) {
+               desc->status |= IRQ_MOVE_PENDING;
+               desc->pending_mask = mask;
+               migrate_irq_remapped_level(irq);
+               return;
+       }
+
+       migrate_ioapic_irq(irq, mask);
+}
+#endif
 
-#ifdef CONFIG_SMP
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
@@ -1410,11 +1961,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                struct irq_desc *desc;
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
-               if (irq >= NR_IRQS)
+
+               desc = irq_to_desc(irq);
+               if (!desc)
                        continue;
 
-               desc = irq_desc + irq;
-               cfg = irq_cfg + irq;
+               cfg = irq_cfg(irq);
                spin_lock(&desc->lock);
                if (!cfg->move_cleanup_count)
                        goto unlock;
@@ -1433,7 +1985,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned vector, me;
 
        if (likely(!cfg->move_in_progress))
@@ -1453,6 +2005,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1468,7 +2031,7 @@ static void ack_apic_level(unsigned int irq)
        irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
                mask_IO_APIC_irq(irq);
        }
@@ -1527,9 +2090,26 @@ static struct irq_chip ioapic_chip __read_mostly = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ir_ioapic_affinity_irq,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
        int irq;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
 
        /*
         * NOTE! The local APIC isn't very good at handling
@@ -1542,8 +2122,9 @@ static inline void init_IO_APIC_traps(void)
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for (irq = 0; irq < NR_IRQS ; irq++) {
-               if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+       for_each_irq_cfg(cfg) {
+               irq = cfg->irq;
+               if (IO_APIC_IRQ(irq) && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
@@ -1551,9 +2132,11 @@ static inline void init_IO_APIC_traps(void)
                         */
                        if (irq < 16)
                                make_8259A_irq(irq);
-                       else
+                       else {
+                               desc = irq_to_desc(irq);
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_chip;
+                               desc->chip = &no_irq_chip;
+                       }
                }
        }
 }
@@ -1588,7 +2171,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-       irq_desc[irq].status &= ~IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       desc->status &= ~IRQ_LEVEL;
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
 }
@@ -1665,6 +2251,17 @@ static inline void __init unlock_ExtINT_logic(void)
        ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+       disable_timer_pin_1 = 1;
+       return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -1675,7 +2272,7 @@ static inline void __init unlock_ExtINT_logic(void)
  */
 static inline void __init check_timer(void)
 {
-       struct irq_cfg *cfg = irq_cfg + 0;
+       struct irq_cfg *cfg = irq_cfg(0);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        int no_pin1 = 0;
@@ -1712,6 +2309,10 @@ static inline void __init check_timer(void)
         * 8259A.
         */
        if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
+               if (intr_remapping_enabled)
+                       panic("BIOS bug: timer not connected to IO-APIC");
+#endif
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
@@ -1729,7 +2330,7 @@ static inline void __init check_timer(void)
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                }
                unmask_IO_APIC_irq(0);
-               if (!no_timer_check && timer_irq_works()) {
+               if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                                enable_8259A_irq(0);
@@ -1738,6 +2339,10 @@ static inline void __init check_timer(void)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
+#ifdef CONFIG_INTR_REMAP
+               if (intr_remapping_enabled)
+                       panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1813,13 +2418,6 @@ out:
        local_irq_restore(flags);
 }
 
-static int __init notimercheck(char *s)
-{
-       no_timer_check = 1;
-       return 1;
-}
-__setup("no_timer_check", notimercheck);
-
 /*
  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
  * to devices.  However there may be an I/O APIC pin available for
@@ -1854,10 +2452,22 @@ void __init setup_IO_APIC(void)
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
-       if (!acpi_ioapic)
-               print_IO_APIC();
 }
 
+/*
+ *      Called after all the initialization is done. If we didn't find any
+ *      APIC bugs then we can allow the modify fast path.
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+       if (sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
 struct sysfs_ioapic_data {
        struct sys_device dev;
        struct IO_APIC_route_entry entry[0];
@@ -1945,38 +2555,62 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
        /* Allocate an unused irq */
-       int irq;
-       int new;
+       unsigned int irq;
+       unsigned int new;
        unsigned long flags;
+       struct irq_cfg *cfg_new;
 
-       irq = -ENOSPC;
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+       irq_want = nr_irqs - 1;
+#endif
+
+       irq = 0;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = (NR_IRQS - 1); new >= 0; new--) {
+       for (new = irq_want; new > 0; new--) {
                if (platform_legacy_irq(new))
                        continue;
-               if (irq_cfg[new].vector != 0)
+               cfg_new = irq_cfg(new);
+               if (cfg_new && cfg_new->vector != 0)
                        continue;
+               /* check if we need to create one */
+               if (!cfg_new)
+                       cfg_new = irq_cfg_alloc(new);
                if (__assign_irq_vector(new, TARGET_CPUS) == 0)
                        irq = new;
                break;
        }
        spin_unlock_irqrestore(&vector_lock, flags);
 
-       if (irq >= 0) {
+       if (irq > 0) {
                dynamic_irq_init(irq);
        }
        return irq;
 }
 
+int create_irq(void)
+{
+       int irq;
+
+       irq = create_irq_nr(nr_irqs - 1);
+
+       if (irq == 0)
+               irq = -1;
+
+       return irq;
+}
+
 void destroy_irq(unsigned int irq)
 {
        unsigned long flags;
 
        dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+       free_irte(irq);
+#endif
        spin_lock_irqsave(&vector_lock, flags);
        __clear_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
@@ -1988,17 +2622,49 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        unsigned dest;
        cpumask_t tmp;
 
        tmp = TARGET_CPUS;
        err = assign_irq_vector(irq, tmp);
-       if (!err) {
-               cpus_and(tmp, cfg->domain, tmp);
-               dest = cpu_mask_to_apicid(tmp);
+       if (err)
+               return err;
 
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, tmp);
+       dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irte irte;
+               int ir_index;
+               u16 sub_handle;
+
+               ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+               BUG_ON(ir_index == -1);
+
+               memset(&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = 0; /* edge */
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = cfg->vector;
+               irte.dest_id = IRTE_DEST(dest);
+
+               modify_irte(irq, &irte);
+
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->data = sub_handle;
+               msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+                                 MSI_ADDR_IR_SHV |
+                                 MSI_ADDR_IR_INDEX1(ir_index) |
+                                 MSI_ADDR_IR_INDEX2(ir_index);
+       } else
+#endif
+       {
                msg->address_hi = MSI_ADDR_BASE_HI;
                msg->address_lo =
                        MSI_ADDR_BASE_LO |
@@ -2024,10 +2690,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2036,6 +2703,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2047,8 +2715,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
+}
+
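
set_msi_irq_affinity() above re-targets an already-programmed MSI by recomputing the vector/destination and rewriting the message. The field surgery itself is a plain read-modify-write; a minimal sketch with invented field layouts (not the real MSI_ADDR_*/MSI_DATA_* encodings):

#include <stdio.h>

/* invented field layouts, for illustration only */
#define MSG_DEST_SHIFT	12
#define MSG_DEST_MASK	(0xffu << MSG_DEST_SHIFT)
#define MSG_VECTOR_MASK	0xffu

struct toy_msg {
	unsigned int addr;	/* carries the destination id */
	unsigned int data;	/* carries the vector */
};

/* read-modify-write, the same shape as the affinity handlers above */
static void retarget(struct toy_msg *msg, unsigned int dest, unsigned int vector)
{
	msg->addr &= ~MSG_DEST_MASK;
	msg->addr |= dest << MSG_DEST_SHIFT;
	msg->data &= ~MSG_VECTOR_MASK;
	msg->data |= vector;
}

int main(void)
{
	struct toy_msg m = { .addr = 0x1u << MSG_DEST_SHIFT, .data = 0x31 };

	retarget(&m, 0x3, 0x41);
	printf("addr=%#x data=%#x\n", m.addr, m.data);	/* addr=0x3000 data=0x41 */
	return 0;
}
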
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg;
+       unsigned int dest;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+       struct irq_desc *desc;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * atomically update the IRTE with the new destination and vector.
+        */
+       modify_irte(irq, &irte);
+
+       /*
+        * After this point, all the interrupts will start arriving
+        * at the new destination. So, time to clean up the previous
+        * vector allocation.
+        */
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
+#endif
 #endif /* CONFIG_SMP */
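
The comments in ir_set_msi_irq_affinity() above describe a two-phase migration: publish the new destination/vector through the IRTE first, and only afterwards reclaim the vector still parked on the old CPUs. A toy sketch of that ordering, with hypothetical names and the IPI-driven cleanup reduced to a flag:

#include <stdio.h>

/* what the "hardware" consults when routing an interrupt */
struct route {
	int dest;
	int vector;
};

static struct route live = { .dest = 0, .vector = 0x31 };
static int vector_pending_cleanup;	/* stand-in for move_cleanup_count + IPI */

static void retarget(int new_dest, int new_vector)
{
	int prev_vector = live.vector;

	/* phase 1: publish the new destination/vector (modify_irte() above) */
	live.dest = new_dest;
	live.vector = new_vector;

	/* phase 2: the old vector may only be reclaimed once no new
	 * interrupts can be routed through it, so it is merely flagged here */
	vector_pending_cleanup = prev_vector;
}

int main(void)
{
	retarget(1, 0x41);
	printf("live: dest=%d vec=%#x, cleanup vec=%#x\n",
	       live.dest, live.vector, vector_pending_cleanup);
	return 0;
}
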
 
 /*
@@ -2066,26 +2787,177 @@ static struct irq_chip msi_chip = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+       .name           = "IR-PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity   = ir_set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+       struct intel_iommu *iommu;
+       int index;
+
+       iommu = map_dev_to_ir(dev);
+       if (!iommu) {
+               printk(KERN_ERR
+                      "Unable to map PCI %s to iommu\n", pci_name(dev));
+               return -ENOENT;
+       }
+
+       index = alloc_irte(iommu, irq, nvec);
+       if (index < 0) {
+               printk(KERN_ERR
+                      "Unable to allocate %d IRTE for PCI %s\n", nvec,
+                       pci_name(dev));
+               return -ENOSPC;
+       }
+       return index;
+}
+#endif
+
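
msi_alloc_irte() above resolves the device's remapping unit and reserves 'nvec' consecutive table entries, returning the base index. A standalone sketch of the consecutive-block reservation, using a toy bump allocator rather than the real alloc_irte():

#include <stdio.h>

#define TABLE_SIZE	64

static unsigned char entry_used[TABLE_SIZE];
static int next_free;	/* simplistic bump allocator, unlike the real table */

/* reserve 'nvec' consecutive entries; return the base index or -1 */
static int alloc_block(int nvec)
{
	int base, i;

	if (nvec < 1 || next_free + nvec > TABLE_SIZE)
		return -1;
	base = next_free;
	for (i = 0; i < nvec; i++)
		entry_used[base + i] = 1;
	next_free += nvec;
	return base;
}

int main(void)
{
	int base = alloc_block(4);	/* e.g. a 4-message MSI block */

	if (base >= 0)
		printf("base index %d, entries %d..%d\n", base, base, base + 3);
	return 0;
}
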
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
+       int ret;
        struct msi_msg msg;
-       int irq, ret;
-       irq = create_irq();
-       if (irq < 0)
-               return irq;
 
        ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       set_irq_msi(irq, desc);
+       write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irq_desc *desc = irq_to_desc(irq);
+               /*
+                * irq migration in process context
+                */
+               desc->status |= IRQ_MOVE_PCNTXT;
+               set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+       } else
+#endif
+               set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+       return 0;
+}
+
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+       unsigned int irq;
+
+       irq = dev->bus->number;
+       irq <<= 8;
+       irq |= dev->devfn;
+       irq <<= 12;
+
+       return irq;
+}
+
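
build_irq_for_pci_dev() above packs the bus number and devfn into the upper bits of an irq hint, ((bus << 8) | devfn) << 12, so each function gets a distinct, stable starting point for create_irq_nr(). A runnable check of that packing:

#include <stdio.h>

/* pack bus and devfn the same way build_irq_for_pci_dev() does:
 * ((bus << 8) | devfn) << 12, leaving the low 12 bits free */
static unsigned int irq_hint(unsigned int bus, unsigned int devfn)
{
	unsigned int irq = bus;

	irq <<= 8;
	irq |= devfn;
	irq <<= 12;

	return irq;
}

int main(void)
{
	/* e.g. bus 0x02, device 0x1f, function 0 -> devfn 0xf8 */
	printf("hint = %#x\n", irq_hint(0x02, 0xf8));	/* 0x2f8000 */
	return 0;
}
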
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+       unsigned int irq;
+       int ret;
+       unsigned int irq_want;
+
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+       irq = create_irq_nr(irq_want);
+       if (irq == 0)
+               return -1;
+
+#ifdef CONFIG_INTR_REMAP
+       if (!intr_remapping_enabled)
+               goto no_ir;
+
+       ret = msi_alloc_irte(dev, irq, 1);
+       if (ret < 0)
+               goto error;
+no_ir:
+#endif
+       ret = setup_msi_irq(dev, desc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
        }
+       return 0;
 
-       set_irq_msi(irq, desc);
-       write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+       destroy_irq(irq);
+       return ret;
+#endif
+}
 
-       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       unsigned int irq;
+       int ret, sub_handle;
+       struct msi_desc *desc;
+       unsigned int irq_want;
+
+#ifdef CONFIG_INTR_REMAP
+       struct intel_iommu *iommu = NULL;
+       int index = 0;
+#endif
 
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       sub_handle = 0;
+       list_for_each_entry(desc, &dev->msi_list, list) {
+               irq = create_irq_nr(irq_want--);
+               if (irq == 0)
+                       return -1;
+#ifdef CONFIG_INTR_REMAP
+               if (!intr_remapping_enabled)
+                       goto no_ir;
+
+               if (!sub_handle) {
+                       /*
+                        * allocate the consecutive block of IRTEs
+                        * for 'nvec'
+                        */
+                       index = msi_alloc_irte(dev, irq, nvec);
+                       if (index < 0) {
+                               ret = index;
+                               goto error;
+                       }
+               } else {
+                       iommu = map_dev_to_ir(dev);
+                       if (!iommu) {
+                               ret = -ENOENT;
+                               goto error;
+                       }
+                       /*
+                        * set up the mapping between the irq and the IRTE
+                        * base index, with the sub_handle pointing to the
+                        * appropriate interrupt remap table entry.
+                        */
+                       set_irte_irq(irq, iommu, index, sub_handle);
+               }
+no_ir:
+#endif
+               ret = setup_msi_irq(dev, desc, irq);
+               if (ret < 0)
+                       goto error;
+               sub_handle++;
+       }
        return 0;
+
+error:
+       destroy_irq(irq);
+       return ret;
 }
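
In the multi-MSI loop above, only the first descriptor allocates the consecutive IRTE block; every later descriptor reuses the same base index with an increasing sub_handle. A toy sketch of that bookkeeping, with hypothetical names:

#include <stdio.h>

struct mapping {
	int base;		/* IRTE base index shared by the whole block */
	int sub_handle;		/* offset of this message within the block */
};

/* toy stand-in for the consecutive-block allocator */
static int alloc_block(int nvec)
{
	static int next;
	int base = next;

	next += nvec;
	return base;
}

int main(void)
{
	int nvec = 4, base = -1, sub_handle;
	struct mapping map[4];

	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
		if (!sub_handle)	/* first message owns the allocation */
			base = alloc_block(nvec);
		map[sub_handle].base = base;
		map[sub_handle].sub_handle = sub_handle;
		printf("msg %d -> irte %d\n", sub_handle,
		       map[sub_handle].base + map[sub_handle].sub_handle);
	}
	return 0;
}
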
 
 void arch_teardown_msi_irq(unsigned int irq)
@@ -2097,10 +2969,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2109,6 +2982,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2120,7 +2994,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2174,9 +3049,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2185,11 +3061,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
        target_ht_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif
 
@@ -2206,7 +3084,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        cpumask_t tmp;
 
@@ -2216,6 +3094,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                struct ht_irq_msg msg;
                unsigned dest;
 
+               cfg = irq_cfg(irq);
                cpus_and(tmp, cfg->domain, tmp);
                dest = cpu_mask_to_apicid(tmp);
 
@@ -2314,6 +3193,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
+       struct irq_cfg *cfg;
 
        if (skip_ioapic_setup == 1)
                return;
@@ -2329,10 +3209,15 @@ void __init setup_ioapic_dest(void)
                         * when you have too many devices, because at that time only boot
                         * cpu is online.
                         */
-                       if (!irq_cfg[irq].vector)
+                       cfg = irq_cfg(irq);
+                       if (!cfg->vector)
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+                       else if (intr_remapping_enabled)
+                               set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
                        else
                                set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }