#include <asm/hypertransport.h>
#include <asm/setup.h>
+#include <mach_ipi.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
int timer_through_8259 __initdata;
*/
int sis_apic_bug = -1;
+int first_free_entry;
/*
* # of IRQ routing registers
*/
static int disable_timer_pin_1 __initdata;
+struct irq_cfg;
+struct irq_pin_list;
+struct irq_cfg {
+ unsigned int irq;
+ struct irq_cfg *next;
+ struct irq_pin_list *irq_2_pin;
+ cpumask_t domain;
+ cpumask_t old_domain;
+ unsigned move_cleanup_count;
+ u8 vector;
+ u8 move_in_progress : 1;
+};
+
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+ [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
+ [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
+ [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
+ [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
+ [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
+ [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
+ [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
+ [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
+ [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
+ [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
+ [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+ [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+ [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+ [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+ [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+ [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+};
+
+static struct irq_cfg irq_cfg_init = { .irq = -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+ if (arg) {
+ nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+ if (nr_irq_cfg < 32)
+ nr_irq_cfg = 32;
+ }
+ return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+ memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
+
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
+static void __init init_work(void *data)
+{
+ struct dyn_array *da = data;
+ struct irq_cfg *cfg;
+ int legacy_count;
+ int i;
+
+ cfg = *da->name;
+
+ memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+
+ legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+ for (i = legacy_count; i < *da->nr; i++)
+ init_one_irq_cfg(&cfg[i]);
+
+ for (i = 1; i < *da->nr; i++)
+ cfg[i-1].next = &cfg[i];
+
+ irq_cfgx_free = &irq_cfgx[legacy_count];
+ irq_cfgx[legacy_count - 1].next = NULL;
+}
+
+#define for_each_irq_cfg(cfg) \
+ for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ struct irq_cfg *cfg;
+
+ cfg = irq_cfgx;
+ while (cfg) {
+ if (cfg->irq == irq)
+ return cfg;
+
+ cfg = cfg->next;
+ }
+
+ return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+ struct irq_cfg *cfg, *cfg_pri;
+ int i;
+ int count = 0;
+
+ cfg_pri = cfg = irq_cfgx;
+ while (cfg) {
+ if (cfg->irq == irq)
+ return cfg;
+
+ cfg_pri = cfg;
+ cfg = cfg->next;
+ count++;
+ }
+
+ if (!irq_cfgx_free) {
+ unsigned long phys;
+ unsigned long total_bytes;
+ /*
+ * we run out of pre-allocate ones, allocate more
+ */
+ printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+ total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+ if (after_bootmem)
+ cfg = kzalloc(total_bytes, GFP_ATOMIC);
+ else
+ cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+ if (!cfg)
+ panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+ phys = __pa(cfg);
+ printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+ for (i = 0; i < nr_irq_cfg; i++)
+ init_one_irq_cfg(&cfg[i]);
+
+ for (i = 1; i < nr_irq_cfg; i++)
+ cfg[i-1].next = &cfg[i];
+
+ irq_cfgx_free = cfg;
+ }
+
+ cfg = irq_cfgx_free;
+ irq_cfgx_free = irq_cfgx_free->next;
+ cfg->next = NULL;
+ if (cfg_pri)
+ cfg_pri->next = cfg;
+ else
+ irq_cfgx = cfg;
+ cfg->irq = irq;
+ printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+ {
+ /* dump the results */
+ struct irq_cfg *cfg;
+ unsigned long phys;
+ unsigned long bytes = sizeof(struct irq_cfg);
+
+ printk(KERN_DEBUG "=========================== %d\n", irq);
+ printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+ for_each_irq_cfg(cfg) {
+ phys = __pa(cfg);
+ printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+ }
+ printk(KERN_DEBUG "===========================\n");
+ }
+#endif
+ return cfg;
+}
+
+static int assign_irq_vector(int irq, cpumask_t mask);
/*
* Rough estimation of how many shared IRQs there are, can
* be changed anytime.
*/
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+int pin_map_size;
/*
* This is performance-critical, we want to do it O(1)
* between pins and IRQs.
*/
-static struct irq_pin_list {
- int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+ struct dyn_array *da = data;
+ struct irq_pin_list *pin;
+ int i;
+
+ pin = *da->name;
+
+ for (i = 1; i < *da->nr; i++)
+ pin[i-1].next = &pin[i];
+
+ irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+ struct irq_pin_list *pin;
+ int i;
+
+ pin = irq_2_pin_ptr;
+
+ if (pin) {
+ irq_2_pin_ptr = pin->next;
+ pin->next = NULL;
+ return pin;
+ }
+
+ /*
+ * we run out of pre-allocate ones, allocate more
+ */
+ printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+ if (after_bootmem)
+ pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+ GFP_ATOMIC);
+ else
+ pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+ nr_irq_2_pin, PAGE_SIZE, 0);
+
+ if (!pin)
+ panic("can not get more irq_2_pin\n");
+
+ for (i = 1; i < nr_irq_2_pin; i++)
+ pin[i-1].next = &pin[i];
+
+ irq_2_pin_ptr = pin->next;
+ pin->next = NULL;
+
+ return pin;
+}
struct io_apic {
unsigned int index;
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+#ifdef CONFIG_SMP
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+ int apic, pin;
+ struct irq_cfg *cfg;
+ struct irq_pin_list *entry;
+
+ cfg = irq_cfg(irq);
+ entry = cfg->irq_2_pin;
+ for (;;) {
+ unsigned int reg;
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin *2, reg);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+}
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cfg = irq_cfg(irq);
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+
+ dest = cpu_mask_to_apicid(tmp);
+ /*
+ * Only the high 8 bits are valid.
+ */
+ dest = SET_APIC_LOGICAL_ID(dest);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __target_IO_APIC_irq(irq, dest, cfg->vector);
+ irq_to_desc(irq)->affinity = mask;
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#endif /* CONFIG_SMP */
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
*/
static void add_pin_to_irq(unsigned int irq, int apic, int pin)
{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
+ struct irq_cfg *cfg;
+ struct irq_pin_list *entry;
+
+ /* first time to refer irq_cfg, so with new */
+ cfg = irq_cfg_alloc(irq);
+ entry = cfg->irq_2_pin;
+ if (!entry) {
+ entry = get_one_free_irq_2_pin();
+ cfg->irq_2_pin = entry;
+ entry->apic = apic;
+ entry->pin = pin;
+ printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+ return;
+ }
- while (entry->next)
- entry = irq_2_pin + entry->next;
+ while (entry->next) {
+ /* not again, please */
+ if (entry->apic == apic && entry->pin == pin)
+ return;
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: whoops");
+ entry = entry->next;
}
+
+ entry->next = get_one_free_irq_2_pin();
+ entry = entry->next;
entry->apic = apic;
entry->pin = pin;
+ printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
}
/*
int oldapic, int oldpin,
int newapic, int newpin)
{
- struct irq_pin_list *entry = irq_2_pin + irq;
+ struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_pin_list *entry = cfg->irq_2_pin;
+ int replaced = 0;
- while (1) {
+ while (entry) {
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
- }
- if (!entry->next)
+ replaced = 1;
+ /* every one is different, right? */
break;
- entry = irq_2_pin + entry->next;
+ }
+ entry = entry->next;
}
+
+ /* why? call replace before add? */
+ if (!replaced)
+ add_pin_to_irq(irq, newapic, newpin);
}
static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
{
- struct irq_pin_list *entry = irq_2_pin + irq;
+ struct irq_cfg *cfg;
+ struct irq_pin_list *entry;
unsigned int pin, reg;
+ cfg = irq_cfg(irq);
+ entry = cfg->irq_2_pin;
for (;;) {
- pin = entry->pin;
- if (pin == -1)
+ if (!entry)
break;
+ pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin*2);
reg &= ~disable;
reg |= enable;
io_apic_modify(entry->apic, 0x10 + pin*2, reg);
if (!entry->next)
break;
- entry = irq_2_pin + entry->next;
+ entry = entry->next;
}
}
clear_IO_APIC_pin(apic, pin);
}
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
- unsigned long flags;
- int pin;
- struct irq_pin_list *entry = irq_2_pin + irq;
- unsigned int apicid_value;
- cpumask_t tmp;
-
- cpus_and(tmp, cpumask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
- apicid_value = cpu_mask_to_apicid(cpumask);
- /* Prepare to do the io_apic_write */
- apicid_value = apicid_value << 24;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (;;) {
- pin = entry->pin;
- if (pin == -1)
- break;
- io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- irq_desc[irq].affinity = cpumask;
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h> /* kernel_thread() */
-# include <linux/kernel_stat.h> /* kstat */
-# include <linux/slab.h> /* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA (HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
- unsigned long *last_irq;
- unsigned long *irq_delta;
- unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
- (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
- [0 ... NR_IRQS-1] = CPU_MASK_ALL
-};
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
- unsigned long now, int direction)
-{
- int search_idle = 1;
- int cpu = curr_cpu;
-
- goto inside;
-
- do {
- if (unlikely(cpu == curr_cpu))
- search_idle = 0;
-inside:
- if (direction == 1) {
- cpu++;
- if (cpu >= NR_CPUS)
- cpu = 0;
- } else {
- cpu--;
- if (cpu == -1)
- cpu = NR_CPUS-1;
- }
- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
- (search_idle && !IDLE_ENOUGH(cpu, now)));
-
- return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
- unsigned long now = jiffies;
- cpumask_t allowed_mask;
- unsigned int new_cpu;
-
- if (irqbalance_disabled)
- return;
-
- cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
- new_cpu = move(cpu, allowed_mask, now, 1);
- if (cpu != new_cpu)
- set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
- int i, j;
-
- for_each_online_cpu(i) {
- for (j = 0; j < NR_IRQS; j++) {
- if (!irq_desc[j].action)
- continue;
- /* Is it a significant load ? */
- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
- useful_load_threshold)
- continue;
- balance_irq(i, j);
- }
- }
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
- return;
-}
-
-static void do_irq_balance(void)
-{
- int i, j;
- unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
- unsigned long move_this_load = 0;
- int max_loaded = 0, min_loaded = 0;
- int load;
- unsigned long useful_load_threshold = balanced_irq_interval + 10;
- int selected_irq;
- int tmp_loaded, first_attempt = 1;
- unsigned long tmp_cpu_irq;
- unsigned long imbalance = 0;
- cpumask_t allowed_mask, target_cpu_mask, tmp;
-
- for_each_possible_cpu(i) {
- int package_index;
- CPU_IRQ(i) = 0;
- if (!cpu_online(i))
- continue;
- package_index = CPU_TO_PACKAGEINDEX(i);
- for (j = 0; j < NR_IRQS; j++) {
- unsigned long value_now, delta;
- /* Is this an active IRQ or balancing disabled ? */
- if (!irq_desc[j].action || irq_balancing_disabled(j))
- continue;
- if (package_index == i)
- IRQ_DELTA(package_index, j) = 0;
- /* Determine the total count per processor per IRQ */
- value_now = (unsigned long) kstat_cpu(i).irqs[j];
-
- /* Determine the activity per processor per IRQ */
- delta = value_now - LAST_CPU_IRQ(i, j);
-
- /* Update last_cpu_irq[][] for the next time */
- LAST_CPU_IRQ(i, j) = value_now;
-
- /* Ignore IRQs whose rate is less than the clock */
- if (delta < useful_load_threshold)
- continue;
- /* update the load for the processor or package total */
- IRQ_DELTA(package_index, j) += delta;
-
- /* Keep track of the higher numbered sibling as well */
- if (i != package_index)
- CPU_IRQ(i) += delta;
- /*
- * We have sibling A and sibling B in the package
- *
- * cpu_irq[A] = load for cpu A + load for cpu B
- * cpu_irq[B] = load for cpu B
- */
- CPU_IRQ(package_index) += delta;
- }
- }
- /* Find the least loaded processor package */
- for_each_online_cpu(i) {
- if (i != CPU_TO_PACKAGEINDEX(i))
- continue;
- if (min_cpu_irq > CPU_IRQ(i)) {
- min_cpu_irq = CPU_IRQ(i);
- min_loaded = i;
- }
- }
- max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
- /*
- * Look for heaviest loaded processor.
- * We may come back to get the next heaviest loaded processor.
- * Skip processors with trivial loads.
- */
- tmp_cpu_irq = 0;
- tmp_loaded = -1;
- for_each_online_cpu(i) {
- if (i != CPU_TO_PACKAGEINDEX(i))
- continue;
- if (max_cpu_irq <= CPU_IRQ(i))
- continue;
- if (tmp_cpu_irq < CPU_IRQ(i)) {
- tmp_cpu_irq = CPU_IRQ(i);
- tmp_loaded = i;
- }
- }
-
- if (tmp_loaded == -1) {
- /*
- * In the case of small number of heavy interrupt sources,
- * loading some of the cpus too much. We use Ingo's original
- * approach to rotate them around.
- */
- if (!first_attempt && imbalance >= useful_load_threshold) {
- rotate_irqs_among_cpus(useful_load_threshold);
- return;
- }
- goto not_worth_the_effort;
- }
-
- first_attempt = 0; /* heaviest search */
- max_cpu_irq = tmp_cpu_irq; /* load */
- max_loaded = tmp_loaded; /* processor */
- imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
- /*
- * if imbalance is less than approx 10% of max load, then
- * observe diminishing returns action. - quit
- */
- if (imbalance < (max_cpu_irq >> 3))
- goto not_worth_the_effort;
-
-tryanotherirq:
- /* if we select an IRQ to move that can't go where we want, then
- * see if there is another one to try.
- */
- move_this_load = 0;
- selected_irq = -1;
- for (j = 0; j < NR_IRQS; j++) {
- /* Is this an active IRQ? */
- if (!irq_desc[j].action)
- continue;
- if (imbalance <= IRQ_DELTA(max_loaded, j))
- continue;
- /* Try to find the IRQ that is closest to the imbalance
- * without going over.
- */
- if (move_this_load < IRQ_DELTA(max_loaded, j)) {
- move_this_load = IRQ_DELTA(max_loaded, j);
- selected_irq = j;
- }
- }
- if (selected_irq == -1)
- goto tryanothercpu;
-
- imbalance = move_this_load;
-
- /* For physical_balance case, we accumulated both load
- * values in the one of the siblings cpu_irq[],
- * to use the same code for physical and logical processors
- * as much as possible.
- *
- * NOTE: the cpu_irq[] array holds the sum of the load for
- * sibling A and sibling B in the slot for the lowest numbered
- * sibling (A), _AND_ the load for sibling B in the slot for
- * the higher numbered sibling.
- *
- * We seek the least loaded sibling by making the comparison
- * (A+B)/2 vs B
- */
- load = CPU_IRQ(min_loaded) >> 1;
- for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
- if (load > CPU_IRQ(j)) {
- /* This won't change cpu_sibling_map[min_loaded] */
- load = CPU_IRQ(j);
- min_loaded = j;
- }
- }
-
- cpus_and(allowed_mask,
- cpu_online_map,
- balance_irq_affinity[selected_irq]);
- target_cpu_mask = cpumask_of_cpu(min_loaded);
- cpus_and(tmp, target_cpu_mask, allowed_mask);
-
- if (!cpus_empty(tmp)) {
- /* mark for change destination */
- set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
- /* Since we made a change, come back sooner to
- * check for more variation.
- */
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
- return;
- }
- goto tryanotherirq;
-
-not_worth_the_effort:
- /*
- * if we did not find an IRQ to move, then adjust the time interval
- * upward
- */
- balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
- return;
-}
-
-static int balanced_irq(void *unused)
-{
- int i;
- unsigned long prev_balance_time = jiffies;
- long time_remaining = balanced_irq_interval;
-
- /* push everything to CPU 0 to give us a starting point. */
- for (i = 0 ; i < NR_IRQS ; i++) {
- irq_desc[i].pending_mask = cpumask_of_cpu(0);
- set_pending_irq(i, cpumask_of_cpu(0));
- }
-
- set_freezable();
- for ( ; ; ) {
- time_remaining = schedule_timeout_interruptible(time_remaining);
- try_to_freeze();
- if (time_after(jiffies,
- prev_balance_time+balanced_irq_interval)) {
- preempt_disable();
- do_irq_balance();
- prev_balance_time = jiffies;
- time_remaining = balanced_irq_interval;
- preempt_enable();
- }
- }
- return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
- int i;
- struct cpuinfo_x86 *c;
- cpumask_t tmp;
-
- cpus_shift_right(tmp, cpu_online_map, 2);
- c = &boot_cpu_data;
- /* When not overwritten by the command line ask subarchitecture. */
- if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
- irqbalance_disabled = NO_BALANCE_IRQ;
- if (irqbalance_disabled)
- return 0;
-
- /* disable irqbalance completely if there is only one processor online */
- if (num_online_cpus() < 2) {
- irqbalance_disabled = 1;
- return 0;
- }
- /*
- * Enable physical balance only if more than 1 physical processor
- * is present
- */
- if (smp_num_siblings > 1 && !cpus_empty(tmp))
- physical_balance = 1;
-
- for_each_online_cpu(i) {
- irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
- printk(KERN_ERR "balanced_irq_init: out of memory");
- goto failed;
- }
- }
-
- printk(KERN_INFO "Starting balanced_irq\n");
- if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
- return 0;
- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
- for_each_possible_cpu(i) {
- kfree(irq_cpu_data[i].irq_delta);
- irq_cpu_data[i].irq_delta = NULL;
- kfree(irq_cpu_data[i].last_irq);
- irq_cpu_data[i].last_irq = NULL;
- }
- return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
- irqbalance_disabled = 1;
- return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
-#endif /* CONFIG_SMP */
-
#ifndef CONFIG_SMP
void send_IPI_self(int vector)
{
}
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
- }
-
- }
-}
-#endif
-
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
/*
* EISA Edge/Level control register, ELCR
return 0;
}
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ spin_unlock(&vector_lock);
+}
-static int __assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq, cpumask_t mask)
{
- static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
- int vector, offset;
+ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+ unsigned int old_vector;
+ int cpu;
+ struct irq_cfg *cfg;
- BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+ cfg = irq_cfg(irq);
- if (irq_vector[irq] > 0)
- return irq_vector[irq];
+ /* Only try and allocate irqs on cpus that are present */
+ cpus_and(mask, mask, cpu_online_map);
- vector = current_vector;
- offset = current_offset;
-next:
- vector += 8;
- if (vector >= first_system_vector) {
- offset = (offset + 1) % 8;
- vector = FIRST_DEVICE_VECTOR + offset;
- }
- if (vector == current_vector)
- return -ENOSPC;
- if (test_and_set_bit(vector, used_vectors))
- goto next;
+ if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ return -EBUSY;
+
+ old_vector = cfg->vector;
+ if (old_vector) {
+ cpumask_t tmp;
+ cpus_and(tmp, cfg->domain, mask);
+ if (!cpus_empty(tmp))
+ return 0;
+ }
- current_vector = vector;
- current_offset = offset;
- irq_vector[irq] = vector;
+ for_each_cpu_mask_nr(cpu, mask) {
+ cpumask_t domain, new_mask;
+ int new_cpu;
+ int vector, offset;
- return vector;
+ domain = vector_allocation_domain(cpu);
+ cpus_and(new_mask, domain, cpu_online_map);
+
+ vector = current_vector;
+ offset = current_offset;
+next:
+ vector += 8;
+ if (vector >= first_system_vector) {
+ /* If we run out of vectors on large boxen, must share them. */
+ offset = (offset + 1) % 8;
+ vector = FIRST_DEVICE_VECTOR + offset;
+ }
+ if (unlikely(current_vector == vector))
+ continue;
+ if (vector == SYSCALL_VECTOR)
+ goto next;
+
+ for_each_cpu_mask_nr(new_cpu, new_mask)
+ if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+ goto next;
+ /* Found one! */
+ current_vector = vector;
+ current_offset = offset;
+ if (old_vector) {
+ cfg->move_in_progress = 1;
+ cfg->old_domain = cfg->domain;
+ }
+ printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector);
+ for_each_cpu_mask_nr(new_cpu, new_mask) {
+ per_cpu(vector_irq, new_cpu)[vector] = irq;
+ printk(KERN_CONT " %d ", new_cpu);
+ }
+ printk(KERN_CONT "\n");
+ cfg->vector = vector;
+ cfg->domain = domain;
+ return 0;
+ }
+ return -ENOSPC;
}
-static int assign_irq_vector(int irq)
+static int assign_irq_vector(int irq, cpumask_t mask)
{
+ int err;
unsigned long flags;
- int vector;
spin_lock_irqsave(&vector_lock, flags);
- vector = __assign_irq_vector(irq);
+ err = __assign_irq_vector(irq, mask);
spin_unlock_irqrestore(&vector_lock, flags);
- return vector;
+ return err;
+}
+
+static void __clear_irq_vector(int irq)
+{
+ struct irq_cfg *cfg;
+ cpumask_t mask;
+ int cpu, vector;
+
+ cfg = irq_cfg(irq);
+ BUG_ON(!cfg->vector);
+
+ vector = cfg->vector;
+ cpus_and(mask, cfg->domain, cpu_online_map);
+ for_each_cpu_mask_nr(cpu, mask)
+ per_cpu(vector_irq, cpu)[vector] = -1;
+
+ cfg->vector = 0;
+ cpus_clear(cfg->domain);
+}
+
+void __setup_vector_irq(int cpu)
+{
+ /* Initialize vector_irq on a new cpu */
+ /* This function must be called with vector_lock held */
+ int irq, vector;
+ struct irq_cfg *cfg;
+
+ /* Mark the inuse vectors */
+ for_each_irq_cfg(cfg) {
+ if (!cpu_isset(cpu, cfg->domain))
+ continue;
+ vector = cfg->vector;
+ irq = cfg->irq;
+ per_cpu(vector_irq, cpu)[vector] = irq;
+ }
+ /* Mark the free vectors */
+ for (vector = 0; vector < NR_VECTORS; ++vector) {
+ irq = per_cpu(vector_irq, cpu)[vector];
+ if (irq < 0)
+ continue;
+
+ cfg = irq_cfg(irq);
+ if (!cpu_isset(cpu, cfg->domain))
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ }
}
static struct irq_chip ioapic_chip;
#define IOAPIC_EDGE 0
#define IOAPIC_LEVEL 1
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+static void ioapic_register_intr(int irq, unsigned long trigger)
{
+ struct irq_desc *desc;
+
+ /* first time to use this irq_desc */
+ if (irq < 16)
+ desc = irq_to_desc(irq);
+ else
+ desc = irq_to_desc_alloc(irq);
+
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL) {
- irq_desc[irq].status |= IRQ_LEVEL;
+ desc->status |= IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_fasteoi_irq, "fasteoi");
} else {
- irq_desc[irq].status &= ~IRQ_LEVEL;
+ desc->status &= ~IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge");
}
- set_intr_gate(vector, interrupt[irq]);
}
-static void __init setup_IO_APIC_irqs(void)
+static int setup_ioapic_entry(int apic, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector)
+{
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(entry,0,sizeof(*entry));
+
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->dest = destination;
+
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = trigger;
+ entry->polarity = polarity;
+ entry->vector = vector;
+
+ /* Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (trigger)
+ entry->mask = 1;
+
+ return 0;
+}
+
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+ int trigger, int polarity)
{
+ struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, first_notcon = 1, vector;
+ cpumask_t mask;
+
+ if (!IO_APIC_IRQ(irq))
+ return;
+
+ cfg = irq_cfg(irq);
+
+ mask = TARGET_CPUS;
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(mask, cfg->domain, mask);
+
+ apic_printk(APIC_VERBOSE,KERN_DEBUG
+ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+ "IRQ %d Mode:%i Active:%i)\n",
+ apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+ irq, trigger, polarity);
+
+
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+ cpu_mask_to_apicid(mask), trigger, polarity,
+ cfg->vector)) {
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mp_ioapics[apic].mp_apicid, pin);
+ __clear_irq_vector(irq);
+ return;
+ }
+
+ ioapic_register_intr(irq, trigger);
+ if (irq < 16)
+ disable_8259A_irq(irq);
+
+ ioapic_write_entry(apic, pin, entry);
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+ int apic, pin, idx, irq, first_notcon = 1;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
for (apic = 0; apic < nr_ioapics; apic++) {
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry, 0, sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest =
- cpu_mask_to_apicid(TARGET_CPUS);
-
- idx = find_irq_entry(apic, pin, mp_INT);
+ idx = find_irq_entry(apic,pin,mp_INT);
if (idx == -1) {
if (first_notcon) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- " IO-APIC (apicid-pin) %d-%d",
- mp_ioapics[apic].mp_apicid,
- pin);
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
first_notcon = 0;
} else
- apic_printk(APIC_VERBOSE, ", %d-%d",
- mp_ioapics[apic].mp_apicid, pin);
+ apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
continue;
}
-
if (!first_notcon) {
apic_printk(APIC_VERBOSE, " not connected.\n");
first_notcon = 1;
}
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- }
-
irq = pin_2_irq(idx, apic, pin);
- /*
- * skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum
- */
- if (multi_timer_check(apic, irq))
- continue;
- else
- add_pin_to_irq(irq, apic, pin);
- if (!apic && !IO_APIC_IRQ(irq))
- continue;
+ if (multi_timer_check(apic, irq))
+ continue;
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+ add_pin_to_irq(irq, apic, pin);
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- ioapic_write_entry(apic, pin, entry);
+ setup_IO_APIC_irq(apic, pin, irq,
+ irq_trigger(idx), irq_polarity(idx));
}
}
*/
entry.dest_mode = INT_DEST_MODE;
entry.mask = 1; /* mask IRQ now */
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
entry.delivery_mode = INT_DELIVERY_MODE;
entry.polarity = 0;
entry.trigger = 0;
* The timer IRQ doesn't have to know that behind the
* scene we may have a 8259A-master in AEOI mode ...
*/
- ioapic_register_intr(0, vector, IOAPIC_EDGE);
+ ioapic_register_intr(0, IOAPIC_EDGE);
/*
* Add it to the IO-APIC irq-routing table:
union IO_APIC_reg_02 reg_02;
union IO_APIC_reg_03 reg_03;
unsigned long flags;
+ struct irq_cfg *cfg;
if (apic_verbosity == APIC_QUIET)
return;
printk(KERN_DEBUG ".... IRQ redirection table:\n");
- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
- " Stat Dest Deli Vect: \n");
+ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+ " Stat Dmod Deli Vect: \n");
for (i = 0; i <= reg_01.bits.entries; i++) {
struct IO_APIC_route_entry entry;
entry = ioapic_read_entry(apic, i);
- printk(KERN_DEBUG " %02x %03X %02X ",
- i,
- entry.dest.logical.logical_dest,
- entry.dest.physical.physical_dest
- );
+ printk(KERN_DEBUG " %02x %02X ", i, entry.dest);
printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
entry.mask,
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
+ for_each_irq_cfg(cfg) {
+ struct irq_pin_list *entry = cfg->irq_2_pin;
+ if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", i);
for (;;) {
printk("-> %d:%d", entry->apic, entry->pin);
if (!entry->next)
break;
- entry = irq_2_pin + entry->next;
+ entry = entry->next;
}
printk("\n");
}
int i, apic;
unsigned long flags;
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
if (!pirqs_enabled)
for (i = 0; i < MAX_PIRQS; i++)
pirq_entries[i] = -1;
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest.physical.physical_dest = read_apic_id();
+ entry.dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
return was_pending;
}
+static void irq_complete_move(unsigned int irq);
static void ack_ioapic_irq(unsigned int irq)
{
+ irq_complete_move(irq);
move_native_irq(irq);
ack_APIC_irq();
}
unsigned long v;
int i;
+ irq_complete_move(irq);
move_native_irq(irq);
/*
* It appears there is an erratum which affects at least version 0x11
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
- i = irq_vector[irq];
+ i = irq_cfg(irq)->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
static int ioapic_retrigger_irq(unsigned int irq)
{
- send_IPI_self(irq_vector[irq]);
+ send_IPI_self(irq_cfg(irq)->vector);
return 1;
}
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+ unsigned vector, me;
+ ack_APIC_irq();
+ irq_enter();
+
+ me = smp_processor_id();
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+ unsigned int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ irq = __get_cpu_var(vector_irq)[vector];
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
+ cfg = irq_cfg(irq);
+ spin_lock(&desc->lock);
+ if (!cfg->move_cleanup_count)
+ goto unlock;
+
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+ goto unlock;
+
+ __get_cpu_var(vector_irq)[vector] = -1;
+ cfg->move_cleanup_count--;
+unlock:
+ spin_unlock(&desc->lock);
+ }
+
+ irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+ struct irq_cfg *cfg = irq_cfg(irq);
+ unsigned vector, me;
+
+ if (likely(!cfg->move_in_progress))
+ return;
+
+ vector = ~get_irq_regs()->orig_ax;
+ me = smp_processor_id();
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+ cpumask_t cleanup_mask;
+
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.startup = startup_ioapic_irq,
static inline void init_IO_APIC_traps(void)
{
int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
/*
* NOTE! The local APIC isn't very good at handling
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for (irq = 0; irq < NR_IRQS ; irq++) {
- if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
+ for_each_irq_cfg(cfg) {
+ irq = cfg->irq;
+ if (IO_APIC_IRQ(irq) && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
*/
if (irq < 16)
make_8259A_irq(irq);
- else
+ else {
+ desc = irq_to_desc(irq);
/* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_chip;
+ desc->chip = &no_irq_chip;
+ }
}
}
}
.ack = ack_lapic_irq,
};
-static void lapic_register_intr(int irq, int vector)
+static void lapic_register_intr(int irq)
{
- irq_desc[irq].status &= ~IRQ_LEVEL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ desc->status &= ~IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
- set_intr_gate(vector, interrupt[irq]);
}
static void __init setup_nmi(void)
entry1.dest_mode = 0; /* physical delivery */
entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.dest = hard_smp_processor_id();
entry1.delivery_mode = dest_ExtINT;
entry1.polarity = entry0.polarity;
entry1.trigger = 0;
*/
static inline void __init check_timer(void)
{
+ struct irq_cfg *cfg = irq_cfg(0);
int apic1, pin1, apic2, pin2;
int no_pin1 = 0;
- int vector;
unsigned int ver;
unsigned long flags;
* get/set the timer IRQ vector:
*/
disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
+ assign_irq_vector(0, TARGET_CPUS);
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
"apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
+ cfg->vector, apic1, pin1, apic2, pin2);
/*
* Some BIOS writers are clueless and report the ExtINTA
*/
if (no_pin1) {
add_pin_to_irq(0, apic1, pin1);
- setup_timer_IRQ0_pin(apic1, pin1, vector);
+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
}
unmask_IO_APIC_irq(0);
if (timer_irq_works()) {
* legacy devices should be connected to IO APIC #0
*/
replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
- setup_timer_IRQ0_pin(apic2, pin2, vector);
+ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
unmask_IO_APIC_irq(0);
enable_8259A_irq(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
- lapic_register_intr(0, vector);
- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ lapic_register_intr(0);
+ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0);
if (timer_irq_works()) {
goto out;
}
disable_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
apic_printk(APIC_QUIET, KERN_INFO
void __init setup_IO_APIC(void)
{
- int i;
-
- /* Reserve all the system vectors. */
- for (i = first_system_vector; i < NR_VECTORS; i++)
- set_bit(i, used_vectors);
-
enable_IO_APIC();
io_apic_irqs = ~PIC_IRQS;
/*
* Dynamic irq allocate and deallocation
*/
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
{
/* Allocate an unused irq */
- int irq, new, vector = 0;
+ unsigned int irq, new;
unsigned long flags;
+ struct irq_cfg *cfg_new;
+
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+ /* only can use bus/dev/fn.. when per_cpu vector is used */
+ irq_want = nr_irqs - 1;
+#endif
- irq = -ENOSPC;
+ irq = 0;
spin_lock_irqsave(&vector_lock, flags);
- for (new = (NR_IRQS - 1); new >= 0; new--) {
+ for (new = (nr_irqs - 1); new > 0; new--) {
if (platform_legacy_irq(new))
continue;
- if (irq_vector[new] != 0)
+ cfg_new = irq_cfg(new);
+ if (cfg_new && cfg_new->vector != 0)
continue;
- vector = __assign_irq_vector(new);
- if (likely(vector > 0))
+ if (!cfg_new)
+ cfg_new = irq_cfg_alloc(new);
+ if (__assign_irq_vector(new, TARGET_CPUS) == 0)
irq = new;
break;
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq >= 0) {
- set_intr_gate(vector, interrupt[irq]);
+ if (irq > 0) {
dynamic_irq_init(irq);
}
return irq;
}
+int create_irq(void)
+{
+ return create_irq_nr(nr_irqs - 1);
+}
+
void destroy_irq(unsigned int irq)
{
unsigned long flags;
dynamic_irq_cleanup(irq);
spin_lock_irqsave(&vector_lock, flags);
- clear_bit(irq_vector[irq], used_vectors);
- irq_vector[irq] = 0;
+ __clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags);
}
#ifdef CONFIG_PCI_MSI
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
- int vector;
+ struct irq_cfg *cfg;
+ int err;
unsigned dest;
+ cpumask_t tmp;
- vector = assign_irq_vector(irq);
- if (vector >= 0) {
- dest = cpu_mask_to_apicid(TARGET_CPUS);
-
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((INT_DEST_MODE == 0) ?
-MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
-
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
-MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(vector);
- }
- return vector;
+ tmp = TARGET_CPUS;
+ err = assign_irq_vector(irq, tmp);
+ if (err)
+ return err;
+
+ cfg = irq_cfg(irq);
+ cpus_and(tmp, cfg->domain, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((INT_DEST_MODE == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(cfg->vector);
+
+ return err;
}
#ifdef CONFIG_SMP
static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
- int vector;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
+ return;
- vector = assign_irq_vector(irq);
- if (vector < 0)
+ if (assign_irq_vector(irq, mask))
return;
- dest = cpu_mask_to_apicid(mask);
+ cfg = irq_cfg(irq);
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
read_msi_msg(irq, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(vector);
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = mask;
+ irq_to_desc(irq)->affinity = mask;
}
#endif /* CONFIG_SMP */
.retrigger = ioapic_retrigger_irq,
};
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+ unsigned int irq;
+
+ irq = dev->bus->number;
+ irq <<= 8;
+ irq |= dev->devfn;
+ irq <<= 12;
+
+ return irq;
+}
+
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
struct msi_msg msg;
int irq, ret;
- irq = create_irq();
- if (irq < 0)
- return irq;
+
+ unsigned int irq_want;
+
+ irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+ irq = create_irq_nr(irq_want);
+
+ if (irq == 0)
+ return -1;
ret = msi_compose_msg(dev, irq, &msg);
if (ret < 0) {
#ifdef CONFIG_SMP
-static void target_ht_irq(unsigned int irq, unsigned int dest)
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
struct ht_irq_msg msg;
fetch_ht_irq_msg(irq, &msg);
- msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
- msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
write_ht_irq_msg(irq, &msg);
static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
+ return;
- cpus_and(mask, tmp, CPU_MASK_ALL);
+ if (assign_irq_vector(irq, mask))
+ return;
- dest = cpu_mask_to_apicid(mask);
+ cfg = irq_cfg(irq);
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
- target_ht_irq(irq, dest);
- irq_desc[irq].affinity = mask;
+ target_ht_irq(irq, dest, cfg->vector);
+ irq_to_desc(irq)->affinity = mask;
}
#endif
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
- int vector;
+ struct irq_cfg *cfg;
+ int err;
+ cpumask_t tmp;
- vector = assign_irq_vector(irq);
- if (vector >= 0) {
+ tmp = TARGET_CPUS;
+ err = assign_irq_vector(irq, tmp);
+ if ( !err) {
struct ht_irq_msg msg;
unsigned dest;
- cpumask_t tmp;
- cpus_clear(tmp);
- cpu_set(vector >> 8, tmp);
+ cfg = irq_cfg(irq);
+ cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
msg.address_lo =
HT_IRQ_LOW_BASE |
HT_IRQ_LOW_DEST_ID(dest) |
- HT_IRQ_LOW_VECTOR(vector) |
+ HT_IRQ_LOW_VECTOR(cfg->vector) |
((INT_DEST_MODE == 0) ?
HT_IRQ_LOW_DM_PHYSICAL :
HT_IRQ_LOW_DM_LOGICAL) |
set_irq_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
}
- return vector;
+ return err;
}
#endif /* CONFIG_HT_IRQ */
}
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
+int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
{
- struct IO_APIC_route_entry entry;
-
if (!IO_APIC_IRQ(irq)) {
printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
ioapic);
return -EINVAL;
}
- /*
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
- * Note that we mask (disable) IRQs now -- these get enabled when the
- * corresponding device driver registers for this IRQ.
- */
-
- memset(&entry, 0, sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.trigger = edge_level;
- entry.polarity = active_high_low;
- entry.mask = 1;
-
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
if (irq >= 16)
add_pin_to_irq(irq, ioapic, pin);
- entry.vector = assign_irq_vector(irq);
-
- apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
- "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
- edge_level, active_high_low);
-
- ioapic_register_intr(irq, entry.vector, edge_level);
-
- if (!ioapic && (irq < 16))
- disable_8259A_irq(irq);
-
- ioapic_write_entry(ioapic, pin, entry);
+ setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
return 0;
}
#endif /* CONFIG_ACPI */
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+ int pin, ioapic, irq, irq_entry;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+
+ /* setup_IO_APIC_irqs could fail to get vector for some device
+ * when you have too many devices, because at that time only boot
+ * cpu is online.
+ */
+ cfg = irq_cfg(irq);
+ if (!cfg->vector)
+ setup_IO_APIC_irq(ioapic, pin, irq,
+ irq_trigger(irq_entry),
+ irq_polarity(irq_entry));
+ else {
+ desc = irq_to_desc(irq);
+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
+ }
+ }
+
+ }
+}
+#endif
+
static int __init parse_disable_timer_pin_1(char *arg)
{
disable_timer_pin_1 = 1;