}
}
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
u64 pstate;
int i;
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
- for_each_cpu_mask(i, mask)
+ for_each_cpu_mask_nr(i, *mask)
spitfire_xcall_helper(data0, data1, data2, pstate, i);
}
* packet, but we have no use for that. However we do take advantage of
* the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
*/
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
{
u64 pstate, ver, busy_mask;
int nack_busy_id, is_jbus, need_more;
+ cpumask_t mask;
- if (cpus_empty(mask))
+ if (cpus_empty(*mask_p))
return;
+ mask = *mask_p;
+
/* Unfortunately, someone at Sun had the brilliant idea to make the
* busy/nack fields hard-coded by ITID number for this Ultra-III
* derivative processor.
{
int i;
- for_each_cpu_mask(i, mask) {
+ for_each_cpu_mask_nr(i, mask) {
u64 target = (i << 14) | 0x70;
if (is_jbus) {
: : "r" (pstate));
if (unlikely(need_more)) {
int i, cnt = 0;
- for_each_cpu_mask(i, mask) {
+ for_each_cpu_mask_nr(i, mask) {
cpu_clear(i, mask);
cnt++;
if (cnt == 32)
/* Clear out the mask bits for cpus which did not
* NACK us.
*/
- for_each_cpu_mask(i, mask) {
+ for_each_cpu_mask_nr(i, mask) {
u64 check_mask;
if (is_jbus)
}
/* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
+ int cnt, retries, this_cpu, prev_sent, i;
+ unsigned long status;
+ cpumask_t error_mask;
struct trap_per_cpu *tb;
u16 *cpu_list;
u64 *mondo;
- cpumask_t error_mask;
- unsigned long flags, status;
- int cnt, retries, this_cpu, prev_sent, i;
- if (cpus_empty(mask))
+ if (cpus_empty(*mask))
return;
- /* We have to do this whole thing with interrupts fully disabled.
- * Otherwise if we send an xcall from interrupt context it will
- * corrupt both our mondo block and cpu list state.
- *
- * One consequence of this is that we cannot use timeout mechanisms
- * that depend upon interrupts being delivered locally. So, for
- * example, we cannot sample jiffies and expect it to advance.
- *
- * Fortunately, udelay() uses %stick/%tick so we can use that.
- */
- local_irq_save(flags);
-
this_cpu = smp_processor_id();
tb = &trap_block[this_cpu];
/* Setup the initial cpu list. */
cnt = 0;
- for_each_cpu_mask(i, mask)
+ for_each_cpu_mask_nr(i, *mask)
cpu_list[cnt++] = i;
cpus_clear(error_mask);
}
} while (1);
- local_irq_restore(flags);
-
if (unlikely(!cpus_empty(error_mask)))
goto fatal_mondo_cpu_error;
"were in error state\n",
this_cpu);
printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
- for_each_cpu_mask(i, error_mask)
+ for_each_cpu_mask_nr(i, error_mask)
printk("%d ", i);
printk("]\n");
return;
fatal_mondo_timeout:
- local_irq_restore(flags);
printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
" progress after %d retries.\n",
this_cpu, retries);
goto dump_cpu_list_and_out;
fatal_mondo_error:
- local_irq_restore(flags);
printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
this_cpu, status);
printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
printk("]\n");
}
-/* Send cross call to all processors mentioned in MASK
- * except self.
+static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+
+static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+{
+ unsigned long flags;
+
+ /* We have to do this whole thing with interrupts fully disabled.
+ * Otherwise if we send an xcall from interrupt context it will
+ * corrupt both our mondo block and cpu list state.
+ *
+ * One consequence of this is that we cannot use timeout mechanisms
+ * that depend upon interrupts being delivered locally. So, for
+ * example, we cannot sample jiffies and expect it to advance.
+ *
+ * Fortunately, udelay() uses %stick/%tick so we can use that.
+ */
+ local_irq_save(flags);
+ xcall_deliver_impl(data0, data1, data2, mask);
+ local_irq_restore(flags);
+}
+
+/* Send cross call to all processors mentioned in MASK_P
+ * except self. Really, there are only two cases currently,
+ * "&cpu_online_map" and "&mm->cpu_vm_mask".
*/
-static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
+static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask_p)
{
u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
int this_cpu = get_cpu();
+ cpumask_t mask;
- cpus_and(mask, mask, cpu_online_map);
+ mask = *mask_p;
+ if (mask_p != &cpu_online_map)
+ cpus_and(mask, mask, cpu_online_map);
cpu_clear(this_cpu, mask);
- if (tlb_type == spitfire)
- spitfire_xcall_deliver(data0, data1, data2, mask);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- cheetah_xcall_deliver(data0, data1, data2, mask);
- else
- hypervisor_xcall_deliver(data0, data1, data2, mask);
+ xcall_deliver(data0, data1, data2, &mask);
/* NOTE: Caller runs local copy on master. */
put_cpu();
static void smp_start_sync_tick_client(int cpu)
{
- cpumask_t mask = cpumask_of_cpu(cpu);
-
- smp_cross_call_masked(&xcall_sync_tick,
- 0, 0, 0, mask);
+ xcall_deliver((u64) &xcall_sync_tick, 0, 0,
+ &cpumask_of_cpu(cpu));
}
-/* Send cross call to all processors except self. */
-#define smp_cross_call(func, ctx, data1, data2) \
- smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t finished;
- int wait;
-};
-
-static struct call_data_struct *call_data;
-
extern unsigned long xcall_call_function;
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int smp_call_function_mask(void (*func)(void *info), void *info,
- int nonatomic, int wait, cpumask_t mask)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- struct call_data_struct data;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.finished, 0);
- data.wait = wait;
-
- spin_lock(&call_lock);
-
- cpu_clear(smp_processor_id(), mask);
- cpus = cpus_weight(mask);
- if (!cpus)
- goto out_unlock;
-
- call_data = &data;
- mb();
-
- smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
-
- /* Wait for response */
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-
-out_unlock:
- spin_unlock(&call_lock);
-
- return 0;
+ xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
}
-int smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
+extern unsigned long xcall_call_function_single;
+
+void arch_send_call_function_single_ipi(int cpu)
{
- return smp_call_function_mask(func, info, nonatomic, wait,
- cpu_online_map);
+ xcall_deliver((u64) &xcall_call_function_single, 0, 0,
+ &cpumask_of_cpu(cpu));
}
+/* Send cross call to all processors except self. */
+#define smp_cross_call(func, ctx, data1, data2) \
+ smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map)
+
void smp_call_function_client(int irq, struct pt_regs *regs)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
+ clear_softint(1 << irq);
+ generic_smp_call_function_interrupt();
+}
+void smp_call_function_single_client(int irq, struct pt_regs *regs)
+{
clear_softint(1 << irq);
- if (call_data->wait) {
- /* let initiator proceed only after completion */
- func(info);
- atomic_inc(&call_data->finished);
- } else {
- /* let initiator proceed after getting data */
- atomic_inc(&call_data->finished);
- func(info);
- }
+ generic_smp_call_function_single_interrupt();
}
static void tsb_sync(void *info)
void smp_tsb_sync(struct mm_struct *mm)
{
- smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+ smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
}
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
-extern unsigned long xcall_report_regs;
#ifdef CONFIG_MAGIC_SYSRQ
extern unsigned long xcall_fetch_glob_regs;
#endif
__local_flush_dcache_page(page);
} else if (cpu_online(cpu)) {
void *pg_addr = page_address(page);
- u64 data0;
+ u64 data0 = 0;
if (tlb_type == spitfire) {
- data0 =
- ((u64)&xcall_flush_dcache_page_spitfire);
+ data0 = ((u64)&xcall_flush_dcache_page_spitfire);
if (page_mapping(page) != NULL)
data0 |= ((u64)1 << 32);
- spitfire_xcall_deliver(data0,
- __pa(pg_addr),
- (u64) pg_addr,
- mask);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
- data0 =
- ((u64)&xcall_flush_dcache_page_cheetah);
- cheetah_xcall_deliver(data0,
- __pa(pg_addr),
- 0, mask);
+ data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
}
+ if (data0) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, &mask);
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ }
}
put_cpu();
void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
- void *pg_addr = page_address(page);
cpumask_t mask = cpu_online_map;
- u64 data0;
+ void *pg_addr;
int this_cpu;
+ u64 data0;
if (tlb_type == hypervisor)
return;
#endif
if (cpus_empty(mask))
goto flush_self;
+ data0 = 0;
+ pg_addr = page_address(page);
if (tlb_type == spitfire) {
data0 = ((u64)&xcall_flush_dcache_page_spitfire);
if (page_mapping(page) != NULL)
data0 |= ((u64)1 << 32);
- spitfire_xcall_deliver(data0,
- __pa(pg_addr),
- (u64) pg_addr,
- mask);
} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
data0 = ((u64)&xcall_flush_dcache_page_cheetah);
- cheetah_xcall_deliver(data0,
- __pa(pg_addr),
- 0, mask);
#endif
}
+ if (data0) {
+ xcall_deliver(data0, __pa(pg_addr),
+ (u64) pg_addr, &mask);
#ifdef CONFIG_DEBUG_DCFLUSH
- atomic_inc(&dcpage_flushes_xcall);
+ atomic_inc(&dcpage_flushes_xcall);
#endif
+ }
flush_self:
__local_flush_dcache_page(page);
put_cpu();
}
-static void __smp_receive_signal_mask(cpumask_t mask)
-{
- smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
-}
-
-void smp_receive_signal(int cpu)
-{
- cpumask_t mask = cpumask_of_cpu(cpu);
-
- if (cpu_online(cpu))
- __smp_receive_signal_mask(mask);
-}
-
-void smp_receive_signal_client(int irq, struct pt_regs *regs)
-{
- clear_softint(1 << irq);
-}
-
void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
struct mm_struct *mm;
}
#endif
-void smp_report_regs(void)
-{
- smp_cross_call(&xcall_report_regs, 0, 0, 0);
-}
-
#ifdef CONFIG_MAGIC_SYSRQ
void smp_fetch_global_regs(void)
{
smp_cross_call_masked(&xcall_flush_tlb_mm,
ctx, 0, 0,
- mm->cpu_vm_mask);
+ &mm->cpu_vm_mask);
local_flush_and_out:
__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
else
smp_cross_call_masked(&xcall_flush_tlb_pending,
ctx, nr, (unsigned long) vaddrs,
- mm->cpu_vm_mask);
+ &mm->cpu_vm_mask);
__flush_tlb_pending(ctx, nr, vaddrs);
{
}
+void __init smp_setup_processor_id(void)
+{
+ if (tlb_type == spitfire)
+ xcall_deliver_impl = spitfire_xcall_deliver;
+ else if (tlb_type == cheetah || tlb_type == cheetah_plus)
+ xcall_deliver_impl = cheetah_xcall_deliver;
+ else
+ xcall_deliver_impl = hypervisor_xcall_deliver;
+}
+
void __devinit smp_fill_in_sib_core_maps(void)
{
unsigned int i;
void smp_send_reschedule(int cpu)
{
- smp_receive_signal(cpu);
+ xcall_deliver((u64) &xcall_receive_signal, 0, 0,
+ &cpumask_of_cpu(cpu));
+}
+
+void smp_receive_signal_client(int irq, struct pt_regs *regs)
+{
+ clear_softint(1 << irq);
}
/* This is a nop because we capture all other cpus