sparc64: Disable local interrupts around xcall_deliver_impl() invocation.

[linux-2.6-omap-h63xx.git] / arch / sparc64 / kernel / smp.c
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c

index fa63c68a181941736a7809553d41538a1c5ab074..6d458b35643c7b2379e0b9d77eb12e7dac36764f 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,13 +459,13 @@ again:
         }
  }
  
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
  {
         u64 pstate;
         int i;
  
         __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-       for_each_cpu_mask(i, mask)
+       for_each_cpu_mask_nr(i, *mask)
                 spitfire_xcall_helper(data0, data1, data2, pstate, i);
  }
  
@@ -473,14 +473,17 @@ static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpuma
   * packet, but we have no use for that.  However we do take advantage of
   * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
   */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
  {
         u64 pstate, ver, busy_mask;
         int nack_busy_id, is_jbus, need_more;
+       cpumask_t mask;
  
-       if (cpus_empty(mask))
+       if (cpus_empty(*mask_p))
                 return;
  
+       mask = *mask_p;
+
         /* Unfortunately, someone at Sun had the brilliant idea to make the
          * busy/nack fields hard-coded by ITID number for this Ultra-III
          * derivative processor.
@@ -511,7 +514,7 @@ retry:
         {
                 int i;
  
-               for_each_cpu_mask(i, mask) {
+               for_each_cpu_mask_nr(i, mask) {
                         u64 target = (i << 14) | 0x70;
  
                         if (is_jbus) {
@@ -550,7 +553,7 @@ retry:
                                                      : : "r" (pstate));
                                 if (unlikely(need_more)) {
                                         int i, cnt = 0;
-                                       for_each_cpu_mask(i, mask) {
+                                       for_each_cpu_mask_nr(i, mask) {
                                                 cpu_clear(i, mask);
                                                 cnt++;
                                                 if (cnt == 32)
@@ -584,7 +587,7 @@ retry:
                         /* Clear out the mask bits for cpus which did not
                          * NACK us.
                          */
-                       for_each_cpu_mask(i, mask) {
+                       for_each_cpu_mask_nr(i, mask) {
                                 u64 check_mask;
  
                                 if (is_jbus)
@@ -605,30 +608,18 @@ retry:
  }
  
  /* Multi-cpu list version.  */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
  {
+       int cnt, retries, this_cpu, prev_sent, i;
+       unsigned long status;
+       cpumask_t error_mask;
         struct trap_per_cpu *tb;
         u16 *cpu_list;
         u64 *mondo;
-       cpumask_t error_mask;
-       unsigned long flags, status;
-       int cnt, retries, this_cpu, prev_sent, i;
  
-       if (cpus_empty(mask))
+       if (cpus_empty(*mask))
                 return;
  
-       /* We have to do this whole thing with interrupts fully disabled.
-        * Otherwise if we send an xcall from interrupt context it will
-        * corrupt both our mondo block and cpu list state.
-        *
-        * One consequence of this is that we cannot use timeout mechanisms
-        * that depend upon interrupts being delivered locally.  So, for
-        * example, we cannot sample jiffies and expect it to advance.
-        *
-        * Fortunately, udelay() uses %stick/%tick so we can use that.
-        */
-       local_irq_save(flags);
-
         this_cpu = smp_processor_id();
         tb = &trap_block[this_cpu];
  
@@ -642,7 +633,7 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
  
         /* Setup the initial cpu list.  */
         cnt = 0;
-       for_each_cpu_mask(i, mask)
+       for_each_cpu_mask_nr(i, *mask)
                 cpu_list[cnt++] = i;
  
         cpus_clear(error_mask);
@@ -717,8 +708,6 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
                 }
         } while (1);
  
-       local_irq_restore(flags);
-
         if (unlikely(!cpus_empty(error_mask)))
                 goto fatal_mondo_cpu_error;
  
@@ -729,20 +718,18 @@ fatal_mondo_cpu_error:
                "were in error state\n",
                this_cpu);
         printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
-       for_each_cpu_mask(i, error_mask)
+       for_each_cpu_mask_nr(i, error_mask)
                 printk("%d ", i);
         printk("]\n");
         return;
  
  fatal_mondo_timeout:
-       local_irq_restore(flags);
         printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
                " progress after %d retries.\n",
                this_cpu, retries);
         goto dump_cpu_list_and_out;
  
  fatal_mondo_error:
-       local_irq_restore(flags);
         printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
                this_cpu, status);
         printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
@@ -756,23 +743,43 @@ dump_cpu_list_and_out:
         printk("]\n");
  }
  
-/* Send cross call to all processors mentioned in MASK
- * except self.
+static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+
+static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+{
+       unsigned long flags;
+
+       /* We have to do this whole thing with interrupts fully disabled.
+        * Otherwise if we send an xcall from interrupt context it will
+        * corrupt both our mondo block and cpu list state.
+        *
+        * One consequence of this is that we cannot use timeout mechanisms
+        * that depend upon interrupts being delivered locally.  So, for
+        * example, we cannot sample jiffies and expect it to advance.
+        *
+        * Fortunately, udelay() uses %stick/%tick so we can use that.
+        */
+       local_irq_save(flags);
+       xcall_deliver_impl(data0, data1, data2, mask);
+       local_irq_restore(flags);
+}
+
+/* Send cross call to all processors mentioned in MASK_P
+ * except self.  Really, there are only two cases currently,
+ * "&cpu_online_map" and "&mm->cpu_vm_mask".
   */
-static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
+static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask_p)
  {
         u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
         int this_cpu = get_cpu();
+       cpumask_t mask;
  
-       cpus_and(mask, mask, cpu_online_map);
+       mask = *mask_p;
+       if (mask_p != &cpu_online_map)
+               cpus_and(mask, mask, cpu_online_map);
         cpu_clear(this_cpu, mask);
  
-       if (tlb_type == spitfire)
-               spitfire_xcall_deliver(data0, data1, data2, mask);
-       else if (tlb_type == cheetah || tlb_type == cheetah_plus)
-               cheetah_xcall_deliver(data0, data1, data2, mask);
-       else
-               hypervisor_xcall_deliver(data0, data1, data2, mask);
+       xcall_deliver(data0, data1, data2, &mask);
         /* NOTE: Caller runs local copy on master. */
  
         put_cpu();
@@ -782,98 +789,39 @@ extern unsigned long xcall_sync_tick;
  
  static void smp_start_sync_tick_client(int cpu)
  {
-       cpumask_t mask = cpumask_of_cpu(cpu);
-
-       smp_cross_call_masked(&xcall_sync_tick,
-                             0, 0, 0, mask);
+       xcall_deliver((u64) &xcall_sync_tick, 0, 0,
+                     &cpumask_of_cpu(cpu));
  }
  
-/* Send cross call to all processors except self. */
-#define smp_cross_call(func, ctx, data1, data2) \
-       smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
-
-struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       atomic_t finished;
-       int wait;
-};
-
-static struct call_data_struct *call_data;
-
  extern unsigned long xcall_call_function;
  
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int smp_call_function_mask(void (*func)(void *info), void *info,
-                                 int nonatomic, int wait, cpumask_t mask)
+void arch_send_call_function_ipi(cpumask_t mask)
  {
-       struct call_data_struct data;
-       int cpus;
-
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
-
-       data.func = func;
-       data.info = info;
-       atomic_set(&data.finished, 0);
-       data.wait = wait;
-
-       spin_lock(&call_lock);
-
-       cpu_clear(smp_processor_id(), mask);
-       cpus = cpus_weight(mask);
-       if (!cpus)
-               goto out_unlock;
-
-       call_data = &data;
-       mb();
-
-       smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
-
-       /* Wait for response */
-       while (atomic_read(&data.finished) != cpus)
-               cpu_relax();
-
-out_unlock:
-       spin_unlock(&call_lock);
-
-       return 0;
+       xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
  }
  
-int smp_call_function(void (*func)(void *info), void *info,
-                     int nonatomic, int wait)
+extern unsigned long xcall_call_function_single;
+
+void arch_send_call_function_single_ipi(int cpu)
  {
-       return smp_call_function_mask(func, info, nonatomic, wait,
-                                     cpu_online_map);
+       xcall_deliver((u64) &xcall_call_function_single, 0, 0,
+                     &cpumask_of_cpu(cpu));
  }
  
+/* Send cross call to all processors except self. */
+#define smp_cross_call(func, ctx, data1, data2) \
+       smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map)
+
  void smp_call_function_client(int irq, struct pt_regs *regs)
  {
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
+       clear_softint(1 << irq);
+       generic_smp_call_function_interrupt();
+}
  
+void smp_call_function_single_client(int irq, struct pt_regs *regs)
+{
         clear_softint(1 << irq);
-       if (call_data->wait) {
-               /* let initiator proceed only after completion */
-               func(info);
-               atomic_inc(&call_data->finished);
-       } else {
-               /* let initiator proceed after getting data */
-               atomic_inc(&call_data->finished);
-               func(info);
-       }
+       generic_smp_call_function_single_interrupt();
  }
  
  static void tsb_sync(void *info)
@@ -893,13 +841,12 @@ static void tsb_sync(void *info)
  
  void smp_tsb_sync(struct mm_struct *mm)
  {
-       smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+       smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
  }
  
  extern unsigned long xcall_flush_tlb_mm;
  extern unsigned long xcall_flush_tlb_pending;
  extern unsigned long xcall_flush_tlb_kernel_range;
-extern unsigned long xcall_report_regs;
  #ifdef CONFIG_MAGIC_SYSRQ
  extern unsigned long xcall_fetch_glob_regs;
  #endif
@@ -950,29 +897,24 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
                 __local_flush_dcache_page(page);
         } else if (cpu_online(cpu)) {
                 void *pg_addr = page_address(page);
-               u64 data0;
+               u64 data0 = 0;
  
                 if (tlb_type == spitfire) {
-                       data0 =
-                               ((u64)&xcall_flush_dcache_page_spitfire);
+                       data0 = ((u64)&xcall_flush_dcache_page_spitfire);
                         if (page_mapping(page) != NULL)
                                 data0 |= ((u64)1 << 32);
-                       spitfire_xcall_deliver(data0,
-                                              __pa(pg_addr),
-                                              (u64) pg_addr,
-                                              mask);
                 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
  #ifdef DCACHE_ALIASING_POSSIBLE
-                       data0 =
-                               ((u64)&xcall_flush_dcache_page_cheetah);
-                       cheetah_xcall_deliver(data0,
-                                             __pa(pg_addr),
-                                             0, mask);
+                       data0 = ((u64)&xcall_flush_dcache_page_cheetah);
  #endif
                 }
+               if (data0) {
+                       xcall_deliver(data0, __pa(pg_addr),
+                                     (u64) pg_addr, &mask);
  #ifdef CONFIG_DEBUG_DCFLUSH
-               atomic_inc(&dcpage_flushes_xcall);
+                       atomic_inc(&dcpage_flushes_xcall);
  #endif
+               }
         }
  
         put_cpu();
@@ -980,10 +922,10 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
  
  void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
  {
-       void *pg_addr = page_address(page);
         cpumask_t mask = cpu_online_map;
-       u64 data0;
+       void *pg_addr;
         int this_cpu;
+       u64 data0;
  
         if (tlb_type == hypervisor)
                 return;
@@ -997,49 +939,30 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
  #endif
         if (cpus_empty(mask))
                 goto flush_self;
+       data0 = 0;
+       pg_addr = page_address(page);
         if (tlb_type == spitfire) {
                 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
                 if (page_mapping(page) != NULL)
                         data0 |= ((u64)1 << 32);
-               spitfire_xcall_deliver(data0,
-                                      __pa(pg_addr),
-                                      (u64) pg_addr,
-                                      mask);
         } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
  #ifdef DCACHE_ALIASING_POSSIBLE
                 data0 = ((u64)&xcall_flush_dcache_page_cheetah);
-               cheetah_xcall_deliver(data0,
-                                     __pa(pg_addr),
-                                     0, mask);
  #endif
         }
+       if (data0) {
+               xcall_deliver(data0, __pa(pg_addr),
+                             (u64) pg_addr, &mask);
  #ifdef CONFIG_DEBUG_DCFLUSH
-       atomic_inc(&dcpage_flushes_xcall);
+               atomic_inc(&dcpage_flushes_xcall);
  #endif
+       }
   flush_self:
         __local_flush_dcache_page(page);
  
         put_cpu();
  }
  
-static void __smp_receive_signal_mask(cpumask_t mask)
-{
-       smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
-}
-
-void smp_receive_signal(int cpu)
-{
-       cpumask_t mask = cpumask_of_cpu(cpu);
-
-       if (cpu_online(cpu))
-               __smp_receive_signal_mask(mask);
-}
-
-void smp_receive_signal_client(int irq, struct pt_regs *regs)
-{
-       clear_softint(1 << irq);
-}
-
  void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
  {
         struct mm_struct *mm;
@@ -1078,11 +1001,6 @@ void kgdb_roundup_cpus(unsigned long flags)
  }
  #endif
  
-void smp_report_regs(void)
-{
-       smp_cross_call(&xcall_report_regs, 0, 0, 0);
-}
-
  #ifdef CONFIG_MAGIC_SYSRQ
  void smp_fetch_global_regs(void)
  {
@@ -1145,7 +1063,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
  
         smp_cross_call_masked(&xcall_flush_tlb_mm,
                               ctx, 0, 0,
-                             mm->cpu_vm_mask);
+                             &mm->cpu_vm_mask);
  
  local_flush_and_out:
         __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
@@ -1163,7 +1081,7 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
         else
                 smp_cross_call_masked(&xcall_flush_tlb_pending,
                                       ctx, nr, (unsigned long) vaddrs,
-                                     mm->cpu_vm_mask);
+                                     &mm->cpu_vm_mask);
  
         __flush_tlb_pending(ctx, nr, vaddrs);
  
@@ -1264,6 +1182,16 @@ void __devinit smp_prepare_boot_cpu(void)
  {
  }
  
+void __init smp_setup_processor_id(void)
+{
+       if (tlb_type == spitfire)
+               xcall_deliver_impl = spitfire_xcall_deliver;
+       else if (tlb_type == cheetah || tlb_type == cheetah_plus)
+               xcall_deliver_impl = cheetah_xcall_deliver;
+       else
+               xcall_deliver_impl = hypervisor_xcall_deliver;
+}
+
  void __devinit smp_fill_in_sib_core_maps(void)
  {
         unsigned int i;
@@ -1432,7 +1360,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
  
  void smp_send_reschedule(int cpu)
  {
-       smp_receive_signal(cpu);
+       xcall_deliver((u64) &xcall_receive_signal, 0, 0,
+                     &cpumask_of_cpu(cpu));
+}
+
+void smp_receive_signal_client(int irq, struct pt_regs *regs)
+{
+       clear_softint(1 << irq);
  }
  
  /* This is a nop because we capture all other cpus