[POWERPC] spufs: move fault, lscsa_alloc and switch code to spufs module

[linux-2.6-omap-h63xx.git] / arch / powerpc / platforms / cell / spufs / sched.c
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c

index 5bebe7fbe056de17a39a8b78d419f67580735f74..ee80de07c0bcb3c44d0a11241f4bfcbc2c1376d1 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -105,15 +105,15 @@ void spu_set_timeslice(struct spu_context *ctx)
  void __spu_update_sched_info(struct spu_context *ctx)
  {
         /*
-        * 32-Bit assignment are atomic on powerpc, and we don't care about
-        * memory ordering here because retriving the controlling thread is
-        * per defintion racy.
+        * 32-Bit assignments are atomic on powerpc, and we don't care about
+        * memory ordering here because retrieving the controlling thread is
+        * per definition racy.
          */
         ctx->tid = current->pid;
  
         /*
          * We do our own priority calculations, so we normally want
-        * ->static_prio to start with. Unfortunately thies field
+        * ->static_prio to start with. Unfortunately this field
          * contains junk for threads with a realtime scheduling
          * policy so we have to look at ->prio in this case.
          */
@@ -127,7 +127,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
          * A lot of places that don't hold list_mutex poke into
          * cpus_allowed, including grab_runnable_context which
          * already holds the runq_lock.  So abuse runq_lock
-        * to protect this field aswell.
+        * to protect this field as well.
          */
         spin_lock(&spu_prio->runq_lock);
         ctx->cpus_allowed = current->cpus_allowed;
@@ -182,7 +182,7 @@ static void notify_spus_active(void)
          * Wake up the active spu_contexts.
          *
          * When the awakened processes see their "notify_active" flag is set,
-        * they will call spu_switch_notify();
+        * they will call spu_switch_notify().
          */
         for_each_online_node(node) {
                 struct spu *spu;
@@ -230,8 +230,6 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
  
         if (ctx->flags & SPU_CREATE_NOSCHED)
                 atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
-       if (!list_empty(&ctx->aff_list))
-               atomic_inc(&ctx->gang->aff_sched_count);
  
         ctx->stats.slb_flt_base = spu->stats.slb_flt;
         ctx->stats.class2_intr_base = spu->stats.class2_intr;
@@ -392,7 +390,6 @@ static int has_affinity(struct spu_context *ctx)
         if (list_empty(&ctx->aff_list))
                 return 0;
  
-       mutex_lock(&gang->aff_mutex);
         if (!gang->aff_ref_spu) {
                 if (!(gang->aff_flags & AFF_MERGED))
                         aff_merge_remaining_ctxs(gang);
@@ -400,7 +397,6 @@ static int has_affinity(struct spu_context *ctx)
                         aff_set_offsets(gang);
                 aff_set_ref_point_location(gang);
         }
-       mutex_unlock(&gang->aff_mutex);
  
         return gang->aff_ref_spu != NULL;
  }
@@ -418,9 +414,16 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
  
         if (spu->ctx->flags & SPU_CREATE_NOSCHED)
                 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
-       if (!list_empty(&ctx->aff_list))
-               if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
-                       ctx->gang->aff_ref_spu = NULL;
+
+       if (ctx->gang){
+               mutex_lock(&ctx->gang->aff_mutex);
+               if (has_affinity(ctx)) {
+                       if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
+                               ctx->gang->aff_ref_spu = NULL;
+               }
+               mutex_unlock(&ctx->gang->aff_mutex);
+       }
+
         spu_switch_notify(spu, NULL);
         spu_unmap_mappings(ctx);
         spu_save(&ctx->csa, spu);
@@ -511,20 +514,32 @@ static void spu_prio_wait(struct spu_context *ctx)
  
  static struct spu *spu_get_idle(struct spu_context *ctx)
  {
-       struct spu *spu;
+       struct spu *spu, *aff_ref_spu;
         int node, n;
  
-       if (has_affinity(ctx)) {
-               node = ctx->gang->aff_ref_spu->node;
+       if (ctx->gang) {
+               mutex_lock(&ctx->gang->aff_mutex);
+               if (has_affinity(ctx)) {
+                       aff_ref_spu = ctx->gang->aff_ref_spu;
+                       atomic_inc(&ctx->gang->aff_sched_count);
+                       mutex_unlock(&ctx->gang->aff_mutex);
+                       node = aff_ref_spu->node;
  
-               mutex_lock(&cbe_spu_info[node].list_mutex);
-               spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
-               if (spu && spu->alloc_state == SPU_FREE)
-                       goto found;
-               mutex_unlock(&cbe_spu_info[node].list_mutex);
-               return NULL;
-       }
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
+                       spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+                       if (spu && spu->alloc_state == SPU_FREE)
+                               goto found;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+                       mutex_lock(&ctx->gang->aff_mutex);
+                       if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
+                               ctx->gang->aff_ref_spu = NULL;
+                       mutex_unlock(&ctx->gang->aff_mutex);
  
+                       return NULL;
+               }
+               mutex_unlock(&ctx->gang->aff_mutex);
+       }
         node = cpu_to_node(raw_smp_processor_id());
         for (n = 0; n < MAX_NUMNODES; n++, node++) {
                 node = (node < MAX_NUMNODES) ? node : 0;
@@ -564,7 +579,7 @@ static struct spu *find_victim(struct spu_context *ctx)
         /*
          * Look for a possible preemption candidate on the local node first.
          * If there is no candidate look at the other nodes.  This isn't
-        * exactly fair, but so far the whole spu schedule tries to keep
+        * exactly fair, but so far the whole spu scheduler tries to keep
          * a strong node affinity.  We might want to fine-tune this in
          * the future.
          */
@@ -877,6 +892,38 @@ static int spusched_thread(void *unused)
         return 0;
  }
  
+void spuctx_switch_state(struct spu_context *ctx,
+               enum spu_utilization_state new_state)
+{
+       unsigned long long curtime;
+       signed long long delta;
+       struct timespec ts;
+       struct spu *spu;
+       enum spu_utilization_state old_state;
+
+       ktime_get_ts(&ts);
+       curtime = timespec_to_ns(&ts);
+       delta = curtime - ctx->stats.tstamp;    /* since the stored stamp */
+
+       WARN_ON(!mutex_is_locked(&ctx->state_mutex));   /* warn if unlocked */
+       WARN_ON(delta < 0);     /* curtime is before the stored stamp */
+
+       spu = ctx->spu;         /* may be NULL; checked below */
+       old_state = ctx->stats.util_state;
+       ctx->stats.util_state = new_state;
+       ctx->stats.tstamp = curtime;
+
+       /*
+        * Only if ctx->spu is set: charge delta to old_state (ctx and spu).
+        */
+       if (spu) {
+               ctx->stats.times[old_state] += delta;
+               spu->stats.times[old_state] += delta;
+               spu->stats.util_state = new_state;
+               spu->stats.tstamp = curtime;
+       }
+}
+
  #define LOAD_INT(x) ((x) >> FSHIFT)
  #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
  
@@ -890,7 +937,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
  
         /*
          * Note that last_pid doesn't really make much sense for the
-        * SPU loadavg (it even seems very odd on the CPU side..),
+        * SPU loadavg (it even seems very odd on the CPU side...),
          * but we include it here to have a 100% compatible interface.
          */
         seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",