return stat ? IRQ_HANDLED : IRQ_NONE;
 }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
 
 static irqreturn_t
 spu_irq_class_2(int irq, void *data)
 }
 EXPORT_SYMBOL_GPL(spu_free);
 
-static int spu_handle_mm_fault(struct spu *spu)
-{
-       struct mm_struct *mm = spu->mm;
-       struct vm_area_struct *vma;
-       u64 ea, dsisr, is_write;
-       int ret;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-#if 0
-       if (!IS_VALID_EA(ea)) {
-               return -EFAULT;
-       }
-#endif /* XXX */
-       if (mm == NULL) {
-               return -EFAULT;
-       }
-       if (mm->pgd == NULL) {
-               return -EFAULT;
-       }
-
-       down_read(&mm->mmap_sem);
-       vma = find_vma(mm, ea);
-       if (!vma)
-               goto bad_area;
-       if (vma->vm_start <= ea)
-               goto good_area;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto bad_area;
-#if 0
-       if (expand_stack(vma, ea))
-               goto bad_area;
-#endif /* XXX */
-good_area:
-       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-       if (is_write) {
-               if (!(vma->vm_flags & VM_WRITE))
-                       goto bad_area;
-       } else {
-               if (dsisr & MFC_DSISR_ACCESS_DENIED)
-                       goto bad_area;
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-                       goto bad_area;
-       }
-       ret = 0;
-       switch (handle_mm_fault(mm, vma, ea, is_write)) {
-       case VM_FAULT_MINOR:
-               current->min_flt++;
-               break;
-       case VM_FAULT_MAJOR:
-               current->maj_flt++;
-               break;
-       case VM_FAULT_SIGBUS:
-               ret = -EFAULT;
-               goto bad_area;
-       case VM_FAULT_OOM:
-               ret = -ENOMEM;
-               goto bad_area;
-       default:
-               BUG();
-       }
-       up_read(&mm->mmap_sem);
-       return ret;
-
-bad_area:
-       up_read(&mm->mmap_sem);
-       return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-       u64 ea, dsisr, access, error = 0UL;
-       int ret = 0;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-       if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-               u64 flags;
-
-               access = (_PAGE_PRESENT | _PAGE_USER);
-               access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-               local_irq_save(flags);
-               if (hash_page(ea, access, 0x300) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               local_irq_restore(flags);
-       }
-       if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-               if ((ret = spu_handle_mm_fault(spu)) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               else
-                       error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-       }
-       spu->dar = 0UL;
-       spu->dsisr = 0UL;
-       if (!error) {
-               spu_restart_dma(spu);
-       } else {
-               spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-       }
-       return ret;
-}
-
 struct sysdev_class spu_sysdev_class = {
        set_kset_name("spu")
 };
 
-obj-y += switch.o
+obj-y += switch.o fault.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
 
        return ret;
 }
 
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+       /* nothing to do here: a saved context restarts its DMA on restore */
+}
+
 struct spu_context_ops spu_backing_ops = {
        .mbox_read = spu_backing_mbox_read,
        .mbox_stat_read = spu_backing_mbox_stat_read,
        .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
        .get_mfc_free_elements = spu_backing_get_mfc_free_elements,
        .send_mfc_command = spu_backing_send_mfc_command,
+       .restart_dma = spu_backing_restart_dma,
 };
 
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * This ought to be kept in sync with the powerpc-specific do_page_fault
+ * function. Fortunately, there are a few corner cases that we have not
+ * had to handle here yet.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+       struct vm_area_struct *vma;
+       unsigned long is_write;
+       int ret;
+
+#if 0
+       if (!IS_VALID_EA(ea)) {
+               return -EFAULT;
+       }
+#endif /* XXX */
+       if (mm == NULL) {
+               return -EFAULT;
+       }
+       if (mm->pgd == NULL) {
+               return -EFAULT;
+       }
+
+       down_read(&mm->mmap_sem);
+       vma = find_vma(mm, ea);
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start <= ea)
+               goto good_area;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (expand_stack(vma, ea))
+               goto bad_area;
+good_area:
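+       /* an MFC "put" stores to the effective address, so it is a write */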
+       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+       if (is_write) {
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto bad_area;
+       } else {
+               if (dsisr & MFC_DSISR_ACCESS_DENIED)
+                       goto bad_area;
+               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                       goto bad_area;
+       }
+       ret = 0;
+       switch (handle_mm_fault(mm, vma, ea, is_write)) {
+       case VM_FAULT_MINOR:
+               current->min_flt++;
+               break;
+       case VM_FAULT_MAJOR:
+               current->maj_flt++;
+               break;
+       case VM_FAULT_SIGBUS:
+               ret = -EFAULT;
+               goto bad_area;
+       case VM_FAULT_OOM:
+               ret = -ENOMEM;
+               goto bad_area;
+       default:
+               BUG();
+       }
+       up_read(&mm->mmap_sem);
+       return ret;
+
+bad_area:
+       up_read(&mm->mmap_sem);
+       return -EFAULT;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx, int type)
+{
+       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+               ctx->event_return |= type;
+               wake_up_all(&ctx->stop_wq);
+       } else {
+               switch (type) {
+               case SPE_EVENT_DMA_ALIGNMENT:
+               case SPE_EVENT_SPE_DATA_STORAGE:
+               case SPE_EVENT_INVALID_DMA:
+                       force_sig(SIGBUS, /* info, */ current);
+                       break;
+               case SPE_EVENT_SPE_ERROR:
+                       force_sig(SIGILL, /* info */ current);
+                       break;
+               }
+       }
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+       spufs_handle_dma_error(spu->ctx, type);
+}
+EXPORT_SYMBOL_GPL(spufs_dma_callback);
+
+/*
+ * Bottom half handler for page faults. We can't do this from
+ * interrupt context, since we might need to sleep.
+ * We also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+       u64 ea, dsisr, access;
+       unsigned long flags;
+       int ret;
+
+       /*
+        * dar and dsisr get passed from the registers
+        * to the spu_context, to this function, but not
+        * back to the spu if it gets scheduled again.
+        *
+        * If we don't handle the fault for a saved context
+        * in time, we can still expect to get the same fault
+        * again immediately after the context restore.
+        */
+       if (ctx->state == SPU_STATE_RUNNABLE) {
+               ea = ctx->spu->dar;
+               dsisr = ctx->spu->dsisr;
+               ctx->spu->dar = ctx->spu->dsisr = 0;
+       } else {
+               ea = ctx->csa.priv1.mfc_dar_RW;
+               dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+               ctx->csa.priv1.mfc_dar_RW = 0;
+               ctx->csa.priv1.mfc_dsisr_RW = 0;
+       }
+
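+       /* nothing to do unless this was a translation or protection fault */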
+       if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+               return 0;
+
+       pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+               dsisr, ctx->state);
+
+       /* we must not hold the lock when entering spu_handle_mm_fault */
+       spu_release(ctx);
+
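+       /* try to fault in the hash table entry before taking the slow path */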
+       access = (_PAGE_PRESENT | _PAGE_USER);
+       access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+       local_irq_save(flags);
+       ret = hash_page(ea, access, 0x300);
+       local_irq_restore(flags);
+
+       /* hashing failed, so try the actual fault handler */
+       if (ret)
+               ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+       spu_acquire(ctx);
+       /*
+        * If we handled the fault successfully and are in runnable
+        * state, restart the DMA.
+        * In case of an unhandled error, report the problem to user space.
+        */
+       if (!ret) {
+               if (ctx->spu)
+                       ctx->ops->restart_dma(ctx);
+       } else {
+               spufs_handle_dma_error(ctx, SPE_EVENT_SPE_DATA_STORAGE);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(spufs_handle_class1);
 
        }
 }
 
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+       struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
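+       /* don't touch the MFC while a context save/restore is in progress */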
+       if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+               out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
 struct spu_context_ops spu_hw_ops = {
        .mbox_read = spu_hw_mbox_read,
        .mbox_stat_read = spu_hw_mbox_stat_read,
        .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
        .get_mfc_free_elements = spu_hw_get_mfc_free_elements,
        .send_mfc_command = spu_hw_send_mfc_command,
+       .restart_dma = spu_hw_restart_dma,
 };
 
        wake_up_all(&ctx->stop_wq);
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
-{
-       struct spu_context *ctx = spu->ctx;
-
-       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-               ctx->event_return |= type;
-               wake_up_all(&ctx->stop_wq);
-       } else {
-               switch (type) {
-               case SPE_EVENT_DMA_ALIGNMENT:
-               case SPE_EVENT_SPE_DATA_STORAGE:
-               case SPE_EVENT_INVALID_DMA:
-                       force_sig(SIGBUS, /* info, */ current);
-                       break;
-               case SPE_EVENT_SPE_ERROR:
-                       force_sig(SIGILL, /* info */ current);
-                       break;
-               }
-       }
-}
-
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
        struct spu *spu;
 static inline int spu_process_events(struct spu_context *ctx)
 {
        struct spu *spu = ctx->spu;
-       u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
        int ret = 0;
 
-       if (spu->dsisr & pte_fault)
-               ret = spu_irq_class_1_bottom(spu);
        if (spu->class_0_pending)
                ret = spu_irq_class_0_bottom(spu);
        if (!ret && signal_pending(current))
                                break;
                        status &= ~SPU_STATUS_STOPPED_BY_STOP;
                }
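+               /* handle a pending class 1 (translation) fault before going on */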
+               ret = spufs_handle_class1(ctx);
+               if (ret)
+                       break;
+
                if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
                        ret = spu_reacquire_runnable(ctx, npc, &status);
                        if (ret) {
 
                               struct spu_dma_info * info);
        void (*proxydma_info_read) (struct spu_context * ctx,
                                    struct spu_proxydma_info * info);
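+       /* restart the MFC DMA queue once a page fault has been resolved */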
+       void (*restart_dma)(struct spu_context *ctx);
 };
 
 extern struct spu_context_ops spu_hw_ops;
 void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
 /* context management */
 static inline void spu_acquire(struct spu_context *ctx)
 {
 
        int rc;
 
        acquire_spu_lock(spu);          /* Step 1.     */
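+       /* preserve the pending fault state across the context save */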
+       prev->dar = spu->dar;
+       prev->dsisr = spu->dsisr;
+       spu->dar = 0;
+       spu->dsisr = 0;
        rc = __do_spu_save(prev, spu);  /* Steps 2-53. */
        release_spu_lock(spu);
        if (rc != 0 && rc != 2 && rc != 6) {
 
        acquire_spu_lock(spu);
        harvest(NULL, spu);
-       spu->dar = 0;
-       spu->dsisr = 0;
        spu->slb_replace = 0;
+       new->dar = 0;
+       new->dsisr = 0;
        spu->class_0_pending = 0;
        rc = __do_spu_restore(new, spu);
        release_spu_lock(spu);
 
                           unsigned long vsid, pte_t *ptep, unsigned long trap,
                           unsigned int local);
 struct mm_struct;
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
                          unsigned long ea, unsigned long vsid, int local,
                          unsigned long trap);
 
        u64 spu_chnldata_RW[32];
        u32 spu_mailbox_data[4];
        u32 pu_mailbox_data[1];
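+       /* class 1 fault state, carried across save/restore */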
+       u64 dar, dsisr;
        unsigned long suspend_time;
        spinlock_t register_lock;
 };