/* IOMMU sizing */
 #define IO_SEGMENT_SHIFT       28
-#define IO_PAGENO_BITS         (IO_SEGMENT_SHIFT - IOMMU_PAGE_SHIFT)
+#define IO_PAGENO_BITS(shift)  (IO_SEGMENT_SHIFT - (shift))
 
 /* The high bit needs to be set on every DMA address */
 #define SPIDER_DMA_OFFSET      0x80000000ul
 
 static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
                unsigned long base, unsigned long size, unsigned long gap_base,
-               unsigned long gap_size)
+               unsigned long gap_size, unsigned long page_shift)
 {
        struct page *page;
        int i;
 
        start_seg = base >> IO_SEGMENT_SHIFT;
        segments  = size >> IO_SEGMENT_SHIFT;
-       pages_per_segment = 1ull << IO_PAGENO_BITS;
+       pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift);
+       /* PTEs for each segment must start on a 4K boundary */
+       pages_per_segment = max(pages_per_segment,
+                               (1 << 12) / sizeof(unsigned long));
 
        ptab_size = segments * pages_per_segment * sizeof(unsigned long);
        pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __FUNCTION__,
        /* initialise the STEs */
        reg = IOSTE_V | ((n_pte_pages - 1) << 5);
 
-       if (IOMMU_PAGE_SIZE == 0x1000)
-               reg |= IOSTE_PS_4K;
-       else if (IOMMU_PAGE_SIZE == 0x10000)
-               reg |= IOSTE_PS_64K;
-       else {
-               extern void __unknown_page_size_error(void);
-               __unknown_page_size_error();
+       switch (page_shift) {
+       case 12: reg |= IOSTE_PS_4K;  break;
+       case 16: reg |= IOSTE_PS_64K; break;
+       case 20: reg |= IOSTE_PS_1M;  break;
+       case 24: reg |= IOSTE_PS_16M; break;
+       default: BUG();
        }
 
        gap_base = gap_base >> IO_SEGMENT_SHIFT;
        unsigned long base, unsigned long size)
 {
        cell_iommu_setup_stab(iommu, base, size, 0, 0);
-       iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0);
+       iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+                                           IOMMU_PAGE_SHIFT);
        cell_iommu_enable_hardware(iommu);
 }
 
        int i;
        unsigned long base_pte, uaddr, *io_pte, *ptab;
 
-       ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize);
+       ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize,
+                                    IOMMU_PAGE_SHIFT);
 
        dma_iommu_fixed_base = fbase;
 
                         dbase + dsize, fbase, fbase + fsize);
 
                cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
-               iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0);
+               iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+                                                   IOMMU_PAGE_SHIFT);
                cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
                                             fbase, fsize);
                cell_iommu_enable_hardware(iommu);