pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - arch/x86/kernel/amd_iommu.c
AMD IOMMU: fix lazy IO/TLB flushing in unmap path
[linux-2.6-omap-h63xx.git] / arch / x86 / kernel / amd_iommu.c
index db64482b179639d1cfeeed6181903b4dbacc827c..4755bbc7ae5b71796c364ff2c93e3a167d9730e2 100644 (file)
@@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
-       return iommu->cap & IOMMU_CAP_NPCACHE;
+       return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
 }
 
 /****************************************************************************
@@ -470,10 +470,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  * efficient allocator.
  *
  ****************************************************************************/
-static unsigned long dma_mask_to_pages(unsigned long mask)
-{
-       return PAGE_ALIGN(mask) >> PAGE_SHIFT;
-}
 
 /*
  * The address allocator core function.
@@ -483,16 +479,17 @@ static unsigned long dma_mask_to_pages(unsigned long mask)
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
                                             struct dma_ops_domain *dom,
                                             unsigned int pages,
-                                            unsigned long align_mask)
+                                            unsigned long align_mask,
+                                            u64 dma_mask)
 {
-       unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
+       unsigned long limit;
        unsigned long address;
-       unsigned long size = dom->aperture_size >> PAGE_SHIFT;
        unsigned long boundary_size;
 
        boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
                        PAGE_SIZE) >> PAGE_SHIFT;
-       limit = limit < size ? limit : size;
+       limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
+                                      dma_mask >> PAGE_SHIFT);
 
        if (dom->next_bit >= limit) {
                dom->next_bit = 0;
@@ -529,6 +526,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 {
        address >>= PAGE_SHIFT;
        iommu_area_free(dom->bitmap, address, pages);
+
+       if (address + pages >= dom->next_bit)
+               dom->need_flush = true;
 }
 
 /****************************************************************************
@@ -571,7 +571,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
        if (start_page + pages > last_page)
                pages = last_page - start_page;
 
-       set_bit_string(dom->bitmap, start_page, pages);
+       iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
 static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
@@ -919,7 +919,8 @@ static dma_addr_t __map_single(struct device *dev,
                               phys_addr_t paddr,
                               size_t size,
                               int dir,
-                              bool align)
+                              bool align,
+                              u64 dma_mask)
 {
        dma_addr_t offset = paddr & ~PAGE_MASK;
        dma_addr_t address, start;
@@ -933,7 +934,8 @@ static dma_addr_t __map_single(struct device *dev,
        if (align)
                align_mask = (1UL << get_order(size)) - 1;
 
-       address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask);
+       address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
+                                         dma_mask);
        if (unlikely(address == bad_dma_address))
                goto out;
 
@@ -945,7 +947,7 @@ static dma_addr_t __map_single(struct device *dev,
        }
        address += offset;
 
-       if (unlikely(dma_dom->need_flush && !iommu_fullflush)) {
+       if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
                iommu_flush_tlb(iommu, dma_dom->domain.id);
                dma_dom->need_flush = false;
        } else if (unlikely(iommu_has_npcache(iommu)))
@@ -982,8 +984,10 @@ static void __unmap_single(struct amd_iommu *iommu,
 
        dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
-       if (iommu_fullflush)
+       if (amd_iommu_unmap_flush || dma_dom->need_flush) {
                iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+               dma_dom->need_flush = false;
+       }
 }
 
 /*
@@ -997,10 +1001,13 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
        struct protection_domain *domain;
        u16 devid;
        dma_addr_t addr;
+       u64 dma_mask;
 
        if (!check_device(dev))
                return bad_dma_address;
 
+       dma_mask = *dev->dma_mask;
+
        get_device_resources(dev, &iommu, &domain, &devid);
 
        if (iommu == NULL || domain == NULL)
@@ -1008,7 +1015,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
                return (dma_addr_t)paddr;
 
        spin_lock_irqsave(&domain->lock, flags);
-       addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false);
+       addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
+                           dma_mask);
        if (addr == bad_dma_address)
                goto out;
 
@@ -1080,10 +1088,13 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
        struct scatterlist *s;
        phys_addr_t paddr;
        int mapped_elems = 0;
+       u64 dma_mask;
 
        if (!check_device(dev))
                return 0;
 
+       dma_mask = *dev->dma_mask;
+
        get_device_resources(dev, &iommu, &domain, &devid);
 
        if (!iommu || !domain)
@@ -1095,7 +1106,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
                paddr = sg_phys(s);
 
                s->dma_address = __map_single(dev, iommu, domain->priv,
-                                             paddr, s->length, dir, false);
+                                             paddr, s->length, dir, false,
+                                             dma_mask);
 
                if (s->dma_address) {
                        s->dma_length = s->length;
@@ -1168,6 +1180,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
        struct protection_domain *domain;
        u16 devid;
        phys_addr_t paddr;
+       u64 dma_mask = dev->coherent_dma_mask;
 
        if (!check_device(dev))
                return NULL;
@@ -1187,10 +1200,13 @@ static void *alloc_coherent(struct device *dev, size_t size,
                return virt_addr;
        }
 
+       if (!dma_mask)
+               dma_mask = *dev->dma_mask;
+
        spin_lock_irqsave(&domain->lock, flags);
 
        *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
-                                size, DMA_BIDIRECTIONAL, true);
+                                size, DMA_BIDIRECTIONAL, true, dma_mask);
 
        if (*dma_addr == bad_dma_address) {
                free_pages((unsigned long)virt_addr, get_order(size));