* 04/07/.. ak Better overflow handling. Assorted fixes.
* 05/09/10 linville Add support for syncing ranges, support syncing for
* DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
+ * 08/12/11 beckyb Add highmem support
*/
#include <linux/cache.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
-#include <linux/swiotlb.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
+#include <linux/pfn.h>
#include <linux/types.h>
#include <linux/ctype.h>
+#include <linux/highmem.h>
#include <asm/io.h>
#include <asm/dma.h>
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))
-#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
* We need to save away the original address corresponding to a mapped entry
* for the sync operations.
*/
-static unsigned char **io_tlb_orig_addr;
+static phys_addr_t *io_tlb_orig_addr;
/*
* Protect the above data structures in the map and unmap calls
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
-void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
{
return alloc_bootmem_low_pages(size);
}
return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
}
-dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr)
+dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
{
return paddr;
}
return baddr;
}
-static dma_addr_t swiotlb_virt_to_bus(volatile void *address)
+static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
+ volatile void *address)
{
- return swiotlb_phys_to_bus(virt_to_phys(address));
+ return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
}
static void *swiotlb_bus_to_virt(dma_addr_t address)
return 0;
}
+/*
+ * Log the location of the software IO TLB: one line with its size in MB
+ * and the io_tlb_start/io_tlb_end pointers, and a second line with the
+ * same two bounds converted through virt_to_phys().
+ */
+static void swiotlb_print_info(unsigned long bytes)
+{
+	phys_addr_t pstart = virt_to_phys(io_tlb_start);
+	phys_addr_t pend = virt_to_phys(io_tlb_end);
+
+	printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
+	       bytes >> 20, io_tlb_start, io_tlb_end);
+
+	/* phys_addr_t width is not fixed here, so widen before printing */
+	printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
+	       (unsigned long long)pstart, (unsigned long long)pend);
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
/*
* Get the overflow emergency buffer
if (!io_tlb_overflow_buffer)
panic("Cannot allocate SWIOTLB overflow buffer!\n");
- printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
- swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
+ swiotlb_print_info(bytes);
}
void __init
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(char *)));
+ io_tlb_orig_addr = (phys_addr_t *)
+ __get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs *
+ sizeof(phys_addr_t)));
if (!io_tlb_orig_addr)
goto cleanup3;
- memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+ memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
/*
* Get the overflow emergency buffer
if (!io_tlb_overflow_buffer)
goto cleanup4;
- printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
- "0x%lx\n", bytes >> 20,
- swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
+ swiotlb_print_info(bytes);
return 0;
cleanup4:
- free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
- sizeof(char *)));
+ free_pages((unsigned long)io_tlb_orig_addr,
+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
io_tlb_orig_addr = NULL;
cleanup3:
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
return addr >= io_tlb_start && addr < io_tlb_end;
}
-static void
-__sync_single(char *buffer, char *dma_addr, size_t size, int dir)
-{
- if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, buffer, size);
- else
- memcpy(buffer, dma_addr, size);
+/*
+ * Bounce: copy data between the original buffer at physical address @phys
+ * and the swiotlb bounce buffer at @dma_addr.
+ *
+ * @phys:     physical address of the original buffer
+ * @dma_addr: address of the bounce buffer, accessed directly with memcpy()
+ * @size:     number of bytes to copy
+ * @dir:      DMA_TO_DEVICE copies original buffer -> bounce buffer; any
+ *            other value copies bounce buffer -> original buffer
+ *
+ * If the first page of the original buffer is a highmem page it has no
+ * permanent kernel mapping, so the copy is done one page at a time through
+ * a temporary atomic mapping.  Otherwise phys_to_virt() is used directly.
+ */
+static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+			   enum dma_data_direction dir)
+{
+	unsigned long pfn = PFN_DOWN(phys);
+
+	if (PageHighMem(pfn_to_page(pfn))) {
+		/* The buffer does not have a mapping.  Map it in and copy */
+		unsigned int offset = phys & ~PAGE_MASK;
+		char *buffer;
+		size_t sz;
+		unsigned long flags;
+
+		while (size) {
+			/*
+			 * Never copy past the end of the current page.
+			 * min_t() is used because PAGE_SIZE - offset and
+			 * size need not have the same type (e.g. unsigned
+			 * long vs. a 32-bit size_t), which the type-checked
+			 * min() complains about.
+			 */
+			sz = min_t(size_t, PAGE_SIZE - offset, size);
+
+			/* keep interrupts off while the atomic kmap is held */
+			local_irq_save(flags);
+			buffer = kmap_atomic(pfn_to_page(pfn),
+					     KM_BOUNCE_READ);
+			if (dir == DMA_TO_DEVICE)
+				memcpy(dma_addr, buffer + offset, sz);
+			else
+				memcpy(buffer + offset, dma_addr, sz);
+			kunmap_atomic(buffer, KM_BOUNCE_READ);
+			local_irq_restore(flags);
+
+			size -= sz;
+			pfn++;
+			dma_addr += sz;
+			/* only the first page can start mid-page */
+			offset = 0;
+		}
+	} else {
+		if (dir == DMA_TO_DEVICE)
+			memcpy(dma_addr, phys_to_virt(phys), size);
+		else
+			memcpy(phys_to_virt(phys), dma_addr, size);
+	}
}
/*
* Allocates bounce buffer and returns its kernel virtual address.
*/
static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
+map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
{
unsigned long flags;
char *dma_addr;
unsigned long max_slots;
mask = dma_get_seg_boundary(hwdev);
- start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
+ start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
* needed.
*/
for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
+ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
return dma_addr;
}
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
+ phys_addr_t phys = io_tlb_orig_addr[index];
/*
* First, sync the memory before unmapping the entry
*/
- if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- /*
- * bounce... copy the data back into the original buffer * and
- * delete the bounce buffer.
- */
- __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
int dir, int target)
{
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
+ phys_addr_t phys = io_tlb_orig_addr[index];
- buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
else
BUG_ON(dir != DMA_FROM_DEVICE);
break;
dma_mask = hwdev->coherent_dma_mask;
ret = (void *)__get_free_pages(flags, order);
- if (ret && !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(ret), size)) {
+ if (ret &&
+ !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret),
+ size)) {
/*
* The allocated memory isn't reachable by the device.
* Fall back on swiotlb_map_single().
* swiotlb_map_single(), which will grab memory from
* the lowest available address range.
*/
- ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
if (!ret)
return NULL;
}
memset(ret, 0, size);
- dev_addr = swiotlb_virt_to_bus(ret);
+ dev_addr = swiotlb_virt_to_bus(hwdev, ret);
/* Confirm address can be DMA'd by device */
if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
*dma_handle = dev_addr;
return ret;
}
+EXPORT_SYMBOL(swiotlb_alloc_coherent);
void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
}
+EXPORT_SYMBOL(swiotlb_free_coherent);
static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
int dir, struct dma_attrs *attrs)
{
- dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
+ dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, ptr);
void *map;
BUG_ON(dir == DMA_NONE);
/*
* Oh well, have to allocate and map a bounce buffer.
*/
- map = map_single(hwdev, ptr, size, dir);
+ map = map_single(hwdev, virt_to_phys(ptr), size, dir);
if (!map) {
swiotlb_full(hwdev, size, dir, 1);
map = io_tlb_overflow_buffer;
}
- dev_addr = swiotlb_virt_to_bus(map);
+ dev_addr = swiotlb_virt_to_bus(hwdev, map);
/*
* Ensure that the address returned is DMA'ble
{
return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL);
}
+EXPORT_SYMBOL(swiotlb_map_single);
/*
* Unmap a single streaming mode DMA translation. The dma_addr and size must
{
return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
}
+EXPORT_SYMBOL(swiotlb_unmap_single);
+
/*
* Make physical memory consistent for a single streaming mode DMA translation
* after a transfer.
{
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
{
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
/*
* Same as above, but for a sub-range of the mapping.
swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
SYNC_FOR_CPU);
}
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
SYNC_FOR_DEVICE);
}
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
- struct dma_attrs *);
/*
* Map a set of buffers described by scatterlist in streaming mode for DMA.
* This is the scatter-gather version of the above swiotlb_map_single
int dir, struct dma_attrs *attrs)
{
struct scatterlist *sg;
- void *addr;
- dma_addr_t dev_addr;
int i;
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- dev_addr = swiotlb_virt_to_bus(addr);
- if (range_needs_mapping(sg_virt(sg), sg->length) ||
+ void *addr = sg_virt(sg);
+ dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, addr);
+
+ if (range_needs_mapping(addr, sg->length) ||
address_needs_mapping(hwdev, dev_addr, sg->length)) {
- void *map = map_single(hwdev, addr, sg->length, dir);
+ void *map = map_single(hwdev, sg_phys(sg),
+ sg->length, dir);
if (!map) {
/* Don't panic here, we expect map_sg users
to do proper error handling. */
sgl[0].dma_length = 0;
return 0;
}
- sg->dma_address = swiotlb_virt_to_bus(map);
+ sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
} else
sg->dma_address = dev_addr;
sg->dma_length = sg->length;
{
return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
+EXPORT_SYMBOL(swiotlb_map_sg);
/*
* Unmap a set of streaming mode DMA translations. Again, cpu read rules
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg)))
unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
sg->dma_length, dir);
else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+ dma_mark_clean(sg_virt(sg), sg->dma_length);
}
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
{
return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
+EXPORT_SYMBOL(swiotlb_unmap_sg);
/*
* Make physical memory consistent for a set of streaming mode DMA translations
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg)))
sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
sg->dma_length, dir, target);
else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+ dma_mark_clean(sg_virt(sg), sg->dma_length);
}
}
{
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
{
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer));
+ return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
}
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
/*
* Return whether the given device DMA address mask can be supported
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask;
+ return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
}
-
-EXPORT_SYMBOL(swiotlb_map_single);
-EXPORT_SYMBOL(swiotlb_unmap_single);
-EXPORT_SYMBOL(swiotlb_map_sg);
-EXPORT_SYMBOL(swiotlb_unmap_sg);
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
-EXPORT_SYMBOL(swiotlb_dma_mapping_error);
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-EXPORT_SYMBOL(swiotlb_free_coherent);
EXPORT_SYMBOL(swiotlb_dma_supported);