X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fi915%2Fi915_gem.c;h=d58ddef468f8124c29f6af32412ee2ba31f9873e;hb=151903d5466fbcfb56ce792c3d5ea0ecbae15d07;hp=17ae330ff269b10610d12a237ed9412f8e5e5856;hpb=4944dd62de21230af039eda7cd218e9a09021d11;p=linux-2.6-omap-h63xx.git

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 17ae330ff26..d58ddef468f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -31,6 +31,8 @@
 #include "i915_drv.h"
 #include <linux/swap.h>
 
+#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+
 static int
 i915_gem_object_set_domain(struct drm_gem_object *obj,
                             uint32_t read_domains,
@@ -79,6 +81,22 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
         return 0;
 }
 
+int
+i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+                            struct drm_file *file_priv)
+{
+        struct drm_i915_gem_get_aperture *args = data;
+
+        if (!(dev->driver->driver_features & DRIVER_GEM))
+                return -ENODEV;
+
+        args->aper_size = dev->gtt_total;
+        args->aper_available_size = (args->aper_size -
+                                     atomic_read(&dev->pin_memory));
+
+        return 0;
+}
+
 /**
  * Creates a new mm object and returns a handle to it.
@@ -171,35 +189,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
         return 0;
 }
 
-/*
- * Try to write quickly with an atomic kmap. Return true on success.
- *
- * If this fails (which includes a partial write), we'll redo the whole
- * thing with the slow version.
- *
- * This is a workaround for the low performance of iounmap (approximate
- * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels
- * happens to let us map card memory without taking IPIs. When the vmap
- * rework lands we should be able to dump this hack.
+/* This is the fast write path, which cannot handle
+ * page faults in the source data.
  */
-static inline int fast_user_write(unsigned long pfn, char __user *user_data,
-                                   int l, int o)
+
+static inline int
+fast_user_write(struct io_mapping *mapping,
+                loff_t page_base, int page_offset,
+                char __user *user_data,
+                int length)
 {
-#ifdef CONFIG_HIGHMEM
-        unsigned long unwritten;
         char *vaddr_atomic;
+        unsigned long unwritten;
 
-        vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-        DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-                 i, o, l, pfn, vaddr_atomic);
-#endif
-        unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
-        kunmap_atomic(vaddr_atomic, KM_USER0);
-        return !unwritten;
-#else
+        vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
+        unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
+                                                      user_data, length);
+        io_mapping_unmap_atomic(vaddr_atomic);
+        if (unwritten)
+                return -EFAULT;
+        return 0;
+}
+
+/* Here's the slow write path, which can sleep while
+ * handling page faults.
+ */
+
+static inline int
+slow_user_write(struct io_mapping *mapping,
+                loff_t page_base, int page_offset,
+                char __user *user_data,
+                int length)
+{
+        char __iomem *vaddr;
+        unsigned long unwritten;
+
+        vaddr = io_mapping_map_wc(mapping, page_base);
+        if (vaddr == NULL)
+                return -EFAULT;
+        unwritten = __copy_from_user(vaddr + page_offset,
+                                     user_data, length);
+        io_mapping_unmap(vaddr);
+        if (unwritten)
+                return -EFAULT;
         return 0;
-#endif
 }
 
 static int
@@ -208,10 +241,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                     struct drm_file *file_priv)
 {
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
+        drm_i915_private_t *dev_priv = dev->dev_private;
         ssize_t remain;
-        loff_t offset;
+        loff_t offset, page_base;
         char __user *user_data;
-        int ret = 0;
+        int page_offset, page_length;
+        int ret;
 
         user_data = (char __user *) (uintptr_t) args->data_ptr;
         remain = args->size;
@@ -235,57 +270,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
         obj_priv->dirty = 1;
 
         while (remain > 0) {
-                unsigned long pfn;
-                int i, o, l;
-
                 /* Operation in this page
                  *
-                 * i = page number
-                 * o = offset within page
-                 * l = bytes to copy
+                 * page_base = page offset within aperture
+                 * page_offset = offset within page
+                 * page_length = bytes to copy for this page
                  */
-                i = offset >> PAGE_SHIFT;
-                o = offset & (PAGE_SIZE-1);
-                l = remain;
-                if ((o + l) > PAGE_SIZE)
-                        l = PAGE_SIZE - o;
-
-                pfn = (dev->agp->base >> PAGE_SHIFT) + i;
-
-                if (!fast_user_write(pfn, user_data, l, o)) {
-                        unsigned long unwritten;
-                        char __iomem *vaddr;
-
-                        vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
-#if WATCH_PWRITE
-                        DRM_INFO("pwrite slow i %d o %d l %d "
-                                 "pfn %ld vaddr %p\n",
-                                 i, o, l, pfn, vaddr);
-#endif
-                        if (vaddr == NULL) {
-                                ret = -EFAULT;
-                                goto fail;
-                        }
-                        unwritten = __copy_from_user(vaddr + o, user_data, l);
-#if WATCH_PWRITE
-                        DRM_INFO("unwritten %ld\n", unwritten);
-#endif
-                        iounmap(vaddr);
-                        if (unwritten) {
-                                ret = -EFAULT;
+                page_base = (offset & ~(PAGE_SIZE-1));
+                page_offset = offset & (PAGE_SIZE-1);
+                page_length = remain;
+                if ((page_offset + remain) > PAGE_SIZE)
+                        page_length = PAGE_SIZE - page_offset;
+
+                ret = fast_user_write(dev_priv->mm.gtt_mapping, page_base,
+                                      page_offset, user_data, page_length);
+
+                /* If we get a fault while copying data, then (presumably) our
+                 * source page isn't available.
+                 * In this case, use the non-atomic function.
+                 */
+                if (ret) {
+                        ret = slow_user_write(dev_priv->mm.gtt_mapping,
+                                              page_base, page_offset,
+                                              user_data, page_length);
+                        if (ret)
                                 goto fail;
-                        }
                 }
 
-                remain -= l;
-                user_data += l;
-                offset += l;
+                remain -= page_length;
+                user_data += page_length;
+                offset += page_length;
         }
 
-#if WATCH_PWRITE && 1
-        i915_gem_clflush_object(obj);
-        i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
-        i915_gem_clflush_object(obj);
-#endif
 
 fail:
         i915_gem_object_unpin(obj);
 
@@ -1436,11 +1451,9 @@ i915_gem_object_set_domain_range(struct drm_gem_object *obj,
                                           read_domains, write_domain);
 
         /* Wait on any GPU rendering to the object to be flushed. */
-        if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
-                ret = i915_gem_object_wait_rendering(obj);
-                if (ret)
-                        return ret;
-        }
+        ret = i915_gem_object_wait_rendering(obj);
+        if (ret)
+                return ret;
 
         if (obj_priv->page_cpu_valid == NULL) {
                 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
@@ -1503,12 +1516,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                                  struct drm_i915_gem_exec_object *entry)
 {
         struct drm_device *dev = obj->dev;
+        drm_i915_private_t *dev_priv = dev->dev_private;
         struct drm_i915_gem_relocation_entry reloc;
         struct drm_i915_gem_relocation_entry __user *relocs;
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
         int i, ret;
-        uint32_t last_reloc_offset = -1;
-        void __iomem *reloc_page = NULL;
+        void __iomem *reloc_page;
 
         /* Choose the GTT offset for our buffer and put it there. */
         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
@@ -1631,26 +1644,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                  * perform.
                  */
                 reloc_offset = obj_priv->gtt_offset + reloc.offset;
-                if (reloc_page == NULL ||
-                    (last_reloc_offset & ~(PAGE_SIZE - 1)) !=
-                    (reloc_offset & ~(PAGE_SIZE - 1))) {
-                        if (reloc_page != NULL)
-                                iounmap(reloc_page);
-
-                        reloc_page = ioremap_wc(dev->agp->base +
-                                                (reloc_offset &
-                                                 ~(PAGE_SIZE - 1)),
-                                                PAGE_SIZE);
-                        last_reloc_offset = reloc_offset;
-                        if (reloc_page == NULL) {
-                                drm_gem_object_unreference(target_obj);
-                                i915_gem_object_unpin(obj);
-                                return -ENOMEM;
-                        }
-                }
-
+                reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+                                                      (reloc_offset &
+                                                       ~(PAGE_SIZE - 1)));
                 reloc_entry = (uint32_t __iomem *)(reloc_page +
-                                                  (reloc_offset & (PAGE_SIZE - 1)));
+                                                   (reloc_offset & (PAGE_SIZE - 1)));
                 reloc_val = target_obj_priv->gtt_offset + reloc.delta;
 
 #if WATCH_BUF
@@ -1659,6 +1657,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                           readl(reloc_entry), reloc_val);
 #endif
                 writel(reloc_val, reloc_entry);
+                io_mapping_unmap_atomic(reloc_page);
 
                 /* Write the updated presumed offset for this entry back out
                  * to the user.
@@ -1674,9 +1673,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                 drm_gem_object_unreference(target_obj);
         }
 
-        if (reloc_page != NULL)
-                iounmap(reloc_page);
-
 #if WATCH_BUF
         if (0)
                 i915_gem_dump_object(obj, 128, __func__, ~0);
@@ -1870,17 +1866,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
         for (i = 0; i < args->buffer_count; i++) {
                 struct drm_gem_object *obj = object_list[i];
-                struct drm_i915_gem_object *obj_priv = obj->driver_private;
-
-                if (obj_priv->gtt_space == NULL) {
-                        /* We evicted the buffer in the process of validating
-                         * our set of buffers in. We could try to recover by
-                         * kicking them everything out and trying again from
-                         * the start.
-                         */
-                        ret = -ENOMEM;
-                        goto err;
-                }
 
                 /* make sure all previous memory operations have passed */
                 ret = i915_gem_object_set_domain(obj,
@@ -2299,29 +2284,52 @@ i915_gem_idle(struct drm_device *dev)
 
         i915_gem_retire_requests(dev);
 
-        /* Active and flushing should now be empty as we've
-         * waited for a sequence higher than any pending execbuffer
-         */
-        BUG_ON(!list_empty(&dev_priv->mm.active_list));
-        BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
+        if (!dev_priv->mm.wedged) {
+                /* Active and flushing should now be empty as we've
+                 * waited for a sequence higher than any pending execbuffer
+                 */
+                WARN_ON(!list_empty(&dev_priv->mm.active_list));
+                WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
+                /* Request should now be empty as we've also waited
+                 * for the last request in the list
+                 */
+                WARN_ON(!list_empty(&dev_priv->mm.request_list));
+        }
 
-        /* Request should now be empty as we've also waited
-         * for the last request in the list
+        /* Empty the active and flushing lists to inactive. If there's
+         * anything left at this point, it means that we're wedged and
+         * nothing good's going to happen by leaving them there. So strip
+         * the GPU domains and just stuff them onto inactive.
         */
-        BUG_ON(!list_empty(&dev_priv->mm.request_list));
+        while (!list_empty(&dev_priv->mm.active_list)) {
+                struct drm_i915_gem_object *obj_priv;
+
+                obj_priv = list_first_entry(&dev_priv->mm.active_list,
+                                            struct drm_i915_gem_object,
+                                            list);
+                obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+                i915_gem_object_move_to_inactive(obj_priv->obj);
+        }
 
-        /* Move all buffers out of the GTT. */
+        while (!list_empty(&dev_priv->mm.flushing_list)) {
+                struct drm_i915_gem_object *obj_priv;
+
+                obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
+                                            struct drm_i915_gem_object,
+                                            list);
+                obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+                i915_gem_object_move_to_inactive(obj_priv->obj);
+        }
+
+        /* Move all inactive buffers out of the GTT. */
         ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
+        WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
         if (ret) {
                 mutex_unlock(&dev->struct_mutex);
                 return ret;
         }
 
-        BUG_ON(!list_empty(&dev_priv->mm.active_list));
-        BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
-        BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
-        BUG_ON(!list_empty(&dev_priv->mm.request_list));
-
         i915_gem_cleanup_ringbuffer(dev);
         mutex_unlock(&dev->struct_mutex);
 
@@ -2518,6 +2526,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
         if (ret != 0)
                 return ret;
 
+        dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
+                                                        dev->agp->agp_info.aper_size
+                                                        * 1024 * 1024);
+
         mutex_lock(&dev->struct_mutex);
         BUG_ON(!list_empty(&dev_priv->mm.active_list));
         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
@@ -2535,11 +2547,13 @@ int
 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv)
 {
+        drm_i915_private_t *dev_priv = dev->dev_private;
         int ret;
 
         ret = i915_gem_idle(dev);
         drm_irq_uninstall(dev);
 
+        io_mapping_free(dev_priv->mm.gtt_mapping);
         return ret;
 }
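
The heart of the pwrite rework above is a two-tier copy loop: carve the write into page-sized chunks, try the atomic (non-sleeping) fast path first, and redo only the faulting chunk through the path that is allowed to sleep. The standalone C sketch below mirrors that control flow; pwrite_pattern(), fast_copy() and slow_copy() are hypothetical stand-ins for the driver's i915_gem_gtt_pwrite(), fast_user_write() and slow_user_write(), not kernel APIs.

#include <stddef.h>
#include <string.h>

#define PAGE_SIZE 4096UL

/* Stand-in for fast_user_write(): in the driver this copies through an
 * atomic mapping and returns -EFAULT if the source page would need a
 * (sleeping) fault to become resident.  Here it always succeeds. */
static int fast_copy(char *dst, const char *src, size_t len)
{
        memcpy(dst, src, len);
        return 0;
}

/* Stand-in for slow_user_write(): may sleep, so it only fails for a
 * genuinely bad source address. */
static int slow_copy(char *dst, const char *src, size_t len)
{
        memcpy(dst, src, len);
        return 0;
}

static int pwrite_pattern(char *aperture, size_t offset,
                          const char *user_data, size_t remain)
{
        while (remain > 0) {
                /* page_base   = page-aligned offset within the aperture
                 * page_offset = offset within that page
                 * page_length = bytes to copy without crossing the page
                 */
                size_t page_base = offset & ~(PAGE_SIZE - 1);
                size_t page_offset = offset & (PAGE_SIZE - 1);
                size_t page_length = remain;

                if (page_offset + remain > PAGE_SIZE)
                        page_length = PAGE_SIZE - page_offset;

                if (fast_copy(aperture + page_base + page_offset,
                              user_data, page_length)) {
                        /* Fast path faulted: retry just this chunk with
                         * the sleeping version before giving up. */
                        if (slow_copy(aperture + page_base + page_offset,
                                      user_data, page_length))
                                return -1;
                }

                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }
        return 0;
}

int main(void)
{
        static char aperture[3 * PAGE_SIZE];
        static char data[PAGE_SIZE + 100];

        /* Starting 6 bytes before a page boundary splits the copy into a
         * 6-byte chunk, one full page, and a 94-byte tail. */
        return pwrite_pattern(aperture, PAGE_SIZE - 6, data, sizeof(data));
}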
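
Taken together, the entervt/leavevt and relocation hunks give the full io_mapping lifecycle: create one write-combining mapping of the aperture at setup, take short-lived atomic per-page views for each access, and free the mapping at teardown. Below is a condensed kernel-style sketch of that lifecycle, assuming the 2.6.28-era linux/io-mapping.h interfaces that the patch itself calls; the example_* helpers are hypothetical, not driver functions, and the exact prototypes have varied between kernel versions.

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/io-mapping.h>

static struct io_mapping *gtt;

/* Once, at setup (entervt above): reserve a WC view of the whole
 * aperture without permanently consuming kernel address space the way
 * a persistent ioremap of the full range would. */
static int example_setup(resource_size_t base, unsigned long size)
{
        gtt = io_mapping_create_wc(base, size);
        return gtt ? 0 : -ENOMEM;
}

/* Per access (the relocation write above): atomically map only the
 * page being touched, write through it, and unmap.  Nothing in between
 * may sleep, which is why the pwrite fast path has to bail out to
 * slow_user_write() on a fault. */
static void example_write_dword(unsigned long offset, u32 val)
{
        void __iomem *page;

        page = io_mapping_map_atomic_wc(gtt, offset & ~(PAGE_SIZE - 1));
        writel(val, page + (offset & (PAGE_SIZE - 1)));
        io_mapping_unmap_atomic(page);
}

/* Once, at teardown (leavevt above). */
static void example_teardown(void)
{
        io_mapping_free(gtt);
}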