| From 52ae6dfe428aa66fb70c6771ec325f7c64a2ea91 Mon Sep 17 00:00:00 2001 |
| From: Chris Wilson <chris@chris-wilson.co.uk> |
| Date: Fri, 9 Aug 2013 12:26:45 +0100 |
| Subject: drm/i915: Update rules for writing through the LLC with the cpu |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| As mentioned in the previous commit, reads and writes from both the CPU |
| and GPU go through the LLC. This gives us coherency between the CPU and |
| GPU irrespective of the attribute settings either device sets. We can |
| use this to avoid having to clflush even uncached memory. |
| |
| Except for the scanout. |
| |
| The scanout resides within another functional block that does not use |
| the LLC but reads directly from main memory. So in order to maintain |
| coherency with the scanout, writes to uncached memory must be flushed. |
| In order to optimize writes elsewhere, we start tracking whether a |
| framebuffer is attached to an object. |
| |
| v2: Use pin_display tracking rather than fb_count (to ensure we flush |
| cursors as well etc) and only force the clflush along explicit writes to |
| the scanout paths (i.e. pin_to_display_plane and pwrite into scanout). |
| |
| v3: Force the flush after hitting the slowpath in pwrite, as after |
| dropping the lock the object's cache domain may be invalidated. (Ville) |
| |
| Based on a patch by Ville Syrjälä. |
| |
| Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> |
| Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> |
| Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> |
| Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> |
| (cherry picked from commit 2c22569bba8af6c2976d5f9479fe54a53a39966b) |
| Signed-off-by: Darren Hart <dvhart@linux.intel.com> |
| --- |
| drivers/gpu/drm/i915/i915_drv.h | 2 +- |
| drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++-------------- |
| drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- |
| drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- |
| 4 files changed, 35 insertions(+), 29 deletions(-) |
| |
| diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h |
| index cbb35a09ad7d..d25f9f712b6f 100644 |
| --- a/drivers/gpu/drm/i915/i915_drv.h |
| +++ b/drivers/gpu/drm/i915/i915_drv.h |
| @@ -1840,7 +1840,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) |
| } |
| |
| void i915_gem_reset(struct drm_device *dev); |
| -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); |
| +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); |
| int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); |
| int __must_check i915_gem_init(struct drm_device *dev); |
| int __must_check i915_gem_init_hw(struct drm_device *dev); |
| diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c |
| index 46f007cd7f87..ce4a8c4e42ac 100644 |
| --- a/drivers/gpu/drm/i915/i915_gem.c |
| +++ b/drivers/gpu/drm/i915/i915_gem.c |
| @@ -38,7 +38,8 @@ |
| #include <linux/dma-buf.h> |
| |
| static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); |
| -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); |
| +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, |
| + bool force); |
| static __must_check int |
| i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, |
| struct i915_address_space *vm, |
| @@ -68,6 +69,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, |
| return HAS_LLC(dev) || level != I915_CACHE_NONE; |
| } |
| |
| +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) |
| +{ |
| + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) |
| + return true; |
| + |
| + return obj->pin_display; |
| +} |
| + |
| static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) |
| { |
| if (obj->tiling_mode) |
| @@ -736,8 +745,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, |
| * write domain and manually flush cachelines (if required). This |
| * optimizes for the case when the gpu will use the data |
| * right away and we therefore have to clflush anyway. */ |
| - if (obj->cache_level == I915_CACHE_NONE) |
| - needs_clflush_after = 1; |
| + needs_clflush_after = cpu_write_needs_clflush(obj); |
| if (i915_gem_obj_bound_any(obj)) { |
| ret = i915_gem_object_set_to_gtt_domain(obj, true); |
| if (ret) |
| @@ -827,7 +835,7 @@ out: |
| */ |
| if (!needs_clflush_after && |
| obj->base.write_domain != I915_GEM_DOMAIN_CPU) { |
| - i915_gem_clflush_object(obj); |
| + i915_gem_clflush_object(obj, obj->pin_display); |
| i915_gem_chipset_flush(dev); |
| } |
| } |
| @@ -905,9 +913,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, |
| goto out; |
| } |
| |
| - if (obj->cache_level == I915_CACHE_NONE && |
| - obj->tiling_mode == I915_TILING_NONE && |
| - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { |
| + if (obj->tiling_mode == I915_TILING_NONE && |
| + obj->base.write_domain != I915_GEM_DOMAIN_CPU && |
| + cpu_write_needs_clflush(obj)) { |
| ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); |
| /* Note that the gtt paths might fail with non-page-backed user |
| * pointers (e.g. gtt mappings when moving data between |
| @@ -1256,8 +1264,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, |
| } |
| |
| /* Pinned buffers may be scanout, so flush the cache */ |
| - if (obj->pin_count) |
| - i915_gem_object_flush_cpu_write_domain(obj); |
| + if (obj->pin_display) |
| + i915_gem_object_flush_cpu_write_domain(obj, true); |
| |
| drm_gem_object_unreference(&obj->base); |
| unlock: |
| @@ -1627,7 +1635,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) |
| * hope for the best. |
| */ |
| WARN_ON(ret != -EIO); |
| - i915_gem_clflush_object(obj); |
| + i915_gem_clflush_object(obj, true); |
| obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; |
| } |
| |
| @@ -3205,7 +3213,8 @@ err_unpin: |
| } |
| |
| void |
| -i915_gem_clflush_object(struct drm_i915_gem_object *obj) |
| +i915_gem_clflush_object(struct drm_i915_gem_object *obj, |
| + bool force) |
| { |
| /* If we don't have a page list set up, then we're not pinned |
| * to GPU, and we can ignore the cache flush because it'll happen |
| @@ -3229,7 +3238,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) |
| * snooping behaviour occurs naturally as the result of our domain |
| * tracking. |
| */ |
| - if (obj->cache_level != I915_CACHE_NONE) |
| + if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) |
| return; |
| |
| trace_i915_gem_object_clflush(obj); |
| @@ -3266,14 +3275,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) |
| |
| /** Flushes the CPU write domain for the object if it's dirty. */ |
| static void |
| -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) |
| +i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, |
| + bool force) |
| { |
| uint32_t old_write_domain; |
| |
| if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) |
| return; |
| |
| - i915_gem_clflush_object(obj); |
| + i915_gem_clflush_object(obj, force); |
| i915_gem_chipset_flush(obj->base.dev); |
| old_write_domain = obj->base.write_domain; |
| obj->base.write_domain = 0; |
| @@ -3307,7 +3317,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) |
| if (ret) |
| return ret; |
| |
| - i915_gem_object_flush_cpu_write_domain(obj); |
| + i915_gem_object_flush_cpu_write_domain(obj, false); |
| |
| /* Serialise direct access to this object with the barriers for |
| * coherent writes from the GPU, by effectively invalidating the |
| @@ -3397,7 +3407,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, |
| obj, cache_level); |
| } |
| |
| - if (cache_level == I915_CACHE_NONE) { |
| + list_for_each_entry(vma, &obj->vma_list, vma_link) |
| + vma->node.color = cache_level; |
| + obj->cache_level = cache_level; |
| + |
| + if (cpu_write_needs_clflush(obj)) { |
| u32 old_read_domains, old_write_domain; |
| |
| /* If we're coming from LLC cached, then we haven't |
| @@ -3420,9 +3434,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, |
| old_write_domain); |
| } |
| |
| - list_for_each_entry(vma, &obj->vma_list, vma_link) |
| - vma->node.color = cache_level; |
| - obj->cache_level = cache_level; |
| i915_gem_verify_gtt(dev); |
| return 0; |
| } |
| @@ -3550,7 +3561,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, |
| if (ret) |
| goto err_unpin_display; |
| |
| - i915_gem_object_flush_cpu_write_domain(obj); |
| + i915_gem_object_flush_cpu_write_domain(obj, true); |
| |
| old_write_domain = obj->base.write_domain; |
| old_read_domains = obj->base.read_domains; |
| @@ -3622,8 +3633,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) |
| |
| /* Flush the CPU cache if it's still invalid. */ |
| if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { |
| - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) |
| - i915_gem_clflush_object(obj); |
| + i915_gem_clflush_object(obj, false); |
| |
| obj->base.read_domains |= I915_GEM_DOMAIN_CPU; |
| } |
| @@ -3805,10 +3815,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, |
| obj->user_pin_count++; |
| obj->pin_filp = file; |
| |
| - /* XXX - flush the CPU caches for pinned objects |
| - * as the X server doesn't manage domains yet |
| - */ |
| - i915_gem_object_flush_cpu_write_domain(obj); |
| args->offset = i915_gem_obj_ggtt_offset(obj); |
| out: |
| drm_gem_object_unreference(&obj->base); |
| diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c |
| index 8ccc29ac9629..e999578a021c 100644 |
| --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c |
| +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c |
| @@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, |
| return ret; |
| |
| if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) |
| - i915_gem_clflush_object(obj); |
| + i915_gem_clflush_object(obj, false); |
| |
| flush_domains |= obj->base.write_domain; |
| } |
| diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c |
| index 24fb989593f0..c9420c280cf0 100644 |
| --- a/drivers/gpu/drm/i915/i915_gem_gtt.c |
| +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c |
| @@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) |
| dev_priv->gtt.base.total / PAGE_SIZE); |
| |
| list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { |
| - i915_gem_clflush_object(obj); |
| + i915_gem_clflush_object(obj, obj->pin_display); |
| i915_gem_gtt_bind_object(obj, obj->cache_level); |
| } |
| |
| -- |
| 1.8.5.rc3 |
| |