| From 3b20739e9d8e9b0341710449bb63a7090b7c8729 Mon Sep 17 00:00:00 2001 |
| From: Chris Wilson <chris@chris-wilson.co.uk> |
| Date: Thu, 8 Aug 2013 14:41:03 +0100 |
| Subject: drm/i915: Update rules for reading cache lines through the LLC |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| The LLC is a fun device. The cache is a distinct functional block within |
| the SA that arbitrates access from both the CPU and GPU cores. As such, |
| all writes to memory land first in the LLC before further action is |
| taken. For example, an uncached write from either the CPU or GPU will |
| then proceed to memory and evict the cacheline from the LLC. This means that |
| a read from the LLC always returns the correct information even if the PTE |
| bit in the GPU differs from the PAT bit in the CPU. For the older |
| snooping architecture on non-LLC, the fundamental principle still holds |
| except that some coordination is required between the CPU and GPU to |
| explicitly perform the snooping (which is handled by our request |
| tracking). |
| |
| The upshot of this is that we know that we can issue a read from either |
| LLC devices or snoopable memory and trust the contents of the cache - |
| i.e. we can forgo a clflush before a read in these circumstances. |
| Writing to memory from the CPU is a little more tricky as we have to |
| consider that the scanout does not read from the CPU cache at all, but |
| from main memory. So we currently have to treat all requests to write to |
| uncached memory as having to be flushed to main memory for coherency |
| with all consumers. |
| |
| Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> |
| Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> |
| Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> |
| Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> |
| (cherry picked from commit c76ce038e31a2b30bc3dd816f0aefaf685097a0a) |
| Signed-off-by: Darren Hart <dvhart@linux.intel.com> |
| --- |
| drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++-------- |
| 1 file changed, 14 insertions(+), 8 deletions(-) |
| |
| diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c |
| index 498ef8a7bbc7..50200b5e501b 100644 |
| --- a/drivers/gpu/drm/i915/i915_gem.c |
| +++ b/drivers/gpu/drm/i915/i915_gem.c |
| @@ -62,6 +62,12 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); |
| static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); |
| static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); |
| |
| +static bool cpu_cache_is_coherent(struct drm_device *dev, |
| + enum i915_cache_level level) |
| +{ |
| + return HAS_LLC(dev) || level != I915_CACHE_NONE; |
| +} |
| + |
| static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) |
| { |
| if (obj->tiling_mode) |
| @@ -414,8 +420,7 @@ i915_gem_shmem_pread(struct drm_device *dev, |
| * read domain and manually flush cachelines (if required). This |
| * optimizes for the case when the gpu will dirty the data |
| * anyway again before the next pread happens. */ |
| - if (obj->cache_level == I915_CACHE_NONE) |
| - needs_clflush = 1; |
| + needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); |
| if (i915_gem_obj_bound_any(obj)) { |
| ret = i915_gem_object_set_to_gtt_domain(obj, false); |
| if (ret) |
| @@ -739,11 +744,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev, |
| return ret; |
| } |
| } |
| - /* Same trick applies for invalidate partially written cachelines before |
| - * writing. */ |
| - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) |
| - && obj->cache_level == I915_CACHE_NONE) |
| - needs_clflush_before = 1; |
| + /* Same trick applies to invalidate partially written cachelines read |
| + * before writing. */ |
| + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) |
| + needs_clflush_before = |
| + !cpu_cache_is_coherent(dev, obj->cache_level); |
| |
| ret = i915_gem_object_get_pages(obj); |
| if (ret) |
| @@ -3585,7 +3590,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) |
| |
| /* Flush the CPU cache if it's still invalid. */ |
| if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { |
| - i915_gem_clflush_object(obj); |
| + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) |
| + i915_gem_clflush_object(obj); |
| |
| obj->base.read_domains |= I915_GEM_DOMAIN_CPU; |
| } |
| -- |
| 1.8.5.rc3 |
| |