blob: 84d1103a1245be380aefaf98f35d2e270100ec67 [file] [log] [blame]
From 3b20739e9d8e9b0341710449bb63a7090b7c8729 Mon Sep 17 00:00:00 2001
From: Chris Wilson <>
Date: Thu, 8 Aug 2013 14:41:03 +0100
Subject: drm/i915: Update rules for reading cache lines through the LLC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The LLC is a fun device. The cache is a distinct functional block within
the SA that arbitrates access from both the CPU and GPU cores. As such
all writes to memory land first in the LLC before further action is
taken. For example, an uncached write from either the CPU or GPU will
then proceed to memory and evict the cacheline from the LLC. This means that
a read from the LLC always returns the correct information even if the PTE
bit in the GPU differs from the PAT bit in the CPU. For the older
snooping architecture on non-LLC, the fundamental principle still holds
except that some coordination is required between the CPU and GPU to
explicitly perform the snooping (which is handled by our request
The upshot of this is that we know that we can issue a read from either
LLC devices or snoopable memory and trust the contents of the cache -
i.e. we can forgo a clflush before a read in these circumstances.
Writing to memory from the CPU is a little more tricky as we have to
consider that the scanout does not read from the CPU cache at all, but
from main memory. So we have to currently treat all requests to write to
uncached memory as having to be flushed to main memory for coherency
with all consumers.
Signed-off-by: Chris Wilson <>
Cc: Ville Syrjälä <>
Reviewed-by: Ville Syrjälä <>
Signed-off-by: Daniel Vetter <>
(cherry picked from commit c76ce038e31a2b30bc3dd816f0aefaf685097a0a)
Signed-off-by: Darren Hart <>
drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 498ef8a7bbc7..50200b5e501b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -62,6 +62,12 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+static bool cpu_cache_is_coherent(struct drm_device *dev,
+ enum i915_cache_level level)
+ return HAS_LLC(dev) || level != I915_CACHE_NONE;
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
if (obj->tiling_mode)
@@ -414,8 +420,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
* read domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens. */
- if (obj->cache_level == I915_CACHE_NONE)
- needs_clflush = 1;
+ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (ret)
@@ -739,11 +744,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
return ret;
- /* Same trick applies for invalidate partially written cachelines before
- * writing. */
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
- && obj->cache_level == I915_CACHE_NONE)
- needs_clflush_before = 1;
+ /* Same trick applies to invalidate partially written cachelines read
+ * before writing. */
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
+ needs_clflush_before =
+ !cpu_cache_is_coherent(dev, obj->cache_level);
ret = i915_gem_object_get_pages(obj);
if (ret)
@@ -3585,7 +3590,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- i915_gem_clflush_object(obj);
+ if (!cpu_cache_is_coherent(obj->, obj->cache_level))
+ i915_gem_clflush_object(obj);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;