From 3b20739e9d8e9b0341710449bb63a7090b7c8729 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 8 Aug 2013 14:41:03 +0100
Subject: drm/i915: Update rules for reading cache lines through the LLC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The LLC is a fun device. The cache is a distinct functional block within
the SA that arbitrates access from both the CPU and GPU cores. As such
all writes to memory land first in the LLC before further action is
taken. For example, an uncached write from either the CPU or GPU will
then proceed to memory and evict the cacheline from the LLC. This means that
a read from the LLC always returns the correct information even if the PTE
bit in the GPU differs from the PAT bit in the CPU. For the older
snooping architecture on non-LLC platforms, the fundamental principle still holds
except that some coordination is required between the CPU and GPU to
explicitly perform the snooping (which is handled by our request
tracking).

The upshot of this is that we know we can issue a read either on LLC
devices or from snoopable memory and trust the contents of the cache -
i.e. we can forgo a clflush before a read in these circumstances.
Writing to memory from the CPU is a little more tricky as we have to
consider that the scanout does not read from the CPU cache at all, but
from main memory. So for now we have to treat every write to uncached
memory as one that must be flushed out to main memory for coherency with
all consumers.
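
In code form, the rule above boils down to a single predicate over the
platform and the object's cache level. The fragment below is only an
illustrative sketch of that rule: cpu_cache_is_coherent() is the helper
added by the diff that follows, and the two call sites are simplified
copies of the pread/pwrite paths it touches.

    static bool cpu_cache_is_coherent(struct drm_device *dev,
                                      enum i915_cache_level level)
    {
            /* The CPU cache can be trusted for reads if the platform has
             * an LLC or the object uses a snooping cache level. */
            return HAS_LLC(dev) || level != I915_CACHE_NONE;
    }

    /* pread: only clflush when the CPU cache cannot be trusted. */
    needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);

    /* pwrite: partially written cachelines still need to be invalidated
     * first unless the CPU cache is known to be coherent. */
    if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
            needs_clflush_before =
                    !cpu_cache_is_coherent(dev, obj->cache_level);
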
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit c76ce038e31a2b30bc3dd816f0aefaf685097a0a)
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 498ef8a7bbc7..50200b5e501b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -62,6 +62,12 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+static bool cpu_cache_is_coherent(struct drm_device *dev,
+ enum i915_cache_level level)
+{
+ return HAS_LLC(dev) || level != I915_CACHE_NONE;
+}
+
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
@@ -414,8 +420,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
* read domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens. */
- if (obj->cache_level == I915_CACHE_NONE)
- needs_clflush = 1;
+ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, false);
if (ret)
@@ -739,11 +744,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
return ret;
}
}
- /* Same trick applies for invalidate partially written cachelines before
- * writing. */
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
- && obj->cache_level == I915_CACHE_NONE)
- needs_clflush_before = 1;
+ /* Same trick applies to invalidate partially written cachelines read
+ * before writing. */
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
+ needs_clflush_before =
+ !cpu_cache_is_coherent(dev, obj->cache_level);
ret = i915_gem_object_get_pages(obj);
if (ret)
@@ -3585,7 +3590,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- i915_gem_clflush_object(obj);
+ if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ i915_gem_clflush_object(obj);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}
--
1.8.5.rc3