blob: cd1848e4c3cedf5bb1c6ed747b3de11c2302774f [file] [log] [blame]
From 52ae6dfe428aa66fb70c6771ec325f7c64a2ea91 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 9 Aug 2013 12:26:45 +0100
Subject: drm/i915: Update rules for writing through the LLC with the cpu
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As mentioned in the previous commit, reads and writes from both the CPU
and GPU go through the LLC. This gives us coherency between the CPU and
GPU irrespective of the attribute settings either device sets. We can
use to avoid having to clflush even uncached memory.
Except for the scanout.
The scanout resides within another functional block that does not use
the LLC but reads directly from main memory. So in order to maintain
coherency with the scanout, writes to uncached memory must be flushed.
In order to optimize writes elsewhere, we start tracking whether an
framebuffer is attached to an object.
v2: Use pin_display tracking rather than fb_count (to ensure we flush
cursors as well etc) and only force the clflush along explicit writes to
the scanout paths (i.e. pin_to_display_plane and pwrite into scanout).
v3: Force the flush after hitting the slowpath in pwrite, as after
dropping the lock the object's cache domain may be invalidated. (Ville)
Based on a patch by Ville Syrjälä.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit 2c22569bba8af6c2976d5f9479fe54a53a39966b)
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++--------------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
4 files changed, 35 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cbb35a09ad7d..d25f9f712b6f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1840,7 +1840,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
}
void i915_gem_reset(struct drm_device *dev);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 46f007cd7f87..ce4a8c4e42ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,7 +38,8 @@
#include <linux/dma-buf.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool force);
static __must_check int
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -68,6 +69,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
return HAS_LLC(dev) || level != I915_CACHE_NONE;
}
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ return true;
+
+ return obj->pin_display;
+}
+
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
@@ -736,8 +745,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
* write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */
- if (obj->cache_level == I915_CACHE_NONE)
- needs_clflush_after = 1;
+ needs_clflush_after = cpu_write_needs_clflush(obj);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -827,7 +835,7 @@ out:
*/
if (!needs_clflush_after &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_chipset_flush(dev);
}
}
@@ -905,9 +913,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- if (obj->cache_level == I915_CACHE_NONE &&
- obj->tiling_mode == I915_TILING_NONE &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+ if (obj->tiling_mode == I915_TILING_NONE &&
+ obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+ cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
@@ -1256,8 +1264,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
}
/* Pinned buffers may be scanout, so flush the cache */
- if (obj->pin_count)
- i915_gem_object_flush_cpu_write_domain(obj);
+ if (obj->pin_display)
+ i915_gem_object_flush_cpu_write_domain(obj, true);
drm_gem_object_unreference(&obj->base);
unlock:
@@ -1627,7 +1635,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
* hope for the best.
*/
WARN_ON(ret != -EIO);
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, true);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -3205,7 +3213,8 @@ err_unpin:
}
void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+ bool force)
{
/* If we don't have a page list set up, then we're not pinned
* to GPU, and we can ignore the cache flush because it'll happen
@@ -3229,7 +3238,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (obj->cache_level != I915_CACHE_NONE)
+ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return;
trace_i915_gem_object_clflush(obj);
@@ -3266,14 +3275,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
/** Flushes the CPU write domain for the object if it's dirty. */
static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool force)
{
uint32_t old_write_domain;
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, force);
i915_gem_chipset_flush(obj->base.dev);
old_write_domain = obj->base.write_domain;
obj->base.write_domain = 0;
@@ -3307,7 +3317,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, false);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@@ -3397,7 +3407,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj, cache_level);
}
- if (cache_level == I915_CACHE_NONE) {
+ list_for_each_entry(vma, &obj->vma_list, vma_link)
+ vma->node.color = cache_level;
+ obj->cache_level = cache_level;
+
+ if (cpu_write_needs_clflush(obj)) {
u32 old_read_domains, old_write_domain;
/* If we're coming from LLC cached, then we haven't
@@ -3420,9 +3434,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
old_write_domain);
}
- list_for_each_entry(vma, &obj->vma_list, vma_link)
- vma->node.color = cache_level;
- obj->cache_level = cache_level;
i915_gem_verify_gtt(dev);
return 0;
}
@@ -3550,7 +3561,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (ret)
goto err_unpin_display;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, true);
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@@ -3622,8 +3633,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, false);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}
@@ -3805,10 +3815,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
obj->user_pin_count++;
obj->pin_filp = file;
- /* XXX - flush the CPU caches for pinned objects
- * as the X server doesn't manage domains yet
- */
- i915_gem_object_flush_cpu_write_domain(obj);
args->offset = i915_gem_obj_ggtt_offset(obj);
out:
drm_gem_object_unreference(&obj->base);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8ccc29ac9629..e999578a021c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, false);
flush_domains |= obj->base.write_domain;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 24fb989593f0..c9420c280cf0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
dev_priv->gtt.base.total / PAGE_SIZE);
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_gtt_bind_object(obj, obj->cache_level);
}
--
1.8.5.rc3