| From 6fdc61a8d45e519f59c2b5c552ecdde3161e1344 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Tue, 4 May 2021 23:57:37 -0400 |
| Subject: drm/vmwgfx: Fix cpu updates of coherent multisample surfaces |
| |
| From: Thomas Hellstrom <thellstrom@vmware.com> |
| |
| [ Upstream commit 88509f698c4e38e287e016e86a0445547824135c ] |
| |
| In cases where the dirty linear memory range spans multiple sample sheets |
| in a surface, the dirty surface region is incorrectly computed. |
| To do this correctly and in an optimized fashion we would have to compute |
| the dirty region of each sample sheet and compute the union of those |
| regions. |
| |
| But assuming that cpu writing to a multisample surface is rather a corner |
| case than a common case, just set the dirty region to the full surface. |
| |
| This fixes OpenGL piglit errors with SVGA_FORCE_COHERENT=1 |
| and the piglit test: |
| |
| fbo-depthstencil blit default_fb -samples=2 -auto |
| |
| Fixes: 9ca7d19ff8ba ("drm/vmwgfx: Add surface dirty-tracking callbacks") |
| Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> |
| Reviewed-by: Charmaine Lee <charmainel@vmware.com> |
| Reviewed-by: Roland Scheidegger <sroland@vmware.com> |
| Signed-off-by: Zack Rusin <zackr@vmware.com> |
| Link: https://patchwork.freedesktop.org/patch/msgid/20210505035740.286923-4-zackr@vmware.com |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| .../drm/vmwgfx/device_include/svga3d_surfacedefs.h | 8 ++++++-- |
| drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 13 +++++++++++++ |
| 2 files changed, 19 insertions(+), 2 deletions(-) |
| |
| diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h |
| index 4db25bd9fa22..127eaf0a0a58 100644 |
| --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h |
| +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h |
| @@ -1467,6 +1467,7 @@ struct svga3dsurface_cache { |
| |
| /** |
| * struct svga3dsurface_loc - Surface location |
| + * @sheet: The multisample sheet. |
| * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + |
| * mip_level. |
| * @x: X coordinate. |
| @@ -1474,6 +1475,7 @@ struct svga3dsurface_cache { |
| * @z: Z coordinate. |
| */ |
| struct svga3dsurface_loc { |
| + u32 sheet; |
| u32 sub_resource; |
| u32 x, y, z; |
| }; |
| @@ -1566,8 +1568,8 @@ svga3dsurface_get_loc(const struct svga3dsurface_cache *cache, |
| u32 layer; |
| int i; |
| |
| - if (offset >= cache->sheet_bytes) |
| - offset %= cache->sheet_bytes; |
| + loc->sheet = offset / cache->sheet_bytes; |
| + offset -= loc->sheet * cache->sheet_bytes; |
| |
| layer = offset / cache->mip_chain_bytes; |
| offset -= layer * cache->mip_chain_bytes; |
| @@ -1631,6 +1633,7 @@ svga3dsurface_min_loc(const struct svga3dsurface_cache *cache, |
| u32 sub_resource, |
| struct svga3dsurface_loc *loc) |
| { |
| + loc->sheet = 0; |
| loc->sub_resource = sub_resource; |
| loc->x = loc->y = loc->z = 0; |
| } |
| @@ -1652,6 +1655,7 @@ svga3dsurface_max_loc(const struct svga3dsurface_cache *cache, |
| const struct drm_vmw_size *size; |
| u32 mip; |
| |
| + loc->sheet = 0; |
| loc->sub_resource = sub_resource + 1; |
| mip = sub_resource % cache->num_mip_levels; |
| size = &cache->mip[mip].size; |
| diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c |
| index 3914bfee0533..f493b20c7a38 100644 |
| --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c |
| +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c |
| @@ -1802,6 +1802,19 @@ static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res, |
| svga3dsurface_get_loc(cache, &loc2, end - 1); |
| svga3dsurface_inc_loc(cache, &loc2); |
| |
| + if (loc1.sheet != loc2.sheet) { |
| + u32 sub_res; |
| + |
| + /* |
| + * Multiple multisample sheets. To do this in an optimized |
| + * fashion, compute the dirty region for each sheet and the |
| + * resulting union. Since this is not a common case, just dirty |
| + * the whole surface. |
| + */ |
| + for (sub_res = 0; sub_res < dirty->num_subres; ++sub_res) |
| + vmw_subres_dirty_full(dirty, sub_res); |
| + return; |
| + } |
| if (loc1.sub_resource + 1 == loc2.sub_resource) { |
| /* Dirty range covers a single sub-resource */ |
| vmw_subres_dirty_add(dirty, &loc1, &loc2); |
| -- |
| 2.30.2 |
| |