diff options
author | Reza Jahanbakhshi | 2022-12-08 12:58:21 +0100 |
---|---|---|
committer | Reza Jahanbakhshi | 2022-12-08 12:58:21 +0100 |
commit | 7bfd186021e68844ac629ddfc6b67a3658f82f3f (patch) | |
tree | 10f276c177572b09944778fefc095832aad79e47 /0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch | |
parent | 6c139e6095a30fb7a876c3718587043d24603e79 (diff) | |
download | aur-7bfd186021e68844ac629ddfc6b67a3658f82f3f.tar.gz |
Added the i915 Intel Gallium driver.
Added patches to fix a GPU hang in which the hardware stalls on a PIPE_CONTROL that follows a GPGPU_WALKER.
Diffstat (limited to '0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch')
-rw-r--r-- | 0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch b/0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch new file mode 100644 index 000000000000..ef12fda3b0c0 --- /dev/null +++ b/0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch @@ -0,0 +1,83 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> +Date: Sat, 25 Jun 2022 23:38:45 +0300 +Subject: [PATCH] intel/fs: always mask the bottom bits of the sampler extended + descriptor + +Fixes a hang in Age Of Empire 4. The HW is hang with the sampler input +unit busy. Replaying on simulation showed the extended message length +in the extended descriptor is invalid. Since the Anv ensures the input +is correct in anv_surface_state_to_handle(), the likely reason for +this issue is the use of VK_VALVE_mutable_descriptor_type and the +application leaving a previous value for a different descriptor type. + +Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> +--- + src/intel/compiler/brw_fs.cpp | 2 +- + .../compiler/brw_lower_logical_sends.cpp | 20 +++++++++++++++---- + 2 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp +index 624454676031..061eb7d603bb 100644 +--- a/src/intel/compiler/brw_fs.cpp ++++ b/src/intel/compiler/brw_fs.cpp +@@ -4439,7 +4439,7 @@ brw_fb_write_msg_control(const fs_inst *inst, + return mctl; + } + +- /** ++/** + * Predicate the specified instruction on the sample mask. 
+ */ + void +diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp +index 1ff064d342ae..90cb00daeb9b 100644 +--- a/src/intel/compiler/brw_lower_logical_sends.cpp ++++ b/src/intel/compiler/brw_lower_logical_sends.cpp +@@ -1117,30 +1117,42 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, + inst->src[1] = brw_imm_ud(0); + } else if (surface_handle.file != BAD_FILE) { + /* Bindless surface */ ++ const fs_builder ubld = bld.group(1, 0).exec_all(); + assert(devinfo->ver >= 9); + inst->desc = brw_sampler_desc(devinfo, + GFX9_BTI_BINDLESS, + sampler.file == IMM ? sampler.ud % 16 : 0, + msg_type, + simd_mode, + 0 /* return_format unused on gfx7+ */); + + /* For bindless samplers, the entire address is included in the message + * header so we can leave the portion in the message descriptor 0. + */ + if (sampler_handle.file != BAD_FILE || sampler.file == IMM) { + inst->src[0] = brw_imm_ud(0); + } else { +- const fs_builder ubld = bld.group(1, 0).exec_all(); + fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.SHL(desc, sampler, brw_imm_ud(8)); + inst->src[0] = desc; + } + +- /* We assume that the driver provided the handle in the top 20 bits so +- * we can use the surface handle directly as the extended descriptor. ++ /* We previously assumed that the driver provided the handle in the top ++ * 20 bits (leaving the bottom 12 bits at 0). But with extensions like ++ * VK_VALVE_mutable_descriptor_type, the application is more in control ++ * of the content of VkDescriptors which is where we store ++ * surface/sampler offsets. We experience GPU hangs because the ++ * application left an invalid value in the descriptor (probably used ++ * for another descriptor type than sampler) and the lower 12bits of the ++ * surface handle overlapping with the extended descriptor length make ++ * the HW hang. The following AND() clears those bits and fixes a hang ++ * in Age Of Empire 4. 
+ */ +- inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD); ++ fs_reg ex_desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); ++ ubld.AND(ex_desc, ++ retype(surface_handle, BRW_REGISTER_TYPE_UD), ++ brw_imm_ud(INTEL_MASK(31, 12))); ++ inst->src[1] = component(ex_desc, 0); + } else { + /* Immediate portion of the descriptor */ + inst->desc = brw_sampler_desc(devinfo, |