1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Sat, 25 Jun 2022 23:38:45 +0300
Subject: [PATCH] intel/fs: always mask the bottom bits of the sampler extended
descriptor
Fixes a hang in Age Of Empire 4. The HW is hang with the sampler input
unit busy. Replaying on simulation showed the extended message length
in the extended descriptor is invalid. Since the Anv ensures the input
is correct in anv_surface_state_to_handle(), the likely reason for
this issue is the use of VK_VALVE_mutable_descriptor_type and the
application leaving a previous value for a different descriptor type.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
src/intel/compiler/brw_fs.cpp | 2 +-
.../compiler/brw_lower_logical_sends.cpp | 20 +++++++++++++++----
2 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 624454676031..061eb7d603bb 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4439,7 +4439,7 @@ brw_fb_write_msg_control(const fs_inst *inst,
return mctl;
}
- /**
+/**
* Predicate the specified instruction on the sample mask.
*/
void
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index 1ff064d342ae..90cb00daeb9b 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -1117,30 +1117,42 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
inst->src[1] = brw_imm_ud(0);
} else if (surface_handle.file != BAD_FILE) {
/* Bindless surface */
+ const fs_builder ubld = bld.group(1, 0).exec_all();
assert(devinfo->ver >= 9);
inst->desc = brw_sampler_desc(devinfo,
GFX9_BTI_BINDLESS,
sampler.file == IMM ? sampler.ud % 16 : 0,
msg_type,
simd_mode,
0 /* return_format unused on gfx7+ */);
/* For bindless samplers, the entire address is included in the message
* header so we can leave the portion in the message descriptor 0.
*/
if (sampler_handle.file != BAD_FILE || sampler.file == IMM) {
inst->src[0] = brw_imm_ud(0);
} else {
- const fs_builder ubld = bld.group(1, 0).exec_all();
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.SHL(desc, sampler, brw_imm_ud(8));
inst->src[0] = desc;
}
- /* We assume that the driver provided the handle in the top 20 bits so
- * we can use the surface handle directly as the extended descriptor.
+ /* We previously assumed that the driver provided the handle in the top
+ * 20 bits (leaving the bottom 12 bits at 0). But with extensions like
+ * VK_VALVE_mutable_descriptor_type, the application is more in control
+ * of the content of VkDescriptors which is where we store
+ * surface/sampler offsets. We experience GPU hangs because the
+ * application left an invalid value in the descriptor (probably used
+ * for another descriptor type than sampler) and the lower 12bits of the
+ * surface handle overlapping with the extended descriptor length make
+ * the HW hang. The following AND() clears those bits and fixes a hang
+ * in Age Of Empire 4.
*/
- inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
+ fs_reg ex_desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.AND(ex_desc,
+ retype(surface_handle, BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(INTEL_MASK(31, 12)));
+ inst->src[1] = component(ex_desc, 0);
} else {
/* Immediate portion of the descriptor */
inst->desc = brw_sampler_desc(devinfo,
|