aboutsummarylogtreecommitdiffstats
path: root/9002-Issue-1710-1712-debugging-and-speculative-fixes.patch
blob: a8febd5a00258b11d22afdf28d91daadd7f0ab7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
From 9a33b20092f8512a60ab9672965afa1ae470c096 Mon Sep 17 00:00:00 2001
From: Scott B <arglebargle@arglebargle.dev>
Date: Tue, 19 Oct 2021 09:54:24 -0700
Subject: [PATCH] Issue 1710/1712 debugging and speculative fixes

Squashed commit of the following:

commit 4a7c5a628d7e4943321e1515b3bf62cee989ec41
Author: Scott B <arglebargle@arglebargle.dev>
Date:   Thu Oct 14 02:22:21 2021 -0700

    TEST: Replaces "don't wait to signal GFXOFF"

    replacement for 4df3adab896f843afe5bca5960fbca6ff2cc407e per Lijo Lazar
    see: https://gitlab.freedesktop.org/drm/amd/-/issues/1710#note_1102805

commit 5a5d7f9067780704c58d7e9600e91f61a458c43d
Author: Scott B <arglebargle@arglebargle.dev>
Date:   Thu Oct 14 02:16:16 2021 -0700

    Revert "drm/amdgpu: During s0ix don't wait to signal GFXOFF"

    This reverts commit 4df3adab896f843afe5bca5960fbca6ff2cc407e.

commit 1a1086b0c5a1afc22b11f8dd3875a464f579e6cd
Author: Mario Limonciello <mario.limonciello@amd.com>
Date:   Tue Sep 28 11:00:40 2021 -0500

    platform/x86: amd-pmc: explicitly check for GFXOFF mask (SEE NOTE)

    NOTE: Updated patch to apply after queued changes in pdx86/for-next -SB
    See: https://git.kernel.org/pdx86/platform-drivers-x86/c/40635cd32f0d83573a558dc30e9ba3469e769249

    Original commit message:

    (This patch is for testing only and should not be upstreamed in
    this state)

    Explicitly check the value of GFXOFF before setting OS_HINT.  If
    it's not valid, continue retrying to read it - for up to 2 seconds.

    If it's still not valid, abort the suspend routine.

    Possible outcomes:
    * If this makes all failed suspends "go away" (100% success) -> there is
      a timing problem remaining in amdgpu as it pertains to when GFXOFF is
      set relative to when AMD_PMC sends OS_HINT

      There should be a message "gfxoff not asserted, retrying"
    * If the suspend entry fails now with "gfxoff not asserted after 2000000us"
      -> GFXOFF is also a symptom and not the root cause of failed s0i3 entry

    Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
    Change-Id: Ic3a1ed188abad21f94c8dd82c2eeed43117b1dbe
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       | 14 ++-------
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   |  2 ++
 drivers/platform/x86/amd-pmc.c                | 29 ++++++++++++++++---
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1795d448c700..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -31,8 +31,6 @@
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
 
-#define GFX_OFF_NO_DELAY 0
-
 /*
  * GPU GFX IP block helpers function.
  */
@@ -560,8 +558,6 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
 
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 {
-	unsigned long delay = GFX_OFF_DELAY_ENABLE;
-
 	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 		return;
 
@@ -577,14 +573,8 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 
 		adev->gfx.gfx_off_req_count--;
 
-		if (adev->gfx.gfx_off_req_count == 0 &&
-		    !adev->gfx.gfx_off_state) {
-			/* If going to s2idle, no need to wait */
-			if (adev->in_s0ix)
-				delay = GFX_OFF_NO_DELAY;
-			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
-					      delay);
-		}
+		if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
+			schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
 	} else {
 		if (adev->gfx.gfx_off_req_count == 0) {
 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 9a9c24a6ec35..3891fe8cd7fb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -1376,6 +1376,8 @@ static ssize_t renoir_get_gpu_metrics(struct smu_context *smu,
 
 static int renoir_gfx_state_change_set(struct smu_context *smu, uint32_t state)
 {
+	if (state == sGpuChangeState_D3Entry)
+		smu_v12_0_gfx_off_control(smu, true);
 
 	return 0;
 }
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index dc851c2c4d1c..cf49fb975b36 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -87,6 +87,9 @@
 #define DELAY_MIN_US		2000
 #define DELAY_MAX_US		3000
 #define FIFO_SIZE		4096
+
+#define GFX_IDLE_MASK		0x00000080
+
 enum amd_pmc_def {
 	MSG_TEST = 0x01,
 	MSG_OS_HINT_PCO,
@@ -189,7 +192,7 @@ static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
 }
 
 static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
-				 struct seq_file *s)
+				 struct seq_file *s, u32 *val_out)
 {
 	u32 val;
 
@@ -203,6 +206,8 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
 	default:
 		return -EINVAL;
 	}
+	if (val_out)
+		*val_out = val;
 
 	if (dev)
 		dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val);
@@ -276,7 +281,7 @@ static int amd_pmc_idlemask_show(struct seq_file *s, void *unused)
 	int rc;
 
 	if (dev->major > 56 || (dev->major >= 55 && dev->minor >= 37)) {
-		rc = amd_pmc_idlemask_read(dev, NULL, s);
+		rc = amd_pmc_idlemask_read(dev, NULL, s, NULL);
 		if (rc)
 			return rc;
 	} else {
@@ -449,6 +454,7 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
 {
 	struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
 	int rc;
+	u32 val = 0;
 	u8 msg;
 
 	/* Reset and Start SMU logging - to monitor the s0i3 stats */
@@ -456,7 +462,22 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
 	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
 
 	/* Dump the IdleMask before we send hint to SMU */
-	amd_pmc_idlemask_read(pdev, dev, NULL);
+	amd_pmc_idlemask_read(pdev, dev, NULL, &val);
+	if (!(val & GFX_IDLE_MASK)) {
+		uint32_t i;
+		dev_err(pdev->dev, "gfxoff not asserted, retrying\n");
+		for (i = 0; i < PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX; i++) {
+			udelay(PMC_MSG_DELAY_MIN_US);
+			amd_pmc_idlemask_read(pdev, dev, NULL, &val);
+			if (val & GFX_IDLE_MASK)
+				break;
+		}
+		if (!(val & GFX_IDLE_MASK)) {
+			dev_err(pdev->dev, "gfxoff not asserted after %dus\n",
+				PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
+			return -EBUSY;
+		}
+	}
 	msg = amd_pmc_get_os_hint(pdev);
 	rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
 	if (rc)
@@ -483,7 +504,7 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
 	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0);
 
 	/* Dump the IdleMask to see the blockers */
-	amd_pmc_idlemask_read(pdev, dev, NULL);
+	amd_pmc_idlemask_read(pdev, dev, NULL, NULL);
 
 	/* Write data incremented by 1 to distinguish in stb_read */
 	if (enable_stb)
-- 
2.33.1