Skip to content

Commit c3d590f

Browse files
committed
Merge tag 'amd-drm-next-6.14-2025-01-10' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.14-2025-01-10: amdgpu: - Fix max surface handling in DC - clang fixes - DCN 3.5 fixes - DCN 4.0.1 fixes - DC CRC fixes - DML updates - DSC fixes - PSR fixes - DC add some divide by 0 checks - SMU13 updates - SR-IOV fixes - RAS fixes - Cleaner shader support for gfx10.3 dGPUs - fix drm buddy trim handling - SDMA engine reset updates - Fix RB bitmap setup - Fix doorbell ttm cleanup - Add CEC notifier support - DPIA updates - MST fixes amdkfd: - Shader debugger fixes - Trap handler cleanup - Cleanup includes - Eviction fence wq fix Signed-off-by: Dave Airlie <[email protected]> From: Alex Deucher <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents 0dc8538 + 812a33a commit c3d590f

File tree

167 files changed

+4869
-2043
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

167 files changed

+4869
-2043
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
11311131
uint32_t low, high;
11321132
uint64_t queue_addr = 0;
11331133

1134+
if (!amdgpu_gpu_recovery)
1135+
return 0;
1136+
11341137
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
11351138
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
11361139

@@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
11791182
uint32_t low, high, pipe_reset_data = 0;
11801183
uint64_t queue_addr = 0;
11811184

1185+
if (!amdgpu_gpu_recovery)
1186+
return 0;
1187+
11821188
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
11831189
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
11841190

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
8888
{
8989
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
9090

91-
if (aobj) {
92-
amdgpu_hmm_unregister(aobj);
93-
ttm_bo_put(&aobj->tbo);
94-
}
91+
amdgpu_hmm_unregister(aobj);
92+
ttm_bo_put(&aobj->tbo);
9593
}
9694

9795
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,10 +3020,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block)
30203020
struct amdgpu_device *adev = ip_block->adev;
30213021

30223022
mutex_lock(&adev->firmware.mutex);
3023-
/*
3024-
* This sequence is just used on hw_init only once, no need on
3025-
* resume.
3026-
*/
3023+
30273024
ret = amdgpu_ucode_init_bo(adev);
30283025
if (ret)
30293026
goto failed;
@@ -3148,6 +3145,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block)
31483145

31493146
mutex_lock(&adev->firmware.mutex);
31503147

3148+
ret = amdgpu_ucode_init_bo(adev);
3149+
if (ret)
3150+
goto failed;
3151+
31513152
ret = psp_hw_start(psp);
31523153
if (ret)
31533154
goto failed;
@@ -3891,10 +3892,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
38913892
{
38923893
struct drm_device *ddev = dev_get_drvdata(dev);
38933894
struct amdgpu_device *adev = drm_to_adev(ddev);
3895+
struct amdgpu_ip_block *ip_block;
38943896
uint32_t fw_ver;
38953897
int ret;
38963898

3897-
if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
3899+
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
3900+
if (!ip_block || !ip_block->status.late_initialized) {
38983901
dev_info(adev->dev, "PSP block is not ready yet\n.");
38993902
return -EBUSY;
39003903
}
@@ -3923,8 +3926,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
39233926
struct amdgpu_bo *fw_buf_bo = NULL;
39243927
uint64_t fw_pri_mc_addr;
39253928
void *fw_pri_cpu_addr;
3929+
struct amdgpu_ip_block *ip_block;
39263930

3927-
if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
3931+
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
3932+
if (!ip_block || !ip_block->status.late_initialized) {
39283933
dev_err(adev->dev, "PSP block is not ready yet.");
39293934
return -EBUSY;
39303935
}

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,8 +2832,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
28322832

28332833
mutex_lock(&con->recovery_lock);
28342834
data = con->eh_data;
2835-
if (!data)
2835+
if (!data) {
2836+
/* Returning 0 as the absence of eh_data is acceptable */
28362837
goto free;
2838+
}
28372839

28382840
for (i = 0; i < pages; i++) {
28392841
if (from_rom &&
@@ -2845,26 +2847,34 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
28452847
* one row
28462848
*/
28472849
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
2848-
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
2850+
bps[i].retired_page <<
2851+
AMDGPU_GPU_PAGE_SHIFT)) {
2852+
ret = -EINVAL;
28492853
goto free;
2850-
else
2854+
} else {
28512855
find_pages_per_pa = true;
2856+
}
28522857
} else {
28532858
/* unsupported cases */
2859+
ret = -EOPNOTSUPP;
28542860
goto free;
28552861
}
28562862
}
28572863
} else {
28582864
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
2859-
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
2865+
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
2866+
ret = -EINVAL;
28602867
goto free;
2868+
}
28612869
}
28622870
} else {
28632871
if (from_rom && !find_pages_per_pa) {
28642872
if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
28652873
/* bad page in any NPS mode in eeprom */
2866-
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data))
2874+
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
2875+
ret = -EINVAL;
28672876
goto free;
2877+
}
28682878
} else {
28692879
/* legacy bad page in eeprom, generated only in
28702880
* NPS1 mode
@@ -2881,6 +2891,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
28812891
/* non-nps1 mode, old RAS TA
28822892
* can't support it
28832893
*/
2894+
ret = -EOPNOTSUPP;
28842895
goto free;
28852896
}
28862897
}

drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,13 +362,13 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
362362
if (!adev)
363363
return -ENODEV;
364364

365-
mask = (1 << adev->sdma.num_instances) - 1;
365+
mask = BIT_ULL(adev->sdma.num_instances) - 1;
366366
if ((val & mask) == 0)
367367
return -EINVAL;
368368

369369
for (i = 0; i < adev->sdma.num_instances; ++i) {
370370
ring = &adev->sdma.instance[i].ring;
371-
if (val & (1 << i))
371+
if (val & BIT_ULL(i))
372372
ring->sched.ready = true;
373373
else
374374
ring->sched.ready = false;

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,6 +2066,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
20662066
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
20672067
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
20682068
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
2069+
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
20692070
ttm_device_fini(&adev->mman.bdev);
20702071
adev->mman.initialized = false;
20712072
DRM_INFO("amdgpu: ttm finalized\n");

drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
567567
else
568568
remaining_size -= size;
569569
}
570-
mutex_unlock(&mgr->lock);
571570

572571
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
573572
struct drm_buddy_block *dcc_block;
@@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
584583
(u64)vres->base.size,
585584
&vres->blocks);
586585
}
586+
mutex_unlock(&mgr->lock);
587587

588588
vres->base.start = 0;
589589
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "clearstate_gfx10.h"
4646
#include "v10_structs.h"
4747
#include "gfx_v10_0.h"
48+
#include "gfx_v10_0_cleaner_shader.h"
4849
#include "nbio_v2_3.h"
4950

5051
/*
@@ -4738,6 +4739,23 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
47384739
break;
47394740
}
47404741
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4742+
case IP_VERSION(10, 3, 0):
4743+
case IP_VERSION(10, 3, 2):
4744+
case IP_VERSION(10, 3, 4):
4745+
case IP_VERSION(10, 3, 5):
4746+
adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
4747+
adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
4748+
if (adev->gfx.me_fw_version >= 64 &&
4749+
adev->gfx.pfp_fw_version >= 100 &&
4750+
adev->gfx.mec_fw_version >= 122) {
4751+
adev->gfx.enable_cleaner_shader = true;
4752+
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
4753+
if (r) {
4754+
adev->gfx.enable_cleaner_shader = false;
4755+
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
4756+
}
4757+
}
4758+
break;
47414759
default:
47424760
adev->gfx.enable_cleaner_shader = false;
47434761
break;
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright 2025 Advanced Micro Devices, Inc.
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a
6+
* copy of this software and associated documentation files (the "Software"),
7+
* to deal in the Software without restriction, including without limitation
8+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
* and/or sell copies of the Software, and to permit persons to whom the
10+
* Software is furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in
13+
* all copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18+
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21+
* OTHER DEALINGS IN THE SOFTWARE.
22+
*/
23+
24+
/* Define the cleaner shader gfx_10_3_0 */
25+
static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
26+
0xb0804004, 0xbf8a0000,
27+
0xbe8203b8, 0xbefc0380,
28+
0x7e008480, 0x7e028480,
29+
0x7e048480, 0x7e068480,
30+
0x7e088480, 0x7e0a8480,
31+
0x7e0c8480, 0x7e0e8480,
32+
0xbefc0302, 0x80828802,
33+
0xbf84fff5, 0xbe8203ff,
34+
0x80000000, 0x87020002,
35+
0xbf840012, 0xbefe03c1,
36+
0xbeff03c1, 0xd7650001,
37+
0x0001007f, 0xd7660001,
38+
0x0002027e, 0x16020288,
39+
0xbe8203bf, 0xbefc03c1,
40+
0xd9382000, 0x00020201,
41+
0xd9386040, 0x00040401,
42+
0xd70f6a01, 0x000202ff,
43+
0x00000400, 0x80828102,
44+
0xbf84fff7, 0xbefc03ff,
45+
0x00000068, 0xbe803080,
46+
0xbe813080, 0xbe823080,
47+
0xbe833080, 0x80fc847c,
48+
0xbf84fffa, 0xbeea0480,
49+
0xbeec0480, 0xbeee0480,
50+
0xbef00480, 0xbef20480,
51+
0xbef40480, 0xbef60480,
52+
0xbef80480, 0xbefa0480,
53+
0xbf810000, 0xbf9f0000,
54+
0xbf9f0000, 0xbf9f0000,
55+
0xbf9f0000, 0xbf9f0000,
56+
};
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright 2025 Advanced Micro Devices, Inc.
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a
6+
* copy of this software and associated documentation files (the "Software"),
7+
* to deal in the Software without restriction, including without limitation
8+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
* and/or sell copies of the Software, and to permit persons to whom the
10+
* Software is furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in
13+
* all copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18+
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19+
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20+
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21+
* OTHER DEALINGS IN THE SOFTWARE.
22+
*/
23+
24+
// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 192-Dword cleaner shader.
25+
// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
26+
27+
// GFX10.3 : Clear SGPRs, VGPRs and LDS
28+
// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
29+
// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
30+
// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
31+
// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
32+
// Each wave clears SGPRs 0 - 107
33+
// Each wave clears VGPRs 0 - 63
34+
// The first wave of the workgroup clears its 64KB of LDS
35+
// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
36+
// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
37+
38+
39+
shader main
40+
asic(GFX10)
41+
type(CS)
42+
wave_size(32)
43+
// Note: original source code from SQ team
44+
45+
//
46+
// Create 32 waves in a threadgroup (CS waves)
47+
// Each allocates 64 VGPRs
48+
// The workgroup allocates all of LDS (64kbytes)
49+
//
50+
// Takes about 2500 clocks to run.
51+
// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
52+
//
53+
S_BARRIER
54+
s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
55+
s_mov_b32 m0, 0
56+
//
57+
// CLEAR VGPRs
58+
//
59+
label_0005:
60+
v_movreld_b32 v0, 0
61+
v_movreld_b32 v1, 0
62+
v_movreld_b32 v2, 0
63+
v_movreld_b32 v3, 0
64+
v_movreld_b32 v4, 0
65+
v_movreld_b32 v5, 0
66+
v_movreld_b32 v6, 0
67+
v_movreld_b32 v7, 0
68+
s_mov_b32 m0, s2
69+
s_sub_u32 s2, s2, 8
70+
s_cbranch_scc0 label_0005
71+
//
72+
s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
73+
s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
74+
s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
75+
// CLEAR LDS
76+
//
77+
s_mov_b32 exec_lo, 0xffffffff
78+
s_mov_b32 exec_hi, 0xffffffff
79+
v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
80+
v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
81+
v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
82+
s_mov_b32 s2, 0x00000003f // 64 loop iterations
83+
s_mov_b32 m0, 0xffffffff
84+
// Clear all of LDS space
85+
// Each FirstWave of WorkGroup clears 64kbyte block
86+
87+
label_001F:
88+
ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
89+
ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
90+
v_add_co_u32 v1, vcc, 0x00000400, v1
91+
s_sub_u32 s2, s2, 1
92+
s_cbranch_scc0 label_001F
93+
94+
//
95+
// CLEAR SGPRs
96+
//
97+
label_0023:
98+
s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
99+
label_sgpr_loop:
100+
s_movreld_b32 s0, 0
101+
s_movreld_b32 s1, 0
102+
s_movreld_b32 s2, 0
103+
s_movreld_b32 s3, 0
104+
s_sub_u32 m0, m0, 4
105+
s_cbranch_scc0 label_sgpr_loop
106+
107+
// Clear flat_scratch, vcc and the ttmp registers
108+
s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
109+
s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
110+
s_mov_b64 vcc, 0 //clear vcc
111+
s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
112+
s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
113+
s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
114+
s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
115+
s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
116+
s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
117+
s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
118+
s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
119+
120+
s_endpgm
121+
122+
end
123+
124+

0 commit comments

Comments
 (0)