@@ -1312,8 +1312,8 @@ main_body:
1312
1312
ret void
1313
1313
}
1314
1314
1315
- define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged (<4 x i32 > inreg %rsrc ) {
1316
- ; PREGFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1315
+ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 (<4 x i32 > inreg %rsrc ) {
1316
+ ; PREGFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
1317
1317
; PREGFX10: ; %bb.0: ; %main_body
1318
1318
; PREGFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
1319
1319
; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
@@ -1327,7 +1327,7 @@ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> i
1327
1327
; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1328
1328
; PREGFX10-NEXT: s_endpgm
1329
1329
;
1330
- ; GFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1330
+ ; GFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
1331
1331
; GFX10: ; %bb.0: ; %main_body
1332
1332
; GFX10-NEXT: s_clause 0x5
1333
1333
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
@@ -1342,7 +1342,7 @@ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> i
1342
1342
; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1343
1343
; GFX10-NEXT: s_endpgm
1344
1344
;
1345
- ; GFX11-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1345
+ ; GFX11-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
1346
1346
; GFX11: ; %bb.0: ; %main_body
1347
1347
; GFX11-NEXT: s_clause 0x5
1348
1348
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 offset:4
@@ -1357,7 +1357,7 @@ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> i
1357
1357
; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1358
1358
; GFX11-NEXT: s_endpgm
1359
1359
;
1360
- ; GFX12-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1360
+ ; GFX12-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
1361
1361
; GFX12: ; %bb.0: ; %main_body
1362
1362
; GFX12-NEXT: s_clause 0x1
1363
1363
; GFX12-NEXT: buffer_load_b128 v[0:3], off, s[0:3], null offset:4 scope:SCOPE_SE
@@ -1379,6 +1379,65 @@ main_body:
1379
1379
ret void
1380
1380
}
1381
1381
1382
; Issues six single-dword raw buffer loads from %rsrc at offsets 4, 8, 12, 16,
; 28 and 32, each passing 64 as the final i32 operand, and feeds the results
; into two exports (the second export leaves its last two channels undef).
; Expected output recorded below: the PREGFX10, GFX10 and GFX11 prefixes see
; the loads combined into one four-dword load at offset 4 plus one two-dword
; load at offset 28, whereas GFX12 keeps six separate buffer_load_b32
; instructions inside a single clause.
; NOTE(review): presumably 64 (bit 6) is the swizzle bit only on GFX12, which
; would explain why only GFX12 leaves the loads unmerged -- confirm against the
; intrinsic's aux/cachepolicy operand definition.
+ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged (<4 x i32 > inreg %rsrc ) {
1383
+ ; PREGFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1384
+ ; PREGFX10: ; %bb.0: ; %main_body
1385
+ ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1386
+ ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1387
+ ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
1388
+ ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1389
+ ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1390
+ ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1391
+ ; PREGFX10-NEXT: s_endpgm
1392
+ ;
1393
+ ; GFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1394
+ ; GFX10: ; %bb.0: ; %main_body
1395
+ ; GFX10-NEXT: s_clause 0x1
1396
+ ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1397
+ ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1398
+ ; GFX10-NEXT: s_waitcnt vmcnt(1)
1399
+ ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1400
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1401
+ ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1402
+ ; GFX10-NEXT: s_endpgm
1403
+ ;
1404
+ ; GFX11-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1405
+ ; GFX11: ; %bb.0: ; %main_body
1406
+ ; GFX11-NEXT: s_clause 0x1
1407
+ ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
1408
+ ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
1409
+ ; GFX11-NEXT: s_waitcnt vmcnt(1)
1410
+ ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
1411
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
1412
+ ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1413
+ ; GFX11-NEXT: s_endpgm
1414
+ ;
1415
+ ; GFX12-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1416
+ ; GFX12: ; %bb.0: ; %main_body
1417
+ ; GFX12-NEXT: s_clause 0x5
1418
+ ; GFX12-NEXT: buffer_load_b32 v0, off, s[0:3], null offset:4
1419
+ ; GFX12-NEXT: buffer_load_b32 v1, off, s[0:3], null offset:8
1420
+ ; GFX12-NEXT: buffer_load_b32 v2, off, s[0:3], null offset:12
1421
+ ; GFX12-NEXT: buffer_load_b32 v3, off, s[0:3], null offset:16
1422
+ ; GFX12-NEXT: buffer_load_b32 v4, off, s[0:3], null offset:28
1423
+ ; GFX12-NEXT: buffer_load_b32 v5, off, s[0:3], null offset:32
1424
+ ; GFX12-NEXT: s_wait_loadcnt 0x2
1425
+ ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
1426
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1427
+ ; GFX12-NEXT: export mrt0 v4, v5, v0, v0 done
1428
+ ; GFX12-NEXT: s_endpgm
1429
+ main_body:
1430
+ %r1 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 4 , i32 0 , i32 64 )
1431
+ %r2 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 8 , i32 0 , i32 64 )
1432
+ %r3 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 12 , i32 0 , i32 64 )
1433
+ %r4 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 16 , i32 0 , i32 64 )
1434
+ %r5 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 28 , i32 0 , i32 64 )
1435
+ %r6 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 32 , i32 0 , i32 64 )
1436
+ call void @llvm.amdgcn.exp.f32 (i32 0 , i32 15 , float %r1 , float %r2 , float %r3 , float %r4 , i1 true , i1 true )
1437
+ call void @llvm.amdgcn.exp.f32 (i32 0 , i32 15 , float %r5 , float %r6 , float undef , float undef , i1 true , i1 true )
1438
+ ret void
1439
+ }
1440
+
1382
1441
declare float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 >, i32 , i32 , i32 ) #0
1383
1442
declare <2 x float > @llvm.amdgcn.raw.buffer.load.v2f32 (<4 x i32 >, i32 , i32 , i32 ) #0
1384
1443
declare <4 x float > @llvm.amdgcn.raw.buffer.load.v4f32 (<4 x i32 >, i32 , i32 , i32 ) #0
0 commit comments