Skip to content

Commit 6cc8a46

Browse files
committed
Added amdgcn_update_dpp
1 parent 9f02e25 commit 6cc8a46

File tree

4 files changed

+31
-8
lines changed

4 files changed

+31
-8
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6561,6 +6561,7 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
65616561
case Intrinsic::amdgcn_set_inactive:
65626562
case Intrinsic::amdgcn_set_inactive_chain_arg:
65636563
case Intrinsic::amdgcn_mov_dpp8:
6564+
case Intrinsic::amdgcn_update_dpp:
65646565
Results.push_back(lowerLaneOp(*this, N, DAG));
65656566
return;
65666567
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8961,6 +8961,7 @@ define void @v_permlane16_i8(ptr addrspace(1) %out, i8 %src0, i32 %src1, i32 %sr
89618961
; GFX12-NEXT: s_wait_kmcnt 0x0
89628962
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
89638963
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
8964+
; GFX12-NEXT: s_wait_alu 0xf1ff
89648965
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
89658966
; GFX12-NEXT: v_permlane16_b32 v2, v2, s0, s1
89668967
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
@@ -9001,6 +9002,7 @@ define void @v_permlane16_i1(ptr addrspace(1) %out, i1 %src0, i32 %src1, i32 %sr
90019002
; GFX12-NEXT: s_wait_kmcnt 0x0
90029003
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
90039004
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9005+
; GFX12-NEXT: s_wait_alu 0xf1ff
90049006
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
90059007
; GFX12-NEXT: v_permlane16_b32 v2, v2, s0, s1
90069008
; GFX12-NEXT: v_and_b32_e32 v2, 1, v2
@@ -9539,6 +9541,7 @@ define void @v_permlanex16_i8(ptr addrspace(1) %out, i8 %src0, i32 %src1, i32 %s
95399541
; GFX12-NEXT: s_wait_kmcnt 0x0
95409542
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
95419543
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9544+
; GFX12-NEXT: s_wait_alu 0xf1ff
95429545
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
95439546
; GFX12-NEXT: v_permlanex16_b32 v2, v2, s0, s1
95449547
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
@@ -9579,6 +9582,7 @@ define void @v_permlanex16_i1(ptr addrspace(1) %out, i1 %src0, i32 %src1, i32 %s
95799582
; GFX12-NEXT: s_wait_kmcnt 0x0
95809583
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
95819584
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9585+
; GFX12-NEXT: s_wait_alu 0xf1ff
95829586
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
95839587
; GFX12-NEXT: v_permlanex16_b32 v2, v2, s0, s1
95849588
; GFX12-NEXT: v_and_b32_e32 v2, 1, v2

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,24 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb8(ptr addrspace(1) %out, <2 x
576576
ret void
577577
}
578578

579+
; GCN-LABEL: {{^}}dpp_i8:
580+
; GCN: v_mov_b32_dpp v2, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
581+
; GCN: store_{{byte|b8}} v[0:1], v2
582+
define void @dpp_i8(ptr addrspace(1) %out, i8 %in) {
583+
%tmp0 = call i8 @llvm.amdgcn.update.dpp.i8(i8 %in, i8 %in, i32 1, i32 1, i32 1, i1 false) #0
584+
store i8 %tmp0, ptr addrspace(1) %out
585+
ret void
586+
}
587+
588+
; GCN-LABEL: {{^}}dpp_i1:
589+
; GCN: v_mov_b32_dpp v2, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
590+
; GCN: store_{{byte|b8}} v[0:1], v2
591+
define void @dpp_i1(ptr addrspace(1) %out, i1 %in) {
592+
%tmp0 = call i1 @llvm.amdgcn.update.dpp.i1(i1 %in, i1 %in, i32 1, i32 1, i32 1, i1 false) #0
593+
store i1 %tmp0, ptr addrspace(1) %out
594+
ret void
595+
}
596+
579597
declare i32 @llvm.amdgcn.workitem.id.x()
580598
declare void @llvm.amdgcn.s.barrier()
581599
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2705,11 +2705,11 @@ define void @test_writelane_i8(ptr addrspace(1) %out, i8 %src, i32 %src1) {
27052705
; GFX802-SDAG: ; %bb.0:
27062706
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27072707
; GFX802-SDAG-NEXT: flat_load_ubyte v4, v[0:1]
2708-
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
2709-
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2708+
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2709+
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2710+
; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
27102711
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2711-
; GFX802-SDAG-NEXT: s_nop 1
2712-
; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
2712+
; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
27132713
; GFX802-SDAG-NEXT: flat_store_byte v[0:1], v4
27142714
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
27152715
; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2783,11 +2783,11 @@ define void @test_writelane_i1(ptr addrspace(1) %out, i1 %src, i32 %src1) {
27832783
; GFX802-SDAG: ; %bb.0:
27842784
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27852785
; GFX802-SDAG-NEXT: flat_load_ubyte v4, v[0:1]
2786-
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
2787-
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2786+
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2787+
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2788+
; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
27882789
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2789-
; GFX802-SDAG-NEXT: s_nop 1
2790-
; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
2790+
; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
27912791
; GFX802-SDAG-NEXT: v_and_b32_e32 v2, 1, v4
27922792
; GFX802-SDAG-NEXT: flat_store_byte v[0:1], v2
27932793
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)