Skip to content

Commit c08d7b3

Browse files
authored
AMDGPU: Fix verifier error on tail call target in vgprs (#110984)
We allow tail calls of known uniform function pointers. This would produce a verifier error if the uniform value is in VGPRs. Insert readfirstlanes just in case this occurs, which will fold out later if it is unnecessary. GlobalISel should need a similar fix, but it currently does not attempt tail calls of indirect calls. Fixes #107447 Fixes subissue of #110930
1 parent 3692995 commit c08d7b3

File tree

4 files changed

+164
-27
lines changed

4 files changed

+164
-27
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3565,6 +3565,7 @@ bool SITargetLowering::isEligibleForTailCallOptimization(
35653565
if (IsVarArg)
35663566
return false;
35673567

3568+
// FIXME: We need to know all arguments passed in SGPR are uniform.
35683569
for (const Argument &Arg : CallerF.args()) {
35693570
if (Arg.hasByValAttr())
35703571
return false;
@@ -3875,15 +3876,37 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
38753876
InGlue = Chain.getValue(1);
38763877
}
38773878

3878-
std::vector<SDValue> Ops;
3879-
Ops.push_back(Chain);
3880-
Ops.push_back(Callee);
3879+
std::vector<SDValue> Ops({Chain});
3880+
38813881
// Add a redundant copy of the callee global which will not be legalized, as
38823882
// we need direct access to the callee later.
38833883
if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
38843884
const GlobalValue *GV = GSD->getGlobal();
3885+
Ops.push_back(Callee);
38853886
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
38863887
} else {
3888+
if (IsTailCall) {
3889+
assert(!Callee->isDivergent() &&
3890+
"cannot tail call a divergent call target");
3891+
3892+
// isEligibleForTailCallOptimization considered whether the call target is
3893+
// divergent, but we may still end up with a uniform value in a VGPR.
3894+
// Insert a readfirstlane just in case.
3895+
SDValue ReadFirstLaneID =
3896+
DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, DL, MVT::i32);
3897+
3898+
SmallVector<SDValue, 3> ReadfirstlaneArgs({ReadFirstLaneID, Callee});
3899+
if (CLI.ConvergenceControlToken) {
3900+
SDValue TokenGlue = DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {},
3901+
MVT::Glue, CLI.ConvergenceControlToken);
3902+
ReadfirstlaneArgs.push_back(TokenGlue); // Wire up convergence token.
3903+
}
3904+
3905+
Callee = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Callee.getValueType(),
3906+
ReadfirstlaneArgs);
3907+
}
3908+
3909+
Ops.push_back(Callee);
38873910
Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
38883911
}
38893912

llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,14 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
486486
; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2
487487
; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1
488488
; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0
489-
; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
489+
; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
490+
; DAGISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
491+
; DAGISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
492+
; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
493+
; DAGISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
494+
; DAGISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
495+
; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
496+
; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1
490497
; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
491498
; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]]
492499
; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]]
@@ -495,7 +502,7 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
495502
; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]]
496503
; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]]
497504
; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]]
498-
; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
505+
; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
499506
;
500507
; DAGISEL-GFX10-LABEL: name: indirect
501508
; DAGISEL-GFX10: bb.0 (%ir-block.0):
@@ -510,18 +517,25 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
510517
; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2
511518
; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1
512519
; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0
513-
; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
514-
; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
520+
; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
521+
; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
522+
; DAGISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
523+
; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
524+
; DAGISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
525+
; DAGISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
526+
; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
527+
; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1
528+
; DAGISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
515529
; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
516-
; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]]
530+
; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY13]]
517531
; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]]
518532
; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]]
519533
; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]]
520534
; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
521535
; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]]
522536
; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]]
523537
; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]]
524-
; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
538+
; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
525539
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0)
526540
unreachable
527541
}
@@ -697,15 +711,22 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i
697711
; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2
698712
; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1
699713
; DAGISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0
700-
; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1
714+
; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1
715+
; DAGISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
716+
; DAGISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
717+
; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
718+
; DAGISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
719+
; DAGISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
720+
; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
721+
; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1
701722
; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]]
702723
; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]]
703724
; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]]
704725
; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
705726
; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]]
706727
; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]]
707728
; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]]
708-
; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
729+
; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
709730
;
710731
; DAGISEL-GFX10-LABEL: name: indirect_with_non_imm_exec
711732
; DAGISEL-GFX10: bb.0 (%ir-block.0):
@@ -721,17 +742,24 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i
721742
; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2
722743
; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1
723744
; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0
724-
; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1
725-
; DAGISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
726-
; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
745+
; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1
746+
; DAGISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
747+
; DAGISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
748+
; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
749+
; DAGISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
750+
; DAGISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
751+
; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
752+
; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1
753+
; DAGISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
754+
; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY14]]
727755
; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]]
728756
; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]]
729757
; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]]
730758
; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
731759
; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]]
732760
; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]]
733761
; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]]
734-
; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
762+
; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
735763
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0)
736764
unreachable
737765
}

0 commit comments

Comments
 (0)