Skip to content

Commit 1eb49bb

Browse files
author
Jessica Paquette
committed
[GlobalISel][CallLowering] Use hasRetAttr for return flags on CallBases
Given something like this:

```
declare signext i16 @signext_callee()

define i32 @caller() {
  %res = call i16 @signext_callee()
  ...
}
```

CallLowering would miss that signext_callee's return value is sign extended, because the signext attribute is on the callee's declaration and isn't on the call itself.

Use hasRetAttr on the CallBase to allow us to catch this.

(This now inserts G_ASSERT_SEXT/G_ASSERT_ZEXT like in the original review.)

Differential Revision: https://reviews.llvm.org/D86228
1 parent ccbb401 commit 1eb49bb

File tree

4 files changed

+107
-11
lines changed

4 files changed

+107
-11
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ class CallLowering {
352352
ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call,
353353
unsigned ArgIdx) const;
354354

355+
/// \returns Flags corresponding to the attributes on the return from \p Call.
356+
ISD::ArgFlagsTy getAttributesForReturn(const CallBase &Call) const;
357+
355358
/// Adds flags to \p Flags based off of the attributes in \p Attrs.
356359
/// \p OpIdx is the index in \p Attrs to add flags from.
357360
void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,

llvm/lib/CodeGen/GlobalISel/CallLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
7070
return Flags;
7171
}
7272

73+
ISD::ArgFlagsTy
74+
CallLowering::getAttributesForReturn(const CallBase &Call) const {
75+
ISD::ArgFlagsTy Flags;
76+
addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
77+
return Call.hasRetAttr(Attr);
78+
});
79+
return Flags;
80+
}
81+
7382
void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
7483
const AttributeList &Attrs,
7584
unsigned OpIdx) const {
@@ -141,7 +150,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
141150
Register ReturnHintAlignReg;
142151
Align ReturnHintAlign;
143152

144-
Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
153+
Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};
145154

146155
if (!Info.OrigRet.Ty->isVoidTy()) {
147156
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);

llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,3 +361,83 @@ define void @call_returns_array_size0_struct() {
361361
%call = call [1 x %size0type] @func.returns.array.size0.struct()
362362
ret void
363363
}
364+
365+
; Test extends on return values.
366+
367+
; This should notice that the return value from has_zext_return is zero
368+
; extended.
369+
declare zeroext i16 @has_zext_return()
370+
define i32 @test_zext_return_from_callee() {
371+
; CHECK-LABEL: name: test_zext_return_from_callee
372+
; CHECK: bb.1 (%ir-block.0):
373+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
374+
; CHECK-NEXT: BL @has_zext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
375+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
376+
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
377+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
378+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
379+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
380+
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
381+
; CHECK-NEXT: RET_ReallyLR implicit $w0
382+
%val = call i16 @has_zext_return()
383+
%ext = zext i16 %val to i32
384+
ret i32 %ext
385+
}
386+
387+
; Same as above, but with zeroext explicitly on the call. Should produce the
388+
; same codegen.
389+
define i32 @test_zext_return_from_callee2() {
390+
; CHECK-LABEL: name: test_zext_return_from_callee2
391+
; CHECK: bb.1 (%ir-block.0):
392+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
393+
; CHECK-NEXT: BL @has_zext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
394+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
395+
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
396+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
397+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
398+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
399+
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
400+
; CHECK-NEXT: RET_ReallyLR implicit $w0
401+
%val = call zeroext i16 @has_zext_return()
402+
%ext = zext i16 %val to i32
403+
ret i32 %ext
404+
}
405+
406+
; This should notice that the return value from has_sext_return is sign
407+
; extended.
408+
declare signext i16 @has_sext_return()
409+
define i32 @test_sext_return_from_callee() {
410+
; CHECK-LABEL: name: test_sext_return_from_callee
411+
; CHECK: bb.1 (%ir-block.0):
412+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
413+
; CHECK-NEXT: BL @has_sext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
414+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
415+
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
416+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
417+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
418+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
419+
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
420+
; CHECK-NEXT: RET_ReallyLR implicit $w0
421+
%val = call i16 @has_sext_return()
422+
%ext = sext i16 %val to i32
423+
ret i32 %ext
424+
}
425+
426+
; Same as above, but with signext explicitly on the call. Should produce the
427+
; same codegen.
428+
define i32 @test_sext_return_from_callee2() {
429+
; CHECK-LABEL: name: test_sext_return_from_callee2
430+
; CHECK: bb.1 (%ir-block.0):
431+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
432+
; CHECK-NEXT: BL @has_sext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
433+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
434+
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
435+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
436+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
437+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
438+
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
439+
; CHECK-NEXT: RET_ReallyLR implicit $w0
440+
%val = call signext i16 @has_sext_return()
441+
%ext = sext i16 %val to i32
442+
ret i32 %ext
443+
}

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,8 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
280280
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
281281
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
282282
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
283-
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
283+
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1
284+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
284285
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
285286
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
286287
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -341,7 +342,8 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
341342
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
342343
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
343344
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
344-
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
345+
; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1
346+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
345347
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
346348
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
347349
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -482,10 +484,10 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
482484
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
483485
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
484486
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
485-
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
486-
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
487+
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8
488+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
487489
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
488-
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
490+
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
489491
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
490492
; GCN-NEXT: S_ENDPGM 0
491493
%val = call i8 @external_i8_zeroext_func_void()
@@ -544,10 +546,10 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
544546
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
545547
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
546548
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
547-
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
548-
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
549+
; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8
550+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
549551
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
550-
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
552+
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
551553
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
552554
; GCN-NEXT: S_ENDPGM 0
553555
%val = call i8 @external_i8_signext_func_void()
@@ -665,7 +667,8 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
665667
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
666668
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
667669
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
668-
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
670+
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16
671+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
669672
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
670673
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
671674
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -726,7 +729,8 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
726729
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
727730
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
728731
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
729-
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
732+
; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16
733+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
730734
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
731735
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
732736
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)

0 commit comments

Comments
 (0)