Commit 79726ef

[VP] Correct lowering of predicated fma and faddmul to avoid strictfp. (#85272)
Add the missing cases to a switch that caused @llvm.vp.fma.v4f32 to be lowered to a constrained fma intrinsic. Lowering vector-predicated operations to constrained intrinsics is not currently supported, and there is no consensus on the path forward. We certainly should not be introducing constrained intrinsics into a function that is not strictfp. Problem found with D146845.
1 parent 915c84b commit 79726ef
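As a quick illustration of the fix, here is a minimal sketch modeled on the new regression test added below (the function name @vp_fma_sketch and the value names are made up for this example): a predicated fma with an all-true mask in a non-strictfp function should now expand to the plain @llvm.fma intrinsic.

define <4 x float> @vp_fma_sketch(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind {
  ; Predicated fma with an all-true mask and a full EVL of 4.
  %r = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
  ret <4 x float> %r
}
declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

; With this patch, opt -expandvp rewrites the call to the unconstrained intrinsic:
;   %r = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; Before the patch, the missing switch cases sent this through
; IRBuilder::CreateConstrainedFPCall, so @llvm.experimental.constrained.fma.v4f32
; (with rounding and exception-behavior metadata operands) showed up in a
; function that is not marked strictfp.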

4 files changed, 203 insertions(+), 11 deletions(-)


llvm/include/llvm/IR/Intrinsics.h

Lines changed: 4 additions & 0 deletions
@@ -105,6 +105,10 @@ namespace Intrinsic {
   /// Map a MS builtin name to an intrinsic ID.
   ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
 
+  /// Returns true if the intrinsic ID is for one of the "Constrained
+  /// Floating-Point Intrinsics".
+  bool isConstrainedFPIntrinsic(ID QID);
+
   /// This is a type descriptor which explains the type requirements of an
   /// intrinsic. This is returned by getIntrinsicInfoTableEntries.
   struct IITDescriptor {

llvm/lib/CodeGen/ExpandVectorPredication.cpp

Lines changed: 10 additions & 2 deletions
@@ -340,15 +340,21 @@ Value *CachingVPExpander::expandPredicationToFPCall(
     replaceOperation(*NewOp, VPI);
     return NewOp;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd:
   case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_fmuladd: {
     Value *Op0 = VPI.getOperand(0);
     Value *Op1 = VPI.getOperand(1);
     Value *Op2 = VPI.getOperand(2);
     Function *Fn = Intrinsic::getDeclaration(
         VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
-    Value *NewOp =
-        Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+    Value *NewOp;
+    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
+      NewOp =
+          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+    else
+      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
     replaceOperation(*NewOp, VPI);
     return NewOp;
   }

@@ -731,6 +737,8 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
   case Intrinsic::vp_minnum:
   case Intrinsic::vp_maximum:
   case Intrinsic::vp_minimum:
+  case Intrinsic::vp_fma:
+  case Intrinsic::vp_fmuladd:
     return expandPredicationToFPCall(Builder, VPI,
                                      VPI.getFunctionalIntrinsicID().value());
   case Intrinsic::vp_load:

llvm/lib/IR/Function.cpp

Lines changed: 13 additions & 9 deletions
@@ -499,15 +499,7 @@ static MutableArrayRef<Argument> makeArgArray(Argument *Args, size_t Count) {
 }
 
 bool Function::isConstrainedFPIntrinsic() const {
-  switch (getIntrinsicID()) {
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
-  case Intrinsic::INTRINSIC:
-#include "llvm/IR/ConstrainedOps.def"
-    return true;
-#undef INSTRUCTION
-  default:
-    return false;
-  }
+  return Intrinsic::isConstrainedFPIntrinsic(getIntrinsicID());
 }
 
 void Function::clearArguments() {

@@ -1486,6 +1478,18 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
 #include "llvm/IR/IntrinsicImpl.inc"
 #undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
 
+bool Intrinsic::isConstrainedFPIntrinsic(ID QID) {
+  switch (QID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+  case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+    return true;
+#undef INSTRUCTION
+  default:
+    return false;
+  }
+}
+
 using DeferredIntrinsicMatchPair =
     std::pair<Type *, ArrayRef<Intrinsic::IITDescriptor>>;

Lines changed: 176 additions & 0 deletions
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -expandvp -S < %s | FileCheck %s
+
+define void @vp_fadd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_fadd_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RES1:%.*]] = fadd <4 x float> [[A0]], [[A1]]
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fadd.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @vp_fsub_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_fsub_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = fsub <4 x float> [[A0]], [[A1]]
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fsub.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @vp_fmul_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_fmul_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = fmul <4 x float> [[A0]], [[A1]]
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fmul.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @vp_fdiv_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_fdiv_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = fdiv <4 x float> [[A0]], [[A1]]
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @vp_frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_frem_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = frem <4 x float> [[A0]], [[A1]]
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.frem.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @vp_fabs_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_fabs_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A0]])
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fabs.v4f32(<4 x float>, <4 x i1>, i32)
+
+define void @vp_sqrt_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_sqrt_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[A0]])
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32)
+
+define void @vp_fneg_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; CHECK-LABEL: define void @vp_fneg_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = fneg <4 x float> [[A0]]
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32)
+
+define void @vp_fma_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind {
+; CHECK-LABEL: define void @vp_fma_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i4 [[A5:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A1]])
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 4)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @vp_fmuladd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind {
+; CHECK-LABEL: define void @vp_fmuladd_v4f32(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i4 [[A5:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A1]])
+; CHECK-NEXT:    store <4 x float> [[RES1]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 4)
+  store <4 x float> %res, ptr %out
+  ret void
+}
+declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
+
+declare <4 x float> @llvm.vp.maxnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: define <4 x float> @vfmax_vv_v4f32(
+; CHECK-SAME: <4 x float> [[VA:%.*]], <4 x float> [[VB:%.*]], <4 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VA]], <4 x float> [[VB]])
+; CHECK-NEXT:    ret <4 x float> [[V1]]
+;
+  %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x float> %v
+}
+
+declare <8 x float> @llvm.vp.maxnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
+define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: define <8 x float> @vfmax_vv_v8f32(
+; CHECK-SAME: <8 x float> [[VA:%.*]], <8 x float> [[VB:%.*]], <8 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[VA]], <8 x float> [[VB]])
+; CHECK-NEXT:    ret <8 x float> [[V1]]
+;
+  %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x float> %v
+}
+
+declare <4 x float> @llvm.vp.minnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
+define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: define <4 x float> @vfmin_vv_v4f32(
+; CHECK-SAME: <4 x float> [[VA:%.*]], <4 x float> [[VB:%.*]], <4 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VA]], <4 x float> [[VB]])
+; CHECK-NEXT:    ret <4 x float> [[V1]]
+;
+  %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x float> %v
+}
+
+declare <8 x float> @llvm.vp.minnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
+define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: define <8 x float> @vfmin_vv_v8f32(
+; CHECK-SAME: <8 x float> [[VA:%.*]], <8 x float> [[VB:%.*]], <8 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[VA]], <8 x float> [[VB]])
+; CHECK-NEXT:    ret <8 x float> [[V1]]
+;
+  %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x float> %v
+}
