Skip to content

Commit 134829f

Browse files
author
Simon Moll
committed
[VP] IR support for vector-predicated integer operations
This patch is part of the integer patch set of the Vector Predication extension (D57504). VP / integer slice / patch #1
1 parent 815ef29 commit 134829f

File tree

11 files changed

+1187
-4
lines changed

11 files changed

+1187
-4
lines changed

llvm/docs/LangRef.rst

Lines changed: 668 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,48 @@ namespace llvm {
206206
/// @}
207207
};
208208

209+
/// This is the common base class for vector predication intrinsics.
/// It wraps calls to the llvm.vp.* intrinsics and gives access to their mask
/// and vector length operands.
210+
class VPIntrinsic : public IntrinsicInst {
211+
public:
/// \return the argument position of the mask parameter of \p IntrinsicID, or
/// None if the ID is not a registered VP intrinsic.
212+
static Optional<int> GetMaskParamPos(Intrinsic::ID IntrinsicID);
/// \return the argument position of the vector length parameter of
/// \p IntrinsicID, or None if the ID is not a registered VP intrinsic.
213+
static Optional<int> GetVectorLengthParamPos(Intrinsic::ID IntrinsicID);
214+
215+
/// \return the llvm.vp.* intrinsic ID for the instruction opcode \p OC, or
/// Intrinsic::not_intrinsic if no VP intrinsic maps to that opcode.
216+
static Intrinsic::ID GetForOpcode(unsigned OC);
217+
218+
/// \return whether the given intrinsic ID is a VP intrinsic ID.
219+
static bool IsVPIntrinsic(Intrinsic::ID);
220+
221+
/// \return the mask parameter or nullptr.
222+
Value *getMaskParam() const;
223+
224+
/// \return the vector length parameter or nullptr.
225+
Value *getVectorLengthParam() const;
226+
227+
/// \return whether the vector length param can be ignored.
228+
bool canIgnoreVectorLengthParam() const;
229+
230+
/// \return the static element count (vector number of elements) the vector
231+
/// length parameter applies to. It is read from the vector type of the mask
/// parameter.
232+
ElementCount getVectorLength() const;
233+
234+
// Methods for support type inquiry through isa, cast, and dyn_cast:
235+
static bool classof(const IntrinsicInst *I) {
236+
return IsVPIntrinsic(I->getIntrinsicID());
237+
}
238+
static bool classof(const Value *V) {
239+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
240+
}
241+
242+
/// \return the equivalent non-predicated instruction opcode of this
/// intrinsic call (forwards to GetFunctionalOpcodeForVP).
243+
unsigned getFunctionalOpcode() const {
244+
return GetFunctionalOpcodeForVP(getIntrinsicID());
245+
}
246+
247+
/// \return the equivalent non-predicated instruction opcode for \p ID, or
/// Instruction::Call if no opcode is registered for it.
248+
static unsigned GetFunctionalOpcodeForVP(Intrinsic::ID ID);
249+
};
250+
209251
/// This is the common base class for constrained floating point intrinsics.
210252
class ConstrainedFPIntrinsic : public IntrinsicInst {
211253
public:

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ class IntrinsicProperty;
2727
// effects. It may be CSE'd, deleted if dead, etc.
2828
def IntrNoMem : IntrinsicProperty;
2929

30+
// IntrNoSync - Threads executing the intrinsic will not synchronize using
31+
// memory or other means.
// Applied to the vector predication (llvm.vp.*) intrinsics in this file.
32+
def IntrNoSync : IntrinsicProperty;
33+
3034
// IntrReadMem - This intrinsic only reads from memory. It does not write to
3135
// memory and has no other side effects. Therefore, it cannot be moved across
3236
// potentially aliasing stores. However, it can be reordered otherwise and can
@@ -1145,6 +1149,79 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWil
11451149
def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
11461150
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
11471151

1152+
//===---------------- Vector Predication Intrinsics --------------===//
1153+
1154+
// Binary operators
// Every intrinsic below has the same signature: the result is an overloaded
// vector type, followed by two operands of that same type, a mask operand
// declared as LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, and an i32 vector
// length operand, in that order.
1155+
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
1156+
def int_vp_add : Intrinsic<[ llvm_anyvector_ty ],
1157+
[ LLVMMatchType<0>,
1158+
LLVMMatchType<0>,
1159+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1160+
llvm_i32_ty]>;
1161+
def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ],
1162+
[ LLVMMatchType<0>,
1163+
LLVMMatchType<0>,
1164+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1165+
llvm_i32_ty]>;
1166+
def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ],
1167+
[ LLVMMatchType<0>,
1168+
LLVMMatchType<0>,
1169+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1170+
llvm_i32_ty]>;
1171+
def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ],
1172+
[ LLVMMatchType<0>,
1173+
LLVMMatchType<0>,
1174+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1175+
llvm_i32_ty]>;
1176+
def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ],
1177+
[ LLVMMatchType<0>,
1178+
LLVMMatchType<0>,
1179+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1180+
llvm_i32_ty]>;
1181+
def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ],
1182+
[ LLVMMatchType<0>,
1183+
LLVMMatchType<0>,
1184+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1185+
llvm_i32_ty]>;
1186+
def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ],
1187+
[ LLVMMatchType<0>,
1188+
LLVMMatchType<0>,
1189+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1190+
llvm_i32_ty]>;
1191+
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
1192+
[ LLVMMatchType<0>,
1193+
LLVMMatchType<0>,
1194+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1195+
llvm_i32_ty]>;
1196+
def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ],
1197+
[ LLVMMatchType<0>,
1198+
LLVMMatchType<0>,
1199+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1200+
llvm_i32_ty]>;
1201+
def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ],
1202+
[ LLVMMatchType<0>,
1203+
LLVMMatchType<0>,
1204+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1205+
llvm_i32_ty]>;
1206+
def int_vp_or : Intrinsic<[ llvm_anyvector_ty ],
1207+
[ LLVMMatchType<0>,
1208+
LLVMMatchType<0>,
1209+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1210+
llvm_i32_ty]>;
1211+
def int_vp_and : Intrinsic<[ llvm_anyvector_ty ],
1212+
[ LLVMMatchType<0>,
1213+
LLVMMatchType<0>,
1214+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1215+
llvm_i32_ty]>;
1216+
def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ],
1217+
[ LLVMMatchType<0>,
1218+
LLVMMatchType<0>,
1219+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1220+
llvm_i32_ty]>;
1221+
1222+
}
1223+
1224+
11481225
//===-------------------------- Masked Intrinsics -------------------------===//
11491226
//
11501227
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===-- IR/VPIntrinsics.def - Describes llvm.vp.* Intrinsics -*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains descriptions of the various Vector Predication intrinsics.
10+
// This is used as a central place for enumerating the different intrinsics
11+
// and should eventually be the place to put comments about the intrinsics.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
// NOTE: NO INCLUDE GUARD DESIRED!
16+
17+
// Provide definitions of macros so that users of this file do not have to
18+
// define everything to use it...
19+
//
// REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) registers one VP intrinsic:
//   VPID    - name of the intrinsic inside the Intrinsic namespace.
//   MASKPOS - argument position of the mask parameter.
//   VLENPOS - argument position of the vector length parameter.
// The default expands to nothing.
20+
#ifndef REGISTER_VP_INTRINSIC
21+
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
22+
#endif
23+
24+
// Map this VP intrinsic to its functional Opcode
// OC is the name of the opcode inside the Instruction namespace. The default
// expands to nothing.
25+
#ifndef HANDLE_VP_TO_OC
26+
#define HANDLE_VP_TO_OC(VPID, OC)
27+
#endif
28+
29+
///// Integer Arithmetic /////
// Each entry registers one intrinsic with its mask at argument position 2 and
// its vector length at argument position 3, then maps it to the instruction
// opcode it corresponds to.
30+
31+
// llvm.vp.add(x,y,mask,vlen)
32+
REGISTER_VP_INTRINSIC(vp_add, 2, 3)
33+
HANDLE_VP_TO_OC(vp_add, Add)
34+
35+
// llvm.vp.and(x,y,mask,vlen)
36+
REGISTER_VP_INTRINSIC(vp_and, 2, 3)
37+
HANDLE_VP_TO_OC(vp_and, And)
38+
39+
// llvm.vp.ashr(x,y,mask,vlen)
40+
REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
41+
HANDLE_VP_TO_OC(vp_ashr, AShr)
42+
43+
// llvm.vp.lshr(x,y,mask,vlen)
44+
REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
45+
HANDLE_VP_TO_OC(vp_lshr, LShr)
46+
47+
// llvm.vp.mul(x,y,mask,vlen)
48+
REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
49+
HANDLE_VP_TO_OC(vp_mul, Mul)
50+
51+
// llvm.vp.or(x,y,mask,vlen)
52+
REGISTER_VP_INTRINSIC(vp_or, 2, 3)
53+
HANDLE_VP_TO_OC(vp_or, Or)
54+
55+
// llvm.vp.sdiv(x,y,mask,vlen)
56+
REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
57+
HANDLE_VP_TO_OC(vp_sdiv, SDiv)
58+
59+
// llvm.vp.shl(x,y,mask,vlen)
60+
REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
61+
HANDLE_VP_TO_OC(vp_shl, Shl)
62+
63+
// llvm.vp.srem(x,y,mask,vlen)
64+
REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
65+
HANDLE_VP_TO_OC(vp_srem, SRem)
66+
67+
// llvm.vp.sub(x,y,mask,vlen)
68+
REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
69+
HANDLE_VP_TO_OC(vp_sub, Sub)
70+
71+
// llvm.vp.udiv(x,y,mask,vlen)
72+
REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
73+
HANDLE_VP_TO_OC(vp_udiv, UDiv)
74+
75+
// llvm.vp.urem(x,y,mask,vlen)
76+
REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
77+
HANDLE_VP_TO_OC(vp_urem, URem)
78+
79+
// llvm.vp.xor(x,y,mask,vlen)
80+
REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
81+
HANDLE_VP_TO_OC(vp_xor, Xor)
82+
83+
// Drop both macros so the next inclusion of this file starts from a clean
// slate and can supply its own definitions.
#undef REGISTER_VP_INTRINSIC
84+
#undef HANDLE_VP_TO_OC

llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,119 @@ bool ConstrainedFPIntrinsic::classof(const IntrinsicInst *I) {
178178
}
179179
}
180180

181+
/// Report the static element count of this intrinsic, taken from the vector
/// type of its mask operand.
ElementCount VPIntrinsic::getVectorLength() const {
  // The mask operand is cast to a vector type; its element count is the
  // result.
  const Type *MaskTy = getMaskParam()->getType();
  return cast<VectorType>(MaskTy)->getElementCount();
}
191+
192+
/// Fetch the mask operand of this call, or nullptr when no mask position is
/// known for the intrinsic ID.
Value *VPIntrinsic::getMaskParam() const {
  const Optional<int> MaskIdx = GetMaskParamPos(getIntrinsicID());
  return MaskIdx.hasValue() ? getArgOperand(MaskIdx.getValue()) : nullptr;
}
198+
199+
/// Fetch the vector length operand of this call, or nullptr when no vector
/// length position is known for the intrinsic ID.
Value *VPIntrinsic::getVectorLengthParam() const {
  const Optional<int> VLenIdx = GetVectorLengthParamPos(getIntrinsicID());
  return VLenIdx.hasValue() ? getArgOperand(VLenIdx.getValue()) : nullptr;
}
205+
206+
/// Look up the argument position of the mask parameter for \p IntrinsicID.
/// Yields None for every ID that VPIntrinsics.def does not register.
Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
// Each registration expands to a case that returns its mask position.
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  case Intrinsic::VPID:                                                        \
    return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"

  default:
    break;
  }

  // Not a registered VP intrinsic.
  return None;
}
217+
218+
/// Look up the argument position of the vector length parameter for
/// \p IntrinsicID. Yields None for every ID that VPIntrinsics.def does not
/// register.
Optional<int> VPIntrinsic::GetVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
// Each registration expands to a case that returns its vector length position.
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  case Intrinsic::VPID:                                                        \
    return VLENPOS;
#include "llvm/IR/VPIntrinsics.def"

  default:
    break;
  }

  // Not a registered VP intrinsic.
  return None;
}
229+
230+
/// Tell whether \p ID is one of the intrinsics registered in
/// VPIntrinsics.def.
bool VPIntrinsic::IsVPIntrinsic(Intrinsic::ID ID) {
  switch (ID) {
// Every registered intrinsic answers true directly from its case.
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  case Intrinsic::VPID:                                                        \
    return true;
#include "llvm/IR/VPIntrinsics.def"

  default:
    return false;
  }
}
242+
243+
// Translate a VP intrinsic ID into the instruction opcode registered for it.
// IDs without a registered opcode yield Instruction::Call.
unsigned VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::ID ID) {
  switch (ID) {
// Each mapping expands to a case that returns the matching opcode.
#define HANDLE_VP_TO_OC(VPID, OPC)                                             \
  case Intrinsic::VPID:                                                        \
    return Instruction::OPC;
#include "llvm/IR/VPIntrinsics.def"

  default:
    break;
  }

  return Instruction::Call;
}
255+
256+
/// Translate the instruction opcode \p OC into the VP intrinsic registered for
/// it. Opcodes without a registered intrinsic yield Intrinsic::not_intrinsic.
Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned OC) {
  switch (OC) {
// Each mapping expands to a case that returns the matching intrinsic ID.
#define HANDLE_VP_TO_OC(VPID, OPC)                                             \
  case Instruction::OPC:                                                       \
    return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"

  default:
    break;
  }

  return Intrinsic::not_intrinsic;
}
267+
268+
bool VPIntrinsic::canIgnoreVectorLengthParam() const {
269+
// No vlen param - no lanes masked-off by it.
270+
auto *VLParam = getVectorLengthParam();
271+
if (!VLParam)
272+
return true;
273+
274+
// Can ignore if MSB of vlen is set.
275+
auto VLConst = dyn_cast<ConstantInt>(VLParam);
276+
if (VLConst && VLConst->getSExtValue() < 0)
277+
return true;
278+
279+
// Vlen param greater-equal type vlen - no lanes masked-off.
280+
if (VLConst) {
281+
auto ElemCount = getVectorLength();
282+
if (ElemCount.Scalable)
283+
return false;
284+
285+
uint64_t VLNum = VLConst->getZExtValue();
286+
if (VLNum >= ElemCount.Min)
287+
return true;
288+
}
289+
290+
// Cannot ignore vlen param by default.
291+
return false;
292+
}
293+
181294
Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
182295
switch (getIntrinsicID()) {
183296
case Intrinsic::uadd_with_overflow:

llvm/test/Verifier/vp-intrinsics.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: opt --verify %s
2+
3+
define void @test_vp_int(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) {
4+
%r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
5+
%r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
6+
%r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
7+
%r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
8+
%r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
9+
%r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
10+
%r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
11+
%r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
12+
%r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
13+
%r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
14+
%rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
15+
%rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
16+
%rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
17+
ret void
18+
}
19+
20+
; integer arith
21+
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
22+
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
23+
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
24+
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
25+
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
26+
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
27+
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
28+
; bit arith
29+
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
30+
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
31+
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
32+
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
33+
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
34+
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)

llvm/unittests/IR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ add_llvm_unittest(IRTests
4040
ValueTest.cpp
4141
VectorTypesTest.cpp
4242
VerifierTest.cpp
43+
VPIntrinsicTest.cpp
4344
WaymarkTest.cpp
4445
)
4546

0 commit comments

Comments
 (0)