Skip to content

Commit 62c4a45

Browse files
author
Simon Moll
committed
[VP] IR support for vector-predicated integer operations
This patch is part of the integer patch set of the Vector Predication extension (D57504). VP / integer slice / patch #1
1 parent eedb964 commit 62c4a45

File tree

11 files changed

+1187
-4
lines changed

11 files changed

+1187
-4
lines changed

llvm/docs/LangRef.rst

Lines changed: 668 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,48 @@ namespace llvm {
206206
/// @}
207207
};
208208

209+
/// This is the common base class for vector predication intrinsics.
210+
class VPIntrinsic : public IntrinsicInst {
211+
public:
212+
static Optional<int> GetMaskParamPos(Intrinsic::ID IntrinsicID);
213+
static Optional<int> GetVectorLengthParamPos(Intrinsic::ID IntrinsicID);
214+
215+
/// The llvm.vp.* intrinsics for this instruction Opcode
216+
static Intrinsic::ID GetForOpcode(unsigned OC);
217+
218+
// Whether \p ID is a VP intrinsic ID.
219+
static bool IsVPIntrinsic(Intrinsic::ID);
220+
221+
/// \return the mask parameter or nullptr.
222+
Value *getMaskParam() const;
223+
224+
/// \return the vector length parameter or nullptr.
225+
Value *getVectorLengthParam() const;
226+
227+
/// \return whether the vector length param can be ignored.
228+
bool canIgnoreVectorLengthParam() const;
229+
230+
/// \return the static element count (vector number of elements) the vector
231+
/// length parameter applies to.
232+
ElementCount getVectorLength() const;
233+
234+
// Methods for support type inquiry through isa, cast, and dyn_cast:
235+
static bool classof(const IntrinsicInst *I) {
236+
return IsVPIntrinsic(I->getIntrinsicID());
237+
}
238+
static bool classof(const Value *V) {
239+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
240+
}
241+
242+
// Equivalent non-predicated opcode
243+
unsigned getFunctionalOpcode() const {
244+
return GetFunctionalOpcodeForVP(getIntrinsicID());
245+
}
246+
247+
// Equivalent non-predicated opcode
248+
static unsigned GetFunctionalOpcodeForVP(Intrinsic::ID ID);
249+
};
250+
209251
/// This is the common base class for constrained floating point intrinsics.
210252
class ConstrainedFPIntrinsic : public IntrinsicInst {
211253
public:

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ class IntrinsicProperty;
2727
// effects. It may be CSE'd deleted if dead, etc.
2828
def IntrNoMem : IntrinsicProperty;
2929

30+
// IntrNoSync - Threads executing this intrinsic do not synchronize with other
31+
// threads, neither through memory nor by any other means.
32+
def IntrNoSync : IntrinsicProperty;
33+
3034
// IntrReadMem - This intrinsic only reads from memory. It does not write to
3135
// memory and has no other side effects. Therefore, it cannot be moved across
3236
// potentially aliasing stores. However, it can be reordered otherwise and can
@@ -1099,6 +1103,79 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWil
10991103
def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
11001104
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
11011105

1106+
//===---------------- Vector Predication Intrinsics --------------===//

// Binary operators.
//
// All of the definitions below share one signature:
//   result = llvm.vp.<op>(lhs, rhs, mask, vlen)
// The result is an overloaded vector type, lhs and rhs match it
// (LLVMMatchType<0>), the mask is i1-typed with the width of operand 0
// (LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>) and vlen is an i32.
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
  def int_vp_add  : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_sub  : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_mul  : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_sdiv : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_udiv : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_srem : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_urem : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_ashr : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_lshr : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_shl  : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_or   : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_and  : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
  def int_vp_xor  : Intrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                               llvm_i32_ty]>;
}
1177+
1178+
11021179
//===-------------------------- Masked Intrinsics -------------------------===//
11031180
//
11041181
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===-- IR/VPIntrinsics.def - Describes llvm.vp.* Intrinsics -*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains descriptions of the various Vector Predication intrinsics.
10+
// This is used as a central place for enumerating the different instructions
11+
// and should eventually be the place to put comments about the instructions.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
// NOTE: NO INCLUDE GUARD DESIRED!
16+
17+
// Every macro below gets an empty default so that an includer only has to
// define the ones it is interested in.

// REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
//   VPID    - enumerator name of the intrinsic inside Intrinsic::
//   MASKPOS - argument position of the mask parameter
//   VLENPOS - argument position of the vector length parameter
#ifndef REGISTER_VP_INTRINSIC
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
#endif

// HANDLE_VP_TO_OC(VPID, OC)
//   Maps the VP intrinsic VPID to its functional opcode OC (an enumerator
//   name inside Instruction::).
#ifndef HANDLE_VP_TO_OC
#define HANDLE_VP_TO_OC(VPID, OC)
#endif

///// Integer Arithmetic /////

// llvm.vp.add(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_add, 2, 3)
HANDLE_VP_TO_OC(vp_add, Add)

// llvm.vp.sub(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
HANDLE_VP_TO_OC(vp_sub, Sub)

// llvm.vp.mul(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
HANDLE_VP_TO_OC(vp_mul, Mul)

// llvm.vp.sdiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
HANDLE_VP_TO_OC(vp_sdiv, SDiv)

// llvm.vp.udiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
HANDLE_VP_TO_OC(vp_udiv, UDiv)

// llvm.vp.srem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
HANDLE_VP_TO_OC(vp_srem, SRem)

// llvm.vp.urem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
HANDLE_VP_TO_OC(vp_urem, URem)

///// Shifts /////

// llvm.vp.shl(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
HANDLE_VP_TO_OC(vp_shl, Shl)

// llvm.vp.lshr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
HANDLE_VP_TO_OC(vp_lshr, LShr)

// llvm.vp.ashr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
HANDLE_VP_TO_OC(vp_ashr, AShr)

///// Bitwise Logic /////

// llvm.vp.and(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_and, 2, 3)
HANDLE_VP_TO_OC(vp_and, And)

// llvm.vp.or(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_or, 2, 3)
HANDLE_VP_TO_OC(vp_or, Or)

// llvm.vp.xor(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
HANDLE_VP_TO_OC(vp_xor, Xor)

// Leave no macro behind for the includer.
#undef REGISTER_VP_INTRINSIC
#undef HANDLE_VP_TO_OC

llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,119 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
160160
}
161161
}
162162

163+
/// Static element count of this intrinsic, taken from the vector type of its
/// mask operand.
ElementCount VPIntrinsic::getVectorLength() const {
  // The mask operand is dereferenced without a null check.
  const Value *Mask = getMaskParam();
  const auto *MaskTy = cast<VectorType>(Mask->getType());
  return MaskTy->getElementCount();
}
173+
174+
/// Mask operand of this call, or nullptr when no mask position is known for
/// the intrinsic ID.
Value *VPIntrinsic::getMaskParam() const {
  const Optional<int> Pos = GetMaskParamPos(getIntrinsicID());
  return Pos.hasValue() ? getArgOperand(*Pos) : nullptr;
}
180+
181+
/// Vector length operand of this call, or nullptr when no vector length
/// position is known for the intrinsic ID.
Value *VPIntrinsic::getVectorLengthParam() const {
  const Optional<int> Pos = GetVectorLengthParamPos(getIntrinsicID());
  return Pos.hasValue() ? getArgOperand(*Pos) : nullptr;
}
187+
188+
/// Argument position of the mask parameter of \p IntrinsicID, or None for an
/// ID that is not listed in VPIntrinsics.def.
Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // One case per registered VP intrinsic, generated from the .def file.
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  case Intrinsic::VPID:                                                        \
    return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"

  default:
    return None;
  }
}
199+
200+
/// Argument position of the vector length parameter of \p IntrinsicID, or
/// None for an ID that is not listed in VPIntrinsics.def.
Optional<int> VPIntrinsic::GetVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // One case per registered VP intrinsic, generated from the .def file.
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  case Intrinsic::VPID:                                                        \
    return VLENPOS;
#include "llvm/IR/VPIntrinsics.def"

  default:
    return None;
  }
}
211+
212+
/// Whether \p ID is one of the intrinsics registered in VPIntrinsics.def.
bool VPIntrinsic::IsVPIntrinsic(Intrinsic::ID ID) {
  switch (ID) {
  // Every registered VP intrinsic answers "yes" directly.
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  case Intrinsic::VPID:                                                        \
    return true;
#include "llvm/IR/VPIntrinsics.def"

  default:
    return false;
  }
}
224+
225+
/// Non-predicated instruction opcode that corresponds to the VP intrinsic
/// \p ID. IDs without a mapping in VPIntrinsics.def yield Instruction::Call.
unsigned VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::ID ID) {
  switch (ID) {
  // One case per VP-intrinsic/opcode pair, generated from the .def file.
#define HANDLE_VP_TO_OC(VPID, OC)                                              \
  case Intrinsic::VPID:                                                        \
    return Instruction::OC;
#include "llvm/IR/VPIntrinsics.def"

  default:
    return Instruction::Call;
  }
}
237+
238+
/// VP intrinsic ID that corresponds to the instruction opcode \p OC. Opcodes
/// without a mapping in VPIntrinsics.def yield Intrinsic::not_intrinsic.
Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned OC) {
  switch (OC) {
  // The macro parameter is called OPCODE so that it cannot be confused with
  // the function parameter OC that is being switched on.
#define HANDLE_VP_TO_OC(VPID, OPCODE)                                          \
  case Instruction::OPCODE:                                                    \
    return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"

  default:
    return Intrinsic::not_intrinsic;
  }
}
249+
250+
bool VPIntrinsic::canIgnoreVectorLengthParam() const {
251+
// No vlen param - no lanes masked-off by it.
252+
auto *VLParam = getVectorLengthParam();
253+
if (!VLParam)
254+
return true;
255+
256+
// Can ignore if MSB of vlen is set.
257+
auto VLConst = dyn_cast<ConstantInt>(VLParam);
258+
if (VLConst && VLConst->getSExtValue() < 0)
259+
return true;
260+
261+
// Vlen param greater-equal type vlen - no lanes masked-off.
262+
if (VLConst) {
263+
auto ElemCount = getVectorLength();
264+
if (ElemCount.Scalable)
265+
return false;
266+
267+
uint64_t VLNum = VLConst->getZExtValue();
268+
if (VLNum >= ElemCount.Min)
269+
return true;
270+
}
271+
272+
// Cannot ignore vlen param by default.
273+
return false;
274+
}
275+
163276
Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
164277
switch (getIntrinsicID()) {
165278
case Intrinsic::uadd_with_overflow:

llvm/test/Verifier/vp-intrinsics.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; Checks that calls to the integer llvm.vp.* intrinsics pass the IR verifier.
; -disable-output keeps opt from writing the verified module as bitcode to
; stdout; only the verifier result matters here.
; RUN: opt --verify -disable-output %s

define void @test_vp_int(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) {
  %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
  ret void
}

; integer arith
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
; bit arith
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)

llvm/unittests/IR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ add_llvm_unittest(IRTests
3939
ValueTest.cpp
4040
VectorTypesTest.cpp
4141
VerifierTest.cpp
42+
VPIntrinsicTest.cpp
4243
WaymarkTest.cpp
4344
)
4445

0 commit comments

Comments
 (0)