Skip to content

Commit 3f1bccc

Browse files
author
Simon Moll
committed
[VP,Integer,#1] Vector-predicated integer intrinsics
Summary: This patch adds IR intrinsics for vector-predicated integer arithmetic. It is subpatch #1 of the [integer slice](https://reviews.llvm.org/D57504#1732277) of [LLVM-VP](https://reviews.llvm.org/D57504). LLVM-VP is a larger effort to bring native vector predication to LLVM. Reviewers: mkuper, rkruppe, fhahn, rengolin, huntergr, sdesmalen, jdoerfert, andrew.w.kaylor Reviewed By: andrew.w.kaylor Subscribers: efriedma, vkmr, craig.topper, chandlerc, pengfei, andrew.w.kaylor, merge_guards_bot, dmgreen, rogfer01, cameron.mcinally, sstefan1, SjoerdMeijer, samparker, k-ishizaka, efocht, mgorny, hiraditya, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69891
1 parent cbeffa3 commit 3f1bccc

13 files changed

+1258
-4
lines changed

llvm/docs/LangRef.rst

Lines changed: 697 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,15 @@ class TargetTransformInfo {
11641164
/// to a stack reload.
11651165
unsigned getGISelRematGlobalCost() const;
11661166

1167+
/// \name Vector Predication Information
1168+
/// @{
1169+
/// Whether the target supports the %evl parameter of VP intrinsics efficiently in hardware.
1170+
/// (see LLVM Language Reference - "Vector Predication Intrinsics")
1171+
/// Use of %evl is discouraged when that is not the case.
1172+
bool hasActiveVectorLength() const;
1173+
1174+
/// @}
1175+
11671176
/// @}
11681177

11691178
private:
@@ -1413,6 +1422,7 @@ class TargetTransformInfo::Concept {
14131422
ReductionFlags) const = 0;
14141423
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
14151424
virtual unsigned getGISelRematGlobalCost() const = 0;
1425+
virtual bool hasActiveVectorLength() const = 0;
14161426
virtual int getInstructionLatency(const Instruction *I) = 0;
14171427
};
14181428

@@ -1900,6 +1910,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
19001910
return Impl.getGISelRematGlobalCost();
19011911
}
19021912

1913+
// Forwards the active-vector-length query to the wrapped Impl object.
bool hasActiveVectorLength() const override {
1914+
return Impl.hasActiveVectorLength();
1915+
}
1916+
19031917
int getInstructionLatency(const Instruction *I) override {
19041918
return Impl.getInstructionLatency(I);
19051919
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,10 @@ class TargetTransformInfoImplBase {
625625
return 1;
626626
}
627627

628+
// Base implementation: unconditionally reports no active-vector-length
// support.
bool hasActiveVectorLength() const {
629+
return false;
630+
}
631+
628632
protected:
629633
// Obtain the minimum required size to hold the value (without the sign)
630634
// In case of a vector it returns the min required size for one element.

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,48 @@ namespace llvm {
206206
/// @}
207207
};
208208

209+
/// This is the common base class for vector predication (llvm.vp.*)
/// intrinsics.
210+
class VPIntrinsic : public IntrinsicInst {
211+
public:
212+
/// Position of the mask parameter of \p IntrinsicID (presumably an empty
/// Optional when the intrinsic has none -- confirm in the implementation).
static Optional<int> GetMaskParamPos(Intrinsic::ID IntrinsicID);
213+
/// Position of the vector length parameter of \p IntrinsicID (presumably an
/// empty Optional when the intrinsic has none -- confirm in the
/// implementation).
static Optional<int> GetVectorLengthParamPos(Intrinsic::ID IntrinsicID);
214+
215+
/// The llvm.vp.* intrinsic ID that corresponds to the instruction opcode
/// \p OC.
216+
static Intrinsic::ID GetForOpcode(unsigned OC);
217+
218+
/// Whether the given intrinsic ID is a VP intrinsic ID.
219+
static bool IsVPIntrinsic(Intrinsic::ID);
220+
221+
/// \return the mask parameter or nullptr.
222+
Value *getMaskParam() const;
223+
224+
/// \return the vector length parameter or nullptr.
225+
Value *getVectorLengthParam() const;
226+
227+
/// \return whether the vector length param can be ignored.
228+
bool canIgnoreVectorLengthParam() const;
229+
230+
/// \return the static element count (number of vector elements) that the
231+
/// vector length parameter applies to.
232+
ElementCount getStaticVectorLength() const;
233+
234+
// Methods to support type inquiry through isa, cast, and dyn_cast:
235+
// An IntrinsicInst is a VPIntrinsic iff its intrinsic ID is a VP intrinsic ID.
static bool classof(const IntrinsicInst *I) {
236+
return IsVPIntrinsic(I->getIntrinsicID());
237+
}
238+
// A Value qualifies only if it is an IntrinsicInst that passes the check above.
static bool classof(const Value *V) {
239+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
240+
}
241+
242+
/// \return the equivalent non-predicated opcode of this intrinsic, looked up
/// from its intrinsic ID.
243+
unsigned getFunctionalOpcode() const {
244+
return GetFunctionalOpcodeForVP(getIntrinsicID());
245+
}
246+
247+
/// \return the equivalent non-predicated opcode of the VP intrinsic \p ID.
248+
static unsigned GetFunctionalOpcodeForVP(Intrinsic::ID ID);
249+
};
250+
209251
/// This is the common base class for constrained floating point intrinsics.
210252
class ConstrainedFPIntrinsic : public IntrinsicInst {
211253
public:

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ class IntrinsicProperty;
2727
// effects. It may be CSE'd, deleted if dead, etc.
2828
def IntrNoMem : IntrinsicProperty;
2929

30+
// IntrNoSync - Threads executing the intrinsic will not synchronize using
31+
// memory or other means.
32+
def IntrNoSync : IntrinsicProperty;
33+
3034
// IntrReadMem - This intrinsic only reads from memory. It does not write to
3135
// memory and has no other side effects. Therefore, it cannot be moved across
3236
// potentially aliasing stores. However, it can be reordered otherwise and can
@@ -1153,6 +1157,79 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWil
11531157
def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
11541158
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
11551159

1160+
//===---------------- Vector Predication Intrinsics --------------===//
1161+
1162+
// Binary operators.
// Every definition in this group has the same signature: it returns an
// overloaded vector type and takes two operands of that same type, followed
// by an i1 mask operand of matching vector width (LLVMScalarOrSameVectorWidth)
// and an i32 operand (the vector length).
1163+
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
1164+
def int_vp_add : Intrinsic<[ llvm_anyvector_ty ],
1165+
[ LLVMMatchType<0>,
1166+
LLVMMatchType<0>,
1167+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1168+
llvm_i32_ty]>;
1169+
def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ],
1170+
[ LLVMMatchType<0>,
1171+
LLVMMatchType<0>,
1172+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1173+
llvm_i32_ty]>;
1174+
def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ],
1175+
[ LLVMMatchType<0>,
1176+
LLVMMatchType<0>,
1177+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1178+
llvm_i32_ty]>;
1179+
def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ],
1180+
[ LLVMMatchType<0>,
1181+
LLVMMatchType<0>,
1182+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1183+
llvm_i32_ty]>;
1184+
def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ],
1185+
[ LLVMMatchType<0>,
1186+
LLVMMatchType<0>,
1187+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1188+
llvm_i32_ty]>;
1189+
def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ],
1190+
[ LLVMMatchType<0>,
1191+
LLVMMatchType<0>,
1192+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1193+
llvm_i32_ty]>;
1194+
def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ],
1195+
[ LLVMMatchType<0>,
1196+
LLVMMatchType<0>,
1197+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1198+
llvm_i32_ty]>;
1199+
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
1200+
[ LLVMMatchType<0>,
1201+
LLVMMatchType<0>,
1202+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1203+
llvm_i32_ty]>;
1204+
def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ],
1205+
[ LLVMMatchType<0>,
1206+
LLVMMatchType<0>,
1207+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1208+
llvm_i32_ty]>;
1209+
def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ],
1210+
[ LLVMMatchType<0>,
1211+
LLVMMatchType<0>,
1212+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1213+
llvm_i32_ty]>;
1214+
def int_vp_or : Intrinsic<[ llvm_anyvector_ty ],
1215+
[ LLVMMatchType<0>,
1216+
LLVMMatchType<0>,
1217+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1218+
llvm_i32_ty]>;
1219+
def int_vp_and : Intrinsic<[ llvm_anyvector_ty ],
1220+
[ LLVMMatchType<0>,
1221+
LLVMMatchType<0>,
1222+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1223+
llvm_i32_ty]>;
1224+
def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ],
1225+
[ LLVMMatchType<0>,
1226+
LLVMMatchType<0>,
1227+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1228+
llvm_i32_ty]>;
1229+
1230+
}
1231+
1232+
11561233
//===-------------------------- Masked Intrinsics -------------------------===//
11571234
//
11581235
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===-- IR/VPIntrinsics.def - Describes llvm.vp.* Intrinsics -*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains descriptions of the various Vector Predication intrinsics.
10+
// This is used as a central place for enumerating the different instructions
11+
// and should eventually be the place to put comments about the instructions.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
// NOTE: NO INCLUDE GUARD DESIRED!
16+
17+
// Provide empty default definitions of the macros so that users of this file
18+
// only have to define the ones they actually need.
19+
//
20+
// REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS): registers the VP intrinsic
// VPID. In the entries of this file, MASKPOS and VLENPOS are the zero-based
// positions of the intrinsic's mask and vector length parameters.
#ifndef REGISTER_VP_INTRINSIC
21+
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
22+
#endif
23+
24+
// HANDLE_VP_TO_OC(VPID, OC): maps the VP intrinsic VPID to its functional
// instruction opcode OC.
25+
#ifndef HANDLE_VP_TO_OC
26+
#define HANDLE_VP_TO_OC(VPID, OC)
27+
#endif
28+
29+
///// Integer Arithmetic /////
30+
31+
// llvm.vp.add(x,y,mask,vlen)
32+
REGISTER_VP_INTRINSIC(vp_add, 2, 3)
33+
HANDLE_VP_TO_OC(vp_add, Add)
34+
35+
// llvm.vp.and(x,y,mask,vlen)
36+
REGISTER_VP_INTRINSIC(vp_and, 2, 3)
37+
HANDLE_VP_TO_OC(vp_and, And)
38+
39+
// llvm.vp.ashr(x,y,mask,vlen)
40+
REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
41+
HANDLE_VP_TO_OC(vp_ashr, AShr)
42+
43+
// llvm.vp.lshr(x,y,mask,vlen)
44+
REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
45+
HANDLE_VP_TO_OC(vp_lshr, LShr)
46+
47+
// llvm.vp.mul(x,y,mask,vlen)
48+
REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
49+
HANDLE_VP_TO_OC(vp_mul, Mul)
50+
51+
// llvm.vp.or(x,y,mask,vlen)
52+
REGISTER_VP_INTRINSIC(vp_or, 2, 3)
53+
HANDLE_VP_TO_OC(vp_or, Or)
54+
55+
// llvm.vp.sdiv(x,y,mask,vlen)
56+
REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
57+
HANDLE_VP_TO_OC(vp_sdiv, SDiv)
58+
59+
// llvm.vp.shl(x,y,mask,vlen)
60+
REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
61+
HANDLE_VP_TO_OC(vp_shl, Shl)
62+
63+
// llvm.vp.srem(x,y,mask,vlen)
64+
REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
65+
HANDLE_VP_TO_OC(vp_srem, SRem)
66+
67+
// llvm.vp.sub(x,y,mask,vlen)
68+
REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
69+
HANDLE_VP_TO_OC(vp_sub, Sub)
70+
71+
// llvm.vp.udiv(x,y,mask,vlen)
72+
REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
73+
HANDLE_VP_TO_OC(vp_udiv, UDiv)
74+
75+
// llvm.vp.urem(x,y,mask,vlen)
76+
REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
77+
HANDLE_VP_TO_OC(vp_urem, URem)
78+
79+
// llvm.vp.xor(x,y,mask,vlen)
80+
REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
81+
HANDLE_VP_TO_OC(vp_xor, Xor)
82+
83+
#undef REGISTER_VP_INTRINSIC
84+
#undef HANDLE_VP_TO_OC

0 commit comments

Comments
 (0)