Skip to content

Commit 7bc70ef

Browse files
authored
Merge pull request #20 from sx-aurora-dev/hpce/develop
Hpce/develop
2 parents 6a61f79 + a39618c commit 7bc70ef

16 files changed

+138
-580
lines changed

.travis.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
language: cpp
2+
dist: focal
3+
compiler: gcc
4+
before_install:
5+
- sudo apt-get -y install ninja-build
6+
script:
7+
- mkdir -p build
8+
- cd build/
9+
- cmake ../llvm -DLLVM_TARGETS_TO_BUILD=VE -DCMAKE_BUILD_TYPE=Release -G Ninja -DBUILD_SHARED_LIBS=on -DCMAKE_CXX_FLAGS_RELEASE="-O0 -DNDEBUG" -DLLVM_ENABLE_ASSERTIONS=OFF
10+
- ninja lib/libLLVMVEAsmParser.so lib/libLLVMVEDesc.so lib/libLLVMVEInfo.so lib/libLLVMVECodeGen.so lib/libLLVMVEDisassembler.so

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# LLVM for NEC SX-Aurora VE (llvm-ve-rv 1.8-dev)
22

3+
[![Build Status](https://travis-ci.com/sx-aurora-dev/llvm-project.svg?branch=hpce%2Fdevelop)](https://travis-ci.com/sx-aurora-dev/llvm-project)
4+
35
This is a fork of the LLVM repository with support for the NEC
46
SX-Aurora TSUBASA Vector Engine (VE).
57

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 2 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -228,14 +228,8 @@ enum NodeType {
228228
SREM,
229229
UREM,
230230

231-
// Vector-predicated integer binary arithmetic
232-
VP_ADD,
233-
VP_SUB,
234-
VP_MUL,
235-
VP_SDIV,
236-
VP_UDIV,
237-
VP_SREM,
238-
VP_UREM,
231+
#define BEGIN_REGISTER_VP_SDNODE(VPSDNAME, ...) VPSDNAME,
232+
#include "llvm/IR/VPIntrinsics.def"
239233

240234
/// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
241235
/// a signed/unsigned value of type i[2*N], and return the full value as
@@ -354,9 +348,6 @@ enum NodeType {
354348
FDIV,
355349
FREM,
356350

357-
// Vector predicated floating point ops.
358-
VP_FADD, VP_FSUB, VP_FMUL, VP_FDIV, VP_FREM,
359-
360351
/// Constrained versions of the binary floating point operators.
361352
/// These will be lowered to the simple operators before final selection.
362353
/// They are used to limit optimizations while the DAG is being
@@ -441,7 +432,6 @@ enum NodeType {
441432

442433
/// FMA - Perform a * b + c with no intermediate rounding step.
443434
FMA,
444-
VP_FMA,
445435

446436
/// FMAD - Perform a * b + c, while getting the same result as the
447437
/// separately rounded operations.
@@ -532,19 +522,6 @@ enum NodeType {
532522
/// in terms of the element size of VEC1/VEC2, not in terms of bytes.
533523
VECTOR_SHUFFLE,
534524

535-
/// VP_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as
536-
/// VEC1. AMOUNT is an integer value. The returned vector is equivalent
537-
/// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]).
538-
VP_VSHIFT,
539-
540-
/// VP_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as
541-
/// VEC1.
542-
VP_COMPRESS,
543-
544-
/// VP_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as
545-
/// VEC1.
546-
VP_EXPAND,
547-
548525
/// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
549526
/// scalar value into element 0 of the resultant vector type. The top
550527
/// elements 1 to N-1 of the N-element vector are undefined. The type
@@ -578,9 +555,6 @@ enum NodeType {
578555
OR,
579556
XOR,
580557

581-
// Vector-predicated bitwise operators
582-
VP_AND, VP_OR, VP_XOR,
583-
584558
/// ABS - Determine the unsigned absolute value of a signed integer value of
585559
/// the same bitwidth.
586560
/// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
@@ -609,7 +583,6 @@ enum NodeType {
609583
ROTR,
610584
FSHL,
611585
FSHR,
612-
VP_SHL, VP_SRA, VP_SRL,
613586

614587
/// Byte Swap and Counting operators.
615588
BSWAP,
@@ -634,7 +607,6 @@ enum NodeType {
634607
/// change the condition type in order to match the VSELECT node using a
635608
/// pattern. The condition follows the BooleanContent format of the target.
636609
VSELECT,
637-
VP_SELECT,
638610

639611
/// Select with condition operator - This selects between a true value and
640612
/// a false value (ops #2 and #3) based on the boolean result of comparing
@@ -649,7 +621,6 @@ enum NodeType {
649621
/// them with (op #2) as a CondCodeSDNode. If the operands are vector types
650622
/// then the result type must also be a vector type.
651623
SETCC,
652-
VP_SETCC,
653624

654625
/// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but
655626
/// op #2 is a boolean indicating if there is an incoming carry. This
@@ -688,8 +659,6 @@ enum NodeType {
688659
/// depends on the first letter) to floating point.
689660
SINT_TO_FP,
690661
UINT_TO_FP,
691-
VP_SINT_TO_FP,
692-
VP_UINT_TO_FP,
693662

694663
/// SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
695664
/// sign extend a small value in a large integer register (e.g. sign
@@ -736,8 +705,6 @@ enum NodeType {
736705
/// the FP value cannot fit in the integer type, the results are undefined.
737706
FP_TO_SINT,
738707
FP_TO_UINT,
739-
VP_FP_TO_SINT,
740-
VP_FP_TO_UINT,
741708

742709
/// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
743710
/// down to the precision of the destination VT. TRUNC is a flag, which is
@@ -763,7 +730,6 @@ enum NodeType {
763730

764731
/// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
765732
FP_EXTEND,
766-
VP_FP_EXTEND,
767733

768734
/// BITCAST - This operator converts between integer, vector and FP
769735
/// values, as if the value was stored to memory with one type and loaded
@@ -821,12 +787,6 @@ enum NodeType {
821787
LRINT,
822788
LLRINT,
823789

824-
// Vector-predicated unary floating-point ops
825-
VP_FNEG, VP_FABS, VP_FSQRT, VP_FCBRT, VP_FSIN, VP_FCOS, VP_FPOWI, VP_FPOW,
826-
VP_FLOG, VP_FLOG2, VP_FLOG10, VP_FEXP, VP_FEXP2,
827-
VP_FCEIL, VP_FTRUNC, VP_FRINT, VP_FNEARBYINT, VP_FROUND, VP_FFLOOR,
828-
VP_LROUND, VP_LLROUND, VP_LRINT, VP_LLRINT,
829-
830790
/// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
831791
/// values.
832792
//
@@ -836,7 +796,6 @@ enum NodeType {
836796
/// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
837797
FMINNUM,
838798
FMAXNUM,
839-
VP_FMINNUM, VP_FMAXNUM,
840799

841800
/// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
842801
/// two values, following the IEEE-754 2008 definition. This differs from
@@ -1086,7 +1045,6 @@ enum NodeType {
10861045
// OutChain = MSTORE(Value, BasePtr, Mask)
10871046
MLOAD,
10881047
MSTORE,
1089-
VP_LOAD, VP_STORE,
10901048

10911049
// Masked gather and scatter - load and store operations for a vector of
10921050
// random addresses with additional mask operand that prevents memory
@@ -1100,17 +1058,6 @@ enum NodeType {
11001058
MGATHER,
11011059
MSCATTER,
11021060

1103-
// VP gather and scatter - load and store operations for a vector of
1104-
// random addresses with additional mask and vector length operand that
1105-
// prevents memory accesses to the masked-off lanes.
1106-
//
1107-
// Val, OutChain = VP_GATHER(InChain, BasePtr, Index, Scale, Mask, EVL)
1108-
// OutChain = VP_SCATTER(InChain, Value, BasePtr, Index, Scale, Mask, EVL)
1109-
//
1110-
// The Index operand can have more vector elements than the other operands
1111-
// due to type legalization. The extra elements are ignored.
1112-
VP_GATHER, VP_SCATTER,
1113-
11141061
/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
11151062
/// is the chain and the second operand is the alloca pointer.
11161063
LIFETIME_START,
@@ -1143,7 +1090,6 @@ enum NodeType {
11431090
/// is the vector to reduce.
11441091
VECREDUCE_STRICT_FADD,
11451092
VECREDUCE_STRICT_FMUL,
1146-
VP_REDUCE_STRICT_FADD, VP_REDUCE_STRICT_FMUL,
11471093

11481094
/// These reductions are non-strict, and have a single vector operand.
11491095
VECREDUCE_FADD,
@@ -1164,23 +1110,6 @@ enum NodeType {
11641110
VECREDUCE_UMAX,
11651111
VECREDUCE_UMIN,
11661112

1167-
// Vector-predicated reduction operators
1168-
VP_REDUCE_FADD,
1169-
VP_REDUCE_FMUL,
1170-
VP_REDUCE_ADD,
1171-
VP_REDUCE_MUL,
1172-
VP_REDUCE_AND,
1173-
VP_REDUCE_OR,
1174-
VP_REDUCE_XOR,
1175-
VP_REDUCE_SMAX,
1176-
VP_REDUCE_SMIN,
1177-
VP_REDUCE_UMAX,
1178-
VP_REDUCE_UMIN,
1179-
1180-
/// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
1181-
VP_REDUCE_FMAX,
1182-
VP_REDUCE_FMIN,
1183-
11841113
/// BUILTIN_OP_END - This must be the last enum value in this list.
11851114
/// The target-specific pre-isel opcode values start here.
11861115
BUILTIN_OP_END

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,6 +1375,11 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, Mask<3>, VectorLeng
13751375
LLVMMatchType<0>,
13761376
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
13771377
llvm_i32_ty]>;
1378+
// Element-wise bitops
1379+
def int_vp_ctpop : Intrinsic<[ llvm_anyvector_ty ],
1380+
[ LLVMMatchType<0>,
1381+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1382+
llvm_i32_ty]>;
13781383

13791384
// Logical operators
13801385
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,13 @@
111111

112112
///// Integer Arithmetic /////
113113

114+
// llvm.vp.ctpop(x,mask,vlen)
115+
BEGIN_REGISTER_VP_INTRINSIC(vp_ctpop, 1, 2)
116+
BEGIN_REGISTER_VP_SDNODE(VP_CTPOP, -1, vp_ctpop, 1, 2)
117+
HANDLE_VP_TO_INTRIN(ctpop)
118+
HANDLE_VP_IS_UNARY
119+
END_REGISTER_CASES(vp_ctpop, VP_CTPOP)
120+
114121
// llvm.vp.add(x,y,mask,vlen)
115122
BEGIN_REGISTER_VP_INTRINSIC(vp_add, 2, 3)
116123
BEGIN_REGISTER_VP_SDNODE(VP_ADD, -1, vp_add, 2, 3)
@@ -490,6 +497,16 @@ HANDLE_VP_IS_MEMOP(1, 0)
490497
END_REGISTER_CASES(vp_store, VP_STORE)
491498

492499
// llvm.vp.scatter(ptr,val,mask,vlen)
500+
// VP gather and scatter - load and store operations for a vector of
501+
// random addresses with additional mask and vector length operands that
502+
// prevent memory accesses to the masked-off lanes.
503+
//
504+
// Val, OutChain = VP_GATHER(InChain, BasePtr, Index, Scale, Mask, EVL)
505+
// OutChain = VP_SCATTER(InChain, Value, BasePtr, Index, Scale, Mask, EVL)
506+
//
507+
// The Index operand can have more vector elements than the other operands
508+
// due to type legalization. The extra elements are ignored.
509+
493510
BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
494511
BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, 1, vp_scatter, 3, 4)
495512
HANDLE_VP_TO_INTRIN(masked_scatter)
@@ -514,16 +531,23 @@ END_REGISTER_CASES(vp_gather, VP_GATHER)
514531
///// Shuffle & Blend /////
515532

516533
// llvm.vp.compress(x,mask,vlen)
534+
/// VP_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as
535+
/// VEC1.
517536
BEGIN_REGISTER_VP_INTRINSIC(vp_compress, 1, 2)
518537
BEGIN_REGISTER_VP_SDNODE(VP_COMPRESS, -1, vp_compress, 1, 2)
519538
END_REGISTER_CASES(vp_compress, VP_COMPRESS)
520539

521540
// llvm.vp.expand(x,mask,vlen)
541+
/// VP_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as
542+
/// VEC1.
522543
BEGIN_REGISTER_VP_INTRINSIC(vp_expand, 1, 2)
523544
BEGIN_REGISTER_VP_SDNODE(VP_EXPAND, -1, vp_expand, 1, 2)
524545
END_REGISTER_CASES(vp_expand, VP_EXPAND)
525546

526547
// llvm.vp.vshift(x,amount,mask,vlen)
548+
/// VP_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as
549+
/// VEC1. AMOUNT is an integer value. The returned vector is equivalent
550+
/// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]).
527551
BEGIN_REGISTER_VP_INTRINSIC(vp_vshift, 2, 3)
528552
BEGIN_REGISTER_VP_SDNODE(VP_VSHIFT, -1, vp_vshift, 2, 3)
529553
END_REGISTER_CASES(vp_vshift, VP_VSHIFT)

llvm/lib/Target/VE/VVPInstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ def SDTFPUnaryOpVVP : SDTypeProfile<1, 3, [ // fneg, fsqrt, etc
6868
SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
6969
]>;
7070

71+
// unary int
72+
def SDTUnaryOpVVP : SDTypeProfile<1, 3, [ // ctpop
73+
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
74+
]>;
75+
7176
// gather scatter
7277
def vvp_scatter : SDNode<"VEISD::VVP_SCATTER", SDTScatterVVP,
7378
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -145,6 +150,9 @@ def vvp_sext : SDNode<"VEISD::VVP_SEXT", SDTIntExtendOpVVP>;
145150
def vvp_zext : SDNode<"VEISD::VVP_ZEXT", SDTIntExtendOpVVP>;
146151
def vvp_trunc : SDNode<"VEISD::VVP_TRUNC", SDTIntTruncOpVVP>;
147152

153+
// element-wise bitops
154+
def vvp_ctpop : SDNode<"VEISD::VVP_CTPOP", SDTUnaryOpVVP>;
155+
148156
// reductions
149157
def vvp_reduce_fadd : SDNode<"VEISD::VVP_REDUCE_FADD", SDTReduceVVP>;
150158
def vvp_reduce_strict_fadd : SDNode<"VEISD::VVP_REDUCE_STRICT_FADD", SDTReduceStartVVP>;

llvm/lib/Target/VE/VVPInstrPatterns.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ multiclass VectorTernaryArith_ShortLong<SDPatternOperator OpNode, ValueType Long
241241

242242

243243
// Integer arithmetic (256 elements)
244+
defm : VectorUnaryArith_ShortLong<vvp_ctpop, i64, v256i64, "VPCNT", i32, v256i32, "PVPCNTLO">;
245+
244246
defm : VectorBinaryArith_ShortLong<c_vv_add, i64, v256i64, "VADDSL", i32, v256i32, "VADDSWSX">;
245247

246248
defm : VectorBinaryArith_ShortLong<vvp_sub, i64, v256i64, "VSUBSL", i32, v256i32, "VSUBSWSX">;

llvm/lib/Target/VE/VVPNodes.inc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ ADD_VVP_OP(VVP_ZEXT) REGISTER_ICONV_VVP_OP(VVP_ZEXT,ZERO_EXTEND) // HAND
9595
ADD_VVP_OP(VVP_FPEXT) REGISTER_FPCONV_VVP_OP(VVP_FPEXT,FP_EXTEND) HANDLE_VP_TO_VVP(VP_FP_EXTEND,VVP_FPEXT)
9696
ADD_VVP_OP(VVP_FPROUND) REGISTER_FPCONV_VVP_OP(VVP_FPROUND,FP_ROUND) HANDLE_VP_TO_VVP(VP_FROUND,VVP_FPROUND)
9797

98+
// element-wise bitops
99+
ADD_VVP_OP(VVP_CTPOP) REGISTER_UNARY_VVP_OP(VVP_CTPOP,CTPOP) // HANDLE_VP_TO_VVP(VP_CTPOP,VVP_CTPOP) // TODO as VP -opt
100+
101+
98102
#if 0
99103
// Disabled, this gets expanded instead
100104
ADD_VVP_OP(VVP_FFLOOR) REGISTER_UNARY_VVP_OP(VVP_FFLOOR, FFLOOR) HANDLE_VP_TO_VVP(VP_FFLOOR, VVP_FFLOOR)

llvm/test/CodeGen/VE/Vector/extract_insert_vector_elt.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,14 @@ define x86_regcallcc <512 x i32> @__regcall3__insert_v512i32r(<512 x i32>, i32)
101101
; CHECK-NEXT: lea %s2, 1024(, %s1)
102102
; CHECK-NEXT: lea %s3, 256
103103
; CHECK-NEXT: lvl %s3
104-
; CHECK-NEXT: vstl %v1,4,%s2
105-
; CHECK-NEXT: vstl %v0,4,%s1
104+
; CHECK-NEXT: vstl %v1, 4, %s2
105+
; CHECK-NEXT: vstl %v0, 4, %s1
106106
; CHECK-NEXT: and %s0, %s0, (55)0
107107
; CHECK-NEXT: sll %s0, %s0, 2
108108
; CHECK-NEXT: or %s4, 2, (0)1
109109
; CHECK-NEXT: stl %s4, 176(%s0, %s11)
110-
; CHECK-NEXT: vldl.zx %v0,4,%s1
111-
; CHECK-NEXT: vldl.zx %v1,4,%s2
110+
; CHECK-NEXT: vldl.zx %v0, 4, %s1
111+
; CHECK-NEXT: vldl.zx %v1, 4, %s2
112112
; CHECK-NEXT: or %s11, 0, %s9
113113
%3 = insertelement <512 x i32> %0, i32 2, i32 %1
114114
ret <512 x i32> %3
@@ -122,8 +122,8 @@ define x86_regcallcc i32 @__regcall3__extract_v512i32r(<512 x i32>, i32) {
122122
; CHECK-NEXT: lea %s2, 1024(, %s1)
123123
; CHECK-NEXT: lea %s3, 256
124124
; CHECK-NEXT: lvl %s3
125-
; CHECK-NEXT: vstl %v1,4,%s2
126-
; CHECK-NEXT: vstl %v0,4,%s1
125+
; CHECK-NEXT: vstl %v1, 4, %s2
126+
; CHECK-NEXT: vstl %v0, 4, %s1
127127
; CHECK-NEXT: and %s0, %s0, (55)0
128128
; CHECK-NEXT: sll %s0, %s0, 2
129129
; CHECK-NEXT: ldl.sx %s0, 176(%s0, %s11)
@@ -140,14 +140,14 @@ define x86_regcallcc <512 x float> @__regcall3__insert_v512f32r(<512 x float>, i
140140
; CHECK-NEXT: lea %s2, 1024(, %s1)
141141
; CHECK-NEXT: lea %s3, 256
142142
; CHECK-NEXT: lvl %s3
143-
; CHECK-NEXT: vstu %v1,4,%s2
144-
; CHECK-NEXT: vstu %v0,4,%s1
143+
; CHECK-NEXT: vstu %v1, 4, %s2
144+
; CHECK-NEXT: vstu %v0, 4, %s1
145145
; CHECK-NEXT: and %s0, %s0, (55)0
146146
; CHECK-NEXT: sll %s0, %s0, 2
147147
; CHECK-NEXT: lea %s4, 1065353216
148148
; CHECK-NEXT: stl %s4, 176(%s0, %s11)
149-
; CHECK-NEXT: vldu %v0,4,%s1
150-
; CHECK-NEXT: vldu %v1,4,%s2
149+
; CHECK-NEXT: vldu %v0, 4, %s1
150+
; CHECK-NEXT: vldu %v1, 4, %s2
151151
; CHECK-NEXT: or %s11, 0, %s9
152152
%3 = insertelement <512 x float> %0, float 1.0, i32 %1
153153
ret <512 x float> %3
@@ -161,8 +161,8 @@ define x86_regcallcc float @__regcall3__extract_v512f32r(<512 x float>, i32) {
161161
; CHECK-NEXT: lea %s2, 1024(, %s1)
162162
; CHECK-NEXT: lea %s3, 256
163163
; CHECK-NEXT: lvl %s3
164-
; CHECK-NEXT: vstu %v1,4,%s2
165-
; CHECK-NEXT: vstu %v0,4,%s1
164+
; CHECK-NEXT: vstu %v1, 4, %s2
165+
; CHECK-NEXT: vstu %v0, 4, %s1
166166
; CHECK-NEXT: and %s0, %s0, (55)0
167167
; CHECK-NEXT: sll %s0, %s0, 2
168168
; CHECK-NEXT: ldu %s0, 176(%s0, %s11)

0 commit comments

Comments
 (0)