sx-aurora-dev · efocht · Oct 5, 2020 · Sep 22, 2020 · Sep 22, 2020 · Sep 22, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,13 @@
+# Travis CI job: configure LLVM with only the VE target and build the VE libraries.
+language: cpp
+dist: focal
+compiler: gcc
+before_install:
+  - sudo apt-get -y install ninja-build
+script:
+  - mkdir -p build
+  - cd build/
+  # Release build type with its C++ flags overridden to "-O0 -DNDEBUG"; shared libraries, assertions off.
+  - cmake ../llvm -DLLVM_TARGETS_TO_BUILD=VE -DCMAKE_BUILD_TYPE=Release -G Ninja -DBUILD_SHARED_LIBS=on -DCMAKE_CXX_FLAGS_RELEASE="-O0 -DNDEBUG" -DLLVM_ENABLE_ASSERTIONS=OFF
+  # Build only the five VE component libraries instead of the default target.
+  - ninja lib/libLLVMVEAsmParser.so lib/libLLVMVEDesc.so lib/libLLVMVEInfo.so lib/libLLVMVECodeGen.so lib/libLLVMVEDisassembler.so
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # LLVM for NEC SX-Aurora VE (llvm-ve-rv 1.8-dev)
 
+[![Build Status](https://travis-ci.com/sx-aurora-dev/llvm-project.svg?branch=hpce%2Fdevelop)](https://travis-ci.com/sx-aurora-dev/llvm-project)
+
-This is a fork of the LLVM repositoy with support for the NEC
+This is a fork of the LLVM repository with support for the NEC
 SX-Aurora TSUBASA Vector Engine (VE).
 

diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
@@ -1100,6 +1100,93 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
 
   li     a0, 0  // return UNW_ESUCCESS
   ret           // jump to ra
+
+#elif defined(__ve__)
+
+#
+# extern int __unw_getcontext(unw_context_t* thread_state)
+#
+# On entry:
+#  thread_state pointer is in %s0
+#
+# On exit:
+#  scalar registers %s0-%s63 are stored at byte offsets 8*N from %s0,
+#  the instruction counter at 512 and the vector length at 520;
+#  %s1 is reloaded after being used as scratch and %s0 holds 0
+#  (UNW_ESUCCESS)
+#
+DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
+        st  %s1, 8(,%s0)        # save %s1 first so it can be used as scratch
+        sic %s1                 # %s1 = instruction counter
+        st  %s1, 512(,%s0)
+        st  %s0, 0(,%s0)
+        st  %s2, 16(,%s0)
+        st  %s3, 24(,%s0)
+        st  %s4, 32(,%s0)
+        st  %s5, 40(,%s0)
+        st  %s6, 48(,%s0)
+        st  %s7, 56(,%s0)
+        st  %s8, 64(,%s0)
+        st  %s9, 72(,%s0)
+        st  %s10, 80(,%s0)
+        st  %s11, 88(,%s0)
+        st  %s12, 96(,%s0)
+        st  %s13, 104(,%s0)
+        st  %s14, 112(,%s0)
+        st  %s15, 120(,%s0)
+        st  %s16, 128(,%s0)
+        st  %s17, 136(,%s0)
+        st  %s18, 144(,%s0)
+        st  %s19, 152(,%s0)
+        st  %s20, 160(,%s0)
+        st  %s21, 168(,%s0)
+        st  %s22, 176(,%s0)
+        st  %s23, 184(,%s0)
+        st  %s24, 192(,%s0)
+        st  %s25, 200(,%s0)
+        st  %s26, 208(,%s0)
+        st  %s27, 216(,%s0)
+        st  %s28, 224(,%s0)
+        st  %s29, 232(,%s0)
+        st  %s30, 240(,%s0)
+        st  %s31, 248(,%s0)
+        st  %s32, 256(,%s0)
+        st  %s33, 264(,%s0)
+        st  %s34, 272(,%s0)
+        st  %s35, 280(,%s0)
+        st  %s36, 288(,%s0)
+        st  %s37, 296(,%s0)
+        st  %s38, 304(,%s0)
+        st  %s39, 312(,%s0)
+        st  %s40, 320(,%s0)
+        st  %s41, 328(,%s0)
+        st  %s42, 336(,%s0)
+        st  %s43, 344(,%s0)
+        st  %s44, 352(,%s0)
+        st  %s45, 360(,%s0)
+        st  %s46, 368(,%s0)
+        st  %s47, 376(,%s0)
+        st  %s48, 384(,%s0)
+        st  %s49, 392(,%s0)
+        st  %s50, 400(,%s0)
+        st  %s51, 408(,%s0)
+        st  %s52, 416(,%s0)
+        st  %s53, 424(,%s0)
+        st  %s54, 432(,%s0)
+        st  %s55, 440(,%s0)
+        st  %s56, 448(,%s0)
+        st  %s57, 456(,%s0)
+        st  %s58, 464(,%s0)
+        st  %s59, 472(,%s0)
+        st  %s60, 480(,%s0)
+        st  %s61, 488(,%s0)
+        st  %s62, 496(,%s0)
+        st  %s63, 504(,%s0)
+        svl %s1                 # %s1 = vector length
+        st  %s1, 520(,%s0)
+        ld  %s1, 8(,%s0)        # reload the value of %s1 saved at the top
+        or  %s0, 0, (0)1        # return UNW_ESUCCESS (0)
+        b.l (,%lr)
 #endif
 
   WEAK_ALIAS(__unw_getcontext, unw_getcontext)

diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
@@ -435,6 +435,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv32")
   set(LLVM_NATIVE_ARCH RISCV)
 elseif (LLVM_NATIVE_ARCH MATCHES "riscv64")
   set(LLVM_NATIVE_ARCH RISCV)
+elseif (LLVM_NATIVE_ARCH STREQUAL "ve")  # exact match; MATCHES "ve" would accept any name containing "ve"
+  set(LLVM_NATIVE_ARCH VE)
 else ()
   message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
 endif ()

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -228,14 +228,8 @@ enum NodeType {
   SREM,
   UREM,
 
-  // Vector-predicated integer binary arithmetic
-  VP_ADD,
-  VP_SUB,
-  VP_MUL,
-  VP_SDIV,
-  VP_UDIV,
-  VP_SREM,
-  VP_UREM,
+#define BEGIN_REGISTER_VP_SDNODE(VPSDNAME, ...) VPSDNAME,
+#include "llvm/IR/VPIntrinsics.def" // emits one enumerator per BEGIN_REGISTER_VP_SDNODE entry
 
   /// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
   /// a signed/unsigned value of type i[2*N], and return the full value as
@@ -354,9 +348,6 @@ enum NodeType {
   FDIV,
   FREM,
 
-  // Vector predicated floating point ops.
-  VP_FADD, VP_FSUB, VP_FMUL, VP_FDIV, VP_FREM,
-
   /// Constrained versions of the binary floating point operators.
   /// These will be lowered to the simple operators before final selection.
   /// They are used to limit optimizations while the DAG is being
@@ -441,7 +432,6 @@ enum NodeType {
 
   /// FMA - Perform a * b + c with no intermediate rounding step.
   FMA,
-  VP_FMA,
 
   /// FMAD - Perform a * b + c, while getting the same result as the
   /// separately rounded operations.
@@ -532,19 +522,6 @@ enum NodeType {
   /// in terms of the element size of VEC1/VEC2, not in terms of bytes.
   VECTOR_SHUFFLE,
 
-  /// VP_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as
-  /// VEC1. AMOUNT is an integer value. The returned vector is equivalent
-  /// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]).
-  VP_VSHIFT,
-
-  /// VP_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as
-  /// VEC1.
-  VP_COMPRESS,
-
-  /// VP_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as
-  /// VEC1.
-  VP_EXPAND,
-
   /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
   /// scalar value into element 0 of the resultant vector type.  The top
   /// elements 1 to N-1 of the N-element vector are undefined.  The type
@@ -578,9 +555,6 @@ enum NodeType {
   OR,
   XOR,
 
-  // Vector-predicated bitwise operators
-  VP_AND, VP_OR, VP_XOR,
-
   /// ABS - Determine the unsigned absolute value of a signed integer value of
   /// the same bitwidth.
   /// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
@@ -609,7 +583,6 @@ enum NodeType {
   ROTR,
   FSHL,
   FSHR,
-  VP_SHL, VP_SRA, VP_SRL,
 
   /// Byte Swap and Counting operators.
   BSWAP,
@@ -634,7 +607,6 @@ enum NodeType {
   /// change the condition type in order to match the VSELECT node using a
   /// pattern. The condition follows the BooleanContent format of the target.
   VSELECT,
-  VP_SELECT,
 
   /// Select with condition operator - This selects between a true value and
   /// a false value (ops #2 and #3) based on the boolean result of comparing
@@ -649,7 +621,6 @@ enum NodeType {
   /// them with (op #2) as a CondCodeSDNode. If the operands are vector types
   /// then the result type must also be a vector type.
   SETCC,
-  VP_SETCC,
 
   /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but
   /// op #2 is a boolean indicating if there is an incoming carry. This
@@ -688,8 +659,6 @@ enum NodeType {
   /// depends on the first letter) to floating point.
   SINT_TO_FP,
   UINT_TO_FP,
-  VP_SINT_TO_FP,
-  VP_UINT_TO_FP,
 
   /// SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
   /// sign extend a small value in a large integer register (e.g. sign
@@ -736,8 +705,6 @@ enum NodeType {
   /// the FP value cannot fit in the integer type, the results are undefined.
   FP_TO_SINT,
   FP_TO_UINT,
-  VP_FP_TO_SINT,
-  VP_FP_TO_UINT,
 
   /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
   /// down to the precision of the destination VT.  TRUNC is a flag, which is
@@ -763,7 +730,6 @@ enum NodeType {
 
   /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
   FP_EXTEND,
-  VP_FP_EXTEND,
 
   /// BITCAST - This operator converts between integer, vector and FP
   /// values, as if the value was stored to memory with one type and loaded
@@ -821,12 +787,6 @@ enum NodeType {
   LRINT,
   LLRINT,
 
-  // Vector-predicated unary floating-point ops
-  VP_FNEG, VP_FABS, VP_FSQRT, VP_FCBRT, VP_FSIN, VP_FCOS, VP_FPOWI, VP_FPOW,
-  VP_FLOG, VP_FLOG2, VP_FLOG10, VP_FEXP, VP_FEXP2,
-  VP_FCEIL, VP_FTRUNC, VP_FRINT, VP_FNEARBYINT, VP_FROUND, VP_FFLOOR,
-  VP_LROUND, VP_LLROUND, VP_LRINT, VP_LLRINT,
-
   /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
   /// values.
   //
@@ -836,7 +796,6 @@ enum NodeType {
   /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
   FMINNUM,
   FMAXNUM,
-  VP_FMINNUM, VP_FMAXNUM,
 
   /// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
   /// two values, following the IEEE-754 2008 definition. This differs from
@@ -1086,7 +1045,6 @@ enum NodeType {
   // OutChain = MSTORE(Value, BasePtr, Mask)
   MLOAD,
   MSTORE,
-  VP_LOAD, VP_STORE,
 
   // Masked gather and scatter - load and store operations for a vector of
   // random addresses with additional mask operand that prevents memory
@@ -1100,17 +1058,6 @@ enum NodeType {
   MGATHER,
   MSCATTER,
 
-  // VP gather and scatter - load and store operations for a vector of
-  // random addresses with additional mask and vector length operand that
-  // prevents memory accesses to the masked-off lanes.
-  //
-  // Val, OutChain = VP_GATHER(InChain, BasePtr, Index, Scale, Mask, EVL)
-  // OutChain = VP_SCATTER(InChain, Value, BasePtr, Index, Scale, Mask, EVL)
-  //
-  // The Index operand can have more vector elements than the other operands
-  // due to type legalization. The extra elements are ignored.
-  VP_GATHER, VP_SCATTER,
-
   /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
   /// is the chain and the second operand is the alloca pointer.
   LIFETIME_START,
@@ -1143,7 +1090,6 @@ enum NodeType {
   /// is the vector to reduce.
   VECREDUCE_STRICT_FADD,
   VECREDUCE_STRICT_FMUL,
-  VP_REDUCE_STRICT_FADD, VP_REDUCE_STRICT_FMUL,
 
   /// These reductions are non-strict, and have a single vector operand.
   VECREDUCE_FADD,
@@ -1164,23 +1110,6 @@ enum NodeType {
   VECREDUCE_UMAX,
   VECREDUCE_UMIN,
 
-  // Vector-predicated reduction operators
-  VP_REDUCE_FADD,
-  VP_REDUCE_FMUL,
-  VP_REDUCE_ADD,
-  VP_REDUCE_MUL,
-  VP_REDUCE_AND,
-  VP_REDUCE_OR,
-  VP_REDUCE_XOR,
-  VP_REDUCE_SMAX,
-  VP_REDUCE_SMIN,
-  VP_REDUCE_UMAX,
-  VP_REDUCE_UMIN,
-
-  /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
-  VP_REDUCE_FMAX,
-  VP_REDUCE_FMIN,
-
   /// BUILTIN_OP_END - This must be the last enum value in this list.
   /// The target-specific pre-isel opcode values start here.
   BUILTIN_OP_END

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
@@ -1375,6 +1375,11 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, Mask<3>, VectorLeng
                                 LLVMMatchType<0>,
                                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                 llvm_i32_ty]>;
+// Element-wise bitops: (x, i1 mask, i32 vlen). NOTE(review): mask/vlen are operands 1/2 here - confirm the enclosing IntrProperties Mask<>/VectorLength<> indices match.
+  def int_vp_ctpop : Intrinsic<[ llvm_anyvector_ty ],
+                               [ LLVMMatchType<0>,
+                                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                 llvm_i32_ty]>;
 
 // Logical operators
   def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],

diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -111,6 +111,13 @@
 
 ///// Integer Arithmetic /////
 
+// llvm.vp.ctpop(x,mask,vlen) - unary element-wise bit op; registered with mask at operand 1, vlen at operand 2
+BEGIN_REGISTER_VP_INTRINSIC(vp_ctpop, 1, 2)
+BEGIN_REGISTER_VP_SDNODE(VP_CTPOP, -1, vp_ctpop, 1, 2)
+HANDLE_VP_TO_INTRIN(ctpop)
+HANDLE_VP_IS_UNARY
+END_REGISTER_CASES(vp_ctpop, VP_CTPOP)
+
 // llvm.vp.add(x,y,mask,vlen)
 BEGIN_REGISTER_VP_INTRINSIC(vp_add, 2, 3)
 BEGIN_REGISTER_VP_SDNODE(VP_ADD, -1, vp_add, 2, 3)
@@ -490,6 +497,16 @@ HANDLE_VP_IS_MEMOP(1, 0)
 END_REGISTER_CASES(vp_store, VP_STORE)
 
 // llvm.vp.scatter(ptr,val,mask,vlen)
+// VP gather and scatter - load and store operations for a vector of
+// random addresses with additional mask and vector length operands that
+// prevent memory accesses to the masked-off lanes.
+//
+// Val, OutChain = VP_GATHER(InChain, BasePtr, Index, Scale, Mask, EVL)
+// OutChain = VP_SCATTER(InChain, Value, BasePtr, Index, Scale, Mask, EVL)
+//
+// The Index operand can have more vector elements than the other operands
+// due to type legalization. The extra elements are ignored.
+
 BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
 BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, 1, vp_scatter, 3, 4)
 HANDLE_VP_TO_INTRIN(masked_scatter)
@@ -514,16 +531,23 @@ END_REGISTER_CASES(vp_gather, VP_GATHER)
 ///// Shuffle & Blend /////
 
 // llvm.vp.compress(x,mask,vlen)
+/// VP_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as
+/// VEC1.
 BEGIN_REGISTER_VP_INTRINSIC(vp_compress, 1, 2)
 BEGIN_REGISTER_VP_SDNODE(VP_COMPRESS, -1, vp_compress, 1, 2)
 END_REGISTER_CASES(vp_compress, VP_COMPRESS)
 
 // llvm.vp.expand(x,mask,vlen)
+/// VP_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as
+/// VEC1.
 BEGIN_REGISTER_VP_INTRINSIC(vp_expand, 1, 2)
 BEGIN_REGISTER_VP_SDNODE(VP_EXPAND, -1, vp_expand, 1, 2)
 END_REGISTER_CASES(vp_expand, VP_EXPAND)
 
 // llvm.vp.vshift(x,amount,mask,vlen)
+/// VP_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as
+/// VEC1. AMOUNT is an integer value. The returned vector is equivalent
+/// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]).
 BEGIN_REGISTER_VP_INTRINSIC(vp_vshift, 2, 3)
 BEGIN_REGISTER_VP_SDNODE(VP_VSHIFT, -1, vp_vshift, 2, 3)
 END_REGISTER_CASES(vp_vshift, VP_VSHIFT)

diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -68,6 +68,11 @@ def SDTFPUnaryOpVVP  : SDTypeProfile<1, 3, [   // fneg, fsqrt, etc
   SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
 ]>;
 
+// Unary integer op: 1 result, 3 operands; result and operand 1 share an integer type, operand 2 has the same element count, operand 3 is constrained by IsVLVT.
+def SDTUnaryOpVVP  : SDTypeProfile<1, 3, [   // ctpop
+  SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
+]>;
+
 // gather scatter
 def vvp_scatter : SDNode<"VEISD::VVP_SCATTER",  SDTScatterVVP,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -145,6 +150,9 @@ def vvp_sext       : SDNode<"VEISD::VVP_SEXT", SDTIntExtendOpVVP>;
 def vvp_zext       : SDNode<"VEISD::VVP_ZEXT", SDTIntExtendOpVVP>;
 def vvp_trunc      : SDNode<"VEISD::VVP_TRUNC", SDTIntTruncOpVVP>;
 
+// Element-wise bit-manipulation nodes (typed by SDTUnaryOpVVP)
+def vvp_ctpop      : SDNode<"VEISD::VVP_CTPOP", SDTUnaryOpVVP>;
+
 // reductions
 def vvp_reduce_fadd         : SDNode<"VEISD::VVP_REDUCE_FADD", SDTReduceVVP>;
 def vvp_reduce_strict_fadd  : SDNode<"VEISD::VVP_REDUCE_STRICT_FADD", SDTReduceStartVVP>;