Skip to content

Hpce/develop #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Oct 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Travis CI job: configures LLVM with only the VE target enabled and builds
# the five VE backend shared libraries listed in the final ninja command.
language: cpp
dist: focal
compiler: gcc
before_install:
# Ninja is the generator selected by the cmake invocation below (-G Ninja).
- sudo apt-get -y install ninja-build
script:
- mkdir -p build
- cd build/
# Release configuration, but CMAKE_CXX_FLAGS_RELEASE is overridden to
# "-O0 -DNDEBUG" and LLVM assertions are switched off.
- cmake ../llvm -DLLVM_TARGETS_TO_BUILD=VE -DCMAKE_BUILD_TYPE=Release -G Ninja -DBUILD_SHARED_LIBS=on -DCMAKE_CXX_FLAGS_RELEASE="-O0 -DNDEBUG" -DLLVM_ENABLE_ASSERTIONS=OFF
# Only the VE backend libraries are requested, not the full LLVM build.
- ninja lib/libLLVMVEAsmParser.so lib/libLLVMVEDesc.so lib/libLLVMVEInfo.so lib/libLLVMVECodeGen.so lib/libLLVMVEDisassembler.so
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# LLVM for NEC SX-Aurora VE (llvm-ve-rv 1.8-dev)

[![Build Status](https://travis-ci.com/sx-aurora-dev/llvm-project.svg?branch=hpce%2Fdevelop)](https://travis-ci.com/sx-aurora-dev/llvm-project)

This is a fork of the LLVM repository with support for the NEC
SX-Aurora TSUBASA Vector Engine (VE).

Expand Down
82 changes: 82 additions & 0 deletions libunwind/src/UnwindRegistersSave.S
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,88 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)

li a0, 0 // return UNW_ESUCCESS
ret // jump to ra

#elif defined(__ve__)

#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in %s0
#
// Stores the scalar registers %s0..%s63 into the context addressed by %s0,
// followed by two extra words obtained with `sic` and `svl`.
//
// Byte offsets written, relative to thread_state (%s0):
//   8*N   scalar register %sN, for N = 0..63
//   512   value read by `sic` (instruction counter, per the VE ISA)
//   520   value read by `svl` (vector length, per the VE ISA)
//
// %s1 is the only register used as scratch; its incoming value is stored
// first and reloaded before returning.
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
  // Store %s1 before it is reused as a scratch register.
  st %s1, 8(,%s0)
  sic %s1
  st %s1, 512(,%s0)
  st %s0, 0(,%s0)
  st %s2, 16(,%s0)
  st %s3, 24(,%s0)
  st %s4, 32(,%s0)
  st %s5, 40(,%s0)
  st %s6, 48(,%s0)
  st %s7, 56(,%s0)
  st %s8, 64(,%s0)
  st %s9, 72(,%s0)
  st %s10, 80(,%s0)
  st %s11, 88(,%s0)
  st %s12, 96(,%s0)
  st %s13, 104(,%s0)
  st %s14, 112(,%s0)
  st %s15, 120(,%s0)
  st %s16, 128(,%s0)
  st %s17, 136(,%s0)
  st %s18, 144(,%s0)
  st %s19, 152(,%s0)
  st %s20, 160(,%s0)
  st %s21, 168(,%s0)
  st %s22, 176(,%s0)
  st %s23, 184(,%s0)
  st %s24, 192(,%s0)
  st %s25, 200(,%s0)
  st %s26, 208(,%s0)
  st %s27, 216(,%s0)
  st %s28, 224(,%s0)
  st %s29, 232(,%s0)
  st %s30, 240(,%s0)
  st %s31, 248(,%s0)
  st %s32, 256(,%s0)
  st %s33, 264(,%s0)
  st %s34, 272(,%s0)
  st %s35, 280(,%s0)
  st %s36, 288(,%s0)
  st %s37, 296(,%s0)
  st %s38, 304(,%s0)
  st %s39, 312(,%s0)
  st %s40, 320(,%s0)
  st %s41, 328(,%s0)
  st %s42, 336(,%s0)
  st %s43, 344(,%s0)
  st %s44, 352(,%s0)
  st %s45, 360(,%s0)
  st %s46, 368(,%s0)
  st %s47, 376(,%s0)
  st %s48, 384(,%s0)
  st %s49, 392(,%s0)
  st %s50, 400(,%s0)
  st %s51, 408(,%s0)
  st %s52, 416(,%s0)
  st %s53, 424(,%s0)
  st %s54, 432(,%s0)
  st %s55, 440(,%s0)
  st %s56, 448(,%s0)
  st %s57, 456(,%s0)
  st %s58, 464(,%s0)
  st %s59, 472(,%s0)
  st %s60, 480(,%s0)
  st %s61, 488(,%s0)
  st %s62, 496(,%s0)
  st %s63, 504(,%s0)
  svl %s1
  st %s1, 520(,%s0)
  // Reload the caller's %s1 from the slot written at the top of the function.
  ld %s1, 8(,%s0)
  // NOTE(review): presumably clears %s0 so the function returns UNW_ESUCCESS,
  // as the other ports in this file do; confirm the `0(1)` immediate spelling
  // against the VE assembler.
  or %s0, 0, 0(1)
  // Return through the link register.
  b.l (,%lr)
#endif

WEAK_ALIAS(__unw_getcontext, unw_getcontext)
Expand Down
2 changes: 2 additions & 0 deletions llvm/cmake/config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv32")
set(LLVM_NATIVE_ARCH RISCV)
elseif (LLVM_NATIVE_ARCH MATCHES "riscv64")
set(LLVM_NATIVE_ARCH RISCV)
elseif (LLVM_NATIVE_ARCH MATCHES "ve")
set(LLVM_NATIVE_ARCH VE)
else ()
message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
endif ()
Expand Down
75 changes: 2 additions & 73 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,8 @@ enum NodeType {
SREM,
UREM,

// Vector-predicated integer binary arithmetic
VP_ADD,
VP_SUB,
VP_MUL,
VP_SDIV,
VP_UDIV,
VP_SREM,
VP_UREM,
#define BEGIN_REGISTER_VP_SDNODE(VPSDNAME, ...) VPSDNAME,
#include "llvm/IR/VPIntrinsics.def"

/// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
/// a signed/unsigned value of type i[2*N], and return the full value as
Expand Down Expand Up @@ -354,9 +348,6 @@ enum NodeType {
FDIV,
FREM,

// Vector predicated floating point ops.
VP_FADD, VP_FSUB, VP_FMUL, VP_FDIV, VP_FREM,

/// Constrained versions of the binary floating point operators.
/// These will be lowered to the simple operators before final selection.
/// They are used to limit optimizations while the DAG is being
Expand Down Expand Up @@ -441,7 +432,6 @@ enum NodeType {

/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
VP_FMA,

/// FMAD - Perform a * b + c, while getting the same result as the
/// separately rounded operations.
Expand Down Expand Up @@ -532,19 +522,6 @@ enum NodeType {
/// in terms of the element size of VEC1/VEC2, not in terms of bytes.
VECTOR_SHUFFLE,

/// VP_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as
/// VEC1. AMOUNT is an integer value. The returned vector is equivalent
/// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]).
VP_VSHIFT,

/// VP_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as
/// VEC1.
VP_COMPRESS,

/// VP_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as
/// VEC1.
VP_EXPAND,

/// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
/// scalar value into element 0 of the resultant vector type. The top
/// elements 1 to N-1 of the N-element vector are undefined. The type
Expand Down Expand Up @@ -578,9 +555,6 @@ enum NodeType {
OR,
XOR,

// Vector-predicated bitwise operators
VP_AND, VP_OR, VP_XOR,

/// ABS - Determine the unsigned absolute value of a signed integer value of
/// the same bitwidth.
/// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow
Expand Down Expand Up @@ -609,7 +583,6 @@ enum NodeType {
ROTR,
FSHL,
FSHR,
VP_SHL, VP_SRA, VP_SRL,

/// Byte Swap and Counting operators.
BSWAP,
Expand All @@ -634,7 +607,6 @@ enum NodeType {
/// change the condition type in order to match the VSELECT node using a
/// pattern. The condition follows the BooleanContent format of the target.
VSELECT,
VP_SELECT,

/// Select with condition operator - This selects between a true value and
/// a false value (ops #2 and #3) based on the boolean result of comparing
Expand All @@ -649,7 +621,6 @@ enum NodeType {
/// them with (op #2) as a CondCodeSDNode. If the operands are vector types
/// then the result type must also be a vector type.
SETCC,
VP_SETCC,

/// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but
/// op #2 is a boolean indicating if there is an incoming carry. This
Expand Down Expand Up @@ -688,8 +659,6 @@ enum NodeType {
/// depends on the first letter) to floating point.
SINT_TO_FP,
UINT_TO_FP,
VP_SINT_TO_FP,
VP_UINT_TO_FP,

/// SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
/// sign extend a small value in a large integer register (e.g. sign
Expand Down Expand Up @@ -736,8 +705,6 @@ enum NodeType {
/// the FP value cannot fit in the integer type, the results are undefined.
FP_TO_SINT,
FP_TO_UINT,
VP_FP_TO_SINT,
VP_FP_TO_UINT,

/// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
/// down to the precision of the destination VT. TRUNC is a flag, which is
Expand All @@ -763,7 +730,6 @@ enum NodeType {

/// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
FP_EXTEND,
VP_FP_EXTEND,

/// BITCAST - This operator converts between integer, vector and FP
/// values, as if the value was stored to memory with one type and loaded
Expand Down Expand Up @@ -821,12 +787,6 @@ enum NodeType {
LRINT,
LLRINT,

// Vector-predicated unary floating-point ops
VP_FNEG, VP_FABS, VP_FSQRT, VP_FCBRT, VP_FSIN, VP_FCOS, VP_FPOWI, VP_FPOW,
VP_FLOG, VP_FLOG2, VP_FLOG10, VP_FEXP, VP_FEXP2,
VP_FCEIL, VP_FTRUNC, VP_FRINT, VP_FNEARBYINT, VP_FROUND, VP_FFLOOR,
VP_LROUND, VP_LLROUND, VP_LRINT, VP_LLRINT,

/// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
/// values.
//
Expand All @@ -836,7 +796,6 @@ enum NodeType {
/// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
FMINNUM,
FMAXNUM,
VP_FMINNUM, VP_FMAXNUM,

/// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
/// two values, following the IEEE-754 2008 definition. This differs from
Expand Down Expand Up @@ -1086,7 +1045,6 @@ enum NodeType {
// OutChain = MSTORE(Value, BasePtr, Mask)
MLOAD,
MSTORE,
VP_LOAD, VP_STORE,

// Masked gather and scatter - load and store operations for a vector of
// random addresses with additional mask operand that prevents memory
Expand All @@ -1100,17 +1058,6 @@ enum NodeType {
MGATHER,
MSCATTER,

// VP gather and scatter - load and store operations for a vector of
// random addresses with additional mask and vector length operand that
// prevents memory accesses to the masked-off lanes.
//
// Val, OutChain = VP_GATHER(InChain, BasePtr, Index, Scale, Mask, EVL)
// OutChain = VP_SCATTER(InChain, Value, BasePtr, Index, Scale, Mask, EVL)
//
// The Index operand can have more vector elements than the other operands
// due to type legalization. The extra elements are ignored.
VP_GATHER, VP_SCATTER,

/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
/// is the chain and the second operand is the alloca pointer.
LIFETIME_START,
Expand Down Expand Up @@ -1143,7 +1090,6 @@ enum NodeType {
/// is the vector to reduce.
VECREDUCE_STRICT_FADD,
VECREDUCE_STRICT_FMUL,
VP_REDUCE_STRICT_FADD, VP_REDUCE_STRICT_FMUL,

/// These reductions are non-strict, and have a single vector operand.
VECREDUCE_FADD,
Expand All @@ -1164,23 +1110,6 @@ enum NodeType {
VECREDUCE_UMAX,
VECREDUCE_UMIN,

// Vector-predicated reduction operators
VP_REDUCE_FADD,
VP_REDUCE_FMUL,
VP_REDUCE_ADD,
VP_REDUCE_MUL,
VP_REDUCE_AND,
VP_REDUCE_OR,
VP_REDUCE_XOR,
VP_REDUCE_SMAX,
VP_REDUCE_SMIN,
VP_REDUCE_UMAX,
VP_REDUCE_UMIN,

/// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
VP_REDUCE_FMAX,
VP_REDUCE_FMIN,

/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1375,6 +1375,11 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, Mask<3>, VectorLeng
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
// Element-wise bitops
// int_vp_ctpop: one overloaded vector result. The operands are a vector of
// the same type as the result, an i1 mask (scalar or with the same vector
// width as the result), and an i32 (presumably the explicit vector length --
// confirm against the enclosing `let` properties).
def int_vp_ctpop : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;

// Logical operators
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
Expand Down
24 changes: 24 additions & 0 deletions llvm/include/llvm/IR/VPIntrinsics.def
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@

///// Integer Arithmetic /////

// llvm.vp.ctpop(x,mask,vlen)
// Registers the vp_ctpop intrinsic together with its VP_CTPOP SDNode opcode.
// NOTE(review): the trailing "1, 2" matches the zero-based positions of mask
// and vlen in the signature above (vp_add, with one more operand, uses
// "2, 3") -- confirm against the macro definitions, which are not visible
// here.
BEGIN_REGISTER_VP_INTRINSIC(vp_ctpop, 1, 2)
BEGIN_REGISTER_VP_SDNODE(VP_CTPOP, -1, vp_ctpop, 1, 2)
// Associates this VP intrinsic with the `ctpop` name and marks it as unary.
HANDLE_VP_TO_INTRIN(ctpop)
HANDLE_VP_IS_UNARY
END_REGISTER_CASES(vp_ctpop, VP_CTPOP)

// llvm.vp.add(x,y,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_add, 2, 3)
BEGIN_REGISTER_VP_SDNODE(VP_ADD, -1, vp_add, 2, 3)
Expand Down Expand Up @@ -490,6 +497,16 @@ HANDLE_VP_IS_MEMOP(1, 0)
END_REGISTER_CASES(vp_store, VP_STORE)

// llvm.vp.scatter(ptr,val,mask,vlen)
// VP gather and scatter - load and store operations for a vector of
// random addresses with additional mask and vector length operands that
// prevent memory accesses to the masked-off lanes.
//
// Val, OutChain = VP_GATHER(InChain, BasePtr, Index, Scale, Mask, EVL)
// OutChain = VP_SCATTER(InChain, Value, BasePtr, Index, Scale, Mask, EVL)
//
// The Index operand can have more vector elements than the other operands
// due to type legalization. The extra elements are ignored.

BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, 1, vp_scatter, 3, 4)
HANDLE_VP_TO_INTRIN(masked_scatter)
Expand All @@ -514,16 +531,23 @@ END_REGISTER_CASES(vp_gather, VP_GATHER)
///// Shuffle & Blend /////

// llvm.vp.compress(x,mask,vlen)
/// VP_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as
/// VEC1.
BEGIN_REGISTER_VP_INTRINSIC(vp_compress, 1, 2)
BEGIN_REGISTER_VP_SDNODE(VP_COMPRESS, -1, vp_compress, 1, 2)
END_REGISTER_CASES(vp_compress, VP_COMPRESS)

// llvm.vp.expand(x,mask,vlen)
/// VP_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as
/// VEC1.
BEGIN_REGISTER_VP_INTRINSIC(vp_expand, 1, 2)
BEGIN_REGISTER_VP_SDNODE(VP_EXPAND, -1, vp_expand, 1, 2)
END_REGISTER_CASES(vp_expand, VP_EXPAND)

// llvm.vp.vshift(x,amount,mask,vlen)
/// VP_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as
/// VEC1. AMOUNT is an integer value. The returned vector is equivalent
/// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]).
BEGIN_REGISTER_VP_INTRINSIC(vp_vshift, 2, 3)
BEGIN_REGISTER_VP_SDNODE(VP_VSHIFT, -1, vp_vshift, 2, 3)
END_REGISTER_CASES(vp_vshift, VP_VSHIFT)
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/VE/VVPInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ def SDTFPUnaryOpVVP : SDTypeProfile<1, 3, [ // fneg, fsqrt, etc
SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
]>;

// unary int
// Type profile with 1 result and 3 operands (value index 0 is the result,
// indices 1..3 are the operands):
//   - index 1 has the same type as the result
//   - the result is an integer type
//   - index 2 has the same number of elements as the result
//   - index 3 is constrained by IsVLVT (defined elsewhere; presumably the
//     vector-length operand -- confirm)
def SDTUnaryOpVVP : SDTypeProfile<1, 3, [ // ctpop
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
]>;

// gather scatter
def vvp_scatter : SDNode<"VEISD::VVP_SCATTER", SDTScatterVVP,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
Expand Down Expand Up @@ -145,6 +150,9 @@ def vvp_sext : SDNode<"VEISD::VVP_SEXT", SDTIntExtendOpVVP>;
def vvp_zext : SDNode<"VEISD::VVP_ZEXT", SDTIntExtendOpVVP>;
def vvp_trunc : SDNode<"VEISD::VVP_TRUNC", SDTIntTruncOpVVP>;

// element-wise bitops
// Declares the vvp_ctpop pattern node for the VEISD::VVP_CTPOP opcode, typed
// by the SDTUnaryOpVVP profile.
def vvp_ctpop : SDNode<"VEISD::VVP_CTPOP", SDTUnaryOpVVP>;

// reductions
def vvp_reduce_fadd : SDNode<"VEISD::VVP_REDUCE_FADD", SDTReduceVVP>;
def vvp_reduce_strict_fadd : SDNode<"VEISD::VVP_REDUCE_STRICT_FADD", SDTReduceStartVVP>;
Expand Down
Loading