diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9fb2c048a5c86..6a6d22eaac2d1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15234,6 +15234,59 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
 '``llvm.memset.*``', but the generated code is guaranteed not to call any
 external functions.
 
+.. _int_memset_pattern_inline:
+
+'``llvm.memset_pattern.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset_pattern.inline`` on
+any integer bit width and for different address spaces. However, not all
+targets support all bit widths.
+
+::
+
+      declare void @llvm.memset_pattern.inline.p0.i64.i128(ptr <dest>, i128 <val>,
+                                                           i64 <len>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset_pattern.inline.*``' intrinsics fill a block of memory with
+a particular value pattern and guarantee that the generated code does not call
+any external functions.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second
+is the pattern value with which to fill it, the third is a constant integer
+specifying the number of bytes to fill, and the fourth is a boolean
+indicating a volatile access.
+
+The :ref:`align <attr_align>` parameter attribute can be provided
+for the first argument.
+
+If the ``isvolatile`` parameter is ``true``, the
+``llvm.memset_pattern.inline`` call is a :ref:`volatile operation <volatile>`.
+The detailed access behavior is not very cleanly specified and it is unwise to
+depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset_pattern.inline.*``' intrinsics fill "len" bytes of memory
+starting at the destination location by repeating the given pattern value; if
+"len" is not a multiple of the pattern width, the final copy of the pattern is
+truncated. If the argument is known to be aligned to some boundary, this can
+be specified as an attribute on the argument.
+
+``len`` must be a constant expression.
+If ``<len>`` is 0, it is a no-op modulo the behavior of attributes attached to
+the arguments.
+If ``<len>`` is not a well-defined value, the behavior is undefined.
+If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
+behavior is undefined.
+
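For illustration, a minimal IR sketch of a call under the signature above (the function ``@fill32`` and its operands are hypothetical; the mangled name matches the RISC-V tests added later in this patch). A length of 32 is exactly two copies of the 16-byte pattern, so no truncated tail store is needed:

  declare void @llvm.memset_pattern.inline.p0.i64.i128(ptr, i128, i64, i1)

  define void @fill32(ptr %dst, i128 %pattern) {
    ; Fills bytes [0,32) at %dst with two back-to-back copies of %pattern;
    ; a length such as 17 would add a truncating 1-byte store of the pattern.
    call void @llvm.memset_pattern.inline.p0.i64.i128(ptr align 8 %dst, i128 %pattern, i64 32, i1 false)
    ret void
  }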
 .. _int_sqrt:

 '``llvm.sqrt.*``' Intrinsic

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 7b0e5e7d9504b..f5d5169500432 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1205,6 +1205,12 @@ class SelectionDAG {
                     MachinePointerInfo DstPtrInfo,
                     const AAMDNodes &AAInfo = AAMDNodes());

+  SDValue getMemsetPatternInline(SDValue Chain, const SDLoc &dl, SDValue Dst,
+                                 SDValue Src, SDValue Size, Align Alignment,
+                                 bool isVol, bool isTailCall,
+                                 MachinePointerInfo DstPtrInfo,
+                                 const AAMDNodes &AAInfo = AAMDNodes());
+
   SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
                           SDValue Src, SDValue Size, Type *SizeTy,
                           unsigned ElemSz, bool isTailCall,
diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h
index 311e0ac47ddfa..30a9df8a32184 100644
--- a/llvm/include/llvm/IR/InstVisitor.h
+++ b/llvm/include/llvm/IR/InstVisitor.h
@@ -208,6 +208,7 @@ class InstVisitor {
   RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
   RetTy visitMemSetInst(MemSetInst &I)            { DELEGATE(MemIntrinsic); }
   RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
+  RetTy visitMemSetPatternInlineInst(MemSetPatternInlineInst &I){ DELEGATE(MemSetInst); }
   RetTy visitMemCpyInst(MemCpyInst &I)            { DELEGATE(MemTransferInst); }
   RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
   RetTy visitMemMoveInst(MemMoveInst &I)          { DELEGATE(MemTransferInst); }
@@ -295,6 +296,8 @@ class InstVisitor {
     case Intrinsic::memset:        DELEGATE(MemSetInst);
     case Intrinsic::memset_inline: DELEGATE(MemSetInlineInst);
+    case Intrinsic::memset_pattern_inline:
+      DELEGATE(MemSetPatternInlineInst);
     case Intrinsic::vastart:       DELEGATE(VAStartInst);
     case Intrinsic::vaend:         DELEGATE(VAEndInst);
     case Intrinsic::vacopy:        DELEGATE(VACopyInst);
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 9010e1a1c896b..184cc47e48c1e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1170,6 +1170,7 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
     case Intrinsic::memmove:
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
     case Intrinsic::memcpy_inline:
       return true;
     default:
@@ -1181,7 +1182,8 @@
   }
 };

-/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
+/// This class wraps the llvm.memset, llvm.memset.inline, and
+/// llvm.memset_pattern.inline intrinsics.
 class MemSetInst : public MemSetBase<MemSetInst> {
 public:
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -1189,6 +1191,7 @@ class MemSetInst : public MemSetBase<MemSetInst> {
     switch (I->getIntrinsicID()) {
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
       return true;
     default:
       return false;
@@ -1214,6 +1217,21 @@ class MemSetInlineInst : public MemSetInst {
   }
 };

+/// This class wraps the llvm.memset_pattern.inline intrinsic.
+class MemSetPatternInlineInst : public MemSetInst {
+public:
+  ConstantInt *getLength() const {
+    return cast<ConstantInt>(MemSetInst::getLength());
+  }
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::memset_pattern_inline;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 /// This class wraps the llvm.memcpy/memmove intrinsics.
 class MemTransferInst : public MemTransferBase<MemTransferInst> {
 public:
@@ -1293,6 +1311,7 @@ class AnyMemIntrinsic : public MemIntrinsicBase<AnyMemIntrinsic> {
     case Intrinsic::memmove:
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
     case Intrinsic::memcpy_element_unordered_atomic:
     case Intrinsic::memmove_element_unordered_atomic:
     case Intrinsic::memset_element_unordered_atomic:
@@ -1315,6 +1334,7 @@ class AnyMemSetInst : public MemSetBase<AnyMemSetInst> {
     switch (I->getIntrinsicID()) {
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
     case Intrinsic::memset_element_unordered_atomic:
       return true;
     default:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 107442623ab7b..a7f2818506886 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1008,6 +1008,15 @@ def int_memset_inline
        NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
        ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+// Memset variant that writes a given pattern. Like memset.inline, this is
+// guaranteed not to call any external function.
+def int_memset_pattern_inline
+    : Intrinsic<[],
+      [llvm_anyptr_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_i1_ty],
+      [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
+       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
+       ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>], "llvm.memset_pattern.inline">;
+
 // FIXME: Add version of these floating point intrinsics which allow non-default
 // rounding modes and FP exception handling.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 523d3aea66225..8a9faefbf2a8c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8484,6 +8484,47 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   return CallResult.second;
 }

+SDValue SelectionDAG::getMemsetPatternInline(SDValue Chain, const SDLoc &dl,
+                                             SDValue Dst, SDValue Src,
+                                             SDValue Size, Align Alignment,
+                                             bool isVol, bool isTailCall,
+                                             MachinePointerInfo DstPtrInfo,
+                                             const AAMDNodes &AAInfo) {
+  ConstantSDNode *ConstantSize = cast<ConstantSDNode>(Size);
+  if (ConstantSize->isZero())
+    return Chain;
+
+  uint64_t SrcWidth = Src.getScalarValueSizeInBits() / 8;
+  unsigned NumFullWidthStores = ConstantSize->getZExtValue() / SrcWidth;
+  unsigned RemainingBytes = ConstantSize->getZExtValue() % SrcWidth;
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+
+  for (unsigned i = 0; i < NumFullWidthStores; i++) {
+    SDValue Store = getStore(
+        Chain, dl, Src,
+        getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        DstPtrInfo.getWithOffset(DstOff), Alignment,
+        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+        AAInfo);
+    OutChains.push_back(Store);
+    DstOff += Src.getValueType().getSizeInBits() / 8;
+  }
+
+  if (RemainingBytes) {
+    EVT IntVT = EVT::getIntegerVT(*getContext(), RemainingBytes * 8);
+    SDValue Store = getTruncStore(
+        Chain, dl, Src,
+        getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        DstPtrInfo.getWithOffset(DstOff), IntVT, Alignment,
+        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+        AAInfo);
+    OutChains.push_back(Store);
+  }
+
+  return getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
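To make the lowering concrete, a hedged IR sketch (hypothetical function and value names, reusing the ``p0.i32.i32`` mangling from the Verifier tests below): an i32 pattern has a SrcWidth of 4 bytes, so a 9-byte fill becomes two full-width stores plus a truncating tail store.

  declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr, i32, i32, i1)

  define void @fill9(ptr %dst, i32 %pattern) {
    ; Expands to i32 stores of %pattern at offsets 0 and 4, followed by a
    ; truncating i8 store of the pattern's low byte at offset 8 (RemainingBytes == 1).
    call void @llvm.memset_pattern.inline.p0.i32.i32(ptr align 4 %dst, i32 %pattern, i32 9, i1 false)
    ret void
  }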
 SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
                                       SDValue Dst, SDValue Value, SDValue Size,
                                       Type *SizeTy, unsigned ElemSz,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index be5e0f6ef058b..9d2b1fd6b7f9b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6485,6 +6485,24 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     updateDAGForMaybeTailCall(MC);
     return;
   }
+  case Intrinsic::memset_pattern_inline: {
+    const auto &MSPII = cast<MemSetPatternInlineInst>(I);
+    SDValue Dst = getValue(I.getArgOperand(0));
+    SDValue Value = getValue(I.getArgOperand(1));
+    SDValue Size = getValue(I.getArgOperand(2));
+    assert(isa<ConstantSDNode>(Size) &&
+           "memset_pattern_inline needs constant size");
+    // @llvm.memset defines 0 and 1 to both mean no alignment.
+    Align DstAlign = MSPII.getDestAlign().valueOrOne();
+    bool isVol = MSPII.isVolatile();
+    bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+    SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    SDValue MC = DAG.getMemsetPatternInline(
+        Root, sdl, Dst, Value, Size, DstAlign, isVol, isTC,
+        MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+    updateDAGForMaybeTailCall(MC);
+    return;
+  }
   case Intrinsic::memmove: {
     const auto &MMI = cast<MemMoveInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index e5927203f33a2..8f783755a1dae 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5434,7 +5434,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   case Intrinsic::memcpy_inline:
   case Intrinsic::memmove:
   case Intrinsic::memset:
-  case Intrinsic::memset_inline: {
+  case Intrinsic::memset_inline:
+  case Intrinsic::memset_pattern_inline: {
     break;
   }
   case Intrinsic::memcpy_element_unordered_atomic:
diff --git a/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll b/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
new file mode 100644
index 0000000000000..993dc330a36ff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
@@ -0,0 +1,591 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+
+define void @memset_1(ptr %a, i128 %value) nounwind {
+; RV32-BOTH-LABEL: memset_1:
+; RV32-BOTH:       # %bb.0:
+; RV32-BOTH-NEXT:    lw a1, 0(a1)
+; RV32-BOTH-NEXT:    sb a1, 0(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: memset_1:
+; RV64-BOTH:       # %bb.0:
+; RV64-BOTH-NEXT:    sb a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_2(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; 
RV32-NEXT: ret +; +; RV64-LABEL: memset_2: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_2: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sh a1, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_2: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sh a1, 0(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 2, i1 0) + ret void +} + +define void @memset_3(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_3: +; RV32: # %bb.0: +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: srli a2, a1, 8 +; RV32-NEXT: sb a2, 1(a0) +; RV32-NEXT: srli a1, a1, 16 +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_3: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a2, a1, 8 +; RV64-NEXT: sb a2, 1(a0) +; RV64-NEXT: srli a1, a1, 16 +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_3: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sh a1, 0(a0) +; RV32-FAST-NEXT: srli a1, a1, 16 +; RV32-FAST-NEXT: sb a1, 2(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_3: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sh a1, 0(a0) +; RV64-FAST-NEXT: srli a1, a1, 16 +; RV64-FAST-NEXT: sb a1, 2(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 3, i1 0) + ret void +} + +define void @memset_4(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_4: +; RV32: # %bb.0: +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: srli a2, a1, 24 +; RV32-NEXT: sb a2, 3(a0) +; RV32-NEXT: srli a2, a1, 16 +; RV32-NEXT: sb a2, 2(a0) +; RV32-NEXT: srli a1, a1, 8 +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_4: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_4: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_4: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sw a1, 0(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 4, i1 0) + ret void +} + +define void @memset_5(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_5: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sb a2, 0(a0) +; RV32-NEXT: sb a1, 4(a0) +; RV32-NEXT: srli a1, a2, 24 +; RV32-NEXT: sb a1, 3(a0) +; RV32-NEXT: srli a1, a2, 16 +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: sb a2, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_5: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a2, a1, 8 +; RV64-NEXT: sb a2, 1(a0) +; RV64-NEXT: srli a1, a1, 32 +; RV64-NEXT: sb a1, 4(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_5: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 4(a1) +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sb a2, 4(a0) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_5: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sw a1, 0(a0) +; RV64-FAST-NEXT: srli a1, a1, 32 +; RV64-FAST-NEXT: sb a1, 4(a0) +; 
RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 5, i1 0) + ret void +} + +define void @memset_6(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_6: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 4(a1) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a2, 4(a0) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: sb a2, 5(a0) +; RV32-NEXT: srli a2, a1, 24 +; RV32-NEXT: sb a2, 3(a0) +; RV32-NEXT: srli a2, a1, 16 +; RV32-NEXT: sb a2, 2(a0) +; RV32-NEXT: srli a1, a1, 8 +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_6: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a2, a1, 40 +; RV64-NEXT: sb a2, 5(a0) +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sb a2, 4(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_6: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 4(a1) +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sh a2, 4(a0) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_6: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sw a1, 0(a0) +; RV64-FAST-NEXT: srli a1, a1, 32 +; RV64-FAST-NEXT: sh a1, 4(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 6, i1 0) + ret void +} + +define void @memset_7(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_7: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 4(a1) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a2, 4(a0) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: srli a3, a2, 8 +; RV32-NEXT: sb a3, 5(a0) +; RV32-NEXT: srli a2, a2, 16 +; RV32-NEXT: sb a2, 6(a0) +; RV32-NEXT: srli a2, a1, 24 +; RV32-NEXT: sb a2, 3(a0) +; RV32-NEXT: srli a2, a1, 16 +; RV32-NEXT: sb a2, 2(a0) +; RV32-NEXT: srli a1, a1, 8 +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_7: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a2, a1, 40 +; RV64-NEXT: sb a2, 5(a0) +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sb a2, 4(a0) +; RV64-NEXT: srli a2, a1, 48 +; RV64-NEXT: sb a2, 6(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_7: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 4(a1) +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sh a2, 4(a0) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: srli a2, a2, 16 +; RV32-FAST-NEXT: sb a2, 6(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_7: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sw a1, 0(a0) +; RV64-FAST-NEXT: srli a2, a1, 48 +; RV64-FAST-NEXT: sb a2, 6(a0) +; RV64-FAST-NEXT: srli a1, a1, 32 +; RV64-FAST-NEXT: sh a1, 4(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 7, i1 0) + ret void +} + +define void @memset_8(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_8: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 4(a1) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a2, 4(a0) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: srli a3, a2, 24 +; RV32-NEXT: sb a3, 7(a0) +; RV32-NEXT: srli a3, a2, 16 +; RV32-NEXT: sb a3, 6(a0) +; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: sb a2, 5(a0) +; RV32-NEXT: srli a2, a1, 24 +; RV32-NEXT: sb a2, 3(a0) +; RV32-NEXT: srli a2, a1, 16 +; RV32-NEXT: sb a2, 2(a0) +; RV32-NEXT: srli a1, a1, 8 +; RV32-NEXT: sb a1, 1(a0) 
+; RV32-NEXT: ret +; +; RV64-LABEL: memset_8: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a2, a1, 56 +; RV64-NEXT: sb a2, 7(a0) +; RV64-NEXT: srli a2, a1, 48 +; RV64-NEXT: sb a2, 6(a0) +; RV64-NEXT: srli a2, a1, 40 +; RV64-NEXT: sb a2, 5(a0) +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sb a2, 4(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_8: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 4(a1) +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sw a2, 4(a0) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_8: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 8, i1 0) + ret void +} + +define void @memset_9(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_9: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 4(a1) +; RV32-NEXT: lw a3, 0(a1) +; RV32-NEXT: lw a1, 8(a1) +; RV32-NEXT: sb a2, 4(a0) +; RV32-NEXT: sb a3, 0(a0) +; RV32-NEXT: sb a1, 8(a0) +; RV32-NEXT: srli a1, a2, 24 +; RV32-NEXT: sb a1, 7(a0) +; RV32-NEXT: srli a1, a2, 16 +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: sb a2, 5(a0) +; RV32-NEXT: srli a1, a3, 24 +; RV32-NEXT: sb a1, 3(a0) +; RV32-NEXT: srli a1, a3, 16 +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: srli a3, a3, 8 +; RV32-NEXT: sb a3, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_9: +; RV64: # %bb.0: +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a2, 8(a0) +; RV64-NEXT: srli a2, a1, 56 +; RV64-NEXT: sb a2, 7(a0) +; RV64-NEXT: srli a2, a1, 48 +; RV64-NEXT: sb a2, 6(a0) +; RV64-NEXT: srli a2, a1, 40 +; RV64-NEXT: sb a2, 5(a0) +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sb a2, 4(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_9: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 4(a1) +; RV32-FAST-NEXT: lw a3, 0(a1) +; RV32-FAST-NEXT: lw a1, 8(a1) +; RV32-FAST-NEXT: sw a2, 4(a0) +; RV32-FAST-NEXT: sw a3, 0(a0) +; RV32-FAST-NEXT: sb a1, 8(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_9: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sb a2, 8(a0) +; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 9, i1 0) + ret void +} + +define void @memset_16(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_16: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 12(a1) +; RV32-NEXT: lw a3, 8(a1) +; RV32-NEXT: lw a4, 4(a1) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a2, 12(a0) +; RV32-NEXT: sb a3, 8(a0) +; RV32-NEXT: sb a4, 4(a0) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: srli a5, a2, 24 +; RV32-NEXT: sb a5, 15(a0) +; RV32-NEXT: srli a5, a2, 16 +; RV32-NEXT: sb a5, 14(a0) +; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: sb a2, 13(a0) +; RV32-NEXT: srli a2, a3, 24 +; RV32-NEXT: sb a2, 11(a0) +; RV32-NEXT: srli a2, a3, 16 +; RV32-NEXT: sb a2, 10(a0) +; RV32-NEXT: srli a3, a3, 8 +; RV32-NEXT: sb a3, 9(a0) +; RV32-NEXT: srli a2, a4, 24 +; RV32-NEXT: sb a2, 7(a0) +; RV32-NEXT: srli a2, a4, 16 +; RV32-NEXT: sb a2, 6(a0) +; RV32-NEXT: srli a4, a4, 8 +; RV32-NEXT: sb a4, 5(a0) +; RV32-NEXT: srli a2, a1, 24 +; RV32-NEXT: sb a2, 3(a0) +; RV32-NEXT: srli a2, a1, 16 +; RV32-NEXT: sb a2, 2(a0) +; RV32-NEXT: srli 
a1, a1, 8 +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_16: +; RV64: # %bb.0: +; RV64-NEXT: sb a2, 8(a0) +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: srli a3, a2, 56 +; RV64-NEXT: sb a3, 15(a0) +; RV64-NEXT: srli a3, a2, 48 +; RV64-NEXT: sb a3, 14(a0) +; RV64-NEXT: srli a3, a2, 40 +; RV64-NEXT: sb a3, 13(a0) +; RV64-NEXT: srli a3, a2, 32 +; RV64-NEXT: sb a3, 12(a0) +; RV64-NEXT: srli a3, a2, 24 +; RV64-NEXT: sb a3, 11(a0) +; RV64-NEXT: srli a3, a2, 16 +; RV64-NEXT: sb a3, 10(a0) +; RV64-NEXT: srli a2, a2, 8 +; RV64-NEXT: sb a2, 9(a0) +; RV64-NEXT: srli a2, a1, 56 +; RV64-NEXT: sb a2, 7(a0) +; RV64-NEXT: srli a2, a1, 48 +; RV64-NEXT: sb a2, 6(a0) +; RV64-NEXT: srli a2, a1, 40 +; RV64-NEXT: sb a2, 5(a0) +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sb a2, 4(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_16: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 12(a1) +; RV32-FAST-NEXT: lw a3, 8(a1) +; RV32-FAST-NEXT: lw a4, 4(a1) +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sw a2, 12(a0) +; RV32-FAST-NEXT: sw a3, 8(a0) +; RV32-FAST-NEXT: sw a4, 4(a0) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_16: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sd a2, 8(a0) +; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 16, i1 0) + ret void +} + +define void @memset_17(ptr %a, i128 %value) nounwind { +; RV32-LABEL: memset_17: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 12(a1) +; RV32-NEXT: lw a3, 8(a1) +; RV32-NEXT: lw a4, 4(a1) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: sb a2, 12(a0) +; RV32-NEXT: sb a3, 8(a0) +; RV32-NEXT: sb a4, 4(a0) +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 16(a0) +; RV32-NEXT: srli a5, a2, 24 +; RV32-NEXT: sb a5, 15(a0) +; RV32-NEXT: srli a5, a2, 16 +; RV32-NEXT: sb a5, 14(a0) +; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: sb a2, 13(a0) +; RV32-NEXT: srli a2, a3, 24 +; RV32-NEXT: sb a2, 11(a0) +; RV32-NEXT: srli a2, a3, 16 +; RV32-NEXT: sb a2, 10(a0) +; RV32-NEXT: srli a3, a3, 8 +; RV32-NEXT: sb a3, 9(a0) +; RV32-NEXT: srli a2, a4, 24 +; RV32-NEXT: sb a2, 7(a0) +; RV32-NEXT: srli a2, a4, 16 +; RV32-NEXT: sb a2, 6(a0) +; RV32-NEXT: srli a4, a4, 8 +; RV32-NEXT: sb a4, 5(a0) +; RV32-NEXT: srli a2, a1, 24 +; RV32-NEXT: sb a2, 3(a0) +; RV32-NEXT: srli a2, a1, 16 +; RV32-NEXT: sb a2, 2(a0) +; RV32-NEXT: srli a1, a1, 8 +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: memset_17: +; RV64: # %bb.0: +; RV64-NEXT: sb a2, 8(a0) +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 16(a0) +; RV64-NEXT: srli a3, a2, 56 +; RV64-NEXT: sb a3, 15(a0) +; RV64-NEXT: srli a3, a2, 48 +; RV64-NEXT: sb a3, 14(a0) +; RV64-NEXT: srli a3, a2, 40 +; RV64-NEXT: sb a3, 13(a0) +; RV64-NEXT: srli a3, a2, 32 +; RV64-NEXT: sb a3, 12(a0) +; RV64-NEXT: srli a3, a2, 24 +; RV64-NEXT: sb a3, 11(a0) +; RV64-NEXT: srli a3, a2, 16 +; RV64-NEXT: sb a3, 10(a0) +; RV64-NEXT: srli a2, a2, 8 +; RV64-NEXT: sb a2, 9(a0) +; RV64-NEXT: srli a2, a1, 56 +; RV64-NEXT: sb a2, 7(a0) +; RV64-NEXT: srli a2, a1, 48 +; RV64-NEXT: sb a2, 6(a0) +; RV64-NEXT: srli a2, a1, 40 +; RV64-NEXT: sb a2, 5(a0) +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sb a2, 4(a0) +; RV64-NEXT: srli a2, a1, 24 +; RV64-NEXT: sb a2, 3(a0) +; RV64-NEXT: srli a2, a1, 16 +; RV64-NEXT: sb a2, 2(a0) +; RV64-NEXT: srli a1, a1, 8 +; RV64-NEXT: sb a1, 1(a0) +; 
RV64-NEXT: ret +; +; RV32-FAST-LABEL: memset_17: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: lw a2, 12(a1) +; RV32-FAST-NEXT: lw a3, 8(a1) +; RV32-FAST-NEXT: lw a4, 4(a1) +; RV32-FAST-NEXT: lw a1, 0(a1) +; RV32-FAST-NEXT: sw a2, 12(a0) +; RV32-FAST-NEXT: sw a3, 8(a0) +; RV32-FAST-NEXT: sw a4, 4(a0) +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: sb a1, 16(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: memset_17: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sd a2, 8(a0) +; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: sb a1, 16(a0) +; RV64-FAST-NEXT: ret + tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 17, i1 0) + ret void +} + diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll index 47189c0b7d052..29921633ce964 100644 --- a/llvm/test/Verifier/intrinsic-immarg.ll +++ b/llvm/test/Verifier/intrinsic-immarg.ll @@ -79,6 +79,23 @@ define void @memset_inline_variable_size(ptr %dest, i8 %value, i32 %size) { ret void } +declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1) +define void @memset_pattern_inline_is_volatile(ptr %dest, i32 %value, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile) + call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile) + ret void +} + +define void @memset_pattern_inline_variable_size(ptr %dest, i32 %value, i32 %size) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %size + ; CHECK-NEXT: call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 %size, i1 true) + call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 %size, i1 true) + ret void +} + declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1) define void @objectsize(ptr %ptr, i1 %a, i1 %b, i1 %c) { diff --git a/llvm/test/Verifier/memset-pattern-inline.ll b/llvm/test/Verifier/memset-pattern-inline.ll new file mode 100644 index 0000000000000..6876bc3ceedba --- /dev/null +++ b/llvm/test/Verifier/memset-pattern-inline.ll @@ -0,0 +1,9 @@ +; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s + +; CHECK: alignment is not a power of two + +define void @foo(ptr %P, i32 %value) { + call void @llvm.memset_pattern.inline.p0.i32.i32(ptr align 3 %P, i32 %value, i32 4, i1 false) + ret void +} +declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1) nounwind
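For contrast with the failing case above, a hedged sketch of a call the verifier accepts (hypothetical function name ``@foo_ok``); the only change is an alignment that is a power of two:

  declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1) nounwind

  define void @foo_ok(ptr %P, i32 %value) {
    ; align 4 is a power of two, so 'opt -passes=verify' reports no error here.
    call void @llvm.memset_pattern.inline.p0.i32.i32(ptr align 4 %P, i32 %value, i32 4, i1 false)
    ret void
  }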