diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
index 509b01213cd9c..1f770893828e2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -13,6 +13,7 @@
 #include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Support/CommandLine.h"
 
@@ -115,3 +116,29 @@ ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc,
 
   return ModRefInfo::ModRef;
 }
+
+MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call,
+                                              AAQueryInfo &AAQI) {
+  // Inline assembly with no side-effect or memory clobbers should not
+  // indirectly access memory in the PTX specification.
+  if (const auto *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
+    // Volatile is translated as side-effects.
+    if (IA->hasSideEffects())
+      return MemoryEffects::unknown();
+
+    for (const InlineAsm::ConstraintInfo &Constraint : IA->ParseConstraints()) {
+      // Indirect constraints (e.g. =*m) are unsupported in inline PTX.
+      if (Constraint.isIndirect)
+        return MemoryEffects::unknown();
+
+      // Memory clobbers prevent optimization.
+      if (Constraint.Type == InlineAsm::isClobber &&
+          any_of(Constraint.Codes,
+                 [](const auto &Code) { return Code == "{memory}"; }))
+        return MemoryEffects::unknown();
+    }
+    return MemoryEffects::none();
+  }
+
+  return MemoryEffects::unknown();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
index 2d204979eb6ce..cfbf5dee3ec50 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -36,6 +36,16 @@ class NVPTXAAResult : public AAResultBase {
 
   ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
                                bool IgnoreLocals);
+
+  /// Returns the memory effects of \p Call. Inline asm without side effects,
+  /// indirect constraints, or a memory clobber is reported as accessing no
+  /// memory; every other call is reported as unknown.
+  MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI);
+
+  /// Function-level query: always returns the conservative unknown effects.
+  MemoryEffects getMemoryEffects(const Function *F) {
+    return MemoryEffects::unknown();
+  }
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 51c679b8ad89c..a89ca3037c7ff 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -8,6 +8,7 @@
 
 #include "NVPTXTargetTransformInfo.h"
 #include "NVPTXUtilities.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -483,6 +484,35 @@ NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   return std::nullopt;
 }
 
+InstructionCost
+NVPTXTTIImpl::getInstructionCost(const User *U,
+                                 ArrayRef<const Value *> Operands,
+                                 TTI::TargetCostKind CostKind) {
+  if (const auto *CI = dyn_cast<CallInst>(U))
+    if (const auto *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
+      // Without this implementation getCallCost() would return the number
+      // of arguments+1 as the cost, because the cost model treats inline asm
+      // as an ordinary call (that is how it is classified in the IR). Instead,
+      // approximate the cost by the number of instruction-like statements
+      // embedded in the asm string.
+      const auto &AsmStr = IA->getAsmString();
+      const unsigned InstCount =
+          count_if(split(AsmStr, ';'), [](StringRef AsmInst) {
+            // Trim off scopes denoted by '{' and '}' as these can be ignored
+            AsmInst = AsmInst.trim().ltrim("{} \t\n\v\f\r");
+            // This is pretty coarse but does a reasonably good job of
+            // identifying things that look like instructions, possibly with a
+            // predicate ("@").
+            return !AsmInst.empty() &&
+                   (AsmInst[0] == '@' || isAlpha(AsmInst[0]) ||
+                    AsmInst.find(".pragma") != StringRef::npos);
+          });
+      return InstCount * TargetTransformInfo::TCC_Basic;
+    }
+
+  return BaseT::getInstructionCost(U, Operands, CostKind);
+}
+
 InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
     TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 7f69d422e8b4b..6db36e958b28c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -94,6 +94,13 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
   // calls are particularly expensive in NVPTX.
   unsigned getInliningThresholdMultiplier() const { return 11; }
 
+  /// Estimates the cost of \p U. Inline asm calls are costed by counting the
+  /// instruction-like statements in the asm string; everything else is
+  /// delegated to the base implementation.
+  InstructionCost getInstructionCost(const User *U,
+                                     ArrayRef<const Value *> Operands,
+                                     TTI::TargetCostKind CostKind);
+
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
diff --git a/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll b/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll
new file mode 100644
index 0000000000000..600e3b5d537c9
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @test1() {
+; CHECK-LABEL: 'test1'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.000000e+00)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"()
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.0)
+  %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3)
+  %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0)
+  %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0)
+  call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"()
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg b/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg
new file mode 100644
index 0000000000000..0d37b86e1c8e6
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "NVPTX" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll
new file mode 100644
index 0000000000000..f1e3a93ca9d84
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll
@@ -0,0 +1,47 @@
+; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa,basic-aa -print-all-alias-modref-info < %s -disable-output 2>&1 \
+; RUN:   | FileCheck %s --check-prefixes CHECK-ALIAS
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+;;CHECK-ALIAS-LABEL: Function: test_sideeffect
+;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> call
+define void @test_sideeffect(ptr %out) {
+entry:
+  %0 = addrspacecast ptr %out to ptr addrspace(1)
+  call void asm sideeffect "membar.gl;", ""()
+  store i32 5, ptr addrspace(1) %0, align 4
+  ret void
+}
+
+;;CHECK-ALIAS-LABEL: Function: test_indirect
+;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call
+define i32 @test_indirect(ptr %out) {
+entry:
+  %0 = addrspacecast ptr %out to ptr addrspace(1)
+  store i32 0, ptr addrspace(1) %0, align 4
+  %1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,*m"(ptr addrspace(1) elementtype(i32) %0)
+  store i32 0, ptr addrspace(1) %0, align 4
+  ret i32 %1
+}
+
+;;CHECK-ALIAS-LABEL: Function: test_memory
+;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call
+define i32 @test_memory(ptr %out) {
+entry:
+  %0 = addrspacecast ptr %out to ptr addrspace(1)
+  store i32 0, ptr addrspace(1) %0, align 4
+  %1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,l,~{memory}"(ptr addrspace(1) %0)
+  store i32 0, ptr addrspace(1) %0, align 4
+  ret i32 %1
+}
+
+;;CHECK-ALIAS-LABEL: Function: test_no_sideeffect
+;;CHECK-ALIAS: NoModRef: Ptr: i32* %0 <-> %1 = call
+define void @test_no_sideeffect(ptr %in, ptr %out) {
+entry:
+  %0 = addrspacecast ptr %out to ptr addrspace(1)
+  %1 = call i32 asm "cvt.u32.u64 $0, $1;", "=r,l"(ptr %in)
+  store i32 %1, ptr addrspace(1) %0, align 4
+  ret void
+}