From 4cd82cf46178587a66acb4eb74e22ec4282b0ec4 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 7 Mar 2025 23:37:32 +0000 Subject: [PATCH 1/3] [NVPTX] Improve modeling of inline PTX --- llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp | 28 +++++++++++ llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h | 6 +++ .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 29 ++++++++++++ .../Target/NVPTX/NVPTXTargetTransformInfo.h | 4 ++ .../Analysis/CostModel/NVPTX/inline-asm.ll | 21 +++++++++ .../Analysis/CostModel/NVPTX/lit.local.cfg | 2 + .../test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll | 47 +++++++++++++++++++ 7 files changed, 137 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll create mode 100644 llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg create mode 100644 llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp index 509b01213cd9c..0cc2132143af1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" @@ -115,3 +116,30 @@ ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc, return ModRefInfo::ModRef; } + +MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call, + AAQueryInfo &AAQI) { + // Inline assembly with no side-effect or memory clobbers should not + // indirectly access memory in the PTX specification. + if (const auto *IA = dyn_cast(Call->getCalledOperand())) { + // Volatile is translated as side-effects. + if (IA->hasSideEffects()) + return MemoryEffects::unknown(); + + for (const InlineAsm::ConstraintInfo &Constraint : IA->ParseConstraints()) { + // Indirect constraints (e.g. =*m) are unsupported in inline PTX. + if (Constraint.isIndirect) + return MemoryEffects::unknown(); + + // Memory clobbers prevent optimization. + if (!(Constraint.Type & InlineAsm::ConstraintPrefix::isClobber)) + continue; + for (const std::string &Code : Constraint.Codes) + if (Code == "{memory}") + return MemoryEffects::unknown(); + } + return MemoryEffects::none(); + } + + return MemoryEffects::unknown(); +} \ No newline at end of file diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h index 2d204979eb6ce..cfbf5dee3ec50 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -36,6 +36,12 @@ class NVPTXAAResult : public AAResultBase { ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, bool IgnoreLocals); + + MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI); + + MemoryEffects getMemoryEffects(const Function *F) { + return MemoryEffects::unknown(); + } }; /// Analysis pass providing a never-invalidated alias analysis result. diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 51c679b8ad89c..4d12d15b1d80b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -8,6 +8,7 @@ #include "NVPTXTargetTransformInfo.h" #include "NVPTXUtilities.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -483,6 +484,34 @@ NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return std::nullopt; } +InstructionCost +NVPTXTTIImpl::getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { + if (const auto *CI = dyn_cast(U)) + if (const auto *IA = dyn_cast(CI->getCalledOperand())) { + // Without this implementation getCallCost() would return the number + // of arguments+1 as the cost. Because the cost-model assumes it is a call + // since it is classified as a call in the IR. A better cost model would + // be to return the number of asm instructions embedded in the asm + // string. + auto &AsmStr = IA->getAsmString(); + SmallVector AsmPieces; + SplitString(AsmStr, AsmPieces, ";\n"); + + const unsigned InstCount = count_if(AsmPieces, [](StringRef AsmInst) { + AsmInst = AsmInst.trim(); + // This is pretty course but does a reasonably good job of identifying + // things that look like instructions, possibly with a predicate ("@"). + return !AsmInst.empty() && (AsmInst[0] == '@' || isAlpha(AsmInst[0]) || + AsmInst.find(".pragma") != StringRef::npos); + }); + return InstCount * TargetTransformInfo::TCC_Basic; + } + + return BaseT::getInstructionCost(U, Operands, CostKind); +} + InstructionCost NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 7f69d422e8b4b..6db36e958b28c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -94,6 +94,10 @@ class NVPTXTTIImpl : public BasicTTIImplBase { // calls are particularly expensive in NVPTX. unsigned getInliningThresholdMultiplier() const { return 11; } + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind); + InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, diff --git a/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll b/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll new file mode 100644 index 0000000000000..600e3b5d537c9 --- /dev/null +++ b/llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print" 2>&1 -disable-output < %s | FileCheck %s + +target triple = "nvptx64-nvidia-cuda" + +define void @test1() { +; CHECK-LABEL: 'test1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.000000e+00) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.0) + %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3) + %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0) + %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0) + call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"() + ret void +} diff --git a/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg b/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg new file mode 100644 index 0000000000000..0d37b86e1c8e6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg @@ -0,0 +1,2 @@ +if not "NVPTX" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll new file mode 100644 index 0000000000000..b03fae365f264 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll @@ -0,0 +1,47 @@ +; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa -print-all-alias-modref-info < %s -disable-output 2>&1 \ +; RUN: | FileCheck %s --check-prefixes CHECK-ALIAS + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +;;CHECK-ALIAS-LABEL: Function: test_sideeffect +;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> call +define void @test_sideeffect(ptr %out) { +entry: + %0 = addrspacecast ptr %out to ptr addrspace(1) + call void asm sideeffect "membar.gl;", ""() + store i32 5, ptr addrspace(1) %0, align 4 + ret void +} + +;;CHECK-ALIAS-LABEL: Function: test_indirect +;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call +define i32 @test_indirect(ptr %out) { +entry: + %0 = addrspacecast ptr %out to ptr addrspace(1) + store i32 0, ptr addrspace(1) %0, align 4 + %1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,*m"(ptr addrspace(1) elementtype(i32) %0) + store i32 0, ptr addrspace(1) %0, align 4 + ret i32 %1 +} + +;;CHECK-ALIAS-LABEL: Function: test_memory +;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call +define i32 @test_memory(ptr %out) { +entry: + %0 = addrspacecast ptr %out to ptr addrspace(1) + store i32 0, ptr addrspace(1) %0, align 4 + %1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,l,~{memory}"(ptr addrspace(1) %0) + store i32 0, ptr addrspace(1) %0, align 4 + ret i32 %1 +} + +;;CHECK-ALIAS-LABEL: Function: test_no_sideeffect +;;CHECK-ALIAS: NoModRef: Ptr: i32* %0 <-> %1 = call +define void @test_no_sideeffect(ptr %in, ptr %out) { +entry: + %0 = addrspacecast ptr %out to ptr addrspace(1) + %1 = call i32 asm "cvt.u32.u64 $0, $1;", "=r,l"(ptr %in) + store i32 %1, ptr addrspace(1) %0, align 4 + ret void +} From 985ab049c8b53625b422c76652311f47ddc6dcda Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Thu, 13 Mar 2025 23:48:43 +0000 Subject: [PATCH 2/3] address comments --- llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp | 2 +- llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp index 0cc2132143af1..a784cc0d12a57 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp @@ -142,4 +142,4 @@ MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call, } return MemoryEffects::unknown(); -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll index b03fae365f264..f1e3a93ca9d84 100644 --- a/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll +++ b/llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa -print-all-alias-modref-info < %s -disable-output 2>&1 \ +; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa,basic-aa -print-all-alias-modref-info < %s -disable-output 2>&1 \ ; RUN: | FileCheck %s --check-prefixes CHECK-ALIAS target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" From 0f09da8585bccdeb3dd50d18df186b103807a4e1 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Tue, 25 Mar 2025 17:02:02 +0000 Subject: [PATCH 3/3] address comments --- llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp | 9 ++++---- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 21 ++++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp index a784cc0d12a57..1f770893828e2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp @@ -132,11 +132,10 @@ MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call, return MemoryEffects::unknown(); // Memory clobbers prevent optimization. - if (!(Constraint.Type & InlineAsm::ConstraintPrefix::isClobber)) - continue; - for (const std::string &Code : Constraint.Codes) - if (Code == "{memory}") - return MemoryEffects::unknown(); + if ((Constraint.Type & InlineAsm::ConstraintPrefix::isClobber) && + any_of(Constraint.Codes, + [](const auto &Code) { return Code == "{memory}"; })) + return MemoryEffects::unknown(); } return MemoryEffects::none(); } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 4d12d15b1d80b..a89ca3037c7ff 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -496,16 +496,17 @@ NVPTXTTIImpl::getInstructionCost(const User *U, // be to return the number of asm instructions embedded in the asm // string. auto &AsmStr = IA->getAsmString(); - SmallVector AsmPieces; - SplitString(AsmStr, AsmPieces, ";\n"); - - const unsigned InstCount = count_if(AsmPieces, [](StringRef AsmInst) { - AsmInst = AsmInst.trim(); - // This is pretty course but does a reasonably good job of identifying - // things that look like instructions, possibly with a predicate ("@"). - return !AsmInst.empty() && (AsmInst[0] == '@' || isAlpha(AsmInst[0]) || - AsmInst.find(".pragma") != StringRef::npos); - }); + const unsigned InstCount = + count_if(split(AsmStr, ';'), [](StringRef AsmInst) { + // Trim off scopes denoted by '{' and '}' as these can be ignored + AsmInst = AsmInst.trim().ltrim("{} \t\n\v\f\r"); + // This is pretty coarse but does a reasonably good job of + // identifying things that look like instructions, possibly with a + // predicate ("@"). + return !AsmInst.empty() && + (AsmInst[0] == '@' || isAlpha(AsmInst[0]) || + AsmInst.find(".pragma") != StringRef::npos); + }); return InstCount * TargetTransformInfo::TCC_Basic; }