Skip to content

[NVPTX] Improve modeling of inline PTX #130675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"

Expand Down Expand Up @@ -115,3 +116,29 @@ ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc,

return ModRefInfo::ModRef;
}

/// Report the memory effects of \p Call for NVPTX alias analysis.
///
/// Only calls whose callee is inline assembly are refined. Such a call is
/// reported as MemoryEffects::none() when the asm
///   - is not marked as having side effects,
///   - has no indirect operand constraint, and
///   - has no "{memory}" clobber.
/// Every other call gets the conservative MemoryEffects::unknown().
///
/// \param Call the call site being queried.
/// \param AAQI alias-analysis query state; not consulted here.
/// \returns MemoryEffects::none() or MemoryEffects::unknown() as described.
MemoryEffects NVPTXAAResult::getMemoryEffects(const CallBase *Call,
                                              AAQueryInfo &AAQI) {
  // Inline assembly with no side-effect or memory clobbers should not
  // indirectly access memory in the PTX specification. Anything that is not
  // inline assembly is answered conservatively.
  const auto *IA = dyn_cast<InlineAsm>(Call->getCalledOperand());
  if (!IA)
    return MemoryEffects::unknown();

  // Volatile is translated as side-effects.
  if (IA->hasSideEffects())
    return MemoryEffects::unknown();

  for (const InlineAsm::ConstraintInfo &Constraint : IA->ParseConstraints()) {
    // Indirect constraints (e.g. =*m) are unsupported in inline PTX.
    if (Constraint.isIndirect)
      return MemoryEffects::unknown();

    // Memory clobbers prevent optimization. ConstraintPrefix enumerates
    // mutually exclusive constraint kinds rather than bit flags, so the kind
    // is compared for equality instead of being tested with a bitwise AND.
    if (Constraint.Type == InlineAsm::ConstraintPrefix::isClobber &&
        any_of(Constraint.Codes,
               [](const auto &Code) { return Code == "{memory}"; }))
      return MemoryEffects::unknown();
  }

  return MemoryEffects::none();
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ class NVPTXAAResult : public AAResultBase {

ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool IgnoreLocals);

/// Memory effects of the call site \p Call. Defined out of line; the
/// definition returns MemoryEffects::none() only for inline-asm callees that
/// have no side effects, no indirect constraints and no "{memory}" clobber,
/// and MemoryEffects::unknown() for everything else.
MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI);

/// Memory effects of the function \p F. No refinement is attempted here: the
/// conservative MemoryEffects::unknown() is returned for every function.
MemoryEffects getMemoryEffects(const Function *F) {
return MemoryEffects::unknown();
}
};

/// Analysis pass providing a never-invalidated alias analysis result.
Expand Down
30 changes: 30 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
Expand Down Expand Up @@ -483,6 +484,35 @@ NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return std::nullopt;
}

/// Estimate the cost of \p U, with a dedicated model for inline assembly.
///
/// A call to inline asm is classified as a call in the IR, so without this
/// override getCallCost() would price it as the number of arguments + 1.
/// Instead the asm string is split on ';' and every piece that looks like an
/// instruction is charged TCC_Basic. All other users are forwarded to the
/// base implementation unchanged.
///
/// \param U        the instruction (or other user) being costed.
/// \param Operands operands of \p U, passed through to the base class.
/// \param CostKind requested cost kind, passed through to the base class.
InstructionCost
NVPTXTTIImpl::getInstructionCost(const User *U,
                                 ArrayRef<const Value *> Operands,
                                 TTI::TargetCostKind CostKind) {
  const auto *Call = dyn_cast<CallInst>(U);
  const auto *Asm =
      Call ? dyn_cast<InlineAsm>(Call->getCalledOperand()) : nullptr;
  if (!Asm)
    return BaseT::getInstructionCost(U, Operands, CostKind);

  // Coarse test for "this ';'-separated piece is an instruction".
  const auto LooksLikeInstruction = [](StringRef Piece) {
    // Scope braces and surrounding whitespace carry no cost; drop them.
    const StringRef Body = Piece.trim().ltrim("{} \t\n\v\f\r");
    if (Body.empty())
      return false;
    // An instruction starts with a letter, or with '@' when it is
    // predicated. Pieces containing ".pragma" are counted as well.
    const char Lead = Body[0];
    return Lead == '@' || isAlpha(Lead) ||
           Body.find(".pragma") != StringRef::npos;
  };

  const auto &AsmText = Asm->getAsmString();
  const unsigned InstCount =
      count_if(split(AsmText, ';'), LooksLikeInstruction);
  return InstCount * TargetTransformInfo::TCC_Basic;
}

InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
// calls are particularly expensive in NVPTX.
unsigned getInliningThresholdMultiplier() const { return 11; }

/// Cost of the user \p U. Calls to inline assembly are costed by the number
/// of instruction-like statements found in the asm string (each charged
/// TCC_Basic); every other user is forwarded to the base implementation.
InstructionCost getInstructionCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind);

InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/Analysis/CostModel/NVPTX/inline-asm.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s

target triple = "nvptx64-nvidia-cuda"

; Cost-model coverage for inline PTX. Each asm call is expected to be priced
; by the number of instruction-like statements in its asm string:
;   %1  one instruction                          -> 1
;   %2  two instructions inside a { } scope      -> 2
;   %3  a .reg declaration plus two instructions -> 2 (.reg is not counted)
;   %4  as %3, with a predicated "@%p1 exit"     -> 2
;   the trailing .pragma directive               -> 1
define void @test1() {
; CHECK-LABEL: 'test1'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.000000e+00)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"()
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%1 = call double asm "rsqrt.approx.ftz.f64 $0, $1;", "=d,d"(double 1.0)
%2 = call { i32, i32 } asm "{\0A\09mad.lo.cc.u32 $0, $2, $3, $4;\0A\09madc.hi.u32 $1, $2, $3, 0;\0A\09}", "=r,=r,r,r,r"(i32 2, i32 3, i32 3)
%3 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.ballot.b32 \09$0, %p1; \0A\09}", "=r,r"(i32 0)
%4 = call i32 asm sideeffect "{ \0A\09.reg .pred \09%p1; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09@%p1 exit; \0A\09}", "=r,r"(i32 0)
call void asm sideeffect ".pragma \22nounroll\22;\0A\09", "~{memory}"()
ret void
}
2 changes: 2 additions & 0 deletions llvm/test/Analysis/CostModel/NVPTX/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# These tests need the NVPTX backend; mark the directory unsupported when the
# target list of this build does not include it.
if "NVPTX" not in config.root.targets:
    config.unsupported = True
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/NVPTX/nvptx-aa-inline-asm.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa,basic-aa -print-all-alias-modref-info < %s -disable-output 2>&1 \
; RUN: | FileCheck %s --check-prefixes CHECK-ALIAS

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

;;CHECK-ALIAS-LABEL: Function: test_sideeffect
;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> call
; The asm is marked sideeffect, so the call is expected to be reported as
; possibly reading and writing %0 (Both ModRef).
define void @test_sideeffect(ptr %out) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
call void asm sideeffect "membar.gl;", ""()
store i32 5, ptr addrspace(1) %0, align 4
ret void
}

;;CHECK-ALIAS-LABEL: Function: test_indirect
;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call
; The asm takes %0 through an indirect memory constraint (*m), so the call is
; expected to stay Both ModRef with respect to %0 even without sideeffect.
define i32 @test_indirect(ptr %out) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
store i32 0, ptr addrspace(1) %0, align 4
%1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,*m"(ptr addrspace(1) elementtype(i32) %0)
store i32 0, ptr addrspace(1) %0, align 4
ret i32 %1
}

;;CHECK-ALIAS-LABEL: Function: test_memory
;;CHECK-ALIAS: Both ModRef: Ptr: i32* %0 <-> %1 = call
; The asm carries a ~{memory} clobber, so the call is expected to stay
; Both ModRef with respect to %0 even without sideeffect.
define i32 @test_memory(ptr %out) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
store i32 0, ptr addrspace(1) %0, align 4
%1 = call i32 asm "ld.global.u32 $0, [$1];", "=r,l,~{memory}"(ptr addrspace(1) %0)
store i32 0, ptr addrspace(1) %0, align 4
ret i32 %1
}

;;CHECK-ALIAS-LABEL: Function: test_no_sideeffect
;;CHECK-ALIAS: NoModRef: Ptr: i32* %0 <-> %1 = call
; The asm has no sideeffect marker, no indirect constraint and no memory
; clobber, so the call is expected to be NoModRef with respect to %0.
define void @test_no_sideeffect(ptr %in, ptr %out) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
%1 = call i32 asm "cvt.u32.u64 $0, $1;", "=r,l"(ptr %in)
store i32 %1, ptr addrspace(1) %0, align 4
ret void
}
Loading