Skip to content

[AMDGPU][Attributor] Add a pass parameter closed-world for AMDGPUAttributor pass #101760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,12 +283,22 @@ class AMDGPULowerKernelArgumentsPass
PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};

struct AMDGPUAttributorOptions {
bool IsClosedWorld = false;
};

class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
private:
TargetMachine &TM;

AMDGPUAttributorOptions Options;

/// Asserts whether we can assume whole program visibility.
bool HasWholeProgramVisibility = false;

public:
AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
AMDGPUAttributorPass(TargetMachine &TM, AMDGPUAttributorOptions Options = {})
: TM(TM), Options(Options) {};
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,8 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
}
}

static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AMDGPUAttributorOptions Options) {
SetVector<Function *> Functions;
for (Function &F : M) {
if (!F.isIntrinsic())
Expand All @@ -1041,6 +1042,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
&AAUnderlyingObjects::ID, &AAAddressSpace::ID});

AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
Expand Down Expand Up @@ -1098,7 +1100,7 @@ class AMDGPUAttributorLegacy : public ModulePass {

bool runOnModule(Module &M) override {
AnalysisGetter AG(this);
return runImpl(M, AG, *TM);
return runImpl(M, AG, *TM, /*Options=*/{});
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
Expand All @@ -1119,8 +1121,8 @@ PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
AnalysisGetter AG(FAM);

// TODO: Probably preserves CFG
return runImpl(M, AG, TM) ? PreservedAnalyses::none()
: PreservedAnalyses::all();
return runImpl(M, AG, TM, Options) ? PreservedAnalyses::none()
: PreservedAnalyses::all();
}

char AMDGPUAttributorLegacy::ID = 0;
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#define MODULE_PASS(NAME, CREATE_PASS)
#endif
MODULE_PASS("amdgpu-always-inline", AMDGPUAlwaysInlinePass())
MODULE_PASS("amdgpu-attributor", AMDGPUAttributorPass(*this))
MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
AMDGPULowerBufferFatPointersPass(*this))
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
Expand All @@ -26,6 +25,17 @@ MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
#undef MODULE_PASS

#ifndef MODULE_PASS_WITH_PARAMS
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
MODULE_PASS_WITH_PARAMS(
"amdgpu-attributor", "AMDGPUAttributorPass",
[=](AMDGPUAttributorOptions Options) {
return AMDGPUAttributorPass(*this, Options);
},
parseAMDGPUAttributorPassOptions, "closed-world")
#undef MODULE_PASS_WITH_PARAMS

#ifndef FUNCTION_PASS
#define FUNCTION_PASS(NAME, CREATE_PASS)
#endif
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
Expand Down Expand Up @@ -662,6 +663,24 @@ Error AMDGPUTargetMachine::buildCodeGenPipeline(
return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
}

Expected<AMDGPUAttributorOptions>
parseAMDGPUAttributorPassOptions(StringRef Params) {
AMDGPUAttributorOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
if (ParamName == "closed-world") {
Result.IsClosedWorld = true;
} else {
return make_error<StringError>(
formatv("invalid AMDGPUAttributor pass parameter '{0}' ", ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}

void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
Expand Down
58 changes: 58 additions & 0 deletions llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='amdgpu-attributor<closed-world>' %s | FileCheck --check-prefixes=CHECK,CW %s

target datalayout = "A5"

@G = global i32 0, align 4

;.
; CHECK: @G = global i32 0, align 4
;.
define void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 1, ptr @G, align 4
; CHECK-NEXT: ret void
;
entry:
store i32 1, ptr @G, align 4
ret void
}

define ptr @helper() {
; CHECK-LABEL: define {{[^@]+}}@helper
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret ptr @bar
;
entry:
ret ptr @bar
}

define amdgpu_kernel void @foo(ptr noundef %fp) {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
; CHECK-NEXT: call void [[LOAD]]()
; CHECK-NEXT: ret void
;
entry:
%fp.addr = alloca ptr, addrspace(5)
store ptr %fp, ptr addrspace(5) %fp.addr
%load = load ptr, ptr addrspace(5) %fp.addr
call void %load()
ret void
}

;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CW: {{.*}}
; OW: {{.*}}
12 changes: 12 additions & 0 deletions llvm/test/Other/amdgpu-pass-pipeline-parsing.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; REQUIRES: amdgpu-registered-target

; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes='amdgpu-attributor<random>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: amdgpu-attributor: invalid AMDGPUAttributor pass parameter 'random'

define void @f() {
entry:
br label %loop
loop:
br label %loop
}