|
14 | 14 | #include "GCNSubtarget.h"
|
15 | 15 | #include "Utils/AMDGPUBaseInfo.h"
|
16 | 16 | #include "llvm/Analysis/CycleAnalysis.h"
|
| 17 | +#include "llvm/Analysis/TargetTransformInfo.h" |
| 18 | +#include "llvm/Analysis/UniformityAnalysis.h" |
17 | 19 | #include "llvm/CodeGen/TargetPassConfig.h"
|
18 | 20 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
19 | 21 | #include "llvm/IR/IntrinsicsR600.h"
|
@@ -1014,6 +1016,110 @@ struct AAAMDGPUNoAGPR
|
1014 | 1016 |
|
1015 | 1017 | const char AAAMDGPUNoAGPR::ID = 0;
|
1016 | 1018 |
|
| 1019 | +struct AAAMDGPUUniform |
| 1020 | + : public IRAttribute<Attribute::InReg, |
| 1021 | + StateWrapper<BooleanState, AbstractAttribute>, |
| 1022 | + AAAMDGPUUniform> { |
| 1023 | + AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
| 1024 | + |
| 1025 | + /// Create an abstract attribute view for the position \p IRP. |
| 1026 | + static AAAMDGPUUniform &createForPosition(const IRPosition &IRP, |
| 1027 | + Attributor &A); |
| 1028 | + |
| 1029 | + /// See AbstractAttribute::getName() |
| 1030 | + const std::string getName() const override { return "AAAMDGPUUniform"; } |
| 1031 | + |
| 1032 | + const std::string getAsStr(Attributor *A) const override { |
| 1033 | + return getAssumed() ? "inreg" : "non-inreg"; |
| 1034 | + } |
| 1035 | + |
| 1036 | + void trackStatistics() const override {} |
| 1037 | + |
| 1038 | + /// See AbstractAttribute::getIdAddr() |
| 1039 | + const char *getIdAddr() const override { return &ID; } |
| 1040 | + |
| 1041 | + /// This function should return true if the type of the \p AA is |
| 1042 | + /// AAAMDGPUUniform |
| 1043 | + static bool classof(const AbstractAttribute *AA) { |
| 1044 | + return (AA->getIdAddr() == &ID); |
| 1045 | + } |
| 1046 | + |
| 1047 | + /// Unique ID (due to the unique address) |
| 1048 | + static const char ID; |
| 1049 | +}; |
| 1050 | + |
| 1051 | +const char AAAMDGPUUniform::ID = 0; |
| 1052 | + |
| 1053 | +namespace { |
| 1054 | + |
| 1055 | +struct AAAMDGPUUniformArgument : public AAAMDGPUUniform { |
| 1056 | + AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A) |
| 1057 | + : AAAMDGPUUniform(IRP, A) {} |
| 1058 | + |
| 1059 | + void initialize(Attributor &A) override { |
| 1060 | + assert( |
| 1061 | + !AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv())); |
| 1062 | + if (getAssociatedArgument()->hasAttribute(Attribute::InReg)) |
| 1063 | + indicateOptimisticFixpoint(); |
| 1064 | + } |
| 1065 | + |
| 1066 | + ChangeStatus updateImpl(Attributor &A) override { |
| 1067 | + unsigned ArgNo = getAssociatedArgument()->getArgNo(); |
| 1068 | + |
| 1069 | + auto isUniform = [&](AbstractCallSite ACS) -> bool { |
| 1070 | + CallBase *CB = ACS.getInstruction(); |
| 1071 | + Value *V = CB->getArgOperandUse(ArgNo); |
| 1072 | + if (isa<Constant>(V)) |
| 1073 | + return true; |
| 1074 | + if (auto *I = dyn_cast<Instruction>(V)) { |
| 1075 | + auto *UA = A.getInfoCache() |
| 1076 | + .getAnalysisResultForFunction<UniformityInfoAnalysis>( |
| 1077 | + *I->getFunction()); |
| 1078 | + return UA && UA->isUniform(I); |
| 1079 | + } |
| 1080 | + if (auto *Arg = dyn_cast<Argument>(V)) { |
| 1081 | + auto *UA = A.getInfoCache() |
| 1082 | + .getAnalysisResultForFunction<UniformityInfoAnalysis>( |
| 1083 | + *Arg->getParent()); |
| 1084 | + if (UA && UA->isUniform(Arg)) |
| 1085 | + return true; |
| 1086 | + // We only rely on isArgPassedInSGPR when the function is terminal, |
| 1087 | + // assuming there is no call edge from a function to an entry function. |
| 1088 | + if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv())) |
| 1089 | + return AMDGPU::isArgPassedInSGPR(Arg); |
| 1090 | + auto *AA = |
| 1091 | + A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg)); |
| 1092 | + return AA && AA->isValidState(); |
| 1093 | + } |
| 1094 | + return false; |
| 1095 | + }; |
| 1096 | + |
| 1097 | + bool UsedAssumedInformation = true; |
| 1098 | + if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true, |
| 1099 | + UsedAssumedInformation)) |
| 1100 | + return indicatePessimisticFixpoint(); |
| 1101 | + |
| 1102 | + if (!UsedAssumedInformation) |
| 1103 | + return indicateOptimisticFixpoint(); |
| 1104 | + |
| 1105 | + return ChangeStatus::UNCHANGED; |
| 1106 | + } |
| 1107 | +}; |
| 1108 | + |
| 1109 | +} // namespace |
| 1110 | + |
| 1111 | +AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP, |
| 1112 | + Attributor &A) { |
| 1113 | + switch (IRP.getPositionKind()) { |
| 1114 | + case IRPosition::IRP_ARGUMENT: |
| 1115 | + return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A); |
| 1116 | + // TODO: Since inreg is also allowed for return value, maybe we need to add |
| 1117 | + // AAAMDGPUUniformCallSiteReturned? |
| 1118 | + default: |
| 1119 | + llvm_unreachable("not a valid position for AAAMDGPUUniform"); |
| 1120 | + } |
| 1121 | +} |
| 1122 | + |
1017 | 1123 | static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
|
1018 | 1124 | const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
|
1019 | 1125 | for (unsigned I = 0;
|
@@ -1046,7 +1152,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
1046 | 1152 | &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
|
1047 | 1153 | &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
|
1048 | 1154 | &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
|
1049 |
| - &AAInstanceInfo::ID}); |
| 1155 | + &AAInstanceInfo::ID, &AAAMDGPUUniform::ID}); |
1050 | 1156 |
|
1051 | 1157 | AttributorConfig AC(CGUpdater);
|
1052 | 1158 | AC.IsClosedWorldModule = Options.IsClosedWorld;
|
@@ -1092,6 +1198,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
1092 | 1198 | IRPosition::value(*CmpX->getPointerOperand()));
|
1093 | 1199 | }
|
1094 | 1200 | }
|
| 1201 | + |
| 1202 | + if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { |
| 1203 | + for (auto &Arg : F->args()) |
| 1204 | + A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg)); |
| 1205 | + } |
1095 | 1206 | }
|
1096 | 1207 |
|
1097 | 1208 | ChangeStatus Change = A.run();
|
@@ -1120,6 +1231,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
|
1120 | 1231 |
|
1121 | 1232 | void getAnalysisUsage(AnalysisUsage &AU) const override {
|
1122 | 1233 | AU.addRequired<CycleInfoWrapperPass>();
|
| 1234 | + AU.addRequired<UniformityInfoWrapperPass>(); |
1123 | 1235 | }
|
1124 | 1236 |
|
1125 | 1237 | StringRef getPassName() const override { return "AMDGPU Attributor"; }
|
|
0 commit comments