|
8 | 8 |
|
9 | 9 | #include "ABIInfoImpl.h"
|
10 | 10 | #include "TargetInfo.h"
|
| 11 | +#include "clang/Basic/Cuda.h" |
11 | 12 | #include "llvm/IR/IntrinsicsNVPTX.h"
|
12 | 13 |
|
13 | 14 | using namespace clang;
|
@@ -80,6 +81,9 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
|
80 | 81 | static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
|
81 | 82 | int Operand);
|
82 | 83 |
|
| 84 | + static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, |
| 85 | + const std::vector<int> &Operands); |
| 86 | + |
83 | 87 | private:
|
84 | 88 | static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
|
85 | 89 | LValue Src) {
|
@@ -218,6 +222,98 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
218 | 222 | llvm_unreachable("NVPTX does not support varargs");
|
219 | 223 | }
|
220 | 224 |
|
| 225 | +// Get current CudaArch and ignore any unknown values |
| 226 | +// Copied from CGOpenMPRuntimeGPU |
| 227 | +static CudaArch getCudaArch(CodeGenModule &CGM) { |
| 228 | + if (!CGM.getTarget().hasFeature("ptx")) |
| 229 | + return CudaArch::UNKNOWN; |
| 230 | + for (const auto &Feature : CGM.getTarget().getTargetOpts().FeatureMap) { |
| 231 | + if (Feature.getValue()) { |
| 232 | + CudaArch Arch = StringToCudaArch(Feature.getKey()); |
| 233 | + if (Arch != CudaArch::UNKNOWN) |
| 234 | + return Arch; |
| 235 | + } |
| 236 | + } |
| 237 | + return CudaArch::UNKNOWN; |
| 238 | +} |
| 239 | + |
| 240 | +static bool supportsGridConstant(CudaArch Arch) { |
| 241 | + switch (Arch) { |
| 242 | + case CudaArch::SM_70: |
| 243 | + case CudaArch::SM_72: |
| 244 | + case CudaArch::SM_75: |
| 245 | + case CudaArch::SM_80: |
| 246 | + case CudaArch::SM_86: |
| 247 | + case CudaArch::SM_87: |
| 248 | + case CudaArch::SM_89: |
| 249 | + case CudaArch::SM_90: |
| 250 | + case CudaArch::SM_90a: |
| 251 | + return true; |
| 252 | + case CudaArch::UNKNOWN: |
| 253 | + case CudaArch::UNUSED: |
| 254 | + case CudaArch::SM_20: |
| 255 | + case CudaArch::SM_21: |
| 256 | + case CudaArch::SM_30: |
| 257 | + case CudaArch::SM_32_: |
| 258 | + case CudaArch::SM_35: |
| 259 | + case CudaArch::SM_37: |
| 260 | + case CudaArch::SM_50: |
| 261 | + case CudaArch::SM_52: |
| 262 | + case CudaArch::SM_53: |
| 263 | + case CudaArch::SM_60: |
| 264 | + case CudaArch::SM_61: |
| 265 | + case CudaArch::SM_62: |
| 266 | + return false; |
| 267 | + case CudaArch::GFX600: |
| 268 | + case CudaArch::GFX601: |
| 269 | + case CudaArch::GFX602: |
| 270 | + case CudaArch::GFX700: |
| 271 | + case CudaArch::GFX701: |
| 272 | + case CudaArch::GFX702: |
| 273 | + case CudaArch::GFX703: |
| 274 | + case CudaArch::GFX704: |
| 275 | + case CudaArch::GFX705: |
| 276 | + case CudaArch::GFX801: |
| 277 | + case CudaArch::GFX802: |
| 278 | + case CudaArch::GFX803: |
| 279 | + case CudaArch::GFX805: |
| 280 | + case CudaArch::GFX810: |
| 281 | + case CudaArch::GFX900: |
| 282 | + case CudaArch::GFX902: |
| 283 | + case CudaArch::GFX904: |
| 284 | + case CudaArch::GFX906: |
| 285 | + case CudaArch::GFX908: |
| 286 | + case CudaArch::GFX909: |
| 287 | + case CudaArch::GFX90a: |
| 288 | + case CudaArch::GFX90c: |
| 289 | + case CudaArch::GFX940: |
| 290 | + case CudaArch::GFX941: |
| 291 | + case CudaArch::GFX942: |
| 292 | + case CudaArch::GFX1010: |
| 293 | + case CudaArch::GFX1011: |
| 294 | + case CudaArch::GFX1012: |
| 295 | + case CudaArch::GFX1013: |
| 296 | + case CudaArch::GFX1030: |
| 297 | + case CudaArch::GFX1031: |
| 298 | + case CudaArch::GFX1032: |
| 299 | + case CudaArch::GFX1033: |
| 300 | + case CudaArch::GFX1034: |
| 301 | + case CudaArch::GFX1035: |
| 302 | + case CudaArch::GFX1036: |
| 303 | + case CudaArch::GFX1100: |
| 304 | + case CudaArch::GFX1101: |
| 305 | + case CudaArch::GFX1102: |
| 306 | + case CudaArch::GFX1103: |
| 307 | + case CudaArch::GFX1150: |
| 308 | + case CudaArch::GFX1151: |
| 309 | + case CudaArch::GFX1200: |
| 310 | + case CudaArch::GFX1201: |
| 311 | + case CudaArch::Generic: |
| 312 | + case CudaArch::LAST: |
| 313 | + llvm_unreachable("unhandled CudaArch"); |
| 314 | + } |
| 315 | +} |
| 316 | + |
221 | 317 | void NVPTXTargetCodeGenInfo::setTargetAttributes(
|
222 | 318 | const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
|
223 | 319 | if (GV->isDeclaration())
|
@@ -248,6 +344,21 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
|
248 | 344 | addNVVMMetadata(F, "kernel", 1);
|
249 | 345 | // And kernel functions are not subject to inlining
|
250 | 346 | F->addFnAttr(llvm::Attribute::NoInline);
|
| 347 | + |
| 348 | + if (supportsGridConstant(getCudaArch(M))) { |
| 349 | + // Add grid_constant annotations to all relevant kernel-function |
| 350 | + // parameters. We can guarantee that in SYCL, all by-val kernel |
| 351 | + // parameters are "grid_constant". |
| 352 | + std::vector<int> GridConstantParamIdxs; |
| 353 | + for (auto [Idx, Arg] : llvm::enumerate(F->args())) { |
| 354 | + if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) { |
| 355 | + // Note - the parameter indices are numbered from 1. |
| 356 | + GridConstantParamIdxs.push_back(Idx + 1); |
| 357 | + } |
| 358 | + } |
| 359 | + if (!GridConstantParamIdxs.empty()) |
| 360 | + addNVVMMetadata(F, "grid_constant", GridConstantParamIdxs); |
| 361 | + } |
251 | 362 | }
|
252 | 363 | bool HasMaxWorkGroupSize = false;
|
253 | 364 | bool HasMinWorkGroupPerCU = false;
|
@@ -329,6 +440,28 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
|
329 | 440 | MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
|
330 | 441 | }
|
331 | 442 |
|
| 443 | +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, |
| 444 | + StringRef Name, |
| 445 | + const std::vector<int> &Operands) { |
| 446 | + llvm::Module *M = GV->getParent(); |
| 447 | + llvm::LLVMContext &Ctx = M->getContext(); |
| 448 | + |
| 449 | + // Get "nvvm.annotations" metadata node |
| 450 | + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); |
| 451 | + |
| 452 | + llvm::SmallVector<llvm::Metadata *, 8> MDOps; |
| 453 | + for (int Op : Operands) { |
| 454 | + MDOps.push_back(llvm::ConstantAsMetadata::get( |
| 455 | + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Op))); |
| 456 | + } |
| 457 | + auto *OpList = llvm::MDNode::get(Ctx, MDOps); |
| 458 | + |
| 459 | + llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), |
| 460 | + llvm::MDString::get(Ctx, Name), OpList}; |
| 461 | + // Append metadata to nvvm.annotations |
| 462 | + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); |
| 463 | +} |
| 464 | + |
332 | 465 | bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
|
333 | 466 | return false;
|
334 | 467 | }
|
|
0 commit comments