Skip to content

Commit e4003cd

Browse files
authored
[CIR][HIP] Lower Device CIR to LLVM IR (#1967)
1 parent e33d2c4 commit e4003cd

File tree

8 files changed

+186
-15
lines changed

8 files changed

+186
-15
lines changed

clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ add_clang_library(TargetLowering
1313
TargetInfo.cpp
1414
TargetLoweringInfo.cpp
1515
Targets/AArch64.cpp
16+
Targets/AMDGPU.cpp
1617
Targets/NVPTX.cpp
1718
Targets/SPIR.cpp
1819
Targets/X86.cpp

clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ createTargetLoweringInfo(LowerModule &LM) {
7171

7272
return createAArch64TargetLoweringInfo(LM, Kind);
7373
}
74+
case llvm::Triple::amdgcn:
75+
return createAMDGPUTargetLoweringInfo(LM);
7476
case llvm::Triple::x86_64: {
7577
switch (Triple.getOS()) {
7678
case llvm::Triple::Win32:

clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ createSPIRVTargetLoweringInfo(LowerModule &CGM);
3333
std::unique_ptr<TargetLoweringInfo>
3434
createNVPTXTargetLoweringInfo(LowerModule &CGM);
3535

36+
std::unique_ptr<TargetLoweringInfo>
37+
createAMDGPUTargetLoweringInfo(LowerModule &CGM);
38+
3639
} // namespace cir
3740

3841
#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===- AMDGPU.cpp - TargetInfo for AMDGPU ---------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "ABIInfoImpl.h"
10+
#include "LowerFunctionInfo.h"
11+
#include "LowerTypes.h"
12+
#include "TargetInfo.h"
13+
#include "TargetLoweringInfo.h"
14+
#include "clang/CIR/ABIArgInfo.h"
15+
#include "clang/CIR/Dialect/IR/CIRTypes.h"
16+
#include "clang/CIR/MissingFeatures.h"
17+
#include "llvm/Support/Casting.h"
18+
#include "llvm/Support/ErrorHandling.h"
19+
20+
using ABIArgInfo = cir::ABIArgInfo;
21+
using MissingFeature = cir::MissingFeatures;
22+
23+
namespace cir {
24+
25+
//===----------------------------------------------------------------------===//
26+
// AMDGPU ABI Implementation
27+
//===----------------------------------------------------------------------===//
28+
29+
namespace {
30+
31+
class AMDGPUABIInfo : public ABIInfo {
32+
public:
33+
AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}
34+
35+
private:
36+
void computeInfo(LowerFunctionInfo &fi) const override {
37+
llvm_unreachable("NYI");
38+
}
39+
};
40+
41+
class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
42+
public:
43+
AMDGPUTargetLoweringInfo(LowerTypes &lt)
44+
: TargetLoweringInfo(std::make_unique<AMDGPUABIInfo>(lt)) {}
45+
// Taken from here: https://llvm.org/docs/AMDGPUUsage.html#address-spaces
46+
unsigned getTargetAddrSpaceFromCIRAddrSpace(
47+
cir::AddressSpace addrSpace) const override {
48+
switch (addrSpace) {
49+
case cir::AddressSpace::OffloadPrivate:
50+
return 5;
51+
case cir::AddressSpace::OffloadLocal:
52+
return 3;
53+
case cir::AddressSpace::OffloadGlobal:
54+
return 1;
55+
case cir::AddressSpace::OffloadConstant:
56+
return 4;
57+
case cir::AddressSpace::OffloadGeneric:
58+
return 0;
59+
default:
60+
cir_cconv_unreachable("Unknown CIR address space for this target");
61+
}
62+
}
63+
};
64+
65+
} // namespace
66+
std::unique_ptr<TargetLoweringInfo>
67+
createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
68+
return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());
69+
}
70+
71+
} // namespace cir

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,19 +1749,31 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
17491749
convertTypeForMemory(*getTypeConverter(), dataLayout, op.getAllocaType());
17501750
auto resultTy = getTypeConverter()->convertType(op.getType());
17511751
// Verification between the CIR alloca AS and the one from data layout.
1752-
{
1752+
auto allocaAS = [&]() {
17531753
auto resPtrTy = mlir::cast<mlir::LLVM::LLVMPointerType>(resultTy);
17541754
auto dlAllocaASAttr = mlir::cast_if_present<mlir::IntegerAttr>(
17551755
dataLayout.getAllocaMemorySpace());
1756-
// Absence means 0
17571756
// TODO: The query for the alloca AS should be done through CIRDataLayout
17581757
// instead to reuse the logic of interpret null attr as 0.
1759-
auto dlAllocaAS = dlAllocaASAttr ? dlAllocaASAttr.getInt() : 0;
1760-
if (dlAllocaAS != resPtrTy.getAddressSpace()) {
1761-
return op.emitError() << "alloca address space doesn't match the one "
1762-
"from the target data layout: "
1763-
<< dlAllocaAS;
1764-
}
1758+
if (!dlAllocaASAttr)
1759+
return 0u;
1760+
return static_cast<unsigned>(dlAllocaASAttr.getValue().getZExtValue());
1761+
}();
1762+
1763+
auto resPtrTy =
1764+
mlir::LLVM::LLVMPointerType::get(elementTy.getContext(), allocaAS);
1765+
1766+
auto llvmAlloca = rewriter.create<mlir::LLVM::AllocaOp>(
1767+
op.getLoc(), resPtrTy, elementTy, size, op.getAlignmentAttr().getInt());
1768+
1769+
auto expectedPtrTy = mlir::cast<mlir::LLVM::LLVMPointerType>(
1770+
getTypeConverter()->convertType(op.getResult().getType()));
1771+
1772+
mlir::Value finalPtr = llvmAlloca.getResult();
1773+
1774+
if (expectedPtrTy.getAddressSpace() != allocaAS) {
1775+
finalPtr = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
1776+
op.getLoc(), expectedPtrTy, finalPtr);
17651777
}
17661778

17671779
// If there are annotations available, copy them out before we destroy the
@@ -1770,11 +1782,10 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
17701782
if (op.getAnnotations())
17711783
annotations = op.getAnnotationsAttr();
17721784

1773-
auto llvmAlloca = rewriter.replaceOpWithNewOp<mlir::LLVM::AllocaOp>(
1774-
op, resultTy, elementTy, size, op.getAlignmentAttr().getInt());
1775-
17761785
if (annotations && !annotations.empty())
17771786
buildAllocaAnnotations(llvmAlloca, adaptor, rewriter, annotations);
1787+
rewriter.replaceOp(op, finalPtr);
1788+
17781789
return mlir::success();
17791790
}
17801791

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "cuda.h"
2+
3+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
4+
// RUN: -fcuda-is-device -fhip-new-launch-api \
5+
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.ll
6+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.ll %s
7+
8+
__global__ void fn() {
9+
int i = 0;
10+
__shared__ int j;
11+
j = i;
12+
}
13+
14+
// CIR: cir.global "private" internal dso_local addrspace(offload_local) @_ZZ2fnvE1j : !s32i
15+
// CIR: cir.func dso_local @_Z2fnv
16+
// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
17+
// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i, addrspace(offload_local)>
18+
// CIR: [[Tmp:%[0-9]+]] = cir.load {{.*}} [[Local]] : !cir.ptr<!s32i>, !s32i
19+
// CIR: cir.store{{.*}} [[Tmp]], [[Shared]] : !s32i, !cir.ptr<!s32i, addrspace(offload_local)>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#include "cuda.h"
2+
3+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
4+
// RUN: -fcuda-is-device -fhip-new-launch-api \
5+
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
6+
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
7+
8+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
9+
// RUN: -fcuda-is-device -fhip-new-launch-api \
10+
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
11+
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
12+
13+
14+
__shared__ int a;
15+
// LLVM-DEVICE: @a = addrspace(3) global i32 undef, align 4
16+
// OGCG-DEVICE: @a = addrspace(3) global i32 undef, align 4
17+
18+
__device__ int b;
19+
// LLVM-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4
20+
// OGCG-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4
21+
22+
__constant__ int c;
23+
// LLVM-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4
24+
// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4
25+

clang/test/CIR/CodeGen/HIP/simple.cpp

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,36 @@
1-
#include "../Inputs/cuda.h"
1+
#include "cuda.h"
22

33
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
44
// RUN: -x hip -fhip-new-launch-api \
5-
// RUN: -emit-cir %s -o %t.cir
5+
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir
66
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
77

88
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
99
// RUN: -fcuda-is-device -fhip-new-launch-api \
10-
// RUN: -emit-cir %s -o %t.cir
10+
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir
1111
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
1212
//
1313
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
1414
// RUN: -x hip -emit-llvm -fhip-new-launch-api \
15-
// RUN: %s -o %t.ll
15+
// RUN: -I%S/../Inputs/ %s -o %t.ll
1616
// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
1717

18+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
19+
// RUN: -fcuda-is-device -fhip-new-launch-api \
20+
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
21+
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
22+
23+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
24+
// RUN: -x hip -emit-llvm -fhip-new-launch-api \
25+
// RUN: -I%S/../Inputs/ %s -o %t.ll
26+
// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
27+
28+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
29+
// RUN: -fcuda-is-device -fhip-new-launch-api \
30+
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
31+
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
32+
33+
1834
// Attribute for global_fn
1935
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
2036

@@ -29,6 +45,8 @@ __device__ void device_fn(int* a, double b, float c) {}
2945

3046
__global__ void global_fn(int a) {}
3147
// CIR-DEVICE: @_Z9global_fni
48+
// LLVM-DEVICE: define dso_local void @_Z9global_fni
49+
// OGCG-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni
3250

3351
// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
3452
// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
@@ -43,6 +61,12 @@ __global__ void global_fn(int a) {}
4361
// LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
4462
// LLVM-HOST: call i32 @__hipPopCallConfiguration
4563
// LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni
64+
//
65+
// OGCG-HOST: define dso_local void @_Z24__device_stub__global_fni
66+
// OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16
67+
// OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0
68+
// OGCG-HOST: call i32 @__hipPopCallConfiguration
69+
// OGCG-HOST: %call = call noundef i32 @hipLaunchKernel(ptr noundef @_Z9global_fni
4670

4771

4872
int main() {
@@ -78,3 +102,18 @@ int main() {
78102
// LLVM-HOST: %[[#]] = load i32
79103
// LLVM-HOST: ret i32
80104

105+
// OGCG-HOST: define dso_local noundef i32 @main
106+
// OGCG-HOST: %agg.tmp = alloca %struct.dim3, align 4
107+
// OGCG-HOST: %agg.tmp1 = alloca %struct.dim3, align 4
108+
// OGCG-HOST: call void @_ZN4dim3C1Ejjj
109+
// OGCG-HOST: call void @_ZN4dim3C1Ejjj
110+
// OGCG-HOST: %call = call i32 @__hipPushCallConfiguration
111+
// OGCG-HOST: %tobool = icmp ne i32 %call, 0
112+
// OGCG-HOST: br i1 %tobool, label %kcall.end, label %kcall.configok
113+
// OGCG-HOST: kcall.configok:
114+
// OGCG-HOST: call void @_Z24__device_stub__global_fni(i32 noundef 1)
115+
// OGCG-HOST: br label %kcall.end
116+
// OGCG-HOST: kcall.end:
117+
// OGCG-HOST: %{{[0-9]+}} = load i32, ptr %retval, align 4
118+
// OGCG-HOST: ret i32 %8
119+

0 commit comments

Comments
 (0)