Skip to content

Commit 2c596ea

Browse files
authored
Revert "[mlir][test][gpu] Migrate CUDA tests to the TargetAttr compilation workflow (#65768)" (#65848)
This reverts commit d21b672.
1 parent d21b672 commit 2c596ea

25 files changed

+132
-62
lines changed

mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ namespace mlir {
1616
class LLVMTypeConverter;
1717
class ConversionTarget;
1818
class RewritePatternSet;
19-
class Pass;
19+
20+
template <typename OpT>
21+
class OperationPass;
2022

2123
namespace gpu {
2224
class GPUModuleOp;
@@ -43,6 +45,14 @@ void populateGpuSubgroupReduceOpLoweringPattern(LLVMTypeConverter &converter,
4345
/// Collect a set of patterns to convert WMMA ops from GPU dialect to NVVM.
4446
void populateGpuWMMAToNVVMConversionPatterns(LLVMTypeConverter &converter,
4547
RewritePatternSet &patterns);
48+
49+
/// Creates a pass that lowers GPU dialect operations to NVVM counterparts. The
50+
/// index bitwidth used for the lowering of the device side index computations
51+
/// is configurable.
52+
std::unique_ptr<OperationPass<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass(
53+
unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
54+
bool hasRedux = false);
55+
4656
} // namespace mlir
4757

4858
#endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_

mlir/include/mlir/Conversion/Passes.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> {
486486

487487
def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
488488
let summary = "Generate NVVM operations for gpu operations";
489+
let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()";
489490
let dependentDialects = [
490491
"cf::ControlFlowDialect",
491492
"memref::MemRefDialect",

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,11 @@ struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
210210
/// code.
211211
struct LowerGpuOpsToNVVMOpsPass
212212
: public impl::ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
213-
using Base::Base;
213+
LowerGpuOpsToNVVMOpsPass() = default;
214+
LowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth, bool hasRedux = false) {
215+
this->indexBitwidth = indexBitwidth;
216+
this->hasRedux = hasRedux;
217+
}
214218

215219
void runOnOperation() override {
216220
gpu::GPUModuleOp m = getOperation();
@@ -374,3 +378,8 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
374378
"__nv_tanh");
375379
populateOpPatterns<math::TanOp>(converter, patterns, "__nv_tanf", "__nv_tan");
376380
}
381+
382+
std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
383+
mlir::createLowerGpuOpsToNVVMOpsPass(unsigned indexBitwidth, bool hasRedux) {
384+
return std::make_unique<LowerGpuOpsToNVVMOpsPass>(indexBitwidth, hasRedux);
385+
}

mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ void mlir::sparse_tensor::buildSparseCompiler(
5252
pm.addPass(createSparseGPUCodegenPass());
5353
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
5454
pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
55-
pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
55+
pm.addNestedPass<gpu::GPUModuleOp>(createLowerGpuOpsToNVVMOpsPass());
5656
}
5757

5858
// TODO(springerm): Add sparse support to the BufferDeallocation pass and add

mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm -debug-only=serialize-to-isa \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{dump-ptx}))' \
34
// RUN: 2>&1 | FileCheck %s
45

56
// CHECK: Generated by LLVM NVPTX Back-End

mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
// NOTE: this test requires gpu-sm80
33
//
44
// RUN: mlir-opt \
5-
// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse)" \
5+
// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse,gpu.module(gpu-to-cubin{chip=sm_80 features=+ptx71}))" \
66
// RUN: %s \
7-
// RUN: | mlir-opt --test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx71" \
7+
// RUN: | mlir-opt --convert-vector-to-scf --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
8+
// RUN: --convert-arith-to-llvm --gpu-to-llvm --reconcile-unrealized-casts \
89
// RUN: | mlir-cpu-runner \
910
// RUN: --shared-libs=%mlir_cuda_runtime \
1011
// RUN: --shared-libs=%mlir_c_runner_utils \

mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\
22
// RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\
33
// RUN: mlir-opt -lower-affine -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm \
4-
// RUN: -convert-arith-to-llvm -test-lower-to-nvvm | \
4+
// RUN: -convert-arith-to-llvm -gpu-kernel-outlining |\
5+
// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
6+
// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
57
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
68
// RUN: -shared-libs=%mlir_cuda_runtime \
79
// RUN: -shared-libs=%mlir_c_runner_utils \

mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// everything on the same thread.
33
// RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
44
// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
5-
// RUN: -test-lower-to-nvvm | \
5+
// RUN: -gpu-kernel-outlining |\
6+
// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
7+
// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
68
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
79
// RUN: -shared-libs=%mlir_cuda_runtime \
810
// RUN: -shared-libs=%mlir_c_runner_utils \
@@ -13,7 +15,9 @@
1315
// RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \
1416
// RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
1517
// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
16-
// RUN: -test-lower-to-nvvm | \
18+
// RUN: -gpu-kernel-outlining |\
19+
// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
20+
// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
1721
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
1822
// RUN: -shared-libs=%mlir_cuda_runtime \
1923
// RUN: -shared-libs=%mlir_c_runner_utils \
@@ -23,7 +27,9 @@
2327
// RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \
2428
// RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
2529
// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
26-
// RUN: -test-lower-to-nvvm | \
30+
// RUN: -gpu-kernel-outlining |\
31+
// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\
32+
// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\
2733
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
2834
// RUN: -shared-libs=%mlir_cuda_runtime \
2935
// RUN: -shared-libs=%mlir_c_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_70" \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \
4+
// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32-bare-ptr.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
// Similar to the wmma-matmul-f32 but but with the memref bare pointer lowering convention.
44
// This test also uses gpu.memcpy operations (instead of gpu.host_register).
55
// RUN: mlir-opt %s \
6-
// RUN: | mlir-opt -test-lower-to-nvvm="host-bare-ptr-calling-convention=1 kernel-bare-ptr-calling-convention=1 cubin-chip=sm_70" \
6+
// RUN: | mlir-opt -gpu-kernel-outlining \
7+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm{use-bare-ptr-memref-call-conv=1},gpu-to-cubin{chip=sm_70}))' \
8+
// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm="use-bare-pointers-for-host=1 use-bare-pointers-for-kernels=1" \
79
// RUN: | mlir-cpu-runner \
810
// RUN: --shared-libs=%mlir_cuda_runtime \
911
// RUN: --entry-point-result=void \

mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm="cubin-chip=sm_70" \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \
4+
// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \
@@ -8,7 +10,9 @@
810

911
// Same as above but with the memref bare pointer lowering convention.
1012
// RUN: mlir-opt %s \
11-
// RUN: | mlir-opt -test-lower-to-nvvm="kernel-bare-ptr-calling-convention=1" \
13+
// RUN: | mlir-opt -gpu-kernel-outlining \
14+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm{use-bare-ptr-memref-call-conv=1},gpu-to-cubin))' \
15+
// RUN: | mlir-opt -gpu-to-llvm="use-bare-pointers-for-kernels=1" \
1216
// RUN: | mlir-cpu-runner \
1317
// RUN: --shared-libs=%mlir_cuda_runtime \
1418
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/async.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: mlir-opt %s \
22
// RUN: | mlir-opt -gpu-kernel-outlining \
3-
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)' \
4-
// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm -gpu-module-to-binary \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm \
55
// RUN: | mlir-opt -async-to-async-runtime -async-runtime-ref-counting \
66
// RUN: | mlir-opt -convert-async-to-llvm -convert-func-to-llvm \
77
// RUN: | mlir-cpu-runner \

mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
4+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
5+
// RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts \
36
// RUN: | mlir-cpu-runner \
47
// RUN: --shared-libs=%mlir_cuda_runtime \
58
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/printf.mlir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
3+
// RUN: | mlir-opt -gpu-to-llvm \
34
// RUN: | mlir-cpu-runner \
45
// RUN: --shared-libs=%mlir_cuda_runtime \
56
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/shuffle.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/two-modules.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -test-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-kernel-outlining \
3+
// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \
4+
// RUN: | mlir-opt -gpu-to-llvm \
35
// RUN: | mlir-cpu-runner \
46
// RUN: --shared-libs=%mlir_cuda_runtime \
57
// RUN: --shared-libs=%mlir_runner_utils \

0 commit comments

Comments
 (0)