|
| 1 | +// RUN: fir-opt --cuf-gpu-convert-to-llvm %s | FileCheck %s |
| 2 | + |
| 3 | +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git ddcfd4d2dc17bf66cee8c3ef6284118684a2b0e6)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { |
| 4 | + llvm.func @_QMmod1Phost_sub() { // Host-side test input: one runtime allocation, a zero-fill loop over a 10 x i32 stack array, and two CUF data transfers around a single gpu.launch_func. |
| 5 | + %0 = llvm.mlir.constant(1 : i32) : i32 |
| 6 | + %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // stack slot that receives the struct stored just before llvm.return |
| 7 | + %2 = llvm.mlir.constant(40 : i64) : i64 // passed to the alloc call and to both transfer calls; presumably the byte size of 10 x i32 (TODO confirm) |
| 8 | + %3 = llvm.mlir.constant(16 : i32) : i32 |
| 9 | + %4 = llvm.mlir.constant(25 : i32) : i32 |
| 10 | + %5 = llvm.mlir.constant(21 : i32) : i32 |
| 11 | + %6 = llvm.mlir.constant(17 : i32) : i32 |
| 12 | + %7 = llvm.mlir.constant(1 : index) : i64 // used for all three grid dimensions and for block y/z in the launch below |
| 13 | + %8 = llvm.mlir.constant(27 : i32) : i32 |
| 14 | + %9 = llvm.mlir.constant(6 : i32) : i32 |
| 15 | + %10 = llvm.mlir.constant(1 : i32) : i32 |
| 16 | + %11 = llvm.mlir.constant(0 : i32) : i32 // reused as the stored fill value, a runtime-call argument and the dynamic shared memory size |
| 17 | + %12 = llvm.mlir.constant(10 : index) : i64 // block x dimension of the launch; also inserted into the struct stored to %1 |
| 18 | + %13 = llvm.mlir.addressof @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 : !llvm.ptr // address of the 2-byte string global defined in this module |
| 19 | + %14 = llvm.call @_FortranACUFMemAlloc(%2, %11, %13, %6) : (i64, i32, !llvm.ptr, i32) -> !llvm.ptr // returned pointer is later the sole kernel argument |
| 20 | + %15 = llvm.mlir.constant(10 : index) : i64 |
| 21 | + %16 = llvm.mlir.constant(1 : index) : i64 |
| 22 | + %17 = llvm.alloca %15 x i32 : (i64) -> !llvm.ptr // 10 x i32 on the stack |
| 23 | + %18 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // start of a (ptr, ptr, i64, [1 x i64], [1 x i64]) struct; presumably a lowered memref descriptor (TODO confirm) |
| 24 | + %19 = llvm.insertvalue %17, %18[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 25 | + %20 = llvm.insertvalue %17, %19[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 26 | + %21 = llvm.mlir.constant(0 : index) : i64 |
| 27 | + %22 = llvm.insertvalue %21, %20[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 28 | + %23 = llvm.insertvalue %15, %22[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 29 | + %24 = llvm.insertvalue %16, %23[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 30 | + %25 = llvm.extractvalue %24[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // field 1 was set from %17, so %25 is the stack array pointer |
| 31 | + %26 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // second struct of the same type, rebuilt from %25 with the same 0 / 10 / 1 values |
| 32 | + %27 = llvm.insertvalue %25, %26[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 33 | + %28 = llvm.insertvalue %25, %27[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 34 | + %29 = llvm.mlir.constant(0 : index) : i64 |
| 35 | + %30 = llvm.insertvalue %29, %28[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 36 | + %31 = llvm.mlir.constant(10 : index) : i64 |
| 37 | + %32 = llvm.insertvalue %31, %30[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 38 | + %33 = llvm.mlir.constant(1 : index) : i64 |
| 39 | + %34 = llvm.insertvalue %33, %32[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 40 | + %35 = llvm.mlir.constant(1 : index) : i64 |
| 41 | + %36 = llvm.mlir.constant(11 : index) : i64 |
| 42 | + %37 = llvm.mlir.constant(1 : index) : i64 |
| 43 | + llvm.br ^bb1(%35 : i64) // enter the loop with the counter at 1 |
| 44 | + ^bb1(%38: i64): // 2 preds: ^bb0, ^bb2 |
| 45 | + %39 = llvm.icmp "slt" %38, %36 : i64 // continue while counter < 11, i.e. counter values 1..10 |
| 46 | + llvm.cond_br %39, ^bb2, ^bb3 |
| 47 | + ^bb2: // pred: ^bb1 |
| 48 | + %40 = llvm.mlir.constant(-1 : index) : i64 |
| 49 | + %41 = llvm.add %38, %40 : i64 // counter - 1: zero-based element index |
| 50 | + %42 = llvm.extractvalue %34[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> |
| 51 | + %43 = llvm.getelementptr %42[%41] : (!llvm.ptr, i64) -> !llvm.ptr, i32 |
| 52 | + llvm.store %11, %43 : i32, !llvm.ptr // write the i32 constant 0 into the current element |
| 53 | + %44 = llvm.add %38, %37 : i64 // step the counter by 1 |
| 54 | + llvm.br ^bb1(%44 : i64) |
| 55 | + ^bb3: // pred: ^bb1 |
| 56 | + %45 = llvm.call @_FortranACUFDataTransferPtrPtr(%14, %25, %2, %11, %13, %5) : (!llvm.ptr, !llvm.ptr, i64, i32, !llvm.ptr, i32) -> !llvm.struct<()> // runtime-allocated pointer first, stack array second; fourth argument is %11 (0) |
| 57 | + gpu.launch_func @cuda_device_mod::@_QMmod1Psub1 blocks in (%7, %7, %7) threads in (%12, %7, %7) : i64 dynamic_shared_memory_size %11 args(%14 : !llvm.ptr) // op under test: the CHECK lines expect a call to @_FortranACUFLaunchKernel in this function |
| 58 | + %46 = llvm.call @_FortranACUFDataTransferPtrPtr(%25, %14, %2, %10, %13, %4) : (!llvm.ptr, !llvm.ptr, i64, i32, !llvm.ptr, i32) -> !llvm.struct<()> // pointer operands swapped relative to the first transfer; fourth argument is %10 (1) |
| 59 | + %47 = llvm.call @_FortranAioBeginExternalListOutput(%9, %13, %8) {fastmathFlags = #llvm.fastmath<contract>} : (i32, !llvm.ptr, i32) -> !llvm.ptr // %47 has no later use in this function |
| 60 | + %48 = llvm.mlir.constant(9 : i32) : i32 |
| 61 | + %49 = llvm.mlir.zero : !llvm.ptr |
| 62 | + %50 = llvm.getelementptr %49[1] : (!llvm.ptr) -> !llvm.ptr, i32 |
| 63 | + %51 = llvm.ptrtoint %50 : !llvm.ptr to i64 // offset of element 1 from a null i32 pointer, i.e. the size of i32 as an i64 |
| 64 | + %52 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> // start building the struct that is stored to %1; presumably a Fortran descriptor (TODO confirm) |
| 65 | + %53 = llvm.insertvalue %51, %52[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 66 | + %54 = llvm.mlir.constant(20240719 : i32) : i32 |
| 67 | + %55 = llvm.insertvalue %54, %53[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 68 | + %56 = llvm.mlir.constant(1 : i32) : i32 |
| 69 | + %57 = llvm.trunc %56 : i32 to i8 |
| 70 | + %58 = llvm.insertvalue %57, %55[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 71 | + %59 = llvm.trunc %48 : i32 to i8 |
| 72 | + %60 = llvm.insertvalue %59, %58[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 73 | + %61 = llvm.mlir.constant(0 : i32) : i32 |
| 74 | + %62 = llvm.trunc %61 : i32 to i8 |
| 75 | + %63 = llvm.insertvalue %62, %60[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 76 | + %64 = llvm.mlir.constant(0 : i32) : i32 |
| 77 | + %65 = llvm.trunc %64 : i32 to i8 |
| 78 | + %66 = llvm.insertvalue %65, %63[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 79 | + %67 = llvm.mlir.constant(0 : i64) : i64 |
| 80 | + %68 = llvm.mlir.constant(1 : i64) : i64 |
| 81 | + %69 = llvm.insertvalue %68, %66[7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 82 | + %70 = llvm.insertvalue %12, %69[7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 83 | + %71 = llvm.insertvalue %51, %70[7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> |
| 84 | + %72 = llvm.mul %51, %12 : i64 // %72 has no later use in this function |
| 85 | + %73 = llvm.insertvalue %25, %71[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> // field 0 is set last, to the stack array pointer |
| 86 | + llvm.store %73, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr // write the completed struct into the slot allocated at the top |
| 87 | + llvm.return |
| 88 | + } |
| 89 | + llvm.func @_QMmod1Psub1(!llvm.ptr) -> () // bodiless declaration of the kernel symbol; the CHECK lines expect llvm.mlir.addressof to be applied to this name |
| 90 | + llvm.mlir.global linkonce constant @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5() {addr_space = 0 : i32} : !llvm.array<2 x i8> { // 2-byte constant string whose address is passed to the runtime calls in @_QMmod1Phost_sub |
| 91 | + %str = llvm.mlir.constant("a\00") : !llvm.array<2 x i8> // the character a followed by a NUL terminator |
| 92 | + llvm.return %str : !llvm.array<2 x i8> |
| 93 | + } |
| 94 | + llvm.func @_FortranAioBeginExternalListOutput(i32, !llvm.ptr, i32) -> !llvm.ptr attributes {fir.io, fir.runtime, sym_visibility = "private"} // called once in @_QMmod1Phost_sub, after the second data transfer |
| 95 | + llvm.func @_FortranACUFMemAlloc(i64, i32, !llvm.ptr, i32) -> !llvm.ptr attributes {fir.runtime, sym_visibility = "private"} // called once in @_QMmod1Phost_sub; its result is the kernel argument |
| 96 | + llvm.func @_FortranACUFDataTransferPtrPtr(!llvm.ptr, !llvm.ptr, i64, i32, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"} // called once before and once after the kernel launch in @_QMmod1Phost_sub |
| 97 | + llvm.func @_FortranACUFMemFree(!llvm.ptr, i32, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"} // declared only: no call to it appears in this module |
| 98 | + gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">] // provides the @cuda_device_mod symbol named by gpu.launch_func; the object payload is an empty string |
| 99 | +} |
| 100 | + |
| 101 | +// CHECK-LABEL: _QMmod1Phost_sub |
| 102 | + |
| 103 | +// CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1 : !llvm.ptr |
| 104 | +// CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}}) |
0 commit comments