Open
Description
To reproduce, run:
gc-opt --gc-gpu-pipeline test.mlir
Contents of test.mlir:
// Reproducer input: an f16 matmul (second operand transposed) whose LHS has
// two dynamic dimensions and whose RHS is a static 1024x1024 buffer.
// NOTE(review): the module attribute key is the quoted string "#dlti.sys_spec";
// confirm this is the key the consuming passes actually look up.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  // %arg0: LHS (?x?), %arg1: RHS (1024x1024), %arg2: destination (?x1024).
  func.func @dynamic_matmul_f16(%arg0: memref<?x?xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<?x1024xf16>) {
    // View the LHS buffer as a tensor and query both of its dynamic sizes.
    %lhs = bufferization.to_tensor %arg0 restrict : memref<?x?xf16>
    %idx0 = arith.constant 0 : index
    %lhs_dim0 = tensor.dim %lhs, %idx0 : tensor<?x?xf16>
    %idx1 = arith.constant 1 : index
    // %lhs_dim1 is not referenced by any later op in this function.
    %lhs_dim1 = tensor.dim %lhs, %idx1 : tensor<?x?xf16>

    // View the static RHS buffer as a tensor.
    %rhs = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>

    // Zero-filled accumulator; its dynamic leading size is dim 0 of the LHS.
    %acc_empty = tensor.empty(%lhs_dim0) : tensor<?x1024xf16>
    %zero = arith.constant 0.000000e+00 : f16
    %acc_init = linalg.fill ins(%zero : f16) outs(%acc_empty : tensor<?x1024xf16>) -> tensor<?x1024xf16>

    // Matmul with the RHS operand transposed, accumulating into the zeroed tensor.
    %result = linalg.matmul_transpose_b ins(%lhs, %rhs : tensor<?x?xf16>, tensor<1024x1024xf16>) outs(%acc_init : tensor<?x1024xf16>) -> tensor<?x1024xf16>

    // Materialize the result tensor into the caller-provided destination buffer.
    bufferization.materialize_in_destination %result in restrict writable %arg2 : (tensor<?x1024xf16>, memref<?x1024xf16>) -> ()
    return
  }
}