diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 5f20ea42e4992..2ee21099cfb14 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -55,8 +55,11 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) { static bool noAliasingUseInLoop(vector::TransferReadOp transferRead, LoopLikeOpInterface loop) { Value source = transferRead.getSource(); + // Skip subview and collapse_shape Ops while (auto subView = source.getDefiningOp<memref::SubViewOp>()) source = subView.getSource(); + while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>()) + source = collapsed->getOperand(0); llvm::SmallVector<Operation *> users(source.getUsers().begin(), source.getUsers().end()); llvm::SmallDenseSet<Operation *, 32> processed; @@ -69,6 +72,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead, users.append(subView->getUsers().begin(), subView->getUsers().end()); continue; } + if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) { + users.append(collapsed->getUsers().begin(), collapsed->getUsers().end()); + continue; + } if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user)) continue; if (!loop->isAncestor(user)) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp index 74d4b7636315f..f715c543eb179 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp @@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) { users.append(subView->getUsers().begin(), subView->getUsers().end()); continue; } + if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) { + users.append(collapsed->getUsers().begin(), collapsed->getUsers().end()); + continue; + } if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user)) continue; if (auto write = dyn_cast<vector::TransferWriteOp>(user)) { diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir 
b/mlir/test/Dialect/Linalg/hoisting.mlir index 5b1bb3fc15e09..e25914620726b 100644 --- a/mlir/test/Dialect/Linalg/hoisting.mlir +++ b/mlir/test/Dialect/Linalg/hoisting.mlir @@ -756,3 +756,74 @@ transform.sequence failures(propagate) { transform.structured.hoist_redundant_vector_transfers %0 : (!transform.any_op) -> !transform.any_op } + +// ----- + +// Regression test - `vector.transfer_read` below should not be hoisted. +// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca +// (read by `vector.transfer_read`) alias. + +// CHECK-LABEL: func.func @no_hoisting_collapse_shape +// CHECK: scf.for {{.*}} { +// CHECK: vector.transfer_write +// CHECK: vector.transfer_read +// CHECK: vector.transfer_write +// CHECK: } + +func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) { + %c0_i32 = arith.constant 0 : i32 + %c0 = arith.constant 0 : index + %c4 = arith.constant 4 : index + %c20 = arith.constant 20 : index + %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32> + scf.for %arg0 = %c0 to %c20 step %c4 { + %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>> + %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32> + vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32> + %read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32> + vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>> + } + return +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers 
%0 + : (!transform.any_op) -> !transform.any_op +} + +// ----- + +// Regression test - `vector.transfer_read` below should not be hoisted. +// Indeed, %collapse_shape (read by `vector.transfer_read`) and %alloca +// (written to by `vector.transfer_write`) alias. + +// CHECK-LABEL: func.func @no_hoisting_collapse_shape_2 +// CHECK: scf.for {{.*}} { +// CHECK: vector.transfer_write +// CHECK: vector.transfer_read + +func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) { + %c0_i32 = arith.constant 0 : i32 + %c0 = arith.constant 0 : index + %c4 = arith.constant 4 : index + %c20 = arith.constant 20 : index + %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32> + scf.for %arg0 = %c0 to %c20 step %c4 { + %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32> + vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32> + %read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32> + "prevent.dce"(%read) : (vector<12xi32>) ->() + } + return +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op +} diff --git a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir index af4e7d86f62e3..f43367ab4aeba 100644 --- a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir +++ b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir @@ -215,4 +215,44 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>, vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} : vector<1x4xf32>, memref<4x4xf32> return %0 : vector<1x4xf32> -} \ No newline at end of file +} + + +// Regression test - 
the following _potential forwarding_ of %1 to the final +// `vector.transfer_write` would not be safe: +// %1 = vector.transfer_read %subview +// vector.transfer_write %1, %alloca +// vector.transfer_write %vec, %collapse_shape +// %2 = vector.transfer_read %alloca +// vector.transfer_write %1, %subview +// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the +// final `vector.transfer_write` should be preserved as: +// vector.transfer_write %2, %subview + +// CHECK-LABEL: func.func @collapse_shape +// CHECK: scf.for {{.*}} { +// CHECK: vector.transfer_read +// CHECK: vector.transfer_write +// CHECK: vector.transfer_write +// CHECK: vector.transfer_read +// CHECK: vector.transfer_write + +func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) { + %c0_i32 = arith.constant 0 : i32 + %c0 = arith.constant 0 : index + %c4 = arith.constant 4 : index + %c20 = arith.constant 20 : index + + %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32> + %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32> + scf.for %arg0 = %c0 to %c20 step %c4 { + %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>> + %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32> + // $alloca and $collapse_shape alias + vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32> + vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32> + %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32> + vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], 
offset: ?>> + } + return +}