|
| 1 | +// RUN: mlir-opt %s -split-input-file -test-eliminate-vector-masks | FileCheck %s |
| 2 | + |
| 3 | +// This tests a general pattern the vectorizer tends to emit: a mask built from the dim of a vector-sized slice. |
| 4 | + |
| 5 | +// CHECK-LABEL: @eliminate_redundant_masks_through_insert_and_extracts |
| 6 | +// CHECK: %[[ALL_TRUE_MASK:.*]] = arith.constant dense<true> : vector<[4]xi1> |
| 7 | +// CHECK: vector.transfer_read {{.*}} %[[ALL_TRUE_MASK]] |
| 8 | +// CHECK: vector.transfer_write {{.*}} %[[ALL_TRUE_MASK]] |
| 9 | +func.func @eliminate_redundant_masks_through_insert_and_extracts(%tensor: tensor<1x1000xf32>) { |
| 10 | + %c0 = arith.constant 0 : index |
| 11 | + %c4 = arith.constant 4 : index |
| 12 | + %c1000 = arith.constant 1000 : index |
| 13 | + %c0_f32 = arith.constant 0.0 : f32 |
| 14 | + %vscale = vector.vscale |
| 15 | + %c4_vscale = arith.muli %vscale, %c4 : index |
| 16 | + %extracted_slice_0 = tensor.extract_slice %tensor[0, 0] [1, %c4_vscale] [1, 1] : tensor<1x1000xf32> to tensor<1x?xf32> |
| 17 | + %output_tensor = scf.for %i = %c0 to %c1000 step %c4_vscale iter_args(%arg = %extracted_slice_0) -> tensor<1x?xf32> { |
| 18 | + // 1. Extract a rank-reduced slice of %c4_vscale elements from the iter_arg. |
| 19 | + %extracted_slice_1 = tensor.extract_slice %arg[0, 0] [1, %c4_vscale] [1, 1] : tensor<1x?xf32> to tensor<?xf32> |
| 20 | + |
| 21 | + // 2. Create a mask for the slice, sized by the slice's (dynamic) dim 0. |
| 22 | + %dim_1 = tensor.dim %extracted_slice_1, %c0 : tensor<?xf32> |
| 23 | + %mask = vector.create_mask %dim_1 : vector<[4]xi1> |
| 24 | + |
| 25 | + // 3. Read the slice under the mask and do some computation. |
| 26 | + %vec = vector.transfer_read %extracted_slice_1[%c0], %c0_f32, %mask {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> |
| 27 | + %new_vec = "test.some_computation"(%vec) : (vector<[4]xf32>) -> (vector<[4]xf32>) |
| 28 | + |
| 29 | + // 4. Write the new value back under the same mask. |
| 30 | + %write = vector.transfer_write %new_vec, %extracted_slice_1[%c0], %mask {in_bounds = [true]} : vector<[4]xf32>, tensor<?xf32> |
| 31 | + |
| 32 | + // 5. Insert the written slice back (same %c4_vscale size) and yield the new tensor value. |
| 33 | + %result = tensor.insert_slice %write into %arg[0, 0] [1, %c4_vscale] [1, 1] : tensor<?xf32> into tensor<1x?xf32> |
| 34 | + scf.yield %result : tensor<1x?xf32> |
| 35 | + } |
| 36 | + "test.some_use"(%output_tensor) : (tensor<1x?xf32>) -> () |
| 37 | + return |
| 38 | +} |
| 39 | + |
| 40 | +// ----- |
| 41 | + |
| 42 | +// CHECK-LABEL: @negative_extract_slice_size_shrink |
| 43 | +// CHECK-NOT: arith.constant dense<true> : vector<[4]xi1> |
| 44 | +// CHECK: %[[MASK:.*]] = vector.create_mask |
| 45 | +// CHECK: "test.some_use"(%[[MASK]]) : (vector<[4]xi1>) -> () |
| 46 | +func.func @negative_extract_slice_size_shrink(%tensor: tensor<1000xf32>) { |
| 47 | + %c0 = arith.constant 0 : index |
| 48 | + %c4 = arith.constant 4 : index |
| 49 | + %c1000 = arith.constant 1000 : index |
| 50 | + %vscale = vector.vscale |
| 51 | + %c4_vscale = arith.muli %vscale, %c4 : index |
| 52 | + %extracted_slice = tensor.extract_slice %tensor[0] [%c4_vscale] [1] : tensor<1000xf32> to tensor<?xf32> |
| 53 | + %slice = scf.for %i = %c0 to %c1000 step %c4_vscale iter_args(%arg = %extracted_slice) -> tensor<?xf32> { |
| 54 | + // This mask cannot be eliminated, even though from the operations above it |
| 55 | + // appears `tensor.dim` will always be c4_vscale (making the mask all-true). |
| 56 | + %dim = tensor.dim %arg, %c0 : tensor<?xf32> |
| 57 | + %mask = vector.create_mask %dim : vector<[4]xi1> |
| 58 | + "test.some_use"(%mask) : (vector<[4]xi1>) -> () |
| 59 | + // !!! Here the size of the yielded slice (and so the mask bound) could shrink in the next iteration. |
| 60 | + %next_num_els = affine.min affine_map<(d0)[s0] -> (-d0 + 1000, s0)>(%i)[%c4_vscale] |
| 61 | + %new_extracted_slice = tensor.extract_slice %tensor[%c4_vscale] [%next_num_els] [1] : tensor<1000xf32> to tensor<?xf32> |
| 62 | + scf.yield %new_extracted_slice : tensor<?xf32> |
| 63 | + } |
| 64 | + "test.some_use"(%slice) : (tensor<?xf32>) -> () |
| 65 | + return |
| 66 | +} |
| 67 | + |
| 68 | +// ----- |
| 69 | + |
| 70 | +// CHECK-LABEL: @negative_constant_dim_not_all_true |
| 71 | +// CHECK-NOT: arith.constant dense<true> : vector<2x[4]xi1> |
| 72 | +// CHECK: %[[MASK:.*]] = vector.create_mask |
| 73 | +// CHECK: "test.some_use"(%[[MASK]]) : (vector<2x[4]xi1>) -> () |
| 74 | +func.func @negative_constant_dim_not_all_true() |
| 75 | +{ |
| 76 | + %c1 = arith.constant 1 : index |
| 77 | + %c4 = arith.constant 4 : index |
| 78 | + %vscale = vector.vscale |
| 79 | + %c4_vscale = arith.muli %vscale, %c4 : index |
| 80 | + %mask = vector.create_mask %c1, %c4_vscale : vector<2x[4]xi1> // Leading operand is 1, below the fixed dim size 2. |
| 81 | + "test.some_use"(%mask) : (vector<2x[4]xi1>) -> () |
| 82 | + return |
| 83 | +} |
| 84 | + |
| 85 | +// ----- |
| 86 | + |
| 87 | +// CHECK-LABEL: @negative_constant_vscale_multiple_not_all_true |
| 88 | +// CHECK-NOT: arith.constant dense<true> : vector<2x[4]xi1> |
| 89 | +// CHECK: %[[MASK:.*]] = vector.create_mask |
| 90 | +// CHECK: "test.some_use"(%[[MASK]]) : (vector<2x[4]xi1>) -> () |
| 91 | +func.func @negative_constant_vscale_multiple_not_all_true() { |
| 92 | + %c2 = arith.constant 2 : index |
| 93 | + %c3 = arith.constant 3 : index |
| 94 | + %vscale = vector.vscale |
| 95 | + %c3_vscale = arith.muli %vscale, %c3 : index |
| 96 | + %mask = vector.create_mask %c2, %c3_vscale : vector<2x[4]xi1> // Trailing operand is 3 x vscale, below the scalable dim size 4 x vscale. |
| 97 | + "test.some_use"(%mask) : (vector<2x[4]xi1>) -> () |
| 98 | + return |
| 99 | +} |
| 100 | + |
| 101 | +// ----- |
| 102 | + |
| 103 | +// CHECK-LABEL: @negative_value_bounds_fixed_dim_not_all_true |
| 104 | +// CHECK-NOT: arith.constant dense<true> : vector<3x[4]xi1> |
| 105 | +// CHECK: %[[MASK:.*]] = vector.create_mask |
| 106 | +// CHECK: "test.some_use"(%[[MASK]]) : (vector<3x[4]xi1>) -> () |
| 107 | +func.func @negative_value_bounds_fixed_dim_not_all_true(%tensor: tensor<2x?xf32>) |
| 108 | +{ |
| 109 | + %c0 = arith.constant 0 : index |
| 110 | + %c4 = arith.constant 4 : index |
| 111 | + %vscale = vector.vscale |
| 112 | + %c4_vscale = arith.muli %vscale, %c4 : index |
| 113 | + // This is _very_ simple, but since tensor.dim is not a constant, value bounds |
| 114 | + // will be used to resolve it (dim 0 is 2, below the mask's leading dim size 3). |
| 115 | + %dim = tensor.dim %tensor, %c0 : tensor<2x?xf32> |
| 116 | + %mask = vector.create_mask %dim, %c4_vscale : vector<3x[4]xi1> |
| 117 | + "test.some_use"(%mask) : (vector<3x[4]xi1>) -> () |
| 118 | + return |
| 119 | +} |
| 120 | + |
| 121 | +// ----- |
| 122 | + |
| 123 | +// CHECK-LABEL: @negative_value_bounds_scalable_dim_not_all_true |
| 124 | +// CHECK-NOT: arith.constant dense<true> : vector<3x[4]xi1> |
| 125 | +// CHECK: %[[MASK:.*]] = vector.create_mask |
| 126 | +// CHECK: "test.some_use"(%[[MASK]]) : (vector<3x[4]xi1>) -> () |
| 127 | +func.func @negative_value_bounds_scalable_dim_not_all_true(%tensor: tensor<2x100xf32>) { |
| 128 | + %c1 = arith.constant 1 : index |
| 129 | + %c3 = arith.constant 3 : index |
| 130 | + %vscale = vector.vscale |
| 131 | + %c3_vscale = arith.muli %vscale, %c3 : index |
| 132 | + %slice = tensor.extract_slice %tensor[0, 0] [2, %c3_vscale] [1, 1] : tensor<2x100xf32> to tensor<2x?xf32> |
| 133 | + // Another simple example, but value bounds will be used to resolve the tensor.dim (3 x vscale, below the scalable dim size 4 x vscale). |
| 134 | + %dim = tensor.dim %slice, %c1 : tensor<2x?xf32> |
| 135 | + %mask = vector.create_mask %c3, %dim : vector<3x[4]xi1> |
| 136 | + "test.some_use"(%mask) : (vector<3x[4]xi1>) -> () |
| 137 | + return |
| 138 | +} |
0 commit comments