1
- // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
1
+ // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s --check-prefixes=CHECK,PARALLEL-CHECK
2
+ // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only check-parallel-regions=false" -split-input-file | FileCheck %s --check-prefixes=CHECK,NO-PARALLEL-CHECK
2
3
3
4
// Run fuzzer with different seeds.
4
5
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
@@ -811,8 +812,10 @@ func.func @parallel_region() -> tensor<320xf32>
811
812
%0 = scf.forall (%arg0 ) in (%c320 ) shared_outs (%arg1 = %alloc0 ) -> (tensor <320 xf32 >) {
812
813
%val = " test.foo" () : () -> (f32 )
813
814
// linalg.fill must bufferize out-of-place because every thread needs a
814
- // private copy of %alloc1.
815
- // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
815
+ // private copy of %alloc1. If parallel regions are not taken into account
816
+ // (check-parallel-regions=false), the fill can bufferize in place.
817
+ // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
818
+ // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
816
819
%fill = linalg.fill ins (%val : f32 ) outs (%alloc1 : tensor <1 xf32 >) -> tensor <1 xf32 >
817
820
scf.forall.in_parallel {
818
821
// CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -841,8 +844,10 @@ func.func @parallel_region_mixed_def(%c: i1) -> tensor<320xf32>
841
844
}
842
845
%val = " test.foo" () : () -> (f32 )
843
846
// linalg.fill must bufferize out-of-place because every thread needs a
844
- // private copy of %alloc1.
845
- // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
847
+ // private copy of %alloc1. If parallel regions are not taken into account
848
+ // (check-parallel-regions=false), the fill can bufferize in place.
849
+ // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
850
+ // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
846
851
%fill = linalg.fill ins (%val : f32 ) outs (%selected : tensor <1 xf32 >) -> tensor <1 xf32 >
847
852
scf.forall.in_parallel {
848
853
// CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -866,8 +871,10 @@ func.func @parallel_region_two_writes(%f: f32) -> tensor<320xf32>
866
871
%0 = scf.forall (%arg0 ) in (%c320 ) shared_outs (%arg1 = %alloc0 ) -> (tensor <320 xf32 >) {
867
872
%val = " test.foo" () : () -> (f32 )
868
873
// linalg.fill must bufferize out-of-place because every thread needs a
869
- // private copy of %alloc1.
870
- // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
874
+ // private copy of %alloc1. If parallel regions are not taken into account
875
+ // (check-parallel-regions=false), the fill can bufferize in place.
876
+ // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
877
+ // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
871
878
%fill = linalg.fill ins (%val : f32 ) outs (%alloc1 : tensor <1 xf32 >) -> tensor <1 xf32 >
872
879
// CHECK: tensor.insert
873
880
// CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
0 commit comments