Skip to content

Commit 57e4735

Browse files
committed
[mlir][OpenMP] Add optional alloc region to reduction decl
The verifier checks that there is at most one block in the alloc region. This is not sufficient to avoid control flow in general MLIR, but by the time we are converting to LLVM IR, structured control flow should already have been lowered to the cf dialect.
1 parent 5506831 commit 57e4735

File tree

4 files changed

+200
-25
lines changed

4 files changed

+200
-25
lines changed

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,18 +1531,29 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove,
15311531
Declares an OpenMP reduction kind. This requires two mandatory and three
15321532
optional regions.
15331533

1534-
1. The initializer region specifies how to initialize the thread-local
1534+
1. The optional alloc region specifies how to allocate the thread-local
1535+
reduction value. This region should not contain control flow and all
1536+
IR should be suitable for inlining straight into an entry block. In
1537+
the common case this is expected to contain only allocas. It is
1538+
expected to `omp.yield` the allocated value on all control paths.
1539+
If allocation is conditional (e.g. only allocate if the mold is
1540+
allocated), this should be done in the initializer region and this
1541+
region not included. The alloc region is not used for by-value
1542+
reductions (where allocation is implicit).
1543+
2. The initializer region specifies how to initialize the thread-local
15351544
reduction value. This is usually the neutral element of the reduction.
15361545
For convenience, the region has an argument that contains the value
1537-
of the reduction accumulator at the start of the reduction. It is
1538-
expected to `omp.yield` the new value on all control flow paths.
1539-
2. The reduction region specifies how to combine two values into one, i.e.
1546+
of the reduction accumulator at the start of the reduction. If an alloc
1547+
region is specified, there is a second block argument containing the
1548+
address of the allocated memory. The initializer region is expected to
1549+
`omp.yield` the new value on all control flow paths.
1550+
3. The reduction region specifies how to combine two values into one, i.e.
15401551
the reduction operator. It accepts the two values as arguments and is
15411552
expected to `omp.yield` the combined value on all control flow paths.
1542-
3. The atomic reduction region is optional and specifies how two values
1553+
4. The atomic reduction region is optional and specifies how two values
15431554
can be combined atomically given local accumulator variables. It is
15441555
expected to store the combined value in the first accumulator variable.
1545-
4. The cleanup region is optional and specifies how to clean up any memory
1556+
5. The cleanup region is optional and specifies how to clean up any memory
15461557
allocated by the initializer region. The region has an argument that
15471558
contains the value of the thread-local reduction accumulator. This will
15481559
be executed after the reduction has completed.
@@ -1558,12 +1569,14 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove,
15581569
let arguments = (ins SymbolNameAttr:$sym_name,
15591570
TypeAttr:$type);
15601571

1561-
let regions = (region AnyRegion:$initializerRegion,
1572+
let regions = (region MaxSizedRegion<1>:$allocRegion,
1573+
AnyRegion:$initializerRegion,
15621574
AnyRegion:$reductionRegion,
15631575
AnyRegion:$atomicReductionRegion,
15641576
AnyRegion:$cleanupRegion);
15651577

15661578
let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
1579+
"custom<AllocReductionRegion>($allocRegion) "
15671580
"`init` $initializerRegion "
15681581
"`combiner` $reductionRegion "
15691582
"custom<AtomicReductionRegion>($atomicReductionRegion) "
@@ -1576,6 +1589,17 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove,
15761589

15771590
return cast<PointerLikeType>(getAtomicReductionRegion().front().getArgument(0).getType());
15781591
}
1592+
1593+
Value getInitializerMoldArg() {
1594+
return getInitializerRegion().front().getArgument(0);
1595+
}
1596+
1597+
Value getInitializerAllocArg() {
1598+
if (getAllocRegion().empty() ||
1599+
getInitializerRegion().front().getNumArguments() != 2)
1600+
return {nullptr};
1601+
return getInitializerRegion().front().getArgument(1);
1602+
}
15791603
}];
15801604
let hasRegionVerifier = 1;
15811605
}

mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,46 +1883,84 @@ LogicalResult DistributeOp::verify() {
18831883
// DeclareReductionOp
18841884
//===----------------------------------------------------------------------===//
18851885

1886-
static ParseResult parseAtomicReductionRegion(OpAsmParser &parser,
1887-
Region &region) {
1888-
if (parser.parseOptionalKeyword("atomic"))
1886+
static ParseResult parseOptionalReductionRegion(OpAsmParser &parser,
1887+
Region &region,
1888+
StringRef keyword) {
1889+
if (parser.parseOptionalKeyword(keyword))
18891890
return success();
18901891
return parser.parseRegion(region);
18911892
}
18921893

1893-
static void printAtomicReductionRegion(OpAsmPrinter &printer,
1894-
DeclareReductionOp op, Region &region) {
1894+
static void printOptionalReductionRegion(OpAsmPrinter &printer, Region &region,
1895+
StringRef keyword) {
18951896
if (region.empty())
18961897
return;
1897-
printer << "atomic ";
1898+
printer << keyword << " ";
18981899
printer.printRegion(region);
18991900
}
19001901

1902+
static ParseResult parseAllocReductionRegion(OpAsmParser &parser,
1903+
Region &region) {
1904+
return parseOptionalReductionRegion(parser, region, "alloc");
1905+
}
1906+
1907+
static void printAllocReductionRegion(OpAsmPrinter &printer,
1908+
DeclareReductionOp op, Region &region) {
1909+
printOptionalReductionRegion(printer, region, "alloc");
1910+
}
1911+
1912+
static ParseResult parseAtomicReductionRegion(OpAsmParser &parser,
1913+
Region &region) {
1914+
return parseOptionalReductionRegion(parser, region, "atomic");
1915+
}
1916+
1917+
static void printAtomicReductionRegion(OpAsmPrinter &printer,
1918+
DeclareReductionOp op, Region &region) {
1919+
printOptionalReductionRegion(printer, region, "atomic");
1920+
}
1921+
19011922
static ParseResult parseCleanupReductionRegion(OpAsmParser &parser,
19021923
Region &region) {
1903-
if (parser.parseOptionalKeyword("cleanup"))
1904-
return success();
1905-
return parser.parseRegion(region);
1924+
return parseOptionalReductionRegion(parser, region, "cleanup");
19061925
}
19071926

19081927
static void printCleanupReductionRegion(OpAsmPrinter &printer,
19091928
DeclareReductionOp op, Region &region) {
1910-
if (region.empty())
1911-
return;
1912-
printer << "cleanup ";
1913-
printer.printRegion(region);
1929+
printOptionalReductionRegion(printer, region, "cleanup");
19141930
}
19151931

19161932
LogicalResult DeclareReductionOp::verifyRegions() {
1933+
if (!getAllocRegion().empty()) {
1934+
for (YieldOp yieldOp : getAllocRegion().getOps<YieldOp>()) {
1935+
if (yieldOp.getResults().size() != 1 ||
1936+
yieldOp.getResults().getTypes()[0] != getType())
1937+
return emitOpError() << "expects alloc region to yield a value "
1938+
"of the reduction type";
1939+
}
1940+
}
1941+
19171942
if (getInitializerRegion().empty())
19181943
return emitOpError() << "expects non-empty initializer region";
19191944
Block &initializerEntryBlock = getInitializerRegion().front();
1920-
if (initializerEntryBlock.getNumArguments() != 1 ||
1921-
initializerEntryBlock.getArgument(0).getType() != getType()) {
1922-
return emitOpError() << "expects initializer region with one argument "
1923-
"of the reduction type";
1945+
1946+
if (initializerEntryBlock.getNumArguments() == 1) {
1947+
if (!getAllocRegion().empty())
1948+
return emitOpError() << "expects two arguments to the initializer region "
1949+
"when an allocation region is used";
1950+
} else if (initializerEntryBlock.getNumArguments() == 2) {
1951+
if (getAllocRegion().empty())
1952+
return emitOpError() << "expects one argument to the initializer region "
1953+
"when no allocation region is used";
1954+
} else {
1955+
return emitOpError()
1956+
<< "expects one or two arguments to the initializer region";
19241957
}
19251958

1959+
for (mlir::Value arg : initializerEntryBlock.getArguments())
1960+
if (arg.getType() != getType())
1961+
return emitOpError() << "expects initializer region argument to match "
1962+
"the reduction type";
1963+
19261964
for (YieldOp yieldOp : getInitializerRegion().getOps<YieldOp>()) {
19271965
if (yieldOp.getResults().size() != 1 ||
19281966
yieldOp.getResults().getTypes()[0] != getType())

mlir/test/Dialect/OpenMP/invalid.mlir

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,63 @@ func.func @omp_simd_pretty_simdlen_safelen(%lb : index, %ub : index, %step : ind
565565

566566
// -----
567567

568-
// expected-error @below {{op expects initializer region with one argument of the reduction type}}
568+
// expected-error @below {{op expects alloc region to yield a value of the reduction type}}
569+
omp.declare_reduction @add_f32 : f32
570+
alloc {
571+
^bb0(%arg: f32):
572+
// nonsense test code
573+
%0 = arith.constant 0.0 : f64
574+
omp.yield (%0 : f64)
575+
}
576+
init {
577+
^bb0(%arg0: f32, %arg1: f32):
578+
%0 = arith.constant 0.0 : f32
579+
omp.yield (%0 : f32)
580+
}
581+
combiner {
582+
^bb1(%arg0: f32, %arg1: f32):
583+
%1 = arith.addf %arg0, %arg1 : f32
584+
omp.yield (%1 : f32)
585+
}
586+
587+
// -----
588+
589+
// expected-error @below {{op expects two arguments to the initializer region when an allocation region is used}}
590+
omp.declare_reduction @add_f32 : f32
591+
alloc {
592+
^bb0(%arg: f32):
593+
// nonsense test code
594+
omp.yield (%arg : f32)
595+
}
596+
init {
597+
^bb0(%arg0: f32):
598+
%0 = arith.constant 0.0 : f32
599+
omp.yield (%0 : f32)
600+
}
601+
combiner {
602+
^bb1(%arg0: f32, %arg1: f32):
603+
%1 = arith.addf %arg0, %arg1 : f32
604+
omp.yield (%1 : f32)
605+
}
606+
607+
// -----
608+
609+
// expected-error @below {{op expects one argument to the initializer region when no allocation region is used}}
610+
omp.declare_reduction @add_f32 : f32
611+
init {
612+
^bb0(%arg: f32, %arg2: f32):
613+
%0 = arith.constant 0.0 : f32
614+
omp.yield (%0 : f32)
615+
}
616+
combiner {
617+
^bb1(%arg0: f32, %arg1: f32):
618+
%1 = arith.addf %arg0, %arg1 : f32
619+
omp.yield (%1 : f32)
620+
}
621+
622+
// -----
623+
624+
// expected-error @below {{op expects initializer region argument to match the reduction type}}
569625
omp.declare_reduction @add_f32 : f64
570626
init {
571627
^bb0(%arg: f32):
@@ -683,6 +739,33 @@ cleanup {
683739

684740
// -----
685741

742+
// expected-error @below {{op region #0 ('allocRegion') failed to verify constraint: region with at most 1 blocks}}
743+
omp.declare_reduction @alloc_reduction : !llvm.ptr
744+
alloc {
745+
^bb0(%arg: !llvm.ptr):
746+
%c1 = arith.constant 1 : i32
747+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
748+
cf.br ^bb1(%0: !llvm.ptr)
749+
^bb1(%ret: !llvm.ptr):
750+
omp.yield (%ret : !llvm.ptr)
751+
}
752+
init {
753+
^bb0(%arg: !llvm.ptr):
754+
%cst = arith.constant 1.0 : f32
755+
llvm.store %cst, %arg : f32, !llvm.ptr
756+
omp.yield (%arg : !llvm.ptr)
757+
}
758+
combiner {
759+
^bb1(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
760+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
761+
%1 = llvm.load %arg1 : !llvm.ptr -> f32
762+
%2 = arith.addf %0, %1 : f32
763+
llvm.store %2, %arg0 : f32, !llvm.ptr
764+
omp.yield (%arg0 : !llvm.ptr)
765+
}
766+
767+
// -----
768+
686769
func.func @foo(%lb : index, %ub : index, %step : index) {
687770
%c1 = arith.constant 1 : i32
688771
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr

mlir/test/Dialect/OpenMP/ops.mlir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2541,6 +2541,36 @@ atomic {
25412541
omp.yield
25422542
}
25432543

2544+
// CHECK-LABEL: @alloc_reduction
2545+
// CHECK-SAME: alloc {
2546+
// CHECK-NEXT: ^bb0(%[[ARG0:.*]]: !llvm.ptr):
2547+
// ...
2548+
// CHECK: omp.yield
2549+
// CHECK-NEXT: } init {
2550+
// CHECK: } combiner {
2551+
// CHECK: }
2552+
omp.declare_reduction @alloc_reduction : !llvm.ptr
2553+
alloc {
2554+
^bb0(%arg: !llvm.ptr):
2555+
%c1 = arith.constant 1 : i32
2556+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
2557+
omp.yield (%0 : !llvm.ptr)
2558+
}
2559+
init {
2560+
^bb0(%mold: !llvm.ptr, %alloc: !llvm.ptr):
2561+
%cst = arith.constant 1.0 : f32
2562+
llvm.store %cst, %alloc : f32, !llvm.ptr
2563+
omp.yield (%alloc : !llvm.ptr)
2564+
}
2565+
combiner {
2566+
^bb1(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
2567+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
2568+
%1 = llvm.load %arg1 : !llvm.ptr -> f32
2569+
%2 = arith.addf %0, %1 : f32
2570+
llvm.store %2, %arg0 : f32, !llvm.ptr
2571+
omp.yield (%arg0 : !llvm.ptr)
2572+
}
2573+
25442574
// CHECK-LABEL: omp_targets_with_map_bounds
25452575
// CHECK-SAME: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr)
25462576
func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () {

0 commit comments

Comments
 (0)