Skip to content

Commit 5c0cc18

Browse files
authored
[flang][OpenMP] Privatize "loop-local" values in do concurrent on device (#146)
Extends #112. This PR extends support for `do concurrent` mapping to the device a bit more. In particular, it handles localization of loop-local values on the device. Previously, this was only supported and tested on the host. See docs for `looputils::collectLoopLocalValues` for the definition of "loop-local" values.
1 parent 6f99163 commit 5c0cc18

File tree

2 files changed

+42
-23
lines changed

2 files changed

+42
-23
lines changed

flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -565,9 +565,9 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
565565
"defining operation.");
566566
}
567567

568-
llvm::SmallVector<mlir::Value> outermostLoopLives;
569-
looputils::collectLoopLiveIns(doLoop, outermostLoopLives);
570-
assert(!outermostLoopLives.empty());
568+
llvm::SmallVector<mlir::Value> outermostLoopLiveIns;
569+
looputils::collectLoopLiveIns(doLoop, outermostLoopLiveIns);
570+
assert(!outermostLoopLiveIns.empty());
571571

572572
looputils::LoopNestToIndVarMap loopNest;
573573
bool hasRemainingNestedLoops =
@@ -577,28 +577,35 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
577577
"Some `do concurent` loops are not perfectly-nested. "
578578
"These will be serialzied.");
579579

580-
mlir::IRMapping mapper;
581-
582580
llvm::SetVector<mlir::Value> locals;
583581
looputils::collectLoopLocalValues(loopNest.back().first, locals);
582+
// We do not want to map "loop-local" values to the device through
583+
// `omp.map.info` ops. Therefore, we remove them from the list of live-ins.
584+
outermostLoopLiveIns.erase(llvm::remove_if(outermostLoopLiveIns,
585+
[&](mlir::Value liveIn) {
586+
return locals.contains(liveIn);
587+
}),
588+
outermostLoopLiveIns.end());
584589

585590
looputils::sinkLoopIVArgs(rewriter, loopNest);
586591

587592
mlir::omp::TargetOp targetOp;
588593
mlir::omp::LoopNestOperands loopNestClauseOps;
589594

595+
mlir::IRMapping mapper;
596+
590597
if (mapToDevice) {
591598
mlir::omp::TargetOperands targetClauseOps;
592599

593600
// The outermost loop will contain all the live-in values in all nested
594601
// loops since live-in values are collected recursively for all nested
595602
// ops.
596-
for (mlir::Value liveIn : outermostLoopLives)
603+
for (mlir::Value liveIn : outermostLoopLiveIns)
597604
targetClauseOps.mapVars.push_back(
598605
genMapInfoOpForLiveIn(rewriter, liveIn));
599606

600607
targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper,
601-
outermostLoopLives, targetClauseOps);
608+
outermostLoopLiveIns, targetClauseOps);
602609
genTeamsOp(doLoop.getLoc(), rewriter);
603610
}
604611

flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
! occur due to multiple teams trying to access the same allocation.
66

77
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \
8-
! RUN: | FileCheck %s
8+
! RUN: | FileCheck %s --check-prefixes=COMMON
9+
10+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \
11+
! RUN: | FileCheck %s --check-prefixes=COMMON,DEVICE
912

1013
module struct_mod
1114
type test_struct
@@ -49,18 +52,27 @@ program main
4952
print *, "total =", total
5053
end program main
5154

52-
! CHECK: omp.parallel {
53-
! CHECK: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box<!fir.heap<i32>>}> {bindc_name = ".result"}
54-
! CHECK: omp.wsloop {
55-
! CHECK: omp.loop_nest {{.*}} {
56-
! CHECK: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components
57-
! CHECK: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]]
58-
! CHECK: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]]
59-
! CHECK: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]]
60-
! CHECK: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]])
61-
! CHECK: omp.yield
62-
! CHECK: }
63-
! CHECK: omp.terminator
64-
! CHECK: }
65-
! CHECK: omp.terminator
66-
! CHECK: }
55+
! DEVICE: omp.target {{.*}} {
56+
! DEVICE: omp.teams {
57+
! COMMON: omp.parallel {
58+
! COMMON: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box<!fir.heap<i32>>}> {bindc_name = ".result"}
59+
! DEVICE: omp.distribute {
60+
! COMMON: omp.wsloop {
61+
! COMMON: omp.loop_nest {{.*}} {
62+
! COMMON: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components
63+
! COMMON: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]]
64+
! COMMON: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]]
65+
! COMMON: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]]
66+
! COMMON: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]])
67+
! COMMON: omp.yield
68+
! COMMON: }
69+
! COMMON: omp.terminator
70+
! COMMON: }
71+
! DEVICE: omp.terminator
72+
! DEVICE: }
73+
! COMMON: omp.terminator
74+
! COMMON: }
75+
! DEVICE: omp.terminator
76+
! DEVICE: }
77+
! DEVICE: omp.terminator
78+
! DEVICE: }

0 commit comments

Comments
 (0)