Skip to content
21 changes: 7 additions & 14 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,13 @@ DataSharingProcessor::DataSharingProcessor(
}

// First phase of data-sharing processing. Runs the symbol-collection passes
// (collectSymbolsForPrivatization, collectDefaultSymbols,
// collectImplicitSymbols, collectPreDeterminedSymbols), then privatizes the
// collected symbols, forwarding `clauseOps` to privatize(), and finally calls
// insertBarrier().
void DataSharingProcessor::processStep1(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
mlir::omp::PrivateClauseOps *clauseOps) {
collectSymbolsForPrivatization();
collectDefaultSymbols();
collectImplicitSymbols();
collectPreDeterminedSymbols();

privatize(clauseOps, privateSyms);
privatize(clauseOps);

insertBarrier();
}
Expand Down Expand Up @@ -415,16 +414,14 @@ void DataSharingProcessor::collectPreDeterminedSymbols() {
preDeterminedSymbols);
}

// Privatizes every symbol in `allPrivatizedSymbols`. A symbol carrying
// CommonBlockDetails is expanded so that each object of the common block is
// passed to doPrivatize() individually; any other symbol is passed to
// doPrivatize() directly.
void DataSharingProcessor::privatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) {
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &mem : commonDet->objects())
doPrivatize(&*mem, clauseOps, privateSyms);
doPrivatize(&*mem, clauseOps);
} else
doPrivatize(sym, clauseOps, privateSyms);
doPrivatize(sym, clauseOps);
}
}

Expand All @@ -441,9 +438,8 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
}
}

void DataSharingProcessor::doPrivatize(
const semantics::Symbol *sym, mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps) {
if (!useDelayedPrivatization) {
cloneSymbol(sym);
copyFirstPrivateSymbol(sym);
Expand Down Expand Up @@ -548,9 +544,6 @@ void DataSharingProcessor::doPrivatize(
clauseOps->privateVars.push_back(hsb.getAddr());
}

if (privateSyms)
privateSyms->push_back(sym);

symToPrivatizer[sym] = privatizerOp;
}

Expand Down
18 changes: 9 additions & 9 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,15 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void collectImplicitSymbols();
void collectPreDeterminedSymbols();
void privatize(mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void privatize(mlir::omp::PrivateClauseOps *clauseOps);
void defaultPrivatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void implicitPrivatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void
doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const semantics::Symbol *sym);
Expand Down Expand Up @@ -147,15 +144,18 @@ class DataSharingProcessor {
// Step2 performs the copying for lastprivates and requires knowledge of the
// MLIR operation to insert the last private update. Step2 adds
// deallocation code as well.
void processStep1(
mlir::omp::PrivateClauseOps *clauseOps = nullptr,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms = nullptr);
void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr);
void processStep2(mlir::Operation *op, bool isLoop);

/// Stores \p iv as the loop iteration variable. Asserts that `loopIV` has not
/// already been set.
void setLoopIV(mlir::Value iv) {
  assert(!loopIV && "Loop iteration variable already set");
  loopIV = iv;
}

/// Read-only view of `allPrivatizedSymbols`, the set iterated by privatize().
const llvm::SetVector<const semantics::Symbol *> &
getAllSymbolsToPrivatize() const {
  return allPrivatizedSymbols;
}
};

} // namespace omp
Expand Down
118 changes: 94 additions & 24 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,33 @@ markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
declareTargetOp.setDeclareTarget(deviceType, captureClause);
}

/// For an operation that takes `omp.private` values as region args, this util
/// merges the private vars info into the region arguments list.
///
/// \tparam OMPOp - the OpenMP op that takes `omp.private` inputs.
/// \tparam InfoTy - the type of private info we want to merge; e.g. mlir::Type
/// or mlir::Location fields of the private var list.
///
/// \param [in] op - the op accepting `omp.private` inputs.
/// \param [in] currentList - the current list of region info that we
/// want to merge private info with. For example this could be the list of types
/// or locations of previous arguments to \p op's region.
/// \param [in] infoAccessor - for a private variable, this returns the
/// data we want to merge: type or location.
/// \param [out] allRegionArgsInfo - the merged list of region info: the entries
/// of \p currentList followed by one entry per private var of \p op. Entries
/// are appended; anything already stored in the vector is kept.
template <typename OMPOp, typename InfoTy>
static void
mergePrivateVarsInfo(OMPOp op, llvm::ArrayRef<InfoTy> currentList,
                     llvm::function_ref<InfoTy(mlir::Value)> infoAccessor,
                     llvm::SmallVectorImpl<InfoTy> &allRegionArgsInfo) {
  mlir::OperandRange privateVars = op.getPrivateVars();

  // Grow once up front instead of repeatedly while appending.
  allRegionArgsInfo.reserve(allRegionArgsInfo.size() + currentList.size() +
                            privateVars.size());

  // The existing region info is copied verbatim ...
  allRegionArgsInfo.append(currentList.begin(), currentList.end());
  // ... and each private var contributes the info selected by the accessor.
  llvm::transform(privateVars, std::back_inserter(allRegionArgsInfo),
                  infoAccessor);
}

//===----------------------------------------------------------------------===//
// Op body generation helper structures and functions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -758,15 +785,28 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
llvm::ArrayRef<const semantics::Symbol *> mapSyms,
llvm::ArrayRef<mlir::Location> mapSymLocs,
llvm::ArrayRef<mlir::Type> mapSymTypes,
DataSharingProcessor &dsp,
const mlir::Location &currentLocation,
const ConstructQueue &queue, ConstructQueue::iterator item) {
assert(mapSymTypes.size() == mapSymLocs.size());

fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Region &region = targetOp.getRegion();

auto *regionBlock =
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
llvm::SmallVector<mlir::Type> allRegionArgTypes;
mergePrivateVarsInfo(targetOp, mapSymTypes,
llvm::function_ref<mlir::Type(mlir::Value)>{
[](mlir::Value v) { return v.getType(); }},
allRegionArgTypes);

llvm::SmallVector<mlir::Location> allRegionArgLocs;
mergePrivateVarsInfo(targetOp, mapSymLocs,
llvm::function_ref<mlir::Location(mlir::Value)>{
[](mlir::Value v) { return v.getLoc(); }},
allRegionArgLocs);

auto *regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
allRegionArgLocs);

// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
Expand Down Expand Up @@ -830,6 +870,20 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
});
}

for (auto [argIndex, argSymbol] :
llvm::enumerate(dsp.getAllSymbolsToPrivatize())) {
argIndex = mapSyms.size() + argIndex;

const mlir::BlockArgument &arg = region.getArgument(argIndex);
converter.bindSymbol(*argSymbol,
hlfir::translateToExtendedValue(
currentLocation, firOpBuilder, hlfir::Entity{arg},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(
*argSymbol, converter.getFoldingContext()))
.first);
}

// Check if cloning the bounds introduced any dependency on the outer region.
// If so, then either clone them as well if they are MemoryEffectFree, or else
// copy them to a new temporary and add them to the map and block_argument
Expand Down Expand Up @@ -907,6 +961,8 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
} else {
genNestedEvaluations(converter, eval);
}

dsp.processStep2(targetOp, /*isLoop=*/false);
}

template <typename OpTy, typename... Args>
Expand Down Expand Up @@ -1048,15 +1104,18 @@ static void genTargetClauses(
devicePtrSyms);
cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes);
cp.processThreadLimit(stmtCtx, clauseOps);
// TODO Support delayed privatization.

if (processHostOnlyClauses)
cp.processNowait(clauseOps);

cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
clause::InReduction, clause::Private, clause::Reduction,
clause::InReduction, clause::Reduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);

// `target private(..)` is only supported in delayed privatization mode.
if (!enableDelayedPrivatization)
cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
}

static void genTargetDataClauses(
Expand Down Expand Up @@ -1289,7 +1348,6 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
lower::StatementContext stmtCtx;
mlir::omp::ParallelClauseOps clauseOps;
llvm::SmallVector<const semantics::Symbol *> privateSyms;
llvm::SmallVector<mlir::Type> reductionTypes;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
Expand Down Expand Up @@ -1319,34 +1377,35 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
/*useDelayedPrivatization=*/true, &symTable);

if (privatize)
dsp.processStep1(&clauseOps, &privateSyms);
dsp.processStep1(&clauseOps);

auto genRegionEntryCB = [&](mlir::Operation *op) {
auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);

llvm::SmallVector<mlir::Location> reductionLocs(
clauseOps.reductionVars.size(), loc);

mlir::OperandRange privateVars = parallelOp.getPrivateVars();
mlir::Region &region = parallelOp.getRegion();
llvm::SmallVector<mlir::Type> allRegionArgTypes;
mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionTypes),
llvm::function_ref<mlir::Type(mlir::Value)>{
[](mlir::Value v) { return v.getType(); }},
allRegionArgTypes);

llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
privateVarTypes.reserve(privateVarTypes.size() + privateVars.size());
llvm::transform(privateVars, std::back_inserter(privateVarTypes),
[](mlir::Value v) { return v.getType(); });
llvm::SmallVector<mlir::Location> allRegionArgLocs;
mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionLocs),
llvm::function_ref<mlir::Location(mlir::Value)>{
[](mlir::Value v) { return v.getLoc(); }},
allRegionArgLocs);

llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
privateVarLocs.reserve(privateVarLocs.size() + privateVars.size());
llvm::transform(privateVars, std::back_inserter(privateVarLocs),
[](mlir::Value v) { return v.getLoc(); });

firOpBuilder.createBlock(&region, /*insertPt=*/{}, privateVarTypes,
privateVarLocs);
mlir::Region &region = parallelOp.getRegion();
firOpBuilder.createBlock(&region, /*insertPt=*/{}, allRegionArgTypes,
allRegionArgLocs);

llvm::SmallVector<const semantics::Symbol *> allSymbols = reductionSyms;
allSymbols.append(privateSyms);
allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(),
dsp.getAllSymbolsToPrivatize().end());

for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
fir::ExtendedValue hostExV = converter.getSymbolExtendedValue(*arg);
converter.bindSymbol(*arg, hlfir::translateToExtendedValue(
loc, firOpBuilder, hlfir::Entity{prv},
/*contiguousHint=*/
Expand Down Expand Up @@ -1541,11 +1600,22 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
deviceAddrLocs, deviceAddrTypes, devicePtrSyms,
devicePtrLocs, devicePtrTypes);

llvm::SmallVector<const semantics::Symbol *> privateSyms;
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, &symTable);
dsp.processStep1(&clauseOps);

// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all the
// symbols used inside the region that have not been explicitly mapped using
// the map clause.
// symbols used inside the region that do not have explicit data-environment
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
// clauses).
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
return;

if (llvm::find(mapSyms, &sym) == mapSyms.end()) {
mlir::Value baseOp = converter.getSymbolAddress(sym);
if (!baseOp)
Expand Down Expand Up @@ -1632,7 +1702,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,

auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
mapLocs, mapTypes, loc, queue, item);
mapLocs, mapTypes, dsp, loc, queue, item);
return targetOp;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
! Tests delayed privatization for `target ... private(..)` for allocatables.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
! RUN: -o - %s 2>&1 | FileCheck %s
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \
! RUN: | FileCheck %s

! Declares an allocatable integer, lists it in the `private` clause of a
! `target` construct, and assigns to it inside the target region.
subroutine target_allocatable
implicit none
integer, allocatable :: alloc_var

!$omp target private(alloc_var)
alloc_var = 10
!$omp end target
end subroutine target_allocatable

! CHECK-LABEL: omp.private {type = private}
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] :
! CHECK-SAME: [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
! CHECK: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "alloc_var", {{.*}}}

! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64

! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
! CHECK: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, {{.*}}}
! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: } else {
! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: }

! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])

! CHECK-NEXT: } dealloc {
! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):

! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]]
! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]]
! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]]
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64

! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] {
! CHECK: %[[PRIV_VAL_2:.*]] = fir.load %[[PRIV_ARG]]
! CHECK-NEXT: %[[PRIV_ADDR_2:.*]] = fir.box_addr %[[PRIV_VAL_2]]
! CHECK-NEXT: fir.freemem %[[PRIV_ADDR_2]]
! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
! CHECK-NEXT: %[[ZEROS_BOX:.*]] = fir.embox %[[ZEROS]]
! CHECK-NEXT: fir.store %[[ZEROS_BOX]] to %[[PRIV_ARG]]
! CHECK-NEXT: }

! CHECK-NEXT: omp.yield
! CHECK-NEXT: }


! CHECK-LABEL: func.func @_QPtarget_allocatable() {

! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}}
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]

! CHECK: omp.target private(
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) {
Loading