diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index e9d820adbd22b..8f6ed4716e18f 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -232,7 +232,17 @@ class ReductionAsElementalConverter { /// by the reduction loop. In general, there is a single /// loop-carried reduction value (e.g. for SUM), but, for example, /// MAXLOC/MINLOC implementation uses multiple reductions. - virtual llvm::SmallVector genReductionInitValues() = 0; + /// \p oneBasedIndices contains any array indices predefined + /// before the reduction loop, i.e. it is empty for total + /// reductions, and contains the one-based indices of the wrapping + /// hlfir.elemental. + /// \p extents are the pre-computed extents of the input array. + /// For total reductions, \p extents holds extents of all dimensions. + /// For partial reductions, \p extents holds a single extent + /// of the DIM dimension. + virtual llvm::SmallVector + genReductionInitValues(mlir::ValueRange oneBasedIndices, + const llvm::SmallVectorImpl &extents) = 0; /// Perform reduction(s) update given a single input array's element /// identified by \p array and \p oneBasedIndices coordinates. @@ -396,6 +406,54 @@ genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder, llvm_unreachable("unsupported type"); } +// Generate a predicate value indicating that an array with the given +// extents is not empty. 
+static mlir::Value +genIsNotEmptyArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, + const llvm::SmallVectorImpl &extents) { + mlir::Value isNotEmpty = builder.createBool(loc, true); + for (auto extent : extents) { + mlir::Value zero = + fir::factory::createZeroValue(builder, loc, extent.getType()); + mlir::Value cmp = builder.create( + loc, mlir::arith::CmpIPredicate::ne, extent, zero); + isNotEmpty = builder.create(loc, isNotEmpty, cmp); + } + return isNotEmpty; +} + +// Helper method for MIN/MAX LOC/VAL reductions. +// It returns a vector of indices such that they address +// the first element of an array (in case of total reduction) +// or its section (in case of partial reduction). +// +// In case of total reduction oneBasedIndices must be empty, +// otherwise, they contain the one-based indices of the wrapping +// hlfir.elemental. +// Basically, the method adds the necessary number of constant-one +// indices into oneBasedIndices. +static llvm::SmallVector genFirstElementIndicesForReduction( + mlir::Location loc, fir::FirOpBuilder &builder, bool isTotalReduction, + mlir::FailureOr dim, unsigned rank, + mlir::ValueRange oneBasedIndices) { + llvm::SmallVector indices{oneBasedIndices}; + mlir::Value one = + builder.createIntegerConstant(loc, builder.getIndexType(), 1); + if (isTotalReduction) { + assert(oneBasedIndices.size() == 0 && + "wrong number of indices for total reduction"); + // Set indices to all-ones. + indices.append(rank, one); + } else { + assert(oneBasedIndices.size() == rank - 1 && + "there must be RANK-1 indices for partial reduction"); + assert(mlir::succeeded(dim) && "partial reduction with invalid DIM"); + // Insert constant-one index at DIM dimension. + indices.insert(indices.begin() + *dim - 1, one); + } + return indices; +} + /// Implementation of ReductionAsElementalConverter interface /// for MAXLOC/MINLOC. 
template @@ -410,6 +468,9 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter { // * 1 reduction value holding the current MIN/MAX. // * 1 boolean indicating whether it is the first time // the mask is true. + // + // If useIsFirst() returns false, then the boolean loop-carried + // value is not used. static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2; static constexpr bool isMax = std::is_same_v; using Base = ReductionAsElementalConverter; @@ -444,7 +505,9 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter { return getResultRank() == 0 || !getDim(); } - virtual llvm::SmallVector genReductionInitValues() final; + virtual llvm::SmallVector genReductionInitValues( + mlir::ValueRange oneBasedIndices, + const llvm::SmallVectorImpl &extents) final; virtual llvm::SmallVector reduceOneElement(const llvm::SmallVectorImpl ¤tValue, hlfir::Entity array, mlir::ValueRange oneBasedIndices) final; @@ -460,8 +523,12 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter { void checkReductions(const llvm::SmallVectorImpl &reductions) const { - assert(reductions.size() == getNumCoors() + 2 && - "invalid number of reductions for MINLOC/MAXLOC"); + if (!useIsFirst()) + assert(reductions.size() == getNumCoors() + 1 && + "invalid number of reductions for MINLOC/MAXLOC"); + else + assert(reductions.size() == getNumCoors() + 2 && + "invalid number of reductions for MINLOC/MAXLOC"); } mlir::Value @@ -473,13 +540,62 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter { mlir::Value getIsFirst(const llvm::SmallVectorImpl &reductions) const { checkReductions(reductions); + assert(useIsFirst() && "IsFirst predicate must not be used"); return reductions[getNumCoors() + 1]; } + + // Return true iff the input can contain NaNs, and they should be + // honored, such that all-NaNs input must produce the location + // of the first unmasked NaN. 
+ bool honorNans() const { + return !static_cast(getFastMath() & mlir::arith::FastMathFlags::nnan); + } + + // Return true iff we have to use the loop-carried IsFirst predicate. + // If there is no mask, we can initialize the reductions using + // the first elements of the input. + // If NaNs are not honored, we can initialize the starting MIN/MAX + // value to +/-LARGEST; the coordinates are guaranteed to be updated + // properly for non-empty input without NaNs. + bool useIsFirst() const { return getMask() && honorNans(); } }; template llvm::SmallVector -MinMaxlocAsElementalConverter::genReductionInitValues() { +MinMaxlocAsElementalConverter::genReductionInitValues( + mlir::ValueRange oneBasedIndices, + const llvm::SmallVectorImpl &extents) { + fir::IfOp ifOp; + if (!useIsFirst() && honorNans()) { + // Check if we can load the value of the first element in the array + // or its section (for partial reduction). + assert(!getMask() && "cannot fetch first element when mask is present"); + assert(extents.size() == getNumCoors() && + "wrong number of extents for MINLOC/MAXLOC reduction"); + mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents); + + llvm::SmallVector indices = genFirstElementIndicesForReduction( + loc, builder, isTotalReduction(), getConstDim(), getSourceRank(), + oneBasedIndices); + + llvm::SmallVector ifTypes(getNumCoors(), + getResultElementType()); + ifTypes.push_back(getSourceElementType()); + ifOp = builder.create(loc, ifTypes, isNotEmpty, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + mlir::Value one = + builder.createIntegerConstant(loc, getResultElementType(), 1); + llvm::SmallVector results(getNumCoors(), one); + mlir::Value minMaxFirst = + hlfir::loadElementAt(loc, builder, hlfir::Entity{getSource()}, indices); + results.push_back(minMaxFirst); + builder.create(loc, results); + + // In the 'else' block use default init values. 
+ builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + } + // Initial value for the coordinate(s) is zero. mlir::Value zeroCoor = fir::factory::createZeroValue(builder, loc, getResultElementType()); @@ -490,11 +606,17 @@ MinMaxlocAsElementalConverter::genReductionInitValues() { genMinMaxInitValue(loc, builder, getSourceElementType()); result.push_back(minMaxInit); - // Initial value for isFirst predicate. It is switched to false, - // when the reduction update dynamically happens inside the reduction - // loop. - mlir::Value trueVal = builder.createBool(loc, true); - result.push_back(trueVal); + if (ifOp) { + builder.create(loc, result); + builder.setInsertionPointAfter(ifOp); + result = ifOp.getResults(); + } else if (useIsFirst()) { + // Initial value for isFirst predicate. It is switched to false, + // when the reduction update dynamically happens inside the reduction + // loop. + mlir::Value trueVal = builder.createBool(loc, true); + result.push_back(trueVal); + } return result; } @@ -509,9 +631,12 @@ MinMaxlocAsElementalConverter::reduceOneElement( hlfir::loadElementAt(loc, builder, array, oneBasedIndices); mlir::Value cmp = genMinMaxComparison(loc, builder, elementValue, getCurrentMinMax(currentValue)); - // If isFirst is true, then do the reduction update regardless - // of the FP comparison. - cmp = builder.create(loc, cmp, getIsFirst(currentValue)); + if (useIsFirst()) { + // If isFirst is true, then do the reduction update regardless + // of the FP comparison. 
+ cmp = + builder.create(loc, cmp, getIsFirst(currentValue)); + } llvm::SmallVector newIndices; int64_t dim = 1; @@ -537,8 +662,10 @@ MinMaxlocAsElementalConverter::reduceOneElement( loc, cmp, elementValue, getCurrentMinMax(currentValue)); newIndices.push_back(newMinMax); - mlir::Value newIsFirst = builder.createBool(loc, false); - newIndices.push_back(newIsFirst); + if (useIsFirst()) { + mlir::Value newIsFirst = builder.createBool(loc, false); + newIndices.push_back(newIsFirst); + } assert(currentValue.size() == newIndices.size() && "invalid number of updated reductions"); @@ -629,7 +756,8 @@ class MinMaxvalAsElementalConverter // // The boolean flag is used to replace the initial value // with the first input element even if it is NaN. - static constexpr unsigned numReductions = 2; + // If useIsFirst() returns false, then the boolean loop-carried + // value is not used. static constexpr bool isMax = std::is_same_v; using Base = NumericReductionAsElementalConverterBase; @@ -646,19 +774,9 @@ class MinMaxvalAsElementalConverter return mlir::success(); } - virtual llvm::SmallVector genReductionInitValues() final { - llvm::SmallVector result; - fir::FirOpBuilder &builder = this->builder; - mlir::Location loc = this->loc; - mlir::Value init = - genMinMaxInitValue(loc, builder, this->getResultElementType()); - result.push_back(init); - // Initial value for isFirst predicate. It is switched to false, - // when the reduction update dynamically happens inside the reduction - // loop. 
- result.push_back(builder.createBool(loc, true)); - return result; - } + virtual llvm::SmallVector genReductionInitValues( + mlir::ValueRange oneBasedIndices, + const llvm::SmallVectorImpl &extents) final; virtual llvm::SmallVector reduceOneElement(const llvm::SmallVectorImpl ¤tValue, @@ -673,12 +791,14 @@ class MinMaxvalAsElementalConverter mlir::Value currentMinMax = getCurrentMinMax(currentValue); mlir::Value cmp = genMinMaxComparison(loc, builder, elementValue, currentMinMax); - cmp = - builder.create(loc, cmp, getIsFirst(currentValue)); + if (useIsFirst()) + cmp = builder.create(loc, cmp, + getIsFirst(currentValue)); mlir::Value newMinMax = builder.create( loc, cmp, elementValue, currentMinMax); result.push_back(newMinMax); - result.push_back(builder.createBool(loc, false)); + if (useIsFirst()) + result.push_back(builder.createBool(loc, false)); return result; } @@ -690,7 +810,7 @@ class MinMaxvalAsElementalConverter void checkReductions(const llvm::SmallVectorImpl &reductions) const { - assert(reductions.size() == numReductions && + assert(reductions.size() == getNumReductions() && "invalid number of reductions for MINVAL/MAXVAL"); } @@ -703,10 +823,80 @@ class MinMaxvalAsElementalConverter mlir::Value getIsFirst(const llvm::SmallVectorImpl &reductions) const { this->checkReductions(reductions); + assert(useIsFirst() && "IsFirst predicate must not be used"); return reductions[1]; } + + // Return true iff the input can contain NaNs, and they should be + // honored, such that all-NaNs input must produce NaN result. + bool honorNans() const { + return !static_cast(this->getFastMath() & + mlir::arith::FastMathFlags::nnan); + } + + // Return true iff we have to use the loop-carried IsFirst predicate. + // If there is no mask, we can initialize the reductions using + // the first elements of the input. + // If NaNs are not honored, we can initialize the starting MIN/MAX + // value to +/-LARGEST. 
+ bool useIsFirst() const { return this->getMask() && honorNans(); } + + std::size_t getNumReductions() const { return useIsFirst() ? 2 : 1; } }; +template +llvm::SmallVector +MinMaxvalAsElementalConverter::genReductionInitValues( + mlir::ValueRange oneBasedIndices, + const llvm::SmallVectorImpl &extents) { + llvm::SmallVector result; + fir::FirOpBuilder &builder = this->builder; + mlir::Location loc = this->loc; + + fir::IfOp ifOp; + if (!useIsFirst() && honorNans()) { + // Check if we can load the value of the first element in the array + // or its section (for partial reduction). + assert(!this->getMask() && + "cannot fetch first element when mask is present"); + assert(extents.size() == + (this->isTotalReduction() ? this->getSourceRank() : 1u) && + "wrong number of extents for MINVAL/MAXVAL reduction"); + mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents); + llvm::SmallVector indices = genFirstElementIndicesForReduction( + loc, builder, this->isTotalReduction(), this->getConstDim(), + this->getSourceRank(), oneBasedIndices); + + ifOp = + builder.create(loc, this->getResultElementType(), isNotEmpty, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + mlir::Value minMaxFirst = hlfir::loadElementAt( + loc, builder, hlfir::Entity{this->getSource()}, indices); + builder.create(loc, minMaxFirst); + + // In the 'else' block use default init values. + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + } + + mlir::Value init = + genMinMaxInitValue(loc, builder, this->getResultElementType()); + result.push_back(init); + + if (ifOp) { + builder.create(loc, result); + builder.setInsertionPointAfter(ifOp); + result = ifOp.getResults(); + } else if (useIsFirst()) { + // Initial value for isFirst predicate. It is switched to false, + // when the reduction update dynamically happens inside the reduction + // loop. 
+ result.push_back(builder.createBool(loc, true)); + } + + return result; +} + /// Reduction converter for SUM. class SumAsElementalConverter : public NumericReductionAsElementalConverterBase { @@ -717,7 +907,10 @@ class SumAsElementalConverter : Base{op, rewriter} {} private: - virtual llvm::SmallVector genReductionInitValues() final { + virtual llvm::SmallVector genReductionInitValues( + [[maybe_unused]] mlir::ValueRange oneBasedIndices, + [[maybe_unused]] const llvm::SmallVectorImpl &extents) + final { return { fir::factory::createZeroValue(builder, loc, getResultElementType())}; } @@ -781,7 +974,10 @@ class AllAnyAsElementalConverter : Base{op, rewriter} {} private: - virtual llvm::SmallVector genReductionInitValues() final { + virtual llvm::SmallVector genReductionInitValues( + [[maybe_unused]] mlir::ValueRange oneBasedIndices, + [[maybe_unused]] const llvm::SmallVectorImpl &extents) + final { return {this->builder.createBool(this->loc, isAll ? true : false)}; } virtual llvm::SmallVector @@ -819,7 +1015,10 @@ class CountAsElementalConverter : Base{op, rewriter} {} private: - virtual llvm::SmallVector genReductionInitValues() final { + virtual llvm::SmallVector genReductionInitValues( + [[maybe_unused]] mlir::ValueRange oneBasedIndices, + [[maybe_unused]] const llvm::SmallVectorImpl &extents) + final { return { fir::factory::createZeroValue(builder, loc, getResultElementType())}; } @@ -881,10 +1080,6 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() { // Loop over all indices in the DIM dimension, and reduce all values. // If DIM is not present, do total reduction. - // Initial value for the reduction. - llvm::SmallVector reductionInitValues = - genReductionInitValues(); - llvm::SmallVector extents; if (isTotalReduce) extents = arrayExtents; @@ -892,6 +1087,10 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() { extents.push_back( builder.createConvert(loc, builder.getIndexType(), dimExtent)); + // Initial value for the reduction. 
+ llvm::SmallVector reductionInitValues = + genReductionInitValues(inputIndices, extents); + auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange oneBasedIndices, mlir::ValueRange reductionArgs) diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir index 4e9f5d0ebb08a..b285945027afb 100644 --- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir @@ -294,6 +294,179 @@ func.func @test_partial_var(%input: !fir.box>, %mask: !fir // CHECK: return %[[VAL_14]] : !hlfir.expr // CHECK: } +func.func @test_total_expr_nomask(%input: !hlfir.expr) -> !hlfir.expr<3xi32> { + %0 = hlfir.maxloc %input {fastmath = #arith.fastmath} : (!hlfir.expr) -> !hlfir.expr<3xi32> + return %0 : !hlfir.expr<3xi32> +} +// CHECK-LABEL: func.func @test_total_expr_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr<3xi32> { +// CHECK: %[[VAL_1:.*]] = arith.constant false +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_4:.*]] = arith.constant -3.40282347E+38 : f32 +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32> +// CHECK: %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<3> +// CHECK: %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_8]] : index +// CHECK: %[[VAL_15:.*]] = arith.cmpi ne, 
%[[VAL_12]], %[[VAL_8]] : index +// CHECK: %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1 +// CHECK: %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_8]] : index +// CHECK: %[[VAL_18:.*]] = arith.andi %[[VAL_16]], %[[VAL_17]] : i1 +// CHECK: %[[VAL_19:.*]]:4 = fir.if %[[VAL_18]] -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_20:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_7]], %[[VAL_7]] : (!hlfir.expr, index, index, index) -> f32 +// CHECK: fir.result %[[VAL_6]], %[[VAL_6]], %[[VAL_6]], %[[VAL_20]] : i32, i32, i32, f32 +// CHECK: } else { +// CHECK: fir.result %[[VAL_5]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]] : i32, i32, i32, f32 +// CHECK: } +// CHECK: %[[VAL_21:.*]]:4 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_7]] to %[[VAL_13]] step %[[VAL_7]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_24:.*]]#0, %[[VAL_25:.*]] = %[[VAL_24]]#1, %[[VAL_26:.*]] = %[[VAL_24]]#2, %[[VAL_27:.*]] = %[[VAL_24]]#3) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_28:.*]]:4 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_7]] to %[[VAL_12]] step %[[VAL_7]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_25]], %[[VAL_32:.*]] = %[[VAL_26]], %[[VAL_33:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_34:.*]]:4 = fir.do_loop %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_11]] step %[[VAL_7]] unordered iter_args(%[[VAL_36:.*]] = %[[VAL_30]], %[[VAL_37:.*]] = %[[VAL_31]], %[[VAL_38:.*]] = %[[VAL_32]], %[[VAL_39:.*]] = %[[VAL_33]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_40:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_35]], %[[VAL_29]], %[[VAL_22]] : (!hlfir.expr, index, index, index) -> f32 +// CHECK: %[[VAL_41:.*]] = arith.cmpf ogt, %[[VAL_40]], %[[VAL_39]] fastmath : f32 +// CHECK: %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_39]], %[[VAL_39]] fastmath : f32 +// CHECK: %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath : f32 +// CHECK: %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1 +// CHECK: %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1 
+// CHECK: %[[VAL_46:.*]] = fir.convert %[[VAL_35]] : (index) -> i32 +// CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_36]] : i32 +// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_29]] : (index) -> i32 +// CHECK: %[[VAL_49:.*]] = arith.select %[[VAL_45]], %[[VAL_48]], %[[VAL_37]] : i32 +// CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_22]] : (index) -> i32 +// CHECK: %[[VAL_51:.*]] = arith.select %[[VAL_45]], %[[VAL_50]], %[[VAL_38]] : i32 +// CHECK: %[[VAL_52:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_39]] : f32 +// CHECK: fir.result %[[VAL_47]], %[[VAL_49]], %[[VAL_51]], %[[VAL_52]] : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: %[[VAL_55:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_7]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_56:.*]]#0 to %[[VAL_55]] : i32, !fir.ref +// CHECK: %[[VAL_57:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_56]]#1 to %[[VAL_57]] : i32, !fir.ref +// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_56]]#2 to %[[VAL_58]] : i32, !fir.ref +// CHECK: %[[VAL_59:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_1]] : (!fir.ref>, i1) -> !hlfir.expr<3xi32> +// CHECK: return %[[VAL_59]] : !hlfir.expr<3xi32> +// CHECK: } + +func.func @test_partial_var_nomask(%input: !fir.box>) -> !hlfir.expr { + %dim = arith.constant 2 : i32 + %0 = hlfir.maxloc %input dim %dim {fastmath = #arith.fastmath} : (!fir.box>, i32) -> !hlfir.expr + return %0 : !hlfir.expr +} +// CHECK-LABEL: func.func @test_partial_var_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box>) -> !hlfir.expr { +// CHECK: %[[VAL_1:.*]] = arith.constant 
-3.40282347E+38 : f32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_7]]#1, %[[VAL_9]]#1 : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_11:.*]] = hlfir.elemental %[[VAL_10]] unordered : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index): +// CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_8]]#1, %[[VAL_6]] : index +// CHECK: %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (i32, f32) { +// CHECK: %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_12]], %[[VAL_19]] : index +// CHECK: %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_13]], %[[VAL_21]] : index +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_17]]#0, %[[VAL_22]]) : (!fir.box>, index, index, index) -> !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref +// CHECK: fir.result %[[VAL_3]], %[[VAL_24]] : i32, f32 +// CHECK: } else { +// CHECK: fir.result %[[VAL_2]], %[[VAL_1]] : i32, f32 +// CHECK: } +// 
CHECK: %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %[[VAL_5]] to %[[VAL_8]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_27:.*]] = %[[VAL_28:.*]]#0, %[[VAL_29:.*]] = %[[VAL_28]]#1) -> (i32, f32) { +// CHECK: %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_26]], %[[VAL_35]] : index +// CHECK: %[[VAL_37:.*]] = arith.subi %[[VAL_32]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_13]], %[[VAL_37]] : index +// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]], %[[VAL_38]]) : (!fir.box>, index, index, index) -> !fir.ref +// CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref +// CHECK: %[[VAL_41:.*]] = arith.cmpf ogt, %[[VAL_40]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath : f32 +// CHECK: %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1 +// CHECK: %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1 +// CHECK: %[[VAL_46:.*]] = fir.convert %[[VAL_26]] : (index) -> i32 +// CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_27]] : i32 +// CHECK: %[[VAL_48:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_29]] : f32 +// CHECK: fir.result %[[VAL_47]], %[[VAL_48]] : i32, f32 +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_49:.*]]#0 : i32 +// CHECK: } +// CHECK: return %[[VAL_11]] : !hlfir.expr +// CHECK: } + 
+// Test that 'nnan' allows using -LARGEST value as the reduction init. +func.func @test_total_expr_nnan(%input: !hlfir.expr) -> !hlfir.expr<3xi32> { + %0 = hlfir.maxloc %input {fastmath = #arith.fastmath} : (!hlfir.expr) -> !hlfir.expr<3xi32> + return %0 : !hlfir.expr<3xi32> +} +// CHECK-LABEL: func.func @test_total_expr_nnan( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr<3xi32> { +// CHECK: %[[VAL_1:.*]] = arith.constant false +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32 +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_7:.*]] = fir.alloca !fir.array<3xi32> +// CHECK: %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<3> +// CHECK: %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 2 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_12:.*]]:4 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_11]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_6]], %[[VAL_15:.*]] = %[[VAL_6]], %[[VAL_16:.*]] = %[[VAL_6]], %[[VAL_17:.*]] = %[[VAL_5]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_18:.*]]:4 = fir.do_loop %[[VAL_19:.*]] = %[[VAL_4]] to %[[VAL_10]] step %[[VAL_4]] iter_args(%[[VAL_20:.*]] = %[[VAL_14]], %[[VAL_21:.*]] = %[[VAL_15]], %[[VAL_22:.*]] = %[[VAL_16]], %[[VAL_23:.*]] = %[[VAL_17]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_24:.*]]:4 = fir.do_loop %[[VAL_25:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_4]] iter_args(%[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_21]], %[[VAL_28:.*]] = %[[VAL_22]], %[[VAL_29:.*]] = %[[VAL_23]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_30:.*]] = hlfir.apply 
%[[VAL_0]], %[[VAL_25]], %[[VAL_19]], %[[VAL_13]] : (!hlfir.expr, index, index, index) -> f32 +// CHECK: %[[VAL_31:.*]] = arith.cmpf ogt, %[[VAL_30]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_32:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_33:.*]] = arith.cmpf oeq, %[[VAL_30]], %[[VAL_30]] fastmath : f32 +// CHECK: %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1 +// CHECK: %[[VAL_35:.*]] = arith.ori %[[VAL_31]], %[[VAL_34]] : i1 +// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_25]] : (index) -> i32 +// CHECK: %[[VAL_37:.*]] = arith.select %[[VAL_35]], %[[VAL_36]], %[[VAL_26]] : i32 +// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_19]] : (index) -> i32 +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_38]], %[[VAL_27]] : i32 +// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_13]] : (index) -> i32 +// CHECK: %[[VAL_41:.*]] = arith.select %[[VAL_35]], %[[VAL_40]], %[[VAL_28]] : i32 +// CHECK: %[[VAL_42:.*]] = arith.select %[[VAL_35]], %[[VAL_30]], %[[VAL_29]] : f32 +// CHECK: fir.result %[[VAL_37]], %[[VAL_39]], %[[VAL_41]], %[[VAL_42]] : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_43:.*]]#0, %[[VAL_43]]#1, %[[VAL_43]]#2, %[[VAL_43]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_44:.*]]#0, %[[VAL_44]]#1, %[[VAL_44]]#2, %[[VAL_44]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_4]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_46:.*]]#0 to %[[VAL_45]] : i32, !fir.ref +// CHECK: %[[VAL_47:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_46]]#1 to %[[VAL_47]] : i32, !fir.ref +// CHECK: %[[VAL_48:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_2]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_46]]#2 to %[[VAL_48]] : i32, !fir.ref +// CHECK: %[[VAL_49:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_1]] : (!fir.ref>, i1) -> !hlfir.expr<3xi32> +// 
CHECK: return %[[VAL_49]] : !hlfir.expr<3xi32> +// CHECK: } + // Character comparisons are not supported yet. func.func @test_character(%input: !fir.box>>) -> !hlfir.expr<1xi32> { %0 = hlfir.maxloc %input : (!fir.box>>) -> !hlfir.expr<1xi32> diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir index 8f414e5c4b563..87ed365f9de26 100644 --- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir @@ -184,3 +184,118 @@ func.func @test_partial_var(%input: !fir.box>, %mask: !fir.b // CHECK: } // CHECK: return %[[VAL_10]] : !hlfir.expr // CHECK: } + +func.func @test_partial_expr_nomask(%input: !hlfir.expr) -> !hlfir.expr { + %dim = arith.constant 1 : i32 + %0 = hlfir.maxval %input dim %dim {fastmath = #arith.fastmath} : (!hlfir.expr, i32) -> !hlfir.expr + return %0 : !hlfir.expr +} +// CHECK-LABEL: func.func @test_partial_expr_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr { +// CHECK: %[[VAL_1:.*]] = arith.constant -1.7976931348623157E+308 : f64 +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): +// CHECK: %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_3]] : index +// CHECK: %[[VAL_11:.*]] = fir.if %[[VAL_10]] -> (f64) { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_2]], %[[VAL_9]] : (!hlfir.expr, index, index) -> f64 +// CHECK: 
fir.result %[[VAL_12]] : f64 +// CHECK: } else { +// CHECK: fir.result %[[VAL_1]] : f64 +// CHECK: } +// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (f64) { +// CHECK: %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_14]], %[[VAL_9]] : (!hlfir.expr, index, index) -> f64 +// CHECK: %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_15]] fastmath : f64 +// CHECK: %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_15]], %[[VAL_15]] fastmath : f64 +// CHECK: %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath : f64 +// CHECK: %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1 +// CHECK: %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1 +// CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_15]] : f64 +// CHECK: fir.result %[[VAL_22]] : f64 +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_13]] : f64 +// CHECK: } +// CHECK: return %[[VAL_8]] : !hlfir.expr +// CHECK: } + +func.func @test_total_var_nomask(%input: !fir.box>) -> f16 { + %0 = hlfir.maxval %input {fastmath = #arith.fastmath} : (!fir.box>) -> f16 + return %0 : f16 +} +// CHECK-LABEL: func.func @test_total_var_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box>) -> f16 { +// CHECK: %[[VAL_1:.*]] = arith.constant -6.550400e+04 : f16 +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]]#1, %[[VAL_3]] : index +// CHECK: %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]]#1, %[[VAL_3]] : index +// CHECK: %[[VAL_8:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1 +// CHECK: %[[VAL_9:.*]] = fir.if %[[VAL_8]] -> (f16) { +// CHECK: 
%[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_10]]#0, %[[VAL_11]]#0) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.ref +// CHECK: fir.result %[[VAL_13]] : f16 +// CHECK: } else { +// CHECK: fir.result %[[VAL_1]] : f16 +// CHECK: } +// CHECK: %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (f16) { +// CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f16) { +// CHECK: %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_2]] : index +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : index +// CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_15]], %[[VAL_24]] : index +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_23]], %[[VAL_25]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = arith.cmpf ogt, %[[VAL_27]], %[[VAL_19]] fastmath : f16 +// CHECK: %[[VAL_29:.*]] = arith.cmpf une, %[[VAL_19]], %[[VAL_19]] fastmath : f16 +// CHECK: %[[VAL_30:.*]] = arith.cmpf oeq, %[[VAL_27]], %[[VAL_27]] fastmath : f16 +// CHECK: %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1 +// CHECK: %[[VAL_32:.*]] = arith.ori %[[VAL_28]], %[[VAL_31]] : i1 +// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_27]], %[[VAL_19]] : 
f16 +// CHECK: fir.result %[[VAL_33]] : f16 +// CHECK: } +// CHECK: fir.result %[[VAL_17]] : f16 +// CHECK: } +// CHECK: return %[[VAL_14]] : f16 +// CHECK: } + +// Test that 'nnan' allows using -LARGEST value as the reduction init. +func.func @test_partial_expr_nnan(%input: !hlfir.expr) -> !hlfir.expr { + %dim = arith.constant 1 : i32 + %0 = hlfir.maxval %input dim %dim {fastmath = #arith.fastmath} : (!hlfir.expr, i32) -> !hlfir.expr + return %0 : !hlfir.expr +} +// CHECK-LABEL: func.func @test_partial_expr_nnan( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr { +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant -1.7976931348623157E+308 : f64 +// CHECK: %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> +// CHECK: %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_8:.*]]: index): +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] iter_args(%[[VAL_11:.*]] = %[[VAL_2]]) -> (f64) { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_8]] : (!hlfir.expr, index, index) -> f64 +// CHECK: %[[VAL_13:.*]] = arith.cmpf ogt, %[[VAL_12]], %[[VAL_11]] fastmath : f64 +// CHECK: %[[VAL_14:.*]] = arith.cmpf une, %[[VAL_11]], %[[VAL_11]] fastmath : f64 +// CHECK: %[[VAL_15:.*]] = arith.cmpf oeq, %[[VAL_12]], %[[VAL_12]] fastmath : f64 +// CHECK: %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1 +// CHECK: %[[VAL_17:.*]] = arith.ori %[[VAL_13]], %[[VAL_16]] : i1 +// CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_11]] : f64 +// CHECK: fir.result 
%[[VAL_18]] : f64 +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_9]] : f64 +// CHECK: } +// CHECK: return %[[VAL_7]] : !hlfir.expr +// CHECK: } diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir index 0c17fd6fea92c..b9a7195b5f139 100644 --- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir @@ -294,6 +294,179 @@ func.func @test_partial_var(%input: !fir.box>, %mask: !fir // CHECK: return %[[VAL_14]] : !hlfir.expr // CHECK: } +func.func @test_total_expr_nomask(%input: !hlfir.expr) -> !hlfir.expr<3xi32> { + %0 = hlfir.minloc %input {fastmath = #arith.fastmath} : (!hlfir.expr) -> !hlfir.expr<3xi32> + return %0 : !hlfir.expr<3xi32> +} +// CHECK-LABEL: func.func @test_total_expr_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr<3xi32> { +// CHECK: %[[VAL_1:.*]] = arith.constant false +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 3.40282347E+38 : f32 +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32> +// CHECK: %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<3> +// CHECK: %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_8]] : index +// CHECK: %[[VAL_15:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_8]] : index +// CHECK: %[[VAL_16:.*]] = arith.andi 
%[[VAL_14]], %[[VAL_15]] : i1 +// CHECK: %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_8]] : index +// CHECK: %[[VAL_18:.*]] = arith.andi %[[VAL_16]], %[[VAL_17]] : i1 +// CHECK: %[[VAL_19:.*]]:4 = fir.if %[[VAL_18]] -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_20:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_7]], %[[VAL_7]] : (!hlfir.expr, index, index, index) -> f32 +// CHECK: fir.result %[[VAL_6]], %[[VAL_6]], %[[VAL_6]], %[[VAL_20]] : i32, i32, i32, f32 +// CHECK: } else { +// CHECK: fir.result %[[VAL_5]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]] : i32, i32, i32, f32 +// CHECK: } +// CHECK: %[[VAL_21:.*]]:4 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_7]] to %[[VAL_13]] step %[[VAL_7]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_24:.*]]#0, %[[VAL_25:.*]] = %[[VAL_24]]#1, %[[VAL_26:.*]] = %[[VAL_24]]#2, %[[VAL_27:.*]] = %[[VAL_24]]#3) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_28:.*]]:4 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_7]] to %[[VAL_12]] step %[[VAL_7]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_25]], %[[VAL_32:.*]] = %[[VAL_26]], %[[VAL_33:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_34:.*]]:4 = fir.do_loop %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_11]] step %[[VAL_7]] unordered iter_args(%[[VAL_36:.*]] = %[[VAL_30]], %[[VAL_37:.*]] = %[[VAL_31]], %[[VAL_38:.*]] = %[[VAL_32]], %[[VAL_39:.*]] = %[[VAL_33]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_40:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_35]], %[[VAL_29]], %[[VAL_22]] : (!hlfir.expr, index, index, index) -> f32 +// CHECK: %[[VAL_41:.*]] = arith.cmpf olt, %[[VAL_40]], %[[VAL_39]] fastmath : f32 +// CHECK: %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_39]], %[[VAL_39]] fastmath : f32 +// CHECK: %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath : f32 +// CHECK: %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1 +// CHECK: %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1 +// CHECK: %[[VAL_46:.*]] = fir.convert %[[VAL_35]] : (index) -> i32 
+// CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_36]] : i32 +// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_29]] : (index) -> i32 +// CHECK: %[[VAL_49:.*]] = arith.select %[[VAL_45]], %[[VAL_48]], %[[VAL_37]] : i32 +// CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_22]] : (index) -> i32 +// CHECK: %[[VAL_51:.*]] = arith.select %[[VAL_45]], %[[VAL_50]], %[[VAL_38]] : i32 +// CHECK: %[[VAL_52:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_39]] : f32 +// CHECK: fir.result %[[VAL_47]], %[[VAL_49]], %[[VAL_51]], %[[VAL_52]] : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: %[[VAL_55:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_7]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_56:.*]]#0 to %[[VAL_55]] : i32, !fir.ref +// CHECK: %[[VAL_57:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_56]]#1 to %[[VAL_57]] : i32, !fir.ref +// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_56]]#2 to %[[VAL_58]] : i32, !fir.ref +// CHECK: %[[VAL_59:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_1]] : (!fir.ref>, i1) -> !hlfir.expr<3xi32> +// CHECK: return %[[VAL_59]] : !hlfir.expr<3xi32> +// CHECK: } + +func.func @test_partial_var_nomask(%input: !fir.box>) -> !hlfir.expr { + %dim = arith.constant 2 : i32 + %0 = hlfir.minloc %input dim %dim {fastmath = #arith.fastmath} : (!fir.box>, i32) -> !hlfir.expr + return %0 : !hlfir.expr +} +// CHECK-LABEL: func.func @test_partial_var_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box>) -> !hlfir.expr { +// CHECK: %[[VAL_1:.*]] = arith.constant 3.40282347E+38 : f32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_7]]#1, %[[VAL_9]]#1 : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_11:.*]] = hlfir.elemental %[[VAL_10]] unordered : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index): +// CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_8]]#1, %[[VAL_6]] : index +// CHECK: %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (i32, f32) { +// CHECK: %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_12]], %[[VAL_19]] : index +// CHECK: %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_13]], %[[VAL_21]] : index +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_17]]#0, %[[VAL_22]]) : (!fir.box>, index, index, index) -> !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref +// CHECK: fir.result %[[VAL_3]], %[[VAL_24]] : i32, f32 +// CHECK: } else { +// CHECK: fir.result %[[VAL_2]], %[[VAL_1]] : i32, f32 +// CHECK: } +// CHECK: %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %[[VAL_5]] to 
%[[VAL_8]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_27:.*]] = %[[VAL_28:.*]]#0, %[[VAL_29:.*]] = %[[VAL_28]]#1) -> (i32, f32) { +// CHECK: %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_26]], %[[VAL_35]] : index +// CHECK: %[[VAL_37:.*]] = arith.subi %[[VAL_32]]#0, %[[VAL_5]] : index +// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_13]], %[[VAL_37]] : index +// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]], %[[VAL_38]]) : (!fir.box>, index, index, index) -> !fir.ref +// CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref +// CHECK: %[[VAL_41:.*]] = arith.cmpf olt, %[[VAL_40]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath : f32 +// CHECK: %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1 +// CHECK: %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1 +// CHECK: %[[VAL_46:.*]] = fir.convert %[[VAL_26]] : (index) -> i32 +// CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_27]] : i32 +// CHECK: %[[VAL_48:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_29]] : f32 +// CHECK: fir.result %[[VAL_47]], %[[VAL_48]] : i32, f32 +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_49:.*]]#0 : i32 +// CHECK: } +// CHECK: return %[[VAL_11]] : !hlfir.expr +// CHECK: } + +// Test that 'nnan' allows using LARGEST value as the reduction 
init. +func.func @test_total_expr_nnan(%input: !hlfir.expr) -> !hlfir.expr<3xi32> { + %0 = hlfir.minloc %input {fastmath = #arith.fastmath} : (!hlfir.expr) -> !hlfir.expr<3xi32> + return %0 : !hlfir.expr<3xi32> +} +// CHECK-LABEL: func.func @test_total_expr_nnan( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr<3xi32> { +// CHECK: %[[VAL_1:.*]] = arith.constant false +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 3.40282347E+38 : f32 +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_7:.*]] = fir.alloca !fir.array<3xi32> +// CHECK: %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<3> +// CHECK: %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 2 : index} : (!fir.shape<3>) -> index +// CHECK: %[[VAL_12:.*]]:4 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_11]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_6]], %[[VAL_15:.*]] = %[[VAL_6]], %[[VAL_16:.*]] = %[[VAL_6]], %[[VAL_17:.*]] = %[[VAL_5]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_18:.*]]:4 = fir.do_loop %[[VAL_19:.*]] = %[[VAL_4]] to %[[VAL_10]] step %[[VAL_4]] iter_args(%[[VAL_20:.*]] = %[[VAL_14]], %[[VAL_21:.*]] = %[[VAL_15]], %[[VAL_22:.*]] = %[[VAL_16]], %[[VAL_23:.*]] = %[[VAL_17]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_24:.*]]:4 = fir.do_loop %[[VAL_25:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_4]] iter_args(%[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_21]], %[[VAL_28:.*]] = %[[VAL_22]], %[[VAL_29:.*]] = %[[VAL_23]]) -> (i32, i32, i32, f32) { +// CHECK: %[[VAL_30:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_25]], %[[VAL_19]], %[[VAL_13]] : (!hlfir.expr, 
index, index, index) -> f32 +// CHECK: %[[VAL_31:.*]] = arith.cmpf olt, %[[VAL_30]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_32:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath : f32 +// CHECK: %[[VAL_33:.*]] = arith.cmpf oeq, %[[VAL_30]], %[[VAL_30]] fastmath : f32 +// CHECK: %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1 +// CHECK: %[[VAL_35:.*]] = arith.ori %[[VAL_31]], %[[VAL_34]] : i1 +// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_25]] : (index) -> i32 +// CHECK: %[[VAL_37:.*]] = arith.select %[[VAL_35]], %[[VAL_36]], %[[VAL_26]] : i32 +// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_19]] : (index) -> i32 +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_38]], %[[VAL_27]] : i32 +// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_13]] : (index) -> i32 +// CHECK: %[[VAL_41:.*]] = arith.select %[[VAL_35]], %[[VAL_40]], %[[VAL_28]] : i32 +// CHECK: %[[VAL_42:.*]] = arith.select %[[VAL_35]], %[[VAL_30]], %[[VAL_29]] : f32 +// CHECK: fir.result %[[VAL_37]], %[[VAL_39]], %[[VAL_41]], %[[VAL_42]] : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_43:.*]]#0, %[[VAL_43]]#1, %[[VAL_43]]#2, %[[VAL_43]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: fir.result %[[VAL_44:.*]]#0, %[[VAL_44]]#1, %[[VAL_44]]#2, %[[VAL_44]]#3 : i32, i32, i32, f32 +// CHECK: } +// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_4]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_46:.*]]#0 to %[[VAL_45]] : i32, !fir.ref +// CHECK: %[[VAL_47:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_46]]#1 to %[[VAL_47]] : i32, !fir.ref +// CHECK: %[[VAL_48:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_2]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_46]]#2 to %[[VAL_48]] : i32, !fir.ref +// CHECK: %[[VAL_49:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_1]] : (!fir.ref>, i1) -> !hlfir.expr<3xi32> +// CHECK: return %[[VAL_49]] : !hlfir.expr<3xi32> +// CHECK: } + // 
Character comparisons are not supported yet. func.func @test_character(%input: !fir.box>>) -> !hlfir.expr<1xi32> { %0 = hlfir.minloc %input : (!fir.box>>) -> !hlfir.expr<1xi32> diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir index 98e4c692fb72f..9ab419893c6f3 100644 --- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir @@ -184,3 +184,118 @@ func.func @test_partial_var(%input: !fir.box>, %mask: !fir.b // CHECK: } // CHECK: return %[[VAL_10]] : !hlfir.expr // CHECK: } + +func.func @test_partial_expr_nomask(%input: !hlfir.expr) -> !hlfir.expr { + %dim = arith.constant 1 : i32 + %0 = hlfir.minval %input dim %dim {fastmath = #arith.fastmath} : (!hlfir.expr, i32) -> !hlfir.expr + return %0 : !hlfir.expr +} +// CHECK-LABEL: func.func @test_partial_expr_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr { +// CHECK: %[[VAL_1:.*]] = arith.constant 1.7976931348623157E+308 : f64 +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): +// CHECK: %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_3]] : index +// CHECK: %[[VAL_11:.*]] = fir.if %[[VAL_10]] -> (f64) { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_2]], %[[VAL_9]] : (!hlfir.expr, index, index) -> f64 +// CHECK: fir.result %[[VAL_12]] : f64 +// CHECK: } else { +// CHECK: fir.result 
%[[VAL_1]] : f64 +// CHECK: } +// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (f64) { +// CHECK: %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_14]], %[[VAL_9]] : (!hlfir.expr, index, index) -> f64 +// CHECK: %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_15]] fastmath : f64 +// CHECK: %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_15]], %[[VAL_15]] fastmath : f64 +// CHECK: %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath : f64 +// CHECK: %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1 +// CHECK: %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1 +// CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_15]] : f64 +// CHECK: fir.result %[[VAL_22]] : f64 +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_13]] : f64 +// CHECK: } +// CHECK: return %[[VAL_8]] : !hlfir.expr +// CHECK: } + +func.func @test_total_var_nomask(%input: !fir.box>) -> f16 { + %0 = hlfir.minval %input {fastmath = #arith.fastmath} : (!fir.box>) -> f16 + return %0 : f16 +} +// CHECK-LABEL: func.func @test_total_var_nomask( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box>) -> f16 { +// CHECK: %[[VAL_1:.*]] = arith.constant 6.550400e+04 : f16 +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]]#1, %[[VAL_3]] : index +// CHECK: %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]]#1, %[[VAL_3]] : index +// CHECK: %[[VAL_8:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1 +// CHECK: %[[VAL_9:.*]] = fir.if %[[VAL_8]] -> (f16) { +// CHECK: %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> 
(index, index, index) +// CHECK: %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_10]]#0, %[[VAL_11]]#0) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.ref +// CHECK: fir.result %[[VAL_13]] : f16 +// CHECK: } else { +// CHECK: fir.result %[[VAL_1]] : f16 +// CHECK: } +// CHECK: %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (f16) { +// CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f16) { +// CHECK: %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_2]] : index +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : index +// CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_15]], %[[VAL_24]] : index +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_23]], %[[VAL_25]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = arith.cmpf olt, %[[VAL_27]], %[[VAL_19]] fastmath : f16 +// CHECK: %[[VAL_29:.*]] = arith.cmpf une, %[[VAL_19]], %[[VAL_19]] fastmath : f16 +// CHECK: %[[VAL_30:.*]] = arith.cmpf oeq, %[[VAL_27]], %[[VAL_27]] fastmath : f16 +// CHECK: %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1 +// CHECK: %[[VAL_32:.*]] = arith.ori %[[VAL_28]], %[[VAL_31]] : i1 +// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_27]], %[[VAL_19]] : f16 +// CHECK: fir.result %[[VAL_33]] : f16 +// CHECK: } +// CHECK: fir.result 
%[[VAL_17]] : f16 +// CHECK: } +// CHECK: return %[[VAL_14]] : f16 +// CHECK: } + +// Test that 'nnan' allows using LARGEST value as the reduction init. +func.func @test_partial_expr_nnan(%input: !hlfir.expr) -> !hlfir.expr { + %dim = arith.constant 1 : i32 + %0 = hlfir.minval %input dim %dim {fastmath = #arith.fastmath} : (!hlfir.expr, i32) -> !hlfir.expr + return %0 : !hlfir.expr +} +// CHECK-LABEL: func.func @test_partial_expr_nnan( +// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr) -> !hlfir.expr { +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 1.7976931348623157E+308 : f64 +// CHECK: %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> +// CHECK: %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_8:.*]]: index): +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] iter_args(%[[VAL_11:.*]] = %[[VAL_2]]) -> (f64) { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_8]] : (!hlfir.expr, index, index) -> f64 +// CHECK: %[[VAL_13:.*]] = arith.cmpf olt, %[[VAL_12]], %[[VAL_11]] fastmath : f64 +// CHECK: %[[VAL_14:.*]] = arith.cmpf une, %[[VAL_11]], %[[VAL_11]] fastmath : f64 +// CHECK: %[[VAL_15:.*]] = arith.cmpf oeq, %[[VAL_12]], %[[VAL_12]] fastmath : f64 +// CHECK: %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1 +// CHECK: %[[VAL_17:.*]] = arith.ori %[[VAL_13]], %[[VAL_16]] : i1 +// CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_11]] : f64 +// CHECK: fir.result %[[VAL_18]] : f64 +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_9]] : f64 +// 
CHECK: } +// CHECK: return %[[VAL_7]] : !hlfir.expr +// CHECK: }