diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index dbec741cf1b1f..8773fc5881461 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -1096,6 +1096,7 @@ def AffineDelinearizeIndexOp : Affine_Op<"delinearize_index", ]; let hasVerifier = 1; + let hasCanonicalizer = 1; } #endif // AFFINE_OPS diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index 1036e93a03924..b08e803345f76 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -394,7 +394,7 @@ def LoopCoalescing : Pass<"affine-loop-coalescing", "func::FuncOp"> { let summary = "Coalesce nested loops with independent bounds into a single " "loop"; let constructor = "mlir::affine::createLoopCoalescingPass()"; - let dependentDialects = ["arith::ArithDialect"]; + let dependentDialects = ["affine::AffineDialect","arith::ArithDialect"]; } def SimplifyAffineStructures : Pass<"affine-simplify-structures", "func::FuncOp"> { diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td index 9b29affb97c43..53d1ae10dc87d 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td @@ -56,6 +56,7 @@ def SCFParallelLoopFusion : Pass<"scf-parallel-loop-fusion"> { def TestSCFParallelLoopCollapsing : Pass<"test-scf-parallel-loop-collapsing"> { let summary = "Test parallel loops collapsing transformation"; let constructor = "mlir::createTestSCFParallelLoopCollapsingPass()"; + let dependentDialects = ["affine::AffineDialect"]; let description = [{ This pass is purely for testing the scf::collapseParallelLoops transformation. 
The transformation does not have opinions on how a
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 11b6b7cf5fd5a..df48db8987393 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -4534,6 +4534,135 @@ LogicalResult AffineDelinearizeIndexOp::verify() {
   return success();
 }
 
+namespace {
+
+/// Drops delinearization indices that correspond to unit-extent basis.
+struct DropUnitExtentBasis
+    : public OpRewritePattern<affine::AffineDelinearizeIndexOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(affine::AffineDelinearizeIndexOp delinearizeOp,
+                                PatternRewriter &rewriter) const override {
+    SmallVector<Value> replacements(delinearizeOp->getNumResults(), nullptr);
+    std::optional<Value> zero = std::nullopt;
+    Location loc = delinearizeOp->getLoc();
+    auto getZero = [&]() -> Value {
+      if (!zero)
+        zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      return zero.value();
+    };
+
+    // Replace all indices corresponding to unit-extent basis with 0.
+    // Remaining basis can be used to get a new `affine.delinearize_index` op.
+    SmallVector<Value> newOperands;
+    for (auto [index, basis] : llvm::enumerate(delinearizeOp.getBasis())) {
+      if (matchPattern(basis, m_One()))
+        replacements[index] = getZero();
+      else
+        newOperands.push_back(basis);
+    }
+
+    if (newOperands.size() == delinearizeOp.getBasis().size())
+      return failure();
+
+    if (!newOperands.empty()) {
+      auto newDelinearizeOp = rewriter.create<affine::AffineDelinearizeIndexOp>(
+          loc, delinearizeOp.getLinearIndex(), newOperands);
+      int newIndex = 0;
+      // Map back the new delinearized indices to the values they replace.
+      for (auto &replacement : replacements) {
+        if (replacement)
+          continue;
+        replacement = newDelinearizeOp->getResult(newIndex++);
+      }
+    }
+
+    rewriter.replaceOp(delinearizeOp, replacements);
+    return success();
+  }
+};
+
+/// Drop delinearization pattern related to loops in the following way
+///
+/// ```
+/// <loop>(%iv) = (%c0) to (%ub) step (%c1) {
+///   %0 = affine.delinearize_index %iv into (%ub) : index
+///   <some_use>(%0)
+/// }
+/// ```
+///
+/// can be canonicalized to
+///
+/// ```
+/// <loop>(%iv) = (%c0) to (%ub) step (%c1) {
+///   <some_use>(%iv)
+/// }
+/// ```
+struct DropDelinearizeOfSingleLoop
+    : public OpRewritePattern<affine::AffineDelinearizeIndexOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(affine::AffineDelinearizeIndexOp delinearizeOp,
+                                PatternRewriter &rewriter) const override {
+    auto basis = delinearizeOp.getBasis();
+    if (basis.size() != 1)
+      return failure();
+
+    // Check that the `linear_index` is an induction variable. `dyn_cast`
+    // yields null for an op result, whereas `cast` would assert.
+    auto inductionVar = dyn_cast<BlockArgument>(delinearizeOp.getLinearIndex());
+    if (!inductionVar)
+      return failure();
+
+    // Check that the parent is a `LoopLikeOpInterface`. The block argument may
+    // belong to any op (e.g. a function), so this must be a `dyn_cast` too.
+    auto loopLikeOp = dyn_cast<LoopLikeOpInterface>(
+        inductionVar.getParentRegion()->getParentOp());
+    if (!loopLikeOp)
+      return failure();
+
+    // Check that loop is unit-rank and that the `linear_index` is the induction
+    // variable.
+    auto inductionVars = loopLikeOp.getLoopInductionVars();
+    if (!inductionVars || inductionVars->size() != 1 ||
+        inductionVars->front() != inductionVar) {
+      return rewriter.notifyMatchFailure(
+          delinearizeOp, "`linear_index` is not loop induction variable");
+    }
+
+    // Check that the upper-bound is the basis.
+    auto upperBounds = loopLikeOp.getLoopUpperBounds();
+    if (!upperBounds || upperBounds->size() != 1 ||
+        upperBounds->front() != getAsOpFoldResult(basis.front())) {
+      return rewriter.notifyMatchFailure(delinearizeOp,
+                                         "`basis` is not upper bound");
+    }
+
+    // Check that the lower bound is zero.
+    auto lowerBounds = loopLikeOp.getLoopLowerBounds();
+    if (!lowerBounds || lowerBounds->size() != 1 ||
+        !isZeroIndex(lowerBounds->front())) {
+      return rewriter.notifyMatchFailure(delinearizeOp,
+                                         "loop lower bound is not zero");
+    }
+
+    // Check that the step is one.
+    auto steps = loopLikeOp.getLoopSteps();
+    if (!steps || steps->size() != 1 || !isConstantIntValue(steps->front(), 1))
+      return rewriter.notifyMatchFailure(delinearizeOp, "loop step is not one");
+
+    rewriter.replaceOp(delinearizeOp, inductionVar);
+    return success();
+  }
+};
+
+} // namespace
+
+void affine::AffineDelinearizeIndexOp::getCanonicalizationPatterns(
+    RewritePatternSet &patterns, MLIRContext *context) {
+  patterns.insert<DropDelinearizeOfSingleLoop, DropUnitExtentBasis>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
index 6ba7020e86fa6..358a3b38a4cd3 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -8,6 +8,7 @@
 
 #include "mlir/Dialect/SCF/Transforms/Passes.h"
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Transforms/RegionUtils.h"
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index a794a121d6267..43fcc595af0f7 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -671,9 +672,26 @@ LogicalResult
mlir::loopUnrollJamByFactor(scf::ForOp forOp,
   return success();
 }
 
+/// Index-typed normalization: `[0, (ub - lb) ceildiv step)` with unit stride.
+static Range emitNormalizedLoopBoundsForIndexType(
+    RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub,
+    OpFoldResult step) {
+  Range normalizedLoopBounds;
+  normalizedLoopBounds.offset = rewriter.getIndexAttr(0);
+  normalizedLoopBounds.stride = rewriter.getIndexAttr(1);
+  AffineExpr s0, s1, s2;
+  bindSymbols(rewriter.getContext(), s0, s1, s2);
+  normalizedLoopBounds.size = affine::makeComposedFoldedAffineApply(
+      rewriter, loc, (s1 - s0).ceilDiv(s2), {lb, ub, step});
+  return normalizedLoopBounds;
+}
+
 Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc,
                                      OpFoldResult lb, OpFoldResult ub,
                                      OpFoldResult step) {
+  if (getType(lb).isIndex()) {
+    return emitNormalizedLoopBoundsForIndexType(rewriter, loc, lb, ub, step);
+  }
   // For non-index types, generate `arith` instructions
   // Check if the loop is already known to have a constant zero lower bound or
   // a constant one step.
@@ -714,9 +732,38 @@ Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc,
   return {newLowerBound, newUpperBound, newStep};
 }
 
+/// Index-typed denormalization: replaces uses of `normalizedIv` with
+/// `normalizedIv * origStep + origLb`, built (and folded) with affine ops.
+static void denormalizeInductionVariableForIndexType(
+    RewriterBase &rewriter, Location loc, Value normalizedIv,
+    OpFoldResult origLb, OpFoldResult origStep) {
+  AffineExpr d0, s0, s1;
+  bindSymbols(rewriter.getContext(), s0, s1);
+  bindDims(rewriter.getContext(), d0);
+  AffineExpr e = d0 * s1 + s0;
+  OpFoldResult denormalizedIv = affine::makeComposedFoldedAffineApply(
+      rewriter, loc, e, ArrayRef<OpFoldResult>{normalizedIv, origLb, origStep});
+  Value denormalizedIvVal =
+      getValueOrCreateConstantIndexOp(rewriter, loc, denormalizedIv);
+  SmallPtrSet<Operation *, 2> preservedUses;
+  // If an `affine.apply` operation is generated for denormalization, its use
+  // of `normalizedIv` must not be replaced. No such op is generated when
+  // `origLb == 0` and `origStep == 1`.
+  if (!isConstantIntValue(origLb, 0) || !isConstantIntValue(origStep, 1)) {
+    if (Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
+      preservedUses.insert(preservedUse);
+    }
+  }
+  rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIvVal, preservedUses);
+}
+
 void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc,
                                         Value normalizedIv, OpFoldResult origLb,
                                         OpFoldResult origStep) {
+  if (getType(origLb).isIndex()) {
+    return denormalizeInductionVariableForIndexType(rewriter, loc, normalizedIv,
+                                                    origLb, origStep);
+  }
   Value denormalizedIv;
   SmallPtrSet<Operation *, 2> preserve;
   bool isStepOne = isConstantIntValue(origStep, 1);
@@ -739,10 +786,29 @@ void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc,
   rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIv, preserve);
 }
 
+/// Returns the (folded) product of index-typed `values`, built with affine ops.
+static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc,
+                                        ArrayRef<OpFoldResult> values) {
+  assert(!values.empty() && "unexpected empty array");
+  AffineExpr s0, s1;
+  bindSymbols(rewriter.getContext(), s0, s1);
+  AffineExpr mul = s0 * s1;
+  OpFoldResult product = rewriter.getIndexAttr(1);
+  for (OpFoldResult v : values)
+    product = affine::makeComposedFoldedAffineApply(
+        rewriter, loc, mul, ArrayRef<OpFoldResult>{product, v});
+  return product;
+}
+
 /// Helper function to multiply a sequence of values.
 static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
                                        ArrayRef<Value> values) {
   assert(!values.empty() && "unexpected empty list");
+  if (getType(values.front()).isIndex()) {
+    SmallVector<OpFoldResult> ofrs = getAsOpFoldResult(values);
+    OpFoldResult product = getProductOfIndexes(rewriter, loc, ofrs);
+    return getValueOrCreateConstantIndexOp(rewriter, loc, product);
+  }
   std::optional<Value> productOf;
   for (auto v : values) {
     auto vOne = getConstantIntValue(v);
@@ -757,7 +823,7 @@ static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
   if (!productOf) {
     productOf = rewriter
                     .create<arith::ConstantOp>(
-                        loc, rewriter.getOneAttr(values.front().getType()))
+                        loc, rewriter.getOneAttr(getType(values.front())))
                     .getResult();
   }
   return productOf.value();
@@ -774,6 +840,16 @@ static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
 static std::pair<SmallVector<Value>, SmallPtrSet<Operation *, 2>>
 delinearizeInductionVariable(RewriterBase &rewriter, Location loc,
                              Value linearizedIv, ArrayRef<Value> ubs) {
+
+  if (linearizedIv.getType().isIndex()) {
+    Operation *delinearizedOp =
+        rewriter.create<affine::AffineDelinearizeIndexOp>(loc, linearizedIv,
+                                                          ubs);
+    auto resultVals = llvm::map_to_vector(
+        delinearizedOp->getResults(), [](OpResult r) -> Value { return r; });
+    return {resultVals, SmallPtrSet<Operation *, 2>{delinearizedOp}};
+  }
+
   SmallVector<Value> delinearizedIvs(ubs.size());
   SmallPtrSet<Operation *, 2> preservedUsers;
 
diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index 730808dbbdf18..ff0e987bcef6c 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -1466,3 +1466,51 @@ func.func @prefetch_canonicalize(%arg0: memref<512xf32>) -> () {
   }
   return
 }
+
+// -----
+
+func.func @drop_unit_basis_in_delinearize(%arg0 : index, %arg1 : index, %arg2 : index) ->
+    (index, index, index, index, index, index) {
+  %c1 = arith.constant 1 : index
+  %0:6 = affine.delinearize_index %arg0 into (%c1, %arg1, %c1, %c1, %arg2, %c1)
+      : index, index, index, index, index, index
+
return %0#0, %0#1, %0#2, %0#3, %0#4, %0#5 : index, index, index, index, index, index +} +// CHECK-LABEL: func @drop_unit_basis_in_delinearize( +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index) +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[ARG0]] into (%[[ARG1]], %[[ARG2]]) +// CHECK: return %[[C0]], %[[DELINEARIZE]]#0, %[[C0]], %[[C0]], %[[DELINEARIZE]]#1, %[[C0]] + +// ----- + +func.func @drop_all_unit_bases(%arg0 : index) -> (index, index) { + %c1 = arith.constant 1 : index + %0:2 = affine.delinearize_index %arg0 into (%c1, %c1) : index, index + return %0#0, %0#1 : index, index +} +// CHECK-LABEL: func @drop_all_unit_bases( +// CHECK-SAME: %[[ARG0:.+]]: index) +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-NOT: affine.delinearize_index +// CHECK: return %[[C0]], %[[C0]] + +// ----- + +func.func @drop_single_loop_delinearize(%arg0 : index, %arg1 : index) -> index { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %2 = scf.for %iv = %c0 to %arg1 step %c1 iter_args(%arg2 = %c0) -> index { + %0 = affine.delinearize_index %iv into (%arg1) : index + %1 = "some_use"(%arg2, %0) : (index, index) -> (index) + scf.yield %1 : index + } + return %2 : index +} +// CHECK-LABEL: func @drop_single_loop_delinearize( +// CHECK-SAME: %[[ARG0:.+]]: index) +// CHECK: scf.for %[[IV:[a-zA-Z0-9]+]] = +// CHECK-NOT: affine.delinearize_index +// CHECK: "some_use"(%{{.+}}, %[[IV]]) diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index 45dd299295f64..f6e7b21bc66ab 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -1,14 +1,15 @@ -// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse %s | FileCheck %s +// RUN: mlir-opt -split-input-file 
-allow-unregistered-dialect -affine-loop-coalescing --cse --mlir-print-local-scope %s | FileCheck %s // CHECK-LABEL: @one_3d_nest func.func @one_3d_nest() { // Capture original bounds. Note that for zero-based step-one loops, the // upper bound is also the number of iterations. - // CHECK: %[[orig_lb:.*]] = arith.constant 0 - // CHECK: %[[orig_step:.*]] = arith.constant 1 - // CHECK: %[[orig_ub_k:.*]] = arith.constant 3 - // CHECK: %[[orig_ub_i:.*]] = arith.constant 42 - // CHECK: %[[orig_ub_j:.*]] = arith.constant 56 + // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 + // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 + // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3 + // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42 + // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56 + // CHECK-DAG: %[[range:.*]] = arith.constant 7056 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index @@ -16,9 +17,6 @@ func.func @one_3d_nest() { %c42 = arith.constant 42 : index %c56 = arith.constant 56 : index // The range of the new scf. - // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]] - // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]] - // Updated loop bounds. // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] scf.for %i = %c0 to %c42 step %c1 { @@ -26,13 +24,11 @@ func.func @one_3d_nest() { // CHECK-NOT: scf.for // Reconstruct original IVs from the linearized one. 
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]] - // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]] + // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] + // CHECK-SAME: into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]]) scf.for %j = %c0 to %c56 step %c1 { scf.for %k = %c0 to %c3 step %c1 { - // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]]) + // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) "use"(%i, %j, %k) : (index, index, index) -> () } } @@ -40,6 +36,8 @@ func.func @one_3d_nest() { return } +// ----- + // Check that there is no chasing the replacement of value uses by ensuring // multiple uses of loop induction variables get rewritten to the same values. @@ -52,13 +50,10 @@ func.func @multi_use() { scf.for %i = %c1 to %c10 step %c1 { scf.for %j = %c1 to %c10 step %c1 { scf.for %k = %c1 to %c10 step %c1 { - // CHECK: %[[k_unshifted:.*]] = arith.remsi %[[iv]], %[[k_extent:.*]] - // CHECK: %[[ij:.*]] = arith.divsi %[[iv]], %[[k_extent]] - // CHECK: %[[j_unshifted:.*]] = arith.remsi %[[ij]], %[[j_extent:.*]] - // CHECK: %[[i_unshifted:.*]] = arith.divsi %[[ij]], %[[j_extent]] - // CHECK: %[[k:.*]] = arith.addi %[[k_unshifted]] - // CHECK: %[[j:.*]] = arith.addi %[[j_unshifted]] - // CHECK: %[[i:.*]] = arith.addi %[[i_unshifted]] + // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[iv]] + // CHECK: %[[k:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#2) + // CHECK: %[[j:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#1) + // CHECK: %[[i:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#0) // CHECK: "use1"(%[[i]], %[[j]], %[[k]]) "use1"(%i,%j,%k) : (index,index,index) -> () @@ -72,12 +67,20 @@ func.func @multi_use() { return } +// ----- + func.func @unnormalized_loops() { - // CHECK: 
%[[orig_step_i:.*]] = arith.constant 2 + // Normalized lower bound and step for the outer scf. + // CHECK-DAG: %[[lb_i:.*]] = arith.constant 0 + // CHECK-DAG: %[[step_i:.*]] = arith.constant 1 + // CHECK-DAG: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3 + + // Number of iterations in the inner loop, the pattern is the same as above, + // only capture the final result. + // CHECK-DAG: %[[numiter_j:.*]] = arith.constant 4 + + // CHECK-DAG: %[[range:.*]] = arith.constant 12 - // CHECK: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3 - // CHECK: %[[orig_lb_i:.*]] = arith.constant 5 - // CHECK: %[[orig_lb_j:.*]] = arith.constant 7 %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c5 = arith.constant 5 : index @@ -85,28 +88,18 @@ func.func @unnormalized_loops() { %c10 = arith.constant 10 : index %c17 = arith.constant 17 : index - // Normalized lower bound and step for the outer scf. - // CHECK: %[[lb_i:.*]] = arith.constant 0 - // CHECK: %[[step_i:.*]] = arith.constant 1 - - // Number of iterations in the inner loop, the pattern is the same as above, - // only capture the final result. - // CHECK: %[[numiter_j:.*]] = arith.constant 4 // New bounds of the outer scf. - // CHECK: %[[range:.*]] = arith.muli %[[orig_step_j_and_numiter_i:.*]], %[[numiter_j]] // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]] scf.for %i = %c5 to %c10 step %c2 { // The inner loop has been removed. // CHECK-NOT: scf.for scf.for %j = %c7 to %c17 step %c3 { // The IVs are rewritten. 
- // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter_j]] - // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter_j]] - // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step_j_and_numiter_i]] - // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb_j]] - // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step_i]] - // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb_i]] + // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]] + // CHECK-SAME: into (%[[orig_step_j_and_numiter_i]], %[[numiter_j]]) + // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0) -> (d0 * 3 + 7)>(%[[delinearize]]#1) + // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0) -> (d0 * 2 + 5)>(%[[delinearize]]#0) // CHECK: "use"(%[[orig_i]], %[[orig_j]]) "use"(%i, %j) : (index, index) -> () } @@ -114,20 +107,21 @@ func.func @unnormalized_loops() { return } +// ----- + func.func @noramalized_loops_with_yielded_iter_args() { - // CHECK: %[[orig_lb:.*]] = arith.constant 0 - // CHECK: %[[orig_step:.*]] = arith.constant 1 - // CHECK: %[[orig_ub_k:.*]] = arith.constant 3 - // CHECK: %[[orig_ub_i:.*]] = arith.constant 42 - // CHECK: %[[orig_ub_j:.*]] = arith.constant 56 + // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 + // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42 + // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 + // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56 + // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3 + // CHECK-DAG: %[[range:.*]] = arith.constant 7056 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c42 = arith.constant 42 : index %c56 = arith.constant 56 : index // The range of the new scf. - // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]] - // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]] // Updated loop bounds. 
// CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) { @@ -136,13 +130,10 @@ func.func @noramalized_loops_with_yielded_iter_args() { // CHECK-NOT: scf.for // Reconstruct original IVs from the linearized one. - // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]] - // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]] + // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]]) %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){ %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) { - // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]]) + // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) "use"(%i, %j, %k) : (index, index, index) -> () // CHECK: scf.yield %[[VAL_1]] : index scf.yield %arg2 : index @@ -154,20 +145,21 @@ func.func @noramalized_loops_with_yielded_iter_args() { return } +// ----- + func.func @noramalized_loops_with_shuffled_yielded_iter_args() { - // CHECK: %[[orig_lb:.*]] = arith.constant 0 - // CHECK: %[[orig_step:.*]] = arith.constant 1 - // CHECK: %[[orig_ub_k:.*]] = arith.constant 3 - // CHECK: %[[orig_ub_i:.*]] = arith.constant 42 - // CHECK: %[[orig_ub_j:.*]] = arith.constant 56 + // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 + // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 + // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3 + // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42 + // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c42 = arith.constant 42 : index %c56 = arith.constant 56 : index // The range of the new scf. 
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]] - // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]] + // CHECK-DAG:%[[range:.*]] = arith.constant 7056 // Updated loop bounds. // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]], %[[VAL_2:.*]] = %[[orig_lb]]) -> (index, index) { @@ -176,13 +168,11 @@ func.func @noramalized_loops_with_shuffled_yielded_iter_args() { // CHECK-NOT: scf.for // Reconstruct original IVs from the linearized one. - // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]] - // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]] + // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] + // CHECK-SAME: into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]]) %1:2 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (index, index){ %0:2 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (index, index) { - // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]]) + // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) "use"(%i, %j, %k) : (index, index, index) -> () // CHECK: scf.yield %[[VAL_2]], %[[VAL_1]] : index, index scf.yield %arg5, %arg4 : index, index @@ -194,20 +184,21 @@ func.func @noramalized_loops_with_shuffled_yielded_iter_args() { return } +// ----- + func.func @noramalized_loops_with_yielded_non_iter_args() { - // CHECK: %[[orig_lb:.*]] = arith.constant 0 - // CHECK: %[[orig_step:.*]] = arith.constant 1 - // CHECK: %[[orig_ub_k:.*]] = arith.constant 3 - // CHECK: %[[orig_ub_i:.*]] = arith.constant 42 - // CHECK: %[[orig_ub_j:.*]] = arith.constant 56 + // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 + // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 + // CHECK-DAG: %[[orig_ub_k:.*]] = 
arith.constant 3 + // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42 + // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56 %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c42 = arith.constant 42 : index %c56 = arith.constant 56 : index // The range of the new scf. - // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]] - // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]] + // CHECK-DAG: %[[range:.*]] = arith.constant 7056 // Updated loop bounds. // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) { @@ -216,13 +207,11 @@ func.func @noramalized_loops_with_yielded_non_iter_args() { // CHECK-NOT: scf.for // Reconstruct original IVs from the linearized one. - // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]] - // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]] - // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]] + // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] + // CHECK-SAME: into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]]) %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){ %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) { - // CHECK: %[[res:.*]] = "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]]) + // CHECK: %[[res:.*]] = "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) %res = "use"(%i, %j, %k) : (index, index, index) -> (index) // CHECK: scf.yield %[[res]] : index scf.yield %res : index @@ -234,6 +223,8 @@ func.func @noramalized_loops_with_yielded_non_iter_args() { return } +// ----- + // Check with parametric loop bounds and steps, capture the bounds here. 
// CHECK-LABEL: @parametric // CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]: @@ -246,25 +237,28 @@ func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) { // Compute the number of iterations for each of the loops and the total // number of iterations. - // CHECK: %[[range1:.*]] = arith.subi %[[orig_ub1]], %[[orig_lb1]] - // CHECK: %[[numiter1:.*]] = arith.ceildivsi %[[range1]], %[[orig_step1]] - // CHECK: %[[range2:.*]] = arith.subi %[[orig_ub2]], %[[orig_lb2]] - // CHECK: %[[numiter2:.*]] = arith.ceildivsi %[[range2]], %[[orig_step2]] - // CHECK: %[[range:.*]] = arith.muli %[[numiter1]], %[[numiter2]] : index + // CHECK: %[[normalized_i:.*]] = affine.apply + // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]]] + // CHECK: %[[c0:.+]] = arith.constant 0 + // CHECK: %[[c1:.+]] = arith.constant 1 + // CHECK: %[[normalized_j:.*]] = affine.apply + // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]] + // CHECK: %[[range:.+]] = affine.apply + // CHECK-SAME: affine_map<()[s0, s1, s2, s3, s4, s5] -> (((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5))>() + // CHECK-SAME: [%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]], %[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]] // Check that the outer loop is updated. - // CHECK: scf.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1 + // CHECK: scf.for %[[i:.*]] = %[[c0]] to %[[range]] step %[[c1]] scf.for %i = %lb1 to %ub1 step %step1 { // Check that the inner loop is removed. // CHECK-NOT: scf.for scf.for %j = %lb2 to %ub2 step %step2 { // Remapping of the induction variables. 
- // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter2]] : index - // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter2]] : index - // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step2]] - // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb2]] - // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step1]] - // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb1]] + // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]] into (%[[normalized_i]], %[[normalized_j]]) + // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + // CHECK-SAME: (%[[delinearize]]#1)[%[[orig_lb2]], %[[orig_step2]]] + // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + // CHECK-SAME: (%[[delinearize]]#0)[%[[orig_lb1]], %[[orig_step1]]] // CHECK: "foo"(%[[orig_i]], %[[orig_j]]) "foo"(%i, %j) : (index, index) -> () @@ -273,19 +267,21 @@ func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index, return } +// ----- + // CHECK-LABEL: @two_bands func.func @two_bands() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c10 = arith.constant 10 : index - // CHECK: %[[outer_range:.*]] = arith.muli + // CHECK: %[[outer_range:.*]] = arith.constant 100 // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]] scf.for %i = %c0 to %c10 step %c1 { // Check that the "j" loop was removed and that the inner loops were // coalesced as well. The preparation step for coalescing will inject the // subtraction operation unlike the IV remapping. // CHECK-NOT: scf.for - // CHECK: arith.subi + // CHECK: affine.delinearize_index scf.for %j = %c0 to %c10 step %c1 { // The inner pair of loops is coalesced separately. // CHECK: scf.for @@ -303,12 +299,6 @@ func.func @two_bands() { // ----- // Check coalescing of affine.for loops when all the loops have constant upper bound. 
-// CHECK-DAG: #[[SIXTEEN:.*]] = affine_map<() -> (16)> -// CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)> -// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[EIGHT:.*]] = affine_map<() -> (8)> -// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)> -// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)> func.func @coalesce_affine_for() { affine.for %i = 0 to 16 { affine.for %j = 0 to 64 { @@ -319,16 +309,16 @@ func.func @coalesce_affine_for() { } return } -// CHECK-DAG: %[[T0:.*]] = affine.apply #[[SIXTEEN]]() -// CHECK-DAG: %[[T1:.*]] = affine.apply #[[SIXTY_FOUR]]() -// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]] -// CHECK-DAG: %[[T3:.*]] = affine.apply #[[EIGHT]]() -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T3]]] +// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<() -> (16)>() +// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<() -> (64)>() +// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]] +// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<() -> (8)>() +// CHECK-DAG: %[[T4:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T3]]] // CHECK: affine.for %[[IV:.*]] = 0 to %[[T4]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T3]]] -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T3]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T6]])[%[[T1]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T6]])[%[[T1]]] +// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T3]]] +// CHECK-DAG: %[[T6:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T3]]] +// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T6]])[%[[T1]]] +// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T6]])[%[[T1]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // 
CHECK-NEXT: return @@ -336,10 +326,6 @@ func.func @coalesce_affine_for() { // ----- // Check coalescing of affine.for loops when all the loops have non constant upper bounds. -// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)> -// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)> -// CHECK-DAG: #[[FLOOR:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)> func.func @coalesce_affine_for(%arg0: memref) { %c0 = arith.constant 0 : index %M = memref.dim %arg0, %c0 : memref @@ -355,14 +341,14 @@ func.func @coalesce_affine_for(%arg0: memref) { return } // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] -// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]] -// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T0]]] +// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T0]]] // CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T0]]] -// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T0]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T0]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T0]]] +// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T0]]] +// CHECK-DAG: %[[T9:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T0]]] +// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T9]])[%[[T0]]] +// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T9]])[%[[T0]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -370,11 +356,6 @@ 
func.func @coalesce_affine_for(%arg0: memref) { // ----- // Check coalescing of affine.for loops when some of the loop has constant upper bounds while others have nin constant upper bounds. -// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)> -// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)> -// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)> -// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)> func.func @coalesce_affine_for(%arg0: memref) { %c0 = arith.constant 0 : index %M = memref.dim %arg0, %c0 : memref @@ -389,15 +370,15 @@ func.func @coalesce_affine_for(%arg0: memref) { return } // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] -// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]] -// CHECK-DAG: %[[T2:.*]] = affine.apply #[[SIXTY_FOUR]]() -// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T2]]] +// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<() -> (64)>() +// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T2]]] // CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T2]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T2]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T0]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T0]]] +// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T2]]] +// CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T2]]] +// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T0]]] +// CHECK-DAG: 
%[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T0]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -405,11 +386,6 @@ func.func @coalesce_affine_for(%arg0: memref) { // ----- // Check coalescing of affine.for loops when upper bound contains multi result upper bound map. -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0, -s0)> -// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)> -// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)> -// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)> #myMap = affine_map<()[s1] -> (s1, -s1)> func.func @coalesce_affine_for(%arg0: memref) { %c0 = arith.constant 0 : index @@ -426,23 +402,21 @@ func.func @coalesce_affine_for(%arg0: memref) { return } // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[DIM]]] -// CHECK-DAG: %[[T1:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] -// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]] -// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T1]]] +// CHECK-DAG: %[[T0:.*]] = affine.min affine_map<()[s0] -> (s0, -s0)>()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]] +// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T1]]] // CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T1]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T1]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T1]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T1]]] +// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T1]]] +// CHECK-DAG: %[[T5:.*]] = 
affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T1]]] +// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T1]]] +// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T1]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return // ----- -// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (d0 * 110)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (696, d0 * 110 + 110)> #map0 = affine_map<(d0) -> (d0 * 110)> #map1 = affine_map<(d0) -> (696, d0 * 110 + 110)> func.func @test_loops_do_not_get_coalesced() { @@ -454,7 +428,7 @@ func.func @test_loops_do_not_get_coalesced() { return } // CHECK: affine.for %[[IV0:.*]] = 0 to 7 -// CHECK-NEXT: affine.for %[[IV1:.*]] = #[[MAP0]](%[[IV0]]) to min #[[MAP1]](%[[IV0]]) +// CHECK-NEXT: affine.for %[[IV1:.*]] = affine_map<(d0) -> (d0 * 110)>(%[[IV0]]) to min affine_map<(d0) -> (696, d0 * 110 + 110)>(%[[IV0]]) // CHECK-NEXT: "use"(%[[IV0]], %[[IV1]]) // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir index 6fcd727621bae..03ddee1c7a98a 100644 --- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir +++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse | FileCheck %s +// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse --mlir-print-local-scope | FileCheck %s func.func @coalesce_inner() { %c0 = arith.constant 0 : index @@ -33,19 +33,15 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-DAG: #[[MAP:.+]] = affine_map<() -> (64)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> 
(d0 floordiv s0)> func.func @coalesce_outer(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} { - // CHECK: %[[T0:.+]] = affine.apply #[[MAP]]() - // CHECK: %[[UB:.+]] = affine.apply #[[MAP1]](%[[T0]])[%[[T0]]] + // CHECK: %[[T0:.+]] = affine.apply affine_map<() -> (64)>() + // CHECK: %[[UB:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]] // CHECK: affine.for %[[IV1:.+]] = 0 to %[[UB:.+]] { // CHECK-NOT: affine.for %[[IV2:.+]] affine.for %arg4 = 0 to 64 { affine.for %arg5 = 0 to 64 { - // CHECK: %[[IDX0:.+]] = affine.apply #[[MAP2]](%[[IV1]])[%{{.+}}] - // CHECK: %[[IDX1:.+]] = affine.apply #[[MAP3]](%[[IV1]])[%{{.+}}] + // CHECK: %[[IDX0:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV1]])[%{{.+}}] + // CHECK: %[[IDX1:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV1]])[%{{.+}}] // CHECK-NEXT: %{{.+}} = affine.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1> %0 = affine.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1> %1 = affine.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1> @@ -76,9 +72,8 @@ func.func @coalesce_and_unroll(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64x scf.for %arg4 = %c0 to %c64 step %c1 { // CHECK-NOT: scf.for scf.for %arg5 = %c0 to %c64 step %c1 { - // CHECK: %[[IDX0:.+]] = arith.remsi %[[IV1]] - // CHECK: %[[IDX1:.+]] = arith.divsi %[[IV1]] - // CHECK-NEXT: %{{.+}} = memref.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1> + // CHECK: %[[IDX:.+]]:2 = affine.delinearize_index + // CHECK-NEXT: %{{.+}} = memref.load %{{.+}}[%[[IDX]]#0, %[[IDX]]#1] : memref<64x64xf32, 1> %0 = memref.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1> %1 = memref.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1> %2 = arith.addf %0, %1 : f32 @@ -138,27 +133,22 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index 
-// CHECK: %[[NEWUB0_DIFF:.+]] = arith.subi %[[UB0]], %[[LB0]] -// CHECK-DAG: %[[NEWUB0:.+]] = arith.ceildivsi %[[NEWUB0_DIFF]], %[[STEP0]] -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 -// CHECK: %[[NEWUB1_DIFF:.+]] = arith.subi %[[UB1]], %[[LB1]] -// CHECK-DAG: %[[NEWUB1:.+]] = arith.ceildivsi %[[NEWUB1_DIFF]], %[[STEP1]] -// CHECK: %[[NEWUB2_DIFF:.+]] = arith.subi %[[UB2]], %[[LB2]] -// CHECK-DAG: %[[NEWUB2:.+]] = arith.ceildivsi %[[NEWUB2_DIFF]], %[[STEP2]] -// CHECK: %[[PROD1:.+]] = arith.muli %[[NEWUB0]], %[[NEWUB1]] -// CHECK: %[[NEWUB:.+]] = arith.muli %[[PROD1]], %[[NEWUB2]] +// CHECK: %[[NITERS0:.+]] = affine.apply +// CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB0]], %[[UB0]], %[[STEP0]]] +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[C1:.+]] = arith.constant 1 : index +// CHECK: %[[NITERS1:.+]] = affine.apply +// CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB1]], %[[UB1]], %[[STEP1]]] +// CHECK: %[[NITERS2:.+]] = affine.apply +// CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB2]], %[[UB2]], %[[STEP2]]] +// CHECK: %[[NEWUB:.+]] = affine.apply affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] -> +// CHECK-SAME: ((((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5)) * ((-s6 + s7) ceildiv s8)) +// CHECK-SAME: [%[[LB0]], %[[UB0]], %[[STEP0]], %[[LB1]], %[[UB1]], %[[STEP1]], %[[LB2]], %[[UB2]], %[[STEP2]]] // CHECK: %[[RESULT:.+]] = scf.for %[[IV:[a-zA-Z0-9]+]] = %[[C0]] to %[[NEWUB]] step %[[C1]] iter_args(%[[ITER_ARG:.+]] = %[[ARG0]]) -// CHECK: %[[IV2:.+]] = arith.remsi %[[IV]], %[[NEWUB2]] -// CHECK: %[[PREVIOUS:.+]] = arith.divsi %[[IV]], %[[NEWUB2]] -// CHECK: %[[IV1:.+]] = arith.remsi %[[PREVIOUS]], %[[NEWUB1]] -// CHECK: %[[IV0:.+]] = arith.divsi %[[PREVIOUS]], %[[NEWUB1]] -// CHECK: %[[K_STEP:.+]] = arith.muli %[[IV2]], %[[STEP2]] -// CHECK: %[[K:.+]] = arith.addi %[[K_STEP]], %[[LB2]] -// CHECK: %[[J_STEP:.+]] = 
arith.muli %[[IV1]], %[[STEP1]] -// CHECK: %[[J:.+]] = arith.addi %[[J_STEP]], %[[LB1]] -// CHECK: %[[I_STEP:.+]] = arith.muli %[[IV0]], %[[STEP0]] -// CHECK: %[[I:.+]] = arith.addi %[[I_STEP]], %[[LB0]] +// CHECK: %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[IV]] into (%[[NITERS0]], %[[NITERS1]], %[[NITERS2]]) +// CHECK-DAG: %[[K:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#2)[%[[LB2]], %[[STEP2]]] +// CHECK-DAG: %[[J:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#1)[%[[LB1]], %[[STEP1]]] +// CHECK-DAG: %[[I:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#0)[%[[LB0]], %[[STEP0]]] // CHECK: %[[USE:.+]] = "use"(%[[ITER_ARG]], %[[I]], %[[J]], %[[K]]) // CHECK: scf.yield %[[USE]] // CHECK: return %[[RESULT]] @@ -201,8 +191,7 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index // CHECK: scf.for -// CHECK: arith.remsi -// CHECK: arith.divsi +// CHECK: affine.delinearize_index // CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]] // CHECK-NOT: scf.for // CHECK: transform.named_sequence @@ -245,8 +234,7 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index // CHECK: scf.for -// CHECK: arith.remsi -// CHECK: arith.divsi +// CHECK: affine.delinearize_index // CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]] // CHECK-NOT: scf.for // CHECK: transform.named_sequence @@ -289,13 +277,9 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index // CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB0]] to %[[UB0]] step %[[STEP0]] -// CHECK: arith.subi -// CHECK: arith.ceildivsi -// CHECK: arith.subi -// CHECK: arith.ceildivsi +// CHECK-NOT: affine.delinearize_index // 
CHECK: scf.for -// CHECK: arith.remsi -// CHECK: arith.divsi +// CHECK: affine.delinearize_index // CHECK-NOT: scf.for // CHECK: transform.named_sequence @@ -329,6 +313,9 @@ module attributes {transform.with_named_sequence} { %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.apply_patterns to %2 { + transform.apply_patterns.canonicalization + } : !transform.op<"scf.for"> transform.yield } } @@ -337,11 +324,10 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[ARG2:.+]]: index) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK: %[[UB:.+]] = arith.muli %[[ARG1]], %[[ARG2]] +// CHECK: %[[UB:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%[[ARG1]], %[[ARG2]]] // CHECK: scf.for %[[IV:.+]] = %[[C0]] to %[[UB]] step %[[C1]] -// CHECK: %[[IV1:.+]] = arith.remsi %[[IV]], %[[ARG2]] -// CHECK: %[[IV2:.+]] = arith.divsi %[[IV]], %[[ARG2]] -// CHECK: "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[IV2]], %[[C0]], %[[IV1]]) +// CHECK: %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[IV]] into (%[[ARG1]], %[[ARG2]]) +// CHECK: "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[DELINEARIZE]]#0, %[[C0]], %[[DELINEARIZE]]#1) // ----- @@ -367,6 +353,9 @@ module attributes {transform.with_named_sequence} { %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.apply_patterns to %2 { + transform.apply_patterns.canonicalization + } : !transform.op<"scf.for"> transform.yield } } diff --git 
a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir index d1c23d584f92b..dc4e042a3c4f5 100644 --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' --mlir-print-local-scope | FileCheck %s // CHECK: func @parallel_many_dims() { func.func @parallel_many_dims() { @@ -33,14 +33,11 @@ func.func @parallel_many_dims() { // CHECK-DAG: %[[C12:.*]] = arith.constant 12 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index -// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index // CHECK: scf.parallel (%[[NEW_I0:.*]]) = (%[[C0]]) to (%[[C4]]) step (%[[C1]]) { // CHECK: %[[V0:.*]] = arith.remsi %[[NEW_I0]], %[[C2]] : index // CHECK: %[[I0:.*]] = arith.divsi %[[NEW_I0]], %[[C2]] : index -// CHECK: %[[V2:.*]] = arith.muli %[[V0]], %[[C10]] -// CHECK: %[[I3:.*]] = arith.addi %[[V2]], %[[C9]] +// CHECK: %[[I3:.*]] = affine.apply affine_map<(d0) -> (d0 * 10 + 9)>(%[[V0]]) // CHECK: "magic.op"(%[[I0]], %[[C3]], %[[C6]], %[[I3]], %[[C12]]) : (index, index, index, index, index) -> index // CHECK: scf.reduce diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir index 4eed61a65aa47..1ef787bec1bb3 100644 --- 
a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' --mlir-print-local-scope %s | FileCheck %s func.func @collapse_to_single() { %c0 = arith.constant 3 : index @@ -14,20 +14,15 @@ func.func @collapse_to_single() { } // CHECK: func @collapse_to_single() { -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C18:.*]] = arith.constant 18 : index // CHECK: scf.parallel (%[[NEW_I:.*]]) = (%[[C0]]) to (%[[C18]]) step (%[[C1]]) { // CHECK: %[[I0_COUNT:.*]] = arith.remsi %[[NEW_I]], %[[C6]] : index // CHECK: %[[I1_COUNT:.*]] = arith.divsi %[[NEW_I]], %[[C6]] : index -// CHECK: %[[V0:.*]] = arith.muli %[[I0_COUNT]], %[[C4]] -// CHECK: %[[I1:.*]] = arith.addi %[[V0]], %[[C7]] -// CHECK: %[[V1:.*]] = arith.muli %[[I1_COUNT]], %[[C3]] -// CHECK: %[[I0:.*]] = arith.addi %[[V1]], %[[C3]] +// CHECK: %[[I1:.*]] = affine.apply affine_map<(d0) -> (d0 * 4 + 7)>(%[[I0_COUNT]]) +// CHECK: %[[I0:.*]] = affine.apply affine_map<(d0) -> (d0 * 3 + 3)>(%[[I1_COUNT]]) // CHECK: "magic.op"(%[[I0]], %[[I1]]) : (index, index) -> index // CHECK: scf.reduce // CHECK-NEXT: }