diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 3c5fa23bd4a7f..244cee1dd635b 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -295,10 +295,9 @@ using TeamsClauseOps =
                     PrivateClauseOps, ReductionClauseOps, ThreadLimitClauseOps>;
 
 using WsloopClauseOps =
-    detail::Clauses<AllocateClauseOps, CollapseClauseOps, LinearClauseOps,
-                    LoopRelatedOps, NowaitClauseOps, OrderClauseOps,
-                    OrderedClauseOps, PrivateClauseOps, ReductionClauseOps,
-                    ScheduleClauseOps>;
+    detail::Clauses<AllocateClauseOps, LinearClauseOps, NowaitClauseOps,
+                    OrderClauseOps, OrderedClauseOps, PrivateClauseOps,
+                    ReductionClauseOps, ScheduleClauseOps>;
 
 } // namespace omp
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 10771f6e854dd..8ab116ce391e2 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -600,29 +600,30 @@ def LoopNestOp : OpenMP_Op<"loop_nest", [SameVariadicOperandSize,
 //===----------------------------------------------------------------------===//
 
 def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
-                         AllTypesMatch<["lowerBound", "upperBound", "step"]>,
                          DeclareOpInterfaceMethods<LoopWrapperInterface>,
-                         RecursiveMemoryEffects, ReductionClauseInterface]> {
+                         RecursiveMemoryEffects, ReductionClauseInterface,
+                         SingleBlockImplicitTerminator<"TerminatorOp">]> {
   let summary = "worksharing-loop construct";
   let description = [{
     The worksharing-loop construct specifies that the iterations of the loop(s)
     will be executed in parallel by threads in the current context. These
     iterations are spread across threads that already exist in the enclosing
-    parallel region. The lower and upper bounds specify a half-open range: the
-    range includes the lower bound but does not include the upper bound. If the
-    `inclusive` attribute is specified then the upper bound is also included.
+    parallel region.
 
-    The body region can contain any number of blocks. The region is terminated
-    by "omp.yield" instruction without operands.
+    The body region can only contain a single block which must contain a single
+    operation and a terminator. The operation must be another compatible loop
+    wrapper or an `omp.loop_nest`.
 
     ```
-    omp.wsloop <clauses>
-    for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
-      %a = load %arrA[%i1, %i2] : memref<?x?xf32>
-      %b = load %arrB[%i1, %i2] : memref<?x?xf32>
-      %sum = arith.addf %a, %b : f32
-      store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
-      omp.yield
+    omp.wsloop <clauses> {
+      omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+        %a = load %arrA[%i1, %i2] : memref<?x?xf32>
+        %b = load %arrB[%i1, %i2] : memref<?x?xf32>
+        %sum = arith.addf %a, %b : f32
+        store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
+        omp.yield
+      }
+      omp.terminator
     }
     ```
 
@@ -665,10 +666,7 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
     passed by reference.
   }];
 
-  let arguments = (ins Variadic<IntLikeType>:$lowerBound,
-             Variadic<IntLikeType>:$upperBound,
-             Variadic<IntLikeType>:$step,
-             Variadic<AnyType>:$linear_vars,
+  let arguments = (ins Variadic<AnyType>:$linear_vars,
              Variadic<I32>:$linear_step_vars,
              Variadic<OpenMP_PointerLikeType>:$reduction_vars,
              OptionalAttr<SymbolRefArrayAttr>:$reductions,
@@ -679,22 +677,16 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
              UnitAttr:$nowait,
              UnitAttr:$byref,
              ConfinedAttr<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
-             OptionalAttr<OrderKindAttr>:$order_val,
-             UnitAttr:$inclusive);
+             OptionalAttr<OrderKindAttr>:$order_val);
 
   let builders = [
-    OpBuilder<(ins "ValueRange":$lowerBound, "ValueRange":$upperBound,
-               "ValueRange":$step,
-               CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
+    OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
     OpBuilder<(ins CArg<"const WsloopClauseOps &">:$clauses)>
   ];
 
   let regions = (region AnyRegion:$region);
 
   let extraClassDeclaration = [{
-    /// Returns the number of loops in the worksharing-loop nest.
-    unsigned getNumLoops() { return getLowerBound().size(); }
-
     /// Returns the number of reduction variables.
     unsigned getNumReductionVars() { return getReductionVars().size(); }
   }];
@@ -711,9 +703,8 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
           |`byref` $byref
           |`ordered` `(` $ordered_val `)`
           |`order` `(` custom<ClauseAttr>($order_val) `)`
-    ) custom<Wsloop>($region, $lowerBound, $upperBound, $step, type($step),
-                     $reduction_vars, type($reduction_vars), $reductions,
-                     $inclusive) attr-dict
+    ) custom<Wsloop>($region, $reduction_vars, type($reduction_vars),
+                     $reductions) attr-dict
   }];
   let hasVerifier = 1;
 }
@@ -732,7 +723,7 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,
     transformed into a SIMD loop (that is, multiple iterations of the loop can
     be executed concurrently using SIMD instructions).
 
-    The body region can contain a single block which must contain a single
+    The body region can only contain a single block which must contain a single
     operation and a terminator. The operation must be another compatible loop
     wrapper or an `omp.loop_nest`.
 
@@ -766,6 +757,7 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,
         store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
         omp.yield
       }
+      omp.terminator
     }
     ```
   }];
@@ -805,8 +797,8 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,
 
 def YieldOp : OpenMP_Op<"yield",
     [Pure, ReturnLike, Terminator,
-     ParentOneOf<["LoopNestOp", "WsloopOp", "DeclareReductionOp",
-     "AtomicUpdateOp", "PrivateClauseOp"]>]> {
+     ParentOneOf<["AtomicUpdateOp", "DeclareReductionOp", "LoopNestOp",
+                  "PrivateClauseOp"]>]> {
   let summary = "loop yield and termination operation";
   let description = [{
     "omp.yield" yields SSA values from the OpenMP dialect op region and
@@ -846,7 +838,7 @@ def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments,
     iterations are spread across threads that already exist in the enclosing
     region.
     
-    The body region can contain a single block which must contain a single
+    The body region can only contain a single block which must contain a single
     operation and a terminator. The operation must be another compatible loop
     wrapper or an `omp.loop_nest`.
 
@@ -864,6 +856,7 @@ def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments,
         store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
         omp.yield
       }
+      omp.terminator
     }
     ```
     // TODO: private_var, firstprivate_var, lastprivate_var, collapse
@@ -1029,7 +1022,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
     iterations are distributed across tasks generated by the construct and
     scheduled to be executed.
 
-    The body region can contain a single block which must contain a single
+    The body region can only contain a single block which must contain a single
     operation and a terminator. The operation must be another compatible loop
     wrapper or an `omp.loop_nest`.
 
@@ -1042,6 +1035,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
         store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
         omp.yield
       }
+      omp.terminator
     }
     ```
 
diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index 7f91367ad427a..d6f85451ee5d3 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -461,18 +461,50 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
       // Replace the loop.
       {
         OpBuilder::InsertionGuard allocaGuard(rewriter);
-        auto loop = rewriter.create<omp::WsloopOp>(
+        // Create worksharing loop wrapper.
+        auto wsloopOp = rewriter.create<omp::WsloopOp>(parallelOp.getLoc());
+        if (!reductionVariables.empty()) {
+          wsloopOp.setReductionsAttr(
+              ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
+          wsloopOp.getReductionVarsMutable().append(reductionVariables);
+        }
+        rewriter.create<omp::TerminatorOp>(loc); // omp.parallel terminator.
+
+        // The wrapper's entry block arguments will define the reduction
+        // variables.
+        llvm::SmallVector<mlir::Type> reductionTypes;
+        reductionTypes.reserve(reductionVariables.size());
+        llvm::transform(reductionVariables, std::back_inserter(reductionTypes),
+                        [](mlir::Value v) { return v.getType(); });
+        rewriter.createBlock(
+            &wsloopOp.getRegion(), {}, reductionTypes,
+            llvm::SmallVector<mlir::Location>(reductionVariables.size(),
+                                              parallelOp.getLoc()));
+
+        rewriter.setInsertionPoint(
+            rewriter.create<omp::TerminatorOp>(parallelOp.getLoc()));
+
+        // Create loop nest and populate region with contents of scf.parallel.
+        auto loopOp = rewriter.create<omp::LoopNestOp>(
             parallelOp.getLoc(), parallelOp.getLowerBound(),
             parallelOp.getUpperBound(), parallelOp.getStep());
-        rewriter.create<omp::TerminatorOp>(loc);
 
-        rewriter.inlineRegionBefore(parallelOp.getRegion(), loop.getRegion(),
-                                    loop.getRegion().begin());
+        rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
+                                    loopOp.getRegion().begin());
 
-        Block *ops = rewriter.splitBlock(&*loop.getRegion().begin(),
-                                         loop.getRegion().begin()->begin());
+        // Remove reduction-related block arguments from omp.loop_nest and
+        // redirect uses to the corresponding omp.wsloop block argument.
+        mlir::Block &loopOpEntryBlock = loopOp.getRegion().front();
+        unsigned numLoops = parallelOp.getNumLoops();
+        rewriter.replaceAllUsesWith(
+            loopOpEntryBlock.getArguments().drop_front(numLoops),
+            wsloopOp.getRegion().getArguments());
+        loopOpEntryBlock.eraseArguments(
+            numLoops, loopOpEntryBlock.getNumArguments() - numLoops);
 
-        rewriter.setInsertionPointToStart(&*loop.getRegion().begin());
+        Block *ops =
+            rewriter.splitBlock(&loopOpEntryBlock, loopOpEntryBlock.begin());
+        rewriter.setInsertionPointToStart(&loopOpEntryBlock);
 
         auto scope = rewriter.create<memref::AllocaScopeOp>(parallelOp.getLoc(),
                                                             TypeRange());
@@ -481,11 +513,6 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
         rewriter.mergeBlocks(ops, scopeBlock);
         rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin());
         rewriter.create<memref::AllocaScopeReturnOp>(loc, ValueRange());
-        if (!reductionVariables.empty()) {
-          loop.setReductionsAttr(
-              ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
-          loop.getReductionVarsMutable().append(reductionVariables);
-        }
       }
     }
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 528a0d05b1011..f60668dd0cf99 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1484,86 +1484,72 @@ LogicalResult SingleOp::verify() {
 // WsloopOp
 //===----------------------------------------------------------------------===//
 
-/// loop-control ::= `(` ssa-id-list `)` `:` type `=`  loop-bounds
-/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
-/// steps := `step` `(`ssa-id-list`)`
 ParseResult
 parseWsloop(OpAsmParser &parser, Region &region,
-            SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
-            SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
-            SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
-            SmallVectorImpl<Type> &loopVarTypes,
             SmallVectorImpl<OpAsmParser::UnresolvedOperand> &reductionOperands,
-            SmallVectorImpl<Type> &reductionTypes, ArrayAttr &reductionSymbols,
-            UnitAttr &inclusive) {
-
+            SmallVectorImpl<Type> &reductionTypes,
+            ArrayAttr &reductionSymbols) {
   // Parse an optional reduction clause
   llvm::SmallVector<OpAsmParser::Argument> privates;
-  bool hasReduction = succeeded(parser.parseOptionalKeyword("reduction")) &&
-                      succeeded(parseClauseWithRegionArgs(
-                          parser, region, reductionOperands, reductionTypes,
-                          reductionSymbols, privates));
-
-  if (parser.parseKeyword("for"))
-    return failure();
-
-  // Parse an opening `(` followed by induction variables followed by `)`
-  SmallVector<OpAsmParser::Argument> ivs;
-  Type loopVarType;
-  if (parser.parseArgumentList(ivs, OpAsmParser::Delimiter::Paren) ||
-      parser.parseColonType(loopVarType) ||
-      // Parse loop bounds.
-      parser.parseEqual() ||
-      parser.parseOperandList(lowerBound, ivs.size(),
-                              OpAsmParser::Delimiter::Paren) ||
-      parser.parseKeyword("to") ||
-      parser.parseOperandList(upperBound, ivs.size(),
-                              OpAsmParser::Delimiter::Paren))
-    return failure();
-
-  if (succeeded(parser.parseOptionalKeyword("inclusive")))
-    inclusive = UnitAttr::get(parser.getBuilder().getContext());
-
-  // Parse step values.
-  if (parser.parseKeyword("step") ||
-      parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren))
-    return failure();
-
-  // Now parse the body.
-  loopVarTypes = SmallVector<Type>(ivs.size(), loopVarType);
-  for (auto &iv : ivs)
-    iv.type = loopVarType;
-
-  SmallVector<OpAsmParser::Argument> regionArgs{ivs};
-  if (hasReduction)
-    llvm::copy(privates, std::back_inserter(regionArgs));
-
-  return parser.parseRegion(region, regionArgs);
+  if (succeeded(parser.parseOptionalKeyword("reduction"))) {
+    if (failed(parseClauseWithRegionArgs(parser, region, reductionOperands,
+                                         reductionTypes, reductionSymbols,
+                                         privates)))
+      return failure();
+  }
+  return parser.parseRegion(region, privates);
 }
 
 void printWsloop(OpAsmPrinter &p, Operation *op, Region &region,
-                 ValueRange lowerBound, ValueRange upperBound, ValueRange steps,
-                 TypeRange loopVarTypes, ValueRange reductionOperands,
-                 TypeRange reductionTypes, ArrayAttr reductionSymbols,
-                 UnitAttr inclusive) {
+                 ValueRange reductionOperands, TypeRange reductionTypes,
+                 ArrayAttr reductionSymbols) {
   if (reductionSymbols) {
-    auto reductionArgs =
-        region.front().getArguments().drop_front(loopVarTypes.size());
+    auto reductionArgs = region.front().getArguments();
     printClauseWithRegionArgs(p, op, reductionArgs, "reduction",
                               reductionOperands, reductionTypes,
                               reductionSymbols);
   }
-
-  p << " for ";
-  auto args = region.front().getArguments().drop_back(reductionOperands.size());
-  p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
-    << ") to (" << upperBound << ") ";
-  if (inclusive)
-    p << "inclusive ";
-  p << "step (" << steps << ") ";
   p.printRegion(region, /*printEntryBlockArgs=*/false);
 }
 
+void WsloopOp::build(OpBuilder &builder, OperationState &state,
+                     ArrayRef<NamedAttribute> attributes) {
+  build(builder, state, /*linear_vars=*/ValueRange(),
+        /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
+        /*reductions=*/nullptr, /*schedule_val=*/nullptr,
+        /*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
+        /*simd_modifier=*/false, /*nowait=*/false, /*byref=*/false,
+        /*ordered_val=*/nullptr, /*order_val=*/nullptr);
+  state.addAttributes(attributes);
+}
+
+void WsloopOp::build(OpBuilder &builder, OperationState &state,
+                     const WsloopClauseOps &clauses) {
+  MLIRContext *ctx = builder.getContext();
+  // TODO: Store clauses in op: allocateVars, allocatorVars, privateVars,
+  // privatizers.
+  WsloopOp::build(
+      builder, state, clauses.linearVars, clauses.linearStepVars,
+      clauses.reductionVars, makeArrayAttr(ctx, clauses.reductionDeclSymbols),
+      clauses.scheduleValAttr, clauses.scheduleChunkVar,
+      clauses.scheduleModAttr, clauses.scheduleSimdAttr, clauses.nowaitAttr,
+      clauses.reductionByRefAttr, clauses.orderedAttr, clauses.orderAttr);
+}
+
+LogicalResult WsloopOp::verify() {
+  if (!isWrapper())
+    return emitOpError() << "must be a loop wrapper";
+
+  if (LoopWrapperInterface nested = getNestedWrapper()) {
+    // Check for the allowed leaf constructs that may appear in a composite
+    // construct directly after DO/FOR.
+    if (!isa<SimdOp>(nested))
+      return emitError() << "only supported nested wrapper is 'omp.simd'";
+  }
+
+  return verifyReductionVarList(*this, getReductions(), getReductionVars());
+}
+
 //===----------------------------------------------------------------------===//
 // Simd construct [2.9.3.1]
 //===----------------------------------------------------------------------===//
@@ -1947,42 +1933,6 @@ void LoopNestOp::gatherWrappers(
   }
 }
 
-//===----------------------------------------------------------------------===//
-// WsloopOp
-//===----------------------------------------------------------------------===//
-
-void WsloopOp::build(OpBuilder &builder, OperationState &state,
-                     ValueRange lowerBound, ValueRange upperBound,
-                     ValueRange step, ArrayRef<NamedAttribute> attributes) {
-  build(builder, state, lowerBound, upperBound, step,
-        /*linear_vars=*/ValueRange(),
-        /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
-        /*reductions=*/nullptr, /*schedule_val=*/nullptr,
-        /*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
-        /*simd_modifier=*/false, /*nowait=*/false, /*byref=*/false,
-        /*ordered_val=*/nullptr,
-        /*order_val=*/nullptr, /*inclusive=*/false);
-  state.addAttributes(attributes);
-}
-
-void WsloopOp::build(OpBuilder &builder, OperationState &state,
-                     const WsloopClauseOps &clauses) {
-  MLIRContext *ctx = builder.getContext();
-  // TODO Store clauses in op: allocateVars, allocatorVars, privateVars,
-  // privatizers.
-  WsloopOp::build(
-      builder, state, clauses.loopLBVar, clauses.loopUBVar, clauses.loopStepVar,
-      clauses.linearVars, clauses.linearStepVars, clauses.reductionVars,
-      makeArrayAttr(ctx, clauses.reductionDeclSymbols), clauses.scheduleValAttr,
-      clauses.scheduleChunkVar, clauses.scheduleModAttr,
-      clauses.scheduleSimdAttr, clauses.nowaitAttr, clauses.reductionByRefAttr,
-      clauses.orderedAttr, clauses.orderAttr, clauses.loopInclusiveAttr);
-}
-
-LogicalResult WsloopOp::verify() {
-  return verifyReductionVarList(*this, getReductions(), getReductionVars());
-}
-
 //===----------------------------------------------------------------------===//
 // Critical construct (2.17.1)
 //===----------------------------------------------------------------------===//
@@ -2014,6 +1964,39 @@ LogicalResult CriticalOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
 // Ordered construct
 //===----------------------------------------------------------------------===//
 
+static LogicalResult verifyOrderedParent(Operation &op) {
+  bool hasRegion = op.getNumRegions() > 0;
+  auto loopOp = op.getParentOfType<LoopNestOp>();
+  if (!loopOp) {
+    if (hasRegion)
+      return success();
+
+    // TODO: Consider if this needs to be the case only for the standalone
+    // variant of the ordered construct.
+    return op.emitOpError() << "must be nested inside of a loop";
+  }
+
+  Operation *wrapper = loopOp->getParentOp();
+  if (auto wsloopOp = dyn_cast<WsloopOp>(wrapper)) {
+    IntegerAttr orderedAttr = wsloopOp.getOrderedValAttr();
+    if (!orderedAttr)
+      return op.emitOpError() << "the enclosing worksharing-loop region must "
+                                 "have an ordered clause";
+
+    if (hasRegion && orderedAttr.getInt() != 0)
+      return op.emitOpError() << "the enclosing loop's ordered clause must not "
+                                 "have a parameter present";
+
+    if (!hasRegion && orderedAttr.getInt() == 0)
+      return op.emitOpError() << "the enclosing loop's ordered clause must "
+                                 "have a parameter present";
+  } else if (!isa<SimdOp>(wrapper)) {
+    return op.emitOpError() << "must be nested inside of a worksharing, simd "
+                               "or worksharing simd loop";
+  }
+  return success();
+}
+
 void OrderedOp::build(OpBuilder &builder, OperationState &state,
                       const OrderedOpClauseOps &clauses) {
   OrderedOp::build(builder, state, clauses.doacrossDependTypeAttr,
@@ -2021,14 +2004,11 @@ void OrderedOp::build(OpBuilder &builder, OperationState &state,
 }
 
 LogicalResult OrderedOp::verify() {
-  auto container = (*this)->getParentOfType<WsloopOp>();
-  if (!container || !container.getOrderedValAttr() ||
-      container.getOrderedValAttr().getInt() == 0)
-    return emitOpError() << "ordered depend directive must be closely "
-                         << "nested inside a worksharing-loop with ordered "
-                         << "clause with parameter present";
-
-  if (container.getOrderedValAttr().getInt() != (int64_t)*getNumLoopsVal())
+  if (failed(verifyOrderedParent(**this)))
+    return failure();
+
+  auto wrapper = (*this)->getParentOfType<WsloopOp>();
+  if (!wrapper || *wrapper.getOrderedVal() != *getNumLoopsVal())
     return emitOpError() << "number of variables in depend clause does not "
                          << "match number of iteration variables in the "
                          << "doacross loop";
@@ -2046,15 +2026,7 @@ LogicalResult OrderedRegionOp::verify() {
   if (getSimd())
     return failure();
 
-  if (auto container = (*this)->getParentOfType<WsloopOp>()) {
-    if (!container.getOrderedValAttr() ||
-        container.getOrderedValAttr().getInt() != 0)
-      return emitOpError() << "ordered region must be closely nested inside "
-                           << "a worksharing-loop region with an ordered "
-                           << "clause without parameter present";
-  }
-
-  return success();
+  return verifyOrderedParent(**this);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2199,15 +2171,19 @@ LogicalResult CancelOp::verify() {
                          << "inside a parallel region";
   }
   if (cct == ClauseCancellationConstructType::Loop) {
-    if (!isa<WsloopOp>(parentOp)) {
-      return emitOpError() << "cancel loop must appear "
-                           << "inside a worksharing-loop region";
+    auto loopOp = dyn_cast<LoopNestOp>(parentOp);
+    auto wsloopOp = llvm::dyn_cast_if_present<WsloopOp>(
+        loopOp ? loopOp->getParentOp() : nullptr);
+
+    if (!wsloopOp) {
+      return emitOpError()
+             << "cancel loop must appear inside a worksharing-loop region";
     }
-    if (cast<WsloopOp>(parentOp).getNowaitAttr()) {
+    if (wsloopOp.getNowaitAttr()) {
       return emitError() << "A worksharing construct that is canceled "
                          << "must not have a nowait clause";
     }
-    if (cast<WsloopOp>(parentOp).getOrderedValAttr()) {
+    if (wsloopOp.getOrderedValAttr()) {
       return emitError() << "A worksharing construct that is canceled "
                          << "must not have an ordered clause";
     }
@@ -2245,7 +2221,7 @@ LogicalResult CancellationPointOp::verify() {
                          << "inside a parallel region";
   }
   if ((cct == ClauseCancellationConstructType::Loop) &&
-      !isa<WsloopOp>(parentOp)) {
+      (!isa<LoopNestOp>(parentOp) || !isa<WsloopOp>(parentOp->getParentOp()))) {
     return emitOpError() << "cancellation point loop must appear "
                          << "inside a worksharing-loop region";
   }
diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 38a8fb8c3e213..81ff8477ffd7b 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -99,8 +99,11 @@ void testOmpCreation(void) {
 "    %1 = arith.constant 1 : i32                                                \n"
 "    %2 = arith.constant 2 : i32                                                \n"
 "    omp.parallel {                                                             \n"
-"      omp.wsloop for (%3) : i32 = (%0) to (%2) step (%1) {                     \n"
-"        omp.yield                                                              \n"
+"      omp.wsloop {                                                             \n"
+"        omp.loop_nest (%3) : i32 = (%0) to (%2) step (%1) {                    \n"
+"          omp.yield                                                            \n"
+"        }                                                                      \n"
+"        omp.terminator                                                         \n"
 "      }                                                                        \n"
 "      omp.terminator                                                           \n"
 "    }                                                                          \n"
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 9f45d139b81f2..3aeb9e70522d5 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -71,15 +71,18 @@ func.func @branch_loop() {
 func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: omp.parallel
   omp.parallel {
-    // CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
-    "omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({
-    ^bb0(%arg6: index, %arg7: index):
-      // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
-      // CHECK-DAG: %[[CAST_ARG7:.*]] = builtin.unrealized_conversion_cast %[[ARG7]] : i64 to index
-      // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> ()
-      "test.payload"(%arg6, %arg7) : (index, index) -> ()
-      omp.yield
-    }) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
+    // CHECK: omp.wsloop {
+    "omp.wsloop"() ({
+      // CHECK: omp.loop_nest (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
+      omp.loop_nest (%arg6, %arg7) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+        // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
+        // CHECK-DAG: %[[CAST_ARG7:.*]] = builtin.unrealized_conversion_cast %[[ARG7]] : i64 to index
+        // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> ()
+        "test.payload"(%arg6, %arg7) : (index, index) -> ()
+        omp.yield
+      }
+      omp.terminator
+    }) : () -> ()
     omp.terminator
   }
   return
@@ -323,12 +326,14 @@ llvm.func @_QPsb() {
 // CHECK-LABEL:  @_QPsimple_reduction
 // CHECK:    %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr
 // CHECK:    omp.parallel
-// CHECK:      omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for
-// CHECK:        %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
-// CHECK:        %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32
-// CHECK:        %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32
-// CHECK:        llvm.store %[[ZEXT]], %[[PRV]] : i32, !llvm.ptr
-// CHECK:        omp.yield
+// CHECK:      omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr)
+// CHECK-NEXT:   omp.loop_nest {{.*}}{
+// CHECK:          %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
+// CHECK:          %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32
+// CHECK:          %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32
+// CHECK:          llvm.store %[[ZEXT]], %[[PRV]] : i32, !llvm.ptr
+// CHECK:          omp.yield
+// CHECK:        omp.terminator
 // CHECK:      omp.terminator
 // CHECK:    llvm.return
 
@@ -354,20 +359,23 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) {
   %4 = llvm.alloca %3 x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr
   %5 = llvm.zext %2 : i1 to i32
   llvm.store %5, %4 : i32, !llvm.ptr
-  omp.parallel   {
+  omp.parallel {
     %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array<i32: 0, 0>, pinned} : (i64) -> !llvm.ptr
-    omp.wsloop   reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for  (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
-      llvm.store %arg1, %6 : i32, !llvm.ptr
-      %7 = llvm.load %6 : !llvm.ptr -> i32
-      %8 = llvm.sext %7 : i32 to i64
-      %9 = llvm.sub %8, %3  : i64
-      %10 = llvm.getelementptr %arg0[0, %9] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<100 x i32>
-      %11 = llvm.load %10 : !llvm.ptr -> i32
-      %12 = llvm.load %prv : !llvm.ptr -> i32
-      %13 = llvm.icmp "eq" %11, %12 : i32
-      %14 = llvm.zext %13 : i1 to i32
-      llvm.store %14, %prv : i32, !llvm.ptr
-      omp.yield
+    omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) {
+      omp.loop_nest (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
+        llvm.store %arg1, %6 : i32, !llvm.ptr
+        %7 = llvm.load %6 : !llvm.ptr -> i32
+        %8 = llvm.sext %7 : i32 to i64
+        %9 = llvm.sub %8, %3  : i64
+        %10 = llvm.getelementptr %arg0[0, %9] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<100 x i32>
+        %11 = llvm.load %10 : !llvm.ptr -> i32
+        %12 = llvm.load %prv : !llvm.ptr -> i32
+        %13 = llvm.icmp "eq" %11, %12 : i32
+        %14 = llvm.zext %13 : i1 to i32
+        llvm.store %14, %prv : i32, !llvm.ptr
+        omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
diff --git a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir
index 3b6c145d62f1a..fc6d56559c261 100644
--- a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir
@@ -28,6 +28,7 @@ func.func @reduction1(%arg0 : index, %arg1 : index, %arg2 : index,
   // CHECK: omp.parallel
   // CHECK: omp.wsloop
   // CHECK-SAME: reduction(@[[$REDF]] %[[BUF]] -> %[[PVT_BUF:[a-z0-9]+]]
+  // CHECK: omp.loop_nest
   // CHECK: memref.alloca_scope
   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                             step (%arg4, %step) init (%zero) -> (f32) {
@@ -43,6 +44,7 @@ func.func @reduction1(%arg0 : index, %arg1 : index, %arg2 : index,
     }
     // CHECK: omp.yield
   }
+  // CHECK:   omp.terminator
   // CHECK: omp.terminator
   // CHECK: llvm.load %[[BUF]]
   return
@@ -107,6 +109,7 @@ func.func @reduction_muli(%arg0 : index, %arg1 : index, %arg2 : index,
   %one = arith.constant 1 : i32
   // CHECK: %[[RED_VAR:.*]] = llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr
   // CHECK: omp.wsloop reduction(@[[$REDI]] %[[RED_VAR]] -> %[[RED_PVT_VAR:.*]] : !llvm.ptr)
+  // CHECK: omp.loop_nest
   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                             step (%arg4, %step) init (%one) -> (i32) {
     // CHECK: %[[C2:.*]] = arith.constant 2 : i32
@@ -208,6 +211,7 @@ func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index,
   // CHECK: omp.wsloop
   // CHECK-SAME: reduction(@[[$REDF1]] %[[BUF1]] -> %[[PVT_BUF1:[a-z0-9]+]]
   // CHECK-SAME:           @[[$REDF2]] %[[BUF2]] -> %[[PVT_BUF2:[a-z0-9]+]]
+  // CHECK: omp.loop_nest
   // CHECK: memref.alloca_scope
   %res:2 = scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                         step (%arg4, %step) init (%zero, %ione) -> (f32, i64) {
@@ -236,6 +240,7 @@ func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index,
     }
     // CHECK: omp.yield
   }
+  // CHECK:   omp.terminator
   // CHECK: omp.terminator
   // CHECK: %[[RES1:.*]] = llvm.load %[[BUF1]] : !llvm.ptr -> f32
   // CHECK: %[[RES2:.*]] = llvm.load %[[BUF2]] : !llvm.ptr -> i64
diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
index acd2690c56e2e..b2f19d294cb5f 100644
--- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
@@ -2,10 +2,11 @@
 
 // CHECK-LABEL: @parallel
 func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
-          %arg3: index, %arg4: index, %arg5: index) {
+                    %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
   // CHECK: memref.alloca_scope
   scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
     // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> ()
@@ -13,6 +14,8 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
     // CHECK:   omp.yield
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
   return
@@ -23,20 +26,26 @@ func.func @nested_loops(%arg0: index, %arg1: index, %arg2: index,
                    %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
-    // CHECK: memref.alloca_scope
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
+  // CHECK: memref.alloca_scope
   scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) {
     // CHECK: omp.parallel
-    // CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
+    // CHECK: omp.wsloop {
+    // CHECK: omp.loop_nest (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
     // CHECK: memref.alloca_scope
     scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) {
       // CHECK: "test.payload"(%[[LVAR_OUT1]], %[[LVAR_IN1]]) : (index, index) -> ()
       "test.payload"(%i, %j) : (index, index) -> ()
       // CHECK: }
     }
-    // CHECK:   omp.yield
+    // CHECK:     omp.yield
+    // CHECK:   }
+    // CHECK:   omp.terminator
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
   return
@@ -47,7 +56,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
                      %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
   // CHECK: memref.alloca_scope
   scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) {
     // CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> ()
@@ -55,12 +65,15 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
     // CHECK:   omp.yield
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
 
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
+  // CHECK: omp.wsloop {
+  // CHECK: omp.loop_nest (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
   // CHECK: memref.alloca_scope
   scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) {
     // CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> ()
@@ -68,6 +81,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
     // CHECK:   omp.yield
     // CHECK: }
   }
+  // CHECK:     omp.terminator
+  // CHECK:   }
   // CHECK:   omp.terminator
   // CHECK: }
   return
diff --git a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir
index 37720e98d92a9..b1b06740f1944 100644
--- a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir
+++ b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir
@@ -32,14 +32,17 @@ llvm.func @repeated_successor_no_args(%arg0: i1) {
 
 // CHECK: @repeated_successor_openmp
 llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
-  omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
-    // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}})
-    llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
-  // CHECK: ^[[BB1]]
-  ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
-    omp.yield
-  // CHECK: ^[[BB2]](%[[ARG:.*]]: i64):
-  // CHECK:  llvm.br ^[[BB1]](%[[ARG]] : i64)
+  omp.wsloop {
+    omp.loop_nest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
+      // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}})
+      llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
+    // CHECK: ^[[BB1]]
+    ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
+      omp.yield
+    // CHECK: ^[[BB2]](%[[ARG:.*]]: i64):
+    // CHECK:  llvm.br ^[[BB1]](%[[ARG]] : i64)
+    }
+    omp.terminator
   }
   llvm.return
 }
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 2f24dce4233e4..e329b3010017c 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -149,50 +149,74 @@ func.func @invalid_parent(%lb : index, %ub : index, %step : index) {
 // -----
 
 func.func @invalid_wrapper(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.parallel {
     %0 = arith.constant 0 : i32
     // expected-error@+1 {{op expects parent op to be a valid loop wrapper}}
     omp.loop_nest (%iv2) : index = (%lb) to (%ub) step (%step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @type_mismatch(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // expected-error@+1 {{range argument type does not match corresponding IV type}}
     "omp.loop_nest" (%lb, %ub, %step) ({
     ^bb0(%iv2: i32):
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // expected-error@+1 {{number of range arguments and IVs do not match}}
     "omp.loop_nest" (%lb, %ub, %step) ({
     ^bb0(%iv1 : index, %iv2 : index):
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
+  }
+}
+
+// -----
+
+func.func @no_wrapper(%lb : index, %ub : index, %step : index) {
+  // expected-error @below {{op must be a loop wrapper}}
+  omp.wsloop {
+    %0 = arith.constant 0 : i32
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }
+}
+
+// -----
+
+func.func @invalid_nested_wrapper(%lb : index, %ub : index, %step : index) {
+  // expected-error @below {{only supported nested wrapper is 'omp.simd'}}
+  omp.wsloop {
+    omp.distribute {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
+      omp.terminator
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @no_loops(%lb : index, %ub : index, %step : index) {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // expected-error@+1 {{op must represent at least one loop}}
     "omp.loop_nest" () ({
     ^bb0():
@@ -205,10 +229,12 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) {
 // -----
 
 func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop nowait inclusive
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop nowait inclusive {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
@@ -216,39 +242,47 @@ func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
 
 func.func @order_value(%lb : index, %ub : index, %step : index) {
   // expected-error @below {{invalid clause value: 'default'}}
-  omp.wsloop order(default)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop order(default) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop if(%bool_var: i1)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop if(%bool_var: i1) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop num_threads(%int_var: i32)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop num_threads(%int_var: i32) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
 // -----
 
 func.func @proc_bind_not_allowed(%lb : index, %ub : index, %step : index) {
-  // expected-error @below {{expected 'for'}}
-  omp.wsloop proc_bind(close)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  // expected-error @below {{expected '{'}}
+  omp.wsloop proc_bind(close) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 }
 
@@ -256,9 +290,11 @@ func.func @proc_bind_not_allowed(%lb : index, %ub : index, %step : index) {
 
 llvm.func @test_omp_wsloop_dynamic_bad_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{unknown modifier type: ginandtonic}}
-  omp.wsloop schedule(dynamic, ginandtonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, ginandtonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -267,9 +303,11 @@ llvm.func @test_omp_wsloop_dynamic_bad_modifier(%lb : i64, %ub : i64, %step : i6
 
 llvm.func @test_omp_wsloop_dynamic_many_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{unexpected modifier(s)}}
-  omp.wsloop schedule(dynamic, monotonic, monotonic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, monotonic, monotonic, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -278,9 +316,11 @@ llvm.func @test_omp_wsloop_dynamic_many_modifier(%lb : i64, %ub : i64, %step : i
 
 llvm.func @test_omp_wsloop_dynamic_wrong_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{incorrect modifier order}}
-  omp.wsloop schedule(dynamic, simd, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, simd, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -289,9 +329,11 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier(%lb : i64, %ub : i64, %step :
 
 llvm.func @test_omp_wsloop_dynamic_wrong_modifier2(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{incorrect modifier order}}
-  omp.wsloop schedule(dynamic, monotonic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, monotonic, monotonic) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -300,9 +342,11 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier2(%lb : i64, %ub : i64, %step :
 
 llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step : i64) -> () {
   // expected-error @+1 {{incorrect modifier order}}
-  omp.wsloop schedule(dynamic, simd, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop schedule(dynamic, simd, simd) {
+    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -601,11 +645,13 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
   %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
 
   // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}}
-  omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %2 = arith.constant 2.0 : f32
-    omp.reduction %2, %1 : f32, !llvm.ptr
-    omp.yield
+  omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %2 = arith.constant 2.0 : f32
+      omp.reduction %2, %1 : f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -629,11 +675,13 @@ func.func @foo(%lb : index, %ub : index, %step : index) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
 
   // expected-error @below {{accumulator variable used more than once}}
-  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %2 = arith.constant 2.0 : f32
-    omp.reduction %2, %0 : f32, !llvm.ptr
-    omp.yield
+  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %2 = arith.constant 2.0 : f32
+      omp.reduction %2, %0 : f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -662,11 +710,13 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) {
   %c1 = arith.constant 1 : i32
 
   // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}}
-  omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %2 = arith.constant 2.0 : f32
-    omp.reduction %2, %mem : f32, memref<1xf32>
-    omp.yield
+  omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %2 = arith.constant 2.0 : f32
+      omp.reduction %2, %mem : f32, memref<1xf32>
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -698,60 +748,112 @@ omp.critical.declare @mutex hint(invalid_hint)
 
 // -----
 
-func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop ordered(1)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
-    omp.ordered.region {
-      omp.terminator
+func.func @omp_ordered_region1(%x : i32) -> () {
+  omp.distribute {
+    omp.loop_nest (%i) : i32 = (%x) to (%x) step (%x) {
+      // expected-error @below {{op must be nested inside of a worksharing, simd or worksharing simd loop}}
+      omp.ordered.region {
+        omp.terminator
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
   return
 }
 
 // -----
 
-func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
-    omp.ordered.region {
-      omp.terminator
+func.func @omp_ordered_region2(%x : i32) -> () {
+  omp.wsloop {
+    omp.loop_nest (%i) : i32 = (%x) to (%x) step (%x) {
+      // expected-error @below {{the enclosing worksharing-loop region must have an ordered clause}}
+      omp.ordered.region {
+        omp.terminator
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
   return
 }
 
 // -----
 
-func.func @omp_ordered3(%vec0 : i64) -> () {
-  // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
+func.func @omp_ordered_region3(%x : i32) -> () {
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%i) : i32 = (%x) to (%x) step (%x) {
+      // expected-error @below {{the enclosing loop's ordered clause must not have a parameter present}}
+      omp.ordered.region {
+        omp.terminator
+      }
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func @omp_ordered1(%vec0 : i64) -> () {
+  // expected-error @below {{op must be nested inside of a loop}}
   omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
   return
 }
 
 // -----
 
-func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
-  omp.wsloop ordered(0)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
-    omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
+  omp.distribute {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{op must be nested inside of a worksharing, simd or worksharing simd loop}}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
 
-    omp.yield
+// -----
+
+func.func @omp_ordered3(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
+  omp.wsloop {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{the enclosing worksharing-loop region must have an ordered clause}}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
+
 // -----
 
-func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () {
-  omp.wsloop ordered(1)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
-    omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
+  omp.wsloop ordered(0) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{the enclosing loop's ordered clause must have a parameter present}}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
 
-    omp.yield
+// -----
+
+func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () {
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
+      omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -1590,11 +1692,13 @@ func.func @omp_cancel2() {
 // -----
 
 func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop nowait
-    for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}}
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop nowait {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}}
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
@@ -1603,11 +1707,13 @@ func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
 // -----
 
 func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
-  omp.wsloop ordered(1)
-    for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
@@ -2143,4 +2249,4 @@ func.func @undefined_privatizer(%arg0: !llvm.ptr) {
       omp.terminator
     }) : (!llvm.ptr) -> ()
   return
-}
+}
\ No newline at end of file
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index c10fc88211c36..a012588f0b552 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -90,10 +90,9 @@ func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : i
     // CHECK-NEXT: omp.parallel
     omp.parallel {
       // CHECK-NEXT: omp.wsloop
-      // TODO Remove induction variables from omp.wsloop.
-      omp.wsloop for (%iv) : index = (%idx) to (%idx) step (%idx) {
+      omp.wsloop {
         // CHECK-NEXT: omp.loop_nest
-        omp.loop_nest (%iv2) : index = (%idx) to (%idx) step (%idx) {
+        omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
           omp.yield
         }
         omp.terminator
@@ -153,49 +152,45 @@ func.func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_thre
 
 // CHECK-LABEL: omp_loop_nest
 func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}})
     "omp.loop_nest" (%lb, %ub, %step) ({
-    ^bb0(%iv2: index):
+    ^bb0(%iv: index):
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
     "omp.loop_nest" (%lb, %ub, %step) ({
-    ^bb0(%iv2: index):
+    ^bb0(%iv: index):
       omp.yield
     }) {inclusive} : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}, %{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
     "omp.loop_nest" (%lb, %lb, %ub, %ub, %step, %step) ({
-    ^bb0(%iv2: index, %iv3: index):
+    ^bb0(%iv: index, %iv3: index):
       omp.yield
     }) : (index, index, index, index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}})
     "omp.loop_nest" (%lb, %ub, %step) ({
-    ^bb0(%iv2: index):
+    ^bb0(%iv: index):
       // CHECK: test.op1
       "test.op1"(%lb) : (index) -> ()
       // CHECK: test.op2
@@ -203,7 +198,7 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
       // CHECK: omp.yield
       omp.yield
     }) : (index, index, index) -> ()
-    omp.yield
+    omp.terminator
   }
 
   return
@@ -211,45 +206,41 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () {
 
 // CHECK-LABEL: omp_loop_nest_pretty
 func.func @omp_loop_nest_pretty(%lb : index, %ub : index, %step : index) -> () {
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-    omp.loop_nest (%iv2) : index = (%lb) to (%ub) step (%step) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
-    omp.loop_nest (%iv2) : index = (%lb) to (%ub) inclusive step (%step) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) inclusive step (%step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
-    omp.loop_nest (%iv2, %iv3) : index = (%lb, %lb) to (%ub, %ub) step (%step, %step) {
+    omp.loop_nest (%iv1, %iv2) : index = (%lb, %lb) to (%ub, %ub) step (%step, %step) {
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  // TODO Remove induction variables from omp.wsloop.
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
+  omp.wsloop {
     // CHECK: omp.loop_nest
     // CHECK-SAME: (%{{.*}}) : index =
     // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-    omp.loop_nest (%iv2) : index = (%lb) to (%ub) step (%step)  {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step)  {
       // CHECK: test.op1
       "test.op1"(%lb) : (index) -> ()
       // CHECK: test.op2
@@ -257,201 +248,271 @@ func.func @omp_loop_nest_pretty(%lb : index, %ub : index, %step : index) -> () {
       // CHECK: omp.yield
       omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
   return
 }
 
-// CHECK-LABEL: omp_wsloop
-func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
+// CHECK-LABEL: omp_loop_nest_pretty_multi_block
+func.func @omp_loop_nest_pretty_multi_block(%lb : index, %ub : index,
+    %step : index, %data1 : memref<?xi32>, %data2 : memref<?xi32>) -> () {
 
-  // CHECK: omp.wsloop ordered(1)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %1 = "test.payload"(%iv) : (index) -> (i32)
+      cf.br ^bb1(%1: i32)
+    ^bb1(%arg: i32):
+      memref.store %arg, %data1[%iv] : memref<?xi32>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0>, ordered_val = 1} :
-    (index, index, index) -> ()
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %c = "test.condition"(%iv) : (index) -> (i1)
+      %v1 = "test.payload"(%iv) : (index) -> (i32)
+      cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
+    ^bb1(%arg0: i32):
+      memref.store %arg0, %data1[%iv] : memref<?xi32>
+      cf.br ^bb3
+    ^bb2(%arg1: i32):
+      memref.store %arg1, %data2[%iv] : memref<?xi32>
+      cf.br ^bb3
+    ^bb3:
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,0>, schedule_val = #omp<schedulekind static>} :
-    (index, index, index, memref<i32>, i32) -> ()
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %c = "test.condition"(%iv) : (index) -> (i1)
+      %v1 = "test.payload"(%iv) : (index) -> (i32)
+      cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
+    ^bb1(%arg0: i32):
+      memref.store %arg0, %data1[%iv] : memref<?xi32>
+      omp.yield
+    ^bb2(%arg1: i32):
+      memref.store %arg1, %data2[%iv] : memref<?xi32>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,2,2,0,0>, schedule_val = #omp<schedulekind static>} :
-    (index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
-    ^bb0(%iv: index):
+  return
+}
+
+// CHECK-LABEL: omp_loop_nest_pretty_non_index
+func.func @omp_loop_nest_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i64, %ub2 : i64, %step2 : i64, %data1 : memref<?xi32>,
+    %data2 : memref<?xi64>) -> () {
+
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      cf.br ^bb1(%1: index)
+    ^bb1(%arg1: index):
+      memref.store %iv1, %data1[%arg1] : memref<?xi32>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,1>, schedule_val = #omp<schedulekind dynamic>, ordered_val = 2} :
-    (index, index, index, memref<i32>, i32, i32) -> ()
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop schedule(auto) nowait
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  "omp.wsloop" (%lb, %ub, %step) ({
-    ^bb0(%iv: index):
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+    omp.loop_nest (%iv) : i64 = (%lb2) to (%ub2) step (%step2) {
+      %2 = "test.payload"(%iv) : (i64) -> (index)
+      cf.br ^bb1(%2: index)
+    ^bb1(%arg2: index):
+      memref.store %iv, %data2[%arg2] : memref<?xi64>
       omp.yield
-  }) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0>, nowait, schedule_val = #omp<schedulekind auto>} :
-    (index, index, index) -> ()
+    }
+    omp.terminator
+  }
 
   return
 }
 
-// CHECK-LABEL: omp_wsloop_pretty
-func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
+// CHECK-LABEL: omp_loop_nest_pretty_multiple
+func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
 
-  // CHECK: omp.wsloop ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
+    omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      %2 = "test.payload"(%iv2) : (i32) -> (index)
+      memref.store %iv1, %data1[%1] : memref<?xi32>
+      memref.store %iv2, %data1[%2] : memref<?xi32>
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop schedule(static) linear(%data_var = %linear_var : memref<i32>)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  return
+}
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+// CHECK-LABEL: omp_wsloop
+func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic)
-  for (%iv) : index = (%lb) to (%ub) step (%step)  {
-    omp.yield
-  }
+  // CHECK: omp.wsloop ordered(1) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" () ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 0,0,0,0>, ordered_val = 1} :
+    () -> ()
 
-  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" (%data_var, %linear_var) ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 1,1,0,0>, schedule_val = #omp<schedulekind static>} :
+    (memref<i32>, i32) -> ()
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" (%data_var, %data_var, %linear_var, %linear_var) ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 2,2,0,0>, schedule_val = #omp<schedulekind static>} :
+    (memref<i32>, memref<i32>, i32, i32) -> ()
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) inclusive step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" (%data_var, %linear_var, %chunk_var) ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 1,1,0,1>, schedule_val = #omp<schedulekind dynamic>, ordered_val = 2} :
+    (memref<i32>, i32, i32) -> ()
 
-  // CHECK: omp.wsloop nowait
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop nowait
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop schedule(auto) nowait {
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" () ({
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }) {operandSegmentSizes = array<i32: 0,0,0,0>, nowait, schedule_val = #omp<schedulekind auto>} :
+    () -> ()
 
-  // CHECK: omp.wsloop nowait order(concurrent)
-  // CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop order(concurrent) nowait
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    omp.yield
-  }
+  // CHECK: omp.wsloop {
+  // CHECK-NEXT: omp.simd
+  // CHECK-NEXT: omp.loop_nest
+  "omp.wsloop" () ({
+    omp.simd {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }) : () -> ()
 
   return
 }
 
-// CHECK-LABEL: omp_wsloop_pretty_multi_block
-func.func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %data1 : memref<?xi32>, %data2 : memref<?xi32>) -> () {
+// CHECK-LABEL: omp_wsloop_pretty
+func.func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %1 = "test.payload"(%iv) : (index) -> (i32)
-    cf.br ^bb1(%1: i32)
-  ^bb1(%arg: i32):
-    memref.store %arg, %data1[%iv] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %c = "test.condition"(%iv) : (index) -> (i1)
-    %v1 = "test.payload"(%iv) : (index) -> (i32)
-    cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
-  ^bb1(%arg0: i32):
-    memref.store %arg0, %data1[%iv] : memref<?xi32>
-    cf.br ^bb3
-  ^bb2(%arg1: i32):
-    memref.store %arg1, %data2[%iv] : memref<?xi32>
-    cf.br ^bb3
-  ^bb3:
-    omp.yield
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop schedule(static) linear(%data_var = %linear_var : memref<i32>) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %c = "test.condition"(%iv) : (index) -> (i1)
-    %v1 = "test.payload"(%iv) : (index) -> (i32)
-    cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
-  ^bb1(%arg0: i32):
-    memref.store %arg0, %data1[%iv] : memref<?xi32>
-    omp.yield
-  ^bb2(%arg1: i32):
-    memref.store %arg1, %data2[%iv] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  return
-}
-
-// CHECK-LABEL: omp_wsloop_pretty_non_index
-func.func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i64, %ub2 : i64, %step2 : i64,
-                           %data1 : memref<?xi32>, %data2 : memref<?xi64>) -> () {
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step)  {
+      omp.yield
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
-    %1 = "test.payload"(%iv1) : (i32) -> (index)
-    cf.br ^bb1(%1: index)
-  ^bb1(%arg1: index):
-    memref.store %iv1, %data1[%arg1] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) ordered(2) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  // CHECK: omp.wsloop for (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
-  omp.wsloop for (%iv2) : i64 = (%lb2) to (%ub2) step (%step2) {
-    %2 = "test.payload"(%iv2) : (i64) -> (index)
-    cf.br ^bb1(%2: index)
-  ^bb1(%arg2: index):
-    memref.store %iv2, %data2[%arg2] : memref<?xi64>
-    omp.yield
+  // CHECK: omp.wsloop {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
   }
 
-  return
-}
+  // CHECK: omp.wsloop nowait {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop nowait {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }
 
-// CHECK-LABEL: omp_wsloop_pretty_multiple
-func.func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+  // CHECK: omp.wsloop nowait order(concurrent) {
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop order(concurrent) nowait {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+    omp.terminator
+  }
 
-  // CHECK: omp.wsloop for (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
-  omp.wsloop for (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
-    %1 = "test.payload"(%iv1) : (i32) -> (index)
-    %2 = "test.payload"(%iv2) : (i32) -> (index)
-    memref.store %iv1, %data1[%1] : memref<?xi32>
-    memref.store %iv2, %data1[%2] : memref<?xi32>
-    omp.yield
+  // CHECK: omp.wsloop {
+  // CHECK-NEXT: omp.simd
+  // CHECK-NEXT: omp.loop_nest
+  omp.wsloop {
+    omp.simd {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
+      omp.terminator
+    }
+    omp.terminator
   }
 
   return
@@ -659,7 +720,7 @@ func.func @omp_distribute(%chunk_size : i32, %data_var : memref<i32>, %arg0 : i3
   // CHECK: omp.distribute
   omp.distribute {
     omp.simd {
-      omp.loop_nest (%iv2) : i32 = (%arg0) to (%arg0) step (%arg0) {
+      omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
         omp.yield
       }
     }
@@ -791,17 +852,19 @@ func.func @wsloop_reduction(%lb : index, %ub : index, %step : index) {
   %c1 = arith.constant 1 : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   // CHECK: reduction(@add_f32 %{{.+}} -> %[[PRV:.+]] : !llvm.ptr)
-  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    // CHECK: %[[CST:.+]] = arith.constant 2.0{{.*}} : f32
-    %cst = arith.constant 2.0 : f32
-    // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> f32
-    %lprv = llvm.load %prv : !llvm.ptr -> f32
-    // CHECK: %[[RES:.+]] = llvm.fadd %[[LPRV]], %[[CST]] : f32
-    %res = llvm.fadd %lprv, %cst: f32
-    // CHECK: llvm.store %[[RES]], %[[PRV]] :  f32, !llvm.ptr
-    llvm.store %res, %prv :  f32, !llvm.ptr
-    omp.yield
+  omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: %[[CST:.+]] = arith.constant 2.0{{.*}} : f32
+      %cst = arith.constant 2.0 : f32
+      // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> f32
+      %lprv = llvm.load %prv : !llvm.ptr -> f32
+      // CHECK: %[[RES:.+]] = llvm.fadd %[[LPRV]], %[[CST]] : f32
+      %res = llvm.fadd %lprv, %cst: f32
+      // CHECK: llvm.store %[[RES]], %[[PRV]] :  f32, !llvm.ptr
+      llvm.store %res, %prv :  f32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -828,14 +891,19 @@ func.func @parallel_wsloop_reduction(%lb : index, %ub : index, %step : index) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   // CHECK: omp.parallel reduction(@add_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) {
   omp.parallel reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
-    // CHECK: omp.wsloop for (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}})
-    omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-      %1 = arith.constant 2.0 : f32
-      %2 = llvm.load %prv : !llvm.ptr -> f32
-      // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
-      llvm.fadd %1, %2 : f32
-      // CHECK: omp.yield
-      omp.yield
+    // CHECK: omp.wsloop {
+    omp.wsloop {
+      // CHECK: omp.loop_nest (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        %1 = arith.constant 2.0 : f32
+        %2 = llvm.load %prv : !llvm.ptr -> f32
+        // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
+        llvm.fadd %1, %2 : f32
+        // CHECK: omp.yield
+        omp.yield
+      }
+      // CHECK: omp.terminator
+      omp.terminator
     }
     // CHECK: omp.terminator
     omp.terminator
@@ -959,16 +1027,18 @@ combiner {
 // CHECK-LABEL: func @wsloop_reduction2
 func.func @wsloop_reduction2(%lb : index, %ub : index, %step : index) {
   %0 = memref.alloca() : memref<1xf32>
-  // CHECK: omp.wsloop reduction(@add2_f32 %{{.+}} -> %{{.+}} : memref<1xf32>)
-  omp.wsloop reduction(@add2_f32 %0 -> %prv : memref<1xf32>)
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    %1 = arith.constant 2.0 : f32
-    %2 = arith.constant 0 : index
-    %3 = memref.load %prv[%2] : memref<1xf32>
-    // CHECK: llvm.fadd
-    %4 = llvm.fadd %1, %3 : f32
-    memref.store %4, %prv[%2] : memref<1xf32>
-    omp.yield
+  // CHECK: omp.wsloop reduction(@add2_f32 %{{.+}} -> %{{.+}} : memref<1xf32>) {
+  omp.wsloop reduction(@add2_f32 %0 -> %prv : memref<1xf32>) {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %1 = arith.constant 2.0 : f32
+      %2 = arith.constant 0 : index
+      %3 = memref.load %prv[%2] : memref<1xf32>
+      // CHECK: llvm.fadd
+      %4 = llvm.fadd %1, %3 : f32
+      memref.store %4, %prv[%2] : memref<1xf32>
+      omp.yield
+    }
+    omp.terminator
   }
   return
 }
@@ -995,14 +1065,19 @@ func.func @parallel_wsloop_reduction2(%lb : index, %ub : index, %step : index) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   // CHECK: omp.parallel reduction(@add2_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) {
   omp.parallel reduction(@add2_f32 %0 -> %prv : !llvm.ptr) {
-    // CHECK: omp.wsloop for (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}})
-    omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
-      %1 = arith.constant 2.0 : f32
-      %2 = llvm.load %prv : !llvm.ptr -> f32
-      // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
-      %3 = llvm.fadd %1, %2 : f32
-      // CHECK: omp.yield
-      omp.yield
+    // CHECK: omp.wsloop {
+    omp.wsloop {
+      // CHECK: omp.loop_nest (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        %1 = arith.constant 2.0 : f32
+        %2 = llvm.load %prv : !llvm.ptr -> f32
+        // CHECK: llvm.fadd %{{.+}}, %{{.+}} : f32
+        %3 = llvm.fadd %1, %2 : f32
+        // CHECK: omp.yield
+        omp.yield
+      }
+      // CHECK: omp.terminator
+      omp.terminator
     }
     // CHECK: omp.terminator
     omp.terminator
@@ -1076,36 +1151,44 @@ func.func @omp_ordered(%arg1 : i32, %arg2 : i32, %arg3 : i32,
     omp.terminator
   }
 
-  omp.wsloop ordered(0)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3)  {
-    omp.ordered.region {
-      omp.terminator
+  omp.wsloop ordered(0) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3)  {
+      // CHECK: omp.ordered.region
+      omp.ordered.region {
+        // CHECK: omp.terminator
+        omp.terminator
+      }
+      omp.yield
     }
-    omp.yield
+    omp.terminator
   }
 
-  omp.wsloop ordered(1)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // Only one DEPEND(SINK: vec) clause
-    // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
-    omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+  omp.wsloop ordered(1) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // Only one DEPEND(SINK: vec) clause
+      // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
 
-    // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
-    omp.ordered depend_type(dependsource) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+      // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
+      omp.ordered depend_type(dependsource) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
 
-  omp.wsloop ordered(2)
-  for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
-    // Multiple DEPEND(SINK: vec) clauses
-    // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
-    omp.ordered depend_type(dependsink) depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+  omp.wsloop ordered(2) {
+    omp.loop_nest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+      // Multiple DEPEND(SINK: vec) clauses
+      // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+      omp.ordered depend_type(dependsink) depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
 
-    // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64}
-    omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+      // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64}
+      omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
 
-    omp.yield
+      omp.yield
+    }
+    omp.terminator
   }
 
   return
@@ -1956,11 +2039,13 @@ func.func @omp_cancel_parallel(%if_cond : i1) -> () {
 }
 
 func.func @omp_cancel_wsloop(%lb : index, %ub : index, %step : index) {
-  omp.wsloop
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    // CHECK: omp.cancel cancellation_construct_type(loop)
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: omp.cancel cancellation_construct_type(loop)
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return
@@ -1991,13 +2076,15 @@ func.func @omp_cancellationpoint_parallel() -> () {
 }
 
 func.func @omp_cancellationpoint_wsloop(%lb : index, %ub : index, %step : index) {
-  omp.wsloop
-  for (%iv) : index = (%lb) to (%ub) step (%step) {
-    // CHECK: omp.cancellation_point cancellation_construct_type(loop)
-    omp.cancellation_point cancellation_construct_type(loop)
-    // CHECK: omp.cancel cancellation_construct_type(loop)
-    omp.cancel cancellation_construct_type(loop)
-    // CHECK: omp.terminator
+  omp.wsloop {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: omp.cancellation_point cancellation_construct_type(loop)
+      omp.cancellation_point cancellation_construct_type(loop)
+      // CHECK: omp.cancel cancellation_construct_type(loop)
+      omp.cancel cancellation_construct_type(loop)
+      // CHECK: omp.yield
+      omp.yield
+    }
     omp.terminator
   }
   return