diff --git a/cli/asc.js b/cli/asc.js
index a94d42a69c..7c2baf3ae1 100644
--- a/cli/asc.js
+++ b/cli/asc.js
@@ -635,26 +635,22 @@ exports.main = function main(argv, options, callback) {
   if (args.trapMode === "clamp") {
     stats.optimizeCount++;
     stats.optimizeTime += measure(() => {
-      module.runPasses([ "trap-mode-clamp" ]);
+      module.runPass("trap-mode-clamp");
     });
   } else if (args.trapMode === "js") {
     stats.optimizeCount++;
     stats.optimizeTime += measure(() => {
-      module.runPasses([ "trap-mode-js" ]);
+      module.runPass("trap-mode-js");
     });
   } else if (args.trapMode !== "allow") {
     module.dispose();
     return callback(Error("Unsupported trap mode"));
   }
 
-  // Implicitly run costly non-LLVM optimizations on -O3 or -Oz
-  // see: https://github.com/WebAssembly/binaryen/pull/1596
-  if (optimizeLevel >= 3 || shrinkLevel >= 2) optimizeLevel = 4;
-
-  module.setOptimizeLevel(optimizeLevel);
-  module.setShrinkLevel(shrinkLevel);
-  module.setDebugInfo(args.debug);
-
+  // Optimize the module
+  const debugInfo = args.debug;
+  const usesARC = args.runtime == "half" || args.runtime == "full";
+  const converge = args.converge;
   const runPasses = [];
   if (args.runPasses) {
     if (typeof args.runPasses === "string") {
@@ -668,213 +664,16 @@ exports.main = function main(argv, options, callback) {
     }
   }
 
-  function doOptimize() {
-    const hasARC = args.runtime == "half" || args.runtime == "full";
-    const passes = [];
-    function add(pass) { passes.push(pass); }
-
-    if (optimizeLevel >= 2 && shrinkLevel === 0) {
-      // tweak inlining options when speed more preferable than size
-      module.setAlwaysInlineMaxSize(12);
-      module.setFlexibleInlineMaxSize(70);
-      module.setOneCallerInlineMaxSize(200);
-    } else {
-      // tweak inlining options when size matters
-      optimizeLevel === 0 && shrinkLevel >= 0
-        ? module.setAlwaysInlineMaxSize(2)
-        : module.setAlwaysInlineMaxSize(4);  // default:  2
-      module.setFlexibleInlineMaxSize(65);   // default: 20
-      module.setOneCallerInlineMaxSize(80);  // default: 15
-    }
-
-    // Optimize the module if requested
-    if (optimizeLevel > 0 || shrinkLevel > 0) {
-      // Binaryen's default passes with Post-AssemblyScript passes added.
-      // see: Binaryen/src/pass.cpp
-
-      // PassRunner::addDefaultGlobalOptimizationPrePasses
-      add("duplicate-function-elimination");
-      add("remove-unused-module-elements"); // differs
-
-      // PassRunner::addDefaultFunctionOptimizationPasses
-      if (optimizeLevel >= 3 || shrinkLevel >= 1) {
-        add("ssa-nomerge");
-      }
-      if (optimizeLevel >= 3) {
-        add("flatten"); // differs
-        add("simplify-locals-notee-nostructure"); // differs
-        add("vacuum"); // differs
-        add("code-folding"); // differs
-        add("flatten");
-        add("local-cse");
-        add("reorder-locals"); // differs
-      }
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) { // differs
-        add("rse");
-        add("vacuum");
-      }
-      if (hasARC) { // differs
-        if (optimizeLevel < 3) {
-          add("flatten");
-        }
-        add("post-assemblyscript");
-      }
-      add("optimize-instructions"); // differs
-      add("inlining"); // differs
-      add("dce");
-      add("remove-unused-brs");
-      add("remove-unused-names");
-      add("inlining-optimizing"); // differs
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
-        add("pick-load-signs");
-        add("simplify-globals-optimizing"); // differs
-      }
-      if (optimizeLevel >= 3 || shrinkLevel >= 2) {
-        add("precompute-propagate");
-      } else {
-        add("precompute");
-      }
-      add("vacuum"); // differs
-      // this will be done later (1)
-      // if (optimizeLevel >= 2 || shrinkLevel >= 2) {
-      //   add("code-pushing");
-      // }
-      if (optimizeLevel >= 3 && shrinkLevel <= 1) { // differs
-        add("licm");
-      }
-      add("simplify-locals-nostructure");
-      add("vacuum");
-      add("reorder-locals");
-      add("remove-unused-brs");
-      // if (optimizeLevel >= 3 || shrinkLevel >= 2) { // do it later
-      //   add("merge-locals");
-      // }
-      add("coalesce-locals");
-      add("simplify-locals");
-      add("vacuum");
-      add("reorder-locals");
-      add("coalesce-locals");
-      add("reorder-locals");
-      if (optimizeLevel >= 3 || shrinkLevel >= 1) { // differs
-        add("merge-locals");
-      }
-      add("vacuum");
-      if (optimizeLevel >= 3 || shrinkLevel >= 1) {
-        add("code-folding");
-      }
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) { // differs
-        add("simplify-globals-optimizing");
-      }
-      add("merge-blocks");
-      add("remove-unused-brs");
-      add("remove-unused-names");
-      add("merge-blocks");
-      // make this later & move to (2)
-      // if (optimizeLevel >= 3 || shrinkLevel >= 2) {
-      //   add("precompute-propagate");
-      // } else {
-      //   add("precompute");
-      // }
-      if (optimizeLevel >= 3) {
-        add("optimize-instructions");
-      }
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
-        add("rse");
-      }
-      add("vacuum");
-      // PassRunner::addDefaultGlobalOptimizationPostPasses
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
-        add("simplify-globals-optimizing"); // differs
-        add("dae-optimizing");
-      }
-      if (optimizeLevel >= 2 || shrinkLevel >= 2) {
-        add("inlining-optimizing");
-      }
-      if (module.getLowMemoryUnused()) {
-        if (optimizeLevel >= 3 || shrinkLevel >= 1) {
-          add("optimize-added-constants-propagate");
-        } else {
-          add("optimize-added-constants");
-        }
-      }
-      // "duplicate-function-elimination" will better done later
-      // add("duplicate-function-elimination");
-      add("duplicate-import-elimination");
-      if (optimizeLevel >= 2 || shrinkLevel >= 2) {
-        add("simplify-globals-optimizing");
-      } else {
-        add("simplify-globals");
-        add("vacuum"); // differs
-      }
-      // moved from (2)
-      // it works better after globals optimizations like simplify-globals, inlining-optimizing and etc
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) { // differs
-        add("precompute-propagate");
-      } else {
-        add("precompute");
-      }
-      // replace indirect calls with direct, reduce arity and
-      // inline this calls if possible
-      add("directize"); // differs
-      add("dae-optimizing"); // differs
-      add("inlining-optimizing"); // differs
-      // ARC finalization should be done exactly after inlining for better release/retain reduction
-      if (hasARC) { // differs
-        add("post-assemblyscript-finalize");
-      }
-      if (optimizeLevel >= 2 || shrinkLevel >= 1) { // differs
-        add("rse");
-        // move some code after early return which potentially could reduce computations
-        // do this after CFG cleanup (originally it was done before)
-        // moved from (1)
-        add("code-pushing");
-        if (optimizeLevel >= 3) {
-          // this quite expensive so do this only for highest opt level
-          add("simplify-globals");
-          add("vacuum");
-          // replace indirect calls with direct and inline if possible again.
-          add("inlining-optimizing");
-          add("directize");
-          add("dae-optimizing");
-          add("precompute-propagate");
-          add("vacuum");
-          add("merge-locals");
-          add("coalesce-locals");
-          add("simplify-locals-nostructure");
-          add("vacuum");
-          add("inlining-optimizing");
-          add("precompute-propagate");
-        }
-        add("remove-unused-brs");
-        add("remove-unused-names");
-        add("vacuum");
-        add("optimize-instructions");
-        add("simplify-globals-optimizing");
-      }
-      // remove unused elements of table and pack / reduce memory
-      add("duplicate-function-elimination"); // differs
-      add("remove-unused-nonfunction-module-elements"); // differs
-      add("memory-packing");
-      add("remove-unused-module-elements"); // differs
-      // It seems stack-ir unuseful for our needs.
-      // if (optimizeLevel >= 3 || shrinkLevel >= 1) { // differs. was optimizeLevel >= 2
-      //   add("generate-stack-ir");
-      //   add("optimize-stack-ir");
-      // }
-    }
-
-    // Append additional passes if requested and execute
-    module.runPasses(passes.concat(runPasses));
-  }
-
   stats.optimizeTime += measure(() => {
     stats.optimizeCount++;
-    doOptimize();
-    if (args.converge) {
+    module.optimize(optimizeLevel, shrinkLevel, debugInfo, usesARC);
+    module.runPasses(runPasses);
+    if (converge) {
       let last = module.toBinary();
       do {
         stats.optimizeCount++;
-        doOptimize();
+        module.optimize(optimizeLevel, shrinkLevel, debugInfo, usesARC);
+        module.runPasses(runPasses);
         let next = module.toBinary();
         if (next.output.length >= last.output.length) {
           if (next.output.length > last.output.length) {
diff --git a/src/module.ts b/src/module.ts
index 018c941d62..f0337460c1 100644
--- a/src/module.ts
+++ b/src/module.ts
@@ -1394,28 +1394,224 @@ export class Module {
     binaryen._BinaryenModuleSetFeatures(this.ref, featureFlags);
   }
 
-  optimize(func: FunctionRef = 0): void {
+  /**
+   * Runs a single Binaryen pass, identified by name, on `func` if one is given
+   * or on the whole module otherwise.
+   */
+  runPass(pass: string, func: FunctionRef = 0): void {
+    var cStr = allocString(pass);
+    // The C API takes an array of C strings plus a count (see `runPasses`), so
+    // the single name must be wrapped in a one-element pointer array.
+    var cArr = allocPtrArray([ cStr ]);
     if (func) {
-      binaryen._BinaryenFunctionOptimize(func, this.ref);
+      binaryen._BinaryenFunctionRunPasses(func, this.ref, cArr, 1);
     } else {
-      binaryen._BinaryenModuleOptimize(this.ref);
+      binaryen._BinaryenModuleRunPasses(this.ref, cArr, 1);
     }
+    binaryen._free(cArr);
+    binaryen._free(cStr);
   }
 
+  /**
+   * Runs the given list of Binaryen passes, identified by name, on `func` if
+   * one is given or on the whole module otherwise.
+   */
   runPasses(passes: string[], func: FunctionRef = 0): void {
     var numNames = passes.length;
-    var names = new Array<usize>(numNames);
+    var cStrs = new Array<usize>(numNames);
     for (let i = 0; i < numNames; ++i) {
-      names[i] = allocString(passes[i]);
+      cStrs[i] = allocString(passes[i]);
     }
-    var cArr = allocPtrArray(names);
+    var cArr = allocPtrArray(cStrs);
     if (func) {
       binaryen._BinaryenFunctionRunPasses(func, this.ref, cArr, numNames);
     } else {
       binaryen._BinaryenModuleRunPasses(this.ref, cArr, numNames);
     }
     binaryen._free(cArr);
-    for (let i = numNames; i >= 0; --i) binaryen._free(names[i]);
+    // Valid indices are 0..numNames-1; starting at numNames would index one past the last element.
+    for (let i = numNames - 1; i >= 0; --i) binaryen._free(cStrs[i]);
+  }
+
+  optimize(optimizeLevel: i32, shrinkLevel: i32, debugInfo: bool = false, usesARC: bool = true): void {
+    // Applies the requested levels, debug info flag and inlining limits to Binaryen, then runs the pass list built below (if any optimization is requested).
+    // Implicitly run costly non-LLVM optimizations on -O3 or -Oz, see: https://github.com/WebAssembly/binaryen/pull/1596
+    if (optimizeLevel >= 3 || shrinkLevel >= 2) optimizeLevel = 4;
+    binaryen._BinaryenSetOptimizeLevel(optimizeLevel);
+    binaryen._BinaryenSetShrinkLevel(shrinkLevel);
+    binaryen._BinaryenSetDebugInfo(debugInfo);
+
+    // Tweak inlining limits: larger when speed is preferred over size, smaller when size matters
+    if (optimizeLevel >= 2 && shrinkLevel === 0) {
+      binaryen._BinaryenSetAlwaysInlineMaxSize(12);
+      binaryen._BinaryenSetFlexibleInlineMaxSize(70);
+      binaryen._BinaryenSetOneCallerInlineMaxSize(200);
+    } else {
+      binaryen._BinaryenSetAlwaysInlineMaxSize( // default: 2
+        optimizeLevel == 0 && shrinkLevel >= 0
+          ? 2
+          : 4
+      );
+      binaryen._BinaryenSetFlexibleInlineMaxSize(65); // default: 20
+      binaryen._BinaryenSetOneCallerInlineMaxSize(80); // default: 15
+    }
+
+    // Pass order here differs substantially from Binaryen's defaults
+    // see: Binaryen/src/pass.cpp
+    if (optimizeLevel > 0 || shrinkLevel > 0) {
+      let passes = new Array<string>();
+
+      // --- PassRunner::addDefaultGlobalOptimizationPrePasses ---
+
+      passes.push("duplicate-function-elimination");
+      passes.push("remove-unused-module-elements"); // + (differs from Binaryen's default pre-passes)
+
+      // --- PassRunner::addDefaultFunctionOptimizationPasses ---
+
+      if (optimizeLevel >= 3 || shrinkLevel >= 1) {
+        passes.push("ssa-nomerge");
+      }
+      if (optimizeLevel >= 3) {
+        passes.push("flatten");
+        passes.push("simplify-locals-notee-nostructure");
+        passes.push("vacuum");
+        passes.push("code-folding");
+        passes.push("flatten");
+        passes.push("local-cse");
+        passes.push("reorder-locals");
+      }
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("rse");
+        passes.push("vacuum");
+      }
+      if (usesARC) {
+        if (optimizeLevel < 3) {
+          passes.push("flatten"); // already pushed above when optimizeLevel >= 3
+        }
+        passes.push("post-assemblyscript");
+      }
+      passes.push("optimize-instructions");
+      passes.push("inlining");
+      passes.push("dce");
+      passes.push("remove-unused-brs");
+      passes.push("remove-unused-names");
+      passes.push("inlining-optimizing");
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("pick-load-signs");
+        passes.push("simplify-globals-optimizing");
+      }
+      if (optimizeLevel >= 3 || shrinkLevel >= 2) {
+        passes.push("precompute-propagate");
+      } else {
+        passes.push("precompute");
+      }
+      passes.push("vacuum");
+      if (optimizeLevel >= 3 && shrinkLevel <= 1) {
+        passes.push("licm");
+      }
+      passes.push("simplify-locals-nostructure");
+      passes.push("vacuum");
+      passes.push("reorder-locals");
+      passes.push("remove-unused-brs");
+      passes.push("coalesce-locals");
+      passes.push("simplify-locals");
+      passes.push("vacuum");
+      passes.push("reorder-locals");
+      passes.push("coalesce-locals");
+      passes.push("reorder-locals");
+      if (optimizeLevel >= 3 || shrinkLevel >= 1) {
+        passes.push("merge-locals");
+      }
+      passes.push("vacuum");
+      if (optimizeLevel >= 3 || shrinkLevel >= 1) {
+        passes.push("code-folding");
+      }
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("simplify-globals-optimizing");
+      }
+      passes.push("merge-blocks");
+      passes.push("remove-unused-brs");
+      passes.push("remove-unused-names");
+      passes.push("merge-blocks");
+      if (optimizeLevel >= 3) {
+        passes.push("optimize-instructions");
+      }
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("rse");
+      }
+      passes.push("vacuum");
+
+      // --- PassRunner::addDefaultGlobalOptimizationPostPasses ---
+
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("simplify-globals-optimizing");
+        passes.push("dae-optimizing");
+      }
+      if (optimizeLevel >= 2 || shrinkLevel >= 2) {
+        passes.push("inlining-optimizing");
+      }
+      if (binaryen._BinaryenGetLowMemoryUnused()) {
+        if (optimizeLevel >= 3 || shrinkLevel >= 1) {
+          passes.push("optimize-added-constants-propagate");
+        } else {
+          passes.push("optimize-added-constants");
+        }
+      }
+      passes.push("duplicate-import-elimination");
+      if (optimizeLevel >= 2 || shrinkLevel >= 2) {
+        passes.push("simplify-globals-optimizing");
+      } else {
+        passes.push("simplify-globals");
+        passes.push("vacuum");
+      }
+      // precompute works best after global optimizations like simplify-globals and inlining-optimizing
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("precompute-propagate");
+      } else {
+        passes.push("precompute");
+      }
+      passes.push("directize"); // replace indirect with direct calls
+      passes.push("dae-optimizing"); // reduce arity
+      passes.push("inlining-optimizing"); // and inline if possible
+      if (usesARC) {
+        // works best after inlining to cover most retains/releases
+        passes.push("post-assemblyscript-finalize");
+      }
+      if (optimizeLevel >= 2 || shrinkLevel >= 1) {
+        passes.push("rse");
+        // move code on early return (after CFG cleanup)
+        passes.push("code-pushing");
+        if (optimizeLevel >= 3) {
+          // very expensive, so O3 only
+          passes.push("simplify-globals");
+          passes.push("vacuum");
+          // replace indirect with direct calls again and inline
+          passes.push("inlining-optimizing");
+          passes.push("directize");
+          passes.push("dae-optimizing");
+          passes.push("precompute-propagate");
+          passes.push("vacuum");
+          passes.push("merge-locals");
+          passes.push("coalesce-locals");
+          passes.push("simplify-locals-nostructure");
+          passes.push("vacuum");
+          passes.push("inlining-optimizing");
+          passes.push("precompute-propagate");
+        }
+        passes.push("remove-unused-brs");
+        passes.push("remove-unused-names");
+        passes.push("vacuum");
+        passes.push("optimize-instructions");
+        passes.push("simplify-globals-optimizing");
+      }
+      // clean up: remove unused elements of table and pack / reduce memory
+      passes.push("duplicate-function-elimination");
+      passes.push("remove-unused-nonfunction-module-elements");
+      passes.push("memory-packing");
+      passes.push("remove-unused-module-elements");
+
+      this.runPasses(passes); // no function given, so the passes run on the whole module
+    }
   }
 
   private cachedPrecomputeNames: usize = 0;
diff --git a/tests/compiler/features/simd.optimized.wat b/tests/compiler/features/simd.optimized.wat
index 952933a6b7..317d754484 100644
--- a/tests/compiler/features/simd.optimized.wat
+++ b/tests/compiler/features/simd.optimized.wat
@@ -8,37 +8,29 @@
  (global $~lib/rt/stub/offset (mut i32) (i32.const 0))
  (export "memory" (memory $0))
  (start $~start)
- (func $features/simd/test_v128
+ (func $start:features/simd
   (local $0 i32)
   (local $1 i32)
-  (local $2 i32)
-  (local $3 i32)
-  global.get $~lib/rt/stub/offset
-  i32.const 16
-  i32.add
-  local.tee $1
-  i32.const -64
-  i32.sub
-  local.tee $2
+  i32.const 1120
+  global.set $~lib/rt/stub/offset
+  i32.const 1200
   memory.size
-  local.tee $3
+  local.tee $1
   i32.const 16
   i32.shl
   local.tee $0
   i32.gt_u
   if
-   local.get $3
-   local.get $2
+   local.get $1
+   i32.const 66735
    local.get $0
    i32.sub
-   i32.const 65535
-   i32.add
    i32.const -65536
    i32.and
    i32.const 16
    i32.shr_u
    local.tee $0
-   local.get $3
+   local.get $1
    local.get $0
    i32.gt_s
    select
@@ -55,48 +47,26 @@
     end
    end
   end
-  local.get $2
+  i32.const 1200
   global.set $~lib/rt/stub/offset
-  local.get $1
-  i32.const 16
-  i32.sub
-  local.tee $0
+  i32.const 1120
   i32.const 64
   i32.store
-  local.get $0
+  i32.const 1124
   i32.const 1
-  i32.store offset=4
-  local.get $0
+  i32.store
+  i32.const 1128
   i32.const 0
-  i32.store offset=8
-  local.get $0
+  i32.store
+  i32.const 1132
   i32.const 64
-  i32.store offset=12
-  local.get $1
-  local.get $1
-  v128.load offset=16
-  v128.store offset=32
-  local.get $1
-  i32.const 15
-  i32.and
-  i32.eqz
-  i32.const 0
-  local.get $1
-  select
-  i32.eqz
-  if
-   i32.const 0
-   i32.const 1040
-   i32.const 70
-   i32.const 3
-   call $~lib/builtins/abort
-   unreachable
-  end
-  local.get $1
-  i32.const 16
-  i32.sub
-  local.tee $0
-  i32.load offset=4
+  i32.store
+  i32.const 1168
+  i32.const 1152
+  v128.load
+  v128.store
+  i32.const 1124
+  i32.load
   i32.const 1
   i32.ne
   if
@@ -108,19 +78,17 @@
    unreachable
   end
   global.get $~lib/rt/stub/offset
-  local.get $1
-  local.get $0
+  i32.const 1120
   i32.load
+  i32.const 1136
   i32.add
   i32.eq
   if
-   local.get $0
+   i32.const 1120
    global.set $~lib/rt/stub/offset
   end
  )
  (func $~start
-  i32.const 1120
-  global.set $~lib/rt/stub/offset
-  call $features/simd/test_v128
+  call $start:features/simd
  )
 )
diff --git a/tests/compiler/std/object-literal-omitted.optimized.wat b/tests/compiler/std/object-literal-omitted.optimized.wat
index a70a39a38b..7399ef8711 100644
--- a/tests/compiler/std/object-literal-omitted.optimized.wat
+++ b/tests/compiler/std/object-literal-omitted.optimized.wat
@@ -632,10 +632,10 @@
       i32.const 16
       i32.lt_u
       if
-       local.get $2
        local.get $1
        i32.const 4
        i32.shl
+       local.get $2
        i32.add
        i32.const 2
        i32.shl
@@ -1418,26 +1418,26 @@
    i32.eq
    br_if $__inlined_func$~lib/string/String.__eq
    drop
+   block $folding-inner0
+    i32.const 0
+    i32.const 1
+    local.get $2
+    select
+    br_if $folding-inner0
+    local.get $2
+    call $~lib/string/String#get:length
+    local.tee $3
+    i32.const 1280
+    call $~lib/string/String#get:length
+    i32.ne
+    br_if $folding-inner0
+    local.get $2
+    local.get $3
+    call $~lib/util/string/compareImpl
+    i32.eqz
+    br $__inlined_func$~lib/string/String.__eq
+   end
    i32.const 0
-   i32.const 0
-   i32.const 1
-   local.get $2
-   select
-   br_if $__inlined_func$~lib/string/String.__eq
-   drop
-   i32.const 0
-   local.get $2
-   call $~lib/string/String#get:length
-   local.tee $3
-   i32.const 1280
-   call $~lib/string/String#get:length
-   i32.ne
-   br_if $__inlined_func$~lib/string/String.__eq
-   drop
-   local.get $2
-   local.get $3
-   call $~lib/util/string/compareImpl
-   i32.eqz
   end
   i32.eqz
   if
@@ -1501,16 +1501,13 @@
      block $switch$1$case$6
       block $switch$1$case$4
        local.get $0
-       i32.const 16
-       i32.add
-       local.tee $1
        i32.const 8
-       i32.sub
+       i32.add
        i32.load
        br_table $__inlined_func$~lib/rt/__visit_members $__inlined_func$~lib/rt/__visit_members $switch$1$case$4 $__inlined_func$~lib/rt/__visit_members $switch$1$case$6 $switch$1$default
       end
-      local.get $1
-      i32.load
+      local.get $0
+      i32.load offset=16
       local.tee $1
       if
        local.get $1
@@ -1518,8 +1515,8 @@
       end
       br $__inlined_func$~lib/rt/__visit_members
      end
-     local.get $1
-     i32.load offset=4
+     local.get $0
+     i32.load offset=20
      local.tee $1
      if
       local.get $1