From 0672dff83bdd7a49d734e8de94b8f01b6b374cb8 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Wed, 22 Mar 2023 21:00:40 +0800 Subject: [PATCH] cmd/compile: hoist some loop invariants Conservatively hoist some loop invariants outside the loop. Updates #15808 --- src/cmd/compile/internal/ssa/branchelim.go | 11 +- src/cmd/compile/internal/ssa/compile.go | 1 + src/cmd/compile/internal/ssa/graphkit.go | 46 ++ src/cmd/compile/internal/ssa/hoistloopiv.go | 132 +++++ .../compile/internal/ssa/hoistloopiv_test.go | 111 +++++ src/cmd/compile/internal/ssa/likelyadjust.go | 413 ---------------- src/cmd/compile/internal/ssa/loop.go | 458 ++++++++++++++++++ src/cmd/compile/internal/ssa/shortcircuit.go | 38 -- 8 files changed, 757 insertions(+), 453 deletions(-) create mode 100644 src/cmd/compile/internal/ssa/graphkit.go create mode 100644 src/cmd/compile/internal/ssa/hoistloopiv.go create mode 100644 src/cmd/compile/internal/ssa/hoistloopiv_test.go create mode 100644 src/cmd/compile/internal/ssa/loop.go diff --git a/src/cmd/compile/internal/ssa/branchelim.go b/src/cmd/compile/internal/ssa/branchelim.go index f16959dd572973..aeb85bc1a62f2d 100644 --- a/src/cmd/compile/internal/ssa/branchelim.go +++ b/src/cmd/compile/internal/ssa/branchelim.go @@ -424,6 +424,14 @@ func shouldElimIfElse(no, yes, post *Block, arch string) bool { } } +func hasSideEffect(v *Value) bool { + if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() || + v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects { + return true + } + return false +} + // canSpeculativelyExecute reports whether every value in the block can // be evaluated without causing any observable side effects (memory // accesses, panics and so on) except for execution time changes. 
It @@ -436,8 +444,7 @@ func canSpeculativelyExecute(b *Block) bool { // don't fuse memory ops, Phi ops, divides (can panic), // or anything else with side-effects for _, v := range b.Values { - if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() || - v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects { + if hasSideEffect(v) { return false } } diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 54de1746b7d369..82d27bc96495a8 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -485,6 +485,7 @@ var passes = [...]pass{ {name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops {name: "insert resched checks", fn: insertLoopReschedChecks, disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops. + {name: "hoist loop invariant", fn: hoistLoopInvariant}, {name: "lower", fn: lower, required: true}, {name: "addressing modes", fn: addressingModes, required: false}, {name: "late lower", fn: lateLower, required: true}, diff --git a/src/cmd/compile/internal/ssa/graphkit.go b/src/cmd/compile/internal/ssa/graphkit.go new file mode 100644 index 00000000000000..9db1c31bd0a2b2 --- /dev/null +++ b/src/cmd/compile/internal/ssa/graphkit.go @@ -0,0 +1,46 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// ---------------------------------------------------------------------------- +// Graph transformation + +// replaceUses replaces all uses of old in b with new. 
+func (b *Block) replaceUses(old, new *Value) { + for _, v := range b.Values { + for i, a := range v.Args { + if a == old { + v.SetArg(i, new) + } + } + } + for i, v := range b.ControlValues() { + if v == old { + b.ReplaceControl(i, new) + } + } +} + +// moveTo moves v to dst, adjusting the appropriate Block.Values slices. +// The caller is responsible for ensuring that this is safe. +// i is the index of v in v.Block.Values. +func (v *Value) moveTo(dst *Block, i int) { + if dst.Func.scheduled { + v.Fatalf("moveTo after scheduling") + } + src := v.Block + if src.Values[i] != v { + v.Fatalf("moveTo bad index %d", v, i) + } + if src == dst { + return + } + v.Block = dst + dst.Values = append(dst.Values, v) + last := len(src.Values) - 1 + src.Values[i] = src.Values[last] + src.Values[last] = nil + src.Values = src.Values[:last] +} diff --git a/src/cmd/compile/internal/ssa/hoistloopiv.go b/src/cmd/compile/internal/ssa/hoistloopiv.go new file mode 100644 index 00000000000000..60a19417051f56 --- /dev/null +++ b/src/cmd/compile/internal/ssa/hoistloopiv.go @@ -0,0 +1,132 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import "fmt" + +const MaxLoopBlockSize = 8 + +func printInvariant(val *Value, block *Block, domBlock *Block) { + fmt.Printf("== Hoist %v(%v) from b%v to b%v in %v\n", + val.Op.String(), val.String(), + block.ID, domBlock.ID, block.Func.Name) + fmt.Printf(" %v\n", val.LongString()) +} + +func isCandidate(block *Block, val *Value) bool { + if len(val.Args) == 0 { + // not a profitable expression, e.g. 
constant + return false + } + if block.Likely == BranchUnlikely { + // all values are excluded as candidate when branch becomes unlikely to reach + return false + } + return true +} + +func isInsideLoop(loopBlocks []*Block, v *Value) bool { + for _, block := range loopBlocks { + for _, val := range block.Values { + if val == v { + return true + } + } + } + return false +} + +// tryHoist hoists profitable loop invariants to the block that dominates the entire loop. +// A value is considered loop invariant if all its inputs are defined outside the loop +// or all its inputs are loop invariants. Since a loop invariant is immediately moved +// to the dominator block of the loop, the first rule already implies the second rule. +func tryHoist(loopnest *loopnest, loop *loop, loopBlocks []*Block) { + for _, block := range loopBlocks { + // if basic block is located in a nested loop rather than directly in the + // current loop, it will not be processed. + if loopnest.b2l[block.ID] != loop { + continue + } + for i := 0; i < len(block.Values); i++ { + var val *Value = block.Values[i] + if !isCandidate(block, val) { + continue + } + // value cannot be hoisted because it may cause observable side effects + if hasSideEffect(val) { + continue + } + // consider the following operations as pinned anyway + switch val.Op { + case OpInlMark, + OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, + OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64: + continue + } + // input def is inside loop, consider as variant + isInvariant := true + loopnest.assembleChildren() + for _, arg := range val.Args { + if isInsideLoop(loopBlocks, arg) { + isInvariant = false + break + } + } + if isInvariant { + for valIdx, v := range block.Values { + if val != v { + continue + } + domBlock := loopnest.sdom.Parent(loop.header) + if block.Func.pass.debug >= 1 { + printInvariant(val, block, domBlock) + } + val.moveTo(domBlock, valIdx) + i-- + break + } + } + } + } +} + +// hoistLoopInvariant hoists expressions that 
compute the same value +// while having no effect outside the loop +func hoistLoopInvariant(f *Func) { + loopnest := f.loopnest() + if loopnest.hasIrreducible { + return + } + if len(loopnest.loops) == 0 { + return + } + for _, loop := range loopnest.loops { + loopBlocks := loopnest.findLoopBlocks(loop) + if len(loopBlocks) >= MaxLoopBlockSize { + continue + } + + // check if it's too complicated for such optimization + tooComplicated := false + Out: + for _, block := range loopBlocks { + for _, val := range block.Values { + if val.Op.IsCall() || val.Op.HasSideEffects() { + tooComplicated = true + break Out + } + switch val.Op { + case OpLoad, OpStore: + tooComplicated = true + break Out + } + } + } + // try to hoist loop invariants outside the loop + if !tooComplicated { + tryHoist(loopnest, loop, loopBlocks) + } + } +} diff --git a/src/cmd/compile/internal/ssa/hoistloopiv_test.go b/src/cmd/compile/internal/ssa/hoistloopiv_test.go new file mode 100644 index 00000000000000..5ca3e976fc5b74 --- /dev/null +++ b/src/cmd/compile/internal/ssa/hoistloopiv_test.go @@ -0,0 +1,111 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package ssa + +import ( + "cmd/compile/internal/types" + "testing" +) + +func checkValueMotion(t *testing.T, fun fun, valName, expectedBlock string) { + for _, b := range fun.f.Blocks { + for _, v := range b.Values { + if v == fun.values[valName] { + if fun.blocks[expectedBlock] != b { + t.Errorf("Error: %v\n", v.LongString()) + } + } + } + } +} + +// var d int +// var p = 15 +// +// for i := 0; i < 10; i++ { +// t := 1 * p +// d = i + t +// } +// +// t should be hoisted to dominator block of loop header +func TestHoistLoopIVSimple(t *testing.T) { + c := testConfig(t) + fun := c.Fun("b1", + Bloc("b1", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("zero", OpConst64, c.config.Types.Int64, 0, nil), + Valu("one", OpConst64, c.config.Types.Int64, 1, nil), + Valu("ten", OpConst64, c.config.Types.Int64, 10, nil), + Valu("p", OpConst64, c.config.Types.Int64, 15, nil), + Goto("b2")), + Bloc("b2", + Valu("i", OpPhi, c.config.Types.Int64, 0, nil, "one", "i2"), + Valu("d", OpPhi, c.config.Types.Int64, 0, nil, "zero", "d2"), + Valu("cmp", OpLess64, c.config.Types.Bool, 0, nil, "i", "ten"), + If("cmp", "b3", "b4")), + Bloc("b3", + Valu("loopiv", OpMul64, c.config.Types.Int64, 0, nil, "one", "p"), + Valu("d2", OpAdd64, c.config.Types.Int64, 0, nil, "loopiv", "d"), + Valu("i2", OpAdd64, c.config.Types.Int64, 0, nil, "i", "one"), + Goto("b2")), + Bloc("b4", + Exit("mem"))) + + CheckFunc(fun.f) + hoistLoopInvariant(fun.f) + CheckFunc(fun.f) + checkValueMotion(t, fun, "loopiv", "b1") +} + +func BenchmarkHoistIV1Opt(b *testing.B) { + var d = 0 + var a = 3 + + for i := 0; i < b.N; i++ { + d = i + (a*10 - a + 3) + } + _ = d +} + +func BenchmarkHoistIV1Manual(b *testing.B) { + var d = 0 + var a = 3 + val := (a*10 - a + 3) + for i := 0; i < b.N; i++ { + d = i + val + } + _ = d +} + +//go:noinline +func hoistLoopIV2Opt(n, d int) { + t := 0 + for i := 0; i < n*d; i++ { + t += 1 + } + _ = t +} + +//go:noinline +func hoistLoopIV2Manual(n, d int) { + t := 0 + val := n * d + for i 
:= 0; i < val; i++ { + t += 1 + } + _ = t +} + +func BenchmarkHoistIV2Opt(b *testing.B) { + for i := 0; i < b.N; i++ { + hoistLoopIV2Opt(i%10, i%5) + } +} + +func BenchmarkHoistIV2Manual(b *testing.B) { + for i := 0; i < b.N; i++ { + hoistLoopIV2Manual(i%10, i%5) + } +} diff --git a/src/cmd/compile/internal/ssa/likelyadjust.go b/src/cmd/compile/internal/ssa/likelyadjust.go index 1d0e53cf5b6086..46c32f976a5d7c 100644 --- a/src/cmd/compile/internal/ssa/likelyadjust.go +++ b/src/cmd/compile/internal/ssa/likelyadjust.go @@ -4,75 +4,6 @@ package ssa -import ( - "fmt" -) - -type loop struct { - header *Block // The header node of this (reducible) loop - outer *loop // loop containing this loop - - // By default, children, exits, and depth are not initialized. - children []*loop // loops nested directly within this loop. Initialized by assembleChildren(). - exits []*Block // exits records blocks reached by exits from this loop. Initialized by findExits(). - - // Next three fields used by regalloc and/or - // aid in computation of inner-ness and list of blocks. - nBlocks int32 // Number of blocks in this loop but not within inner loops - depth int16 // Nesting depth of the loop; 1 is outermost. Initialized by calculateDepths(). - isInner bool // True if never discovered to contain a loop - - // register allocation uses this. - containsUnavoidableCall bool // True if all paths through the loop have a call -} - -// outerinner records that outer contains inner -func (sdom SparseTree) outerinner(outer, inner *loop) { - // There could be other outer loops found in some random order, - // locate the new outer loop appropriately among them. - - // Outer loop headers dominate inner loop headers. - // Use this to put the "new" "outer" loop in the right place. 
- oldouter := inner.outer - for oldouter != nil && sdom.isAncestor(outer.header, oldouter.header) { - inner = oldouter - oldouter = inner.outer - } - if outer == oldouter { - return - } - if oldouter != nil { - sdom.outerinner(oldouter, outer) - } - - inner.outer = outer - outer.isInner = false -} - -func checkContainsCall(bb *Block) bool { - if bb.Kind == BlockDefer { - return true - } - for _, v := range bb.Values { - if opcodeTable[v.Op].call { - return true - } - } - return false -} - -type loopnest struct { - f *Func - b2l []*loop - po []*Block - sdom SparseTree - loops []*loop - hasIrreducible bool // TODO current treatment of irreducible loops is very flaky, if accurate loops are needed, must punt at function level. - - // Record which of the lazily initialized fields have actually been initialized. - initializedChildren, initializedDepth, initializedExits bool -} - func min8(a, b int8) int8 { if a < b { return a @@ -234,347 +165,3 @@ func likelyadjust(f *Func) { } } - -func (l *loop) String() string { - return fmt.Sprintf("hdr:%s", l.header) -} - -func (l *loop) LongString() string { - i := "" - o := "" - if l.isInner { - i = ", INNER" - } - if l.outer != nil { - o = ", o=" + l.outer.header.String() - } - return fmt.Sprintf("hdr:%s%s%s", l.header, i, o) -} - -func (l *loop) isWithinOrEq(ll *loop) bool { - if ll == nil { // nil means whole program - return true - } - for ; l != nil; l = l.outer { - if l == ll { - return true - } - } - return false -} - -// nearestOuterLoop returns the outer loop of loop most nearly -// containing block b; the header must dominate b. loop itself -// is assumed to not be that loop. For acceptable performance, -// we're relying on loop nests to not be terribly deep. 
-func (l *loop) nearestOuterLoop(sdom SparseTree, b *Block) *loop { - var o *loop - for o = l.outer; o != nil && !sdom.IsAncestorEq(o.header, b); o = o.outer { - } - return o -} - -func loopnestfor(f *Func) *loopnest { - po := f.postorder() - sdom := f.Sdom() - b2l := make([]*loop, f.NumBlocks()) - loops := make([]*loop, 0) - visited := f.Cache.allocBoolSlice(f.NumBlocks()) - defer f.Cache.freeBoolSlice(visited) - sawIrred := false - - if f.pass.debug > 2 { - fmt.Printf("loop finding in %s\n", f.Name) - } - - // Reducible-loop-nest-finding. - for _, b := range po { - if f.pass != nil && f.pass.debug > 3 { - fmt.Printf("loop finding at %s\n", b) - } - - var innermost *loop // innermost header reachable from this block - - // IF any successor s of b is in a loop headed by h - // AND h dominates b - // THEN b is in the loop headed by h. - // - // Choose the first/innermost such h. - // - // IF s itself dominates b, then s is a loop header; - // and there may be more than one such s. - // Since there's at most 2 successors, the inner/outer ordering - // between them can be established with simple comparisons. - for _, e := range b.Succs { - bb := e.b - l := b2l[bb.ID] - - if sdom.IsAncestorEq(bb, b) { // Found a loop header - if f.pass != nil && f.pass.debug > 4 { - fmt.Printf("loop finding succ %s of %s is header\n", bb.String(), b.String()) - } - if l == nil { - l = &loop{header: bb, isInner: true} - loops = append(loops, l) - b2l[bb.ID] = l - } - } else if !visited[bb.ID] { // Found an irreducible loop - sawIrred = true - if f.pass != nil && f.pass.debug > 4 { - fmt.Printf("loop finding succ %s of %s is IRRED, in %s\n", bb.String(), b.String(), f.Name) - } - } else if l != nil { - // TODO handle case where l is irreducible. - // Perhaps a loop header is inherited. - // is there any loop containing our successor whose - // header dominates b? 
- if !sdom.IsAncestorEq(l.header, b) { - l = l.nearestOuterLoop(sdom, b) - } - if f.pass != nil && f.pass.debug > 4 { - if l == nil { - fmt.Printf("loop finding succ %s of %s has no loop\n", bb.String(), b.String()) - } else { - fmt.Printf("loop finding succ %s of %s provides loop with header %s\n", bb.String(), b.String(), l.header.String()) - } - } - } else { // No loop - if f.pass != nil && f.pass.debug > 4 { - fmt.Printf("loop finding succ %s of %s has no loop\n", bb.String(), b.String()) - } - - } - - if l == nil || innermost == l { - continue - } - - if innermost == nil { - innermost = l - continue - } - - if sdom.isAncestor(innermost.header, l.header) { - sdom.outerinner(innermost, l) - innermost = l - } else if sdom.isAncestor(l.header, innermost.header) { - sdom.outerinner(l, innermost) - } - } - - if innermost != nil { - b2l[b.ID] = innermost - innermost.nBlocks++ - } - visited[b.ID] = true - } - - ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred} - - // Calculate containsUnavoidableCall for regalloc - dominatedByCall := f.Cache.allocBoolSlice(f.NumBlocks()) - defer f.Cache.freeBoolSlice(dominatedByCall) - for _, b := range po { - if checkContainsCall(b) { - dominatedByCall[b.ID] = true - } - } - // Run dfs to find path through the loop that avoids all calls. - // Such path either escapes loop or return back to header. - // It isn't enough to have exit not dominated by any call, for example: - // ... some loop - // call1 call2 - // \ / - // exit - // ... - // exit is not dominated by any call, but we don't have call-free path to it. - for _, l := range loops { - // Header contains call. - if dominatedByCall[l.header.ID] { - l.containsUnavoidableCall = true - continue - } - callfreepath := false - tovisit := make([]*Block, 0, len(l.header.Succs)) - // Push all non-loop non-exit successors of header onto toVisit. 
- for _, s := range l.header.Succs { - nb := s.Block() - // This corresponds to loop with zero iterations. - if !l.iterationEnd(nb, b2l) { - tovisit = append(tovisit, nb) - } - } - for len(tovisit) > 0 { - cur := tovisit[len(tovisit)-1] - tovisit = tovisit[:len(tovisit)-1] - if dominatedByCall[cur.ID] { - continue - } - // Record visited in dominatedByCall. - dominatedByCall[cur.ID] = true - for _, s := range cur.Succs { - nb := s.Block() - if l.iterationEnd(nb, b2l) { - callfreepath = true - } - if !dominatedByCall[nb.ID] { - tovisit = append(tovisit, nb) - } - - } - if callfreepath { - break - } - } - if !callfreepath { - l.containsUnavoidableCall = true - } - } - - // Curious about the loopiness? "-d=ssa/likelyadjust/stats" - if f.pass != nil && f.pass.stats > 0 && len(loops) > 0 { - ln.assembleChildren() - ln.calculateDepths() - ln.findExits() - - // Note stats for non-innermost loops are slightly flawed because - // they don't account for inner loop exits that span multiple levels. - - for _, l := range loops { - x := len(l.exits) - cf := 0 - if !l.containsUnavoidableCall { - cf = 1 - } - inner := 0 - if l.isInner { - inner++ - } - - f.LogStat("loopstats:", - l.depth, "depth", x, "exits", - inner, "is_inner", cf, "always_calls", l.nBlocks, "n_blocks") - } - } - - if f.pass != nil && f.pass.debug > 1 && len(loops) > 0 { - fmt.Printf("Loops in %s:\n", f.Name) - for _, l := range loops { - fmt.Printf("%s, b=", l.LongString()) - for _, b := range f.Blocks { - if b2l[b.ID] == l { - fmt.Printf(" %s", b) - } - } - fmt.Print("\n") - } - fmt.Printf("Nonloop blocks in %s:", f.Name) - for _, b := range f.Blocks { - if b2l[b.ID] == nil { - fmt.Printf(" %s", b) - } - } - fmt.Print("\n") - } - return ln -} - -// assembleChildren initializes the children field of each -// loop in the nest. 
Loop A is a child of loop B if A is -// directly nested within B (based on the reducible-loops -// detection above) -func (ln *loopnest) assembleChildren() { - if ln.initializedChildren { - return - } - for _, l := range ln.loops { - if l.outer != nil { - l.outer.children = append(l.outer.children, l) - } - } - ln.initializedChildren = true -} - -// calculateDepths uses the children field of loops -// to determine the nesting depth (outer=1) of each -// loop. This is helpful for finding exit edges. -func (ln *loopnest) calculateDepths() { - if ln.initializedDepth { - return - } - ln.assembleChildren() - for _, l := range ln.loops { - if l.outer == nil { - l.setDepth(1) - } - } - ln.initializedDepth = true -} - -// findExits uses loop depth information to find the -// exits from a loop. -func (ln *loopnest) findExits() { - if ln.initializedExits { - return - } - ln.calculateDepths() - b2l := ln.b2l - for _, b := range ln.po { - l := b2l[b.ID] - if l != nil && len(b.Succs) == 2 { - sl := b2l[b.Succs[0].b.ID] - if recordIfExit(l, sl, b.Succs[0].b) { - continue - } - sl = b2l[b.Succs[1].b.ID] - if recordIfExit(l, sl, b.Succs[1].b) { - continue - } - } - } - ln.initializedExits = true -} - -// depth returns the loop nesting level of block b. -func (ln *loopnest) depth(b ID) int16 { - if l := ln.b2l[b]; l != nil { - return l.depth - } - return 0 -} - -// recordIfExit checks sl (the loop containing b) to see if it -// is outside of loop l, and if so, records b as an exit block -// from l and returns true. -func recordIfExit(l, sl *loop, b *Block) bool { - if sl != l { - if sl == nil || sl.depth <= l.depth { - l.exits = append(l.exits, b) - return true - } - // sl is not nil, and is deeper than l - // it's possible for this to be a goto into an irreducible loop made from gotos. 
- for sl.depth > l.depth { - sl = sl.outer - } - if sl != l { - l.exits = append(l.exits, b) - return true - } - } - return false -} - -func (l *loop) setDepth(d int16) { - l.depth = d - for _, c := range l.children { - c.setDepth(d + 1) - } -} - -// iterationEnd checks if block b ends iteration of loop l. -// Ending iteration means either escaping to outer loop/code or -// going back to header -func (l *loop) iterationEnd(b *Block, b2l []*loop) bool { - return b == l.header || b2l[b.ID] == nil || (b2l[b.ID] != l && b2l[b.ID].depth <= l.depth) -} diff --git a/src/cmd/compile/internal/ssa/loop.go b/src/cmd/compile/internal/ssa/loop.go new file mode 100644 index 00000000000000..993d536fcadd08 --- /dev/null +++ b/src/cmd/compile/internal/ssa/loop.go @@ -0,0 +1,458 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import "fmt" + +type loop struct { + header *Block // The header node of this (reducible) loop + outer *loop // loop containing this loop + + // By default, children, exits, and depth are not initialized. + children []*loop // loops nested directly within this loop. Initialized by assembleChildren(). + exits []*Block // exits records blocks reached by exits from this loop. Initialized by findExits(). + + // Next three fields used by regalloc and/or + // aid in computation of inner-ness and list of blocks. + nBlocks int32 // Number of blocks in this loop but not within inner loops + depth int16 // Nesting depth of the loop; 1 is outermost. Initialized by calculateDepths(). + isInner bool // True if never discovered to contain a loop + + // register allocation uses this. 
+ containsUnavoidableCall bool // True if all paths through the loop have a call +} + +type loopnest struct { + f *Func + b2l []*loop // block id to loop mapping + po []*Block + sdom SparseTree + loops []*loop + hasIrreducible bool // TODO current treatment of irreducible loops is very flaky, if accurate loops are needed, must punt at function level. + + // Record which of the lazily initialized fields have actually been initialized. + initializedChildren, initializedDepth, initializedExits bool +} + +// outerinner records that outer contains inner +func (sdom SparseTree) outerinner(outer, inner *loop) { + // There could be other outer loops found in some random order, + // locate the new outer loop appropriately among them. + + // Outer loop headers dominate inner loop headers. + // Use this to put the "new" "outer" loop in the right place. + oldouter := inner.outer + for oldouter != nil && sdom.isAncestor(outer.header, oldouter.header) { + inner = oldouter + oldouter = inner.outer + } + if outer == oldouter { + return + } + if oldouter != nil { + sdom.outerinner(oldouter, outer) + } + + inner.outer = outer + outer.isInner = false +} + +func checkContainsCall(bb *Block) bool { + if bb.Kind == BlockDefer { + return true + } + for _, v := range bb.Values { + if opcodeTable[v.Op].call { + return true + } + } + return false +} + +func (l *loop) String() string { + return fmt.Sprintf("hdr:%s", l.header) +} + +func (l *loop) LongString() string { + i := "" + o := "" + if l.isInner { + i = ", INNER" + } + if l.outer != nil { + o = ", o=" + l.outer.header.String() + } + return fmt.Sprintf("hdr:%s%s%s", l.header, i, o) +} + +func (l *loop) isWithinOrEq(ll *loop) bool { + if ll == nil { // nil means whole program + return true + } + for ; l != nil; l = l.outer { + if l == ll { + return true + } + } + return false +} + +// nearestOuterLoop returns the outer loop of loop most nearly +// containing block b; the header must dominate b. 
loop itself +// is assumed to not be that loop. For acceptable performance, +// we're relying on loop nests to not be terribly deep. +func (l *loop) nearestOuterLoop(sdom SparseTree, b *Block) *loop { + var o *loop + for o = l.outer; o != nil && !sdom.IsAncestorEq(o.header, b); o = o.outer { + } + return o +} + +func loopnestfor(f *Func) *loopnest { + po := f.postorder() + sdom := f.Sdom() + b2l := make([]*loop, f.NumBlocks()) + loops := make([]*loop, 0) + visited := f.Cache.allocBoolSlice(f.NumBlocks()) + defer f.Cache.freeBoolSlice(visited) + sawIrred := false + + if f.pass.debug > 2 { + fmt.Printf("loop finding in %s\n", f.Name) + } + + // Reducible-loop-nest-finding. + for _, b := range po { + if f.pass != nil && f.pass.debug > 3 { + fmt.Printf("loop finding at %s\n", b) + } + + var innermost *loop // innermost header reachable from this block + + // IF any successor s of b is in a loop headed by h + // AND h dominates b + // THEN b is in the loop headed by h. + // + // Choose the first/innermost such h. + // + // IF s itself dominates b, then s is a loop header; + // and there may be more than one such s. + // Since there's at most 2 successors, the inner/outer ordering + // between them can be established with simple comparisons. + for _, e := range b.Succs { + bb := e.b + l := b2l[bb.ID] + + if sdom.IsAncestorEq(bb, b) { // Found a loop header + if f.pass != nil && f.pass.debug > 4 { + fmt.Printf("loop finding succ %s of %s is header\n", bb.String(), b.String()) + } + if l == nil { + l = &loop{header: bb, isInner: true} + loops = append(loops, l) + b2l[bb.ID] = l + } + } else if !visited[bb.ID] { // Found an irreducible loop + sawIrred = true + if f.pass != nil && f.pass.debug > 4 { + fmt.Printf("loop finding succ %s of %s is IRRED, in %s\n", bb.String(), b.String(), f.Name) + } + } else if l != nil { + // TODO handle case where l is irreducible. + // Perhaps a loop header is inherited. 
+ // is there any loop containing our successor whose + // header dominates b? + if !sdom.IsAncestorEq(l.header, b) { + l = l.nearestOuterLoop(sdom, b) + } + if f.pass != nil && f.pass.debug > 4 { + if l == nil { + fmt.Printf("loop finding succ %s of %s has no loop\n", bb.String(), b.String()) + } else { + fmt.Printf("loop finding succ %s of %s provides loop with header %s\n", bb.String(), b.String(), l.header.String()) + } + } + } else { // No loop + if f.pass != nil && f.pass.debug > 4 { + fmt.Printf("loop finding succ %s of %s has no loop\n", bb.String(), b.String()) + } + + } + + if l == nil || innermost == l { + continue + } + + if innermost == nil { + innermost = l + continue + } + + if sdom.isAncestor(innermost.header, l.header) { + sdom.outerinner(innermost, l) + innermost = l + } else if sdom.isAncestor(l.header, innermost.header) { + sdom.outerinner(l, innermost) + } + } + + if innermost != nil { + b2l[b.ID] = innermost + innermost.nBlocks++ + } + visited[b.ID] = true + } + + ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred} + + // Calculate containsUnavoidableCall for regalloc + dominatedByCall := f.Cache.allocBoolSlice(f.NumBlocks()) + defer f.Cache.freeBoolSlice(dominatedByCall) + for _, b := range po { + if checkContainsCall(b) { + dominatedByCall[b.ID] = true + } + } + // Run dfs to find path through the loop that avoids all calls. + // Such path either escapes loop or return back to header. + // It isn't enough to have exit not dominated by any call, for example: + // ... some loop + // call1 call2 + // \ / + // exit + // ... + // exit is not dominated by any call, but we don't have call-free path to it. + for _, l := range loops { + // Header contains call. + if dominatedByCall[l.header.ID] { + l.containsUnavoidableCall = true + continue + } + callfreepath := false + tovisit := make([]*Block, 0, len(l.header.Succs)) + // Push all non-loop non-exit successors of header onto toVisit. 
+ for _, s := range l.header.Succs { + nb := s.Block() + // This corresponds to loop with zero iterations. + if !l.iterationEnd(nb, b2l) { + tovisit = append(tovisit, nb) + } + } + for len(tovisit) > 0 { + cur := tovisit[len(tovisit)-1] + tovisit = tovisit[:len(tovisit)-1] + if dominatedByCall[cur.ID] { + continue + } + // Record visited in dominatedByCall. + dominatedByCall[cur.ID] = true + for _, s := range cur.Succs { + nb := s.Block() + if l.iterationEnd(nb, b2l) { + callfreepath = true + } + if !dominatedByCall[nb.ID] { + tovisit = append(tovisit, nb) + } + + } + if callfreepath { + break + } + } + if !callfreepath { + l.containsUnavoidableCall = true + } + } + + // Curious about the loopiness? "-d=ssa/likelyadjust/stats" + if f.pass != nil && f.pass.stats > 0 && len(loops) > 0 { + ln.assembleChildren() + ln.calculateDepths() + ln.findExits() + + // Note stats for non-innermost loops are slightly flawed because + // they don't account for inner loop exits that span multiple levels. + + for _, l := range loops { + x := len(l.exits) + cf := 0 + if !l.containsUnavoidableCall { + cf = 1 + } + inner := 0 + if l.isInner { + inner++ + } + + f.LogStat("loopstats:", + l.depth, "depth", x, "exits", + inner, "is_inner", cf, "always_calls", l.nBlocks, "n_blocks") + } + } + + if f.pass != nil && f.pass.debug > 1 && len(loops) > 0 { + fmt.Printf("Loops in %s:\n", f.Name) + for _, l := range loops { + fmt.Printf("%s, b=", l.LongString()) + for _, b := range f.Blocks { + if b2l[b.ID] == l { + fmt.Printf(" %s", b) + } + } + fmt.Print("\n") + } + fmt.Printf("Nonloop blocks in %s:", f.Name) + for _, b := range f.Blocks { + if b2l[b.ID] == nil { + fmt.Printf(" %s", b) + } + } + fmt.Print("\n") + } + return ln +} + +// assembleChildren initializes the children field of each +// loop in the nest. 
Loop A is a child of loop B if A is +// directly nested within B (based on the reducible-loops +// detection above) +func (ln *loopnest) assembleChildren() { + if ln.initializedChildren { + return + } + for _, l := range ln.loops { + if l.outer != nil { + l.outer.children = append(l.outer.children, l) + } + } + ln.initializedChildren = true +} + +// calculateDepths uses the children field of loops +// to determine the nesting depth (outer=1) of each +// loop. This is helpful for finding exit edges. +func (ln *loopnest) calculateDepths() { + if ln.initializedDepth { + return + } + ln.assembleChildren() + for _, l := range ln.loops { + if l.outer == nil { + l.setDepth(1) + } + } + ln.initializedDepth = true +} + +// findExits uses loop depth information to find the +// exits from a loop. +func (ln *loopnest) findExits() { + if ln.initializedExits { + return + } + ln.calculateDepths() + b2l := ln.b2l + for _, b := range ln.po { + l := b2l[b.ID] + if l != nil && len(b.Succs) == 2 { + sl := b2l[b.Succs[0].b.ID] + if recordIfExit(l, sl, b.Succs[0].b) { + continue + } + sl = b2l[b.Succs[1].b.ID] + if recordIfExit(l, sl, b.Succs[1].b) { + continue + } + } + } + ln.initializedExits = true +} + +// depth returns the loop nesting level of block b. +func (ln *loopnest) depth(b ID) int16 { + if l := ln.b2l[b]; l != nil { + return l.depth + } + return 0 +} + +// recordIfExit checks sl (the loop containing b) to see if it +// is outside of loop l, and if so, records b as an exit block +// from l and returns true. +func recordIfExit(l, sl *loop, b *Block) bool { + if sl != l { + if sl == nil || sl.depth <= l.depth { + l.exits = append(l.exits, b) + return true + } + // sl is not nil, and is deeper than l + // it's possible for this to be a goto into an irreducible loop made from gotos. 
+ for sl.depth > l.depth { + sl = sl.outer + } + if sl != l { + l.exits = append(l.exits, b) + return true + } + } + return false +} + +func (l *loop) setDepth(d int16) { + l.depth = d + for _, c := range l.children { + c.setDepth(d + 1) + } +} + +// iterationEnd checks if block b ends iteration of loop l. +// Ending iteration means either escaping to outer loop/code or +// going back to header +func (l *loop) iterationEnd(b *Block, b2l []*loop) bool { + return b == l.header || b2l[b.ID] == nil || (b2l[b.ID] != l && b2l[b.ID].depth <= l.depth) +} + +// contains checks if receiver loop contains inner loop in any depth +func (loop *loop) contains(inner *loop) bool { + // Find from current loop + for _, child := range loop.children { + if child == inner { + return true + } + } + // Find from child of current loop + for _, child := range loop.children { + if child.contains(inner) { + return true + } + } + return false +} + +// findLoopBlocks returns all basic blocks, including those contained in nested loops. +func (ln *loopnest) findLoopBlocks(loop *loop) []*Block { + ln.assembleChildren() + loopBlocks := make([]*Block, 0) + for id, tloop := range ln.b2l { + if tloop == nil { + continue + } + if tloop == loop { + // Find block by id and append it + for _, block := range ln.f.Blocks { + if int32(block.ID) == int32(id) { + loopBlocks = append(loopBlocks, block) + break + } + } + } else if loop.contains(tloop) { + // Otherwise, check if this block is within inner loops + blocks := ln.findLoopBlocks(tloop) + loopBlocks = append(loopBlocks, blocks...) 
+ } + } + return loopBlocks +} diff --git a/src/cmd/compile/internal/ssa/shortcircuit.go b/src/cmd/compile/internal/ssa/shortcircuit.go index d7d0b6fe3335b6..9c33d9e1145dce 100644 --- a/src/cmd/compile/internal/ssa/shortcircuit.go +++ b/src/cmd/compile/internal/ssa/shortcircuit.go @@ -473,41 +473,3 @@ func shortcircuitPhiPlan(b *Block, ctl *Value, cidx int, ti int64) func(*Value, // TODO: handle more cases; shortcircuit optimizations turn out to be reasonably high impact return nil } - -// replaceUses replaces all uses of old in b with new. -func (b *Block) replaceUses(old, new *Value) { - for _, v := range b.Values { - for i, a := range v.Args { - if a == old { - v.SetArg(i, new) - } - } - } - for i, v := range b.ControlValues() { - if v == old { - b.ReplaceControl(i, new) - } - } -} - -// moveTo moves v to dst, adjusting the appropriate Block.Values slices. -// The caller is responsible for ensuring that this is safe. -// i is the index of v in v.Block.Values. -func (v *Value) moveTo(dst *Block, i int) { - if dst.Func.scheduled { - v.Fatalf("moveTo after scheduling") - } - src := v.Block - if src.Values[i] != v { - v.Fatalf("moveTo bad index %d", v, i) - } - if src == dst { - return - } - v.Block = dst - dst.Values = append(dst.Values, v) - last := len(src.Values) - 1 - src.Values[i] = src.Values[last] - src.Values[last] = nil - src.Values = src.Values[:last] -}