Skip to content

Commit 983dcf7

Browse files
committed
cmd/compile/internal/ssa: update regalloc in loops
Currently we don't lift a spill out of a loop if the loop contains a call. However, we often have code like this: for .. { if hard_case { call() } // simple case, without call } So instead of checking for any call, check for an unavoidable call. For #22698 cases I see: mime/quotedprintable/Writer-6 10.9µs ± 4% 9.2µs ± 3% -15.02% (p=0.000 n=8+8) And: compress/flate/Encode/Twain/Huffman/1e4-6 99.4µs ± 6% 90.9µs ± 0% -8.57% (p=0.000 n=8+8) compress/flate/Encode/Twain/Huffman/1e5-6 760µs ± 1% 725µs ± 1% -4.56% (p=0.000 n=8+8) compress/flate/Encode/Twain/Huffman/1e6-6 7.55ms ± 0% 7.24ms ± 0% -4.07% (p=0.000 n=8+7) There are no significant changes on go1 benchmarks. But for cases with runtime arch checks, where we call the generic version on old hardware, there are respectable performance gains: math/RoundToEven-6 1.43ns ± 0% 1.25ns ± 0% -12.59% (p=0.001 n=7+7) math/bits/OnesCount64-6 1.60ns ± 1% 1.42ns ± 1% -11.32% (p=0.000 n=8+8) Also, on some runtime benchmarks, loops have fewer loads and higher performance: runtime/RuneIterate/range1/ASCII-6 15.6ns ± 1% 13.9ns ± 1% -10.74% (p=0.000 n=7+8) runtime/ArrayEqual-6 3.22ns ± 0% 2.86ns ± 2% -11.06% (p=0.000 n=7+8) Fixes #22698 Updates #22234 Change-Id: I0ae2f19787d07a9026f064366dedbe601bf7257a Reviewed-on: https://go-review.googlesource.com/84055 Run-TryBot: Ilya Tocar <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: David Chase <[email protected]>
1 parent be371ed commit 983dcf7

File tree

2 files changed

+74
-20
lines changed

2 files changed

+74
-20
lines changed

src/cmd/compile/internal/ssa/likelyadjust.go

Lines changed: 73 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ type loop struct {
2323
isInner bool // True if never discovered to contain a loop
2424

2525
// register allocation uses this.
26-
containsCall bool // if any block in this loop or any loop within it contains has a call
26+
containsUnavoidableCall bool // True if all paths through the loop have a call
2727
}
2828

2929
// outerinner records that outer contains inner
@@ -47,28 +47,18 @@ func (sdom SparseTree) outerinner(outer, inner *loop) {
4747

4848
inner.outer = outer
4949
outer.isInner = false
50-
if inner.containsCall {
51-
outer.setContainsCall()
52-
}
5350
}
5451

55-
func (l *loop) setContainsCall() {
56-
for ; l != nil && !l.containsCall; l = l.outer {
57-
l.containsCall = true
58-
}
59-
60-
}
61-
func (l *loop) checkContainsCall(bb *Block) {
52+
func checkContainsCall(bb *Block) bool {
6253
if bb.Kind == BlockDefer {
63-
l.setContainsCall()
64-
return
54+
return true
6555
}
6656
for _, v := range bb.Values {
6757
if opcodeTable[v.Op].call {
68-
l.setContainsCall()
69-
return
58+
return true
7059
}
7160
}
61+
return false
7262
}
7363

7464
type loopnest struct {
@@ -323,7 +313,6 @@ func loopnestfor(f *Func) *loopnest {
323313
l = &loop{header: bb, isInner: true}
324314
loops = append(loops, l)
325315
b2l[bb.ID] = l
326-
l.checkContainsCall(bb)
327316
}
328317
} else if !visited[bb.ID] { // Found an irreducible loop
329318
sawIrred = true
@@ -371,14 +360,72 @@ func loopnestfor(f *Func) *loopnest {
371360

372361
if innermost != nil {
373362
b2l[b.ID] = innermost
374-
innermost.checkContainsCall(b)
375363
innermost.nBlocks++
376364
}
377365
visited[b.ID] = true
378366
}
379367

380368
ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
381369

370+
// Calculate containsUnavoidableCall for regalloc
371+
dominatedByCall := make([]bool, f.NumBlocks())
372+
for _, b := range po {
373+
if checkContainsCall(b) {
374+
dominatedByCall[b.ID] = true
375+
}
376+
}
377+
// Run a DFS to find a path through the loop that avoids all calls.
378+
// Such a path either escapes the loop or returns back to the header.
379+
// It isn't enough for the exit to be not dominated by any call, for example:
380+
// ... some loop
381+
// call1 call2
382+
// \ /
383+
// exit
384+
// ...
385+
// exit is not dominated by any call, but we don't have a call-free path to it.
386+
for _, l := range loops {
387+
// Header contains call.
388+
if dominatedByCall[l.header.ID] {
389+
l.containsUnavoidableCall = true
390+
continue
391+
}
392+
callfreepath := false
393+
tovisit := make([]*Block, 0, len(l.header.Succs))
394+
// Push all successors of the header that do not end the iteration onto tovisit.
395+
for _, s := range l.header.Succs {
396+
nb := s.Block()
397+
// This corresponds to loop with zero iterations.
398+
if !l.iterationEnd(nb, b2l) {
399+
tovisit = append(tovisit, nb)
400+
}
401+
}
402+
for len(tovisit) > 0 {
403+
cur := tovisit[len(tovisit)-1]
404+
tovisit = tovisit[:len(tovisit)-1]
405+
if dominatedByCall[cur.ID] {
406+
continue
407+
}
408+
// Record visited in dominatedByCall.
409+
dominatedByCall[cur.ID] = true
410+
for _, s := range cur.Succs {
411+
nb := s.Block()
412+
if l.iterationEnd(nb, b2l) {
413+
callfreepath = true
414+
}
415+
if !dominatedByCall[nb.ID] {
416+
tovisit = append(tovisit, nb)
417+
}
418+
419+
}
420+
if callfreepath {
421+
break
422+
}
423+
}
424+
if !callfreepath {
425+
l.containsUnavoidableCall = true
426+
}
427+
}
428+
382429
// Curious about the loopiness? "-d=ssa/likelyadjust/stats"
383430
if f.pass != nil && f.pass.stats > 0 && len(loops) > 0 {
384431
ln.assembleChildren()
@@ -391,7 +438,7 @@ func loopnestfor(f *Func) *loopnest {
391438
for _, l := range loops {
392439
x := len(l.exits)
393440
cf := 0
394-
if !l.containsCall {
441+
if !l.containsUnavoidableCall {
395442
cf = 1
396443
}
397444
inner := 0
@@ -401,7 +448,7 @@ func loopnestfor(f *Func) *loopnest {
401448

402449
f.LogStat("loopstats:",
403450
l.depth, "depth", x, "exits",
404-
inner, "is_inner", cf, "is_callfree", l.nBlocks, "n_blocks")
451+
inner, "is_inner", cf, "always_calls", l.nBlocks, "n_blocks")
405452
}
406453
}
407454

@@ -519,3 +566,10 @@ func (l *loop) setDepth(d int16) {
519566
c.setDepth(d + 1)
520567
}
521568
}
569+
570+
// iterationEnd checks if block b ends iteration of loop l.
571+
// Ending iteration means either escaping to outer loop/code or
572+
// going back to header
573+
func (l *loop) iterationEnd(b *Block, b2l []*loop) bool {
574+
return b == l.header || b2l[b.ID] == nil || (b2l[b.ID] != l && b2l[b.ID].depth <= l.depth)
575+
}

src/cmd/compile/internal/ssa/regalloc.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1415,7 +1415,7 @@ func (s *regAllocState) regalloc(f *Func) {
14151415
// For this to be worthwhile, the loop must not contain an unavoidable call.
14161416
top := b.Succs[0].b
14171417
loop := s.loopnest.b2l[top.ID]
1418-
if loop == nil || loop.header != top || loop.containsCall {
1418+
if loop == nil || loop.header != top || loop.containsUnavoidableCall {
14191419
goto badloop
14201420
}
14211421

0 commit comments

Comments
 (0)