Skip to content

Commit 348275c

Browse files
committed
cmd/compile: make a copy of Phi input if it is still live
The register of a Phi input is allocated to the Phi. So if the Phi input is still live after the Phi, we may need to use a spill. In this case, copy the Phi input to a spare register to avoid a spill.

Originally targeted the code in issue #16187, and this CL indeed removes the spill, but it doesn't seem to help the benchmark results. It may help in general, though.

On AMD64:

name                      old time/op    new time/op    delta
BinaryTree17-12           2.79s ± 1%     2.76s ± 0%     -1.33%  (p=0.000 n=10+10)
Fannkuch11-12             3.02s ± 0%     3.14s ± 0%     +3.99%  (p=0.000 n=10+10)
FmtFprintfEmpty-12        51.2ns ± 0%    51.4ns ± 3%    ~       (p=0.368 n=8+10)
FmtFprintfString-12       145ns ± 0%     144ns ± 0%     -0.69%  (p=0.000 n=6+9)
FmtFprintfInt-12          127ns ± 1%     124ns ± 1%     -2.79%  (p=0.000 n=10+9)
FmtFprintfIntInt-12       186ns ± 0%     184ns ± 0%     -1.34%  (p=0.000 n=10+9)
FmtFprintfPrefixedInt-12  196ns ± 0%     194ns ± 0%     -0.97%  (p=0.000 n=9+9)
FmtFprintfFloat-12        293ns ± 2%     287ns ± 0%     -2.00%  (p=0.000 n=10+9)
FmtManyArgs-12            847ns ± 1%     829ns ± 0%     -2.17%  (p=0.000 n=10+7)
GobDecode-12              7.17ms ± 0%    7.18ms ± 0%    ~       (p=0.123 n=10+10)
GobEncode-12              6.08ms ± 1%    6.08ms ± 0%    ~       (p=0.497 n=10+9)
Gzip-12                   277ms ± 1%     275ms ± 1%     -0.47%  (p=0.028 n=10+9)
Gunzip-12                 39.1ms ± 2%    38.2ms ± 1%    -2.20%  (p=0.000 n=10+9)
HTTPClientServer-12       90.9µs ± 4%    87.7µs ± 2%    -3.51%  (p=0.001 n=9+10)
JSONEncode-12             17.3ms ± 1%    16.5ms ± 0%    -5.02%  (p=0.000 n=9+9)
JSONDecode-12             54.6ms ± 1%    54.1ms ± 0%    -0.99%  (p=0.000 n=9+9)
Mandelbrot200-12          4.45ms ± 0%    4.45ms ± 0%    -0.02%  (p=0.006 n=8+9)
GoParse-12                3.44ms ± 0%    3.48ms ± 1%    +0.95%  (p=0.000 n=10+10)
RegexpMatchEasy0_32-12    84.9ns ± 0%    85.0ns ± 0%    ~       (p=0.241 n=8+8)
RegexpMatchEasy0_1K-12    867ns ± 3%     915ns ±11%     +5.55%  (p=0.037 n=10+10)
RegexpMatchEasy1_32-12    82.7ns ± 5%    83.9ns ± 4%    ~       (p=0.161 n=9+10)
RegexpMatchEasy1_1K-12    361ns ± 1%     363ns ± 0%     ~       (p=0.098 n=10+8)
RegexpMatchMedium_32-12   126ns ± 0%     126ns ± 1%     ~       (p=0.549 n=8+10)
RegexpMatchMedium_1K-12   38.8µs ± 0%    39.1µs ± 0%    +0.67%  (p=0.000 n=9+8)
RegexpMatchHard_32-12     1.95µs ± 0%    1.96µs ± 0%    +0.43%  (p=0.000 n=9+9)
RegexpMatchHard_1K-12     59.0µs ± 0%    59.1µs ± 0%    +0.27%  (p=0.000 n=10+9)
Revcomp-12                436ms ± 1%     431ms ± 1%     -1.19%  (p=0.005 n=10+10)
Template-12               56.7ms ± 1%    57.1ms ± 1%    +0.71%  (p=0.001 n=10+9)
TimeParse-12              312ns ± 0%     310ns ± 0%     -0.80%  (p=0.000 n=10+9)
TimeFormat-12             336ns ± 0%     332ns ± 0%     -1.19%  (p=0.000 n=8+7)
[Geo mean]                59.2µs         58.9µs         -0.42%

On PPC64:

name                      old time/op    new time/op    delta
BinaryTree17-2            4.67s ± 2%     4.71s ± 1%     ~       (p=0.421 n=5+5)
Fannkuch11-2              3.92s ± 1%     3.94s ± 0%     +0.46%  (p=0.032 n=5+5)
FmtFprintfEmpty-2         122ns ± 0%     120ns ± 2%     -1.80%  (p=0.016 n=4+5)
FmtFprintfString-2        305ns ± 1%     299ns ± 1%     -1.84%  (p=0.008 n=5+5)
FmtFprintfInt-2           243ns ± 0%     241ns ± 1%     -0.66%  (p=0.016 n=4+5)
FmtFprintfIntInt-2        361ns ± 1%     356ns ± 1%     -1.49%  (p=0.016 n=5+5)
FmtFprintfPrefixedInt-2   355ns ± 1%     357ns ± 1%     ~       (p=0.333 n=5+5)
FmtFprintfFloat-2         502ns ± 2%     498ns ± 1%     ~       (p=0.151 n=5+5)
FmtManyArgs-2             1.55µs ± 2%    1.59µs ± 1%    +2.52%  (p=0.008 n=5+5)
GobDecode-2               13.0ms ± 1%    13.0ms ± 1%    ~       (p=0.841 n=5+5)
GobEncode-2               11.8ms ± 1%    11.8ms ± 1%    ~       (p=0.690 n=5+5)
Gzip-2                    499ms ± 1%     503ms ± 0%     ~       (p=0.421 n=5+5)
Gunzip-2                  86.5ms ± 0%    86.4ms ± 1%    ~       (p=0.841 n=5+5)
HTTPClientServer-2        68.2µs ± 2%    69.6µs ± 3%    ~       (p=0.151 n=5+5)
JSONEncode-2              39.0ms ± 1%    37.2ms ± 1%    -4.65%  (p=0.008 n=5+5)
JSONDecode-2              122ms ± 1%     126ms ± 1%     +2.63%  (p=0.008 n=5+5)
Mandelbrot200-2           6.08ms ± 1%    5.89ms ± 1%    -3.06%  (p=0.008 n=5+5)
GoParse-2                 5.95ms ± 2%    5.98ms ± 1%    ~       (p=0.421 n=5+5)
RegexpMatchEasy0_32-2     331ns ± 1%     328ns ± 1%     ~       (p=0.056 n=5+5)
RegexpMatchEasy0_1K-2     1.45µs ± 0%    1.47µs ± 0%    +1.13%  (p=0.008 n=5+5)
RegexpMatchEasy1_32-2     359ns ± 0%     353ns ± 0%     -1.84%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K-2     1.79µs ± 0%    1.81µs ± 1%    +1.16%  (p=0.008 n=5+5)
RegexpMatchMedium_32-2    420ns ± 2%     413ns ± 0%     -1.72%  (p=0.008 n=5+5)
RegexpMatchMedium_1K-2    70.2µs ± 1%    69.5µs ± 1%    -1.09%  (p=0.032 n=5+5)
RegexpMatchHard_32-2      3.87µs ± 1%    3.65µs ± 0%    -5.86%  (p=0.008 n=5+5)
RegexpMatchHard_1K-2      111µs ± 0%     105µs ± 0%     -5.49%  (p=0.016 n=5+4)
Revcomp-2                 1.00s ± 1%     1.01s ± 2%     ~       (p=0.151 n=5+5)
Template-2                113ms ± 1%     113ms ± 2%     ~       (p=0.841 n=5+5)
TimeParse-2               555ns ± 0%     550ns ± 1%     -0.87%  (p=0.032 n=5+5)
TimeFormat-2              736ns ± 1%     704ns ± 1%     -4.35%  (p=0.008 n=5+5)
[Geo mean]                120µs          119µs          -0.77%

Reduces "spilled value remains" by 0.6% in cmd/go on AMD64.

Change-Id: If655df343b0f30d1a49ab1ab644f10c698b96f3e
Reviewed-on: https://go-review.googlesource.com/32442
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent d24b57a commit 348275c

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

src/cmd/compile/internal/ssa/regalloc.go

+26-2
Original file line number | Diff line number | Diff line change
@@ -869,7 +869,6 @@ func (s *regAllocState) regalloc(f *Func) {
869869
m := s.values[a.ID].regs &^ phiUsed & s.allocatable
870870
if m != 0 {
871871
r := pickReg(m)
872-
s.freeReg(r)
873872
phiUsed |= regMask(1) << r
874873
phiRegs = append(phiRegs, r)
875874
} else {
@@ -878,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) {
878877
}
879878

880879
// Second pass - deallocate any phi inputs which are now dead.
881-
for _, v := range phis {
880+
for i, v := range phis {
882881
if !s.values[v.ID].needReg {
883882
continue
884883
}
@@ -887,6 +886,31 @@ func (s *regAllocState) regalloc(f *Func) {
887886
// Input is dead beyond the phi, deallocate
888887
// anywhere else it might live.
889888
s.freeRegs(s.values[a.ID].regs)
889+
} else {
890+
// Input is still live.
891+
// Try to move it around before kicking out, if there is a free register.
892+
// We generate a Copy in the predecessor block and record it. It will be
893+
// deleted if never used.
894+
r := phiRegs[i]
895+
if r == noRegister {
896+
continue
897+
}
898+
// Pick a free register. At this point some registers used in the predecessor
899+
// block may have been deallocated. Those are the ones used for Phis. Exclude
900+
// them (and they are not going to be helpful anyway).
901+
m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
902+
if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
903+
r2 := pickReg(m)
904+
c := p.NewValue1(a.Line, OpCopy, a.Type, s.regs[r].c)
905+
s.copies[c] = false
906+
if s.f.pass.debug > regDebug {
907+
fmt.Printf("copy %s to %s : %s\n", a, c, s.registers[r2].Name())
908+
}
909+
s.setOrig(c, a)
910+
s.assignReg(r2, a, c)
911+
s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
912+
}
913+
s.freeReg(r)
890914
}
891915
}
892916

0 commit comments

Comments
 (0)