Skip to content

Commit ace1494

Browse files
author
eric fang
committed
cmd/compile: optimize absorbing InvertFlags into Noov comparisons for arm64
Previously (LessThanNoov (InvertFlags x)) is lowered as:

    CSET
    CSET
    BIC

With this CL it's lowered as:

    CSET
    CSEL

This saves one instruction.

Similarly (GreaterEqualNoov (InvertFlags x)) is now lowered as:

    CSET
    CSINC

$ benchstat old.bench new.bench
goos: linux
goarch: arm64
                       │  old.bench  │             new.bench              │
                       │   sec/op    │   sec/op     vs base               │
InvertLessThanNoov-160   2.249n ± 2%   2.190n ± 1%  -2.62% (p=0.003 n=10)

Change-Id: Idd8979b7f4fe466e74b1a201c4aba7f1b0cffb0b
Reviewed-on: https://go-review.googlesource.com/c/go/+/526237
Reviewed-by: Heschi Kreinick <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Run-TryBot: Eric Fang <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
1 parent dd88102 commit ace1494

File tree

4 files changed

+43
-12
lines changed

4 files changed

+43
-12
lines changed

src/cmd/compile/internal/ssa/_gen/ARM64.rules

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,8 +1569,8 @@
15691569
(LessEqualF (InvertFlags x)) => (GreaterEqualF x)
15701570
(GreaterThanF (InvertFlags x)) => (LessThanF x)
15711571
(GreaterEqualF (InvertFlags x)) => (LessEqualF x)
1572-
(LessThanNoov (InvertFlags x)) => (BIC (GreaterEqualNoov <typ.Bool> x) (Equal <typ.Bool> x))
1573-
(GreaterEqualNoov (InvertFlags x)) => (OR (LessThanNoov <typ.Bool> x) (Equal <typ.Bool> x))
1572+
(LessThanNoov (InvertFlags x)) => (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x)
1573+
(GreaterEqualNoov (InvertFlags x)) => (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x)
15741574

15751575
// Boolean-generating instructions (NOTE: NOT all boolean Values) always
15761576
// zero upper bit of the register; no need to zero-extend

src/cmd/compile/internal/ssa/bench_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,21 @@ func BenchmarkPhioptPass(b *testing.B) {
3030
}
3131
}
3232
}
33+
34+
type Point struct {
35+
X, Y int
36+
}
37+
38+
//go:noinline
39+
func sign(p1, p2, p3 Point) bool {
40+
return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0
41+
}
42+
43+
func BenchmarkInvertLessThanNoov(b *testing.B) {
44+
p1 := Point{1, 2}
45+
p2 := Point{2, 3}
46+
p3 := Point{3, 4}
47+
for i := 0; i < b.N; i++ {
48+
sign(p1, p2, p3)
49+
}
50+
}

src/cmd/compile/internal/ssa/rewriteARM64.go

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/codegen/comparisons.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,3 +788,16 @@ func cmp7() {
788788
cmp5[string]("") // force instantiation
789789
cmp6[string]("") // force instantiation
790790
}
791+
792+
type Point struct {
793+
X, Y int
794+
}
795+
796+
// invertLessThanNoov checks (LessThanNoov (InvertFlags x)) is lowered as
797+
// CMP, CSET, CSEL instruction sequence. InvertFlags are only generated under
798+
// certain conditions, see canonLessThan, so if the code below does not
799+
// generate an InvertFlags OP, this check may fail.
800+
func invertLessThanNoov(p1, p2, p3 Point) bool {
801+
// arm64:`CMP`,`CSET`,`CSEL`
802+
return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0
803+
}

0 commit comments

Comments
 (0)