Skip to content

Commit aef81a7

Browse files
sophie-zhao authored and abner-chenc committed
cmd/compile: add rules to optimize go codes to constant 0 on loong64
goos: linux
goarch: loong64
pkg: test/bench/go1
cpu: Loongson-3A6000 @ 2500.00MHz
                      │  old.bench  │             new.bench             │
                      │   sec/op    │   sec/op     vs base              │
BinaryTree17            7.735 ± 1%    7.716 ± 1%   -0.23% (p=0.041 n=15)
Fannkuch11              2.645 ± 0%    2.646 ± 0%   +0.05% (p=0.013 n=15)
FmtFprintfEmpty         35.87n ± 0%   35.89n ± 0%  +0.06% (p=0.000 n=15)
FmtFprintfString        59.54n ± 0%   59.47n ± 0%  ~ (p=0.213 n=15)
FmtFprintfInt           62.23n ± 0%   62.06n ± 0%  ~ (p=0.212 n=15)
FmtFprintfIntInt        98.16n ± 0%   97.90n ± 0%  -0.26% (p=0.000 n=15)
FmtFprintfPrefixedInt   117.0n ± 0%   116.7n ± 0%  -0.26% (p=0.000 n=15)
FmtFprintfFloat         204.6n ± 0%   204.2n ± 0%  -0.20% (p=0.000 n=15)
FmtManyArgs             456.3n ± 0%   455.4n ± 0%  -0.20% (p=0.000 n=15)
GobDecode               7.210m ± 0%   7.156m ± 1%  -0.75% (p=0.000 n=15)
GobEncode               8.143m ± 1%   8.177m ± 1%  ~ (p=0.806 n=15)
Gzip                    280.2m ± 0%   279.7m ± 0%  -0.19% (p=0.005 n=15)
Gunzip                  32.71m ± 0%   32.65m ± 0%  -0.19% (p=0.000 n=15)
HTTPClientServer        53.76µ ± 0%   53.65µ ± 0%  ~ (p=0.083 n=15)
JSONEncode              9.297m ± 0%   9.295m ± 0%  ~ (p=0.806 n=15)
JSONDecode              46.97m ± 1%   47.07m ± 1%  ~ (p=0.683 n=15)
Mandelbrot200           4.602m ± 0%   4.600m ± 0%  -0.05% (p=0.001 n=15)
GoParse                 4.682m ± 0%   4.670m ± 1%  -0.25% (p=0.001 n=15)
RegexpMatchEasy0_32     59.80n ± 0%   59.63n ± 0%  -0.28% (p=0.000 n=15)
RegexpMatchEasy0_1K     458.3n ± 0%   457.3n ± 0%  -0.22% (p=0.001 n=15)
RegexpMatchEasy1_32     59.39n ± 0%   59.23n ± 0%  -0.27% (p=0.000 n=15)
RegexpMatchEasy1_1K     557.9n ± 0%   556.6n ± 0%  -0.23% (p=0.001 n=15)
RegexpMatchMedium_32    803.6n ± 0%   801.8n ± 0%  -0.22% (p=0.001 n=15)
RegexpMatchMedium_1K    27.32µ ± 0%   27.26µ ± 0%  -0.21% (p=0.000 n=15)
RegexpMatchHard_32      1.385µ ± 0%   1.382µ ± 0%  -0.22% (p=0.000 n=15)
RegexpMatchHard_1K      40.93µ ± 0%   40.83µ ± 0%  -0.24% (p=0.000 n=15)
Revcomp                 474.8m ± 0%   474.3m ± 0%  ~ (p=0.250 n=15)
Template                77.41m ± 1%   76.63m ± 1%  -1.01% (p=0.023 n=15)
TimeParse               271.1n ± 0%   271.2n ± 0%  +0.04% (p=0.022 n=15)
TimeFormat              290.0n ± 0%   289.8n ± 0%  ~ (p=0.118 n=15)
geomean                 51.73µ        51.64µ       -0.18%

Change-Id: I45a1e6c85bb3cea0f62766ec932432803e9af10a
Reviewed-on: https://go-review.googlesource.com/c/go/+/619315
Reviewed-by: Qiqi Huang <[email protected]>
Reviewed-by: Meidan Li <[email protected]>
Reviewed-by: abner chenc <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
1 parent 98b3be7 commit aef81a7

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

src/cmd/compile/internal/ssa/_gen/LOONG64.rules

+10
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,16 @@
646646
(ROTR x (MOVVconst [c])) => (ROTRconst x [c&31])
647647
(ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63])
648648

649+
// If the left-shift amount is greater than or equal to the data size (32, 16, 8), every bit kept by the zero extension is zero, so we can optimize to constant 0.
650+
(MOVWUreg (SLLVconst [lc] x)) && lc >= 32 => (MOVVconst [0])
651+
(MOVHUreg (SLLVconst [lc] x)) && lc >= 16 => (MOVVconst [0])
652+
(MOVBUreg (SLLVconst [lc] x)) && lc >= 8 => (MOVVconst [0])
653+
654+
// After zero extension, the upper (64 - datasize) bits are zero, so a logical right shift by at least datasize (32|16|8) can be optimized to constant 0.
655+
(SRLVconst [rc] (MOVWUreg x)) && rc >= 32 => (MOVVconst [0])
656+
(SRLVconst [rc] (MOVHUreg x)) && rc >= 16 => (MOVVconst [0])
657+
(SRLVconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVVconst [0])
658+
649659
// mul by constant
650660
(MULV x (MOVVconst [-1])) => (NEGV x)
651661
(MULV _ (MOVVconst [0])) => (MOVVconst [0])

src/cmd/compile/internal/ssa/rewriteLOONG64.go

+90
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/codegen/bitfield.go

+4
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,15 @@ func rev16w(c uint32) (uint32, uint32, uint32) {
358358

359359
func shift(x uint32, y uint16, z uint8) uint64 {
360360
// arm64:-`MOVWU`,-`LSR\t[$]32`
361+
// loong64:-`MOVWU`,-`SRLV\t[$]32`
361362
a := uint64(x) >> 32
362363
// arm64:-`MOVHU`
364+
// loong64:-`MOVHU`,-`SRLV\t[$]16`
363365
b := uint64(y) >> 16
364366
// arm64:-`MOVBU`
367+
// loong64:-`MOVBU`,-`SRLV\t[$]8`
365368
c := uint64(z) >> 8
366369
// arm64:`MOVD\tZR`,-`ADD\tR[0-9]+>>16`,-`ADD\tR[0-9]+>>8`
370+
// loong64:`MOVV\t[$]0`,-`ADDVU`
367371
return a + b + c
368372
}

0 commit comments

Comments
 (0)