Skip to content

Commit e031318

Browse files
Xiangdong Jirandall77
Xiangdong Ji
authored andcommitted
cmd/compile: ARM comparisons with 0 incorrect on overflow
Some ARM rewriting rules convert 'comparing to zero' conditions of if statements to a simplified version utilizing CMN and CMP instructions to branch over condition flags, in order to save one Add or Sub caculation. Such optimizations lead to wrong branching in case an overflow/underflow occurs when executing CMN or CMP. Fix the issue by introducing new block opcodes that don't honor the overflow/underflow flag: Block-Op Meaning ARM condition codes 1. LTnoov less than MI 2. GEnoov greater than or equal PL 3. LEnoov less than or equal MI || EQ 4. GTnoov greater than NEQ & PL The patch also adds a few test cases to cover scenarios that are specific to ARM and fine-tunes the code generation tests for 'x-const'. For more details please refer to the previous fix on 64-bit ARM: https://go-review.googlesource.com/c/go/+/233097 Go1 perf, 'old' is the non-optimized version, that is removing all concerned rewriting rules. name old time/op new time/op delta BinaryTree17-8 7.73s ± 0% 7.81s ± 0% +0.97% (p=0.000 n=7+8) Fannkuch11-8 7.06s ± 0% 7.00s ± 0% -0.83% (p=0.000 n=8+8) FmtFprintfEmpty-8 181ns ± 1% 183ns ± 1% +1.31% (p=0.001 n=8+8) FmtFprintfString-8 319ns ± 1% 325ns ± 2% +1.71% (p=0.009 n=7+8) FmtFprintfInt-8 358ns ± 1% 359ns ± 1% ~ (p=0.293 n=7+7) FmtFprintfIntInt-8 459ns ± 3% 456ns ± 1% ~ (p=0.869 n=8+8) FmtFprintfPrefixedInt-8 535ns ± 4% 538ns ± 4% ~ (p=0.572 n=8+8) FmtFprintfFloat-8 1.01µs ± 2% 1.01µs ± 2% ~ (p=0.625 n=8+8) FmtManyArgs-8 1.93µs ± 2% 1.93µs ± 1% ~ (p=0.979 n=8+7) GobDecode-8 16.1ms ± 1% 16.5ms ± 1% +2.32% (p=0.000 n=8+8) GobEncode-8 15.9ms ± 0% 15.8ms ± 1% -1.00% (p=0.000 n=8+7) Gzip-8 690ms ± 1% 670ms ± 0% -2.90% (p=0.000 n=8+8) Gunzip-8 109ms ± 1% 109ms ± 1% ~ (p=0.694 n=7+8) HTTPClientServer-8 149µs ± 3% 146µs ± 2% -1.70% (p=0.028 n=8+8) JSONEncode-8 50.5ms ± 1% 49.2ms ± 0% -2.60% (p=0.001 n=7+7) JSONDecode-8 135ms ± 2% 137ms ± 1% ~ (p=0.054 n=8+7) Mandelbrot200-8 951ms ± 0% 952ms ± 0% ~ (p=0.852 n=6+8) GoParse-8 9.47ms ± 1% 9.66ms ± 1% +2.01% (p=0.000 n=8+8) RegexpMatchEasy0_32-8 288ns ± 2% 277ns ± 2% -3.61% (p=0.000 n=8+8) RegexpMatchEasy0_1K-8 1.66µs ± 1% 1.69µs ± 2% +2.21% (p=0.001 n=7+7) RegexpMatchEasy1_32-8 334ns ± 1% 305ns ± 2% -8.86% (p=0.000 n=8+8) RegexpMatchEasy1_1K-8 2.14µs ± 2% 2.15µs ± 0% ~ (p=0.099 n=8+8) RegexpMatchMedium_32-8 13.3ns ± 1% 13.3ns ± 0% ~ (p=1.000 n=7+7) RegexpMatchMedium_1K-8 81.1µs ± 3% 80.7µs ± 1% ~ (p=0.955 n=7+8) RegexpMatchHard_32-8 4.26µs ± 0% 4.26µs ± 0% ~ (p=0.933 n=7+8) RegexpMatchHard_1K-8 124µs ± 0% 124µs ± 0% +0.31% (p=0.000 n=8+8) Revcomp-8 14.7ms ± 2% 14.5ms ± 1% -1.66% (p=0.003 n=8+8) Template-8 197ms ± 2% 200ms ± 3% +1.62% (p=0.021 n=8+8) TimeParse-8 1.33µs ± 1% 1.30µs ± 1% -1.86% (p=0.002 n=8+8) TimeFormat-8 3.04µs ± 1% 3.02µs ± 0% -0.60% (p=0.000 n=8+8) name old speed new speed delta GobDecode-8 47.6MB/s ± 1% 46.5MB/s ± 1% -2.28% (p=0.000 n=8+8) GobEncode-8 48.1MB/s ± 0% 48.6MB/s ± 1% +1.02% (p=0.000 n=8+7) Gzip-8 28.1MB/s ± 1% 29.0MB/s ± 0% +2.97% (p=0.000 n=8+8) Gunzip-8 178MB/s ± 1% 179MB/s ± 2% ~ (p=0.694 n=7+8) JSONEncode-8 38.4MB/s ± 1% 39.4MB/s ± 0% +2.67% (p=0.001 n=7+7) JSONDecode-8 14.3MB/s ± 2% 14.2MB/s ± 1% -0.81% (p=0.043 n=8+7) GoParse-8 6.12MB/s ± 1% 5.99MB/s ± 1% -2.00% (p=0.000 n=8+8) RegexpMatchEasy0_32-8 111MB/s ± 2% 115MB/s ± 2% +3.77% (p=0.000 n=8+8) RegexpMatchEasy0_1K-8 618MB/s ± 1% 604MB/s ± 2% -2.16% (p=0.001 n=7+7) RegexpMatchEasy1_32-8 95.7MB/s ± 1% 105.1MB/s ± 2% +9.76% (p=0.000 n=8+8) RegexpMatchEasy1_1K-8 479MB/s ± 2% 477MB/s ± 0% ~ (p=0.105 n=8+8) RegexpMatchMedium_32-8 75.2MB/s ± 1% 75.2MB/s ± 0% ~ (p=0.247 n=7+7) RegexpMatchMedium_1K-8 12.6MB/s ± 3% 12.7MB/s ± 1% ~ (p=0.538 n=7+8) RegexpMatchHard_32-8 7.52MB/s ± 0% 7.52MB/s ± 0% ~ (p=0.968 n=7+8) RegexpMatchHard_1K-8 8.26MB/s ± 0% 8.24MB/s ± 0% -0.30% (p=0.001 n=8+8) Revcomp-8 173MB/s ± 2% 176MB/s ± 1% +1.68% (p=0.003 n=8+8) Template-8 9.85MB/s ± 2% 9.69MB/s ± 3% -1.59% (p=0.021 n=8+8) Fixes #39303 Updates #38740 Change-Id: I0a5f87bfda679f66414c0041ace2ca2e28363f36 Reviewed-on: https://go-review.googlesource.com/c/go/+/236637 Run-TryBot: Keith Randall <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent bdcf33f commit e031318

File tree

8 files changed

+418
-259
lines changed

8 files changed

+418
-259
lines changed

src/cmd/compile/fmtmap_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ var knownFormats = map[string]string{
140140
"float64 %.3f": "",
141141
"float64 %.6g": "",
142142
"float64 %g": "",
143+
"int %#x": "",
143144
"int %-12d": "",
144145
"int %-6d": "",
145146
"int %-8o": "",
@@ -203,6 +204,7 @@ var knownFormats = map[string]string{
203204
"uint64 %b": "",
204205
"uint64 %d": "",
205206
"uint64 %x": "",
207+
"uint8 %#x": "",
206208
"uint8 %d": "",
207209
"uint8 %v": "",
208210
"uint8 %x": "",

src/cmd/compile/internal/arm/ssa.go

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -888,16 +888,30 @@ var condBits = map[ssa.Op]uint8{
888888
var blockJump = map[ssa.BlockKind]struct {
889889
asm, invasm obj.As
890890
}{
891-
ssa.BlockARMEQ: {arm.ABEQ, arm.ABNE},
892-
ssa.BlockARMNE: {arm.ABNE, arm.ABEQ},
893-
ssa.BlockARMLT: {arm.ABLT, arm.ABGE},
894-
ssa.BlockARMGE: {arm.ABGE, arm.ABLT},
895-
ssa.BlockARMLE: {arm.ABLE, arm.ABGT},
896-
ssa.BlockARMGT: {arm.ABGT, arm.ABLE},
897-
ssa.BlockARMULT: {arm.ABLO, arm.ABHS},
898-
ssa.BlockARMUGE: {arm.ABHS, arm.ABLO},
899-
ssa.BlockARMUGT: {arm.ABHI, arm.ABLS},
900-
ssa.BlockARMULE: {arm.ABLS, arm.ABHI},
891+
ssa.BlockARMEQ: {arm.ABEQ, arm.ABNE},
892+
ssa.BlockARMNE: {arm.ABNE, arm.ABEQ},
893+
ssa.BlockARMLT: {arm.ABLT, arm.ABGE},
894+
ssa.BlockARMGE: {arm.ABGE, arm.ABLT},
895+
ssa.BlockARMLE: {arm.ABLE, arm.ABGT},
896+
ssa.BlockARMGT: {arm.ABGT, arm.ABLE},
897+
ssa.BlockARMULT: {arm.ABLO, arm.ABHS},
898+
ssa.BlockARMUGE: {arm.ABHS, arm.ABLO},
899+
ssa.BlockARMUGT: {arm.ABHI, arm.ABLS},
900+
ssa.BlockARMULE: {arm.ABLS, arm.ABHI},
901+
ssa.BlockARMLTnoov: {arm.ABMI, arm.ABPL},
902+
ssa.BlockARMGEnoov: {arm.ABPL, arm.ABMI},
903+
}
904+
905+
// To model a 'LEnoov' ('<=' without overflow checking) branching
906+
var leJumps = [2][2]gc.IndexJump{
907+
{{Jump: arm.ABEQ, Index: 0}, {Jump: arm.ABPL, Index: 1}}, // next == b.Succs[0]
908+
{{Jump: arm.ABMI, Index: 0}, {Jump: arm.ABEQ, Index: 0}}, // next == b.Succs[1]
909+
}
910+
911+
// To model a 'GTnoov' ('>' without overflow checking) branching
912+
var gtJumps = [2][2]gc.IndexJump{
913+
{{Jump: arm.ABMI, Index: 1}, {Jump: arm.ABEQ, Index: 1}}, // next == b.Succs[0]
914+
{{Jump: arm.ABEQ, Index: 1}, {Jump: arm.ABPL, Index: 0}}, // next == b.Succs[1]
901915
}
902916

903917
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
@@ -941,7 +955,8 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
941955
ssa.BlockARMLT, ssa.BlockARMGE,
942956
ssa.BlockARMLE, ssa.BlockARMGT,
943957
ssa.BlockARMULT, ssa.BlockARMUGT,
944-
ssa.BlockARMULE, ssa.BlockARMUGE:
958+
ssa.BlockARMULE, ssa.BlockARMUGE,
959+
ssa.BlockARMLTnoov, ssa.BlockARMGEnoov:
945960
jmp := blockJump[b.Kind]
946961
switch next {
947962
case b.Succs[0].Block():
@@ -958,6 +973,12 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
958973
}
959974
}
960975

976+
case ssa.BlockARMLEnoov:
977+
s.CombJump(b, next, &leJumps)
978+
979+
case ssa.BlockARMGTnoov:
980+
s.CombJump(b, next, &gtJumps)
981+
961982
default:
962983
b.Fatalf("branch not implemented: %s", b.LongString())
963984
}

src/cmd/compile/internal/ssa/gen/ARM.rules

Lines changed: 76 additions & 72 deletions
Large diffs are not rendered by default.

src/cmd/compile/internal/ssa/gen/ARMOps.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,10 @@ func init() {
584584
{name: "ULE", controls: 1},
585585
{name: "UGT", controls: 1},
586586
{name: "UGE", controls: 1},
587+
{name: "LTnoov", controls: 1}, // 'LT' but without honoring overflow
588+
{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
589+
{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
590+
{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
587591
}
588592

589593
archs = append(archs, arch{

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 18 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)