Skip to content

Commit 8157960

Browse files
author
Martin Möhrmann
committed
all: replace runtime SSE2 detection with GO386 setting
When GO386=sse2 is set we can assume SSE2 to be present without a runtime check. If GO386=softfloat is set we can avoid the use of SSE2 even if it is detected. This might cause a memcpy, memclr and bytealg slowdown for Go binaries compiled with softfloat on machines that support SSE2. Such setups are rare and should use GO386=sse2 instead if performance matters. On targets that support SSE2 we avoid the runtime overhead of dynamic CPU feature dispatch. The removal of the runtime SSE2 checks also allows internal/cpu to be simplified further by removing the handling of the required feature option in a follow-up to this CL. Change-Id: I90a853a8853a405cb665497c6d1a86556947ba17 Reviewed-on: https://go-review.googlesource.com/c/go/+/344350 Trust: Martin Möhrmann <[email protected]> Run-TryBot: Martin Möhrmann <[email protected]> TryBot-Result: Go Bot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 22540ab commit 8157960

15 files changed

+31
-60
lines changed

src/cmd/go/internal/work/gc.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ func asmArgs(a *Action, p *load.Package) []interface{} {
374374
args = append(args, "-compiling-runtime")
375375
}
376376

377+
if cfg.Goarch == "386" {
378+
// Define GO386_value from cfg.GO386.
379+
args = append(args, "-D", "GO386_"+cfg.GO386)
380+
}
381+
377382
if cfg.Goarch == "mips" || cfg.Goarch == "mipsle" {
378383
// Define GOMIPS_value from cfg.GOMIPS.
379384
args = append(args, "-D", "GOMIPS_"+cfg.GOMIPS)

src/internal/bytealg/bytealg.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
// Offsets into internal/cpu records for use in assembly.
1313
const (
14-
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
1514
offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
1615
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
1716
offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)

src/internal/bytealg/compare_386.s

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ TEXT cmpbody<>(SB),NOSPLIT,$0-0
3636
JEQ allsame
3737
CMPL BP, $4
3838
JB small
39-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
40-
JNE mediumloop
39+
#ifdef GO386_softfloat
40+
JMP mediumloop
41+
#endif
4142
largeloop:
4243
CMPL BP, $16
4344
JB mediumloop

src/internal/bytealg/equal_386.s

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ TEXT memeqbody<>(SB),NOSPLIT,$0-0
4343
hugeloop:
4444
CMPL BX, $64
4545
JB bigloop
46-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
47-
JNE bigloop
46+
#ifdef GO386_softfloat
47+
JMP bigloop
48+
#endif
4849
MOVOU (SI), X0
4950
MOVOU (DI), X1
5051
MOVOU 16(SI), X2

src/internal/cpu/cpu.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ var X86 struct {
3737
HasPCLMULQDQ bool
3838
HasPOPCNT bool
3939
HasRDTSCP bool
40-
HasSSE2 bool
4140
HasSSE3 bool
4241
HasSSSE3 bool
4342
HasSSE41 bool

src/internal/cpu/cpu_386.go

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/internal/cpu/cpu_amd64.go

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/internal/cpu/cpu_x86.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,6 @@ func doinit() {
6161
{Name: "sse41", Feature: &X86.HasSSE41},
6262
{Name: "sse42", Feature: &X86.HasSSE42},
6363
{Name: "ssse3", Feature: &X86.HasSSSE3},
64-
65-
// These capabilities should always be enabled on amd64:
66-
{Name: "sse2", Feature: &X86.HasSSE2, Required: GOARCH == "amd64"},
6764
}
6865

6966
maxID, _, _, _ := cpuid(0, 0)
@@ -74,8 +71,7 @@ func doinit() {
7471

7572
maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
7673

77-
_, _, ecx1, edx1 := cpuid(1, 0)
78-
X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
74+
_, _, ecx1, _ := cpuid(1, 0)
7975

8076
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
8177
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)

src/internal/cpu/cpu_x86_test.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ package cpu_test
1010
import (
1111
. "internal/cpu"
1212
"os"
13-
"runtime"
1413
"testing"
1514
)
1615

@@ -20,23 +19,6 @@ func TestX86ifAVX2hasAVX(t *testing.T) {
2019
}
2120
}
2221

23-
func TestDisableSSE2(t *testing.T) {
24-
runDebugOptionsTest(t, "TestSSE2DebugOption", "cpu.sse2=off")
25-
}
26-
27-
func TestSSE2DebugOption(t *testing.T) {
28-
MustHaveDebugOptionsSupport(t)
29-
30-
if os.Getenv("GODEBUG") != "cpu.sse2=off" {
31-
t.Skipf("skipping test: GODEBUG=cpu.sse2=off not set")
32-
}
33-
34-
want := runtime.GOARCH != "386" // SSE2 can only be disabled on 386.
35-
if got := X86.HasSSE2; got != want {
36-
t.Errorf("X86.HasSSE2 on %s expected %v, got %v", runtime.GOARCH, want, got)
37-
}
38-
}
39-
4022
func TestDisableSSE3(t *testing.T) {
4123
runDebugOptionsTest(t, "TestSSE3DebugOption", "cpu.sse3=off")
4224
}

src/runtime/asm_386.s

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,8 +838,9 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8
838838
// When no SSE2 support is present do not enforce any serialization
839839
// since using CPUID to serialize the instruction stream is
840840
// very costly.
841-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
842-
JNE rdtsc
841+
#ifdef GO386_softfloat
842+
JMP rdtsc // no fence instructions available
843+
#endif
843844
CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
844845
JNE fences
845846
// Instruction stream serializing RDTSCP is supported.

src/runtime/cpuflags.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ const (
1515
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
1616
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
1717
offsetX86HasRDTSCP = unsafe.Offsetof(cpu.X86.HasRDTSCP)
18-
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
1918

2019
offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
2120

src/runtime/memclr_386.s

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ tail:
3030
JBE _5through8
3131
CMPL BX, $16
3232
JBE _9through16
33-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
34-
JNE nosse2
33+
#ifdef GO386_softfloat
34+
JMP nosse2
35+
#endif
3536
PXOR X0, X0
3637
CMPL BX, $32
3738
JBE _17through32

src/runtime/memmove_386.s

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@ tail:
5555
JBE move_5through8
5656
CMPL BX, $16
5757
JBE move_9through16
58-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
59-
JNE nosse2
58+
#ifdef GO386_softfloat
59+
JMP nosse2
60+
#endif
6061
CMPL BX, $32
6162
JBE move_17through32
6263
CMPL BX, $64

src/runtime/mkpreempt.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@ func gen386() {
200200
l.add("MOVL", reg, 4)
201201
}
202202

203+
softfloat := "GO386_softfloat"
204+
203205
// Save SSE state only if supported.
204206
lSSE := layout{stack: l.stack, sp: "SP"}
205207
for i := 0; i < 8; i++ {
@@ -209,13 +211,13 @@ func gen386() {
209211
p("ADJSP $%d", lSSE.stack)
210212
p("NOP SP")
211213
l.save()
212-
p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
214+
p("#ifndef %s", softfloat)
213215
lSSE.save()
214-
label("nosse:")
216+
p("#endif")
215217
p("CALL ·asyncPreempt2(SB)")
216-
p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
218+
p("#ifndef %s", softfloat)
217219
lSSE.restore()
218-
label("nosse2:")
220+
p("#endif")
219221
l.restore()
220222
p("ADJSP $%d", -lSSE.stack)
221223

src/runtime/preempt_386.s

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
1414
MOVL BP, 16(SP)
1515
MOVL SI, 20(SP)
1616
MOVL DI, 24(SP)
17-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
18-
JNE nosse
17+
#ifndef GO386_softfloat
1918
MOVUPS X0, 28(SP)
2019
MOVUPS X1, 44(SP)
2120
MOVUPS X2, 60(SP)
@@ -24,10 +23,9 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
2423
MOVUPS X5, 108(SP)
2524
MOVUPS X6, 124(SP)
2625
MOVUPS X7, 140(SP)
27-
nosse:
26+
#endif
2827
CALL ·asyncPreempt2(SB)
29-
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
30-
JNE nosse2
28+
#ifndef GO386_softfloat
3129
MOVUPS 140(SP), X7
3230
MOVUPS 124(SP), X6
3331
MOVUPS 108(SP), X5
@@ -36,7 +34,7 @@ nosse:
3634
MOVUPS 60(SP), X2
3735
MOVUPS 44(SP), X1
3836
MOVUPS 28(SP), X0
39-
nosse2:
37+
#endif
4038
MOVL 24(SP), DI
4139
MOVL 20(SP), SI
4240
MOVL 16(SP), BP

0 commit comments

Comments
 (0)