Skip to content

Commit 4e5086b

Browse files
dvyukov, rsc
authored and committed
runtime: improve Linux mutex
The implementation is a hybrid active/passive spin/blocking mutex. The design minimizes the number of context switches and futex calls. The idea is that all critical sections in the runtime are intentionally small, so a pure blocking mutex behaves badly, causing a lot of context switches, thread parking/unparking and kernel calls. Note that some synthetic benchmarks become somewhat slower; that is due to increased contention on other data structures and should not affect programs that do any real work. On 2 x Intel E5620, 8 HT cores, 2.4GHz: benchmark old ns/op new ns/op delta BenchmarkSelectContended 521.00 503.00 -3.45% BenchmarkSelectContended-2 661.00 320.00 -51.59% BenchmarkSelectContended-4 1139.00 629.00 -44.78% BenchmarkSelectContended-8 2870.00 878.00 -69.41% BenchmarkSelectContended-16 5276.00 818.00 -84.50% BenchmarkChanContended 112.00 103.00 -8.04% BenchmarkChanContended-2 631.00 174.00 -72.42% BenchmarkChanContended-4 682.00 272.00 -60.12% BenchmarkChanContended-8 1601.00 520.00 -67.52% BenchmarkChanContended-16 3100.00 372.00 -88.00% BenchmarkChanSync 253.00 239.00 -5.53% BenchmarkChanSync-2 5030.00 4648.00 -7.59% BenchmarkChanSync-4 4826.00 4694.00 -2.74% BenchmarkChanSync-8 4778.00 4713.00 -1.36% BenchmarkChanSync-16 5289.00 4710.00 -10.95% BenchmarkChanProdCons0 273.00 254.00 -6.96% BenchmarkChanProdCons0-2 599.00 400.00 -33.22% BenchmarkChanProdCons0-4 1168.00 659.00 -43.58% BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66% BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29% BenchmarkChanProdCons10 150.00 140.00 -6.67% BenchmarkChanProdCons10-2 607.00 268.00 -55.85% BenchmarkChanProdCons10-4 1137.00 404.00 -64.47% BenchmarkChanProdCons10-8 2115.00 828.00 -60.85% BenchmarkChanProdCons10-16 4283.00 855.00 -80.04% BenchmarkChanProdCons100 117.00 110.00 -5.98% BenchmarkChanProdCons100-2 558.00 218.00 -60.93% BenchmarkChanProdCons100-4 722.00 287.00 -60.25% BenchmarkChanProdCons100-8 1840.00 431.00 -76.58% BenchmarkChanProdCons100-16 3394.00 448.00 -86.80% 
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89% BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63% BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06% BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53% BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29% BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47% BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01% BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93% BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12% BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98% BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70% BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21% BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14% BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82% BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75% BenchmarkSyscall 34.40 33.30 -3.20% BenchmarkSyscall-2 160.00 121.00 -24.38% BenchmarkSyscall-4 131.00 136.00 +3.82% BenchmarkSyscall-8 139.00 131.00 -5.76% BenchmarkSyscall-16 161.00 168.00 +4.35% BenchmarkSyscallWork 950.00 950.00 +0.00% BenchmarkSyscallWork-2 481.00 480.00 -0.21% BenchmarkSyscallWork-4 268.00 270.00 +0.75% BenchmarkSyscallWork-8 156.00 169.00 +8.33% BenchmarkSyscallWork-16 188.00 184.00 -2.13% BenchmarkSemaSyntNonblock 36.40 35.60 -2.20% BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59% BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29% BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00% BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82% BenchmarkSemaSyntBlock 35.30 35.30 +0.00% BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08% BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86% BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90% BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36% BenchmarkSemaWorkNonblock 810.00 811.00 +0.12% BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03% BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20% BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00% BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85% BenchmarkSemaWorkBlock 810.00 811.00 +0.12% BenchmarkSemaWorkBlock-2 
454.00 466.00 +2.64% BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82% BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52% BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82% BenchmarkContendedSemaphore 123.00 102.00 -17.07% BenchmarkContendedSemaphore-2 34.80 34.90 +0.29% BenchmarkContendedSemaphore-4 34.70 34.80 +0.29% BenchmarkContendedSemaphore-8 34.70 34.70 +0.00% BenchmarkContendedSemaphore-16 34.80 34.70 -0.29% BenchmarkMutex 26.80 26.00 -2.99% BenchmarkMutex-2 108.00 45.20 -58.15% BenchmarkMutex-4 103.00 127.00 +23.30% BenchmarkMutex-8 109.00 147.00 +34.86% BenchmarkMutex-16 102.00 152.00 +49.02% BenchmarkMutexSlack 27.00 26.90 -0.37% BenchmarkMutexSlack-2 149.00 165.00 +10.74% BenchmarkMutexSlack-4 121.00 209.00 +72.73% BenchmarkMutexSlack-8 101.00 158.00 +56.44% BenchmarkMutexSlack-16 97.00 129.00 +32.99% BenchmarkMutexWork 792.00 794.00 +0.25% BenchmarkMutexWork-2 407.00 409.00 +0.49% BenchmarkMutexWork-4 220.00 209.00 -5.00% BenchmarkMutexWork-8 267.00 160.00 -40.07% BenchmarkMutexWork-16 315.00 300.00 -4.76% BenchmarkMutexWorkSlack 792.00 793.00 +0.13% BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49% BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78% BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25% BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00% BenchmarkRWMutexWrite100 27.10 27.00 -0.37% BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26% BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04% BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92% BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35% BenchmarkRWMutexWrite10 29.60 29.40 -0.68% BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68% BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96% BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31% BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20% BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38% BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19% BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34% BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25% BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08% 
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00% BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29% BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31% BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84% BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48% R=rsc, iant, msolo, fw, iant CC=golang-dev https://golang.org/cl/4711045
1 parent bed7e3e commit 4e5086b

File tree

18 files changed

+278
-91
lines changed

18 files changed

+278
-91
lines changed

src/cmd/6a/lex.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ struct
527527
"OUTSB", LTYPE0, AOUTSB,
528528
"OUTSL", LTYPE0, AOUTSL,
529529
"OUTSW", LTYPE0, AOUTSW,
530+
"PAUSE", LTYPEN, APAUSE,
530531
"POPAL", LTYPE0, APOPAL,
531532
"POPAW", LTYPE0, APOPAW,
532533
"POPFL", LTYPE0, APOPFL,

src/cmd/6l/6.out.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ enum as
190190
AOUTSB,
191191
AOUTSL,
192192
AOUTSW,
193+
APAUSE,
193194
APOPAL,
194195
APOPAW,
195196
APOPFL,

src/cmd/6l/optab.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,7 @@ Optab optab[] =
919919
{ APADDW, ymm, Py, 0xfd,Pe,0xfd },
920920
{ APAND, ymm, Py, 0xdb,Pe,0xdb },
921921
{ APANDN, ymm, Py, 0xdf,Pe,0xdf },
922+
{ APAUSE, ynone, Px, 0xf3,0x90 },
922923
{ APAVGB, ymm, Py, 0xe0,Pe,0xe0 },
923924
{ APAVGW, ymm, Py, 0xe3,Pe,0xe3 },
924925
{ APCMPEQB, ymm, Py, 0x74,Pe,0x74 },

src/cmd/8a/lex.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ struct
421421
"OUTSB", LTYPE0, AOUTSB,
422422
"OUTSL", LTYPE0, AOUTSL,
423423
"OUTSW", LTYPE0, AOUTSW,
424+
"PAUSE", LTYPEN, APAUSE,
424425
"POPAL", LTYPE0, APOPAL,
425426
"POPAW", LTYPE0, APOPAW,
426427
"POPFL", LTYPE0, APOPFL,

src/cmd/8l/8.out.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ enum as
180180
AOUTSB,
181181
AOUTSL,
182182
AOUTSW,
183+
APAUSE,
183184
APOPAL,
184185
APOPAW,
185186
APOPFL,

src/cmd/8l/optab.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ Optab optab[] =
495495
{ AOUTSB, ynone, Pb, 0x6e },
496496
{ AOUTSL, ynone, Px, 0x6f },
497497
{ AOUTSW, ynone, Pe, 0x6f },
498+
{ APAUSE, ynone, Px, 0xf3,0x90 },
498499
{ APOPAL, ynone, Px, 0x61 },
499500
{ APOPAW, ynone, Pe, 0x61 },
500501
{ APOPFL, ynone, Px, 0x9d },

src/pkg/runtime/386/asm.s

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,20 @@ TEXT runtime·xadd(SB), 7, $0
334334
ADDL CX, AX
335335
RET
336336

337+
// runtime·xchg(addr, v) for 386: stores the 32-bit value v into the word at
// addr and returns, in AX, the value that word held before the store.
TEXT runtime·xchg(SB), 7, $0
338+
// BX = addr (first argument).
MOVL 4(SP), BX
339+
// AX = v (second argument).
MOVL 8(SP), AX
340+
// Swap AX with the word at addr; AX now holds the previous contents.
// On x86, XCHG with a memory operand is performed as a locked operation.
XCHGL AX, 0(BX)
341+
RET
342+
343+
// runtime·procyield(cnt) for 386: spins in place, executing PAUSE once per
// iteration, for cnt iterations.
// NOTE(review): the counter is decremented before it is tested, so cnt == 0
// wraps around and the loop runs 2^32 times instead of returning at once —
// confirm that callers never pass 0.
TEXT runtime·procyield(SB),7,$0
344+
// AX = cnt, the number of iterations still to run.
MOVL 4(SP), AX
345+
again:
346+
PAUSE
347+
// Count down; leave the loop once AX reaches zero.
SUBL $1, AX
348+
JNZ again
349+
RET
350+
337351
TEXT runtime·atomicstorep(SB), 7, $0
338352
MOVL 4(SP), BX
339353
MOVL 8(SP), AX

src/pkg/runtime/amd64/asm.s

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,20 @@ TEXT runtime·xadd(SB), 7, $0
378378
ADDL CX, AX
379379
RET
380380

381+
// runtime·xchg(addr, v) for amd64: stores the 32-bit value v into the word at
// addr and returns, in AX, the value that word held before the store.
TEXT runtime·xchg(SB), 7, $0
382+
// BX = addr (64-bit pointer, first argument).
MOVQ 8(SP), BX
383+
// AX = v (32-bit value, second argument).
MOVL 16(SP), AX
384+
// Swap AX with the 32-bit word at addr; AX now holds the previous contents.
// On x86, XCHG with a memory operand is performed as a locked operation.
XCHGL AX, 0(BX)
385+
RET
386+
387+
// runtime·procyield(cnt) for amd64: spins in place, executing PAUSE once per
// iteration, for cnt iterations.
// NOTE(review): the counter is decremented before it is tested, so cnt == 0
// wraps around and the loop runs 2^32 times instead of returning at once —
// confirm that callers never pass 0.
TEXT runtime·procyield(SB),7,$0
388+
// AX = cnt (32-bit), the number of iterations still to run.
MOVL 8(SP), AX
389+
again:
390+
PAUSE
391+
// Count down; leave the loop once AX reaches zero.
SUBL $1, AX
392+
JNZ again
393+
RET
394+
381395
TEXT runtime·atomicstorep(SB), 7, $0
382396
MOVQ 8(SP), BX
383397
MOVQ 16(SP), AX

src/pkg/runtime/arm/atomic.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,29 @@ runtime·xadd(uint32 volatile *val, int32 delta)
1919
}
2020
}
2121

22+
// runtime·xchg replaces *addr with v and returns the value that was observed
// in *addr immediately before the replacement. It is built on runtime·cas
// (defined elsewhere): the loop re-reads *addr and retries until the
// compare-and-swap from the observed value to v succeeds.
#pragma textflag 7
23+
uint32
24+
runtime·xchg(uint32 volatile* addr, uint32 v)
25+
{
26+
uint32 old;
27+
28+
for(;;) {
29+
// Snapshot the current value, then try to swap it for v.
old = *addr;
30+
if(runtime·cas(addr, old, v))
31+
return old;
32+
}
33+
}
34+
35+
// runtime·procyield busy-waits for cnt iterations of an empty loop.
// Unlike the 386/amd64 versions it issues no special instruction; the delay
// comes only from counting i up to cnt. With cnt == 0 the loop body never runs.
#pragma textflag 7
36+
void
37+
runtime·procyield(uint32 cnt)
38+
{
39+
// The counter is volatile, presumably so the compiler cannot remove the
// otherwise empty loop.
uint32 volatile i;
40+
41+
for(i = 0; i < cnt; i++) {
42+
}
43+
}
44+
2245
#pragma textflag 7
2346
uint32
2447
runtime·atomicload(uint32 volatile* addr)

src/pkg/runtime/linux/386/defs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ enum {
6161
ITIMER_REAL = 0,
6262
ITIMER_VIRTUAL = 0x1,
6363
ITIMER_PROF = 0x2,
64+
O_RDONLY = 0,
65+
O_CLOEXEC = 02000000,
6466
};
6567

6668
// Types

src/pkg/runtime/linux/386/sys.s

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,31 @@ TEXT runtime·exit1(SB),7,$0
2222
INT $3 // not reached
2323
RET
2424

25+
// runtime·open for linux/386: issues system call 5 (open) through INT $0x80.
// The three stack arguments are passed through unchanged in BX, CX and DX.
// Whatever the kernel leaves in AX is returned as-is; no error check is done.
TEXT runtime·open(SB),7,$0
26+
MOVL $5, AX // syscall - open
27+
MOVL 4(SP), BX
28+
MOVL 8(SP), CX
29+
MOVL 12(SP), DX
30+
INT $0x80
31+
RET
32+
33+
// runtime·close for linux/386: issues system call 6 (close) through INT $0x80
// with the single stack argument in BX. Whatever the kernel leaves in AX is
// returned as-is; no error check is done.
TEXT runtime·close(SB),7,$0
34+
MOVL $6, AX // syscall - close
35+
MOVL 4(SP), BX
36+
INT $0x80
37+
RET
38+
2539
TEXT runtime·write(SB),7,$0
2640
MOVL $4, AX // syscall - write
27-
MOVL 4(SP), BX
41+
MOVL 4(SP), BX
42+
MOVL 8(SP), CX
43+
MOVL 12(SP), DX
44+
INT $0x80
45+
RET
46+
47+
TEXT runtime·read(SB),7,$0
48+
MOVL $3, AX // syscall - read
49+
MOVL 4(SP), BX
2850
MOVL 8(SP), CX
2951
MOVL 12(SP), DX
3052
INT $0x80
@@ -315,3 +337,8 @@ TEXT runtime·setldt(SB),7,$32
315337
MOVW AX, GS
316338

317339
RET
340+
341+
// runtime·osyield for linux/386: issues system call 158 (sched_yield on
// Linux/386) through INT $0x80. It takes no arguments and does not inspect
// the result.
TEXT runtime·osyield(SB),7,$0
342+
MOVL $158, AX
343+
INT $0x80
344+
RET

src/pkg/runtime/linux/amd64/defs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ enum {
6161
ITIMER_REAL = 0,
6262
ITIMER_VIRTUAL = 0x1,
6363
ITIMER_PROF = 0x2,
64+
O_RDONLY = 0,
65+
O_CLOEXEC = 02000000,
6466
};
6567

6668
// Types

src/pkg/runtime/linux/amd64/sys.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ TEXT runtime·open(SB),7,$0-16
2828
SYSCALL
2929
RET
3030

31+
// runtime·close for linux/amd64: issues system call 3 (close on Linux/amd64)
// with the single 32-bit stack argument in DI. Whatever the kernel leaves in
// AX is returned as-is; no error check is done.
// NOTE(review): the frame is declared as $0-16, but only one argument at
// 8(SP) is read — the declared argument size looks larger than needed; confirm.
TEXT runtime·close(SB),7,$0-16
32+
MOVL 8(SP), DI
33+
MOVL $3, AX // syscall entry
34+
SYSCALL
35+
RET
36+
3137
TEXT runtime·write(SB),7,$0-24
3238
MOVL 8(SP), DI
3339
MOVQ 16(SP), SI
@@ -36,6 +42,14 @@ TEXT runtime·write(SB),7,$0-24
3642
SYSCALL
3743
RET
3844

45+
// runtime·read for linux/amd64: issues system call 0 (read on Linux/amd64).
// The three stack arguments are passed in DI (32-bit), SI (64-bit) and
// DX (32-bit). Whatever the kernel leaves in AX is returned as-is; no error
// check is done.
TEXT runtime·read(SB),7,$0-24
46+
MOVL 8(SP), DI
47+
MOVQ 16(SP), SI
48+
MOVL 24(SP), DX
49+
MOVL $0, AX // syscall entry
50+
SYSCALL
51+
RET
52+
3953
TEXT runtime·raisesigpipe(SB),7,$12
4054
MOVL $186, AX // syscall - gettid
4155
SYSCALL
@@ -232,3 +246,7 @@ TEXT runtime·settls(SB),7,$32
232246
CALL runtime·notok(SB)
233247
RET
234248

249+
// runtime·osyield for linux/amd64: issues system call 24 (sched_yield on
// Linux/amd64). It takes no arguments and does not inspect the result.
TEXT runtime·osyield(SB),7,$0
250+
MOVL $24, AX
251+
SYSCALL
252+
RET

src/pkg/runtime/linux/arm/defs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ enum {
6161
ITIMER_REAL = 0,
6262
ITIMER_PROF = 0x2,
6363
ITIMER_VIRTUAL = 0x1,
64+
O_RDONLY = 0,
65+
O_CLOEXEC = 02000000,
6466
};
6567

6668
// Types

src/pkg/runtime/linux/arm/sys.s

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
#define SYS_BASE 0x0
1616

1717
#define SYS_exit (SYS_BASE + 1)
18+
#define SYS_read (SYS_BASE + 3)
1819
#define SYS_write (SYS_BASE + 4)
20+
#define SYS_open (SYS_BASE + 5)
21+
#define SYS_close (SYS_BASE + 6)
1922
#define SYS_gettimeofday (SYS_BASE + 78)
2023
#define SYS_clone (SYS_BASE + 120)
2124
#define SYS_rt_sigreturn (SYS_BASE + 173)
@@ -29,10 +32,25 @@
2932
#define SYS_mincore (SYS_BASE + 219)
3033
#define SYS_gettid (SYS_BASE + 224)
3134
#define SYS_tkill (SYS_BASE + 238)
35+
#define SYS_sched_yield (SYS_BASE + 158)
3236

3337
#define ARM_BASE (SYS_BASE + 0x0f0000)
3438
#define SYS_ARM_cacheflush (ARM_BASE + 2)
3539

40+
// runtime·open for linux/arm: issues the SYS_open system call via SWI $0.
// The three arguments are loaded from 0(FP), 4(FP) and 8(FP) into R0-R2 and
// the system call number is placed in R7. Returns immediately after the SWI
// without inspecting the result.
TEXT runtime·open(SB),7,$0
41+
MOVW 0(FP), R0
42+
MOVW 4(FP), R1
43+
MOVW 8(FP), R2
44+
MOVW $SYS_open, R7
45+
SWI $0
46+
RET
47+
48+
// runtime·close for linux/arm: issues the SYS_close system call via SWI $0.
// The single argument is loaded from 0(FP) into R0 and the system call number
// is placed in R7. Returns immediately after the SWI without inspecting the
// result.
TEXT runtime·close(SB),7,$0
49+
MOVW 0(FP), R0
50+
MOVW $SYS_close, R7
51+
SWI $0
52+
RET
53+
3654
TEXT runtime·write(SB),7,$0
3755
MOVW 0(FP), R0
3856
MOVW 4(FP), R1
@@ -41,6 +59,14 @@ TEXT runtime·write(SB),7,$0
4159
SWI $0
4260
RET
4361

62+
// runtime·read for linux/arm: issues the SYS_read system call via SWI $0.
// The three arguments are loaded from 0(FP), 4(FP) and 8(FP) into R0-R2 and
// the system call number is placed in R7. Returns immediately after the SWI
// without inspecting the result.
TEXT runtime·read(SB),7,$0
63+
MOVW 0(FP), R0
64+
MOVW 4(FP), R1
65+
MOVW 8(FP), R2
66+
MOVW $SYS_read, R7
67+
SWI $0
68+
RET
69+
4470
TEXT runtime·exit(SB),7,$-4
4571
MOVW 0(FP), R0
4672
MOVW $SYS_exit_group, R7
@@ -287,3 +313,7 @@ cascheck:
287313
TEXT runtime·casp(SB),7,$0
288314
B runtime·cas(SB)
289315

316+
// runtime·osyield for linux/arm: issues the SYS_sched_yield system call via
// SWI $0. It loads no arguments, only the system call number into R7, and
// does not inspect the result.
TEXT runtime·osyield(SB),7,$0
317+
MOVW $SYS_sched_yield, R7
318+
SWI $0
319+
RET

0 commit comments

Comments
 (0)