Skip to content

Commit ae78084

Browse files
cherrymui authored and toothrot committed
[release-branch.go1.12] runtime: ensure memmove write pointer atomically on ARM64
If a pointer write is not atomic and the GC is running concurrently, the GC may observe a partially updated pointer, which may point to unallocated or already dead memory. Most pointer writes, like the store instructions generated by the compiler, are already atomic. But we still need to be careful in places like memmove. In memmove, we don't know which bits are pointers (or it is too expensive to query), so we ensure that all aligned pointer-sized units are written atomically.

Fixes #36367.
Updates #36101.

Change-Id: I1b3ca24c6b1ac8a8aaf9ee470115e9a89ec1b00b
Reviewed-on: https://go-review.googlesource.com/c/go/+/212626
Reviewed-by: Austin Clements <[email protected]>
(cherry picked from commit ffbc027)
Reviewed-on: https://go-review.googlesource.com/c/go/+/213684
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent c5af2aa commit ae78084

File tree

1 file changed

+37
-5
lines changed

1 file changed

+37
-5
lines changed

src/runtime/memmove_arm64.s

+37-5
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,20 @@ check:
2222
CMP R3, R4
2323
BLT backward
2424

25-
// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
25+
// Copying forward proceeds by copying R7/32 quadwords then R6 <= 31 tail bytes.
2626
// R3 and R4 are advanced as we copy.
2727

2828
// (There may be implementations of armv8 where copying by bytes until
2929
// at least one of source or dest is word aligned is a worthwhile
3030
// optimization, but on the one tested so far (xgene) it did not
3131
// make a significant difference.)
3232

33-
CBZ R7, noforwardlarge // Do we need to do any doubleword-by-doubleword copying?
33+
CBZ R7, noforwardlarge // Do we need to do any quadword copying?
3434

3535
ADD R3, R7, R9 // R9 points just past the destination region that is copied 32 bytes at a time
3636

3737
forwardlargeloop:
38+
// Copy 32 bytes at a time.
3839
LDP.P 32(R4), (R8, R10)
3940
STP.P (R8, R10), 32(R3)
4041
LDP -16(R4), (R11, R12)
@@ -43,10 +44,26 @@ forwardlargeloop:
4344
CBNZ R7, forwardlargeloop
4445

4546
noforwardlarge:
46-
CBNZ R6, forwardtail // Do we need to do any byte-by-byte copying?
47+
CBNZ R6, forwardtail // Do we need to copy any tail bytes?
4748
RET
4849

4950
forwardtail:
51+
// There are R6 <= 31 bytes remaining to copy.
52+
// This is large enough to still contain pointers,
53+
// which must be copied atomically.
54+
// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
55+
TBZ $4, R6, 3(PC) // write 16 bytes if R6&16 != 0
56+
LDP.P 16(R4), (R8, R10)
57+
STP.P (R8, R10), 16(R3)
58+
59+
TBZ $3, R6, 3(PC) // write 8 bytes if R6&8 != 0
60+
MOVD.P 8(R4), R8
61+
MOVD.P R8, 8(R3)
62+
63+
AND $7, R6
64+
CBNZ R6, 2(PC)
65+
RET
66+
5067
ADD R3, R6, R9 // R9 points just past the destination memory
5168

5269
forwardtailloop:
@@ -90,7 +107,7 @@ copy1:
90107
RET
91108

92109
backward:
93-
// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
110+
// Copying backwards first copies R6 <= 31 tail bytes, then R7/32 quadwords.
94111
// R3 and R4 are advanced to the end of the destination/source buffers
95112
// respectively and moved back as we copy.
96113

@@ -99,13 +116,28 @@ backward:
99116

100117
CBZ R6, nobackwardtail // Do we need to copy any tail bytes?
101118

102-
SUB R6, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
119+
AND $7, R6, R12
120+
CBZ R12, backwardtaillarge
121+
122+
SUB R12, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
103123
backwardtailloop:
124+
// Copy sub-pointer-size tail.
104125
MOVBU.W -1(R4), R8
105126
MOVBU.W R8, -1(R3)
106127
CMP R9, R3
107128
BNE backwardtailloop
108129

130+
backwardtaillarge:
131+
// Do 8/16-byte write if possible.
132+
// See comment at forwardtail.
133+
TBZ $3, R6, 3(PC)
134+
MOVD.W -8(R4), R8
135+
MOVD.W R8, -8(R3)
136+
137+
TBZ $4, R6, 3(PC)
138+
LDP.W -16(R4), (R8, R10)
139+
STP.W (R8, R10), -16(R3)
140+
109141
nobackwardtail:
110142
CBNZ R7, backwardlarge // Do we need to do any quadword copying?
111143
RET

0 commit comments

Comments
 (0)