Skip to content

Commit 958e355

Browse files
committed
asm xor
Change-Id: Ib9150103655ce338384b81fb7a9e6d230d9253f5
1 parent cb9cb27 commit 958e355

File tree

2 files changed

+46
-29
lines changed

2 files changed

+46
-29
lines changed

src/crypto/aes/ctr_amd64.go

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,16 @@
44

55
package aes
66

7-
import (
8-
"crypto/cipher"
9-
"unsafe"
10-
)
7+
import "crypto/cipher"
118

9+
// xorBytes xors the contents of a and b and places the resulting values into
10+
// dst. If a and b are not the same length then the number of bytes processed
11+
// will be equal to the length of the shorter of the two. Returns the number
12+
// of bytes processed.
13+
//go:noescape
14+
func xorBytes(dst, a, b []byte) int
15+
16+
//go:noescape
1217
func fillEightBlocks(nr int, xk *uint32, dst, counter *byte)
1318

1419
// streamBufferSize is the number of bytes of encrypted counter values to cache.
@@ -53,33 +58,9 @@ func (c *aesctr) XORKeyStream(dst, src []byte) {
5358
if len(c.buffer) == 0 {
5459
c.refill()
5560
}
56-
n := fastXORBytes(dst, src, c.buffer)
61+
n := xorBytes(dst, src, c.buffer)
5762
c.buffer = c.buffer[n:]
5863
src = src[n:]
5964
dst = dst[n:]
6065
}
6166
}
62-
63-
// fastXORBytes XORs a and b into dst and returns the number of bytes
// processed, which is min(len(a), len(b)). Whole 8-byte words are XORed
// first through uintptr views of the slices; any remaining bytes are
// XORed one at a time.
// NOTE(review): len(dst) is never compared with n. The byte loop is
// bounds-checked, but the word loop indexes the reinterpreted slices, so a
// dst shorter than n looks unsafe here — confirm callers guarantee the size.
func fastXORBytes(dst, a, b []byte) int {
64-
wordSize := 8 // bytes handled per word-loop iteration; presumably sizeof(uintptr) on amd64
65-
n := len(a)
66-
if len(b) < n {
67-
n = len(b) // n = min(len(a), len(b))
68-
}
69-
70-
w := n / wordSize // number of complete words
71-
if w > 0 {
72-
dw := *(*[]uintptr)(unsafe.Pointer(&dst)) // reinterpret the slice header; len/cap fields still hold byte counts
73-
aw := *(*[]uintptr)(unsafe.Pointer(&a))
74-
bw := *(*[]uintptr)(unsafe.Pointer(&b))
75-
for i := 0; i < w; i++ {
76-
dw[i] = aw[i] ^ bw[i] // XOR one word at a time
77-
}
78-
}
79-
80-
for i := (n - n%wordSize); i < n; i++ { // leftover bytes after the last full word
81-
dst[i] = a[i] ^ b[i]
82-
}
83-
84-
return n
85-
}

src/crypto/aes/ctr_amd64.s

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,39 @@ Lenc128:
123123
MOVOU X15, 112(DX)
124124
RET
125125

126+
// func xorBytes(dst, a, b []byte) int
// XORs n = min(len(a), len(b)) bytes of a and b into dst and returns n.
// Processes 16 bytes per iteration with unaligned SSE loads/stores, then
// finishes the remainder one byte at a time.
// dst's length (dst_len+8(FP)) is never read: dst must hold at least n bytes.
// NOTE(review): the frame spec is $0 with no argument size; the arguments and
// result appear to occupy 80 bytes ($0-80) — confirm against go vet.
127+
TEXT ·xorBytes(SB),NOSPLIT,$0
128+
MOVQ dst_base+0(FP), DI // DI = dst base pointer
129+
MOVQ a_base+24(FP), SI // SI = a base pointer
130+
MOVQ a_len+32(FP), R8 // R8 = len(a)
131+
MOVQ b_base+48(FP), BX // BX = b base pointer
132+
MOVQ b_len+56(FP), R9 // R9 = len(b)
133+
CMPQ R8, R9
134+
JLE skip // keep len(a) when len(a) <= len(b)
135+
MOVQ R9, R8 // otherwise R8 = len(b), so R8 = min(len(a), len(b))
136+
skip:
137+
MOVQ R8, ret+72(FP) // store the return value before R8 is consumed as a counter
138+
XORQ CX, CX // CX = byte offset into all three buffers, starts at 0
139+
CMPQ R8, $16
140+
JL tail // fewer than 16 bytes: skip the 16-byte loop
141+
loop:
142+
MOVOU (SI)(CX*1), X1 // X1 = 16 bytes of a (unaligned load)
143+
MOVOU (BX)(CX*1), X2 // X2 = 16 bytes of b (unaligned load)
144+
PXOR X1, X2 // X2 ^= X1
145+
MOVOU X2, (DI)(CX*1) // store 16 result bytes to dst
146+
ADDQ $16, CX // advance the offset
147+
SUBQ $16, R8 // R8 = bytes remaining
148+
CMPQ R8, $16
149+
JGE loop // repeat while at least 16 bytes remain
150+
tail:
151+
CMPQ R8, $0
152+
JE done // nothing left
153+
MOVBLZX (SI)(CX*1), R9 // R9 = one byte of a, zero-extended (len(b) in R9 is no longer needed)
154+
MOVBLZX (BX)(CX*1), R10 // R10 = one byte of b, zero-extended
155+
XORL R10, R9 // R9 ^= R10
156+
MOVB R9B, (DI)(CX*1) // store the low byte of the result to dst
157+
INCQ CX // next byte
158+
DECQ R8 // one fewer byte remaining
159+
JMP tail
160+
done:
161+
RET

0 commit comments

Comments
 (0)