22
22
23
23
#include "textflag.h"
24
24
25
- // For set{En,De}cryptKeyAsm
25
+ // For expandKeyAsm
26
26
#define INP R3
27
27
#define BITS R4
28
- #define OUT R5
28
+ #define OUTENC R5 // Pointer to next expanded encrypt key
29
29
#define PTR R6
30
30
#define CNT R7
31
31
#define ROUNDS R8
32
+ #define OUTDEC R9 // Pointer to next expanded decrypt key
32
33
#define TEMP R19
33
34
#define ZERO V0
34
35
#define IN0 V1
@@ -87,31 +88,13 @@ GLOBL ·rcon(SB), RODATA, $80
87
88
LXSDX (RA+RB), VT \
88
89
VPERM VT, VT, ESPERM, VT
89
90
90
- // func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
91
- TEXT ·setEncryptKeyAsm (SB), NOSPLIT|NOFRAME, $0
91
+ // func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
92
+ TEXT ·expandKeyAsm (SB), NOSPLIT|NOFRAME, $0
92
93
// Load the arguments inside the registers
93
- MOVD key+0 (FP), INP
94
- MOVD keylen+8 (FP), BITS
95
- MOVD enc+16 (FP), OUT
96
- JMP ·doEncryptKeyAsm(SB)
97
-
98
- // This text is used both setEncryptKeyAsm and setDecryptKeyAsm
99
- TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
100
- // Do not change R10 since it's storing the LR value in setDecryptKeyAsm
101
-
102
- // Check arguments
103
- MOVD $-1 , PTR // li 6,-1 exit code to -1 (255)
104
- CMPU INP, $0 // cmpldi r3,0 input key pointer set?
105
- BC 0x0E , 2 , enc_key_abort // beq- .Lenc_key_abort
106
- CMPU OUT , $0 // cmpldi r5,0 output key pointer set?
107
- BC 0x0E , 2 , enc_key_abort // beq- .Lenc_key_abort
108
- MOVD $-2 , PTR // li 6,-2 exit code to -2 (254)
109
- CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
110
- BC 0x0E , 0 , enc_key_abort // blt- .Lenc_key_abort
111
- CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
112
- BC 0x0E , 1 , enc_key_abort // bgt- .Lenc_key_abort
113
- ANDCC $0x3f , BITS, TEMP // andi. 0,4,0x3f multiple of 64
114
- BC 0x06 , 2 , enc_key_abort // bne- .Lenc_key_abort
94
+ MOVD nr+0 (FP), ROUNDS
95
+ MOVD key+8 (FP), INP
96
+ MOVD enc+16 (FP), OUTENC
97
+ MOVD dec +24 (FP), OUTDEC
115
98
116
99
MOVD $·rcon(SB), PTR // PTR point to rcon addr
117
100
LVX (PTR), ESPERM
@@ -120,27 +103,34 @@ TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
120
103
// Get key from memory and write aligned into VR
121
104
P8_LXVB16X(INP, R0, IN0)
122
105
ADD $0x10 , INP, INP
123
- MOVD $0x20 , R8 // li 8,0x20 R8 = 32
106
+ MOVD $0x20 , TEMP
124
107
125
- CMPW BITS , $192 // cmpwi 4,192 Key size == 192?
108
+ CMPW ROUNDS , $12
126
109
LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
127
- LVX (PTR)(R8 ), MASK // lvx 5,8,6
110
+ LVX (PTR)(TEMP ), MASK
128
111
ADD $0x10 , PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
129
112
MOVD $8 , CNT // li 7,8 CNT = 8
130
113
VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
131
114
MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
132
115
133
- BLT loop128 // blt .Loop128
134
- BEQ l192 // beq .L192
135
- JMP l256 // b .L256
116
+ // The expanded decrypt key is the expanded encrypt key stored in reverse order.
117
+ // Move OUTDEC to the last key location, and store in descending order.
118
+ ADD $160 , OUTDEC, OUTDEC
119
+ BLT loop128
120
+ ADD $32 , OUTDEC, OUTDEC
121
+ BEQ l192
122
+ ADD $32 , OUTDEC, OUTDEC
123
+ JMP l256
136
124
137
125
loop128:
138
126
// Key schedule (Round 1 to 8)
139
127
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
140
128
VSLDOI $12 , ZERO, IN0, TMP // vsldoi 6,0,1,12
141
- P8_STXV(IN0, R0, OUT )
129
+ P8_STXV(IN0, R0, OUTENC)
130
+ P8_STXV(IN0, R0, OUTDEC)
142
131
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
143
- ADD $16 , OUT , OUT // addi 5,5,16 Point to the next round
132
+ ADD $16 , OUTENC, OUTENC
133
+ ADD $-16 , OUTDEC, OUTDEC
144
134
145
135
VXOR IN0, TMP, IN0 // vxor 1,1,6
146
136
VSLDOI $12 , ZERO, TMP, TMP // vsldoi 6,0,6,12
@@ -156,9 +146,11 @@ loop128:
156
146
// Key schedule (Round 9)
157
147
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
158
148
VSLDOI $12 , ZERO, IN0, TMP // vsldoi 6,0,1,12
159
- P8_STXV(IN0, R0, OUT )
149
+ P8_STXV(IN0, R0, OUTENC)
150
+ P8_STXV(IN0, R0, OUTDEC)
160
151
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
161
- ADD $16 , OUT , OUT // addi 5,5,16
152
+ ADD $16 , OUTENC, OUTENC
153
+ ADD $-16 , OUTDEC, OUTDEC
162
154
163
155
// Key schedule (Round 10)
164
156
VXOR IN0, TMP, IN0 // vxor 1,1,6
@@ -171,9 +163,11 @@ loop128:
171
163
172
164
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
173
165
VSLDOI $12 , ZERO, IN0, TMP // vsldoi 6,0,1,12
174
- P8_STXV(IN0, R0, OUT )
166
+ P8_STXV(IN0, R0, OUTENC)
167
+ P8_STXV(IN0, R0, OUTDEC)
175
168
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
176
- ADD $16 , OUT , OUT // addi 5,5,16
169
+ ADD $16 , OUTENC, OUTENC
170
+ ADD $-16 , OUTDEC, OUTDEC
177
171
178
172
// Key schedule (Round 11)
179
173
VXOR IN0, TMP, IN0 // vxor 1,1,6
@@ -182,18 +176,18 @@ loop128:
182
176
VSLDOI $12 , ZERO, TMP, TMP // vsldoi 6,0,6,12
183
177
VXOR IN0, TMP, IN0 // vxor 1,1,6
184
178
VXOR IN0, KEY, IN0 // vxor 1,1,3
185
- P8_STXV(IN0, R0, OUT )
179
+ P8_STXV(IN0, R0, OUTENC)
180
+ P8_STXV(IN0, R0, OUTDEC)
186
181
187
- ADD $0x50 , OUT , OUT // addi 5,5,0x50
188
-
189
- MOVD $10 , ROUNDS // li 8,10
190
- JMP done // b .Ldone
182
+ RET
191
183
192
184
l192:
193
185
LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order.
194
186
MOVD $4 , CNT // li 7,4
195
- P8_STXV(IN0, R0, OUT )
196
- ADD $16 , OUT , OUT // addi 5,5,16
187
+ P8_STXV(IN0, R0, OUTENC)
188
+ P8_STXV(IN0, R0, OUTDEC)
189
+ ADD $16 , OUTENC, OUTENC
190
+ ADD $-16 , OUTDEC, OUTDEC
197
191
VSPLTISB $8 , KEY // vspltisb 3,8
198
192
MOVD CNT, CTR // mtctr 7
199
193
VSUBUBM MASK, KEY, MASK // vsububm 5,5,3
@@ -221,18 +215,22 @@ loop192:
221
215
222
216
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
223
217
VSLDOI $12 , ZERO, IN0, TMP // vsldoi 6,0,1,12
224
- P8_STXV(STAGE, R0, OUT )
218
+ P8_STXV(STAGE, R0, OUTENC)
219
+ P8_STXV(STAGE, R0, OUTDEC)
225
220
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
226
- ADD $16 , OUT , OUT // addi 5,5,16
221
+ ADD $16 , OUTENC, OUTENC
222
+ ADD $-16 , OUTDEC, OUTDEC
227
223
228
224
VSLDOI $8 , IN0, IN1, STAGE // vsldoi 7,1,2,8
229
225
VXOR IN0, TMP, IN0 // vxor 1,1,6
230
226
VSLDOI $12 , ZERO, TMP, TMP // vsldoi 6,0,6,12
231
- P8_STXV(STAGE, R0, OUT )
227
+ P8_STXV(STAGE, R0, OUTENC)
228
+ P8_STXV(STAGE, R0, OUTDEC)
232
229
VXOR IN0, TMP, IN0 // vxor 1,1,6
233
230
VSLDOI $12 , ZERO, TMP, TMP // vsldoi 6,0,6,12
234
231
VXOR IN0, TMP, IN0 // vxor 1,1,6
235
- ADD $16 , OUT , OUT // addi 5,5,16
232
+ ADD $16 , OUTENC, OUTENC
233
+ ADD $-16 , OUTDEC, OUTDEC
236
234
237
235
VSPLTW $3 , IN0, TMP // vspltw 6,1,3
238
236
VXOR TMP, IN1, TMP // vxor 6,6,2
@@ -241,28 +239,31 @@ loop192:
241
239
VXOR IN1, TMP, IN1 // vxor 2,2,6
242
240
VXOR IN0, KEY, IN0 // vxor 1,1,3
243
241
VXOR IN1, KEY, IN1 // vxor 2,2,3
244
- P8_STXV(IN0, R0, OUT )
245
- ADD $16 , OUT , OUT // addi 5,5,16
242
+ P8_STXV(IN0, R0, OUTENC)
243
+ P8_STXV(IN0, R0, OUTDEC)
244
+ ADD $16 , OUTENC, OUTENC
245
+ ADD $-16 , OUTDEC, OUTDEC
246
246
BC 0x10 , 0 , loop192 // bdnz .Loop192
247
247
248
- MOVD $12 , ROUNDS // li 8,12
249
- ADD $0x20 , OUT , OUT // addi 5,5,0x20
250
- BR done // b .Ldone
248
+ RET
251
249
252
250
l256:
253
251
P8_LXVB16X(INP, R0, IN1)
254
252
MOVD $7 , CNT // li 7,7
255
- MOVD $14 , ROUNDS // li 8,14
256
- P8_STXV(IN0, R0, OUT )
257
- ADD $16 , OUT , OUT // addi 5,5,16
253
+ P8_STXV(IN0, R0, OUTENC)
254
+ P8_STXV(IN0, R0, OUTDEC)
255
+ ADD $16 , OUTENC, OUTENC
256
+ ADD $-16 , OUTDEC, OUTDEC
258
257
MOVD CNT, CTR // mtctr 7
259
258
260
259
loop256:
261
260
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
262
261
VSLDOI $12 , ZERO, IN0, TMP // vsldoi 6,0,1,12
263
- P8_STXV(IN1, R0, OUT )
262
+ P8_STXV(IN1, R0, OUTENC)
263
+ P8_STXV(IN1, R0, OUTDEC)
264
264
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
265
- ADD $16 , OUT , OUT // addi 5,5,16
265
+ ADD $16 , OUTENC, OUTENC
266
+ ADD $-16 , OUTDEC, OUTDEC
266
267
267
268
VXOR IN0, TMP, IN0 // vxor 1,1,6
268
269
VSLDOI $12 , ZERO, TMP, TMP // vsldoi 6,0,6,12
@@ -271,8 +272,10 @@ loop256:
271
272
VXOR IN0, TMP, IN0 // vxor 1,1,6
272
273
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
273
274
VXOR IN0, KEY, IN0 // vxor 1,1,3
274
- P8_STXV(IN0, R0, OUT )
275
- ADD $16 , OUT , OUT // addi 5,5,16
275
+ P8_STXV(IN0, R0, OUTENC)
276
+ P8_STXV(IN0, R0, OUTDEC)
277
+ ADD $16 , OUTENC, OUTENC
278
+ ADD $-16 , OUTDEC, OUTDEC
276
279
BC 0x12 , 0 , done // bdz .Ldone
277
280
278
281
VSPLTW $3 , IN0, KEY // vspltw 3,1,3
@@ -289,71 +292,16 @@ loop256:
289
292
JMP loop256 // b .Loop256
290
293
291
294
done:
292
- MOVD $0 , PTR // li 6,0 set PTR to 0 (exit code 0)
293
- MOVW ROUNDS, 0 (OUT ) // stw 8,0(5)
294
-
295
- enc_key_abort:
296
- MOVD PTR, INP // mr 3,6 set exit code with PTR value
297
- MOVD INP, ret +24 (FP) // Put return value into the FP
298
- RET // blr
295
+ RET
299
296
300
- // func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
301
- TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
302
- // Load the arguments inside the registers
303
- MOVD key+0 (FP), INP
304
- MOVD keylen+8 (FP), BITS
305
- MOVD dec +16 (FP), OUT
306
-
307
- MOVD LR, R10 // mflr 10
308
- CALL ·doEncryptKeyAsm(SB)
309
- MOVD R10, LR // mtlr 10
310
-
311
- CMPW INP, $0 // cmpwi 3,0 exit 0 = ok
312
- BC 0x06 , 2 , dec_key_abort // bne- .Ldec_key_abort
313
-
314
- // doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
315
- SLW $4 , ROUNDS, CNT // slwi 7,8,4
316
- SUB $240 , OUT , INP // subi 3,5,240
317
- SRW $1 , ROUNDS, ROUNDS // srwi 8,8,1
318
- ADD R7, INP, OUT // add 5,3,7
319
- MOVD ROUNDS, CTR // mtctr 8
320
-
321
- // dec_key will invert the key sequence in order to be used for decrypt
322
- dec_key:
323
- MOVWZ 0 (INP), TEMP // lwz 0, 0(3)
324
- MOVWZ 4 (INP), R6 // lwz 6, 4(3)
325
- MOVWZ 8 (INP), R7 // lwz 7, 8(3)
326
- MOVWZ 12 (INP), R8 // lwz 8, 12(3)
327
- ADD $16 , INP, INP // addi 3,3,16
328
- MOVWZ 0 (OUT ), R9 // lwz 9, 0(5)
329
- MOVWZ 4 (OUT ), R10 // lwz 10,4(5)
330
- MOVWZ 8 (OUT ), R11 // lwz 11,8(5)
331
- MOVWZ 12 (OUT ), R12 // lwz 12,12(5)
332
- MOVW TEMP, 0 (OUT ) // stw 0, 0(5)
333
- MOVW R6, 4 (OUT ) // stw 6, 4(5)
334
- MOVW R7, 8 (OUT ) // stw 7, 8(5)
335
- MOVW R8, 12 (OUT ) // stw 8, 12(5)
336
- SUB $16 , OUT , OUT // subi 5,5,16
337
- MOVW R9, -16 (INP) // stw 9, -16(3)
338
- MOVW R10, -12 (INP) // stw 10,-12(3)
339
- MOVW R11, -8 (INP) // stw 11,-8(3)
340
- MOVW R12, -4 (INP) // stw 12,-4(3)
341
- BC 0x10 , 0 , dec_key // bdnz .Ldeckey
342
-
343
- XOR R3, R3, R3 // xor 3,3,3 Clean R3
344
-
345
- dec_key_abort:
346
- MOVD R3, ret +24 (FP) // Put return value into the FP
347
- RET // blr
348
-
349
- // func encryptBlockAsm(dst, src *byte, enc *uint32)
297
+ // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
350
298
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
351
299
// Load the arguments inside the registers
352
- MOVD dst+0 (FP), BLK_OUT
353
- MOVD src+8 (FP), BLK_INP
354
- MOVD enc+16 (FP), BLK_KEY
300
+ MOVD nr+0 (FP), BLK_ROUNDS
301
+ MOVD xk+8 (FP), BLK_KEY
302
+ MOVD dst+16 (FP), BLK_OUT
303
+ MOVD src+24 (FP), BLK_INP
355
304
356
- MOVWZ 240 (BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
357
305
MOVD $15 , BLK_IDX // li 7,15
358
306
359
307
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
@@ -410,14 +358,14 @@ loop_enc:
410
358
411
359
RET // blr
412
360
413
- // func decryptBlockAsm(dst, src *byte, dec *uint32 )
361
+ // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte )
414
362
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
415
363
// Load the arguments inside the registers
416
- MOVD dst+0 (FP), BLK_OUT
417
- MOVD src+8 (FP), BLK_INP
418
- MOVD dec +16 (FP), BLK_KEY
364
+ MOVD nr+0 (FP), BLK_ROUNDS
365
+ MOVD xk+8 (FP), BLK_KEY
366
+ MOVD dst+16 (FP), BLK_OUT
367
+ MOVD src+24 (FP), BLK_INP
419
368
420
- MOVWZ 240 (BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
421
369
MOVD $15 , BLK_IDX // li 7,15
422
370
423
371
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
@@ -476,7 +424,7 @@ loop_dec:
476
424
477
425
// Remove defines from above so they can be defined here
478
426
#undef INP
479
- #undef OUT
427
+ #undef OUTENC
480
428
#undef ROUNDS
481
429
#undef KEY
482
430
#undef TMP
@@ -545,13 +493,15 @@ loop_dec:
545
493
// for decryption which was omitted to avoid the
546
494
// complexity.
547
495
496
+ // func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
548
497
TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
549
498
MOVD src+0 (FP), INP
550
499
MOVD dst+8 (FP), OUT
551
500
MOVD length+16 (FP), LEN
552
501
MOVD key+24 (FP), KEY
553
502
MOVD iv+32 (FP), IVP
554
503
MOVD enc+40 (FP), ENC
504
+ MOVD nr+48 (FP), ROUNDS
555
505
556
506
CMPU LEN, $16 // cmpldi r5,16
557
507
BC 14 , 0 , LR // bltlr-
@@ -567,7 +517,6 @@ TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
567
517
VPERM IVEC, INPTAIL, INPPERM, IVEC // vperm v4,v4,v5,v6
568
518
NEG INP, R11 // neg r11,r3
569
519
LVSR (KEY)(R0), KEYPERM // lvsr v10,r0,r6
570
- MOVWZ 240 (KEY), ROUNDS // lwz r9,240(r6)
571
520
LVSR (R11)(R0), V6 // lvsr v6,r0,r11
572
521
LVX (INP)(R0), INPTAIL // lvx v5,r0,r3
573
522
ADD $15 , INP // addi r3,r3,15
0 commit comments