@@ -15,6 +15,25 @@ use cryptoutil::{write_u64_be, write_u32_be, read_u64v_be, read_u32v_be, FixedBu
15
15
use digest:: Digest ;
16
16
17
17
18
+ // SHA-512 and SHA-256 use essentially the same calculations, which are implemented by these macros.
19
+ // Inlining the calculations seems to result in better generated code.
20
+ macro_rules! schedule_round( ( $t: expr) => ( // Computes message-schedule word W[$t]; `W`, `sigma0` and `sigma1` are not parameters and must be in scope where the macro is expanded.
21
+ W [ $t] = sigma1( W [ $t - 2 ] ) + W [ $t - 7 ] + sigma0( W [ $t - 15 ] ) + W [ $t - 16 ] ; // reads four earlier words, the furthest back being W[$t - 16], so $t must be >= 16
22
+ )
23
+ )
24
+
25
+ macro_rules! sha2_round( // Performs one round on eight working variables using constant $K[$t] and schedule word W[$t]; `W`, `sum0`, `sum1`, `ch` and `maj` must be in scope at the expansion site.
26
+ ( $A: ident, $B: ident, $C: ident, $D: ident, // the eight identifiers are mutable variables; only $D and $H are written by a single expansion
27
+ $E: ident, $F: ident, $G: ident, $H: ident, $K: ident, $t: expr) => (
28
+ {
29
+ $H += sum1( $E) + ch( $E, $F, $G) + $K[ $t] + W [ $t] ; // add the $E/$F/$G terms, the round constant and the schedule word into $H
30
+ $D += $H; // fold that partial value of $H into $D before $H receives the remaining terms
31
+ $H += sum0( $A) + maj( $A, $B, $C) ; // finish $H with the $A/$B/$C terms
32
+ }
33
+ )
34
+ )
35
+
36
+
18
37
// BitCounter is a specialized structure intended simply for counting the
19
38
// number of bits that have been processed by the SHA-2 512 family of functions.
20
39
// It does very little overflow checking since such checking is not necessary
@@ -119,15 +138,6 @@ impl Engine512State {
119
138
( ( x << 45 ) | ( x >> 19 ) ) ^ ( ( x << 3 ) | ( x >> 61 ) ) ^ ( x >> 6 )
120
139
}
121
140
122
- let mut W = [ 0u64 , ..80 ] ;
123
-
124
- read_u64v_be( W . mut_slice( 0 , 16 ) , data) ;
125
-
126
- for uint:: range( 16 , 80 ) |t| {
127
- W [ t] = sigma1( W [ t - 2 ] ) + W [ t - 7 ] + sigma0( W [ t - 15 ] ) +
128
- W [ t - 16 ] ;
129
- }
130
-
131
141
let mut a = self . H0 ;
132
142
let mut b = self . H1 ;
133
143
let mut c = self . H2 ;
@@ -137,47 +147,41 @@ impl Engine512State {
137
147
let mut g = self . H6 ;
138
148
let mut h = self . H7 ;
139
149
140
- let mut t = 0 ;
141
- for uint:: range( 0 , 10 ) |_| {
142
- h += sum1( e) + ch( e, f, g) + K64 [ t] + W [ t] ;
143
- d += h ;
144
- h += sum0 ( a ) + maj ( a , b , c ) ;
145
- t += 1 ;
146
-
147
- g += sum1 ( d ) + ch ( d , e , f ) + K64 [ t ] + W [ t ] ;
148
- c += g ;
149
- g += sum0 ( h ) + maj ( h , a , b ) ;
150
- t += 1 ;
151
-
152
- f += sum1 ( c ) + ch ( c , d , e ) + K64 [ t ] + W [ t ] ;
153
- b += f ;
154
- f += sum0 ( g ) + maj ( g , h , a ) ;
155
- t += 1 ;
156
-
157
- e += sum1 ( b ) + ch ( b , c , d ) + K64 [ t ] + W [ t ] ;
158
- a += e ;
159
- e += sum0 ( f ) + maj ( f , g , h ) ;
160
- t += 1 ;
161
-
162
- d += sum1 ( a ) + ch ( a , b , c ) + K64 [ t ] + W [ t ] ;
163
- h += d ;
164
- d += sum0 ( e ) + maj ( e , f , g ) ;
165
- t += 1 ;
166
-
167
- c += sum1 ( h ) + ch ( h , a , b ) + K64 [ t ] + W [ t ] ;
168
- g += c ;
169
- c += sum0 ( d ) + maj ( d , e , f ) ;
170
- t += 1 ;
171
-
172
- b += sum1 ( g ) + ch ( g , h , a ) + K64 [ t ] + W [ t ] ;
173
- f += b ;
174
- b += sum0 ( c ) + maj ( c , d , e ) ;
175
- t += 1 ;
176
-
177
- a += sum1 ( f ) + ch ( f , g , h ) + K64 [ t ] + W [ t ] ;
178
- e += a ;
179
- a += sum0 ( b ) + maj ( b , c , d ) ;
180
- t += 1 ;
150
+ let mut W = [ 0u64 , ..80 ] ;
151
+
152
+ read_u64v_be( W . mut_slice( 0 , 16 ) , data) ;
153
+
154
+ // Putting the message schedule inside the same loop as the round calculations allows
155
+ // the compiler to generate better code.
156
+ for uint:: range_step( 0 , 64 , 8 ) |t| {
157
+ schedule_round ! ( t + 16 ) ;
158
+ schedule_round ! ( t + 17 ) ;
159
+ schedule_round ! ( t + 18 ) ;
160
+ schedule_round ! ( t + 19 ) ;
161
+ schedule_round ! ( t + 20 ) ;
162
+ schedule_round ! ( t + 21 ) ;
163
+ schedule_round ! ( t + 22 ) ;
164
+ schedule_round ! ( t + 23 ) ;
165
+
166
+ sha2_round ! ( a, b, c, d, e, f, g, h, K64 , t) ;
167
+ sha2_round ! ( h, a, b, c, d, e, f, g, K64 , t + 1 ) ;
168
+ sha2_round ! ( g, h, a, b, c, d, e, f, K64 , t + 2 ) ;
169
+ sha2_round ! ( f, g, h, a, b, c, d, e, K64 , t + 3 ) ;
170
+ sha2_round ! ( e, f, g, h, a, b, c, d, K64 , t + 4 ) ;
171
+ sha2_round ! ( d, e, f, g, h, a, b, c, K64 , t + 5 ) ;
172
+ sha2_round ! ( c, d, e, f, g, h, a, b, K64 , t + 6 ) ;
173
+ sha2_round ! ( b, c, d, e, f, g, h, a, K64 , t + 7 ) ;
174
+ }
175
+
176
+ for uint:: range_step( 64 , 80 , 8 ) |t| {
177
+ sha2_round ! ( a, b, c, d, e, f, g, h, K64 , t) ;
178
+ sha2_round ! ( h, a, b, c, d, e, f, g, K64 , t + 1 ) ;
179
+ sha2_round ! ( g, h, a, b, c, d, e, f, K64 , t + 2 ) ;
180
+ sha2_round ! ( f, g, h, a, b, c, d, e, K64 , t + 3 ) ;
181
+ sha2_round ! ( e, f, g, h, a, b, c, d, K64 , t + 4 ) ;
182
+ sha2_round ! ( d, e, f, g, h, a, b, c, K64 , t + 5 ) ;
183
+ sha2_round ! ( c, d, e, f, g, h, a, b, K64 , t + 6 ) ;
184
+ sha2_round ! ( b, c, d, e, f, g, h, a, K64 , t + 7 ) ;
181
185
}
182
186
183
187
self . H0 += a ;
@@ -524,15 +528,6 @@ impl Engine256State {
524
528
( ( x >> 17 ) | ( x << 15 ) ) ^ ( ( x >> 19 ) | ( x << 13 ) ) ^ ( x >> 10 )
525
529
}
526
530
527
- let mut W = [ 0u32 , ..80 ] ;
528
-
529
- read_u32v_be( W . mut_slice( 0 , 16 ) , data) ;
530
-
531
- for uint:: range( 16 , 64 ) |t| {
532
- W [ t] = sigma1( W [ t - 2 ] ) + W [ t - 7 ] + sigma0( W [ t - 15 ] ) +
533
- W [ t - 16 ] ;
534
- }
535
-
536
531
let mut a = self . H0 ;
537
532
let mut b = self . H1 ;
538
533
let mut c = self . H2 ;
@@ -542,47 +537,41 @@ impl Engine256State {
542
537
let mut g = self . H6 ;
543
538
let mut h = self . H7 ;
544
539
545
- let mut t = 0 ;
546
- for uint:: range( 0 , 8 ) |_| {
547
- h += sum1( e) + ch( e, f, g) + K32 [ t] + W [ t] ;
548
- d += h;
549
- h += sum0( a) + maj( a, b, c) ;
550
- t += 1 ;
551
-
552
- g += sum1( d) + ch( d, e, f) + K32 [ t] + W [ t] ;
553
- c += g;
554
- g += sum0( h) + maj( h, a, b) ;
555
- t += 1 ;
556
-
557
- f += sum1( c) + ch( c, d, e) + K32 [ t] + W [ t] ;
558
- b += f;
559
- f += sum0( g) + maj( g, h, a) ;
560
- t += 1 ;
561
-
562
- e += sum1( b) + ch( b, c, d) + K32 [ t] + W [ t] ;
563
- a += e;
564
- e += sum0( f) + maj( f, g, h) ;
565
- t += 1 ;
566
-
567
- d += sum1( a) + ch( a, b, c) + K32 [ t] + W [ t] ;
568
- h += d;
569
- d += sum0( e) + maj( e, f, g) ;
570
- t += 1 ;
571
-
572
- c += sum1( h) + ch( h, a, b) + K32 [ t] + W [ t] ;
573
- g += c;
574
- c += sum0( d) + maj( d, e, f) ;
575
- t += 1 ;
576
-
577
- b += sum1( g) + ch( g, h, a) + K32 [ t] + W [ t] ;
578
- f += b;
579
- b += sum0( c) + maj( c, d, e) ;
580
- t += 1 ;
581
-
582
- a += sum1( f) + ch( f, g, h) + K32 [ t] + W [ t] ;
583
- e += a;
584
- a += sum0( b) + maj( b, c, d) ;
585
- t += 1 ;
540
+ let mut W = [ 0u32 , ..64 ] ;
541
+
542
+ read_u32v_be( W . mut_slice( 0 , 16 ) , data) ;
543
+
544
+ // Putting the message schedule inside the same loop as the round calculations allows
545
+ // the compiler to generate better code.
546
+ for uint:: range_step( 0 , 48 , 8 ) |t| {
547
+ schedule_round!( t + 16 ) ;
548
+ schedule_round!( t + 17 ) ;
549
+ schedule_round!( t + 18 ) ;
550
+ schedule_round!( t + 19 ) ;
551
+ schedule_round!( t + 20 ) ;
552
+ schedule_round!( t + 21 ) ;
553
+ schedule_round!( t + 22 ) ;
554
+ schedule_round!( t + 23 ) ;
555
+
556
+ sha2_round!( a, b, c, d, e, f, g, h, K32 , t) ;
557
+ sha2_round!( h, a, b, c, d, e, f, g, K32 , t + 1 ) ;
558
+ sha2_round!( g, h, a, b, c, d, e, f, K32 , t + 2 ) ;
559
+ sha2_round!( f, g, h, a, b, c, d, e, K32 , t + 3 ) ;
560
+ sha2_round!( e, f, g, h, a, b, c, d, K32 , t + 4 ) ;
561
+ sha2_round!( d, e, f, g, h, a, b, c, K32 , t + 5 ) ;
562
+ sha2_round!( c, d, e, f, g, h, a, b, K32 , t + 6 ) ;
563
+ sha2_round!( b, c, d, e, f, g, h, a, K32 , t + 7 ) ;
564
+ }
565
+
566
+ for uint:: range_step( 48 , 64 , 8 ) |t| {
567
+ sha2_round!( a, b, c, d, e, f, g, h, K32 , t) ;
568
+ sha2_round!( h, a, b, c, d, e, f, g, K32 , t + 1 ) ;
569
+ sha2_round!( g, h, a, b, c, d, e, f, K32 , t + 2 ) ;
570
+ sha2_round!( f, g, h, a, b, c, d, e, K32 , t + 3 ) ;
571
+ sha2_round!( e, f, g, h, a, b, c, d, K32 , t + 4 ) ;
572
+ sha2_round!( d, e, f, g, h, a, b, c, K32 , t + 5 ) ;
573
+ sha2_round!( c, d, e, f, g, h, a, b, K32 , t + 6 ) ;
574
+ sha2_round!( b, c, d, e, f, g, h, a, K32 , t + 7 ) ;
586
575
}
587
576
588
577
self . H0 += a;
0 commit comments