@@ -454,18 +454,15 @@ void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
 
 #else
 
-#ifdef VERIFY
-#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
-#else
-#define VERIFY_BITS(x, n) do { } while(0)
-#endif
-
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
     uint64_t c, d;
     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
     uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
 
+    VERIFY_BITS(R0, 14);
+    VERIFY_BITS(R1, 11);
+
     VERIFY_BITS(a[0], 30);
     VERIFY_BITS(a[1], 30);
     VERIFY_BITS(a[2], 30);
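
For reference, the two new assertions on the constants hold because R0 = 0x3D10 = 15632 < 2^14 and R1 = 0x400 = 2^10 < 2^11. A minimal standalone sketch of what VERIFY_BITS checks, using plain assert in place of the library's VERIFY_CHECK (the main harness is only for illustration):

#include <assert.h>
#include <stdint.h>

/* Same shape as the macro removed above: x must fit in n bits. */
#define VERIFY_BITS(x, n) assert(((x) >> (n)) == 0)

int main(void) {
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
    VERIFY_BITS(M, 26);   /* 0x3FFFFFF is exactly 26 one bits */
    VERIFY_BITS(R0, 14);  /* 0x3D10 < 2^14 */
    VERIFY_BITS(R1, 11);  /* 0x400 = 2^10, so 11 bits suffice (10 do not) */
    return 0;
}
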
@@ -765,14 +762,34 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d    = c * (R0 >> 4) + t0;
+    /* MSVC for 32-bit targets implements 64x64->64 bit multiplications with
+       a non-constant-time subroutine. The subroutine is not constant time
+       because it shortcuts when the high 32 bits of both multiplicands are
+       all 0. See
+       https://research.kudelskisecurity.com/2017/01/16/when-constant-time-source-may-not-save-you/
+       and https://www.bearssl.org/ctmul.html for more information.
+
+       To work around this, we shift the right constant multiplicand to the
+       left to ensure that it has at least one 1 bit in its high 32 bits,
+       and then shift the product back to compensate.
+
+       The preceding VERIFY_BITS statement for the left multiplicand lets us
+       check that the shifted multiplication does not overflow. For example,
+       here the 64 - 46 = 18 highest bits of the left multiplicand c are 0
+       and R0 >> 4 has at most 10 bits, so shifting the right multiplicand
+       left by 7 bits is safe: 46 + 10 + 7 = 63 <= 64. Moreover, the shift
+       cannot overflow the right multiplicand itself, which can be verified
+       by inspecting the constant value R0. */
+    /* d    = c * (R0 >> 4)              + t0; // 64x64->64 mul, MSVC */
+    d    = (c * ((R0 >> 4) << 7) >> 7) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d   += c * (R1 >> 4) + t1;
+    /* d   += c * (R1 >> 4)               + t1; // 64x64->64 mul, MSVC */
+    d   += (c * ((R1 >> 4) << 10) >> 10) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
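
To see the workaround in isolation, here is a minimal sketch of the pattern under the same bounds the code asserts (c fits in 46 bits per the VERIFY_BITS statement for the left multiplicand earlier in the function; R0 >> 4 fits in 10 bits). The helper name mul_shift7 is hypothetical, not part of the library:

#include <assert.h>
#include <stdint.h>

/* Multiply a 46-bit carry by a 10-bit constant via the shifted form.
   c * (k << 7) fits in 46 + 10 + 7 = 63 bits, so the product cannot
   overflow, and shifting it back down recovers exactly c * k. */
static uint64_t mul_shift7(uint64_t c, uint64_t k) {
    assert((c >> 46) == 0);  /* left multiplicand bound */
    assert((k >> 10) == 0);  /* right multiplicand bound */
    return (c * (k << 7)) >> 7;
}

int main(void) {
    const uint64_t R0 = 0x3D10UL;
    uint64_t c = 0x2FFFFFFFFFFFULL;  /* an arbitrary 46-bit value */
    assert(mul_shift7(c, R0 >> 4) == c * (R0 >> 4));
    return 0;
}
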
@@ -1039,14 +1056,16 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d    = c * (R0 >> 4) + t0;
+    /* d    = c * (R0 >> 4)              + t0; // 64x64->64 mul, MSVC */
+    d    = (c * ((R0 >> 4) << 7) >> 7) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d   += c * (R1 >> 4) + t1;
+    /* d   += c * (R1 >> 4)               + t1; // 64x64->64 mul, MSVC */
+    d   += (c * ((R1 >> 4) << 10) >> 10) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
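
The same two shifted forms appear in both secp256k1_fe_mul_inner and secp256k1_fe_sqr_inner, so here is a quick standalone check (a hypothetical test harness, not part of the diff) that they are value-for-value identical to the original expressions near the top of the asserted 46-bit range:

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint64_t R0 = 0x3D10UL, R1 = 0x400UL;
    uint64_t c;
    /* The identity is exact for every c below 2^46; probing the top of the
       range exercises the worst case for overflow of the shifted product. */
    for (c = (1ULL << 46) - 1024; c < (1ULL << 46); c++) {
        assert((c * ((R0 >> 4) << 7)  >> 7)  == c * (R0 >> 4));
        assert((c * ((R1 >> 4) << 10) >> 10) == c * (R1 >> 4));
    }
    return 0;
}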