@@ -454,18 +454,15 @@ void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
 
 #else
 
-#ifdef VERIFY
-#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
-#else
-#define VERIFY_BITS(x, n) do { } while(0)
-#endif
-
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
     uint64_t c, d;
     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
     uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
 
+    VERIFY_BITS(R0, 14);
+    VERIFY_BITS(R1, 11);
+
     VERIFY_BITS(a[0], 30);
     VERIFY_BITS(a[1], 30);
     VERIFY_BITS(a[2], 30);
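
For reference, the two new assertions on the constants hold because R0 = 0x3D10 = 15632 < 2^14 and R1 = 0x400 = 2^10 < 2^11. A minimal standalone sketch of what VERIFY_BITS checks, using plain assert in place of the library's VERIFY_CHECK (the main harness is only for illustration):

#include <assert.h>
#include <stdint.h>

/* Same shape as the macro removed above: x must fit in n bits. */
#define VERIFY_BITS(x, n) assert(((x) >> (n)) == 0)

int main(void) {
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
    VERIFY_BITS(M, 26);   /* 0x3FFFFFF is exactly 26 one bits */
    VERIFY_BITS(R0, 14);  /* 0x3D10 < 2^14 */
    VERIFY_BITS(R1, 11);  /* 0x400 = 2^10, so 11 bits suffice (10 do not) */
    return 0;
}
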
@@ -765,14 +762,34 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d    = c * (R0 >> 4) + t0;
+    /* MSVC for 32-bit targets implements 64x64->64 bit multiplications with
+       a non-constant-time subroutine. The subroutine is not constant time
+       because it shortcuts when the high 32 bits of both multiplicands are
+       all 0. See
+       https://research.kudelskisecurity.com/2017/01/16/when-constant-time-source-may-not-save-you/
+       and https://www.bearssl.org/ctmul.html for more information.
+
+       To work around this, we shift the right constant multiplicand to the
+       left to ensure that it has at least one 1 bit in its high 32 bits,
+       and then shift the product back to compensate.
+
+       The preceding VERIFY_BITS statement for the left multiplicand lets us
+       check that the shifted multiplication does not overflow. For example,
+       here the 64 - 46 = 18 highest bits of the left multiplicand c are 0
+       and R0 >> 4 has at most 10 bits, so shifting the right multiplicand
+       left by 7 bits is safe: 46 + 10 + 7 = 63 <= 64. Moreover, the shift
+       cannot overflow the right multiplicand itself, which can be verified
+       by inspecting the constant value R0. */
+    /* d    = c * (R0 >> 4)              + t0; // 64x64->64 mul, MSVC */
+    d    = (c * ((R0 >> 4) << 7) >> 7) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d   += c * (R1 >> 4) + t1;
+    /* d   += c * (R1 >> 4)               + t1; // 64x64->64 mul, MSVC */
+    d   += (c * ((R1 >> 4) << 10) >> 10) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
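
To see the workaround in isolation, here is a minimal sketch of the pattern under the same bounds the code asserts (c fits in 46 bits per the VERIFY_BITS statement for the left multiplicand earlier in the function; R0 >> 4 fits in 10 bits). The helper name mul_shift7 is hypothetical, not part of the library:

#include <assert.h>
#include <stdint.h>

/* Multiply a 46-bit carry by a 10-bit constant via the shifted form.
   c * (k << 7) fits in 46 + 10 + 7 = 63 bits, so the product cannot
   overflow, and shifting it back down recovers exactly c * k. */
static uint64_t mul_shift7(uint64_t c, uint64_t k) {
    assert((c >> 46) == 0);  /* left multiplicand bound */
    assert((k >> 10) == 0);  /* right multiplicand bound */
    return (c * (k << 7)) >> 7;
}

int main(void) {
    const uint64_t R0 = 0x3D10UL;
    uint64_t c = 0x2FFFFFFFFFFFULL;  /* an arbitrary 46-bit value */
    assert(mul_shift7(c, R0 >> 4) == c * (R0 >> 4));
    return 0;
}
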
@@ -1039,14 +1056,16 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d    = c * (R0 >> 4) + t0;
+    /* d    = c * (R0 >> 4)              + t0; // 64x64->64 mul, MSVC */
+    d    = (c * ((R0 >> 4) << 7) >> 7) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d   += c * (R1 >> 4) + t1;
+    /* d   += c * (R1 >> 4)               + t1; // 64x64->64 mul, MSVC */
+    d   += (c * ((R1 >> 4) << 10) >> 10) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
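
The same two shifted forms appear in both secp256k1_fe_mul_inner and secp256k1_fe_sqr_inner, so here is a quick standalone check (a hypothetical test harness, not part of the diff) that they are value-for-value identical to the original expressions near the top of the asserted 46-bit range:

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint64_t R0 = 0x3D10UL, R1 = 0x400UL;
    uint64_t c;
    /* The identity is exact for every c below 2^46; probing the top of the
       range exercises the worst case for overflow of the shifted product. */
    for (c = (1ULL << 46) - 1024; c < (1ULL << 46); c++) {
        assert((c * ((R0 >> 4) << 7)  >> 7)  == c * (R0 >> 4));
        assert((c * ((R1 >> 4) << 10) >> 10) == c * (R1 >> 4));
    }
    return 0;
}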