@@ -214,28 +214,6 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
     VERIFY_CHECK(c1 >= th); \
 }
 
-/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
-#define muladd2(a,b) { \
-    uint64_t tl, th, th2, tl2; \
-    { \
-        uint128_t t = (uint128_t)a * b; \
-        th = t >> 64;               /* at most 0xFFFFFFFFFFFFFFFE */ \
-        tl = t; \
-    } \
-    th2 = th + th;                  /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
-    c2 += (th2 < th);               /* never overflows by contract (verified the next line) */ \
-    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
-    tl2 = tl + tl;                  /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
-    th2 += (tl2 < tl);              /* at most 0xFFFFFFFFFFFFFFFF */ \
-    c0 += tl2;                      /* overflow is handled on the next line */ \
-    th2 += (c0 < tl2);              /* second overflow is handled on the next line */ \
-    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
-    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
-    c1 += th2;                      /* overflow is handled on the next line */ \
-    c2 += (c1 < th2);               /* never overflows by contract (verified the next line) */ \
-    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
-}
-
 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define sumadd(a) { \
     unsigned int over; \
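The removed muladd2 macro folds 2*a*b into the 192-bit accumulator (c0,c1,c2): it forms the 128-bit product, doubles both halves while catching the carries, then adds the doubled value limb by limb and pushes every carry upward; by contract the top limb c2 itself never overflows. A minimal standalone sketch of the same carry discipline, assuming GCC/Clang's unsigned __int128 and an illustrative uint64_t acc[3] accumulator (the names acc and acc_muladd2 are not part of the library):

    #include <stdint.h>

    /* Illustrative only: add 2*a*b into a little-endian 192-bit accumulator
     * acc[0..2], mirroring the carry handling of the removed muladd2 macro.
     * As in the macro, the caller guarantees acc[2] cannot overflow. */
    static void acc_muladd2(uint64_t acc[3], uint64_t a, uint64_t b) {
        unsigned __int128 t = (unsigned __int128)a * b;
        uint64_t tl = (uint64_t)t;              /* low 64 bits of a*b */
        uint64_t th = (uint64_t)(t >> 64);      /* high 64 bits of a*b */
        uint64_t th2 = th + th;                 /* double the high half */
        acc[2] += (th2 < th);                   /* carry out of 2*th */
        uint64_t tl2 = tl + tl;                 /* double the low half */
        th2 += (tl2 < tl);                      /* carry out of 2*tl */
        acc[0] += tl2;                          /* add into the low limb */
        th2 += (acc[0] < tl2);                  /* carry into the middle limb */
        acc[2] += (acc[0] < tl2) & (th2 == 0);  /* carry that wrapped past th2 */
        acc[1] += th2;                          /* add into the middle limb */
        acc[2] += (acc[1] < th2);               /* carry into the top limb */
    }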
@@ -745,148 +723,10 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
 #endif
 }
 
-static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) {
-#ifdef USE_ASM_X86_64
-    __asm__ __volatile__(
-    /* Preload */
-    "movq 0(%%rdi), %%r11\n"
-    "movq 8(%%rdi), %%r12\n"
-    "movq 16(%%rdi), %%r13\n"
-    "movq 24(%%rdi), %%r14\n"
-    /* (rax,rdx) = a0 * a0 */
-    "movq %%r11, %%rax\n"
-    "mulq %%r11\n"
-    /* Extract l0 */
-    "movq %%rax, 0(%%rsi)\n"
-    /* (r8,r9,r10) = (rdx,0) */
-    "movq %%rdx, %%r8\n"
-    "xorq %%r9, %%r9\n"
-    "xorq %%r10, %%r10\n"
-    /* (r8,r9,r10) += 2 * a0 * a1 */
-    "movq %%r11, %%rax\n"
-    "mulq %%r12\n"
-    "addq %%rax, %%r8\n"
-    "adcq %%rdx, %%r9\n"
-    "adcq $0, %%r10\n"
-    "addq %%rax, %%r8\n"
-    "adcq %%rdx, %%r9\n"
-    "adcq $0, %%r10\n"
-    /* Extract l1 */
-    "movq %%r8, 8(%%rsi)\n"
-    "xorq %%r8, %%r8\n"
-    /* (r9,r10,r8) += 2 * a0 * a2 */
-    "movq %%r11, %%rax\n"
-    "mulq %%r13\n"
-    "addq %%rax, %%r9\n"
-    "adcq %%rdx, %%r10\n"
-    "adcq $0, %%r8\n"
-    "addq %%rax, %%r9\n"
-    "adcq %%rdx, %%r10\n"
-    "adcq $0, %%r8\n"
-    /* (r9,r10,r8) += a1 * a1 */
-    "movq %%r12, %%rax\n"
-    "mulq %%r12\n"
-    "addq %%rax, %%r9\n"
-    "adcq %%rdx, %%r10\n"
-    "adcq $0, %%r8\n"
-    /* Extract l2 */
-    "movq %%r9, 16(%%rsi)\n"
-    "xorq %%r9, %%r9\n"
-    /* (r10,r8,r9) += 2 * a0 * a3 */
-    "movq %%r11, %%rax\n"
-    "mulq %%r14\n"
-    "addq %%rax, %%r10\n"
-    "adcq %%rdx, %%r8\n"
-    "adcq $0, %%r9\n"
-    "addq %%rax, %%r10\n"
-    "adcq %%rdx, %%r8\n"
-    "adcq $0, %%r9\n"
-    /* (r10,r8,r9) += 2 * a1 * a2 */
-    "movq %%r12, %%rax\n"
-    "mulq %%r13\n"
-    "addq %%rax, %%r10\n"
-    "adcq %%rdx, %%r8\n"
-    "adcq $0, %%r9\n"
-    "addq %%rax, %%r10\n"
-    "adcq %%rdx, %%r8\n"
-    "adcq $0, %%r9\n"
-    /* Extract l3 */
-    "movq %%r10, 24(%%rsi)\n"
-    "xorq %%r10, %%r10\n"
-    /* (r8,r9,r10) += 2 * a1 * a3 */
-    "movq %%r12, %%rax\n"
-    "mulq %%r14\n"
-    "addq %%rax, %%r8\n"
-    "adcq %%rdx, %%r9\n"
-    "adcq $0, %%r10\n"
-    "addq %%rax, %%r8\n"
-    "adcq %%rdx, %%r9\n"
-    "adcq $0, %%r10\n"
-    /* (r8,r9,r10) += a2 * a2 */
-    "movq %%r13, %%rax\n"
-    "mulq %%r13\n"
-    "addq %%rax, %%r8\n"
-    "adcq %%rdx, %%r9\n"
-    "adcq $0, %%r10\n"
-    /* Extract l4 */
-    "movq %%r8, 32(%%rsi)\n"
-    "xorq %%r8, %%r8\n"
-    /* (r9,r10,r8) += 2 * a2 * a3 */
-    "movq %%r13, %%rax\n"
-    "mulq %%r14\n"
-    "addq %%rax, %%r9\n"
-    "adcq %%rdx, %%r10\n"
-    "adcq $0, %%r8\n"
-    "addq %%rax, %%r9\n"
-    "adcq %%rdx, %%r10\n"
-    "adcq $0, %%r8\n"
-    /* Extract l5 */
-    "movq %%r9, 40(%%rsi)\n"
-    /* (r10,r8) += a3 * a3 */
-    "movq %%r14, %%rax\n"
-    "mulq %%r14\n"
-    "addq %%rax, %%r10\n"
-    "adcq %%rdx, %%r8\n"
-    /* Extract l6 */
-    "movq %%r10, 48(%%rsi)\n"
-    /* Extract l7 */
-    "movq %%r8, 56(%%rsi)\n"
-    :
-    : "S"(l), "D"(a->d)
-    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
-#else
-    /* 160 bit accumulator. */
-    uint64_t c0 = 0, c1 = 0;
-    uint32_t c2 = 0;
-
-    /* l[0..7] = a[0..3] * b[0..3]. */
-    muladd_fast(a->d[0], a->d[0]);
-    extract_fast(l[0]);
-    muladd2(a->d[0], a->d[1]);
-    extract(l[1]);
-    muladd2(a->d[0], a->d[2]);
-    muladd(a->d[1], a->d[1]);
-    extract(l[2]);
-    muladd2(a->d[0], a->d[3]);
-    muladd2(a->d[1], a->d[2]);
-    extract(l[3]);
-    muladd2(a->d[1], a->d[3]);
-    muladd(a->d[2], a->d[2]);
-    extract(l[4]);
-    muladd2(a->d[2], a->d[3]);
-    extract(l[5]);
-    muladd_fast(a->d[3], a->d[3]);
-    extract_fast(l[6]);
-    VERIFY_CHECK(c1 == 0);
-    l[7] = c0;
-#endif
-}
-
 #undef sumadd
 #undef sumadd_fast
 #undef muladd
 #undef muladd_fast
-#undef muladd2
 #undef extract
 #undef extract_fast
 
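The deleted C fallback of secp256k1_scalar_sqr_512 walks the columns of the square: l[k] collects every product a->d[i] * a->d[j] with i + j == k, and because a[i]*a[j] equals a[j]*a[i] it computes only the 10 distinct products (i <= j), doubling the cross terms with muladd2; the *_fast variants, which skip the third accumulator limb, handle the first and last columns. For comparison, a plain row-by-row schoolbook square that produces the same 8-limb result without the symmetry trick, assuming unsigned __int128 and the illustrative name sqr_4x64:

    #include <stdint.h>

    /* Illustrative only: full schoolbook square of a little-endian 4x64
     * number into 8 limbs. It computes all 16 partial products; the removed
     * specialized routine computed only the 10 distinct ones and doubled
     * the cross terms. */
    static void sqr_4x64(uint64_t l[8], const uint64_t a[4]) {
        int i, j;
        for (i = 0; i < 8; i++) l[i] = 0;
        for (i = 0; i < 4; i++) {
            uint64_t carry = 0;
            for (j = 0; j < 4; j++) {
                /* a[i]*a[j] + l[i+j] + carry always fits in 128 bits. */
                unsigned __int128 t = (unsigned __int128)a[i] * a[j] + l[i + j] + carry;
                l[i + j] = (uint64_t)t;
                carry = (uint64_t)(t >> 64);
            }
            l[i + 4] = carry;
        }
    }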
@@ -908,12 +748,6 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
     return ret;
 }
 
-static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
-    uint64_t l[8];
-    secp256k1_scalar_sqr_512(l, a);
-    secp256k1_scalar_reduce_512(r, l);
-}
-
 static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
     r1->d[0] = k->d[0];
     r1->d[1] = k->d[1];