@@ -271,37 +271,36 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge *a) {
 }

 static SECP256K1_INLINE void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a) {
-    /* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate.
-     *
-     * Note that there is an implementation described at
-     * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
-     * which trades a multiply for a square, but in practice this is actually slower,
-     * mainly because it requires more normalizations.
-     */
-    secp256k1_fe t1, t2, t3, t4;
+    secp256k1_fe l, s, t, q;

     r->infinity = a->infinity;

-    secp256k1_fe_mul(&r->z, &a->z, &a->y);
-    secp256k1_fe_mul_int(&r->z, 2);       /* Z' = 2*Y*Z (2) */
-    secp256k1_fe_sqr(&t1, &a->x);
-    secp256k1_fe_mul_int(&t1, 3);         /* T1 = 3*X^2 (3) */
-    secp256k1_fe_sqr(&t2, &t1);           /* T2 = 9*X^4 (1) */
-    secp256k1_fe_sqr(&t3, &a->y);
-    secp256k1_fe_mul_int(&t3, 2);         /* T3 = 2*Y^2 (2) */
-    secp256k1_fe_sqr(&t4, &t3);
-    secp256k1_fe_mul_int(&t4, 2);         /* T4 = 8*Y^4 (2) */
-    secp256k1_fe_mul(&t3, &t3, &a->x);    /* T3 = 2*X*Y^2 (1) */
-    r->x = t3;
-    secp256k1_fe_mul_int(&r->x, 4);       /* X' = 8*X*Y^2 (4) */
-    secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */
-    secp256k1_fe_add(&r->x, &t2);         /* X' = 9*X^4 - 8*X*Y^2 (6) */
-    secp256k1_fe_negate(&t2, &t2, 1);     /* T2 = -9*X^4 (2) */
-    secp256k1_fe_mul_int(&t3, 6);         /* T3 = 12*X*Y^2 (6) */
-    secp256k1_fe_add(&t3, &t2);           /* T3 = 12*X*Y^2 - 9*X^4 (8) */
-    secp256k1_fe_mul(&r->y, &t1, &t3);    /* Y' = 36*X^3*Y^2 - 27*X^6 (1) */
-    secp256k1_fe_negate(&t2, &t4, 2);     /* T2 = -8*Y^4 (3) */
-    secp256k1_fe_add(&r->y, &t2);         /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */
+    /* Formula used:
+     * L = (3/2) * X1^2
+     * S = Y1^2
+     * T = X1*S
+     * X3 = L^2 - 2*T
+     * Y3 = L*(T - X3) - S^2
+     * Z3 = Y1*Z1
+     */
+
+    secp256k1_fe_mul(&r->z, &a->z, &a->y); /* Z3 = Y1*Z1 (1) */
+    secp256k1_fe_sqr(&l, &a->x);           /* L = X1^2 (1) */
+    secp256k1_fe_mul_int(&l, 3);           /* L = 3*X1^2 (3) */
+    secp256k1_fe_half(&l);                 /* L = 3/2*X1^2 (2) */
+    secp256k1_fe_sqr(&s, &a->y);           /* S = Y1^2 (1) */
+    secp256k1_fe_mul(&t, &a->x, &s);       /* T = X1*S (1) */
+    q = t;
+    secp256k1_fe_add(&q, &t);              /* Q = 2*T (2) */
+    secp256k1_fe_negate(&r->x, &q, 2);     /* X3 = -2*T (3) */
+    secp256k1_fe_sqr(&q, &l);              /* Q = L^2 (1) */
+    secp256k1_fe_add(&r->x, &q);           /* X3 = L^2 - 2*T (4) */
+    secp256k1_fe_negate(&q, &r->x, 4);     /* Q = -X3 (5) */
+    secp256k1_fe_add(&q, &t);              /* Q = T-X3 (6) */
+    secp256k1_fe_mul(&q, &q, &l);          /* Q = L*(T-X3) (1) */
+    secp256k1_fe_sqr(&s, &s);              /* S = S^2 (1) */
+    secp256k1_fe_negate(&r->y, &s, 1);     /* Y3 = -S^2 (2) */
+    secp256k1_fe_add(&r->y, &q);           /* Y3 = L*(T-X3) - S^2 (3) */
 }

 static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) {
@@ -325,8 +324,6 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) {

     if (rzr != NULL) {
         *rzr = a->y;
-        secp256k1_fe_normalize_weak(rzr);
-        secp256k1_fe_mul_int(rzr, 2);
     }

     secp256k1_gej_double(r, a);
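
For reference (not part of the diff): a sketch of why the new formula is a correct Jacobian doubling on y^2 = x^3 + 7, using the usual convention x = X/Z^2, y = Y/Z^3. The affine doubling rules are

    lambda = 3*x1^2 / (2*y1)
    x3 = lambda^2 - 2*x1
    y3 = lambda*(x1 - x3) - y1

Substituting x1 = X1/Z1^2, y1 = Y1/Z1^3 and choosing Z3 = Y1*Z1 (rather than the more common 2*Y1*Z1) gives

    X3 = x3*Z3^2 = (9/4)*X1^4 - 2*X1*Y1^2       = L^2 - 2*T
    Y3 = y3*Z3^3 = (3/2)*X1^2*(X1*Y1^2 - X3) - Y1^4 = L*(T - X3) - S^2

with L = (3/2)*X1^2, S = Y1^2 and T = X1*S, which are exactly the quantities the new code computes. Dropping the factor 2 from Z3 is also why the rzr handling in secp256k1_gej_double_var in the second hunk no longer doubles a->y.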
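The one new field primitive the rewrite depends on is secp256k1_fe_half, i.e. division by 2 modulo the field prime, which is what makes the (3/2)*X1^2 step cheap. A minimal standalone sketch of the arithmetic identity behind it, using a toy 64-bit prime and a hypothetical toy_half helper (not the library's field representation or implementation): since p is odd, x/2 mod p equals x >> 1 when x is even and (x + p) >> 1 when x is odd.

#include <stdint.h>
#include <stdio.h>

/* Toy illustration: halve x modulo an odd prime p, assuming 0 <= x < p
 * and that x + p does not overflow 64 bits. */
static uint64_t toy_half(uint64_t x, uint64_t p) {
    return (x & 1) ? (x + p) >> 1 : x >> 1;
}

int main(void) {
    uint64_t p = 1000003;   /* toy odd prime, stand-in for the secp256k1 field prime */
    uint64_t x = 7;
    uint64_t h = toy_half(x, p);
    /* Check the defining property: 2*h == x (mod p). */
    printf("x = %llu, x/2 mod p = %llu, 2*h mod p = %llu\n",
           (unsigned long long)x, (unsigned long long)h,
           (unsigned long long)((2 * h) % p));
    return 0;
}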