@@ -180,21 +180,29 @@ static void fe_0(fe *h) {
180
180
OPENSSL_memset (h , 0 , sizeof (fe ));
181
181
}
182
182
183
+ #if defined(OPENSSL_SMALL )
184
+
183
185
// Sets the loose field element |h| to zero by clearing the whole object.
static void fe_loose_0(fe_loose *h) {
  OPENSSL_memset(h, 0, sizeof(*h));
}
186
188
189
+ #endif
190
+
187
191
// h = 1
188
192
// Sets the field element |h| to one: the whole object is cleared first and
// then element 0 of its |v| array is set to 1.
static void fe_1(fe *h) {
  OPENSSL_memset(h, 0, sizeof(*h));
  h->v[0] = 1;
}
192
196
197
+ #if defined(OPENSSL_SMALL )
198
+
193
199
// Sets the loose field element |h| to one: the whole object is cleared first
// and then element 0 of its |v| array is set to 1.
static void fe_loose_1(fe_loose *h) {
  OPENSSL_memset(h, 0, sizeof(*h));
  h->v[0] = 1;
}
197
203
204
+ #endif
205
+
198
206
// h = f + g
199
207
// Can overlap h with f or g.
200
208
static void fe_add (fe_loose * h , const fe * f , const fe * g ) {
@@ -319,11 +327,6 @@ static void fe_copy(fe *h, const fe *f) {
319
327
// Copies the |v| array of the |fe| value |f| into the |fe_loose| value |h|
// using fe_limbs_copy. |f| itself is not modified.
static void fe_copy_lt(fe_loose *h, const fe *f) {
  fe_limbs_copy(h->v, f->v);
}
322
- #if !defined(OPENSSL_SMALL )
323
- static void fe_copy_ll (fe_loose * h , const fe_loose * f ) {
324
- fe_limbs_copy (h -> v , f -> v );
325
- }
326
- #endif // !defined(OPENSSL_SMALL)
327
330
328
331
static void fe_loose_invert (fe * out , const fe_loose * z ) {
329
332
fe t0 ;
@@ -532,12 +535,16 @@ static void ge_p3_0(ge_p3 *h) {
532
535
fe_0 (& h -> T );
533
536
}
534
537
538
+ #if defined(OPENSSL_SMALL )
539
+
535
540
// Initializes the precomputed point |h| with yplusx = 1, yminusx = 1 and
// xy2d = 0. The three fields are independent, so the order of the
// assignments does not matter.
static void ge_precomp_0(ge_precomp *h) {
  fe_loose_0(&h->xy2d);
  fe_loose_1(&h->yminusx);
  fe_loose_1(&h->yplusx);
}
540
545
546
+ #endif
547
+
541
548
// r = p
542
549
static void ge_p3_to_p2 (ge_p2 * r , const ge_p3 * p ) {
543
550
fe_copy (& r -> X , & p -> X );
@@ -664,16 +671,6 @@ static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
664
671
fe_add (& r -> T , & trZ , & trT );
665
672
}
666
673
667
- static uint8_t equal (signed char b , signed char c ) {
668
- uint8_t ub = b ;
669
- uint8_t uc = c ;
670
- uint8_t x = ub ^ uc ; // 0: yes; 1..255: no
671
- uint32_t y = x ; // 0: yes; 1..255: no
672
- y -= 1 ; // 4294967295: yes; 0..254: no
673
- y >>= 31 ; // 1: yes; 0: no
674
- return y ;
675
- }
676
-
677
674
static void cmov (ge_precomp * t , const ge_precomp * u , uint8_t b ) {
678
675
fe_cmov (& t -> yplusx , & u -> yplusx , b );
679
676
fe_cmov (& t -> yminusx , & u -> yminusx , b );
@@ -722,7 +719,7 @@ static void x25519_ge_scalarmult_small_precomp(
722
719
ge_precomp_0 (& e );
723
720
724
721
for (j = 1 ; j < 16 ; j ++ ) {
725
- cmov (& e , & multiples [j - 1 ], equal (index , j ));
722
+ cmov (& e , & multiples [j - 1 ], 1 & constant_time_eq_w (index , j ));
726
723
}
727
724
728
725
ge_cached cached ;
@@ -742,35 +739,36 @@ void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
742
739
743
740
#else
744
741
745
- static uint8_t negative (signed char b ) {
746
- uint32_t x = b ;
747
- x >>= 31 ; // 1: yes; 0: no
748
- return x ;
749
- }
742
// Diff hunk for table_select: lines marked "+" form the new implementation,
// lines marked "-" the implementation being replaced, and unmarked lines are
// shared context.
// New version: selects the precomputed entry for the signed value b at table
// position pos and writes it to t. It visits all eight entries of
// k25519Precomp[pos] and accumulates under a mask rather than indexing the
// table by b.
+ static void table_select (ge_precomp * t , const int pos , const signed char b ) {
743
// Sign of b stored in a byte. NOTE(review): presumably 0xff when b < 0 and 0
// otherwise (the low byte of an all-ones / all-zeros mask) - confirm against
// constant_time_msb_w.
+ uint8_t bnegative = constant_time_msb_w (b );
744
// babs = b - 2 * (bnegative & b), truncated to a byte. Under the assumption
// above this is the absolute value of b.
+ uint8_t babs = b - ((bnegative & b ) << 1 );
745
+
746
// Three 32-byte rows, one per ge_precomp field (yplusx, yminusx, xy2d).
// The first byte of rows 0 and 1 starts as (is_zero(b) & 1); every other byte
// starts as 0. For b == 0 the buffer therefore presumably holds the byte
// encodings of 1, 1, 0, the same values ge_precomp_0 assigns - TODO confirm
// the byte order expected by fe_frombytes_strict.
+ uint8_t t_bytes [3 ][32 ] = {
747
+ {constant_time_is_zero_w (b ) & 1 }, {constant_time_is_zero_w (b ) & 1 }, {0 }};
748
+ #if defined(__clang__ ) // materialize for vectorization, 6% speedup
749
// Empty asm statement with a read-write memory operand on t_bytes; it emits
// no instructions of its own.
+ __asm__("" : "+m" (t_bytes ) : /*no inputs*/ );
750
+ #endif
751
// Compile-time check that one table entry is exactly sizeof(t_bytes) bytes,
// because the loop below XORs whole entries into t_bytes.
+ OPENSSL_STATIC_ASSERT (sizeof (t_bytes ) == sizeof (k25519Precomp [pos ][0 ]), "" );
752
// Every entry i in 0..7 is passed to the conditional XOR with the mask
// eq(babs, 1 + i). NOTE(review): presumably only the entry whose index matches
// babs - 1 is actually XORed in - confirm against
// constant_time_conditional_memxor.
+ for (int i = 0 ; i < 8 ; i ++ ) {
753
+ constant_time_conditional_memxor (t_bytes , k25519Precomp [pos ][i ],
754
+ sizeof (t_bytes ),
755
+ constant_time_eq_w (babs , 1 + i ));
756
+ }
757
+
758
// Decode the three selected rows into field elements.
+ fe yplusx , yminusx , xy2d ;
759
+ fe_frombytes_strict (& yplusx , t_bytes [0 ]);
760
+ fe_frombytes_strict (& yminusx , t_bytes [1 ]);
761
+ fe_frombytes_strict (& xy2d , t_bytes [2 ]);
762
+
763
// Store the selected entry in t.
+ fe_copy_lt (& t -> yplusx , & yplusx );
764
+ fe_copy_lt (& t -> yminusx , & yminusx );
765
+ fe_copy_lt (& t -> xy2d , & xy2d );
750
766
751
// Previous implementation (removed by this change): it started from
// ge_precomp_0(t), applied one cmov per table entry keyed on equal(babs, k),
// and built minust from the fields of t.
- static void table_select (ge_precomp * t , int pos , signed char b ) {
752
767
ge_precomp minust ;
753
- uint8_t bnegative = negative (b );
754
- uint8_t babs = b - ((uint8_t )((- bnegative ) & b ) << 1 );
755
-
756
- ge_precomp_0 (t );
757
- cmov (t , & k25519Precomp [pos ][0 ], equal (babs , 1 ));
758
- cmov (t , & k25519Precomp [pos ][1 ], equal (babs , 2 ));
759
- cmov (t , & k25519Precomp [pos ][2 ], equal (babs , 3 ));
760
- cmov (t , & k25519Precomp [pos ][3 ], equal (babs , 4 ));
761
- cmov (t , & k25519Precomp [pos ][4 ], equal (babs , 5 ));
762
- cmov (t , & k25519Precomp [pos ][5 ], equal (babs , 6 ));
763
- cmov (t , & k25519Precomp [pos ][6 ], equal (babs , 7 ));
764
- cmov (t , & k25519Precomp [pos ][7 ], equal (babs , 8 ));
765
- fe_copy_ll (& minust .yplusx , & t -> yminusx );
766
- fe_copy_ll (& minust .yminusx , & t -> yplusx );
767
-
768
- // NOTE: the input table is canonical, but types don't encode it
769
- fe tmp ;
770
- fe_carry (& tmp , & t -> xy2d );
771
- fe_neg (& minust .xy2d , & tmp );
772
-
773
- cmov (t , & minust , bnegative );
768
// New version: minust takes the selected entry with yplusx and yminusx
// swapped and xy2d negated (presumably the negated point, going by its name).
+ fe_copy_lt (& minust .yplusx , & yminusx );
769
+ fe_copy_lt (& minust .yminusx , & yplusx );
770
+ fe_neg (& minust .xy2d , & xy2d );
771
// Conditionally replace t with minust. bnegative >> 7 reduces the sign byte
// to the single-bit flag handed to cmov (1 when b was negative, under the
// assumption noted at the top of the function).
+ cmov (t , & minust , bnegative >>7 );
774
772
}
775
773
776
774
// h = a * B
0 commit comments