Skip to content

Commit 63ade87

Browse files
committed
Merge BoringSSL 'd605df5': Use packed representation for large Curve25519 table
2 parents 2d8fbe0 + d605df5 commit 63ade87

File tree

4 files changed

+3054
-7479
lines changed

4 files changed

+3054
-7479
lines changed

crypto/curve25519/curve25519.c

+106-41
Original file line number | Diff line number | Diff line change
@@ -319,11 +319,6 @@ static void fe_copy(fe *h, const fe *f) {
319319
// h = f, where the source is an fe and the destination is an fe_loose.
// The limbs are copied as-is with fe_limbs_copy.
static void fe_copy_lt(fe_loose *h, const fe *f) {
320320
fe_limbs_copy(h->v, f->v);
321321
}
322-
#if !defined(OPENSSL_SMALL)
323-
static void fe_copy_ll(fe_loose *h, const fe_loose *f) {
324-
fe_limbs_copy(h->v, f->v);
325-
}
326-
#endif // !defined(OPENSSL_SMALL)
327322

328323
static void fe_loose_invert(fe *out, const fe_loose *z) {
329324
fe t0;
@@ -532,12 +527,23 @@ static void ge_p3_0(ge_p3 *h) {
532527
fe_0(&h->T);
533528
}
534529

530+
// Initializes h by calling fe_loose_1 on YplusX, YminusX and Z and fe_loose_0
// on T2d. x25519_ge_scalarmult uses the result as table entry 0 and as the
// starting value of its selection loop, so this is presumably the neutral
// element in cached form (TODO confirm against fe_loose_0/fe_loose_1).
static void ge_cached_0(ge_cached *h) {
531+
fe_loose_1(&h->YplusX);
532+
fe_loose_1(&h->YminusX);
533+
fe_loose_1(&h->Z);
534+
fe_loose_0(&h->T2d);
535+
}
536+
537+
#if defined(OPENSSL_SMALL)
538+
535539
// Initializes h by calling fe_loose_1 on yplusx and yminusx and fe_loose_0 on
// xy2d. x25519_ge_scalarmult_small_precomp uses the result as the starting
// value before its cmov selection loop. The definition sits inside an
// #if defined(OPENSSL_SMALL) guard.
static void ge_precomp_0(ge_precomp *h) {
536540
fe_loose_1(&h->yplusx);
537541
fe_loose_1(&h->yminusx);
538542
fe_loose_0(&h->xy2d);
539543
}
540544

545+
#endif
546+
541547
// r = p
542548
static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
543549
fe_copy(&r->X, &p->X);
@@ -568,6 +574,13 @@ static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
568574
fe_mul_tll(&r->T, &p->X, &p->Y);
569575
}
570576

577+
// r = p, converted from p1p1 form to cached form by way of a temporary p3
// value (x25519_ge_p1p1_to_p3 followed by x25519_ge_p3_to_cached).
578+
static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) {
579+
ge_p3 t;
580+
x25519_ge_p1p1_to_p3(&t, p);
581+
x25519_ge_p3_to_cached(r, &t);
582+
}
583+
571584
// r = 2 * p
572585
static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
573586
fe trX, trZ, trT;
@@ -664,16 +677,6 @@ static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
664677
fe_add(&r->T, &trZ, &trT);
665678
}
666679

667-
static uint8_t equal(signed char b, signed char c) {
668-
uint8_t ub = b;
669-
uint8_t uc = c;
670-
uint8_t x = ub ^ uc; // 0: yes; 1..255: no
671-
uint32_t y = x; // 0: yes; 1..255: no
672-
y -= 1; // 4294967295: yes; 0..254: no
673-
y >>= 31; // 1: yes; 0: no
674-
return y;
675-
}
676-
677680
static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
678681
fe_cmov(&t->yplusx, &u->yplusx, b);
679682
fe_cmov(&t->yminusx, &u->yminusx, b);
@@ -722,7 +725,7 @@ static void x25519_ge_scalarmult_small_precomp(
722725
ge_precomp_0(&e);
723726

724727
for (j = 1; j < 16; j++) {
725-
cmov(&e, &multiples[j-1], equal(index, j));
728+
cmov(&e, &multiples[j-1], 1&constant_time_eq_w(index, j));
726729
}
727730

728731
ge_cached cached;
@@ -742,35 +745,36 @@ void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
742745

743746
#else
744747

745-
static uint8_t negative(signed char b) {
746-
uint32_t x = b;
747-
x >>= 31; // 1: yes; 0: no
748-
return x;
749-
}
748+
static void table_select(ge_precomp *t, const int pos, const signed char b) {
749+
uint8_t bnegative = constant_time_msb_w(b);
750+
uint8_t babs = b - ((bnegative & b) << 1);
750751

751-
static void table_select(ge_precomp *t, int pos, signed char b) {
752-
ge_precomp minust;
753-
uint8_t bnegative = negative(b);
754-
uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
752+
uint8_t t_bytes[3][32] = {
753+
{constant_time_is_zero_w(b) & 1}, {constant_time_is_zero_w(b) & 1}, {0}};
754+
#if defined(__clang__) // materialize for vectorization, 6% speedup
755+
__asm__("" : "+m" (t_bytes) : /*no inputs*/);
756+
#endif
757+
static_assert(sizeof(t_bytes) == sizeof(k25519Precomp[pos][0]), "");
758+
for (int i = 0; i < 8; i++) {
759+
constant_time_conditional_memxor(t_bytes, k25519Precomp[pos][i],
760+
sizeof(t_bytes),
761+
constant_time_eq_w(babs, 1 + i));
762+
}
755763

756-
ge_precomp_0(t);
757-
cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
758-
cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
759-
cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
760-
cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
761-
cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
762-
cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
763-
cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
764-
cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
765-
fe_copy_ll(&minust.yplusx, &t->yminusx);
766-
fe_copy_ll(&minust.yminusx, &t->yplusx);
764+
fe yplusx, yminusx, xy2d;
765+
fe_frombytes_strict(&yplusx, t_bytes[0]);
766+
fe_frombytes_strict(&yminusx, t_bytes[1]);
767+
fe_frombytes_strict(&xy2d, t_bytes[2]);
767768

768-
// NOTE: the input table is canonical, but types don't encode it
769-
fe tmp;
770-
fe_carry(&tmp, &t->xy2d);
771-
fe_neg(&minust.xy2d, &tmp);
769+
fe_copy_lt(&t->yplusx, &yplusx);
770+
fe_copy_lt(&t->yminusx, &yminusx);
771+
fe_copy_lt(&t->xy2d, &xy2d);
772772

773-
cmov(t, &minust, bnegative);
773+
ge_precomp minust;
774+
fe_copy_lt(&minust.yplusx, &yminusx);
775+
fe_copy_lt(&minust.yminusx, &yplusx);
776+
fe_neg(&minust.xy2d, &xy2d);
777+
cmov(t, &minust, bnegative>>7);
774778
}
775779

776780
// h = a * B
@@ -829,6 +833,67 @@ void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
829833

830834
#endif
831835

836+
// Applies fe_cmov with flag b to each of the four fields of a ge_cached
// (YplusX, YminusX, Z, T2d), with t as destination and u as source. The caller
// in this file passes b as 0 or 1. Presumably t becomes u when b is 1 and is
// left unchanged when b is 0, without branching on b (confirm against fe_cmov).
static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) {
837+
fe_cmov(&t->YplusX, &u->YplusX, b);
838+
fe_cmov(&t->YminusX, &u->YminusX, b);
839+
fe_cmov(&t->Z, &u->Z, b);
840+
fe_cmov(&t->T2d, &u->T2d, b);
841+
}
842+
843+
// r = scalar * A,
844+
// where scalar = scalar[0]+256*scalar[1]+...+256^31 scalar[31].
// scalar is read at indices 0..31, so it must point to at least 32 bytes.
// The scalar is consumed four bits at a time, most significant nibble first,
// using a table of the multiples 0*A .. 15*A.
845+
void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A) {
846+
// Ai_p2[k] holds k*A in p2 form for k = 1..7 (index 0 is never written); these
// are only needed as inputs to the doublings below.
ge_p2 Ai_p2[8];
847+
// Ai[j] holds j*A in cached form for j = 0..15.
ge_cached Ai[16];
848+
ge_p1p1 t;
849+
850+
ge_cached_0(&Ai[0]);
851+
x25519_ge_p3_to_cached(&Ai[1], A);
852+
ge_p3_to_p2(&Ai_p2[1], A);
853+
854+
unsigned i;
855+
// Fill the table two entries per iteration: Ai[i] = 2 * (i/2)*A and
// Ai[i + 1] = A + Ai[i].
for (i = 2; i < 16; i += 2) {
856+
ge_p2_dbl(&t, &Ai_p2[i / 2]);
857+
ge_p1p1_to_cached(&Ai[i], &t);
858+
// The p2 copy is kept only for indices below 8, since the largest doubling
// input used above is Ai_p2[14 / 2].
if (i < 8) {
859+
x25519_ge_p1p1_to_p2(&Ai_p2[i], &t);
860+
}
861+
x25519_ge_add(&t, A, &Ai[i]);
862+
ge_p1p1_to_cached(&Ai[i + 1], &t);
863+
// i is even, so this stores the p2 copy for i + 1 <= 7 and keeps the write
// inside Ai_p2[8].
if (i < 7) {
864+
x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t);
865+
}
866+
}
867+
868+
ge_p2_0(r);
869+
ge_p3 u;
870+
871+
// One iteration per 4-bit window: r = 16 * r + Ai[window].
for (i = 0; i < 256; i += 4) {
872+
ge_p2_dbl(&t, r);
873+
x25519_ge_p1p1_to_p2(r, &t);
874+
ge_p2_dbl(&t, r);
875+
x25519_ge_p1p1_to_p2(r, &t);
876+
ge_p2_dbl(&t, r);
877+
x25519_ge_p1p1_to_p2(r, &t);
878+
ge_p2_dbl(&t, r);
879+
// The fourth doubling is converted to p3 rather than p2 because it is the
// left operand of x25519_ge_add below.
x25519_ge_p1p1_to_p3(&u, &t);
880+
881+
// Bytes are visited from scalar[31] down to scalar[0]. Within a byte, the
// first visit (i & 4 == 0) shifts by 4 to take the high nibble and the second
// visit (i & 4 == 4) shifts by 0 to take the low nibble.
uint8_t index = scalar[31 - i/8];
882+
index >>= 4 - (i & 4);
883+
index &= 0xf;
884+
885+
unsigned j;
886+
ge_cached selected;
887+
ge_cached_0(&selected);
888+
// Every table entry is visited and conditionally moved, rather than indexing
// Ai[index] directly; presumably so that the memory access pattern does not
// depend on the scalar (confirm against constant_time_eq_w / fe_cmov).
for (j = 0; j < 16; j++) {
889+
cmov_cached(&selected, &Ai[j], 1&constant_time_eq_w(index, j));
890+
}
891+
892+
x25519_ge_add(&t, &u, &selected);
893+
x25519_ge_p1p1_to_p2(r, &t);
894+
}
895+
}
896+
832897
static void slide(signed char *r, const uint8_t *a) {
833898
int i;
834899
int b;

0 commit comments

Comments
 (0)