@@ -102,6 +102,20 @@ const (
102
102
fieldPrimeWordOne = 0x3ffffbf
103
103
)
104
104
105
// fieldQBytes is the big-endian byte encoding of the exponent Q = (P+1)/4,
// where P is the secp256k1 field prime. Raising a field element to the power
// Q is how square roots are computed in this field. Written out in hex:
//
//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
//
// The slice is 32 bytes long and contains only four distinct byte values
// (0x3f, 0xff, 0xbf and 0x0c).
var fieldQBytes = []byte{
	0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x0c,
}
118
+
105
119
// fieldVal implements optimized fixed-precision arithmetic over the
106
120
// secp256k1 finite field. This means all arithmetic is performed modulo
107
121
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f. It
@@ -1221,3 +1235,118 @@ func (f *fieldVal) Inverse() *fieldVal {
1221
1235
f .Square ().Square ().Square ().Square ().Square () // f = a^(2^256 - 4294968320)
1222
1236
return f .Mul (& a45 ) // f = a^(2^256 - 4294968275) = a^(p-2)
1223
1237
}
1238
+
1239
+ // SqrtVal computes the square root of x modulo the curve's prime, and stores
1240
+ // the result in f. The square root is computed via exponentiation of x by the
1241
+ // value Q = (P+1)/4 using the curve's precomputed big-endian representation of
1242
+ // the Q. This method uses a modified version of square-and-multiply
1243
+ // exponentiation over secp256k1 fieldVals to operate on bytes instead of bits,
1244
+ // which offers better performance over both big.Int exponentiation and bit-wise
1245
+ // square-and-multiply.
1246
+ //
1247
+ // NOTE: This method only works when P is intended to be the secp256k1 prime and
1248
+ // is not constant time. The returned value is of magnitude 1, but is
1249
+ // denormalized.
1250
+ func (f * fieldVal ) SqrtVal (x * fieldVal ) * fieldVal {
1251
+ // The following computation iteratively computes x^((P+1)/4) = x^Q
1252
+ // using the recursive, piece-wise definition:
1253
+ //
1254
+ // x^n = (x^2)^(n/2) mod P if n is even
1255
+ // x^n = x(x^2)^(n-1/2) mod P if n is odd
1256
+ //
1257
+ // Given n in its big-endian representation b_k, ..., b_0, x^n can be
1258
+ // computed by defining the sequence r_k+1, ..., r_0, where:
1259
+ //
1260
+ // r_k+1 = 1
1261
+ // r_i = (r_i+1)^2 * x^b_i for i = k, ..., 0
1262
+ //
1263
+ // The final value r_0 = x^n.
1264
+ //
1265
+ // See https://en.wikipedia.org/wiki/Exponentiation_by_squaring for more
1266
+ // details.
1267
+ //
1268
+ // This can be further optimized, by observing that the value of Q in
1269
+ // secp256k1 has the value:
1270
+ //
1271
+ // Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
1272
+ //
1273
+ // We can unroll the typical bit-wise interpretation of the
1274
+ // exponentiation algorithm above to instead operate on bytes.
1275
+ // This reduces the number of comparisons by an order of magnitude,
1276
+ // reducing the overhead of failed branch predictions and additional
1277
+ // comparisons in this method.
1278
+ //
1279
+ // Since there there are only 4 unique bytes of Q, this keeps the jump
1280
+ // table small without the need to handle all possible 8-bit values.
1281
+ // Further, we observe that 29 of the 32 bytes are 0xff; making the
1282
+ // first case handle 0xff therefore optimizes the hot path.
1283
+ f .SetInt (1 )
1284
+ for _ , b := range fieldQBytes {
1285
+ switch b {
1286
+
1287
+ // Most common case, where all 8 bits are set.
1288
+ case 0xff :
1289
+ f .Square ().Mul (x )
1290
+ f .Square ().Mul (x )
1291
+ f .Square ().Mul (x )
1292
+ f .Square ().Mul (x )
1293
+ f .Square ().Mul (x )
1294
+ f .Square ().Mul (x )
1295
+ f .Square ().Mul (x )
1296
+ f .Square ().Mul (x )
1297
+
1298
+ // First byte of Q (0x3f), where all but the top two bits are
1299
+ // set. Note that this case only applies six operations, since
1300
+ // the highest bit of Q resides in bit six of the first byte. We
1301
+ // ignore the first two bits, since squaring for these bits will
1302
+ // result in an invalid result. We forgo squaring f before the
1303
+ // first multiply, since 1^2 = 1.
1304
+ case 0x3f :
1305
+ f .Mul (x )
1306
+ f .Square ().Mul (x )
1307
+ f .Square ().Mul (x )
1308
+ f .Square ().Mul (x )
1309
+ f .Square ().Mul (x )
1310
+ f .Square ().Mul (x )
1311
+
1312
+ // Byte 28 of Q (0xbf), where only bit 7 is unset.
1313
+ case 0xbf :
1314
+ f .Square ().Mul (x )
1315
+ f .Square ()
1316
+ f .Square ().Mul (x )
1317
+ f .Square ().Mul (x )
1318
+ f .Square ().Mul (x )
1319
+ f .Square ().Mul (x )
1320
+ f .Square ().Mul (x )
1321
+ f .Square ().Mul (x )
1322
+
1323
+ // Byte 31 of Q (0x0c), where only bits 3 and 4 are set.
1324
+ default :
1325
+ f .Square ()
1326
+ f .Square ()
1327
+ f .Square ()
1328
+ f .Square ()
1329
+ f .Square ().Mul (x )
1330
+ f .Square ().Mul (x )
1331
+ f .Square ()
1332
+ f .Square ()
1333
+ }
1334
+ }
1335
+
1336
+ return f
1337
+ }
1338
+
1339
+ // Sqrt computes the square root of f modulo the curve's prime, and stores the
1340
+ // result in f. The square root is computed via exponentiation of x by the value
1341
+ // Q = (P+1)/4 using the curve's precomputed big-endian representation of the Q.
1342
+ // This method uses a modified version of square-and-multiply exponentiation
1343
+ // over secp256k1 fieldVals to operate on bytes instead of bits, which offers
1344
+ // better performance over both big.Int exponentiation and bit-wise
1345
+ // square-and-multiply.
1346
+ //
1347
+ // NOTE: This method only works when P is intended to be the secp256k1 prime and
1348
+ // is not constant time. The returned value is of magnitude 1, but is
1349
+ // denormalized.
1350
+ func (f * fieldVal ) Sqrt () * fieldVal {
1351
+ return f .SqrtVal (f )
1352
+ }
0 commit comments