Skip to content

Commit

Permalink
Expose Pippenger multiplication for combining multiple sigs of same msg
Browse files Browse the repository at this point in the history
In many use cases, there are multiple signatures of the same message,
e.g., Ethereum attestations often share the signed `AttestationData`.

For that situation, `blst` started exposing Pippenger multiplication
to accelerate this use case. Multiscalar multiplication is much faster
than individual scalar multiplication of each signature / pubkey.

Further optimizations may be achieved with parallel tiling, see the Rust
binding code in the `npoints >= 32` situation:

- https://github.com/supranational/blst/blob/v0.3.13/bindings/rust/src/pippenger.rs

Likewise, multiple pubkeys / signatures may be loaded simultaneously
using the new `blst` APIs.

We don't do either of these additional optimizations as our architecture
does not readily support them. Pippenger multiplication alone already
offers a significant speedup, so further optimizations can be prioritized later.

```
------------------------------------------------------------------------------------------------------------------------------------
BLS verif of 6 msgs by 6 pubkeys                                                117.232 ops/s      8530098 ns/op     20471994 cycles
BLS verif of 6 sigs of same msg by 6 pubkeys (with blinding)                    553.186 ops/s      1807711 ns/op      4338371 cycles
BLS verif of 6 sigs of same msg by 6 pubkeys                                    724.279 ops/s      1380683 ns/op      3313617 cycles
------------------------------------------------------------------------------------------------------------------------------------
BLS verif of 60 msgs by 60 pubkeys                                               11.131 ops/s     89839743 ns/op    215615251 cycles
BLS verif of 60 sigs of same msg by 60 pubkeys (with blinding)                  238.059 ops/s      4200634 ns/op     10081380 cycles
BLS verif of 60 sigs of same msg by 60 pubkeys                                  680.634 ops/s      1469219 ns/op      3526031 cycles
------------------------------------------------------------------------------------------------------------------------------------
BLS verif of 180 msgs by 180 pubkeys                                              3.887 ops/s    257298895 ns/op    617517127 cycles
BLS verif of 180 sigs of same msg by 180 pubkeys (with blinding)                166.340 ops/s      6011785 ns/op     14428186 cycles
BLS verif of 180 sigs of same msg by 180 pubkeys                                536.938 ops/s      1862413 ns/op      4469689 cycles
------------------------------------------------------------------------------------------------------------------------------------
```
  • Loading branch information
etan-status committed Aug 9, 2024
1 parent 50f0466 commit 99b9c35
Show file tree
Hide file tree
Showing 6 changed files with 285 additions and 59 deletions.
62 changes: 37 additions & 25 deletions benchmarks/bench_all.nim
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Nim-BLSCurve
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.

import
std/[os, strutils, cpuinfo],
../blscurve,
Expand Down Expand Up @@ -29,28 +38,31 @@ benchFastAggregateVerify(numKeys = 128, iters = 10)
separator()

when BLS_BACKEND == BLST:
# Thread count handed to the parallel batch benchmark: read from the
# TP_NUM_THREADS environment variable when it is set, otherwise the
# processor count. A non-numeric TP_NUM_THREADS makes `parseInt` raise.
var nthreads: int
if existsEnv"TP_NUM_THREADS":
nthreads = getEnv"TP_NUM_THREADS".parseInt()
else:
nthreads = countProcessors()

# Simulate Block verification (at most 6 signatures per block)
batchVerifyMulti(numSigs = 6, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 6, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 6, iters = 10, nthreads)
separator()

# Simulate 10 blocks verification
batchVerifyMulti(numSigs = 60, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 60, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 60, iters = 10, nthreads)
separator()

# Simulate 30 blocks verification
batchVerifyMulti(numSigs = 180, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 180, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 180, iters = 10, nthreads)
separator()

echo "\nUsing nthreads = ", nthreads, ". The number of threads can be changed with TP_NUM_THREADS environment variable."
# NOTE(review): the statements from here on repeat the run above with one
# extra `batchVerifyMultiSameMessage` call per group. Presumably these are
# the removed and added sides of the same diff hunk - confirm which side is
# the live code before editing.
var nthreads: int
if existsEnv"TP_NUM_THREADS":
nthreads = getEnv"TP_NUM_THREADS".parseInt()
else:
nthreads = countProcessors()

# Simulate Block verification (at most 6 signatures per block)
batchVerifyMulti(numSigs = 6, iters = 10)
batchVerifyMultiSameMessage(numSigs = 6, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 6, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 6, iters = 10, nthreads)
separator()

# Simulate 10 blocks verification
batchVerifyMulti(numSigs = 60, iters = 10)
batchVerifyMultiSameMessage(numSigs = 60, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 60, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 60, iters = 10, nthreads)
separator()

# Simulate 30 blocks verification
batchVerifyMulti(numSigs = 180, iters = 10)
batchVerifyMultiSameMessage(numSigs = 180, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 180, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 180, iters = 10, nthreads)
separator()

echo "\nUsing nthreads = ", nthreads, ". The number of threads can be changed with TP_NUM_THREADS environment variable."
50 changes: 47 additions & 3 deletions benchmarks/bls_signature.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Nim-BLSCurve
# Copyright (c) 2018 Status Research & Development GmbH
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
Expand Down Expand Up @@ -209,11 +209,52 @@ when BLS_BACKEND == BLST:
hashedMsg.bls_sha256_digest("msg" & $i)
triplets.add (pk, hashedMsg, sk.sign(hashedMsg))

bench("BLS verif of " & $numSigs & " msgs by "& $numSigs & " pubkeys", iters):
bench("BLS verif of " & $numSigs & " msgs by " & $numSigs & " pubkeys", iters):
for i in 0 ..< triplets.len:
let ok = triplets[i].pubkey.verify(triplets[i].msg, triplets[i].sig)
doAssert ok

proc batchVerifyMultiSameMessage*(numSigs, iters: int) =
  ## Verification of N pubkeys signing the same message
  ##
  ## Generates `numSigs` fresh key pairs, signs one shared hashed message
  ## with each of them, then benchmarks two ways of verifying the whole set
  ## (`iters` iterations each):
  ## - with blinding: `combine` the set using 32 blinding bytes and verify
  ##   the resulting triplet
  ## - without blinding: `aggregateAll` the pubkeys and the signatures and
  ##   verify the aggregate
  ##
  ## `numSigs` must be at least 1; there is nothing to combine otherwise.
  doAssert numSigs > 0, "batchVerifyMultiSameMessage requires numSigs > 0"

  var hashedMsg: array[32, byte]
  hashedMsg.bls_sha256_digest("msg")

  # `multiSet` is default-initialized on purpose (no `{.noinit.}`): the type
  # owns seqs, and its first write below is an assignment that must not see
  # uninitialized seq fields.
  var
    pks: seq[PublicKey]
    sigs: seq[Signature]
    multiSet: MultiSignatureSet
  for i in 0 ..< numSigs:
    let
      (pk, sk) = keyGen()
      sig = sk.sign(hashedMsg)
    pks.add pk
    sigs.add sig
    if i == 0:
      # The first triplet fixes the message that every later `add` must match
      multiSet = MultiSignatureSet.init((pk, hashedMsg, sig))
    else:
      multiSet.add((pk, hashedMsg, sig))

  # With blinding (more secure, but slower)
  var secureBlindingBytes: array[32, byte]
  secureBlindingBytes.bls_sha256_digest("Mr F was here")
  bench("BLS verif of " & $numSigs & " sigs of same msg by " & $numSigs & " pubkeys (with blinding)", iters):
    let
      triplet = multiSet.combine(secureBlindingBytes)
      ok = triplet.pubkey.verify(triplet.message, triplet.signature)
    doAssert ok

  # Without blinding (not secure, but benched for comparison)
  bench("BLS verif of " & $numSigs & " sigs of same msg by " & $numSigs & " pubkeys", iters):
    var
      pubkey {.noinit.}: PublicKey
      signature {.noinit.}: Signature
    let ok =
      pubkey.aggregateAll(pks) and
      signature.aggregateAll(sigs) and
      pubkey.verify(hashedMsg, signature)
    doAssert ok

proc batchVerifyMultiBatchedSerial*(numSigs, iters: int) =
## Verification of N pubkeys signing for N messages

Expand All @@ -230,7 +271,7 @@ when BLS_BACKEND == BLST:

var cache = BatchedBLSVerifierCache.init()

bench("Serial batch verify " & $numSigs & " msgs by "& $numSigs & " pubkeys (with blinding)", iters):
bench("Serial batch verify " & $numSigs & " msgs by " & $numSigs & " pubkeys (with blinding)", iters):
secureBlindingBytes.bls_sha256_digest(secureBlindingBytes)
let ok = cache.batchVerifySerial(batch, secureBlindingBytes)
doAssert ok
Expand Down Expand Up @@ -277,16 +318,19 @@ when isMainModule:

# Simulate Block verification (at most 6 signatures per block)
batchVerifyMulti(numSigs = 6, iters = 10)
batchVerifyMultiSameMessage(numSigs = 6, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 6, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 6, iters = 10, nthreads)

# Simulate 10 blocks verification
batchVerifyMulti(numSigs = 60, iters = 10)
batchVerifyMultiSameMessage(numSigs = 60, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 60, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 60, iters = 10, nthreads)

# Simulate 30 blocks verification
batchVerifyMulti(numSigs = 180, iters = 10)
batchVerifyMultiSameMessage(numSigs = 180, iters = 10)
batchVerifyMultiBatchedSerial(numSigs = 180, iters = 10)
batchVerifyMultiBatchedParallel(numSigs = 180, iters = 10, nthreads)

Expand Down
54 changes: 51 additions & 3 deletions blscurve/bls_batch_verifier.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Nim-BLSCurve
# Copyright (c) 2018-Present Status Research & Development GmbH
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
Expand Down Expand Up @@ -39,12 +39,26 @@ type
## if `signature` is the corresponding AggregateSignature
## on the same `message`
##
## This assumes that `message`
## is the output of a fixed size hash function.
## This assumes that `message` is the output of a fixed size hash function.
##
## `pubkey` and `signature` are assumed to be group checked
## which is guaranteed at deserialization from bytes or hex

MultiSignatureSet* = object
## A set of signatures that all pertain to the same `message`.
##
## `pubkeys` can contain aggregate publickeys (via `aggregateAll`)
## if `signatures` contains the corresponding AggregateSignature
## on the same `message`
##
## This assumes that `message` is the output of a fixed size hash function.
##
## `pubkeys` and `signatures` are assumed to be group checked,
## which is guaranteed at deserialization from bytes or hex
##
## The fields carry no export marker, so code outside this module
## builds and extends a set through `init` and `add`.
# Public keys of the signers; the seq-based `init` asserts this has the same
# non-zero length as `signatures`.
pubkeys: seq[PublicKey]
# The single 32-byte message shared by every entry of the set.
message: array[32, byte]
# Signatures over `message`; `init` and `add` append to `pubkeys` and
# `signatures` together.
signatures: seq[Signature]

BatchedBLSVerifierCache* {.requiresInit.} = object
## This type holds temporary contexts
## to batch BLS multi signatures (aggregated or individual)
Expand All @@ -57,6 +71,40 @@ type
# Serial Batch Verifier
# ----------------------------------------------------------------------

func init*(
    T: type MultiSignatureSet,
    pubkeys: seq[PublicKey],
    message: array[32, byte],
    signatures: seq[Signature]
): MultiSignatureSet =
  ## Build a set from parallel sequences of public keys and signatures
  ## that all cover the same `message`.
  ##
  ## Asserts that both sequences have the same length and that this
  ## length is not zero.
  doAssert pubkeys.len == signatures.len
  doAssert pubkeys.len > 0
  result.pubkeys = pubkeys
  result.message = message
  result.signatures = signatures

func init*(T: type MultiSignatureSet, sigset: SignatureSet): MultiSignatureSet =
  ## Start a set from a single (pubkey, message, signature) triplet.
  ## The triplet's message becomes the message of the whole set.
  result.pubkeys = @[sigset.pubkey]
  result.message = sigset.message
  result.signatures = @[sigset.signature]

func add*(multiSet: var MultiSignatureSet, sigset: SignatureSet) =
  ## Append the pubkey and signature of `sigset` to `multiSet`.
  ##
  ## Asserts that `sigset` signs the same message as the set.
  doAssert multiSet.message == sigset.message
  add(multiSet.pubkeys, sigset.pubkey)
  add(multiSet.signatures, sigset.signature)

func combine*(
    multiSet: MultiSignatureSet,
    secureRandomBytes: array[32, byte]
): SignatureSet =
  ## Collapse the whole set into a single (pubkey, message, signature)
  ## triplet by passing `secureRandomBytes` together with the set's pubkeys
  ## and signatures to the underlying `combine`; the message is carried over
  ## unchanged.
  let (combinedPubkey, combinedSignature) =
    combine(secureRandomBytes, multiSet.pubkeys, multiSet.signatures)
  result = (combinedPubkey, multiSet.message, combinedSignature)

func init*(T: type BatchedBLSVerifierCache): T =
## Initialise the cache for single-threaded usage
BatchedBLSVerifierCache(
Expand Down
54 changes: 30 additions & 24 deletions blscurve/blst/blst_abi.nim
Original file line number Diff line number Diff line change
Expand Up @@ -320,43 +320,49 @@ proc blst_p2_affine_is_inf*(a: ptr cblst_p2_affine): CTbool
proc blst_p2_affine_generator*(): ptr cblst_p2_affine

# Multi-scalar multiplications and other multi-point operations.
#
# NOTE(review): every binding in this section that takes an array argument is
# listed twice in a row. The first listing types those arguments as a bare
# value (`cblst_p1`, `cblst_p1_affine`, `byte`, ...) or as
# `UncheckedArray[...]`; the second types them as `ptr` / `ptr ptr`.
# Presumably these are the removed and added sides of a diff hunk - confirm
# against the blst C header which form is current before calling them.
# The `*_sizeof` procs take only sizes and are listed once.
proc blst_p1s_to_affine*(dst: UncheckedArray[cblst_p1_affine]; points: cblst_p1;
proc blst_p1s_to_affine*(dst: ptr cblst_p1_affine; points: ptr ptr cblst_p1;
npoints: uint)
proc blst_p1s_add*(ret: ptr cblst_p1; points: cblst_p1_affine; npoints: uint)
proc blst_p1s_add*(ret: ptr cblst_p1;
points: ptr ptr cblst_p1_affine; npoints: uint)
proc blst_p1s_mult_wbits_precompute_sizeof*(wbits: uint; npoints: uint): uint
proc blst_p1s_mult_wbits_precompute*(table: UncheckedArray[cblst_p1_affine];
wbits: uint; points: cblst_p1_affine;
proc blst_p1s_mult_wbits_precompute*(table: ptr cblst_p1_affine; wbits: uint;
points: ptr ptr cblst_p1_affine;
npoints: uint)
proc blst_p1s_mult_wbits_scratch_sizeof*(npoints: uint): uint
proc blst_p1s_mult_wbits*(ret: ptr cblst_p1;
table: UncheckedArray[cblst_p1_affine]; wbits: uint;
npoints: uint; scalars: byte; nbits: uint;
proc blst_p1s_mult_wbits*(ret: ptr cblst_p1; table: ptr cblst_p1_affine;
wbits: uint; npoints: uint;
scalars: ptr ptr byte; nbits: uint;
scratch: ptr limb_t)
proc blst_p1s_mult_pippenger_scratch_sizeof*(npoints: uint): uint
proc blst_p1s_mult_pippenger*(ret: ptr cblst_p1; points: cblst_p1_affine;
npoints: uint; scalars: byte; nbits: uint;
proc blst_p1s_mult_pippenger*(ret: ptr cblst_p1;
points: ptr ptr cblst_p1_affine; npoints: uint;
scalars: ptr ptr byte; nbits: uint;
scratch: ptr limb_t)
proc blst_p1s_tile_pippenger*(ret: ptr cblst_p1; points: cblst_p1_affine;
npoints: uint; scalars: byte; nbits: uint;
proc blst_p1s_tile_pippenger*(ret: ptr cblst_p1;
points: ptr ptr cblst_p1_affine; npoints: uint;
scalars: ptr ptr byte; nbits: uint;
scratch: ptr limb_t; bit0: uint; window: uint)
# The `p2` bindings below mirror the `p1` bindings above one for one, with
# `cblst_p2` / `cblst_p2_affine` in place of `cblst_p1` / `cblst_p1_affine`.
proc blst_p2s_to_affine*(dst: UncheckedArray[cblst_p2_affine]; points: cblst_p2;
proc blst_p2s_to_affine*(dst: ptr cblst_p2_affine; points: ptr ptr cblst_p2;
npoints: uint)
proc blst_p2s_add*(ret: ptr cblst_p2; points: cblst_p2_affine; npoints: uint)
proc blst_p2s_add*(ret: ptr cblst_p2;
points: ptr ptr cblst_p2_affine; npoints: uint)
proc blst_p2s_mult_wbits_precompute_sizeof*(wbits: uint; npoints: uint): uint
proc blst_p2s_mult_wbits_precompute*(table: UncheckedArray[cblst_p2_affine];
wbits: uint; points: cblst_p2_affine;
proc blst_p2s_mult_wbits_precompute*(table: ptr cblst_p2_affine; wbits: uint;
points: ptr ptr cblst_p2_affine;
npoints: uint)
proc blst_p2s_mult_wbits_scratch_sizeof*(npoints: uint): uint
proc blst_p2s_mult_wbits*(ret: ptr cblst_p2;
table: UncheckedArray[cblst_p2_affine]; wbits: uint;
npoints: uint; scalars: byte; nbits: uint;
proc blst_p2s_mult_wbits*(ret: ptr cblst_p2; table: ptr cblst_p2_affine;
wbits: uint; npoints: uint;
scalars: ptr ptr byte; nbits: uint;
scratch: ptr limb_t)
proc blst_p2s_mult_pippenger_scratch_sizeof*(npoints: uint): uint
proc blst_p2s_mult_pippenger*(ret: ptr cblst_p2; points: cblst_p2_affine;
npoints: uint; scalars: byte; nbits: uint;
proc blst_p2s_mult_pippenger*(ret: ptr cblst_p2;
points: ptr ptr cblst_p2_affine; npoints: uint;
scalars: ptr ptr byte; nbits: uint;
scratch: ptr limb_t)
proc blst_p2s_tile_pippenger*(ret: ptr cblst_p2; points: cblst_p2_affine;
npoints: uint; scalars: byte; nbits: uint;
proc blst_p2s_tile_pippenger*(ret: ptr cblst_p2;
points: ptr ptr cblst_p2_affine; npoints: uint;
scalars: ptr ptr byte; nbits: uint;
scratch: ptr limb_t; bit0: uint; window: uint)

# Hash-to-curve operations.
Expand Down Expand Up @@ -446,8 +452,8 @@ proc blst_sign_pk_in_g2*(out_sig: ptr cblst_p1; hash: ptr cblst_p1;

# Pairing primitives: each binding writes through its leading `ret` /
# `Qlines` parameter.
proc blst_miller_loop*(ret: ptr cblst_fp12; Q: ptr cblst_p2_affine;
P: ptr cblst_p1_affine)
# NOTE(review): `blst_miller_loop_n` is listed twice - first with `Qs` / `Ps`
# typed as bare affine values, then as `ptr ptr`. Presumably these are the
# removed and added sides of a diff hunk; confirm against the blst C header.
proc blst_miller_loop_n*(ret: ptr cblst_fp12; Qs: cblst_p2_affine;
Ps: cblst_p1_affine; n: uint)
proc blst_miller_loop_n*(ret: ptr cblst_fp12; Qs: ptr ptr cblst_p2_affine;
Ps: ptr ptr cblst_p1_affine; n: uint)
proc blst_final_exp*(ret: ptr cblst_fp12; f: ptr cblst_fp12)
proc blst_precompute_lines*(Qlines: var array[68, cblst_fp6]; Q: ptr cblst_p2_affine)
proc blst_miller_loop_lines*(ret: ptr cblst_fp12; Qlines: array[68, cblst_fp6];
Expand Down
Loading

0 comments on commit 99b9c35

Please sign in to comment.