Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scroll dev 1220 #25

Closed
wants to merge 43 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
54c4d8a
speed up generate vk pk with multi-thread
LuozhuZhang Jul 15, 2022
d8d7235
Add parallel verifying process.
spherel Jun 24, 2022
81c654a
use binary_search to check if an input is included in table
kunxian-xia Aug 4, 2022
48c5fa9
make blinding rows for advice columns are 0, only last row is 1
lispc Jun 14, 2022
4a67031
random poly is full of 0
lispc Jul 6, 2022
15a90b3
add brief transcript with poseidon
lanbones Apr 4, 2022
ae82f75
make some struct fields pub for aggregation; use gwc by default
lispc Sep 1, 2022
a1b4726
clean up after merge
zhenfeizhang Sep 5, 2022
4b53eee
address comments
zhenfeizhang Sep 10, 2022
be443ce
bring back reader and writer
zhenfeizhang Sep 11, 2022
8c6b166
revert halo2_proofs/src/dev.rs as upstream
lispc Sep 20, 2022
1bd4afd
add is_none to Value
lispc Sep 21, 2022
b04667b
expose inner
lispc Sep 27, 2022
3a5a600
log pairing
lispc Oct 11, 2022
85bf2ed
poseidon hash to 63
lispc Oct 13, 2022
a960c7e
make query index public
lispc Oct 14, 2022
6f18f38
make g_lagrange public
lispc Oct 15, 2022
879487c
Merge commit 'a9e99a72a65d7c98e8a4258c2c94269c834d1c10' into scroll-d…
lispc Dec 13, 2022
9c2e786
lint
lispc Dec 13, 2022
5b38c99
fix
lispc Dec 13, 2022
3f85679
better logging with region shape
lispc Dec 13, 2022
255634a
better logging with region shape
lispc Dec 13, 2022
dedbb5e
lint
lispc Dec 13, 2022
15dd976
log region shape
lispc Dec 14, 2022
8860f03
log::warn region piling
lispc Dec 15, 2022
b46c23b
make num_fixed_columns public
lispc Dec 19, 2022
88f6fda
change some log level
lispc Dec 19, 2022
b46b5ac
fix empty region failure panic
lispc Dec 21, 2022
849b7b6
Merge branch 'scroll-dev-0902' into scroll-dev-1220
lispc Dec 21, 2022
2739742
add meta.max_phase()
lispc Dec 29, 2022
a6312fe
fix log level
lispc Dec 29, 2022
75a602b
add assignment timer
lispc Jan 2, 2023
9520076
add assignment timer
lispc Jan 2, 2023
0950603
creating pk no longer needs vk; avoid assignment twice
lispc Jan 3, 2023
3370852
multi-phases: make phases related field public
xgaozoyoe Jan 9, 2023
7724ce2
Revert "optimize fft"
Velaciela Jan 11, 2023
d5a75d7
optimize parallel fft
Velaciela Jan 11, 2023
18af374
disable profile by default
lispc Jan 11, 2023
ac93e82
Merge remote-tracking branch 'scroll/xgao/multi-phase' into scroll-de…
lispc Jan 12, 2023
b050cd9
Merge remote-tracking branch 'scroll/parallel_fft_opt' into scroll-de…
lispc Jan 12, 2023
1a92fa8
fmt
lispc Jan 18, 2023
7a62a6b
add phase-check feature
lispc Jan 18, 2023
c7c72fd
allow later phase to assign prev phase columns
lispc Jan 20, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
args: --verbose --release --all --all-features

build:
if: ${{ false }}
name: Build target ${{ matrix.target }}
runs-on: ubuntu-latest
strategy:
Expand Down
16 changes: 15 additions & 1 deletion halo2_proofs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,20 @@ halo2curves = { git = 'https://github.com/privacy-scaling-explorations/halo2curv
rand_core = { version = "0.6", default-features = false }
tracing = "0.1"
blake2b_simd = "1"
# pairing = { git = 'https://github.com/appliedzkp/pairing', package = "pairing_bn256", tag = "v0.1.1" }
subtle = "2.3"
cfg-if = "0.1"
poseidon = { git = 'https://github.com/appliedzkp/poseidon.git' } #, branch = 'circuit' }
num-integer = "0.1"
num-bigint = { version = "0.4", features = ["rand"] }

# Developer tooling dependencies
plotters = { version = "0.3.0", optional = true }
tabbycat = { version = "0.1", features = ["attributes"], optional = true }
log = "0.4.17"

# timer
ark-std = { version = "0.3.0" }

[dev-dependencies]
assert_matches = "1.5"
Expand All @@ -68,11 +78,15 @@ rand_core = { version = "0.6", default-features = false, features = ["getrandom"
getrandom = { version = "0.2", features = ["js"] }

[features]
default = ["batch"]
default = ["batch", "gwc"]
dev-graph = ["plotters", "tabbycat"]
gadget-traces = ["backtrace"]
sanity-checks = []
batch = ["rand_core/getrandom"]
shplonk = []
gwc = []
phase-check = []
profile = ["ark-std/print-trace"]

[lib]
bench = false
Expand Down
312 changes: 228 additions & 84 deletions halo2_proofs/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ use group::{

pub use halo2curves::{CurveAffine, CurveExt, FieldExt, Group};

pub const SPARSE_TWIDDLE_DEGREE: u32 = 10;

fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) {
let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();

Expand Down Expand Up @@ -169,108 +171,250 @@ pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu
///
/// This will use multithreading if beneficial.
pub fn best_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {
    // Split the transform across (a power-of-two number of) available threads.
    let threads = multicore::current_num_threads();
    let log_split = log2_floor(threads) as usize;
    let n = a.len();
    // Size of each per-thread sub-FFT and number of splits.
    let sub_n = n >> log_split;
    let split_m = 1 << log_split;

    // The parallel algorithm needs each sub-FFT to be at least as large as the
    // number of splits; for small inputs fall back to the serial FFT.
    if sub_n < split_m {
        serial_fft(a, omega, log_n);
    } else {
        parallel_fft(a, omega, log_n);
    }
}

/// Reverses the order of the lowest `l` bits of `n`.
fn bitreverse(mut n: usize, l: usize) -> usize {
    (0..l).fold(0, |reversed, _| {
        let low_bit = n & 1;
        n >>= 1;
        (reversed << 1) | low_bit
    })
}

fn serial_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {
let n = a.len() as u32;
assert_eq!(n, 1 << log_n);

for k in 0..n {
for k in 0..n as usize {
let rk = bitreverse(k, log_n as usize);
if k < rk {
a.swap(rk, k);
a.swap(rk as usize, k as usize);
}
}

// precompute twiddle factors
let twiddles: Vec<_> = (0..(n / 2) as usize)
.scan(G::Scalar::one(), |w, _| {
let tw = *w;
w.group_scale(&omega);
Some(tw)
})
.collect();

if log_n <= log_threads {
let mut chunk = 2_usize;
let mut twiddle_chunk = (n / 2) as usize;
for _ in 0..log_n {
a.chunks_mut(chunk).for_each(|coeffs| {
let (left, right) = coeffs.split_at_mut(chunk / 2);

// case when twiddle factor is one
let (a, left) = left.split_at_mut(1);
let (b, right) = right.split_at_mut(1);
let t = b[0];
b[0] = a[0];
a[0].group_add(&t);
b[0].group_sub(&t);

left.iter_mut()
.zip(right.iter_mut())
.enumerate()
.for_each(|(i, (a, b))| {
let mut t = *b;
t.group_scale(&twiddles[(i + 1) * twiddle_chunk]);
*b = *a;
a.group_add(&t);
b.group_sub(&t);
});
});
chunk *= 2;
twiddle_chunk /= 2;
let mut m = 1;
for _ in 0..log_n {
let w_m = omega.pow_vartime(&[u64::from(n / (2 * m)), 0, 0, 0]);

let mut k = 0;
while k < n {
let mut w = G::Scalar::one();
for j in 0..m {
let mut t = a[(k + j + m) as usize];
t.group_scale(&w);
a[(k + j + m) as usize] = a[(k + j) as usize];
a[(k + j + m) as usize].group_sub(&t);
a[(k + j) as usize].group_add(&t);
w *= &w_m;
}

k += 2 * m;
}
} else {
recursive_butterfly_arithmetic(a, n, 1, &twiddles)

m *= 2;
}
}

/// This perform recursive butterfly arithmetic
pub fn recursive_butterfly_arithmetic<G: Group>(
fn serial_split_fft<G: Group>(
a: &mut [G],
twiddle_lut: &[G::Scalar],
twiddle_scale: usize,
log_n: u32,
) {
let n = a.len() as u32;
assert_eq!(n, 1 << log_n);

let mut m = 1;
for _ in 0..log_n {
let omega_idx = twiddle_scale * n as usize / (2 * m as usize); // 1/2, 1/4, 1/8, ...
let low_idx = omega_idx % (1 << SPARSE_TWIDDLE_DEGREE);
let high_idx = omega_idx >> SPARSE_TWIDDLE_DEGREE;
let mut w_m = twiddle_lut[low_idx];
if high_idx > 0 {
w_m = w_m * twiddle_lut[(1 << SPARSE_TWIDDLE_DEGREE) + high_idx];
}

let mut k = 0;
while k < n {
let mut w = G::Scalar::one();
for j in 0..m {
let mut t = a[(k + j + m) as usize];
t.group_scale(&w);
a[(k + j + m) as usize] = a[(k + j) as usize];
a[(k + j + m) as usize].group_sub(&t);
a[(k + j) as usize].group_add(&t);
w *= &w_m;
}

k += 2 * m;
}

m *= 2;
}
}

fn split_radix_fft<G: Group>(
tmp: &mut [G],
a: &[G],
twiddle_lut: &[G::Scalar],
n: usize,
twiddle_chunk: usize,
twiddles: &[G::Scalar],
sub_fft_offset: usize,
log_split: usize,
) {
if n == 2 {
let t = a[1];
a[1] = a[0];
a[0].group_add(&t);
a[1].group_sub(&t);
} else {
let (left, right) = a.split_at_mut(n / 2);
rayon::join(
|| recursive_butterfly_arithmetic(left, n / 2, twiddle_chunk * 2, twiddles),
|| recursive_butterfly_arithmetic(right, n / 2, twiddle_chunk * 2, twiddles),
);
let split_m = 1 << log_split;
let sub_n = n >> log_split;

// we use out-place bitreverse here, split_m <= num_threads, so the buffer spase is small
// and it's is good for data locality
let mut t1 = vec![G::group_zero(); split_m];
// if unsafe code is allowed, a 10% performance improvement can be achieved
// let mut t1: Vec<G> = Vec::with_capacity(split_m as usize);
// unsafe{ t1.set_len(split_m as usize); }
for i in 0..split_m {
t1[bitreverse(i, log_split)] = a[(i * sub_n + sub_fft_offset)];
}
serial_split_fft(&mut t1, twiddle_lut, sub_n, log_split as u32);

let sparse_degree = SPARSE_TWIDDLE_DEGREE;
let omega_idx = sub_fft_offset as usize;
let low_idx = omega_idx % (1 << sparse_degree);
let high_idx = omega_idx >> sparse_degree;
let mut omega = twiddle_lut[low_idx];
if high_idx > 0 {
omega = omega * twiddle_lut[(1 << sparse_degree) + high_idx];
}
let mut w_m = G::Scalar::one();
for i in 0..split_m {
t1[i].group_scale(&w_m);
tmp[i] = t1[i];
w_m = w_m * omega;
}
}

// case when twiddle factor is one
let (a, left) = left.split_at_mut(1);
let (b, right) = right.split_at_mut(1);
let t = b[0];
b[0] = a[0];
a[0].group_add(&t);
b[0].group_sub(&t);

left.iter_mut()
.zip(right.iter_mut())
.enumerate()
.for_each(|(i, (a, b))| {
let mut t = *b;
t.group_scale(&twiddles[(i + 1) * twiddle_chunk]);
*b = *a;
a.group_add(&t);
b.group_sub(&t);
});
/// Builds a lookup table of powers of `omega` for a transform of size `1 << log_n`.
///
/// When `sparse_degree > log_n` the table is *dense*: simply
/// `[omega^0, omega^1, ..., omega^(2^log_n - 1)]`.
///
/// Otherwise the table is *sparse* (two-level): the first `2^sparse_degree`
/// entries are `omega^i` for small `i`, followed by entries of
/// `(omega^(2^sparse_degree))^j`, so `omega^k` is recovered as the product of
/// one low-part and one high-part entry (see `serial_split_fft`). When
/// `with_last_level` is false the high-degree part is one level (half) shorter.
pub fn generate_twiddle_lookup_table<F: Field>(
    omega: F,
    log_n: u32,
    sparse_degree: u32,
    with_last_level: bool,
) -> Vec<F> {
    let without_last_level = !with_last_level;
    let is_lut_len_large = sparse_degree > log_n;

    // dense: the sparse split would be larger than the full table, so store
    // every power of omega directly.
    if is_lut_len_large {
        let mut twiddle_lut = vec![F::zero(); (1 << log_n) as usize];
        parallelize(&mut twiddle_lut, |twiddle_lut, start| {
            // Each chunk starts at omega^start and steps by omega.
            let mut w_n = omega.pow_vartime(&[start as u64, 0, 0, 0]);
            for twiddle_lut in twiddle_lut.iter_mut() {
                *twiddle_lut = w_n;
                w_n = w_n * omega;
            }
        });
        return twiddle_lut;
    }

    // sparse: low-degree part (step omega) followed by high-degree part
    // (step omega^(2^sparse_degree)).
    let low_degree_lut_len = 1 << sparse_degree;
    let high_degree_lut_len = 1 << (log_n - sparse_degree - without_last_level as u32);
    let mut twiddle_lut = vec![F::zero(); (low_degree_lut_len + high_degree_lut_len) as usize];
    parallelize(
        &mut twiddle_lut[..low_degree_lut_len],
        |twiddle_lut, start| {
            let mut w_n = omega.pow_vartime(&[start as u64, 0, 0, 0]);
            for twiddle_lut in twiddle_lut.iter_mut() {
                *twiddle_lut = w_n;
                w_n = w_n * omega;
            }
        },
    );
    let high_degree_omega = omega.pow_vartime(&[(1 << sparse_degree) as u64, 0, 0, 0]);
    parallelize(
        &mut twiddle_lut[low_degree_lut_len..],
        |twiddle_lut, start| {
            let mut w_n = high_degree_omega.pow_vartime(&[start as u64, 0, 0, 0]);
            for twiddle_lut in twiddle_lut.iter_mut() {
                *twiddle_lut = w_n;
                w_n = w_n * high_degree_omega;
            }
        },
    );
    twiddle_lut
}

/// Multi-threaded in-place FFT of `a` with root of unity `omega`.
///
/// `a.len()` must equal `1 << log_n`. The transform is decomposed into
/// `split_m = 2^log_split` interleaved sub-transforms of size
/// `sub_n = n / split_m`, where `log_split` is derived from the available
/// thread count. The phases are: (1) per-offset column FFTs via
/// `split_radix_fft`, (2) a shuffle (transpose) into sub-FFT order,
/// (3) one serial sub-FFT per chunk with omega^split_m, and
/// (4) an unshuffle back into natural output order.
pub fn parallel_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {
    let n = a.len() as usize;
    assert_eq!(n, 1 << log_n);

    // One split per thread, rounded down to a power of two.
    let log_split = log2_floor(multicore::current_num_threads()) as usize;
    let split_m = 1 << log_split;
    let sub_n = n >> log_split as usize;
    // Sparse twiddle table shared (read-only) by all workers below.
    let twiddle_lut = generate_twiddle_lookup_table(omega, log_n, SPARSE_TWIDDLE_DEGREE, true);

    // split fft: each spawned task fills one `sub_n`-sized chunk of `tmp`
    // with `split_m`-point column FFTs of the strided input.
    let mut tmp = vec![G::group_zero(); n];
    // if unsafe code is allowed, a 10% performance improvement can be achieved
    // let mut tmp: Vec<G> = Vec::with_capacity(n);
    // unsafe{ tmp.set_len(n); }
    multicore::scope(|scope| {
        // Reborrow as shared references so they can be moved into each task.
        let a = &*a;
        let twiddle_lut = &*twiddle_lut;
        for (chunk_idx, tmp) in tmp.chunks_mut(sub_n).enumerate() {
            scope.spawn(move |_| {
                let split_fft_offset = chunk_idx * sub_n >> log_split;
                for (i, tmp) in tmp.chunks_mut(split_m).enumerate() {
                    let split_fft_offset = split_fft_offset + i;
                    split_radix_fft(tmp, a, twiddle_lut, n, split_fft_offset, log_split);
                }
            });
        }
    });

    // shuffle: transpose `tmp` (viewed as sub_n x split_m) back into `a`
    // so each sub-FFT's inputs are contiguous.
    parallelize(a, |a, start| {
        for (idx, a) in a.iter_mut().enumerate() {
            let idx = start + idx;
            let i = idx / sub_n;
            let j = idx % sub_n;
            *a = tmp[j * split_m + i];
        }
    });

    // sub fft: finish each contiguous chunk serially with omega^split_m.
    let new_omega = omega.pow_vartime(&[split_m as u64, 0, 0, 0]);
    multicore::scope(|scope| {
        for a in a.chunks_mut(sub_n) {
            scope.spawn(move |_| {
                serial_fft(a, new_omega, log_n - log_split as u32);
            });
        }
    });

    // copy & unshuffle: stage results in `tmp`, then interleave them back
    // into `a` in natural output order.
    let mask = (1 << log_split) - 1;
    parallelize(&mut tmp, |tmp, start| {
        for (idx, tmp) in tmp.iter_mut().enumerate() {
            let idx = start + idx;
            *tmp = a[idx];
        }
    });
    parallelize(a, |a, start| {
        for (idx, a) in a.iter_mut().enumerate() {
            let idx = start + idx;
            *a = tmp[sub_n * (idx & mask) + (idx >> log_split)];
        }
    });
}

/// Convert coefficient bases group elements to lagrange basis by inverse FFT.
Expand Down
Loading