diff --git a/aggregator/src/aggregation/decoder.rs b/aggregator/src/aggregation/decoder.rs index 55c2ba6afc..01f14b135e 100644 --- a/aggregator/src/aggregation/decoder.rs +++ b/aggregator/src/aggregation/decoder.rs @@ -16,7 +16,7 @@ use halo2_proofs::{ circuit::{AssignedCell, Layouter, Value}, halo2curves::bn256::Fr, plonk::{ - Advice, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase, VirtualCells, + Advice, Assigned, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase, VirtualCells }, poly::Rotation, }; @@ -927,8 +927,12 @@ impl SequencesDataDecoder { pub struct AssignedDecoderConfigExports { /// The RLC of the zstd encoded bytes, i.e. blob bytes. pub encoded_rlc: AssignedCell, + /// The length of encoded data. + pub encoded_len: AssignedCell, /// The RLC of the decoded bytes, i.e. batch bytes. pub decoded_rlc: AssignedCell, + /// The length of decoded data. + pub decoded_len: AssignedCell, } pub struct DecoderConfigArgs { @@ -1051,6 +1055,8 @@ impl DecoderConfig { fixed_table, }; + meta.enable_equality(config.decoded_len); + macro_rules! is_tag { ($var:ident, $tag_variant:ident) => { let $var = |meta: &mut VirtualCells| { @@ -4098,8 +4104,8 @@ impl DecoderConfig { challenges: &Challenges>, k: u32, // witgen_debug - // ) -> Result { - ) -> Result<(), Error> { + ) -> Result { + // ) -> Result<(), Error> { let mut pow_of_rand: Vec> = vec![Value::known(Fr::ONE)]; assert!(!block_info_arr.is_empty(), "Must have at least 1 block"); @@ -4185,7 +4191,7 @@ impl DecoderConfig { (1 << k) - self.unusable_rows(), )?; // TODO: use equality constraint for the exported_len and exported_rlc cell - let (_exported_len, _exported_rlc) = self.sequence_execution_config.assign( + let (exported_len, exported_rlc) = self.sequence_execution_config.assign( layouter, challenges, literal_datas @@ -4203,6 +4209,15 @@ impl DecoderConfig { layouter.assign_region( || "Decompression table region", |mut region| { + //////////////////////////////////////////////////////// + //////// Capture Copy Constraint/Export Cells ///////// + //////////////////////////////////////////////////////// + let mut last_encoded_rlc: Value = Value::known(Fr::zero()); + let mut encoded_rlc_cell: Option> = None; + let mut byte_idx_cell: Option> = None; + let mut last_decoded_len: Value = Value::known(Fr::zero()); + let mut decoded_length_cell: Option> = None; + ///////////////////////////////////////// /////////// Assign First Row /////////// ///////////////////////////////////////// @@ -4259,18 +4274,20 @@ impl DecoderConfig { }, )?; } - region.assign_advice( + encoded_rlc_cell = Some(region.assign_advice( || "encoded_rlc", self.encoded_rlc, i, || row.encoded_data.value_rlc, - )?; - region.assign_advice( + )?); + last_encoded_rlc = row.encoded_data.value_rlc; + decoded_length_cell = Some(region.assign_advice( || "decoded_len", self.decoded_len, i, || Value::known(Fr::from(row.decoded_data.decoded_len)), - )?; + )?); + last_decoded_len = Value::known(Fr::from(row.decoded_data.decoded_len)); ///////////////////////////////////////// ///// Assign Bitstream Decoder ///////// @@ -4684,17 +4701,25 @@ impl DecoderConfig { )?; } - let mut padding_count = 2usize; for idx in witness_rows.len()..((1 << k) - self.unusable_rows()) { - if padding_count > 0 { - region.assign_advice( - || "byte_idx", - self.byte_idx, - idx, - || Value::known(Fr::from(last_byte_idx + 1)), - )?; - padding_count -= 1; - } + byte_idx_cell = Some(region.assign_advice( + || "byte_idx", + self.byte_idx, + idx, + || Value::known(Fr::from(last_byte_idx + 1)), + )?); + encoded_rlc_cell = Some(region.assign_advice( + || "encoded_rlc", + self.encoded_rlc, + idx, + || last_encoded_rlc, + )?); + decoded_length_cell = Some(region.assign_advice( + || "decoded_len", + self.decoded_len, + idx, + || last_decoded_len, + )?); region.assign_advice( || "tag_config.tag", self.tag_config.tag, @@ -4764,19 +4789,16 @@ impl DecoderConfig { )?; } - Ok(()) - }, - )?; - - // witgen_debug - // pub struct AssignedDecoderConfigExports { - // /// The RLC of the zstd encoded bytes, i.e. blob bytes. - // pub encoded_rlc: AssignedCell, - // /// The RLC of the decoded bytes, i.e. batch bytes. - // pub decoded_rlc: AssignedCell, - // } + region.constrain_equal(exported_len.cell(), decoded_length_cell.as_ref().unwrap().cell())?; - Ok(()) + Ok(AssignedDecoderConfigExports { + encoded_rlc: encoded_rlc_cell.unwrap(), + encoded_len: byte_idx_cell.unwrap(), + decoded_len: exported_len.clone(), + decoded_rlc: exported_rlc.clone(), + }) + }, + ) } pub fn unusable_rows(&self) -> usize { diff --git a/aggregator/src/aggregation/decoder/seq_exec.rs b/aggregator/src/aggregation/decoder/seq_exec.rs index 0b0857b309..fea49ccc04 100644 --- a/aggregator/src/aggregation/decoder/seq_exec.rs +++ b/aggregator/src/aggregation/decoder/seq_exec.rs @@ -1094,8 +1094,9 @@ mod tests { inst.instruction_idx as usize, SequenceExecInfo::BackRef(r.clone()), )); - let matched_bytes = Vec::from(&outputs[r]); - outputs.extend(matched_bytes); + for ref_pos in r { + outputs.push(outputs[ref_pos]); + } } current_literal_pos = new_literal_pos; } @@ -1231,7 +1232,7 @@ mod tests { AddressTableRow::mock_samples_full([ [1, 4, 1, 1, 4, 8], [9, 1, 3, 6, 1, 4], - [3, 0, 4, 5, 6, 1], + [3, 0, 4, 5, 6, 1], // ref offset 3 while literal == 0 ]), ); @@ -1243,6 +1244,26 @@ mod tests { mock_prover.verify().unwrap(); } + #[test] + fn seq_exec_rle_like() { + // no instructions, we only copy literals to output + let circuit = SeqExecMock::mock_generate( + Vec::from("abcdef".as_bytes()), + AddressTableRow::mock_samples_full([ + [1, 4, 1, 1, 4, 8], + [9, 1, 3, 6, 1, 4], + [5, 0, 6, 2, 6, 1], // an RLE like inst, match len exceed match offset + ]), + ); + + assert_eq!(circuit.outputs, Vec::from("abcddeabcbcbcbcf".as_bytes())); + + let k = 12; + let mock_prover = + MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + } + #[test] fn seq_exec_no_tail_cp() { // no instructions, we only copy literals to output diff --git a/aggregator/src/aggregation/decoder/witgen.rs b/aggregator/src/aggregation/decoder/witgen.rs index c28db5f0e1..d6b61e4bce 100644 --- a/aggregator/src/aggregation/decoder/witgen.rs +++ b/aggregator/src/aggregation/decoder/witgen.rs @@ -6,6 +6,8 @@ use halo2_proofs::circuit::Value; use revm_precompile::HashMap; use std::io; +// witgen_debug +use std::io::Write; mod params; pub use params::*; @@ -794,102 +796,78 @@ fn process_sequences( &table_mlt, ), ] { - let mut tag_value_iter = - src[start_offset..end_offset] - .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); - - let mut tag_rlc_iter = - src[start_offset..end_offset] + if end_offset > start_offset { + let mut tag_value_iter = + src[start_offset..end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); + + let mut tag_rlc_iter = + src[start_offset..end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); + + let mut decoded: u64 = 0; + let mut n_acc: usize = 0; + let mut n_emitted: usize = 0; + let mut current_tag_value_acc = Value::known(F::zero()); + let mut current_tag_rlc_acc = Value::known(F::zero()); + let mut last_byte_idx: i64 = 0; + let mut from_pos: (i64, i64) = (1, 0); + let mut to_pos: (i64, i64) = (0, 0); + let kind = table.table_kind; + let mut next_symbol: i32 = -1; + let mut is_repeating_bit_boundary: HashMap = HashMap::new(); + + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + let mut last_symbol: i32 = 0; + + let bitstream_rows = bit_boundaries .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); - - let mut decoded: u64 = 0; - let mut n_acc: usize = 0; - let mut n_emitted: usize = 0; - let mut current_tag_value_acc = Value::known(F::zero()); - let mut current_tag_rlc_acc = Value::known(F::zero()); - let mut last_byte_idx: i64 = 0; - let mut from_pos: (i64, i64) = (1, 0); - let mut to_pos: (i64, i64) = (0, 0); - let kind = table.table_kind; - let mut next_symbol: i32 = -1; - let mut is_repeating_bit_boundary: HashMap = HashMap::new(); - - let multiplier = - (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); - let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; - let mut last_symbol: i32 = 0; + .enumerate() + .map(|(bit_boundary_idx, (bit_idx, value_read, value_decoded))| { + // Calculate byte and bit positions. Increment allocators. + from_pos = if next_symbol == -1 { (1, -1) } else { to_pos }; - let bitstream_rows = bit_boundaries - .iter() - .enumerate() - .map(|(bit_boundary_idx, (bit_idx, value_read, value_decoded))| { - // Calculate byte and bit positions. Increment allocators. - from_pos = if next_symbol == -1 { (1, -1) } else { to_pos }; + from_pos.1 += 1; + if from_pos.1 == 8 { + from_pos = (from_pos.0 + 1, 0); + } - from_pos.1 += 1; - if from_pos.1 == 8 { - from_pos = (from_pos.0 + 1, 0); - } + from_pos.1 = (from_pos.1 as u64).rem_euclid(8) as i64; - from_pos.1 = (from_pos.1 as u64).rem_euclid(8) as i64; + while from_pos.0 > last_byte_idx { + current_tag_value_acc = tag_value_iter.next().unwrap(); + current_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = from_pos.0; + } - while from_pos.0 > last_byte_idx { - current_tag_value_acc = tag_value_iter.next().unwrap(); - current_tag_rlc_acc = tag_rlc_iter.next().unwrap(); - last_byte_idx = from_pos.0; - } + let to_byte_idx = (bit_idx - 1) / 8; + let mut to_bit_idx = bit_idx - to_byte_idx * (N_BITS_PER_BYTE as u32) - 1; - let to_byte_idx = (bit_idx - 1) / 8; - let mut to_bit_idx = bit_idx - to_byte_idx * (N_BITS_PER_BYTE as u32) - 1; + if from_pos.0 < (to_byte_idx + 1) as i64 { + to_bit_idx += 8; + } - if from_pos.0 < (to_byte_idx + 1) as i64 { - to_bit_idx += 8; - } + to_pos = ((to_byte_idx + 1) as i64, to_bit_idx as i64); - to_pos = ((to_byte_idx + 1) as i64, to_bit_idx as i64); - - // Decide Fse decoding results - if bit_boundary_idx < 1 { - // Accuracy log bits - next_symbol += 1; - assert_eq!(value_read, value_decoded, "no varbit packing for AL bits"); - ( - 0, - n_emitted, - from_pos.0 as usize, - from_pos.1 as usize, - to_pos.0 as usize, - to_pos.1 as usize, - *value_read, - *value_decoded, - current_tag_value_acc, - current_tag_rlc_acc, - n_acc, - // FseDecoder-specific witness values - kind as u64, - table.table_size, - false, - false, - ) - } else if !is_repeating_bit_boundary.contains_key(&bit_boundary_idx) { - if n_acc >= (table.table_size as usize) { - // Trailing bits - assert_eq!( - value_read, value_decoded, - "no varbit packing for trailing bits" - ); + // Decide Fse decoding results + if bit_boundary_idx < 1 { + // Accuracy log bits + next_symbol += 1; + assert_eq!(value_read, value_decoded, "no varbit packing for AL bits"); ( - last_symbol as u64, + 0, n_emitted, from_pos.0 as usize, from_pos.1 as usize, @@ -904,46 +882,98 @@ fn process_sequences( kind as u64, table.table_size, false, - true, + false, ) - } else { - // Regular decoding state - assert!(next_symbol >= 0); - decoded = next_symbol as u64; - n_emitted += 1; - last_symbol = next_symbol; - next_symbol += 1; - match *value_decoded { - 0 => { - // When a symbol has a value==0, it signifies a case of prob=-1 (or - // probability "less than 1"), where - // such symbols are allocated states from the - // end and retreating. Exactly 1 state is allocated in this case. - n_acc += 1; - } - 1 => { - let mut repeating_bit_boundary_idx = bit_boundary_idx + 1; - loop { - let repeating_bits = - bit_boundaries[repeating_bit_boundary_idx].1; - next_symbol += repeating_bits as i32; // skip symbols - is_repeating_bit_boundary - .insert(repeating_bit_boundary_idx, true); - - if repeating_bits < 3 { - break; - } else { - repeating_bit_boundary_idx += 1; + } else if !is_repeating_bit_boundary.contains_key(&bit_boundary_idx) { + if n_acc >= (table.table_size as usize) { + // Trailing bits + assert_eq!( + value_read, value_decoded, + "no varbit packing for trailing bits" + ); + ( + last_symbol as u64, + n_emitted, + from_pos.0 as usize, + from_pos.1 as usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value_read, + *value_decoded, + current_tag_value_acc, + current_tag_rlc_acc, + n_acc, + // FseDecoder-specific witness values + kind as u64, + table.table_size, + false, + true, + ) + } else { + // Regular decoding state + assert!(next_symbol >= 0); + decoded = next_symbol as u64; + n_emitted += 1; + last_symbol = next_symbol; + next_symbol += 1; + match *value_decoded { + 0 => { + // When a symbol has a value==0, it signifies a case of prob=-1 (or + // probability "less than 1"), where + // such symbols are allocated states from the + // end and retreating. Exactly 1 state is allocated in this case. + n_acc += 1; + } + 1 => { + let mut repeating_bit_boundary_idx = bit_boundary_idx + 1; + loop { + let repeating_bits = + bit_boundaries[repeating_bit_boundary_idx].1; + next_symbol += repeating_bits as i32; // skip symbols + is_repeating_bit_boundary + .insert(repeating_bit_boundary_idx, true); + + if repeating_bits < 3 { + break; + } else { + repeating_bit_boundary_idx += 1; + } } } + _ => { + n_acc += (*value_decoded - 1) as usize; + } } - _ => { - n_acc += (*value_decoded - 1) as usize; - } - } + ( + decoded, + n_emitted, + from_pos.0 as usize, + from_pos.1 as usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value_read, + *value_decoded, + current_tag_value_acc, + current_tag_rlc_acc, + n_acc, + // FseDecoder-specific witness values + kind as u64, + table.table_size, + false, // repeating bits + false, // trailing bits + ) + } + } else { + // Repeating bits + let symbol = last_symbol as u64 + value_decoded; + last_symbol = symbol as i32; + assert_eq!( + value_read, value_decoded, + "no varbit packing for repeat-bits flag" + ); ( - decoded, + symbol, n_emitted, from_pos.0 as usize, from_pos.1 as usize, @@ -957,111 +987,85 @@ fn process_sequences( // FseDecoder-specific witness values kind as u64, table.table_size, - false, // repeating bits - false, // trailing bits + true, + false, ) } - } else { - // Repeating bits - let symbol = last_symbol as u64 + value_decoded; - last_symbol = symbol as i32; - assert_eq!( - value_read, value_decoded, - "no varbit packing for repeat-bits flag" - ); - ( - symbol, - n_emitted, - from_pos.0 as usize, - from_pos.1 as usize, - to_pos.0 as usize, - to_pos.1 as usize, - *value_read, - *value_decoded, - current_tag_value_acc, - current_tag_rlc_acc, - n_acc, - // FseDecoder-specific witness values - kind as u64, - table.table_size, - true, - false, - ) - } - }) - .collect::, - Value, - usize, - u64, - u64, - bool, - bool, - )>>(); - - // Transform bitstream rows into witness rows - for (j, row) in bitstream_rows.iter().enumerate() { - witness_rows.push(ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockSequenceFseCode, - tag_next: if idx > 1 { - ZstdTag::ZstdBlockSequenceData - } else { - ZstdTag::ZstdBlockSequenceFseCode + }) + .collect::, + Value, + usize, + u64, + u64, + bool, + bool, + )>>(); + + // Transform bitstream rows into witness rows + for (j, row) in bitstream_rows.iter().enumerate() { + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockSequenceFseCode, + tag_next: if idx > 1 { + ZstdTag::ZstdBlockSequenceData + } else { + ZstdTag::ZstdBlockSequenceFseCode + }, + block_idx, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceFseCode), + tag_len, + tag_idx: row.2 as u64, + tag_value, + tag_value_acc: row.8, + is_tag_change: j == 0, + tag_rlc, + tag_rlc_acc: row.9, }, - block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceFseCode), - tag_len, - tag_idx: row.2 as u64, - tag_value, - tag_value_acc: row.8, - is_tag_change: j == 0, - tag_rlc, - tag_rlc_acc: row.9, - }, - encoded_data: EncodedData { - byte_idx: (start_offset + row.2) as u64, - encoded_len, - value_byte: src[start_offset + row.2 - 1], - value_rlc, - reverse: false, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow { - bit_start_idx: row.3, - bit_end_idx: row.5, - bit_value: row.6, - is_zero_bit_read: false, - ..Default::default() - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len_acc, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: 0u8, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, - }, - fse_data: FseDecodingRow { - table_kind: row.11, - table_size: row.12, - symbol: row.0, - num_emitted: row.1 as u64, - value_decoded: row.7, - probability_acc: row.10 as u64, - is_repeat_bits_loop: row.13, - is_trailing_bits: row.14, - }, - }); + encoded_data: EncodedData { + byte_idx: (start_offset + row.2) as u64, + encoded_len, + value_byte: src[start_offset + row.2 - 1], + value_rlc, + reverse: false, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow { + bit_start_idx: row.3, + bit_end_idx: row.5, + bit_value: row.6, + is_zero_bit_read: false, + ..Default::default() + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc: last_row.decoded_data.decoded_len_acc, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: 0u8, + decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + }, + fse_data: FseDecodingRow { + table_kind: row.11, + table_size: row.12, + symbol: row.0, + num_emitted: row.1 as u64, + value_decoded: row.7, + probability_acc: row.10 as u64, + is_repeat_bits_loop: row.13, + is_trailing_bits: row.14, + }, + }); + } + last_row = witness_rows.last().cloned().unwrap(); } - last_row = witness_rows.last().cloned().unwrap(); } // Reconstruct LLTV, CMOTV, and MLTV which specifies bit actions for a specific state @@ -1696,9 +1700,19 @@ fn process_sequences( seq_exec_info.push(SequenceExec( inst.instruction_idx as usize, SequenceExecInfo::BackRef(r.clone()), - )); - let matched_bytes = Vec::from(&recovered_inputs[r]); - recovered_inputs.extend_from_slice(matched_bytes.as_slice()); + )); + if inst.match_length <= inst.actual_offset { + let matched_bytes = Vec::from(&recovered_inputs[r]); + recovered_inputs.extend_from_slice(matched_bytes.as_slice()); + } else { + // TODO(FV): Add support for repeated byte slice + let l = inst.match_length as usize; + let r = match_pos..recovered_inputs.len(); + let matched_bytes = Vec::from(&recovered_inputs[r]); + let total_matched_bytes: Vec = + matched_bytes.iter().cycle().take(l).copied().collect(); + recovered_inputs.extend_from_slice(total_matched_bytes.as_slice()); + } } current_literal_pos = new_literal_pos; } @@ -1719,6 +1733,14 @@ fn process_sequences( ); } + // witgen_debug + let stdout = io::stdout(); + let mut handle = stdout.lock(); + + // witgen_debug + // write!(handle, "=> decoded: {:?}", recovered_inputs).unwrap(); + // writeln!(handle).unwrap(); + ( end_offset, witness_rows, @@ -2004,6 +2026,7 @@ mod tests { // use bitstream_io::write; // use halo2_proofs::halo2curves::bn256::Fr; // use serde_json::from_str; + use std::fs; // witgen_debug use std::io::Write; @@ -2085,52 +2108,87 @@ mod tests { // } #[test] - fn batch_compression_zstd() -> Result<(), std::io::Error> { + fn test_zstd_witness_processing_batch_data() -> Result<(), std::io::Error> { + use super::*; use halo2_proofs::halo2curves::bn256::Fr; + // witgen_debug - // use hex::FromHex; + let stdout = io::stdout(); + let mut handle = stdout.lock(); + + let mut batch_files = fs::read_dir("./data")? + .map(|entry| entry.map(|e| e.path())) + .collect::, std::io::Error>>()?; + batch_files.sort(); + let batches = batch_files + .iter() + .map(fs::read_to_string) + .filter_map(|data| data.ok()) + .map(|data| hex::decode(data.trim_end()).expect("Failed to decode hex data")) + .collect::>>(); - use super::*; - // let raw = >::from_hex(r#"0100000000000231fb0000000064e588f7000000000000000000000000000000000000000000000000000000000000000000000000007a12000006000000000219f90216038510229a150083039bd49417afd0263d6909ba1f9a8eac697f76532365fb95880234e1a857498000b901a45ae401dc0000000000000000000000000000000000000000000000000000000064e58a1400000000000000000000000000000000000000000000000000000000000000400000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000e404e45aaf0000000000000000000000005300000000000000000000000000000000000004000000000000000000000000d9692f1748afee00face2da35242417dd05a86150000000000000000000000000000000000000000000000000000000000000bb8000000000000000000000000c3100d07a5997a7f9f9cdde967d396f9a2aed6a60000000000000000000000000000000000000000000000000234e1a8574980000000000000000000000000000000000000000000000000049032ac61d5dce9e600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083104ec1a053077484b4d7a88434c2d03c30c3c55bd3a82b259f339f1c0e1e1244189009c5a01c915dd14aed1b824bf610a95560e380ea3213f0bf345df3bddff1acaf7da84d000002d8f902d5068510229a1500830992fd94bbad0e891922a8a4a7e9c39d4cc0559117016fec87082b6be7f5b757b90264ac9650d800000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000004000000000000000000000000000000000000000000000000000000000000001e00000000000000000000000000000000000000000000000000000000000000164883164560000000000000000000000005300000000000000000000000000000000000004000000000000000000000000ffd2ece82f7959ae184d10fe17865d27b4f0fb9400000000000000000000000000000000000000000000000000000000000001f4fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffce9f6fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffcea0a00000000000000000000000000000000000000000000000000082b6be7f5b75700000000000000000000000000000000000000000000000000000000004c4b40000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006aea61ea08dd6e4834cd43a257ed52d9a31dd3b90000000000000000000000000000000000000000000000000000000064e58a1400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000412210e8a0000000000000000000000000000000000000000000000000000000083104ec2a0bc501c59bceb707d958423bad14c0d0daec84ad067f7e42209ad2cb8d904a55da00a04de4c79ed24b7a82d523b5de63c7ff68a3b7bb519546b3fe4ba8bc90a396600000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec2a037979a5225dd156f51abf9a8601e9156e1b1308c0474d69af98c55627886232ea048ac197295187e7ad48aa34cc37c2625434fa812449337732d8522014f4eacfc00000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec1a087269dbb9e987e5d58ecd3bcb724cbc4e6c843eb9095de16a25263aebfe06f5aa07f3ac49b6847ba51c5319174e51e088117742240f8555c5c1d77108cf0df90d700000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec1a04abdb8572dcabf1996825de6f753124eed41c1292fcfdc4d9a90cb4f8a0f8ff1a06ef25857e2cc9d0fa8b6ecc03b4ba6ef6f3ec1515d570fcc9102e2aa653f347a00000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec2a0882202163cbb9a299709b443b663fbab459440deabfbe183e999c98c00ea80c2a010ecb1e5196f0b1ee3d067d9a158b47b1376706e42ce2e769cf8e986935781dd"#) - // .expect("FromHex failure"); + for (batch_idx, raw_input_bytes) in batches.into_iter().enumerate() { + // witgen_debug + // if batch_idx == 127 { + // continue; + // } + + let compressed = { + // compression level = 0 defaults to using level=3, which is zstd's default. + let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; + + // disable compression of literals, i.e. literals will be raw bytes. + encoder.set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( + zstd::zstd_safe::ParamSwitch::Disable, + ))?; + // set target block size to fit within a single block. + encoder + .set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024))?; + // do not include the checksum at the end of the encoded data. + encoder.include_checksum(false)?; + // do not include magic bytes at the start of the frame since we will have a single + // frame. + encoder.include_magicbytes(false)?; + // set source length, which will be reflected in the frame header. + encoder.set_pledged_src_size(Some(raw_input_bytes.len() as u64))?; + // include the content size to know at decode time the expected size of decoded + // data. + encoder.include_contentsize(true)?; + + encoder.write_all(&raw_input_bytes)?; + encoder.finish()? + }; - // witgen_debug - let raw: Vec = String::from("Romeo and Juliet@Excerpt from Act 2, Scene 2@@JULIET@O Romeo, Romeo! wherefore art thou Romeo?@Deny thy father and refuse thy name;@Or, if thou wilt not, be but sworn my love,@And I'll no longer be a Capulet.@@ROMEO@[Aside] Shall I hear more, or shall I speak at this?@@JULIET@'Tis but thy name that is my enemy;@Thou art thyself, though not a Montague.@What's Montague? it is nor hand, nor foot,@Nor arm, nor face, nor any other part@Belonging to a man. O, be some other name!@What's in a name? that which we call a rose@By any other name would smell as sweet;@So Romeo would, were he not Romeo call'd,@Retain that dear perfection which he owes@Without that title. Romeo, doff thy name,@And for that name which is no part of thee@Take all myself.@@ROMEO@I take thee at thy word:@Call me but love, and I'll be new baptized;@Henceforth I never will be Romeo.@@JULIET@What man art thou that thus bescreen'd in night@So stumblest on my counsel?").as_bytes().to_vec(); - - let compressed = { - // compression level = 0 defaults to using level=3, which is zstd's default. - let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; - - // disable compression of literals, i.e. literals will be raw bytes. - encoder.set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( - zstd::zstd_safe::ParamSwitch::Disable, - ))?; - // set target block size to fit within a single block. - encoder.set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024))?; - // do not include the checksum at the end of the encoded data. - encoder.include_checksum(false)?; - // do not include magic bytes at the start of the frame since we will have a single - // frame. - encoder.include_magicbytes(false)?; - // set source length, which will be reflected in the frame header. - encoder.set_pledged_src_size(Some(raw.len() as u64))?; - // include the content size to know at decode time the expected size of decoded data. - encoder.include_contentsize(true)?; - - encoder.write_all(&raw)?; - encoder.finish()? - }; + // witgen_debug + // write!(handle, "=> compressed: {:?}", compressed).unwrap(); + // writeln!(handle).unwrap(); + + let ( + _witness_rows, + _decoded_literals, + _aux_data, + _fse_aux_tables, + _block_info_arr, + _sequence_info_arr, + _, + sequence_exec_result, + ) = process::(&compressed, Value::known(Fr::from(123456789))); + + let decoded_bytes = sequence_exec_result + .into_iter() + .flat_map(|r| r.recovered_bytes) + .collect::>(); - let ( - _witness_rows, - _decoded_literals, - _aux_data, - _fse_aux_tables, - _block_info_arr, - _sequence_info_arr, - _, - _, - ) = process::(&compressed, Value::known(Fr::from(123456789))); + // witgen_debug + write!(handle, "=> batch_idx: {:?}", batch_idx).unwrap(); + writeln!(handle).unwrap(); + + // witgen_debug + // write!(handle, "=> decoded: {:?}", decoded_bytes).unwrap(); + // writeln!(handle).unwrap(); + + assert!(raw_input_bytes == decoded_bytes); + } Ok(()) } diff --git a/aggregator/src/aggregation/decoder/witgen/types.rs b/aggregator/src/aggregation/decoder/witgen/types.rs index 8a02329400..f1796195f9 100644 --- a/aggregator/src/aggregation/decoder/witgen/types.rs +++ b/aggregator/src/aggregation/decoder/witgen/types.rs @@ -651,49 +651,48 @@ impl FseAuxiliaryTableData { let mut reader = BitReader::endian(Cursor::new(&data), LittleEndian); let mut bit_boundaries: Vec<(u32, u64, u64)> = vec![]; - // number of bits read by the bit-reader from the bistream. - let mut offset = 0; - - let accuracy_log = { - offset += 4; - reader.read::(offset)? + 5 - }; - bit_boundaries.push((offset, accuracy_log as u64 - 5, accuracy_log as u64 - 5)); - let table_size = 1 << accuracy_log; - //////////////////////////////////////////////////////////////////////////////////////// //////////////////////////// Parse Normalised Probabilities //////////////////////////// //////////////////////////////////////////////////////////////////////////////////////// let mut normalised_probs = BTreeMap::new(); - let mut R = table_size; - let mut symbol = 0; + let mut offset = 0; - if is_predefined { - let predefined_frequencies = match table_kind { - FseTableKind::LLT => { + let (accuracy_log, table_size) = if is_predefined { + let (predefined_frequencies, accuracy_log) = match table_kind { + FseTableKind::LLT => ( vec![ 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1, - ] - } - FseTableKind::MOT => { + ], + 6, + ), + FseTableKind::MOT => ( vec![ 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, - ] - } - FseTableKind::MLT => { + ], + 5, + ), + FseTableKind::MLT => ( vec![ 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, - ] - } + ], + 6, + ), }; for (symbol, freq) in predefined_frequencies.into_iter().enumerate() { normalised_probs.insert(symbol as u64, freq); } + (accuracy_log, 1 << accuracy_log) } else { + offset += 4; + let accuracy_log = reader.read::(offset)? + 5; + bit_boundaries.push((offset, accuracy_log as u64 - 5, accuracy_log as u64 - 5)); + let table_size = 1 << accuracy_log; + let mut R = table_size; + let mut symbol = 0; while R > 0 { // number of bits and value read from the variable bit-packed data. // And update the total number of bits read so far. @@ -755,7 +754,8 @@ impl FseAuxiliaryTableData { // remove N slots from a total of R. R -= N; } - } + (accuracy_log, table_size) + }; // ignore any bits left to be read until byte-aligned. let t = if is_predefined {