diff --git a/aggregator/src/aggregation/circuit.rs b/aggregator/src/aggregation/circuit.rs index 142b1a831e..c4d30bc61a 100644 --- a/aggregator/src/aggregation/circuit.rs +++ b/aggregator/src/aggregation/circuit.rs @@ -440,7 +440,7 @@ impl Circuit for AggregationCircuit { let batch_data = BatchData::from(&self.batch_hash); - let blob_data_exports = config.blob_data_config.assign( + let _blob_data_exports = config.blob_data_config.assign( &mut layouter, challenges, &config.rlc_config, diff --git a/aggregator/src/aggregation/decoder.rs b/aggregator/src/aggregation/decoder.rs index d958f18c32..b43d58ae3e 100644 --- a/aggregator/src/aggregation/decoder.rs +++ b/aggregator/src/aggregation/decoder.rs @@ -1,16 +1,19 @@ mod tables; -mod witgen; +mod seq_exec; +pub mod witgen; +use witgen::*; +use crate::aggregation::decoder::tables::FixedLookupTag; use gadgets::{ binary_number::{BinaryNumberChip, BinaryNumberConfig}, - comparator::{ComparatorChip, ComparatorConfig}, - is_equal::{IsEqualChip, IsEqualConfig}, - less_than::{LtChip, LtConfig}, + comparator::{ComparatorChip, ComparatorConfig, ComparatorInstruction}, + is_equal::{IsEqualChip, IsEqualConfig, IsEqualInstruction}, + less_than::{LtChip, LtConfig, LtInstruction}, util::{and, not, select, sum, Expr}, }; use halo2_proofs::{ arithmetic::Field, - circuit::{AssignedCell, Layouter}, + circuit::{AssignedCell, Layouter, Value}, halo2curves::bn256::Fr, plonk::{ Advice, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase, VirtualCells, @@ -22,20 +25,24 @@ use zkevm_circuits::{ evm_circuit::{BaseConstraintBuilder, ConstrainBuilderCommon}, table::{BitwiseOpTable, LookupTable, Pow2Table, PowOfRandTable, RangeTable, U8Table}, util::Challenges, + witness, }; -use crate::aggregation::decoder::tables::FixedLookupTag; - use self::{ - tables::{BitstringTable, FixedTable, FseTable, LiteralsHeaderTable}, + tables::{BitstringTable, FixedTable, FseTable, LiteralsHeaderTable, SeqInstTable}, + util::value_bits_le, witgen::{ 
FseTableKind, ZstdTag, N_BITS_PER_BYTE, N_BITS_REPEAT_FLAG, N_BITS_ZSTD_TAG, N_BLOCK_HEADER_BYTES, }, }; +use seq_exec::{LiteralTable, SequenceConfig, SeqExecConfig}; + #[derive(Clone, Debug)] pub struct DecoderConfig { + /// Fixed column to mark all the usable rows. + q_enable: Column, /// Fixed column to mark the first row in the layout. q_first: Column, /// The byte index in the encoded data. At the first byte, byte_idx = 1. @@ -72,22 +79,23 @@ pub struct DecoderConfig { pow2_table: Pow2Table<20>, /// Helper table for decoding the regenerated size from LiteralsHeader. literals_header_table: LiteralsHeaderTable, - /// Helper table for decoding bitstreams. + // /// Helper table for decoding bitstreams. bitstring_table: BitstringTable, /// Helper table for decoding FSE tables. fse_table: FseTable, - /// Helper table for sequences as instructions. - /// TODO(enable): sequence_instruction_table: SequenceInstructionTable, - /// Helper table in the "output" region for accumulating the result of executing sequences. - /// TODO(enable): sequence_execution_table: SequenceExecutionTable, + + // witgen_debug + // /// Helper table for sequences as instructions. + // /// TODO(enable): sequence_instruction_table: SequenceInstructionTable, + // /// Helper table in the "output" region for accumulating the result of executing sequences. + // /// TODO(enable): sequence_execution_table: SequenceExecutionTable, + /// Fixed lookups table. fixed_table: FixedTable, } #[derive(Clone, Debug)] struct TagConfig { - /// Marks all enabled rows. - q_enable: Column, /// The ZstdTag being processed at the current row. tag: Column, /// Tag decomposed as bits. 
This is useful in constructing conditional checks against the tag @@ -133,14 +141,12 @@ struct TagConfig { } impl TagConfig { - fn configure(meta: &mut ConstraintSystem) -> Self { - let q_enable = meta.fixed_column(); + fn configure(meta: &mut ConstraintSystem, q_enable: Column) -> Self { let tag = meta.advice_column(); let tag_idx = meta.advice_column(); let tag_len = meta.advice_column(); Self { - q_enable, tag, tag_bits: BinaryNumberChip::configure(meta, q_enable, Some(tag.into())), tag_next: meta.advice_column(), @@ -197,7 +203,7 @@ struct BlockConfig { } impl BlockConfig { - fn configure(meta: &mut ConstraintSystem, is_padding: Column) -> Self { + fn configure(meta: &mut ConstraintSystem, q_enable: Column) -> Self { let num_sequences = meta.advice_column(); Self { block_len: meta.advice_column(), @@ -207,7 +213,7 @@ impl BlockConfig { num_sequences, is_empty_sequences: IsEqualChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(num_sequences, Rotation::cur()), |_| 0.expr(), ), @@ -261,7 +267,6 @@ impl BlockConfig { ), ) } - fn is_empty_sequences( &self, meta: &mut VirtualCells, @@ -304,20 +309,20 @@ impl SequencesHeaderDecoder { fn configure( meta: &mut ConstraintSystem, byte: Column, - is_padding: Column, + q_enable: Column, u8_table: U8Table, ) -> Self { Self { byte0_lt_0x80: LtChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(byte, Rotation::cur()), |_| 0x80.expr(), u8_table.into(), ), byte0_lt_0xff: LtChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(byte, Rotation::cur()), |_| 0xff.expr(), u8_table.into(), @@ -359,58 +364,58 @@ impl SequencesHeaderDecoder { let comp_mode_bit0_ll = select::expr( 
byte0_lt_0x80.expr(), - meta.query_advice(bits[0], Rotation(1)), + meta.query_advice(bits[6], Rotation(1)), select::expr( byte0_lt_0xff.expr(), - meta.query_advice(bits[0], Rotation(2)), - meta.query_advice(bits[0], Rotation(3)), + meta.query_advice(bits[6], Rotation(2)), + meta.query_advice(bits[6], Rotation(3)), ), ); let comp_mode_bit1_ll = select::expr( byte0_lt_0x80.expr(), - meta.query_advice(bits[1], Rotation(1)), + meta.query_advice(bits[7], Rotation(1)), select::expr( byte0_lt_0xff.expr(), - meta.query_advice(bits[1], Rotation(2)), - meta.query_advice(bits[1], Rotation(3)), + meta.query_advice(bits[7], Rotation(2)), + meta.query_advice(bits[7], Rotation(3)), ), ); let comp_mode_bit0_om = select::expr( byte0_lt_0x80.expr(), - meta.query_advice(bits[2], Rotation(1)), + meta.query_advice(bits[4], Rotation(1)), select::expr( byte0_lt_0xff.expr(), - meta.query_advice(bits[2], Rotation(2)), - meta.query_advice(bits[2], Rotation(3)), + meta.query_advice(bits[4], Rotation(2)), + meta.query_advice(bits[4], Rotation(3)), ), ); let comp_mode_bit1_om = select::expr( byte0_lt_0x80.expr(), - meta.query_advice(bits[3], Rotation(1)), + meta.query_advice(bits[5], Rotation(1)), select::expr( byte0_lt_0xff.expr(), - meta.query_advice(bits[3], Rotation(2)), - meta.query_advice(bits[3], Rotation(3)), + meta.query_advice(bits[5], Rotation(2)), + meta.query_advice(bits[5], Rotation(3)), ), ); let comp_mode_bit0_ml = select::expr( byte0_lt_0x80.expr(), - meta.query_advice(bits[4], Rotation(1)), + meta.query_advice(bits[2], Rotation(1)), select::expr( byte0_lt_0xff.expr(), - meta.query_advice(bits[4], Rotation(2)), - meta.query_advice(bits[4], Rotation(3)), + meta.query_advice(bits[2], Rotation(2)), + meta.query_advice(bits[2], Rotation(3)), ), ); let comp_mode_bit1_ml = select::expr( byte0_lt_0x80.expr(), - meta.query_advice(bits[5], Rotation(1)), + meta.query_advice(bits[3], Rotation(1)), select::expr( byte0_lt_0xff.expr(), - meta.query_advice(bits[5], Rotation(2)), - 
meta.query_advice(bits[5], Rotation(3)), + meta.query_advice(bits[3], Rotation(2)), + meta.query_advice(bits[3], Rotation(3)), ), ); @@ -441,14 +446,14 @@ pub struct BitstreamDecoder { bit_index_end_cmp_15: ComparatorConfig, /// Helper gadget to know if the bitstring was spanned over 3 bytes. bit_index_end_cmp_23: ComparatorConfig, - /// The value of the binary bitstring. - bitstring_value: Column, /// When we have encountered a symbol with value=1, i.e. prob=0, it is followed by 2-bits /// repeat bits flag that tells us the number of symbols following the current one that also /// have a probability of prob=0. If the repeat bits flag itself is [1, 1], i.e. /// bitstring_value==3, then it is followed by another 2-bits repeat bits flag and so on. We /// utilise this equality config to identify these cases. bitstring_value_eq_3: IsEqualConfig, + /// The value of the binary bitstring. + bitstring_value: Column, /// Boolean that is set for a special case: /// - The bitstring that we have read in the current row is byte-aligned up to the next or the /// next-to-next byte. 
In this case, the next or the next-to-next following row(s) should have @@ -466,7 +471,8 @@ pub struct BitstreamDecoder { impl BitstreamDecoder { fn configure( meta: &mut ConstraintSystem, - is_padding: Column, + q_enable: Column, + q_first: Column, u8_table: U8Table, ) -> Self { let bit_index_start = meta.advice_column(); @@ -477,37 +483,42 @@ impl BitstreamDecoder { bit_index_end, bit_index_end_cmp_7: ComparatorChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(bit_index_end, Rotation::cur()), |_| 7.expr(), u8_table.into(), ), bit_index_end_cmp_15: ComparatorChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(bit_index_end, Rotation::cur()), |_| 15.expr(), u8_table.into(), ), bit_index_end_cmp_23: ComparatorChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(bit_index_end, Rotation::cur()), |_| 23.expr(), u8_table.into(), ), - bitstring_value, bitstring_value_eq_3: IsEqualChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(bitstring_value, Rotation::cur()), |_| 3.expr(), ), + bitstring_value, is_nil: meta.advice_column(), is_nb0: meta.advice_column(), start_unchanged: IsEqualChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| { + and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(q_first, Rotation::cur())), + ]) + }, |meta| meta.query_advice(bit_index_start, Rotation::prev()), |meta| meta.query_advice(bit_index_start, Rotation::cur()), ), @@ -657,7 +668,7 @@ impl BitstreamDecoder { #[derive(Clone, Debug)] pub struct 
FseDecoder { /// The FSE table that is being decoded in this tag. Possible values are: - /// - LLT = 0, MOT = 1, MLT = 2 + /// - LLT = 1, MOT = 2, MLT = 3 table_kind: Column, /// The number of states in the FSE table. table_size == 1 << AL, where AL is the accuracy log /// of the FSE table. @@ -685,7 +696,7 @@ pub struct FseDecoder { } impl FseDecoder { - fn configure(meta: &mut ConstraintSystem, is_padding: Column) -> Self { + fn configure(meta: &mut ConstraintSystem, q_enable: Column) -> Self { let value_decoded = meta.advice_column(); Self { table_kind: meta.advice_column(), @@ -697,13 +708,13 @@ impl FseDecoder { is_trailing_bits: meta.advice_column(), value_decoded_eq_0: IsEqualChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(value_decoded, Rotation::cur()), |_| 0.expr(), ), value_decoded_eq_1: IsEqualChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(value_decoded, Rotation::cur()), |_| 1.expr(), ), @@ -720,17 +731,17 @@ impl FseDecoder { * invert_of_2 } - fn is_mlt(&self, meta: &mut VirtualCells, rotation: Rotation) -> Expression { + fn is_mot(&self, meta: &mut VirtualCells, rotation: Rotation) -> Expression { let table_kind = meta.query_advice(self.table_kind, rotation); (table_kind.expr() - FseTableKind::LLT.expr()) - * (FseTableKind::MOT.expr() - table_kind.expr()) + * (FseTableKind::MLT.expr() - table_kind.expr()) } - fn is_mot(&self, meta: &mut VirtualCells, rotation: Rotation) -> Expression { + fn is_mlt(&self, meta: &mut VirtualCells, rotation: Rotation) -> Expression { let table_kind = meta.query_advice(self.table_kind, rotation); let invert_of_2 = Fr::from(2).invert().expect("infallible"); (table_kind.expr() - FseTableKind::LLT.expr()) - * (table_kind.expr() - FseTableKind::MLT.expr()) + * (table_kind.expr() - 
FseTableKind::MOT.expr()) * invert_of_2 } @@ -742,7 +753,7 @@ impl FseDecoder { ) -> Expression { let value_decoded = meta.query_advice(self.value_decoded, rotation); self.value_decoded_eq_0 - .expr_at(meta, rotation, value_decoded, 1.expr()) + .expr_at(meta, rotation, value_decoded, 0.expr()) } /// While reconstructing the FSE table, indicates whether a value=1 was found, i.e. prob=0. In @@ -946,11 +957,22 @@ impl DecoderConfig { // Fixed table let fixed_table = FixedTable::construct(meta); + // // TODO (enable later): + // let sequence_instruction_table = SeqInstTable::configure(meta); + + let (q_enable, q_first, byte_idx, byte, is_padding) = ( + meta.fixed_column(), + meta.fixed_column(), + meta.advice_column(), + meta.advice_column(), + meta.advice_column(), + ); // Helper tables - let literals_header_table = LiteralsHeaderTable::configure(meta, range8, range16); - let bitstring_table = BitstringTable::configure(meta, u8_table); + let literals_header_table = LiteralsHeaderTable::configure(meta, q_enable, range8, range16); + let bitstring_table = BitstringTable::configure(meta, q_enable, u8_table); let fse_table = FseTable::configure( meta, + q_enable, &fixed_table, u8_table, range8, @@ -960,37 +982,38 @@ impl DecoderConfig { // TODO(enable): let sequence_instruction_table = SequenceInstructionTable::configure(meta); // Peripheral configs - let (byte_idx, byte, is_padding) = ( - meta.advice_column(), - meta.advice_column(), - meta.advice_column(), - ); - let tag_config = TagConfig::configure(meta); - let block_config = BlockConfig::configure(meta, is_padding); + let tag_config = TagConfig::configure(meta, q_enable); + let block_config = BlockConfig::configure(meta, q_enable); let sequences_header_decoder = - SequencesHeaderDecoder::configure(meta, byte, is_padding, u8_table); - let bitstream_decoder = BitstreamDecoder::configure(meta, is_padding, u8_table); - let fse_decoder = FseDecoder::configure(meta, is_padding); + SequencesHeaderDecoder::configure(meta, 
byte, q_enable, u8_table); + let bitstream_decoder = BitstreamDecoder::configure(meta, q_enable, q_first, u8_table); + let fse_decoder = FseDecoder::configure(meta, q_enable); let sequences_data_decoder = SequencesDataDecoder::configure(meta); - // TODO(enable): - // let literals_table = [ - // tag_config.tag, - // block_config.block_idx, - // byte_idx, - // byte, - // is_padding, - // ]; - // let sequence_execution_table = SequenceExecutionTable::configure( + // // TODO enable later: + // let _sequence_execution_table = SeqExecConfig::configure( // meta, // challenges, - // &literals_table, + // &LiteralTable::construct([ + // tag_config.tag, + // block_config.block_idx, + // byte_idx, + // byte, + // tag_config.is_change, + // is_padding, + // ]), // &sequence_instruction_table, + // &SequenceConfig::construct([ + // block_config.is_block, + // block_config.block_idx, + // block_config.num_sequences, + // ]), // ); // Main config let config = Self { - q_first: meta.fixed_column(), + q_enable, + q_first, byte_idx, byte, bits: (0..N_BITS_PER_BYTE) @@ -1013,6 +1036,7 @@ impl DecoderConfig { literals_header_table, bitstring_table, fse_table, + // TODO(enable): sequence_instruction_table, // TODO(enable): sequence_execution_table, fixed_table, @@ -1040,6 +1064,17 @@ impl DecoderConfig { }; } + macro_rules! is_next_tag { + ($var:ident, $tag_variant:ident) => { + let $var = |meta: &mut VirtualCells| { + config + .tag_config + .tag_bits + .value_equals(ZstdTag::$tag_variant, Rotation::next())(meta) + }; + }; + } + is_tag!(is_null, Null); is_tag!(is_frame_header_descriptor, FrameHeaderDescriptor); is_tag!(is_frame_content_size, FrameContentSize); @@ -1047,14 +1082,14 @@ impl DecoderConfig { is_tag!(is_zb_literals_header, ZstdBlockLiteralsHeader); is_tag!(is_zb_raw_block, ZstdBlockLiteralsRawBytes); is_tag!(is_zb_sequence_header, ZstdBlockSequenceHeader); - is_tag!(is_zb_sequence_fse, ZstdBlockFseCode); - // TODO: update to ZstdBlockSequenceData once witgen code is merged. 
- is_tag!(is_zb_sequence_data, ZstdBlockHuffmanCode); + is_tag!(is_zb_sequence_fse, ZstdBlockSequenceFseCode); + is_tag!(is_zb_sequence_data, ZstdBlockSequenceData); is_prev_tag!(is_prev_frame_content_size, FrameContentSize); is_prev_tag!(is_prev_sequence_header, ZstdBlockSequenceHeader); - // TODO: update to ZstdBlockSequenceData once witgen code is merged. - is_prev_tag!(is_prev_sequence_data, ZstdBlockHuffmanCode); + is_prev_tag!(is_prev_sequence_data, ZstdBlockSequenceData); + + is_next_tag!(is_next_null, Null); meta.lookup("DecoderConfig: 0 <= encoded byte < 256", |meta| { vec![( @@ -1105,7 +1140,10 @@ impl DecoderConfig { }); meta.create_gate("DecoderConfig: all rows except the first row", |meta| { - let condition = not::expr(meta.query_fixed(config.q_first, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.q_first, Rotation::cur())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1123,7 +1161,10 @@ impl DecoderConfig { }); meta.create_gate("DecoderConfig: all non-padded rows", |meta| { - let condition = not::expr(meta.query_advice(config.is_padding, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1195,6 +1236,7 @@ impl DecoderConfig { "DecoderConfig: all non-padded rows except the first row", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.q_first, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -1249,6 +1291,7 @@ impl DecoderConfig { meta.create_gate("DecoderConfig: padded rows", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.is_padding, Rotation::prev()), meta.query_advice(config.is_padding, 
Rotation::cur()), ]); @@ -1269,8 +1312,13 @@ impl DecoderConfig { }); meta.lookup_any("DecoderConfig: fixed lookup (tag transition)", |meta| { - let condition = meta.query_fixed(config.q_first, Rotation::cur()) - + meta.query_advice(config.tag_config.is_change, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + sum::expr([ + meta.query_fixed(config.q_first, Rotation::cur()), + meta.query_advice(config.tag_config.is_change, Rotation::cur()), + ]), + ]); [ FixedLookupTag::TagTransition.expr(), @@ -1288,7 +1336,10 @@ impl DecoderConfig { }); meta.create_gate("DecoderConfig: new tag", |meta| { - let condition = meta.query_advice(config.tag_config.is_change, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + meta.query_advice(config.tag_config.is_change, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1357,6 +1408,7 @@ impl DecoderConfig { meta.create_gate("DecoderConfig: continue same tag", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.q_first, Rotation::cur())), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), @@ -1436,6 +1488,7 @@ impl DecoderConfig { meta.lookup_any("DecoderConfig: keccak randomness power tag_len", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -1457,7 +1510,10 @@ impl DecoderConfig { ////////////////////////////// ZstdTag::FrameHeaderDescriptor ///////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag FrameHeaderDescriptor", |meta| { - let condition = 
is_frame_header_descriptor(meta); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + is_frame_header_descriptor(meta), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1528,6 +1584,7 @@ impl DecoderConfig { /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag FrameContentSize", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_frame_content_size, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -1585,8 +1642,10 @@ impl DecoderConfig { }); meta.create_gate("DecoderConfig: tag FrameContentSize (block_idx)", |meta| { - let condition = - meta.query_advice(config.tag_config.is_frame_content_size, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + meta.query_advice(config.tag_config.is_frame_content_size, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1605,6 +1664,7 @@ impl DecoderConfig { /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag BlockHeader", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_block_header, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -1682,12 +1742,12 @@ impl DecoderConfig { // - if prev_tag=SequenceData: all sequences from prev block were decoded. 
cb.require_equal( "tag::prev in [FCS, SH, SD]", - meta.query_advice(config.tag_config.tag, Rotation::prev()), sum::expr([ is_prev_frame_content_size(meta), is_prev_sequence_header(meta), is_prev_sequence_data(meta), ]), + 1.expr(), ); cb.condition(is_prev_sequence_header(meta), |cb| { cb.require_equal( @@ -1728,7 +1788,10 @@ impl DecoderConfig { }); meta.create_gate("DecoderConfig: processing block content", |meta| { - let condition = meta.query_advice(config.block_config.is_block, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + meta.query_advice(config.block_config.is_block, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1779,6 +1842,7 @@ impl DecoderConfig { /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag ZstdBlockLiteralsHeader", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), is_zb_literals_header(meta), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -1875,7 +1939,10 @@ impl DecoderConfig { /////////////////////////// ZstdTag::ZstdBlockLiteralsRawBytes //////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag ZstdBlockLiteralsRawBytes", |meta| { - let condition = is_zb_raw_block(meta); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + is_zb_raw_block(meta), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1895,6 +1962,7 @@ impl DecoderConfig { /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag ZstdBlockSequenceHeader", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), is_zb_sequence_header(meta), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -1958,9 
+2026,8 @@ impl DecoderConfig { meta.query_advice(config.tag_config.tag_next, Rotation::cur()), select::expr( no_fse_tables, - // TODO: replace with SequencesData once witgen code is merged. - ZstdTag::ZstdBlockHuffmanCode.expr(), - ZstdTag::ZstdBlockFseCode.expr(), + ZstdTag::ZstdBlockSequenceData.expr(), + ZstdTag::ZstdBlockSequenceFseCode.expr(), ), ); @@ -2002,6 +2069,7 @@ impl DecoderConfig { |meta| { // The first row of a ZstdBlockSequenceFseCode tag. let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -2084,6 +2152,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceFseCode (table size)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -2107,8 +2176,12 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceFseCode (other rows)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), + not::expr( + meta.query_advice(config.fse_decoder.is_trailing_bits, Rotation::cur()), + ), ]); let mut cb = BaseConstraintBuilder::default(); @@ -2253,6 +2326,9 @@ impl DecoderConfig { // As we decode the normalised probability for each symbol in the FSE table, we // update the probability accumulator. It should be updated as long as we are // reading a bitstring and we are not in the repeat-bits loop. + // + // We skip the check for symbol on the first bitstring after the 4-bits for AL + // because this check has already been done on the "first row". 
cb.condition( and::expr([ config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), @@ -2273,7 +2349,11 @@ impl DecoderConfig { cb.require_equal( "fse: symbol increments", fse_symbol_cur.expr(), - fse_symbol_prev.expr() + 1.expr(), + select::expr( + meta.query_advice(config.tag_config.is_change, Rotation::prev()), + 0.expr(), + fse_symbol_prev.expr() + 1.expr(), + ), ); }, ); @@ -2312,6 +2392,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceFseCode (last row)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::next()), ]); @@ -2364,8 +2445,10 @@ impl DecoderConfig { meta.create_gate( "DecoderConfig: tag ZstdBlockSequenceFseCode (trailing bits)", |meta| { - let condition = - meta.query_advice(config.fse_decoder.is_trailing_bits, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + meta.query_advice(config.fse_decoder.is_trailing_bits, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -2373,7 +2456,7 @@ impl DecoderConfig { cb.require_equal( "tag=FseCode", meta.query_advice(config.tag_config.tag, Rotation::cur()), - ZstdTag::ZstdBlockFseCode.expr(), + ZstdTag::ZstdBlockSequenceFseCode.expr(), ); // 2. trailing bits only occur on the last row of the tag=FseCode section. 
@@ -2419,12 +2502,16 @@ impl DecoderConfig { // - except when we are in repeat-bits loop // - except the trailing bits (if they exist) let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), not::expr( meta.query_advice(config.fse_decoder.is_repeat_bits_loop, Rotation::cur()), ), + not::expr( + meta.query_advice(config.fse_decoder.is_trailing_bits, Rotation::cur()), + ), ]); let (table_size, probability_acc, value_read, value_decoded, num_bits) = ( @@ -2463,6 +2550,7 @@ impl DecoderConfig { // - except when we are in repeat-bits loop // - except the trailing bits (if they exist) let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), @@ -2518,6 +2606,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceData (sentinel row)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::cur()), ]); @@ -2570,6 +2659,7 @@ impl DecoderConfig { // bitstring. We also skip the row where we don't read a bitstring // (is_nil=true). 
let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), @@ -2605,6 +2695,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceData (sequences)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), @@ -2795,9 +2886,9 @@ impl DecoderConfig { ); cb.require_equal( "seq_idx increments", - meta.query_advice(config.sequences_data_decoder.idx, Rotation::cur()), + meta.query_advice(config.sequences_data_decoder.idx, Rotation::cur()), meta.query_advice(config.sequences_data_decoder.idx, Rotation::prev()) - + 1.expr(), + + 1.expr(), ); }, ); @@ -2866,6 +2957,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceData (last row)", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), meta.query_advice(config.tag_config.is_change, Rotation::next()), ]); @@ -2954,6 +3046,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceData (is_nil)", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), config.bitstream_decoder.is_nil(meta, Rotation::cur()), ]); @@ -2994,6 +3087,7 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceData (ROM sequence codes)", |meta| { // When we read a bitstring in tag=ZstdBlockSequenceData that is: + // - not the first row (sentinel row) // - not init state // - not update state // @@ -3006,7 +3100,9 @@ impl DecoderConfig { // // which is used in 
the next lookup to the SequenceInstructionTable. let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), + not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), config .sequences_data_decoder @@ -3150,9 +3246,15 @@ impl DecoderConfig { "DecoderConfig: tag ZstdBlockSequenceData (FseTable)", |meta| { let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), + not::expr( + config + .sequences_data_decoder + .is_init_state(meta, Rotation::cur()), + ), config .sequences_data_decoder .is_update_state(meta, Rotation::cur()), @@ -3208,7 +3310,11 @@ impl DecoderConfig { //////////////////////////////////// ZstdTag::Null //////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: tag=Null", |meta| { - let condition = meta.query_advice(config.tag_config.is_null, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + meta.query_advice(config.tag_config.is_null, Rotation::cur()), + not::expr(meta.query_advice(config.tag_config.is_null, Rotation::prev())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -3235,6 +3341,7 @@ impl DecoderConfig { // the following scenarios: // - end of block (is_last=true) with tag=SequenceData // - end of block (is_last=true) with tag=SequenceHeader and num_sequences=0 + // - the last tag ended OK cb.require_equal( "is_null: block::is_last=true on the previous row", meta.query_advice(config.block_config.is_last_block, Rotation::prev()), @@ -3248,10 +3355,14 @@ impl DecoderConfig { 
.block_config .is_empty_sequences(meta, Rotation::prev()), ZstdTag::ZstdBlockSequenceHeader.expr(), - // TODO: replace with ZstdBlockSequenceData when witgen is merged. - ZstdTag::ZstdBlockHuffmanCode.expr(), + ZstdTag::ZstdBlockSequenceData.expr(), ), ); + cb.require_equal( + "is_null: tag_idx::prev == tag_len::prev", + meta.query_advice(config.tag_config.tag_idx, Rotation::prev()), + meta.query_advice(config.tag_config.tag_len, Rotation::prev()), + ); cb.gate(condition) }); @@ -3263,7 +3374,10 @@ impl DecoderConfig { /////////////////////////////////////////////////////////////////////////////////////////// meta.create_gate("DecoderConfig: Bitstream Decoder (is_nil)", |meta| { // Bitstream decoder when we skip reading a bitstring at a row. - let condition = config.bitstream_decoder.is_nil(meta, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + config.bitstream_decoder.is_nil(meta, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -3279,10 +3393,11 @@ impl DecoderConfig { "bit_index_start' == 0", meta.query_advice(config.bitstream_decoder.bit_index_start, Rotation::next()), ); + cb.require_equal( - "if is_nil: byte_idx' == byte_idx", + "if is_nil: byte_idx' == byte_idx + 1", meta.query_advice(config.byte_idx, Rotation::next()), - meta.query_advice(config.byte_idx, Rotation::cur()), + meta.query_advice(config.byte_idx, Rotation::cur()) + 1.expr(), ); cb.require_zero( @@ -3303,7 +3418,12 @@ impl DecoderConfig { meta.create_gate("DecoderConfig: Bitstream Decoder (is_nb0)", |meta| { // Bitstream decoder when we read nb=0 bits from the bitstream. - let condition = config.bitstream_decoder.is_nb0(meta, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + config.bitstream_decoder.is_nb0(meta, Rotation::cur()), + not::expr(is_next_null(meta)), /* Exclude last block's bitstream tail row. + * Transition to Null. 
*/ + ]); let mut cb = BaseConstraintBuilder::default(); @@ -3347,6 +3467,7 @@ impl DecoderConfig { |meta| { // Bitstream decoder when the bitstring to be read is not nil. let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(config.bitstream_decoder.is_nil(meta, Rotation::cur())), not::expr(config.bitstream_decoder.is_nb0(meta, Rotation::cur())), sum::expr([ @@ -3414,26 +3535,27 @@ impl DecoderConfig { }, ); + // witgen_debug // 2. bitstring is byte-aligned: bit_index_end == 7. - cb.condition( - config - .bitstream_decoder - .aligned_one_byte(meta, Rotation::cur()), - |cb| { - cb.require_equal( - "(case2): byte_idx' == byte_idx + 1", - meta.query_advice(config.byte_idx, Rotation::next()), - meta.query_advice(config.byte_idx, Rotation::cur()) + 1.expr(), - ); - cb.require_zero( - "(case2): bit_index_start' == 0", - meta.query_advice( - config.bitstream_decoder.bit_index_start, - Rotation::next(), - ), - ); - }, - ); + // cb.condition( + // config + // .bitstream_decoder + // .aligned_one_byte(meta, Rotation::cur()), + // |cb| { + // cb.require_equal( + // "(case2): byte_idx' == byte_idx + 1", + // meta.query_advice(config.byte_idx, Rotation::next()), + // meta.query_advice(config.byte_idx, Rotation::cur()) + 1.expr(), + // ); + // cb.require_zero( + // "(case2): bit_index_start' == 0", + // meta.query_advice( + // config.bitstream_decoder.bit_index_start, + // Rotation::next(), + // ), + // ); + // }, + // ); // 3. bitstring strictly spans over 2 bytes: 8 <= bit_index_end < 15. 
cb.condition( @@ -3604,9 +3726,12 @@ impl DecoderConfig { ); meta.create_gate("DecoderConfig: Bitstream Decoder", |meta| { - let condition = sum::expr([ - meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), - meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + sum::expr([ + meta.query_advice(config.tag_config.is_fse_code, Rotation::cur()), + meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), + ]), ]); let mut cb = BaseConstraintBuilder::default(); @@ -3748,10 +3873,842 @@ impl DecoderConfig { config } - pub fn assign( + pub fn assign( &self, layouter: &mut impl Layouter, - ) -> Result { - unimplemented!() + witness_rows: Vec>, + _aux_data: Vec, + fse_aux_tables: Vec, + block_info_arr: Vec, + sequence_info_arr: Vec, + challenges: &Challenges>, + k: u32, + // witgen_debug + // ) -> Result { + ) -> Result<(), Error> { + let mut pow_of_rand: Vec> = vec![Value::known(Fr::ONE)]; + + assert!(block_info_arr.len() > 0, "Must have at least 1 block"); + assert!(sequence_info_arr.len() > 0, "Must have at least 1 block"); + + let mut curr_block_info = block_info_arr[0]; + let mut curr_sequence_info = sequence_info_arr[0]; + + ///////////////////////////////////////// + //////// Load Auxiliary Tables ///////// + ///////////////////////////////////////// + self.range8.load(layouter)?; + self.range16.load(layouter)?; + self.fixed_table.load(layouter)?; + self.pow2_table.load(layouter)?; + + ///////////////////////////////////////////////////////// + //////// Assign FSE and Bitstream Accumulation ///////// + ///////////////////////////////////////////////////////// + self.fse_table.assign(layouter, fse_aux_tables, k)?; + self.bitstring_table + .assign(layouter, &block_info_arr, &witness_rows, k)?; + + ///////////////////////////////////////// + ///// Assign LiteralHeaderTable //////// + ///////////////////////////////////////// + let mut 
literal_headers: Vec<(u64, u64, (u64, u64, u64))> = vec![]; // (block_idx, byte_offset, (byte0, byte1, byte2)) + let literal_header_rows = witness_rows + .iter() + .filter(|r| r.state.tag == ZstdTag::ZstdBlockLiteralsHeader) + .map(|r| r.clone()) + .collect::>>(); + let max_block_idx = witness_rows + .iter() + .last() + .expect("Last row of witness exists.") + .state + .block_idx; + for curr_block_idx in 1..=max_block_idx { + let byte_idx = literal_header_rows + .iter() + .find(|r| r.state.block_idx == curr_block_idx) + .unwrap() + .encoded_data + .byte_idx; + + let literal_bytes = literal_header_rows + .iter() + .filter(|&r| r.state.block_idx == curr_block_idx) + .map(|r| r.encoded_data.value_byte as u64) + .collect::>(); + + literal_headers.push(( + curr_block_idx, + byte_idx, + ( + literal_bytes[0], + if literal_bytes.len() > 1 { + literal_bytes[1] + } else { + 0 + }, + if literal_bytes.len() > 2 { + literal_bytes[2] + } else { + 0 + }, + ), + )); + } + + self.literals_header_table + .assign(layouter, literal_headers)?; + + ///////////////////////////////////////// + ///// Assign Decompression Region ////// + ///////////////////////////////////////// + layouter.assign_region( + || "Decompression table region", + |mut region| { + ///////////////////////////////////////// + /////////// Assign First Row /////////// + ///////////////////////////////////////// + region.assign_fixed(|| "q_first", self.q_first, 0, || Value::known(Fr::one()))?; + for i in 0..((1 << k) - self.unusable_rows()) { + region.assign_fixed( + || "q_enable", + self.q_enable, + i, + || Value::known(Fr::one()), + )?; + } + let mut last_byte_idx = 0u64; + let mut last_bit_start_idx = 0u64; + + ///////////////////////////////////////// + ///////// Assign Witness Rows ////////// + ///////////////////////////////////////// + for (i, row) in witness_rows.iter().enumerate() { + region.assign_advice( + || "is_padding", + self.is_padding, + i, + || Value::known(Fr::zero()), + )?; + region.assign_advice( + 
|| "byte_idx", + self.byte_idx, + i, + || Value::known(Fr::from(row.encoded_data.byte_idx)), + )?; + last_byte_idx = row.encoded_data.byte_idx; + region.assign_advice( + || "byte", + self.byte, + i, + || Value::known(Fr::from(row.encoded_data.value_byte as u64)), + )?; + let bits = value_bits_le(row.encoded_data.value_byte); + let is_reverse = row.encoded_data.reverse; + for (idx, col) in self.bits.iter().rev().enumerate() { + region.assign_advice( + || "value_bits", + *col, + i, + || { + Value::known(Fr::from( + (if is_reverse { + bits[idx] + } else { + bits[N_BITS_PER_BYTE - idx - 1] + }) as u64, + )) + }, + )?; + } + region.assign_advice( + || "encoded_rlc", + self.encoded_rlc, + i, + || row.encoded_data.value_rlc, + )?; + region.assign_advice( + || "decoded_len", + self.decoded_len, + i, + || Value::known(Fr::from(row.decoded_data.decoded_len as u64)), + )?; + + ///////////////////////////////////////// + ///// Assign Bitstream Decoder ///////// + ///////////////////////////////////////// + region.assign_advice( + || "bit_index_start", + self.bitstream_decoder.bit_index_start, + i, + || Value::known(Fr::from(row.bitstream_read_data.bit_start_idx as u64)), + )?; + let start_unchanged = + IsEqualChip::construct(self.bitstream_decoder.start_unchanged.clone()); + start_unchanged.assign( + &mut region, + i, + Value::known(Fr::from(last_bit_start_idx as u64)), + Value::known(Fr::from(row.bitstream_read_data.bit_start_idx as u64)), + )?; + last_bit_start_idx = row.bitstream_read_data.bit_start_idx as u64; + + region.assign_advice( + || "bit_index_end", + self.bitstream_decoder.bit_index_end, + i, + || Value::known(Fr::from(row.bitstream_read_data.bit_end_idx as u64)), + )?; + region.assign_advice( + || "bitstring_value", + self.bitstream_decoder.bitstring_value, + i, + || Value::known(Fr::from(row.bitstream_read_data.bit_value as u64)), + )?; + region.assign_advice( + || "is_nb0", + self.bitstream_decoder.is_nb0, + i, + || 
Value::known(Fr::from(row.bitstream_read_data.is_zero_bit_read as u64)), + )?; + region.assign_advice( + || "is_nil", + self.bitstream_decoder.is_nil, + i, + || Value::known(Fr::from(row.bitstream_read_data.is_nil as u64)), + )?; + + let bit_index_end_cmp_7 = ComparatorChip::construct( + self.bitstream_decoder.bit_index_end_cmp_7.clone(), + ); + bit_index_end_cmp_7.assign( + &mut region, + i, + Fr::from(row.bitstream_read_data.bit_end_idx as u64), + Fr::from(7u64), + )?; + let bit_index_end_cmp_15 = ComparatorChip::construct( + self.bitstream_decoder.bit_index_end_cmp_15.clone(), + ); + bit_index_end_cmp_15.assign( + &mut region, + i, + Fr::from(row.bitstream_read_data.bit_end_idx as u64), + Fr::from(15u64), + )?; + let bit_index_end_cmp_23 = ComparatorChip::construct( + self.bitstream_decoder.bit_index_end_cmp_23.clone(), + ); + bit_index_end_cmp_23.assign( + &mut region, + i, + Fr::from(row.bitstream_read_data.bit_end_idx as u64), + Fr::from(23u64), + )?; + let bitstring_value_eq_3 = + IsEqualChip::construct(self.bitstream_decoder.bitstring_value_eq_3.clone()); + bitstring_value_eq_3.assign( + &mut region, + i, + Value::known(Fr::from(row.bitstream_read_data.bit_value as u64)), + Value::known(Fr::from(3u64)), + )?; + + ///////////////////////////////////////// + ////////// Assign Tag Config /////////// + ///////////////////////////////////////// + region.assign_advice( + || "tag_config.tag", + self.tag_config.tag, + i, + || Value::known(Fr::from(row.state.tag as u64)), + )?; + region.assign_advice( + || "tag_config.tag_next", + self.tag_config.tag_next, + i, + || Value::known(Fr::from(row.state.tag_next as u64)), + )?; + region.assign_advice( + || "tag_config.tag_len", + self.tag_config.tag_len, + i, + || Value::known(Fr::from(row.state.tag_len as u64)), + )?; + region.assign_advice( + || "tag_config.max_len", + self.tag_config.max_len, + i, + || Value::known(Fr::from(row.state.max_tag_len as u64)), + )?; + region.assign_advice( + || "tag_config.tag_idx", + 
self.tag_config.tag_idx, + i, + || Value::known(Fr::from(row.state.tag_idx as u64)), + )?; + + let is_sequence_data = row.state.tag == ZstdTag::ZstdBlockSequenceData; + region.assign_advice( + || "tag_config.is_sequence_data", + self.tag_config.is_sequence_data, + i, + || Value::known(Fr::from(is_sequence_data as u64)), + )?; + + let is_frame_content_size = row.state.tag == ZstdTag::FrameContentSize; + region.assign_advice( + || "tag_config.is_frame_content_size", + self.tag_config.is_frame_content_size, + i, + || Value::known(Fr::from(is_frame_content_size as u64)), + )?; + + let is_block_header = row.state.tag == ZstdTag::BlockHeader; + region.assign_advice( + || "tag_config.is_block_header", + self.tag_config.is_block_header, + i, + || Value::known(Fr::from(is_block_header as u64)), + )?; + + let is_fse_code = row.state.tag == ZstdTag::ZstdBlockSequenceFseCode; + region.assign_advice( + || "tag_config.is_fse_code", + self.tag_config.is_fse_code, + i, + || Value::known(Fr::from(is_fse_code as u64)), + )?; + + let is_null = row.state.tag == ZstdTag::Null; + region.assign_advice( + || "tag_config.is_null", + self.tag_config.is_null, + i, + || Value::known(Fr::from(is_null as u64)), + )?; + + region.assign_advice( + || "tag_config.is_change", + self.tag_config.is_change, + i, + || Value::known(Fr::from((row.state.is_tag_change && i > 0) as u64)), + )?; + region.assign_advice( + || "tag_config.is_reverse", + self.tag_config.is_reverse, + i, + || Value::known(Fr::from(row.state.tag.is_reverse() as u64)), + )?; + region.assign_advice( + || "tag_config.tag_rlc", + self.tag_config.tag_rlc, + i, + || row.state.tag_rlc_acc, + )?; + region.assign_advice( + || "tag_config.is_output", + self.tag_config.is_output, + i, + || Value::known(Fr::from(row.state.tag.is_output() as u64)), + )?; + + let tag_len = row.state.tag_len as usize; + if tag_len >= pow_of_rand.len() { + let mut last = pow_of_rand + .last() + .expect("Last pow_of_rand exists.") + .clone(); + for _ in 
pow_of_rand.len()..=tag_len { + last = last * challenges.keccak_input(); + pow_of_rand.push(last.clone()); + } + } + region.assign_advice( + || "tag_config.rpow_tag_len", + self.tag_config.rpow_tag_len, + i, + || pow_of_rand[tag_len], + )?; + + let tag_idx_eq_tag_len = + IsEqualChip::construct(self.tag_config.tag_idx_eq_tag_len.clone()); + tag_idx_eq_tag_len.assign( + &mut region, + i, + Value::known(Fr::from(row.state.tag_idx as u64)), + Value::known(Fr::from(row.state.tag_len as u64)), + )?; + + let tag_chip = BinaryNumberChip::construct(self.tag_config.tag_bits); + tag_chip.assign(&mut region, i, &row.state.tag)?; + + ///////////////////////////////////////// + ///////// Assign Block Config ////////// + ///////////////////////////////////////// + let block_idx = row.state.block_idx; + let is_block = row.state.tag.is_block(); + let is_block_header = row.state.tag == BlockHeader; + + if is_block || is_block_header { + if block_idx != curr_block_info.block_idx as u64 { + curr_block_info = block_info_arr + .iter() + .find(|&b| b.block_idx == block_idx as usize) + .expect("Block info should exist") + .clone(); + } + if block_idx != curr_sequence_info.block_idx as u64 { + curr_sequence_info = sequence_info_arr + .iter() + .find(|&s| s.block_idx == block_idx as usize) + .expect("Sequence info should exist") + .clone(); + } + region.assign_advice( + || "block_config.block_len", + self.block_config.block_len, + i, + || Value::known(Fr::from(curr_block_info.block_len as u64)), + )?; + region.assign_advice( + || "block_config.block_idx", + self.block_config.block_idx, + i, + || Value::known(Fr::from(curr_block_info.block_idx as u64)), + )?; + region.assign_advice( + || "block_config.is_last_block", + self.block_config.is_last_block, + i, + || Value::known(Fr::from(curr_block_info.is_last_block as u64)), + )?; + region.assign_advice( + || "block_config.is_block", + self.block_config.is_block, + i, + || Value::known(Fr::from(is_block as u64)), + )?; + region.assign_advice( + 
|| "block_config.num_sequences", + self.block_config.num_sequences, + i, + || Value::known(Fr::from(curr_sequence_info.num_sequences as u64)), + )?; + + let table_names = ["LLT", "MOT", "MLT"]; + for idx in 0..3 { + region.assign_advice( + || table_names[idx], + self.block_config.compression_modes[idx], + i, + || { + Value::known(Fr::from( + curr_sequence_info.compression_mode[idx] as u64, + )) + }, + )?; + } + let is_empty_sequences = + IsEqualChip::construct(self.block_config.is_empty_sequences.clone()); + is_empty_sequences.assign( + &mut region, + i, + Value::known(Fr::from(curr_sequence_info.num_sequences as u64)), + Value::known(Fr::zero()), + )?; + } + + //////////////////////////////////////////////////////////// + ///////// Assign Extra Sequence Bitstream Fields ////////// + //////////////////////////////////////////////////////////// + region.assign_advice( + || "sequence_data_decoder.idx", + self.sequences_data_decoder.idx, + i, + || Value::known(Fr::from((row.bitstream_read_data.seq_idx) as u64)), + )?; + region.assign_advice( + || "sequence_data_decoder.is_init_state", + self.sequences_data_decoder.is_init_state, + i, + || Value::known(Fr::from(row.bitstream_read_data.is_seq_init as u64)), + )?; + + let seq_states = row.bitstream_read_data.states; + let seq_symbols = row.bitstream_read_data.symbols; + let tables = ["LLT", "MLT", "MOT"]; + + for idx in 0..3 { + region.assign_advice( + || format!("sequence_data_decoder.states: {:?}", tables[idx]), + self.sequences_data_decoder.states[idx], + i, + || Value::known(Fr::from(seq_states[idx])), + )?; + region.assign_advice( + || format!("sequence_data_decoder.symbols: {:?}", tables[idx]), + self.sequences_data_decoder.symbols[idx], + i, + || Value::known(Fr::from(seq_symbols[idx])), + )?; + region.assign_advice( + || format!("sequence_data_decoder.values: {:?}", tables[idx]), + self.sequences_data_decoder.values[idx], + i, + || Value::known(Fr::from(row.bitstream_read_data.values[idx])), + )?; + } + 
region.assign_advice( + || "sequence_data_decoder.is_update_state", + self.sequences_data_decoder.is_update_state, + i, + || Value::known(Fr::from(row.bitstream_read_data.is_update_state)), + )?; + region.assign_advice( + || "sequence_data_decoder.baseline", + self.sequences_data_decoder.baseline, + i, + || Value::known(Fr::from(row.bitstream_read_data.baseline as u64)), + )?; + let byte0_lt_0x80 = + LtChip::construct(self.sequences_header_decoder.byte0_lt_0x80); + byte0_lt_0x80.assign( + &mut region, + i, + Fr::from(row.encoded_data.value_byte as u64), + Fr::from(0x80), + )?; + let byte0_lt_0xff = + LtChip::construct(self.sequences_header_decoder.byte0_lt_0xff); + byte0_lt_0xff.assign( + &mut region, + i, + Fr::from(row.encoded_data.value_byte as u64), + Fr::from(0xff), + )?; + + //////////////////////////////////////////////// + ///////// Assign FSE Decoding Fields ////////// + //////////////////////////////////////////////// + region.assign_advice( + || "fse_decoder.table_kind", + self.fse_decoder.table_kind, + i, + || Value::known(Fr::from(row.fse_data.table_kind)), + )?; + region.assign_advice( + || "fse_decoder.table_size", + self.fse_decoder.table_size, + i, + || Value::known(Fr::from(row.fse_data.table_size)), + )?; + region.assign_advice( + || "fse_decoder.symbol", + self.fse_decoder.symbol, + i, + || Value::known(Fr::from(row.fse_data.symbol)), + )?; + region.assign_advice( + || "fse_decoder.value_decoded", + self.fse_decoder.value_decoded, + i, + || Value::known(Fr::from(row.fse_data.value_decoded)), + )?; + region.assign_advice( + || "fse_decoder.probability_acc", + self.fse_decoder.probability_acc, + i, + || Value::known(Fr::from(row.fse_data.probability_acc)), + )?; + region.assign_advice( + || "fse_decoder.is_repeat_bits_loop", + self.fse_decoder.is_repeat_bits_loop, + i, + || Value::known(Fr::from(row.fse_data.is_repeat_bits_loop)), + )?; + region.assign_advice( + || "fse_decoder.is_trailing_bits", + self.fse_decoder.is_trailing_bits, + i, + || 
Value::known(Fr::from(row.fse_data.is_trailing_bits)), + )?; + + let value_decoded_eq_0 = + IsEqualChip::construct(self.fse_decoder.value_decoded_eq_0.clone()); + value_decoded_eq_0.assign( + &mut region, + i, + Value::known(Fr::from(row.fse_data.value_decoded)), + Value::known(Fr::zero()), + )?; + let value_decoded_eq_1 = + IsEqualChip::construct(self.fse_decoder.value_decoded_eq_1.clone()); + value_decoded_eq_1.assign( + &mut region, + i, + Value::known(Fr::from(row.fse_data.value_decoded)), + Value::known(Fr::one()), + )?; + } + + let mut padding_count = 2usize; + for idx in witness_rows.len()..((1 << k) - self.unusable_rows()) { + if padding_count > 0 { + region.assign_advice( + || "byte_idx", + self.byte_idx, + idx, + || Value::known(Fr::from(last_byte_idx as u64)), + )?; + padding_count -= 1; + } + region.assign_advice( + || "tag_config.tag", + self.tag_config.tag, + idx, + || Value::known(Fr::from(ZstdTag::Null as u64)), + )?; + region.assign_advice( + || "is_padding", + self.is_padding, + idx, + || Value::known(Fr::one()), + )?; + let byte0_lt_0x80 = + LtChip::construct(self.sequences_header_decoder.byte0_lt_0x80); + byte0_lt_0x80.assign(&mut region, idx, Fr::zero(), Fr::from(0x80))?; + let byte0_lt_0xff = + LtChip::construct(self.sequences_header_decoder.byte0_lt_0xff); + byte0_lt_0xff.assign(&mut region, idx, Fr::zero(), Fr::from(0xff))?; + + // Bitstream decoder gadgets + let bit_index_end_cmp_7 = ComparatorChip::construct( + self.bitstream_decoder.bit_index_end_cmp_7.clone(), + ); + bit_index_end_cmp_7.assign(&mut region, idx, Fr::zero(), Fr::from(7u64))?; + let bit_index_end_cmp_15 = ComparatorChip::construct( + self.bitstream_decoder.bit_index_end_cmp_15.clone(), + ); + bit_index_end_cmp_15.assign(&mut region, idx, Fr::zero(), Fr::from(15u64))?; + let bit_index_end_cmp_23 = ComparatorChip::construct( + self.bitstream_decoder.bit_index_end_cmp_23.clone(), + ); + bit_index_end_cmp_23.assign(&mut region, idx, Fr::zero(), Fr::from(23u64))?; + let 
bitstring_value_eq_3 = + IsEqualChip::construct(self.bitstream_decoder.bitstring_value_eq_3.clone()); + bitstring_value_eq_3.assign( + &mut region, + idx, + Value::known(Fr::zero()), + Value::known(Fr::from(3u64)), + )?; + let start_unchanged = + IsEqualChip::construct(self.bitstream_decoder.start_unchanged.clone()); + start_unchanged.assign( + &mut region, + idx, + Value::known(Fr::from(last_bit_start_idx as u64)), + Value::known(Fr::zero()), + )?; + last_bit_start_idx = 0; + + // Fse decoder gadgets + let value_decoded_eq_0 = + IsEqualChip::construct(self.fse_decoder.value_decoded_eq_0.clone()); + value_decoded_eq_0.assign( + &mut region, + idx, + Value::known(Fr::zero()), + Value::known(Fr::zero()), + )?; + let value_decoded_eq_1 = + IsEqualChip::construct(self.fse_decoder.value_decoded_eq_1.clone()); + value_decoded_eq_1.assign( + &mut region, + idx, + Value::known(Fr::zero()), + Value::known(Fr::one()), + )?; + } + + Ok(()) + }, + )?; + + // witgen_debug + // pub struct AssignedDecoderConfigExports { + // /// The RLC of the zstd encoded bytes, i.e. blob bytes. + // pub encoded_rlc: AssignedCell, + // /// The RLC of the decoded bytes, i.e. batch bytes. 
+ // pub decoded_rlc: AssignedCell, + // } + + Ok(()) + } + + pub fn unusable_rows(&self) -> usize { + 30 + } +} + +#[cfg(test)] +mod tests { + use eth_types::Field; + use std::marker::PhantomData; + + use super::process; + use crate::{DecoderConfig, DecoderConfigArgs}; + use bitstream_io::write; + use halo2_proofs::{ + circuit::{Layouter, SimpleFloorPlanner}, + dev::MockProver, + halo2curves::bn256::Fr, + plonk::{Circuit, ConstraintSystem, Error}, + }; + use std::{ + fs::{self, File}, + io::{self, Write}, + }; + use zkevm_circuits::{ + table::{BitwiseOpTable, Pow2Table, PowOfRandTable, RangeTable, U8Table}, + util::Challenges, + }; + + #[derive(Clone, Debug, Default)] + struct DecoderConfigTester { + compressed: Vec, + k: u32, + } + + impl Circuit for DecoderConfigTester { + type Config = ( + DecoderConfig, + U8Table, + BitwiseOpTable, + PowOfRandTable, + Challenges, + ); + type FloorPlanner = SimpleFloorPlanner; + + fn without_witnesses(&self) -> Self { + unimplemented!() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + let challenges = Challenges::construct(meta); + let challenges_expr = challenges.exprs(meta); + + let pow_rand_table = PowOfRandTable::construct(meta, &challenges_expr); + let pow2_table = Pow2Table::construct(meta); + let u8_table = U8Table::construct(meta); + let range8 = RangeTable::construct(meta); + let range16 = RangeTable::construct(meta); + let bitwise_op_table = BitwiseOpTable::construct(meta); + + let config = DecoderConfig::configure( + meta, + &challenges_expr, + DecoderConfigArgs { + pow_rand_table, + pow2_table, + u8_table, + range8, + range16, + bitwise_op_table, + }, + ); + + ( + config, + u8_table, + bitwise_op_table, + pow_rand_table, + challenges, + ) + } + + #[allow(clippy::type_complexity)] + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + let (config, u8_table, bitwise_op_table, pow_rand_table, challenge) = config; + let challenges = 
challenge.values(&layouter); + + let ( + witness_rows, + _decoded_literals, + aux_data, + fse_aux_tables, + block_info_arr, + sequence_info_arr, + ) = process(&self.compressed, challenges.keccak_input()); + + u8_table.load(&mut layouter)?; + bitwise_op_table.load(&mut layouter)?; + pow_rand_table.assign(&mut layouter, &challenges, 1 << (self.k - 1))?; + config.assign::( + &mut layouter, + witness_rows, + aux_data, + fse_aux_tables, + block_info_arr, + sequence_info_arr, + &challenges, + self.k, + )?; + + Ok(()) + } + } + + #[test] + fn test_decoder_config_working_example() { + let raw: Vec = String::from("Romeo and Juliet@Excerpt from Act 2, Scene 2@@JULIET@O Romeo, Romeo! wherefore art thou Romeo?@Deny thy father and refuse thy name;@Or, if thou wilt not, be but sworn my love,@And I'll no longer be a Capulet.@@ROMEO@[Aside] Shall I hear more, or shall I speak at this?@@JULIET@'Tis but thy name that is my enemy;@Thou art thyself, though not a Montague.@What's Montague? it is nor hand, nor foot,@Nor arm, nor face, nor any other part@Belonging to a man. O, be some other name!@What's in a name? that which we call a rose@By any other name would smell as sweet;@So Romeo would, were he not Romeo call'd,@Retain that dear perfection which he owes@Without that title. Romeo, doff thy name,@And for that name which is no part of thee@Take all myself.@@ROMEO@I take thee at thy word:@Call me but love, and I'll be new baptized;@Henceforth I never will be Romeo.@@JULIET@What man art thou that thus bescreen'd in night@So stumblest on my counsel?").as_bytes().to_vec(); + + let compressed = { + // compression level = 0 defaults to using level=3, which is zstd's default. + let mut encoder = + zstd::stream::write::Encoder::new(Vec::new(), 0).expect("Encoder construction"); + + // disable compression of literals, i.e. literals will be raw bytes. 
+ encoder + .set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( + zstd::zstd_safe::ParamSwitch::Disable, + )) + .expect("Encoder set_parameter: LiteralCompressionMode"); + // set target block size to fit within a single block. + encoder + .set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024)) + .expect("Encoder set_parameter: TargetCBlockSize"); + // do not include the checksum at the end of the encoded data. + encoder + .include_checksum(false) + .expect("Encoder include_checksum: false"); + // do not include magic bytes at the start of the frame since we will have a single + // frame. + encoder + .include_magicbytes(false) + .expect("Encoder include magicbytes: false"); + // set source length, which will be reflected in the frame header. + encoder + .set_pledged_src_size(Some(raw.len() as u64)) + .expect("Encoder src_size: raw.len()"); + // include the content size to know at decode time the expected size of decoded data. + encoder + .include_contentsize(true) + .expect("Encoder include_contentsize: true"); + + encoder.write_all(&raw).expect("Encoder wirte_all"); + encoder.finish().expect("Encoder success") + }; + + let k = 18; + let decoder_config_tester = DecoderConfigTester { compressed, k }; + let mock_prover = MockProver::::run(k, &decoder_config_tester, vec![]).unwrap(); + mock_prover.assert_satisfied_par(); } } diff --git a/aggregator/src/aggregation/decoder/seq_exec.rs b/aggregator/src/aggregation/decoder/seq_exec.rs new file mode 100644 index 0000000000..6e4aff91c5 --- /dev/null +++ b/aggregator/src/aggregation/decoder/seq_exec.rs @@ -0,0 +1,1065 @@ + +use eth_types::Field; +use gadgets::{ + is_equal::*, + is_zero::*, + util::{and, or, not, select, Expr}, +}; +use halo2_proofs::{ + circuit::{Value, Region, Layouter}, + plonk::{Advice, Any, Column, ConstraintSystem, VirtualCells, Error, Expression, Fixed, SecondPhase}, + poly::Rotation, +}; +use zkevm_circuits::{ + evm_circuit::{BaseConstraintBuilder, 
ConstrainBuilderCommon}, + table::LookupTable, + util::Challenges, +}; +use crate::aggregation::decoder::witgen; +use witgen::{ZstdTag, SequenceInfo, SequenceExec, SequenceExecInfo}; +use super::tables; +use tables::SeqInstTable; + +/// TODO: This is in fact part of the `BlockConfig` in +/// Decoder, we can use BlockConfig if it is decoupled +/// from Decoder module later + +#[derive(Clone)] +pub struct SequenceConfig { + // the `is_block` flag in `BlockConfig` + enabled: Column, + // the index of block which the literal section is in + block_index: Column, + // Number of sequences decoded from the sequences section header in the block. + num_sequences: Column, +} + +impl SequenceConfig { + + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + seq_cfg: &SequenceInfo, + ) -> Result<(), Error>{ + + layouter.assign_region(||"seq cfg mock", + |mut region|{ + let mut offset = 0usize; + + for col in [self.enabled, self.block_index, self.num_sequences]{ + region.assign_advice(||"flush for non lookup", col, offset, ||Value::known(F::zero()))?; + } + + offset += 1; + for (col, val) in [ + (self.enabled, F::one()), + (self.block_index, F::from(seq_cfg.block_idx as u64)), + (self.num_sequences, F::from(seq_cfg.num_sequences as u64)), + ]{ + region.assign_advice(||"flush mock table", col, offset, ||Value::known(val))?; + } + + Ok(()) + } + ) + } + + /// construct table for rows: [enabled, blk_index, num_seq] + pub fn construct(cols: [Column;3]) -> Self { + Self { + enabled: cols[0], + block_index: cols[1], + num_sequences: cols[2], + } + } + + /// export the exps for literal copying lookup: [tag, blk_ind, byte_ind, char, padding] + pub fn lookup_tbl( + &self, + meta: &mut VirtualCells<'_, F> + ) -> [Expression; 3]{ + [ + meta.query_advice(self.enabled, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.num_sequences, Rotation::cur()), + ] + } +} + +/// The literal table which execution circuit expect to 
lookup from +#[derive(Clone)] +pub struct LiteralTable { + // the tag for current row in literal section + tag: Column, + // the index of block which the literal section is in + block_index: Column, + // the 1-indexed byte of byte of literal section's raw bytes + byte_index: Column, + // the corresponding char of current index + char: Column, + // the flag IN NEXT ROW is set to 1 indicate it is + // the last byte in current section + last_flag: Column, + // the flag should be 0 for a valid lookup row + padding_flag: Column, +} + +impl LiteralTable { + + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + literals: &[u64], + ) -> Result<(), Error>{ + + layouter.assign_region(||"literal tbl mock", + |mut region|{ + let mut offset = 0usize; + + for col in [self.tag, self.block_index, self.byte_index, self.char, self.last_flag, self.padding_flag]{ + region.assign_advice(||"flush for non lookup", col, offset, ||Value::known(F::zero()))?; + } + offset += 1; + // TODO: ensure the index in literal table is 0 or 1 indexed + for (i, char) in literals.iter().copied().enumerate() { + for (col, val) in [ + (self.tag, F::from(ZstdTag::ZstdBlockLiteralsRawBytes as u64)), + (self.block_index, F::one()), + (self.byte_index, F::from(i as u64 +1)), + (self.char, F::from(char)), + (self.last_flag, F::zero()), + (self.padding_flag, F::zero()), + ]{ + region.assign_advice(||"flush mock table", col, offset, ||Value::known(val))?; + } + offset += 1; + } + + for col in [self.byte_index, self.char, self.padding_flag]{ + region.assign_advice(||"flush dummy row for border", col, offset, ||Value::known(F::zero()))?; + } + region.assign_advice(||"set dummy border", self.tag, offset, ||Value::known(F::from(ZstdTag::ZstdBlockLiteralsRawBytes as u64)))?; + region.assign_advice(||"set dummy border", self.block_index, offset, ||Value::known(F::from(2 as u64)))?; + region.assign_advice(||"set dummy border", self.last_flag, offset, ||Value::known(F::one()))?; + + Ok(()) + } + 
) + } + + /// construct table for rows: [tag, blk_index, byte_index, char, last, padding] + pub fn construct(cols: [Column;6]) -> Self { + Self { + tag: cols[0], + block_index: cols[1], + byte_index: cols[2], + char: cols[3], + last_flag: cols[4], + padding_flag: cols[5], + } + } + + /// export the exps for literal copying lookup: [tag, blk_ind, byte_ind, char, padding] + pub fn lookup_tbl_for_lit_cp( + &self, + meta: &mut VirtualCells<'_, F> + ) -> [Expression; 5]{ + [ + meta.query_advice(self.tag, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.byte_index, Rotation::cur()), + meta.query_advice(self.char, Rotation::cur()), + meta.query_advice(self.padding_flag, Rotation::cur()), + ] + } + + /// export the exps for literal size lookup: [tag, blk_ind, byte_ind, flag, padding] + pub fn lookup_tbl_for_lit_size( + &self, + meta: &mut VirtualCells<'_, F> + ) -> [Expression; 5]{ + [ + meta.query_advice(self.tag, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.byte_index, Rotation::cur()), + meta.query_advice(self.last_flag, Rotation::next()), + meta.query_advice(self.padding_flag, Rotation::cur()), + ] + } +} + +/// SeqExecConfig handling the sequences in each block and output the +/// decompressed bytes +#[derive(Clone)] +pub struct SeqExecConfig { + // active flag, one active row parse + q_enabled: Column, + // indicate the row above active region + q_head: Column, + // 1-index for each block, keep the same for each row + // until all sequenced has been handled + block_index: Column, + // the 1-indexed seq number (1..=n_seq) for each + // sequence. 
+ seq_index: Column, + // the decoded length of output byte so it is start + // from 1 for the first output char + decoded_len: Column, + // the decoded byte under current index + decoded_byte: Column, + // the rlc of decoded output byte + decoded_rlc: Column, + /// An incremental accumulator of the number of bytes decoded so far. + decoded_len_acc: Column, + + // the flag indicate current seq is the special one + // (copying the rest bytes in literal section) + s_last_lit_cp_phase: Column, + // the flag indicate the execution is under + // "literal copying" phase + s_lit_cp_phase: Column, + // the flag indicate the execution is under + // back reference phase + s_back_ref_phase: Column, + // the copied index in literal section + literal_pos: Column, + // the back-ref pos + backref_pos: Column, + // counting the progress of back ref bytes + backref_progress: Column, + + // the flag indicate the execution has ended and rows + // are filled by padding data + is_padding: Expression, + // the flag exp indicate current row is the beginning + // of a new instruction, it is also the beginning of + // a literal copying + is_inst_begin: Expression, + // the flag indicate current row is the beginning of + // a new block + is_block_begin: Expression, +} + + +impl SeqExecConfig { + + /// Construct the sequence instruction table + /// the maxium rotation is prev(2), next(1) + pub fn configure( + meta: &mut ConstraintSystem, + challenges: &Challenges>, + literal_table: &LiteralTable, + inst_table: &SeqInstTable, + seq_config: &SequenceConfig, + ) -> Self { + let q_enabled = meta.fixed_column(); + let q_head = meta.fixed_column(); + let block_index = meta.advice_column(); + let seq_index = meta.advice_column(); + let decoded_len = meta.advice_column(); + let decoded_byte = meta.advice_column(); + let decoded_rlc = meta.advice_column_in(SecondPhase); + // TODO: constraint the len acc + let decoded_len_acc = meta.advice_column(); + let s_last_lit_cp_phase = meta.advice_column(); + 
let s_lit_cp_phase = meta.advice_column(); + let s_back_ref_phase = meta.advice_column(); + let backref_progress = meta.advice_column(); + let literal_pos = meta.advice_column(); + let backref_pos = meta.advice_column(); + + // need to constraint the final block index so + // we ensure all blocks has been handled + meta.enable_equality(block_index); + + // dummy init + let mut is_inst_begin = 0.expr(); + let mut is_block_begin = 0.expr(); + let mut is_padding = 0.expr(); + + meta.create_gate("borders", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + // boolean constraint that index is increment + cb.require_boolean("instruction border is boolean", is_inst_begin.expr()); + + is_block_begin = meta.query_advice(block_index, Rotation::cur()) + - meta.query_advice(block_index, Rotation::prev()); + + cb.require_boolean("block border is boolean", is_block_begin.expr()); + + is_inst_begin = select::expr( + is_block_begin.expr(), + 1.expr(), + meta.query_advice(seq_index, Rotation::cur()) + - meta.query_advice(seq_index, Rotation::prev()), + ); + + cb.require_boolean("inst border is boolean", is_inst_begin.expr()); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur()) + ) + }); + + meta.create_gate("phases", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let s_lit_cp_phase_next = meta.query_advice(s_lit_cp_phase, Rotation::next()); + let s_back_ref_phase_next = meta.query_advice(s_back_ref_phase, Rotation::next()); + let s_lit_cp_phase_prev = meta.query_advice(s_lit_cp_phase, Rotation::prev()); + let s_back_ref_phase_prev = meta.query_advice(s_back_ref_phase, Rotation::prev()); + let s_lit_cp_phase = meta.query_advice(s_lit_cp_phase, Rotation::cur()); + let s_back_ref_phase = meta.query_advice(s_back_ref_phase, Rotation::cur()); + + cb.require_boolean("phase is boolean", s_lit_cp_phase.expr()); + cb.require_boolean("phase is boolean", s_back_ref_phase.expr()); + + is_padding = 1.expr() - s_lit_cp_phase.expr() - s_back_ref_phase.expr(); + // 
constraint padding is boolean, so cp/back_ref phase is excluded + // i.e. two phases can not be enabled at the same time + cb.require_boolean("padding is boolean", is_padding.expr()); + + cb.condition(and::expr([ + not::expr(is_inst_begin.expr()), + not::expr(s_lit_cp_phase_prev.expr()), + ]), + |cb|{ + cb.require_equal("inside a inst, cp phase keep 0 once it changed to 0", + s_lit_cp_phase.expr(), + 0.expr(), + ); + }); + + cb.condition(and::expr([ + not::expr(is_inst_begin.expr()), + s_back_ref_phase_prev.expr(), + ]), + |cb|{ + cb.require_equal("inside a inst, backref phase keep 1 once it changed to 1", + s_back_ref_phase_prev.expr(), + 1.expr(), + ); + }); + + let is_padding_next = 1.expr() - s_lit_cp_phase_next.expr() - s_back_ref_phase_next.expr(); + cb.condition(is_padding.expr(), |cb|{ + cb.require_equal("padding never change once actived", + is_padding_next.expr(), + is_padding.expr(), + ); + }); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur()) + ) + }); + + meta.create_gate("last literal cp phase", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let s_last_lit_cp_phase_prev = meta.query_advice(s_last_lit_cp_phase, Rotation::prev()); + let s_last_lit_cp_phase = meta.query_advice(s_last_lit_cp_phase, Rotation::cur()); + cb.require_boolean("last lit_cp phase is boolean", s_last_lit_cp_phase.expr()); + + cb.condition(and::expr([ + s_last_lit_cp_phase.expr(), + not::expr(s_last_lit_cp_phase_prev.expr()), + ]), |cb|{ + cb.require_equal("phase can only be actived in inst border", + is_inst_begin.expr(), + 1.expr(), + ); + }); + + cb.condition(and::expr([ + s_last_lit_cp_phase_prev.expr(), + not::expr(is_block_begin.expr()), + ]), |cb|{ + cb.require_equal("phase must keep actived until block end", + s_last_lit_cp_phase_prev.expr(), + s_last_lit_cp_phase.expr(), + ); + }); + + cb.condition(s_last_lit_cp_phase.expr(), |cb|{ + cb.require_equal("lit cp must actived if last lit cp is actived", + meta.query_advice(s_lit_cp_phase, 
Rotation::cur()), + 1.expr(), + ); + }); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur()) + ) + }); + + meta.create_gate("lit cp phase pos", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let literal_pos_prev = meta.query_advice(literal_pos, Rotation::prev()); + let literal_pos = meta.query_advice(literal_pos, Rotation::cur()); + + let s_lit_cp_phase = meta.query_advice(s_lit_cp_phase, Rotation::cur()); + + let in_block_prog = select::expr( + s_lit_cp_phase.expr(), + literal_pos_prev.expr() + 1.expr(), + literal_pos_prev.expr(), + ); + cb.require_equal("lit cp is increment in one block", + select::expr( + is_block_begin.expr(), + // so we start at 1 if first row is lit cp + // or 0 if not + s_lit_cp_phase.expr(), + in_block_prog.expr(), + ), + literal_pos.expr(), + ); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur()) + ) + }); + + meta.create_gate("backref phase pos", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let backref_progress_prev = meta.query_advice(backref_progress, Rotation::prev()); + let backref_progress = meta.query_advice(backref_progress, Rotation::cur()); + + let s_back_ref_phase = meta.query_advice(s_back_ref_phase, Rotation::cur()); + + let back_ref_prog = select::expr( + s_back_ref_phase.expr(), + backref_progress_prev.expr() + 1.expr(), + backref_progress_prev.expr(), + ); + + cb.require_equal("backref progress is increment in one inst", + select::expr( + is_inst_begin.expr(), + // so we start at 1 if first row is lit cp + // or 0 if not + s_back_ref_phase.expr(), + back_ref_prog.expr(), + ), + backref_progress.expr(), + ); + + let backref_pos_prev = meta.query_advice(backref_pos, Rotation::prev()); + let backref_pos = meta.query_advice(backref_pos, Rotation::cur()); + + cb.condition( + not::expr(is_inst_begin.expr()), |cb|{ + cb.require_equal("backref position keep the same in one instruction", + backref_pos_prev.expr(), + backref_pos.expr(), + ); + } + ); + + cb.require_equal("backref progress 
keep the same in back ref phase", + select::expr( + is_inst_begin.expr(), + // so we start at 1 if first row is lit cp + // or 0 if not + s_back_ref_phase.expr(), + back_ref_prog.expr(), + ), + backref_pos.expr(), + ); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur()) + ) + }); + + meta.create_gate("output and paddings", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let decoded_len_prev = meta.query_advice(decoded_len, Rotation::prev()); + let decoded_rlc_prev = meta.query_advice(decoded_rlc, Rotation::prev()); + let decoded_len = meta.query_advice(decoded_len, Rotation::cur()); + let decoded_rlc = meta.query_advice(decoded_rlc, Rotation::cur()); + let decoded_byte = meta.query_advice(decoded_byte, Rotation::cur()); + + cb.require_equal( + "decoded len increase 1 in next row until paddings", + select::expr( + is_padding.expr(), + decoded_len_prev.expr(), + decoded_len_prev.expr() + 1.expr(), + ), + decoded_len.expr(), + ); + cb.condition( + is_padding.expr(), + |cb|cb.require_zero( + "while padding, byte is always zero", + decoded_byte.expr(), + ), + ); + + cb.require_equal("rlc accumulate", + decoded_rlc_prev.expr() * + (decoded_len.expr() - decoded_len_prev.expr()) + * challenges.evm_word() + decoded_byte.expr(), + decoded_rlc.expr(), + ); + + cb.gate(meta.query_fixed(q_head, Rotation::cur())) + }); + + // meta.create_gate("header", |meta|{ + // let mut cb = BaseConstraintBuilder::default(); + + // cb.gate(meta.query_fixed(q_head, Rotation::cur())) + // }); + + meta.lookup_any("the instruction from inst table", |meta|{ + + let q_enabled = meta.query_fixed(q_enabled, Rotation::prev()); + + let block_index = meta.query_advice(block_index, Rotation::prev()); + let seq_index = meta.query_advice(seq_index, Rotation::prev()); + let not_last_lit_cp = not::expr(meta.query_advice(s_last_lit_cp_phase, Rotation::prev())); + let literal_pos_at_inst_end = meta.query_advice(literal_pos, Rotation::prev()); + let backref_pos_at_inst_end = 
meta.query_advice(backref_pos, Rotation::prev()); + let backref_len_at_inst_end = meta.query_advice(backref_progress, Rotation::prev()); + + inst_table.instructions().into_iter().zip( + [ + block_index, + seq_index, + backref_pos_at_inst_end, + literal_pos_at_inst_end, + backref_len_at_inst_end + ] + ).map(|(lookup_col, src_expr)|{ + let lookup_expr = meta.query_advice(lookup_col, Rotation::cur()); + let src_expr = src_expr + * is_inst_begin.expr() + * not_last_lit_cp.expr() + * q_enabled.expr(); + assert!(src_expr.degree() <= 5); + (src_expr, lookup_expr) + }).collect() + }); + + meta.lookup_any("lit cp char", |meta|{ + let enabled = meta.query_fixed(q_enabled, Rotation::cur()) + * meta.query_advice(s_lit_cp_phase, Rotation::cur()); + + let block_index = meta.query_advice(block_index, Rotation::cur()); + let literal_pos = meta.query_advice(literal_pos, Rotation::cur()); + let cp_byte = meta.query_advice(decoded_byte, Rotation::cur()); + + let tbl_exprs = literal_table.lookup_tbl_for_lit_cp(meta); + tbl_exprs.into_iter().zip( + [ + ZstdTag::ZstdBlockLiteralsRawBytes.expr(), + block_index, + literal_pos, + cp_byte, + 0.expr(), + ] + ).map(|(lookup_expr, src_expr)|{ + (src_expr * enabled.expr(), lookup_expr) + }).collect() + }); + + meta.lookup_any("back ref char", |meta|{ + let enabled = meta.query_fixed(q_enabled, Rotation::cur()) + * meta.query_advice(s_back_ref_phase, Rotation::cur()); + + let block_index = meta.query_advice(block_index, Rotation::cur()); + let backref_pos = meta.query_advice(backref_pos, Rotation::cur()); + let cp_byte = meta.query_advice(decoded_byte, Rotation::cur()); + let decode_pos = meta.query_advice(decoded_len, Rotation::cur()); + let ref_pos = decode_pos.expr() - backref_pos.expr(); + + let tbl_exprs = [ + block_index.expr(), + decode_pos.expr(), + cp_byte.expr(), + ]; + tbl_exprs.into_iter().zip( + [ + block_index, + ref_pos, + cp_byte, + ] + ).map(|(lookup_expr, src_expr)|{ + (src_expr * enabled.expr(), lookup_expr) + }).collect() + 
}); + + meta.lookup_any("actual literal byte", |meta|{ + let q_enabled = meta.query_fixed(q_enabled, Rotation::prev()); + let block_index = meta.query_advice(block_index, Rotation::prev()); + let literal_pos_at_block_end = meta.query_advice(literal_pos, Rotation::prev()); + + let tbl_exprs = literal_table.lookup_tbl_for_lit_size(meta); + tbl_exprs.into_iter().zip( + [ + ZstdTag::ZstdBlockLiteralsRawBytes.expr(), + block_index, + literal_pos_at_block_end, + 1.expr(), + 0.expr(), + ] + ).map(|(lookup_expr, src_expr)|{ + (src_expr * is_block_begin.expr() * q_enabled.expr(), lookup_expr) + }).collect() + }); + + meta.lookup_any("instruction counts", |meta|{ + let q_enabled = meta.query_fixed(q_enabled, Rotation::prev()); + let block_index = meta.query_advice(block_index, Rotation::prev()); + let seq_index_at_block_end = + meta.query_advice(seq_index, Rotation::prev()) + // if we have a additional literal copying phase, we + // in fact has one extra instruction + - meta.query_advice(s_last_lit_cp_phase, Rotation::prev()); + + seq_config.lookup_tbl(meta).into_iter().zip( + [ + 1.expr(), + block_index, + seq_index_at_block_end, + ] + ).map(|(lookup_expr, src_expr)|{ + (src_expr * is_block_begin.expr() * q_enabled.expr(), lookup_expr) + }).collect() + }); + + Self { + q_enabled, + q_head, + block_index, + seq_index, + decoded_len, + decoded_byte, + decoded_rlc, + decoded_len_acc, + s_last_lit_cp_phase, + s_lit_cp_phase, + s_back_ref_phase, + backref_progress, + literal_pos, + backref_pos, + is_padding, + is_inst_begin, + is_block_begin, + } + } + + /// fill the rest region with padding rows + pub fn paddings<'a>( + &self, + region: &mut Region, + offset: usize, + till_offset: usize, + decoded_len: usize, + decoded_rlc: Value, + padded_block_ind: u64, + ) -> Result<(), Error>{ + + for offset in offset..=till_offset { + // flush one more row for rotation next() + if offset != till_offset { + region.assign_fixed( + ||"enable padding row", + self.q_enabled, + offset, + 
||Value::known(F::one()) + )?; + } + + for (col, val) in [ + (self.block_index, Value::known(F::from(padded_block_ind))), + (self.decoded_len, Value::known(F::from(decoded_len as u64))), + (self.decoded_rlc, decoded_rlc), + ]{ + region.assign_advice(||"set padding rows", + col, + offset, + ||val, + )?; + } + + + for col in [ + self.decoded_byte, + self.s_last_lit_cp_phase, + self.s_lit_cp_phase, + self.s_back_ref_phase, + self.backref_pos, + self.backref_progress, + self.literal_pos, + self.seq_index, + ] { + region.assign_advice(||"flush padding rows", + col, + offset, + ||Value::known(F::zero()), + )?; + } + } + + Ok(()) + + } + + /// assign a single block from current offset / byte decompression + /// progress and return the offset / progress below the last used row + pub fn assign_block<'a>( + &self, + region: &mut Region, + chng: Value, + mut offset: usize, + mut decoded_len: usize, + mut decoded_rlc: Value, + seq_info: &SequenceInfo, + seq_exec_infos: impl Iterator, + literals: &[u64], + // all of the decompressed bytes, not only current block + decompressed_bytes: &[u8], + ) -> Result<(usize, usize, Value), Error>{ + + let block_ind = seq_info.block_idx; + let mut cur_literal_cp = 0usize; + let last_exec = SequenceExec(seq_info.num_sequences+1, SequenceExecInfo::LastLiteralCopy); + + for SequenceExec(inst_ind, exec_info) in seq_exec_infos + .map(|v|v) // a trick to handle the lifetime issue + .chain(std::iter::once(&last_exec)) { + + let base_rows = [ + (self.block_index, F::from(block_ind as u64)), + (self.seq_index, F::from(*inst_ind as u64)), + ( + self.s_last_lit_cp_phase, + if *inst_ind > seq_info.num_sequences { + F::one() + }else { + F::zero() + }, + ), + ]; + + let (is_literal, r) = match exec_info { + SequenceExecInfo::LiteralCopy(r) => { + assert_eq!(cur_literal_cp, r.start); + cur_literal_cp = r.end; + (true, r.clone()) + }, + SequenceExecInfo::BackRef(r) => (false, r.clone()), + SequenceExecInfo::LastLiteralCopy => + (true, 
cur_literal_cp..literals.len()), + }; + + for (i, pos) in r.clone().enumerate() { + decoded_len += 1; + let out_byte = F::from( + if is_literal { + literals[pos as usize] + } else { + decompressed_bytes[pos as usize] as u64 + } + ); + decoded_rlc = decoded_rlc * chng + Value::known(out_byte); + + println!("set row at {}, output {}:{:x}", offset, decoded_len, out_byte.get_lower_32()); + + region.assign_advice( + ||"set output region", + self.decoded_rlc, offset, + ||decoded_rlc, + )?; + + let decodes = [ + ( + self.decoded_len, + F::from(decoded_len as u64), + + ), + ( + self.decoded_byte, + out_byte, + ), + ]; + + for (col, val) in base_rows.clone() + .into_iter() + .chain(decodes) + .chain( + if is_literal { + println!("literal cp {}-{}-{}", pos+1, 0, 0); + [ + (self.s_lit_cp_phase, F::one()), + (self.s_back_ref_phase, F::zero()), + (self.literal_pos, F::from(pos as u64+1)), + (self.backref_pos, F::zero()), + (self.backref_progress, F::zero()), + ] + } else { + println!("backref cp {}-{}-{}", cur_literal_cp, pos - i, i); + [ + (self.s_lit_cp_phase, F::one()), + (self.s_back_ref_phase, F::zero()), + (self.literal_pos, F::from(cur_literal_cp as u64)), + (self.backref_pos, F::from((pos - i) as u64)), + (self.backref_progress, F::from(i as u64)), + ] + } + ){ + region.assign_advice( + ||"set output region", + col, offset, + ||Value::known(val), + )?; + + } + + region.assign_fixed( + ||"enable row", + self.q_enabled, + offset, + ||Value::known(F::one()) + )?; + offset += 1; + } + } + + Ok((offset, decoded_len, decoded_rlc)) + } + + /// assign the top row + pub fn init_top_row( + &self, + region: &mut Region, + from_offset: Option, + ) -> Result{ + let offset = from_offset.unwrap_or_default(); + + for col in [ + self.decoded_byte, + self.decoded_len, + self.decoded_rlc, + self.block_index, + self.seq_index, + self.s_back_ref_phase, + self.s_lit_cp_phase, + self.s_back_ref_phase, + self.backref_pos, + self.literal_pos, + self.backref_progress, + ] { + 
region.assign_advice(||"top row fluash", col, offset, ||Value::known(F::zero()))?; + } + + Ok(offset+1) + } + + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + chng: &Challenges>, + n_seq: usize, + seq_exec_infos: &[SequenceExec], + literals: &[u8], + // all of the decompressed bytes, not only current block + decompressed_bytes: &[u8], + enabled_rows: usize, + ) -> Result<(), Error>{ + + let literals = literals.iter().copied().map(|b|b as u64).collect::>(); + + layouter.assign_region( + || "output region", + |mut region|{ + + let offset = self.init_top_row(&mut region, None)?; + let (offset, decoded_len, decoded_rlc) = self.assign_block( + &mut region, + chng.evm_word(), + offset, + 0, + Value::known(F::zero()), + &SequenceInfo { + block_idx: 1, + num_sequences: n_seq, + ..Default::default() + }, + seq_exec_infos.iter(), + &literals, + decompressed_bytes + )?; + self.paddings(&mut region, + offset, + enabled_rows, + decoded_len, + decoded_rlc, + 2 + )?; + + Ok(()) + } + ) + } + +} + + +#[cfg(test)] +mod tests { + + use halo2_proofs::{ + circuit::SimpleFloorPlanner, + dev::MockProver, + halo2curves::bn256::Fr, + plonk::Circuit, + }; + use super::*; + use witgen::AddressTableRow; + use zkevm_circuits::util::MockChallenges; + + #[derive(Clone, Debug)] + struct SeqExecMock { + outputs: Vec, + literal: Vec, + seq_conf: SequenceInfo, + insts: Vec, + exec_trace: Vec, + } + + #[derive(Clone)] + struct SeqExecMockConfig { + config: SeqExecConfig, + inst_tbl: SeqInstTable, + literal_tbl: LiteralTable, + seq_cfg: SequenceConfig, + chng_mock: MockChallenges, + } + + impl Circuit for SeqExecMock { + type Config = SeqExecMockConfig; + type FloorPlanner = SimpleFloorPlanner; + fn without_witnesses(&self) -> Self { + unimplemented!() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + + let const_col = meta.fixed_column(); + meta.enable_constant(const_col); + + let literal_tbl = LiteralTable::construct( + 
[0;6].map(|_|meta.advice_column()) + ); + + let seq_cfg = SequenceConfig::construct( + [0;3].map(|_|meta.advice_column()) + ); + + let inst_tbl = SeqInstTable::configure(meta); + + let chng_mock = MockChallenges::construct(meta); + let chng = chng_mock.exprs(meta); + + let config = SeqExecConfig::configure(meta, &chng, &literal_tbl, &inst_tbl, &seq_cfg); + + Self::Config{ + config, + literal_tbl, + inst_tbl, + seq_cfg, + chng_mock, + } + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + + config.literal_tbl.mock_assign(&mut layouter, + self.literal.iter().copied() + .map(|b|b as u64).collect::>().as_slice())?; + + config.seq_cfg.mock_assign(&mut layouter, &self.seq_conf)?; + + config.inst_tbl.mock_assign(&mut layouter, &self.insts, 15)?; + + let chng_val = config.chng_mock.values(&mut layouter); + + config.config.mock_assign( + &mut layouter, + &chng_val, + self.insts.len(), + &self.exec_trace, + &self.literal, + &self.outputs, + 50, + )?; + + Ok(()) + } + } + + fn build_table_row(samples: &[[u64;5]]) -> Vec { + let mut ret = Vec::::new(); + + for sample in samples { + let mut new_item = AddressTableRow { + cooked_match_offset: sample[0], + literal_length: sample[1], + repeated_offset1: sample[2], + repeated_offset2: sample[3], + repeated_offset3: sample[4], + actual_offset: sample[2], + ..Default::default() + }; + + if let Some(old_item) = ret.last() { + new_item.instruction_idx = old_item.instruction_idx + 1; + new_item.literal_length_acc = old_item.literal_length_acc + sample[1]; + } else { + new_item.literal_length_acc = sample[1]; + } + + ret.push(new_item); + } + + ret + } + + #[test] + fn seq_exec_literal_only(){ + + // no instructions, we only copy literals to output + let circuit = SeqExecMock{ + outputs: Vec::from("abcd".as_bytes()), + literal: Vec::from("abcd".as_bytes()), + seq_conf: SequenceInfo { + num_sequences: 0, + block_idx: 1, + ..Default::default() + }, + insts: Vec::new(), + 
exec_trace: Vec::new(), + }; + + let k = 12; + let mock_prover = MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + + } +} \ No newline at end of file diff --git a/aggregator/src/aggregation/decoder/tables.rs b/aggregator/src/aggregation/decoder/tables.rs index dbc08b22f4..0957cf238f 100644 --- a/aggregator/src/aggregation/decoder/tables.rs +++ b/aggregator/src/aggregation/decoder/tables.rs @@ -12,6 +12,9 @@ pub use fse::FseTable; mod literals_header; pub use literals_header::LiteralsHeaderTable; +mod seqinst_table; +/// Input for validating the sequence instruction comes from the parsed value +pub use seqinst_table::SeqInstTable; /// Fixed lookup table and its variants. mod fixed; pub use fixed::{predefined_fse, FixedLookupTag, FixedTable, PredefinedFse}; diff --git a/aggregator/src/aggregation/decoder/tables/bitstring.rs b/aggregator/src/aggregation/decoder/tables/bitstring.rs index 1ab3bb5539..b89fb35838 100644 --- a/aggregator/src/aggregation/decoder/tables/bitstring.rs +++ b/aggregator/src/aggregation/decoder/tables/bitstring.rs @@ -1,6 +1,6 @@ use gadgets::util::{and, not, select, Expr}; use halo2_proofs::{ - circuit::Layouter, + circuit::{Layouter, Value}, halo2curves::bn256::Fr, plonk::{Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed}, poly::Rotation, @@ -10,7 +10,11 @@ use zkevm_circuits::{ table::{LookupTable, RangeTable, U8Table}, }; -use crate::aggregation::decoder::witgen::ZstdWitnessRow; +use crate::aggregation::decoder::{ + util::value_bits_le, + witgen::{ZstdTag, ZstdWitnessRow}, + BlockInfo, +}; /// In the process of decoding zstd encoded data, there are several scenarios in which we process /// bits instead of bytes, for instance: @@ -104,7 +108,11 @@ pub struct BitstringTable { impl BitstringTable { /// Construct the bitstring accumulation table. 
- pub fn configure(meta: &mut ConstraintSystem, u8_table: U8Table) -> Self { + pub fn configure( + meta: &mut ConstraintSystem, + q_enable: Column, + u8_table: U8Table, + ) -> Self { let config = Self { q_first: meta.fixed_column(), byte_idx_1: meta.advice_column(), @@ -127,6 +135,7 @@ impl BitstringTable { meta.create_gate("BitstringTable: bit_index == 0", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_fixed(config.q_start, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -137,11 +146,6 @@ impl BitstringTable { .map(|i| meta.query_advice(config.bit, Rotation(i))) .collect::>>(); - let (byte_idx_1, byte_idx_2, byte_idx_3) = ( - meta.query_advice(config.byte_idx_1, Rotation::cur()), - meta.query_advice(config.byte_idx_2, Rotation::cur()), - meta.query_advice(config.byte_idx_3, Rotation::cur()), - ); let (byte_1, byte_2, byte_3) = ( meta.query_advice(config.byte_1, Rotation::cur()), meta.query_advice(config.byte_2, Rotation::cur()), @@ -226,17 +230,20 @@ impl BitstringTable { ); // from_start initialises at 1 + /* cb.require_equal( "if bit_index == 0: from_start == 1", meta.query_advice(config.from_start, Rotation::cur()), 1.expr(), ); + */ cb.gate(condition) }); meta.create_gate("BitstringTable: bit_index > 0", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.q_start, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -271,7 +278,10 @@ impl BitstringTable { }); meta.create_gate("BitstringTable: bitstring_value accumulation", |meta| { - let condition = not::expr(meta.query_advice(config.is_padding, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -298,6 +308,7 @@ impl BitstringTable { let delta = 
meta.query_advice(config.until_end, Rotation::next()) - meta.query_advice(config.until_end, Rotation::cur()); + /* cb.condition(is_end.expr(), |cb| { cb.require_equal( "if bit_index == 23: until_end == 1", @@ -305,6 +316,7 @@ impl BitstringTable { 1.expr(), ); }); + */ cb.condition(not::expr(is_end.expr()), |cb| { cb.require_boolean("until_end delta is boolean", delta); }); @@ -372,7 +384,10 @@ impl BitstringTable { }); meta.create_gate("BitstringTable: first row", |meta| { - let condition = meta.query_fixed(config.q_first, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + meta.query_fixed(config.q_first, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -385,7 +400,10 @@ impl BitstringTable { }); meta.create_gate("BitstringTable: padding", |meta| { - let condition = not::expr(meta.query_fixed(config.q_first, Rotation::cur())); + let condition = and::expr([ + not::expr(meta.query_fixed(config.q_first, Rotation::cur())), + meta.query_fixed(q_enable, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -404,6 +422,7 @@ impl BitstringTable { cb.gate(condition) }); + // witgen_debug // For every bitstring accumulation, the byte indices must be in the order in which // they appear in the rows assigned to the DecoderConfig. Which means: // - byte_idx_2 at the most increments by 1 compared to byte_idx_1. @@ -419,6 +438,7 @@ impl BitstringTable { // TODO: for a multi-block setup, the difference may be greater than 255. 
meta.lookup("BitstringTable: byte_idx_1 is increasing", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_fixed(config.q_start, Rotation::cur()), not::expr(meta.query_fixed(config.q_first, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), @@ -442,11 +462,273 @@ impl BitstringTable { pub fn assign( &self, layouter: &mut impl Layouter, + block_info_arr: &Vec, witness_rows: &[ZstdWitnessRow], + k: u32, ) -> Result<(), Error> { - unimplemented!(); - - Ok(()) + assert!(!witness_rows.is_empty()); + + layouter.assign_region( + || "Bitstring Accumulation Table", + |mut region| { + let mut offset: usize = 0; + region.assign_fixed(|| "q_first", self.q_first, 0, || Value::known(Fr::one()))?; + + for i in 0..(((1 << k) - 30) / 24) { + for bit_idx in 0..24 { + region.assign_fixed( + || "bit_index", + self.bit_index, + 24 * i + bit_idx, + || Value::known(Fr::from(bit_idx as u64)), + )?; + } + region.assign_fixed( + || "q_start", + self.q_start, + 24 * i, + || Value::known(Fr::one()), + )?; + } + + // Multi-block assignment + for block in block_info_arr { + // Fse decoding rows + let fse_position = witness_rows + .iter() + .position(|r| { + r.state.block_idx == (block.block_idx as u64) + && r.state.tag == ZstdTag::ZstdBlockSequenceFseCode + }) + .unwrap(); + let mut fse_rows = witness_rows + .iter() + .filter(|&r| { + r.state.block_idx == (block.block_idx as u64) + && r.state.tag == ZstdTag::ZstdBlockSequenceFseCode + }) + .map(|r| { + ( + r.encoded_data.byte_idx as usize, + r.encoded_data.value_byte as u64, + r.bitstream_read_data.bit_start_idx, + r.bitstream_read_data.bit_end_idx, + r.bitstream_read_data.bit_value, + r.state.tag.is_reverse() as u64, + ) + }) + .collect::>(); + // Append 2 more witness rows to accommodate the 3-bytes chunk for the last + // FseCode row. 
+ let fse_rows_len = fse_rows.len(); + fse_rows.extend_from_slice( + witness_rows + .iter() + .skip(fse_position + fse_rows_len) + .take(2) + .map(|r| { + ( + r.encoded_data.byte_idx as usize, + r.encoded_data.value_byte as u64, + r.bitstream_read_data.bit_start_idx, + r.bitstream_read_data.bit_end_idx, + r.bitstream_read_data.bit_value, + r.state.tag.is_reverse() as u64, + ) + }) + .collect::>() + .as_slice(), + ); + + // Sequence data rows + let sequence_data_position = witness_rows + .iter() + .position(|r| { + r.state.block_idx == (block.block_idx as u64) + && r.state.tag == ZstdTag::ZstdBlockSequenceData + }) + .unwrap(); + let mut sequence_data_rows = witness_rows + .iter() + .filter(|&r| { + r.state.block_idx == (block.block_idx as u64) + && r.state.tag == ZstdTag::ZstdBlockSequenceData + }) + .map(|r| { + ( + r.encoded_data.byte_idx as usize, + r.encoded_data.value_byte as u64, + r.bitstream_read_data.bit_start_idx, + r.bitstream_read_data.bit_end_idx, + r.bitstream_read_data.bit_value, + r.state.tag.is_reverse() as u64, + ) + }) + .collect::>(); + // Append 2 more witness rows to accommodate the 3-bytes chunk for the last + // FseCode row. 
+ let sequence_data_rows_len = sequence_data_rows.len(); + sequence_data_rows.extend_from_slice( + witness_rows + .iter() + .skip(sequence_data_position + sequence_data_rows_len) + .take(2) + .map(|r| { + ( + r.encoded_data.byte_idx as usize, + r.encoded_data.value_byte as u64, + r.bitstream_read_data.bit_start_idx, + r.bitstream_read_data.bit_end_idx, + r.bitstream_read_data.bit_value, + r.state.tag.is_reverse() as u64, + ) + }) + .collect::>() + .as_slice(), + ); + + for rows in [fse_rows, sequence_data_rows].into_iter() { + for grouped_rows in rows.windows(3) { + let curr_row = grouped_rows[0].clone(); + + let byte_idx_1 = grouped_rows[0].0; + let byte_idx_2 = grouped_rows[1].0; + let byte_idx_3 = grouped_rows[2].0; + let byte_1 = grouped_rows[0].1; + let byte_2 = grouped_rows[1].1; + let byte_3 = grouped_rows[2].1; + + let byte_1_bits = value_bits_le(byte_1 as u8); + let byte_2_bits = value_bits_le(byte_2 as u8); + let byte_3_bits = value_bits_le(byte_3 as u8); + + let bits = if curr_row.5 > 0 { + // reversed + [ + byte_1_bits.into_iter().rev().collect::>(), + byte_2_bits.into_iter().rev().collect::>(), + byte_3_bits.into_iter().rev().collect::>(), + ] + .concat() + } else { + // not reversed + [byte_1_bits, byte_2_bits, byte_3_bits].concat() + }; + + let mut acc: u64 = 0; + let mut bitstring_len: u64 = 0; + + for (bit_idx, bit) in bits.into_iter().enumerate().take(24) { + region.assign_advice( + || "byte_idx_1", + self.byte_idx_1, + offset + bit_idx, + || Value::known(Fr::from(byte_idx_1 as u64)), + )?; + region.assign_advice( + || "byte_idx_2", + self.byte_idx_2, + offset + bit_idx, + || Value::known(Fr::from(byte_idx_2 as u64)), + )?; + region.assign_advice( + || "byte_idx_3", + self.byte_idx_3, + offset + bit_idx, + || Value::known(Fr::from(byte_idx_3 as u64)), + )?; + region.assign_advice( + || "byte_1", + self.byte_1, + offset + bit_idx, + || Value::known(Fr::from(byte_1 as u64)), + )?; + region.assign_advice( + || "byte_2", + self.byte_2, + offset + 
bit_idx, + || Value::known(Fr::from(byte_2 as u64)), + )?; + region.assign_advice( + || "byte_3", + self.byte_3, + offset + bit_idx, + || Value::known(Fr::from(byte_3 as u64)), + )?; + + if bit_idx >= curr_row.2 && bit_idx <= curr_row.3 { + acc = acc * 2 + (bit as u64); + bitstring_len += 1; + } + region.assign_advice( + || "bit", + self.bit, + offset + bit_idx, + || Value::known(Fr::from(bit as u64)), + )?; + region.assign_advice( + || "bitstring_value", + self.bitstring_value, + offset + bit_idx, + || Value::known(Fr::from(curr_row.4 as u64)), + )?; + region.assign_advice( + || "bitstring_value_acc", + self.bitstring_value_acc, + offset + bit_idx, + || Value::known(Fr::from(acc)), + )?; + region.assign_advice( + || "bitstring_len", + self.bitstring_len, + offset + bit_idx, + || Value::known(Fr::from(bitstring_len)), + )?; + region.assign_advice( + || "from_start", + self.from_start, + offset + bit_idx, + || Value::known(Fr::from((bit_idx <= curr_row.3) as u64)), + )?; + region.assign_advice( + || "until_end", + self.until_end, + offset + bit_idx, + || Value::known(Fr::from((bit_idx >= curr_row.2) as u64)), + )?; + region.assign_advice( + || "is_reverse", + self.is_reverse, + offset + bit_idx, + || Value::known(Fr::from(curr_row.5 as u64)), + )?; + } + + offset += 24; + } + } + } + + for idx in 0..offset { + region.assign_advice( + || "is_padding", + self.is_padding, + idx, + || Value::known(Fr::zero()), + )?; + } + for idx in offset..((1 << k) - 30) { + region.assign_advice( + || "is_padding", + self.is_padding, + idx, + || Value::known(Fr::one()), + )?; + } + + Ok(()) + }, + ) } } diff --git a/aggregator/src/aggregation/decoder/tables/fixed/seq_data_interleaved_order.rs b/aggregator/src/aggregation/decoder/tables/fixed/seq_data_interleaved_order.rs index 4e79fdb227..0ded3077bb 100644 --- a/aggregator/src/aggregation/decoder/tables/fixed/seq_data_interleaved_order.rs +++ b/aggregator/src/aggregation/decoder/tables/fixed/seq_data_interleaved_order.rs @@ -27,13 
+27,13 @@ impl FixedLookupValues for RomSeqDataInterleavedOrder { Value::known(Fr::zero()), // table_kind_prev Value::known(Fr::from(LLT as u64)), // table_kind_curr Value::known(Fr::one()), // is_init_state - Value::known(Fr::zero()), // is_update_state + Value::known(Fr::one()), // is_update_state Value::known(Fr::zero()), Value::known(Fr::zero()), ]], [ - (LLT, MOT, true, false), // init state (MOT) - (MOT, MLT, true, false), // init state (MLT) + (LLT, MOT, true, true), // init state (MOT) + (MOT, MLT, true, true), // init state (MLT) (MLT, MOT, false, false), (MOT, MLT, false, false), (MLT, LLT, false, false), diff --git a/aggregator/src/aggregation/decoder/tables/fixed/seq_tag_order.rs b/aggregator/src/aggregation/decoder/tables/fixed/seq_tag_order.rs index a63620b958..5fed286498 100644 --- a/aggregator/src/aggregation/decoder/tables/fixed/seq_tag_order.rs +++ b/aggregator/src/aggregation/decoder/tables/fixed/seq_tag_order.rs @@ -49,7 +49,7 @@ impl FixedLookupValues for RomSeqTagOrder { fn values() -> Vec<[Value; 7]> { use FseTableKind::{LLT, MLT, MOT}; use ZstdTag::{ - ZstdBlockFseCode as FseCode, ZstdBlockLstream as SeqData, + ZstdBlockSequenceData as SeqData, ZstdBlockSequenceFseCode as FseCode, ZstdBlockSequenceHeader as SeqHeader, }; diff --git a/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs b/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs index 67dd3f8ea1..3a2235efce 100644 --- a/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs +++ b/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs @@ -1,6 +1,6 @@ use halo2_proofs::{circuit::Value, halo2curves::bn256::Fr}; -use crate::aggregation::decoder::{tables::fixed::FixedLookupTag, witgen::ZstdTag}; +use crate::aggregation::decoder::{tables::fixed::FixedLookupTag, witgen::{ZstdTag, lookup_max_tag_len}}; use super::FixedLookupValues; @@ -23,22 +23,27 @@ impl FixedLookupValues for RomTagTransition { fn values() -> Vec<[Value; 7]> { use 
ZstdTag::{ BlockHeader, FrameContentSize, FrameHeaderDescriptor, ZstdBlockLiteralsHeader, - ZstdBlockLiteralsRawBytes, ZstdBlockSequenceHeader, + ZstdBlockLiteralsRawBytes, ZstdBlockSequenceHeader, ZstdBlockSequenceFseCode, ZstdBlockSequenceData, Null, }; [ - (FrameHeaderDescriptor, FrameContentSize, 1), - (FrameContentSize, BlockHeader, 8), - (BlockHeader, ZstdBlockLiteralsHeader, 3), - (ZstdBlockLiteralsHeader, ZstdBlockLiteralsRawBytes, 5), - (ZstdBlockLiteralsRawBytes, ZstdBlockSequenceHeader, 1048575), // (1 << 20) - 1 + (FrameHeaderDescriptor, FrameContentSize), + (FrameContentSize, BlockHeader), + (BlockHeader, ZstdBlockLiteralsHeader), + (ZstdBlockLiteralsHeader, ZstdBlockLiteralsRawBytes), + (ZstdBlockLiteralsRawBytes, ZstdBlockSequenceHeader), + (ZstdBlockSequenceHeader, ZstdBlockSequenceFseCode), + (ZstdBlockSequenceHeader, ZstdBlockSequenceData), + (ZstdBlockSequenceFseCode, ZstdBlockSequenceFseCode), + (ZstdBlockSequenceFseCode, ZstdBlockSequenceData), + (ZstdBlockSequenceData, Null), ] - .map(|(tag, tag_next, max_len)| { + .map(|(tag, tag_next)| { [ Value::known(Fr::from(FixedLookupTag::TagTransition as u64)), Value::known(Fr::from(tag as u64)), Value::known(Fr::from(tag_next as u64)), - Value::known(Fr::from(max_len)), + Value::known(Fr::from(lookup_max_tag_len(tag))), Value::known(Fr::from(tag.is_output())), Value::known(Fr::from(tag.is_reverse())), Value::known(Fr::from(tag.is_block())), diff --git a/aggregator/src/aggregation/decoder/tables/fse.rs b/aggregator/src/aggregation/decoder/tables/fse.rs index 72f71c0895..d373c40073 100644 --- a/aggregator/src/aggregation/decoder/tables/fse.rs +++ b/aggregator/src/aggregation/decoder/tables/fse.rs @@ -1,10 +1,11 @@ use gadgets::{ - is_equal::{IsEqualChip, IsEqualConfig}, + is_equal::{IsEqualChip, IsEqualConfig, IsEqualInstruction}, util::{and, not, select, Expr}, }; use halo2_proofs::{ + circuit::{Layouter, Value}, halo2curves::bn256::Fr, - plonk::{Advice, Column, ConstraintSystem, Expression, Fixed, 
VirtualCells}, + plonk::{Advice, Column, ConstraintSystem, Error, Expression, Fixed, VirtualCells}, poly::Rotation, }; use itertools::Itertools; @@ -16,6 +17,7 @@ use zkevm_circuits::{ use crate::aggregation::decoder::{ tables::{FixedLookupTag, FixedTable}, witgen::FseTableKind, + FseAuxiliaryTableData, ZstdWitnessRow, }; /// The FSE table verifies that given the symbols and the states allocated to those symbols, the @@ -126,6 +128,7 @@ impl FseTable { /// Configure the FSE table. pub fn configure( meta: &mut ConstraintSystem, + q_enable: Column, fixed_table: &FixedTable, u8_table: U8Table, range8_table: RangeTable<8>, @@ -134,7 +137,7 @@ impl FseTable { ) -> Self { // Auxiliary table to validate that (baseline, nb) were assigned correctly to the states // allocated to a symbol. - let sorted_table = FseSortedStatesTable::configure(meta, pow2_table, u8_table); + let sorted_table = FseSortedStatesTable::configure(meta, q_enable, pow2_table, u8_table); let config = Self { sorted_table, @@ -157,6 +160,7 @@ impl FseTable { // - table_size_rs_3 == table_size >> 3. meta.lookup("FseTable: table_size >> 3", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_fixed(config.sorted_table.q_start, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -169,36 +173,14 @@ impl FseTable { // Every FSE symbol is a byte. meta.lookup("FseTable: symbol in [0, 256)", |meta| { + let condition = meta.query_fixed(q_enable, Rotation::cur()); + vec![( - meta.query_advice(config.symbol, Rotation::cur()), + condition * meta.query_advice(config.symbol, Rotation::cur()), u8_table.into(), )] }); - // The first row of the FseTable layout, i.e. q_first=true. - meta.create_gate("FseTable: first row", |meta| { - let condition = meta.query_fixed(config.sorted_table.q_first, Rotation::cur()); - - let mut cb = BaseConstraintBuilder::default(); - - // The first row is all 0s. This is then followed by a q_start==1 fixed column. 
We want - // to make sure the first FSE table belongs to block_idx=1. - cb.require_equal( - "block_idx == 1 for the first FSE table", - meta.query_advice(config.sorted_table.block_idx, Rotation::next()), - 1.expr(), - ); - - // The first FSE table described should be the LLT table. - cb.require_equal( - "table_kind == LLT for the first FSE table", - meta.query_advice(config.sorted_table.table_kind, Rotation::next()), - FseTableKind::LLT.expr(), - ); - - cb.gate(condition) - }); - // Check that on the starting row of every FSE table, i.e. q_start=true: // // - tuple (block_idx::prev, block_idx::cur, table_kind::prev, table_kind::cur) @@ -209,6 +191,7 @@ impl FseTable { "FseSortedStatesTable: start row (ROM block_idx and table_kind transition)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_fixed(config.sorted_table.q_start, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -239,6 +222,7 @@ impl FseTable { // The starting row of every FSE table, i.e. q_start=true. meta.create_gate("FseTable: start row", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_fixed(config.sorted_table.q_start, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -290,7 +274,10 @@ impl FseTable { // For every symbol that has a normalised probability prob=-1. meta.lookup_any("FseTable: all symbols with prob=-1 (nb==AL)", |meta| { - let condition = meta.query_advice(config.is_prob_less_than1, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + meta.query_advice(config.is_prob_less_than1, Rotation::cur()), + ]); // for a symbol with prob=-1, we do a full state reset, i.e. // read nb=AL bits, i.e. 1 << nb == table_size. @@ -306,7 +293,10 @@ impl FseTable { // For every symbol that has a normalised probability prob=-1. 
meta.create_gate("FseTable: all symbols with prob=-1", |meta| { - let condition = meta.query_advice(config.is_prob_less_than1, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + meta.query_advice(config.is_prob_less_than1, Rotation::cur()), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -343,6 +333,7 @@ impl FseTable { "FseTable: subsequent symbols with prob=-1 (symbol increasing)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())), meta.query_advice(config.is_prob_less_than1, Rotation::cur()), ]); @@ -369,6 +360,7 @@ impl FseTable { "FseTable: subsequent symbols with prob=-1 (state retreating)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())), meta.query_advice(config.is_prob_less_than1, Rotation::cur()), ]); @@ -392,6 +384,7 @@ impl FseTable { "FseTable: symbols with prob>=1 (symbol increasing)", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())), not::expr(meta.query_advice(config.is_prob_less_than1, Rotation::prev())), meta.query_advice(config.is_new_symbol, Rotation::cur()), @@ -416,6 +409,8 @@ impl FseTable { // Symbols with prob>=1 continue the same symbol if not a new symbol. 
meta.create_gate("FseTable: symbols with prob>=1", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.sorted_table.q_first, Rotation::cur())), not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())), not::expr(meta.query_advice(config.is_prob_less_than1, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), @@ -440,8 +435,11 @@ impl FseTable { // All rows in an instance of FSE table, except the starting row (q_start=true). meta.create_gate("FseTable: every FSE table (except q_start=1)", |meta| { - let condition = - not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.sorted_table.q_first, Rotation::cur())), + not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -508,7 +506,10 @@ impl FseTable { // A state is skipped only if that state was pre-allocated to a symbol with prob=-1. meta.lookup_any("FseTable: skipped state", |meta| { - let condition = meta.query_advice(config.is_skipped_state, Rotation::cur()); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + meta.query_advice(config.is_skipped_state, Rotation::cur()), + ]); // A state can be skipped only if it was pre-allocated to a symbol with prob=-1. 
So we // check that there exists a row with the same block_idx, table_kind and the skipped @@ -538,6 +539,7 @@ impl FseTable { "FseTable: assigned state (baseline, nb) validation", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_advice(config.is_prob_less_than1, Rotation::cur())), not::expr(meta.query_advice(config.is_skipped_state, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), @@ -558,9 +560,9 @@ impl FseTable { block_idx, table_kind, table_size, - state, symbol, symbol_count, + state, baseline, nb, 0.expr(), @@ -576,6 +578,7 @@ impl FseTable { // every state in the FSE table. meta.lookup_any("FseTable: predefined table validation", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.sorted_table.is_predefined, Rotation::cur()), not::expr(meta.query_advice(config.is_skipped_state, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), @@ -608,6 +611,7 @@ impl FseTable { // For every new symbol detected. meta.create_gate("FseTable: new symbol", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.is_new_symbol, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -641,6 +645,8 @@ impl FseTable { // Whenever we continue allocating states to the same symbol. 
meta.create_gate("FseTable: same symbol, transitioned state", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.sorted_table.q_first, Rotation::cur())), not::expr(meta.query_advice(config.is_new_symbol, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -676,6 +682,8 @@ impl FseTable { // - state'' == state + (table_size >> 3) + (table_size >> 1) + 3 meta.lookup_any("FseTable: state transition", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.sorted_table.q_first, Rotation::cur())), not::expr(meta.query_fixed(config.sorted_table.q_start, Rotation::cur())), not::expr(meta.query_advice(config.is_prob_less_than1, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), @@ -705,6 +713,479 @@ impl FseTable { config } + + /// Assign the FSE table. + pub fn assign( + &self, + layouter: &mut impl Layouter, + data: Vec, + k: u32, + ) -> Result<(), Error> { + layouter.assign_region( + || "FseTable", + |mut region| { + region.assign_fixed( + || "q_first", + self.sorted_table.q_first, + 0, + || Value::known(Fr::one()), + )?; + + // Both tables should skip the first row + let mut fse_offset: usize = 1; + let mut sorted_offset: usize = 1; + + for i in (1..((1 << k) - 30)).step_by(1 << 10) { + region.assign_fixed( + || "q_start", + self.sorted_table.q_start, + i, + || Value::known(Fr::one()), + )?; + } + + for (table_idx, table) in data.clone().into_iter().enumerate() { + let target_end_offset = fse_offset + (1 << 10); // reserve enough rows to accommodate skipped states + // Assign q_start + + let states_to_symbol = table.parse_state_table(); + let mut state_idx: usize = 1; + + // Assign the symbols with negative normalised probability + let tail_states_count = table + .normalised_probs + .iter() + .filter(|(&_sym, &w)| w < 0) + .count(); + if tail_states_count > 0 { + 
for state in ((table.table_size - tail_states_count as u64) + ..=(table.table_size - 1)) + .rev() + { + region.assign_advice( + || "state", + self.state, + fse_offset, + || Value::known(Fr::from(state)), + )?; + region.assign_advice( + || "idx", + self.idx, + fse_offset, + || Value::known(Fr::from(state_idx as u64)), + )?; + region.assign_advice( + || "symbol", + self.symbol, + fse_offset, + || { + Value::known(Fr::from( + states_to_symbol.get(&state).expect("state exists").0, + )) + }, + )?; + region.assign_advice( + || "baseline", + self.baseline, + fse_offset, + || { + Value::known(Fr::from( + states_to_symbol.get(&state).expect("state exists").1, + )) + }, + )?; + region.assign_advice( + || "nb", + self.nb, + fse_offset, + || { + Value::known(Fr::from( + states_to_symbol.get(&state).expect("state exists").2, + )) + }, + )?; + region.assign_advice( + || "is_new_symbol", + self.is_new_symbol, + fse_offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( + || "is_prob_less_than1", + self.is_prob_less_than1, + fse_offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( + || "is_skipped_state", + self.is_skipped_state, + fse_offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( + || "symbol_count", + self.symbol_count, + fse_offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( + || "symbol_count_acc", + self.symbol_count_acc, + fse_offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( + || "table_size_rs_1", + self.table_size_rs_1, + fse_offset, + || Value::known(Fr::from(table.table_size >> 1)), + )?; + region.assign_advice( + || "table_size_rs_3", + self.table_size_rs_3, + fse_offset, + || Value::known(Fr::from(table.table_size >> 3)), + )?; + + state_idx += 1; + fse_offset += 1; + } + } + + // Assign the symbols with positive probability in fse table + let regular_symbols = table + .normalised_probs + .clone() + .into_iter() + .filter(|(_sym, w)| *w > 0) + .collect::>(); + for (sym, _c) in 
regular_symbols.clone().into_iter() { + let mut sym_acc: usize = 1; + let sym_rows = table.sym_to_states.get(&sym).expect("symbol exists."); + let sym_count = sym_rows.iter().filter(|r| !r.is_state_skipped).count(); + + for fse_row in sym_rows { + region.assign_advice( + || "state", + self.state, + fse_offset, + || Value::known(Fr::from(fse_row.state)), + )?; + region.assign_advice( + || "idx", + self.idx, + fse_offset, + || Value::known(Fr::from(state_idx as u64)), + )?; + region.assign_advice( + || "symbol", + self.symbol, + fse_offset, + || Value::known(Fr::from(fse_row.symbol)), + )?; + region.assign_advice( + || "baseline", + self.baseline, + fse_offset, + || Value::known(Fr::from(fse_row.baseline)), + )?; + region.assign_advice( + || "nb", + self.nb, + fse_offset, + || Value::known(Fr::from(fse_row.num_bits)), + )?; + region.assign_advice( + || "is_new_symbol", + self.is_new_symbol, + fse_offset, + || Value::known(Fr::from((sym_acc == 1) as u64)), + )?; + region.assign_advice( + || "is_prob_less_than1", + self.is_prob_less_than1, + fse_offset, + || Value::known(Fr::zero()), + )?; + region.assign_advice( + || "is_skipped_state", + self.is_skipped_state, + fse_offset, + || Value::known(Fr::from(fse_row.is_state_skipped as u64)), + )?; + region.assign_advice( + || "symbol_count", + self.symbol_count, + fse_offset, + || Value::known(Fr::from(sym_count as u64)), + )?; + region.assign_advice( + || "symbol_count_acc", + self.symbol_count_acc, + fse_offset, + || Value::known(Fr::from(sym_acc as u64)), + )?; + region.assign_advice( + || "table_size_rs_1", + self.table_size_rs_1, + fse_offset, + || Value::known(Fr::from(table.table_size >> 1)), + )?; + region.assign_advice( + || "table_size_rs_3", + self.table_size_rs_3, + fse_offset, + || Value::known(Fr::from(table.table_size >> 3)), + )?; + + fse_offset += 1; + if !fse_row.is_state_skipped { + state_idx += 1; + sym_acc += 1; + } + } + } + + // witgen_debug + // assert!( + // state_idx as u64 == table.table_size, + 
// "Last state should correspond to end of table" + // ); + + // Assign the symbols with positive probability in sorted table + for (sym, _c) in regular_symbols.into_iter() { + let mut sym_acc: usize = 1; + let sym_rows = table + .sym_to_sorted_states + .get(&sym) + .expect("symbol exists."); + let sym_count = sym_rows.iter().filter(|r| !r.is_state_skipped).count(); + let last_baseline = sym_rows.last().unwrap().baseline; + let mut spot_acc = 0u64; + let mut baseline_mark = false; + let smallest_spot = (1 + << sym_rows + .iter() + .filter(|r| !r.is_state_skipped) + .map(|r| r.num_bits) + .min() + .expect("Minimum bits read should exist.")) + as u64; + + for fse_row in sym_rows { + if !fse_row.is_state_skipped { + region.assign_advice( + || "sorted_table.block_idx", + self.sorted_table.block_idx, + sorted_offset, + || Value::known(Fr::from(table.block_idx)), + )?; + region.assign_advice( + || "sorted_table.table_kind", + self.sorted_table.table_kind, + sorted_offset, + || Value::known(Fr::from(table.table_kind as u64)), + )?; + region.assign_advice( + || "sorted_table.table_size", + self.sorted_table.table_size, + sorted_offset, + || Value::known(Fr::from(table.table_size)), + )?; + region.assign_advice( + || "sorted_table.is_predefined", + self.sorted_table.is_predefined, + sorted_offset, + || Value::known(Fr::from(table.is_predefined as u64)), + )?; + region.assign_advice( + || "sorted_table.table_size", + self.sorted_table.table_size, + sorted_offset, + || Value::known(Fr::from(table.table_size)), + )?; + region.assign_advice( + || "sorted_table.symbol", + self.sorted_table.symbol, + sorted_offset, + || Value::known(Fr::from(fse_row.symbol)), + )?; + region.assign_advice( + || "sorted_table.is_new_symbol", + self.sorted_table.is_new_symbol, + sorted_offset, + || Value::known(Fr::from((sym_acc == 1) as u64)), + )?; + region.assign_advice( + || "sorted_table.symbol_count", + self.sorted_table.symbol_count, + sorted_offset, + || Value::known(Fr::from(sym_count as 
u64)), + )?; + region.assign_advice( + || "sorted_table.symbol_count_acc", + self.sorted_table.symbol_count_acc, + sorted_offset, + || Value::known(Fr::from(sym_acc as u64)), + )?; + region.assign_advice( + || "sorted_table.state", + self.sorted_table.state, + sorted_offset, + || Value::known(Fr::from(fse_row.state)), + )?; + region.assign_advice( + || "sorted_table.nb", + self.sorted_table.nb, + sorted_offset, + || Value::known(Fr::from(fse_row.num_bits)), + )?; + + let curr_baseline = fse_row.baseline; + if curr_baseline == 0 { + baseline_mark = true; + } + region.assign_advice( + || "sorted_table.baseline", + self.sorted_table.baseline, + sorted_offset, + || Value::known(Fr::from(curr_baseline)), + )?; + region.assign_advice( + || "sorted_table.last_baseline", + self.sorted_table.last_baseline, + sorted_offset, + || Value::known(Fr::from(last_baseline)), + )?; + region.assign_advice( + || "sorted_table.baseline_mark", + self.sorted_table.baseline_mark, + sorted_offset, + || Value::known(Fr::from(baseline_mark as u64)), + )?; + + region.assign_advice( + || "sorted_table.spot", + self.sorted_table.spot, + sorted_offset, + || Value::known(Fr::from(1 << fse_row.num_bits)), + )?; + region.assign_advice( + || "sorted_table.smallest_spot", + self.sorted_table.smallest_spot, + sorted_offset, + || Value::known(Fr::from(smallest_spot)), + )?; + + spot_acc += 1 << fse_row.num_bits; + region.assign_advice( + || "sorted_table.spot_acc", + self.sorted_table.spot_acc, + sorted_offset, + || Value::known(Fr::from(spot_acc)), + )?; + + let baseline_0x00 = + IsEqualChip::construct(self.sorted_table.baseline_0x00.clone()); + baseline_0x00.assign( + &mut region, + sorted_offset, + Value::known(Fr::from(curr_baseline)), + Value::known(Fr::zero()), + )?; + + sorted_offset += 1; + sym_acc += 1; + } + } + } + + for offset in fse_offset..target_end_offset { + region.assign_advice( + || "is_padding", + self.is_padding, + offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( 
+ || "table_size_rs_1", + self.table_size_rs_1, + offset, + || Value::known(Fr::from(table.table_size >> 1)), + )?; + region.assign_advice( + || "table_size_rs_3", + self.table_size_rs_3, + offset, + || Value::known(Fr::from(table.table_size >> 3)), + )?; + region.assign_advice( + || "idx", + self.idx, + offset, + // We incremented state_idx after the last valid symbol's last state. + // So we less 1 here. + || Value::known(Fr::from(state_idx as u64 - 1)), + )?; + } + for offset in sorted_offset..target_end_offset { + region.assign_advice( + || "sorted_table.sorted_table.is_padding", + self.sorted_table.is_padding, + offset, + || Value::known(Fr::one()), + )?; + region.assign_advice( + || "sorted_table.block_idx", + self.sorted_table.block_idx, + offset, + || Value::known(Fr::from(table.block_idx)), + )?; + region.assign_advice( + || "sorted_table.table_kind", + self.sorted_table.table_kind, + offset, + || Value::known(Fr::from(table.table_kind as u64)), + )?; + region.assign_advice( + || "sorted_table.table_size", + self.sorted_table.table_size, + offset, + || Value::known(Fr::from(table.table_size)), + )?; + region.assign_advice( + || "sorted_table.is_predefined", + self.sorted_table.is_predefined, + offset, + || Value::known(Fr::from(table.is_predefined as u64)), + )?; + } + fse_offset = target_end_offset; + sorted_offset = target_end_offset; + } + + for idx in fse_offset..((1 << k) - 30) { + region.assign_advice( + || "is_padding", + self.is_padding, + idx, + || Value::known(Fr::one()), + )?; + } + + for idx in sorted_offset..((1 << k) - 30) { + region.assign_advice( + || "sorted_table.is_padding", + self.sorted_table.is_padding, + idx, + || Value::known(Fr::one()), + )?; + } + + Ok(()) + }, + ) + } } impl FseTable { @@ -855,6 +1336,7 @@ struct FseSortedStatesTable { impl FseSortedStatesTable { fn configure( meta: &mut ConstraintSystem, + q_enable: Column, pow2_table: Pow2Table<20>, u8_table: U8Table, ) -> Self { @@ -877,7 +1359,7 @@ impl FseSortedStatesTable { 
is_padding, baseline_0x00: IsEqualChip::configure( meta, - |meta| not::expr(meta.query_advice(is_padding, Rotation::cur())), + |meta| meta.query_fixed(q_enable, Rotation::cur()), |meta| meta.query_advice(baseline, Rotation::cur()), |_| 0.expr(), ), @@ -890,7 +1372,10 @@ impl FseSortedStatesTable { // For every non-padded row, the SPoT is 2^nb. meta.lookup_any("FseSortedStatesTable: spot == 1 << nb", |meta| { - let condition = not::expr(meta.query_advice(config.is_padding, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); [ meta.query_advice(config.nb, Rotation::cur()), @@ -929,6 +1414,7 @@ impl FseSortedStatesTable { // The starting row of every FSE table, i.e. q_start=true. meta.create_gate("FseSortedStatesTable: start row", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_fixed(config.q_start, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -955,6 +1441,7 @@ impl FseSortedStatesTable { "FseSortedStatesTable: symbols are in increasing order", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.q_start, Rotation::cur())), meta.query_advice(config.is_new_symbol, Rotation::cur()), ]); @@ -978,6 +1465,7 @@ impl FseSortedStatesTable { // We continue the same symbol if not a new symbol. 
meta.create_gate("FseSortedStatesTable: same symbol", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.q_first, Rotation::cur())), not::expr(meta.query_fixed(config.q_start, Rotation::cur())), not::expr(meta.query_advice(config.is_new_symbol, Rotation::cur())), @@ -1002,6 +1490,7 @@ impl FseSortedStatesTable { "FseSortedStatesTable: states are in increasing order", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), not::expr(meta.query_fixed(config.q_first, Rotation::cur())), not::expr(meta.query_fixed(config.q_start, Rotation::cur())), not::expr(meta.query_advice(config.is_new_symbol, Rotation::cur())), @@ -1028,7 +1517,11 @@ impl FseSortedStatesTable { meta.create_gate( "FseSortedStatesTable: every FSE table (except q_start=1)", |meta| { - let condition = not::expr(meta.query_fixed(config.q_start, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.q_first, Rotation::cur())), + not::expr(meta.query_fixed(config.q_start, Rotation::cur())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -1063,6 +1556,7 @@ impl FseSortedStatesTable { // For every new symbol detected. meta.create_gate("FseSortedStatesTable: new symbol", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(config.is_new_symbol, Rotation::cur()), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); @@ -1075,26 +1569,28 @@ impl FseSortedStatesTable { // - spot_acc accumulated to table_size. // - the last state has the smallest spot value. // - the last state's baseline is in fact last_baseline. 
- cb.require_equal( - "symbol_count == symbol_count_acc", - meta.query_advice(config.symbol_count, Rotation::prev()), - meta.query_advice(config.symbol_count_acc, Rotation::prev()), - ); - cb.require_equal( - "spot_acc == table_size", - meta.query_advice(config.spot_acc, Rotation::prev()), - meta.query_advice(config.table_size, Rotation::prev()), - ); - cb.require_equal( - "spot == smallest_spot", - meta.query_advice(config.spot, Rotation::prev()), - meta.query_advice(config.smallest_spot, Rotation::prev()), - ); - cb.require_equal( - "baseline == last_baseline", - meta.query_advice(config.baseline, Rotation::prev()), - meta.query_advice(config.last_baseline, Rotation::prev()), - ); + cb.condition(not::expr(meta.query_fixed(config.q_start, Rotation::cur())), |cb| { + cb.require_equal( + "symbol_count == symbol_count_acc", + meta.query_advice(config.symbol_count, Rotation::prev()), + meta.query_advice(config.symbol_count_acc, Rotation::prev()), + ); + cb.require_equal( + "spot_acc == table_size", + meta.query_advice(config.spot_acc, Rotation::prev()), + meta.query_advice(config.table_size, Rotation::prev()), + ); + cb.require_equal( + "spot == smallest_spot", + meta.query_advice(config.spot, Rotation::prev()), + meta.query_advice(config.smallest_spot, Rotation::prev()), + ); + cb.require_equal( + "baseline == last_baseline", + meta.query_advice(config.baseline, Rotation::prev()), + meta.query_advice(config.last_baseline, Rotation::prev()), + ); + }); // When the symbol changes, we wish to check in case the baseline==0x00 or not. If it // is, then the baseline_mark should be turned on from this row onwards (while the @@ -1158,6 +1654,8 @@ impl FseSortedStatesTable { // Whenever we continue allocating states to the same symbol. 
meta.create_gate("FseSortedStatesTable: same symbol, new state", |meta| { let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_fixed(config.q_first, Rotation::cur())), not::expr(meta.query_advice(config.is_new_symbol, Rotation::cur())), not::expr(meta.query_advice(config.is_padding, Rotation::cur())), ]); diff --git a/aggregator/src/aggregation/decoder/tables/literals_header.rs b/aggregator/src/aggregation/decoder/tables/literals_header.rs index 1f4d54814b..2077fc94f6 100644 --- a/aggregator/src/aggregation/decoder/tables/literals_header.rs +++ b/aggregator/src/aggregation/decoder/tables/literals_header.rs @@ -1,7 +1,13 @@ -use gadgets::util::{not, select, Expr}; +use crate::aggregation::decoder::witgen::{ + util::{le_bits_to_value, value_bits_le}, + BlockType, +}; +use eth_types::Field; +use gadgets::util::{and, not, select, Expr}; use halo2_proofs::{ + circuit::{Layouter, Value}, halo2curves::bn256::Fr, - plonk::{Advice, Any, Column, ConstraintSystem, Fixed}, + plonk::{Advice, Any, Column, ConstraintSystem, Error, Fixed}, poly::Rotation, }; use zkevm_circuits::{ @@ -42,6 +48,7 @@ impl LiteralsHeaderTable { /// Construct and constrain the literals header table. 
pub fn configure( meta: &mut ConstraintSystem, + q_enable: Column, range8: RangeTable<8>, range16: RangeTable<16>, ) -> Self { @@ -79,7 +86,10 @@ impl LiteralsHeaderTable { }); meta.create_gate("LiteralsHeaderTable: main gate", |meta| { - let condition = not::expr(meta.query_advice(config.is_padding, Rotation::cur())); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); let mut cb = BaseConstraintBuilder::default(); @@ -115,48 +125,46 @@ impl LiteralsHeaderTable { cb.gate(condition) }); - meta.create_gate( - "LiteralsHeaderTable: subsequent rows after q_first=true", - |meta| { - let condition = not::expr(meta.query_fixed(config.q_first, Rotation::cur())); - - let mut cb = BaseConstraintBuilder::default(); - - // padding transitions from 0 -> 1 only once. - let is_padding_cur = meta.query_advice(config.is_padding, Rotation::cur()); - let is_padding_prev = meta.query_advice(config.is_padding, Rotation::prev()); - let is_padding_delta = is_padding_cur.expr() - is_padding_prev; - - cb.require_boolean("is_padding is boolean", is_padding_cur.expr()); - cb.require_boolean("is_padding delta is boolean", is_padding_delta); - - // if this is not a padding row, then block_idx has incremented. - cb.condition(not::expr(is_padding_cur.expr()), |cb| { - cb.require_equal( - "block_idx increments by 1", - meta.query_advice(config.block_idx, Rotation::cur()), - meta.query_advice(config.block_idx, Rotation::prev()) + 1.expr(), - ); - }); - - // block_idx increments. - // - // This also ensures that we are not populating conflicting literal headers for the - // same block_idx in this layout. 
- cb.condition(not::expr(is_padding_cur), |cb| { - cb.require_equal( - "block_idx increments", - meta.query_advice(config.block_idx, Rotation::cur()), - meta.query_advice(config.block_idx, Rotation::prev()) + 1.expr(), - ); - }); - - cb.gate(condition) - }, - ); + // witgen_debug + // meta.create_gate( + // "LiteralsHeaderTable: subsequent rows after q_first=true", + // |meta| { + // let condition = and::expr([ + // meta.query_fixed(q_enable, Rotation::cur()), + // not::expr(meta.query_fixed(config.q_first, Rotation::cur())), + // ]); + + // let mut cb = BaseConstraintBuilder::default(); + + // // padding transitions from 0 -> 1 only once. + // let is_padding_cur = meta.query_advice(config.is_padding, Rotation::cur()); + // let is_padding_prev = meta.query_advice(config.is_padding, Rotation::prev()); + // let is_padding_delta = is_padding_cur.expr() - is_padding_prev; + + // cb.require_boolean("is_padding is boolean", is_padding_cur.expr()); + // cb.require_boolean("is_padding delta is boolean", is_padding_delta); + + // // block_idx increments. + // // + // // This also ensures that we are not populating conflicting literal headers for the + // // same block_idx in this layout. 
+ // cb.condition(not::expr(is_padding_cur), |cb| { + // cb.require_equal( + // "block_idx increments", + // meta.query_advice(config.block_idx, Rotation::cur()), + // meta.query_advice(config.block_idx, Rotation::prev()) + 1.expr(), + // ); + // }); + + // cb.gate(condition) + // }, + // ); meta.lookup("LiteralsHeaderTable: byte0 >> 3", |meta| { - let condition = 1.expr(); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); let range_value = meta.query_advice(config.byte0, Rotation::cur()) - (meta.query_advice(config.byte0_rs_3, Rotation::cur()) * 8.expr()); @@ -165,7 +173,10 @@ impl LiteralsHeaderTable { }); meta.lookup("LiteralsHeaderTable: byte0 >> 4", |meta| { - let condition = 1.expr(); + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); let range_value = meta.query_advice(config.byte0, Rotation::cur()) - (meta.query_advice(config.byte0_rs_4, Rotation::cur()) * 16.expr()); @@ -177,6 +188,82 @@ impl LiteralsHeaderTable { config } + + /// Assign witness to the literals header table. 
+ pub fn assign( + &self, + layouter: &mut impl Layouter, + literals_headers: Vec<(u64, u64, (u64, u64, u64))>, + ) -> Result<(), Error> { + layouter.assign_region( + || "LiteralsHeaderTable", + |mut region| { + region.assign_fixed(|| "q_first", self.q_first, 0, || Value::known(F::one()))?; + + for (offset, (block_idx, _byte_offset, (byte0, byte1, byte2))) in + literals_headers.iter().copied().enumerate() + { + let lh_bytes = [byte0 as u8, byte1 as u8, byte2 as u8]; + let literals_block_type = BlockType::from(lh_bytes[0] & 0x3); + let size_format = (lh_bytes[0] >> 2) & 3; + + let [n_bits_fmt, n_bits_regen, n_bytes_header]: [usize; 3] = + match literals_block_type { + BlockType::RawBlock => match size_format { + 0b00 | 0b10 => [1, 5, 1], + 0b01 => [2, 12, 2], + 0b11 => [2, 20, 3], + _ => unreachable!("size_format out of bound"), + }, + _ => unreachable!( + "BlockType::* unexpected. Must be raw bytes for literals." + ), + }; + + // Bits for representing regenerated_size and compressed_size + let sizing_bits = &lh_bytes.into_iter().fold(vec![], |mut acc, b| { + acc.extend(value_bits_le(b)); + acc + })[(2 + n_bits_fmt)..(n_bytes_header * 8)]; + + let regen_size = le_bits_to_value(&sizing_bits[0..n_bits_regen]); + + for (col, value, annotation) in [ + (self.block_idx, block_idx, "block_idx"), + (self.byte0, byte0, "byte0"), + (self.byte1, byte1, "byte1"), + (self.byte2, byte2, "byte2"), + (self.regen_size, regen_size, "regen_size"), + // size_format bits, least-significant first; each one is assigned as 0 or 1 + ( + self.size_format_bit0, + (size_format & 1) as u64, + "size_format_bit0", + ), + ( + self.size_format_bit1, + ((size_format >> 1) & 1) as u64, + "size_format_bit1", + ), + (self.byte0_rs_3, byte0 >> 3, "byte0_rs_3"), + (self.byte0_rs_4, byte0 >> 4, "byte0_rs_4"), + ] { + region.assign_advice( + || annotation, + col, + offset, + || Value::known(F::from(value)), + )?; + } + } + + // TODO(ray): assign is_padding=true for other rows so that the block_idx + // increments gate is not checked. 
+ + Ok(()) + }, + ) + } } impl LookupTable for LiteralsHeaderTable { diff --git a/aggregator/src/aggregation/decoder/tables/seqinst_table.rs b/aggregator/src/aggregation/decoder/tables/seqinst_table.rs new file mode 100644 index 0000000000..92c1c0df5e --- /dev/null +++ b/aggregator/src/aggregation/decoder/tables/seqinst_table.rs @@ -0,0 +1,913 @@ + +use eth_types::Field; +use gadgets::{ + is_equal::*, + is_zero::*, + util::{and, or, not, select, Expr}, +}; +use halo2_proofs::{ + circuit::{Value, Region, Layouter}, + plonk::{Advice, Any, Column, ConstraintSystem, Error, Fixed}, + poly::Rotation, +}; +use zkevm_circuits::{ + evm_circuit::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::LookupTable, +}; +use crate::aggregation::decoder::witgen; +use witgen::AddressTableRow; + +/// Table used to carry the raw sequence instructions parsed from sequence section +/// and would be later transformed as the back-reference instructions +/// +/// For every block, one row in the table represents a single sequence instruction +/// in the sequence section, and handles all data parsed from the same sequence. +/// The 'block_index' is a 1-index for each block with n sequences in its +/// sequence section, the parsed value from bitstream for current sequence is put +/// in the 'input cols' section (`literal_len`, `match_offset` and `match_len`) +/// The transformed sequence instructions are put in 'output cols' section ( +/// `acc_literal_len`, `offset` and `match_len`), +/// notice we can use `match_len` without transformation. +/// +/// | enabled |block_index| n_seq |seq_index|s_beginning| input cols | output cols | +/// |---------|-----------|-------|---------|-----------|------------|-------------| +/// | 1 | 1 | 30 | 0 | 1 | | | +/// | 1 | 1 | 30 | 1 | 0 | (4,2,4) | (4,4,4) | +/// | 1 | 1 | 30 | 2 | 0 | (1,5,2) | (5,5,2) | +/// | 1 | 1 | 30 | 3 | 0 | (0,2,1) | (5,1,1) | +/// | 1 | ... | 30 | ... | 0 | ... 
| | +/// | 1 | 1 | 30 | 30 | 0 | (1,50,11) | | +/// | 1 | 2 | 20 | 0 | 1 | | | +/// | 1 | 2 | 20 | 1 | 0 | (3,52,13) | | +/// | 1 | ... | 20 | ... | 0 | | | +/// | 1 | 2 | 20 | 20 | 0 | | | +/// | 1 | 3 | 4 | 0 | 1 | | | +/// | ... | ... | ... | ... | ... | | | +/// | 1 | 998 | 0 | 0 | 1 | | | +/// | 1 | 999 | 0 | 0 | 1 | | | +/// +/// When all sequences from the compressed data have been handled, the remaining enabled rows +/// (q_enabled is true) have to be padded with an increasing block index, with `n_seq` set to 0 +/// and `s_beginning` set to true +/// +/// The transform from 'input cols' to 'output cols' according to zstd's spec +/// includes the following steps: +/// 1. accumulate the copied literal bytes in one section +/// 2. for match offset > 3, the actual offset val is the match offset minus 3, else we look it up +/// from the reference tables represented by 'repeated_offset_1/2/3' cols +/// 3. After each sequence, the reference tables are updated according to the +/// value of cooked offset and whether `literal_len` is zero +/// +/// |literal_len|match_offset|acc_lit_len| offset |match_len|rep_offset_1|rep_offset_2|rep_offset_3|s_beginning| +/// |-----------|------------|-----------|--------|---------|------------|------------|------------|-----------| +/// | | | | | | 1 | 4 | 8 | 1 | +/// | 4 | 2 | 4 | 4 | 4 | 4 | 1 | 8 | 0 | +/// | 1 | 5 | 5 | 5 | 2 | 5 | 4 | 1 | 0 | +/// | 0 | 2 | 5 | 1 | 1 | 1 | 5 | 4 | 0 | +/// | | | | | | | | | 0 | +/// + +#[derive(Clone)] +pub struct SeqInstTable { + + // active flag, one active row parse + q_enabled: Column, + + // 1-index for each block, kept the same for each row + // until all sequences have been handled + block_index: Column, + // the count of sequences in one block, kept the same + // for each row when block index is not changed + n_seq: Column, + // the 1-indexed seq number (1..=n_seq) for each + // sequence. 
We have extra row at the beginning of + // each block whose seq_index is 0 + seq_index: Column, + // the flag for the first row in each block (i.e. seq_index is 0) + s_beginning: Column, + + // the value directly decoded from bitstream, one row + // for one sequence + literal_len: Column, + match_offset: Column, + match_len: Column, + + // exported instructions for one sequence, + // note the match_len would be exported as-is + // updated offset + offset: Column, + // updated (acc) literal len + acc_literal_len: Column, + + // the reference table for repeated offset + rep_offset_1: Column, + rep_offset_2: Column, + rep_offset_3: Column, + + // helper cols for "zero testing". i.e for a cell with + // value v, the value in corresponding helper h is 1/v + // (if v is not zero) or 0 (if v is zero), and we constrain + // v * (1- v * h) == 0. We would have a boolean flag + // from h * v + + // detect if literal_len is zero + literal_is_zero: IsZeroConfig, + // detect if seq_index in current row equals + // to n_seq (i.e. 
n_seq - seq_index is zero) + seq_index_is_n_seq: IsEqualConfig, + // detect if current match_offset is 1, 2 or 3 + offset_is_1: IsEqualConfig, + offset_is_2: IsEqualConfig, + offset_is_3: IsEqualConfig, + + // detect if rep_offset_1 is 0 (indicates the data + // is corrupt) + ref_offset_1_is_zero: IsZeroConfig, +} + +impl LookupTable for SeqInstTable { + fn columns(&self) -> Vec> { + vec![ + self.q_enabled.into(), + self.block_index.into(), + self.n_seq.into(), + self.s_beginning.into(), + self.seq_index.into(), + self.literal_len.into(), + self.match_offset.into(), + self.match_len.into(), + ] + } + + fn annotations(&self) -> Vec { // one name per column, in the same order as `columns()` + vec![ + String::from("q_enabled"), + String::from("block_index"), + String::from("n_seq"), + String::from("s_beginning"), + String::from("seq_index"), + String::from("literal_len"), + String::from("match_offset"), + String::from("match_len"), + ] + } +} + +#[derive(Clone, Debug)] +struct ChipContext { + literal_is_zero_chip: IsZeroChip, + ref_offset_1_is_zero_chip: IsZeroChip, + seq_index_chip: IsEqualChip, + offset_is_1_chip: IsEqualChip, + offset_is_2_chip: IsEqualChip, + offset_is_3_chip: IsEqualChip, +} + +impl ChipContext { + fn construct(config: &SeqInstTable) -> Self { + + let literal_is_zero_chip = IsZeroChip::construct(config.literal_is_zero.clone()); + let ref_offset_1_is_zero_chip = IsZeroChip::construct(config.ref_offset_1_is_zero.clone()); + let seq_index_chip = IsEqualChip::construct(config.seq_index_is_n_seq.clone()); + let offset_is_1_chip = IsEqualChip::construct(config.offset_is_1.clone()); + let offset_is_2_chip = IsEqualChip::construct(config.offset_is_2.clone()); + let offset_is_3_chip = IsEqualChip::construct(config.offset_is_3.clone()); + + Self { + literal_is_zero_chip, + ref_offset_1_is_zero_chip, + seq_index_chip, + offset_is_1_chip, + offset_is_2_chip, + offset_is_3_chip, + } + + } +} + +impl SeqInstTable { + + /// The sequence count should be looked 
up by the parsed bitstream, + /// using the block index and value 
for sequence count tag to + /// lookup (`true`, `block_index`, 1, `value`) + /// The table would be padded by increased block index to + /// fill all rows being enabled + /// + /// | enabled |block_index| flag | n_seq | + /// |---------|-----------|-------|-------| + /// | 1 | 1 | 1 | 30 | + /// | 1 | ... | ... | 30 | + /// | 1 | 2 | 1 | 20 | + /// | 1 | ... | ... | 20 | + /// | 1 | 3 | 1 | 4 | + /// | ... | ... | ... | ... | + /// | 1 | 999 | 1 | 0 | + pub fn seq_count_lookup(&self) -> [Column;4]{ + [ + self.q_enabled.into(), + self.block_index.into(), + self.s_beginning.into(), + self.n_seq.into(), + ] + } + + /// The sequence values should be looked up by the parsed bitstream, + /// using the block index and value with each sequence tag for + /// multiple lookups (`true`, `block_index`, 0, `seq_index`, `value`) on + /// corresponding value column (literal len, offset, match len) + /// , or a lookup with suitable rotations + /// | enabled |block_index|s_beginning|seq_index| literal | offset | match | + /// |---------|-----------|-----------|---------|---------|--------|-------| + /// | 1 | 1 | 0 | 1 | 4 | 2 | 4 | + /// | 1 | 1 | 0 | 2 | 1 | 5 | 2 | + /// | 1 | 1 | 0 | 3 | 0 | 2 | 3 | + /// | 1 | ... | 0 | ... | ... | ... | ... | + /// | 1 | 1 | 0 | 30 | 1 | 50 | 11 | + /// | 1 | 2 | 0 | 1 | 3 | 52 | 13 | + /// | 1 | ... | 0 | ... | ... | ... | ... 
| + /// + pub fn seq_values_lookup(&self) -> [Column;7]{ + [ + self.q_enabled.into(), + self.block_index.into(), + self.s_beginning.into(), + self.seq_index.into(), + self.literal_len.into(), + self.match_offset.into(), + self.match_len.into(), + ] + } + + /// Obtain the instruction table cols + pub fn instructions(&self) -> [Column;5]{ + [ + self.block_index, + self.seq_index, + self.offset, + self.acc_literal_len, + self.match_len, + ] + } + + /// Construct the sequence instruction table + /// the maximum rotation is prev(1), next(1) + pub fn configure( + meta: &mut ConstraintSystem, + ) -> Self { + let q_enabled = meta.fixed_column(); + let block_index = meta.advice_column(); + let n_seq = meta.advice_column(); + let literal_len = meta.advice_column(); + let match_offset = meta.advice_column(); + let match_len = meta.advice_column(); + let offset = meta.advice_column(); + let acc_literal_len = meta.advice_column(); + let s_beginning = meta.advice_column(); + let seq_index = meta.advice_column(); + let rep_offset_1 = meta.advice_column(); + let rep_offset_2 = meta.advice_column(); + let rep_offset_3 = meta.advice_column(); + + let [literal_is_zero, ref_offset_1_is_zero] = + [literal_len, rep_offset_1].map(|col|{ + let inv_col = meta.advice_column(); + IsZeroChip::configure( + meta, + |meta|meta.query_fixed(q_enabled, Rotation::cur()), + |meta|meta.query_advice(col, Rotation::cur()), + inv_col + ) + }); + let [offset_is_1, offset_is_2, offset_is_3] = + [1,2,3].map(|val|{ + IsEqualChip::configure( + meta, + |meta|meta.query_fixed(q_enabled, Rotation::cur()), + |meta|meta.query_advice(match_offset, Rotation::cur()), + |_|val.expr() + ) + }); + let seq_index_is_n_seq = IsEqualChip::configure( + meta, + |meta|meta.query_fixed(q_enabled, Rotation::cur()), + |meta|meta.query_advice(seq_index, Rotation::cur()), + |meta|meta.query_advice(n_seq, Rotation::cur()), + ); + + // seq_index must increment and compare with n_seq for seq border + meta.create_gate("seq index and 
section borders", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let seq_index_next = meta.query_advice(seq_index, Rotation::next()); + let seq_index = meta.query_advice(seq_index, Rotation::cur()); + let is_seq_border = &seq_index_is_n_seq; + + cb.require_equal("seq index must increment or 0 in s_beginning", + select::expr( + is_seq_border.expr(), + 0.expr(), + seq_index.expr() + 1.expr(), + ), seq_index_next.expr() + ); + + let s_beginning = meta.query_advice(s_beginning, Rotation::next()); + cb.require_boolean("s_beginning is boolean", + s_beginning.expr(), + ); + + cb.condition(not::expr(is_seq_border.expr()), + |cb|{ + cb.require_zero("s_beginning on enabled after seq border", + s_beginning.expr(), + ) + } + ); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::next()) + ) + }); + + // block index must be incremented at a seq border, so the section for each + // block index can occur only once + // and the lookup from seq_table enforces that valid block / seq / s_beginning + // must be put + meta.create_gate("block index", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let block_index_next = meta.query_advice(block_index, Rotation::next()); + let block_index = meta.query_advice(block_index, Rotation::cur()); + + let is_seq_border = &seq_index_is_n_seq; + + cb.require_equal("block can only increase in seq border", + select::expr( + is_seq_border.expr(), + block_index.expr() + 1.expr(), + block_index.expr(), + ), + block_index_next, + ); + cb.gate(meta.query_fixed(q_enabled, Rotation::next())) + }); + + // so, we enforce s_beginning enabled for valid block index + meta.create_gate("border constaints", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + let s_beginning = meta.query_advice(s_beginning, Rotation::cur()); + + let repeated_offset_pairs = [ + rep_offset_1, + rep_offset_2, + rep_offset_3, + ].map(|col| + (meta.query_advice(col, Rotation::cur()), + meta.query_advice(col, Rotation::prev())) + ); + + for (repeated_offset, 
repeated_offset_prev) in repeated_offset_pairs { + cb.condition(s_beginning.expr(), |cb|{ + + cb.require_equal("offset must be inherited in border", + repeated_offset, + repeated_offset_prev, + ) + }); + } + + let literal_len = meta.query_advice(literal_len, Rotation::cur()); + cb.require_equal("literal len accumulation", + select::expr(s_beginning.expr(), + literal_len.expr(), + literal_len.expr() + meta.query_advice(acc_literal_len, Rotation::prev()), + ), + meta.query_advice(acc_literal_len, Rotation::cur()), + ); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + // offset is in-section (not s_beginning) + meta.create_gate("offset reference", |meta|{ + let mut cb = BaseConstraintBuilder::default(); + + let offset_val = meta.query_advice(offset, Rotation::cur()); + let offset = meta.query_advice(match_offset, Rotation::cur()); + + let s_is_offset_ref = or::expr([ + offset_is_1.expr(), + offset_is_2.expr(), + offset_is_3.expr(), + ]); + + let [rep_offset_1_prev, rep_offset_2_prev, rep_offset_3_prev] + = [ + rep_offset_1, + rep_offset_2, + rep_offset_3, + ].map(|col|meta.query_advice(col, Rotation::prev())); + + let [rep_offset_1, rep_offset_2, rep_offset_3] + = [ + rep_offset_1, + rep_offset_2, + rep_offset_3, + ].map(|col|meta.query_advice(col, Rotation::cur())); + + // in ref offset case, the actual offset val comes from the + // ref offset table (a special case applies if literal len + // is zero) + let offset_ref_val_on_literal_zero = + offset_is_3.expr() * (rep_offset_1_prev.expr() - 1.expr()) + + offset_is_1.expr() * rep_offset_2_prev.expr() + + offset_is_2.expr() * rep_offset_3_prev.expr(); + let offset_ref_val = + offset_is_1.expr() * rep_offset_1_prev.expr() + + offset_is_2.expr() * rep_offset_2_prev.expr() + + offset_is_3.expr() * rep_offset_3_prev.expr(); + + let offset_ref_val = select::expr( + literal_is_zero.expr(), + offset_ref_val_on_literal_zero, + offset_ref_val, + ); + + cb.require_equal("offset value", + select::expr( + 
s_is_offset_ref.expr(), + offset_ref_val, + offset.expr() - 3.expr(), + ), + offset_val.expr() + ); + // and ref in offset_1 is updated by current value + cb.require_equal("set offset 0 to offset val", + offset_val.expr(), + rep_offset_1.expr(), + ); + + // in the following we update the table for rep_offset_2/3 + + // for no-ref ref offset table 2/3 is + // updated with a "shift" nature + cb.condition(not::expr(s_is_offset_ref.expr()),|cb|{ + cb.require_equal("shift 1 -> 2", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal("shift 2 -> 3", + rep_offset_2_prev.expr(), + rep_offset_3.expr(), + ); + }); + + // in ref offset case (offset is 1-3), the table is + // updated in a more complicated fashion + cb.condition( + and::expr([ + not::expr(literal_is_zero.expr()), + offset_is_1.expr(), + ]), + |cb|{ + + cb.require_equal("copy offset 2 for ref 1", + rep_offset_2_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal("copy offset 3 for ref 1", + rep_offset_3_prev.expr(), + rep_offset_3.expr(), + ); + } + ); + cb.condition( + select::expr( + literal_is_zero.expr(), + offset_is_1.expr(), + offset_is_2.expr(), + ), + |cb|{ + + cb.require_equal("swap 1&2 for ref 2", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal("copy offset 3 for ref 2", + rep_offset_3_prev.expr(), + rep_offset_3.expr(), + ); + } + ); + cb.condition( + select::expr( + literal_is_zero.expr(), + // this equals "or" since they are exclusive + // this trick is used to save a degree + offset_is_2.expr() + offset_is_3.expr(), + offset_is_3.expr(), + ), + |cb|{ + + cb.require_equal("rotate 3-1 for ref 3", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal("rotate 3-1 for ref 3", + rep_offset_2_prev.expr(), + rep_offset_3.expr(), + ); + } + ); + + cb.condition(literal_is_zero.expr(), |cb|{ + cb.require_zero("data must not corrupt", + ref_offset_1_is_zero.expr(), + ) + }); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur())* + 
not::expr(meta.query_advice(s_beginning, Rotation::cur())), + ) + }); + + // the beginning of following rows must be constrained + meta.enable_equality(block_index); + meta.enable_equality(seq_index); + meta.enable_equality(rep_offset_1); + meta.enable_equality(rep_offset_2); + meta.enable_equality(rep_offset_3); + + Self { + q_enabled, + block_index, + n_seq, + literal_len, + match_offset, + match_len, + offset, + acc_literal_len, + s_beginning, + seq_index, + rep_offset_1, + rep_offset_2, + rep_offset_3, + offset_is_1, + offset_is_2, + offset_is_3, + literal_is_zero, + seq_index_is_n_seq, + ref_offset_1_is_zero, + } + } + + // assign a heading / padding row before each block + fn assign_heading_row<'a>( + &self, + region: &mut Region, + offset: usize, + block_ind: u64, + n_seq: usize, + chip_ctx: &ChipContext, + offset_table: &[u64;3], + ) -> Result{ + + region.assign_fixed( + ||"enable row", + self.q_enabled, offset, + || Value::known(F::one()), + )?; + + for col in [ + self.rep_offset_1, + self.rep_offset_2, + self.rep_offset_3, + self.match_len, + self.match_offset, + self.literal_len, + self.acc_literal_len, + self.offset, + self.seq_index, + ] { + region.assign_advice( + ||"padding values", + col, offset, ||Value::known(F::zero()) + )?; + } + + for (col, val) in [ + (self.rep_offset_1, offset_table[0]), + (self.rep_offset_2, offset_table[1]), + (self.rep_offset_3, offset_table[2]), + (self.block_index, block_ind), + (self.n_seq, n_seq as u64) + ]{ + region.assign_advice( + ||"header block fill", + col, offset, + ||Value::known(F::from(val)) + )?; + } + + chip_ctx.literal_is_zero_chip.assign(region, offset, Value::known(F::zero()))?; + chip_ctx.ref_offset_1_is_zero_chip.assign(region, offset, Value::known(F::from(offset_table[0])))?; + + for (chip, val) in [ + (&chip_ctx.offset_is_1_chip, F::from(1u64)), + (&chip_ctx.offset_is_2_chip, F::from(2u64)), + (&chip_ctx.offset_is_3_chip, F::from(3u64)), + (&chip_ctx.seq_index_chip, F::from(n_seq as u64)), + ]{ + 
chip.assign(region, offset, Value::known(F::zero()), Value::known(val))?; + } + + region.assign_advice(||"set beginning flag", + self.s_beginning, + offset, + ||Value::known(F::one()), + )?; + + Ok(offset+1) + + } + + // padding for the remaining rows + fn padding_rows<'a>( + &self, + region: &mut Region, + mut offset: usize, + till_offset: usize, + mut blk_index: u64, + chip_ctx: &ChipContext, + offset_table: &[u64;3], + ) -> Result<(), Error>{ + + // pad the remaining rows until the final row + while offset < till_offset { + offset = self.assign_heading_row( + region, + offset, + blk_index, + 0, + chip_ctx, + offset_table, + )?; + + blk_index += 1; + } + + Ok(()) + } + + // assign a single block from current offset + // and return the offset below the last used row + fn assign_block<'a>( + &self, + region: &mut Region, + mut offset: usize, + block_ind: u64, + n_seq: usize, + table_rows: impl Iterator, + chip_ctx: &ChipContext, + offset_table: &mut [u64;3], + ) -> Result{ + + let mut seq_index = 0u64; + let mut acc_literal_len = 0u64; + + for table_row in table_rows { + + seq_index += 1; + + region.assign_fixed( + ||"enable row", + self.q_enabled, offset, + || Value::known(F::one()), + )?; + + let offset_val = match table_row.cooked_match_offset { + 0 => panic!("invalid cooked offset"), + 1 => if table_row.literal_length == 0 { + offset_table[1] + } else { + offset_table[0] + }, + 2 => if table_row.literal_length == 0 { + offset_table[2] + } else { + offset_table[1] + }, + 3 => if table_row.literal_length == 0 { + offset_table[0] - 1 + } else { + offset_table[2] + }, + val => val - 3, + }; + acc_literal_len += table_row.literal_length; + + assert_eq!(offset_val, table_row.actual_offset); + offset_table[0] = table_row.repeated_offset1; + offset_table[1] = table_row.repeated_offset2; + offset_table[2] = table_row.repeated_offset3; + + for (name, col, val) in [ + ("beginning flag", self.s_beginning, F::zero()), + ("offset table 1", self.rep_offset_1, F::from(offset_table[0])), + 
("offset table 2", self.rep_offset_2, F::from(offset_table[1])), + ("offset table 3", self.rep_offset_3, F::from(offset_table[2])), + ("mlen", self.match_len, F::from(table_row.match_length)), + ("moff", self.match_offset, F::from(table_row.cooked_match_offset)), + ("llen", self.literal_len, F::from(table_row.literal_length)), + ("llen_acc", self.acc_literal_len, F::from(acc_literal_len)), + ("offset", self.offset, F::from(offset_val)), + ("seq ind", self.seq_index, F::from(seq_index)), + ("block ind", self.block_index, F::from(block_ind)), + ("n_seq", self.n_seq, F::from(n_seq as u64)), + ] { + region.assign_advice( + ||name, col, offset, ||Value::known(val) + )?; + } + + for (chip, val) in [ + (&chip_ctx.literal_is_zero_chip, F::from(table_row.literal_length)), + (&chip_ctx.ref_offset_1_is_zero_chip, F::from(offset_table[0])), + ] { + chip.assign(region, offset, Value::known(val))?; + } + + for (chip, val_l, val_r) in [ + (&chip_ctx.offset_is_1_chip, F::from(table_row.cooked_match_offset), F::from(1u64)), + (&chip_ctx.offset_is_2_chip, F::from(table_row.cooked_match_offset), F::from(2u64)), + (&chip_ctx.offset_is_3_chip, F::from(table_row.cooked_match_offset), F::from(3u64)), + (&chip_ctx.seq_index_chip, F::from(seq_index), F::from(n_seq as u64)), + ]{ + chip.assign( + region, + offset, + Value::known(val_l), + Value::known(val_r) + )?; + } + offset += 1; + } + + assert_eq!(n_seq as u64, seq_index); + + Ok(offset) + } + + // assign the top row + fn init_top_row( + &self, + region: &mut Region, + from_offset: Option, + ) -> Result{ + let offset = from_offset.unwrap_or_default(); + // top row constraint + for (col, val) in [ + (self.rep_offset_1, F::from(1u64)), + (self.rep_offset_2, F::from(4u64)), + (self.rep_offset_3, F::from(8u64)), + ] { + region.assign_advice_from_constant(||"top row", col, offset, val)?; + } + + for col in [ + self.block_index, + self.seq_index, + self.acc_literal_len, + ] { + region.assign_advice(||"top row flush", col, offset, 
||Value::known(F::zero()))?; + } + + for (col, val) in [ + (self.block_index, F::one()), + (self.seq_index, F::zero()), + ] { + region.assign_advice_from_constant(||"begin row constraint", col, offset+1, val)?; + } + + Ok(offset+1) + } + + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + table_rows: &[AddressTableRow], + enabled_rows: usize, + ) -> Result<(), Error>{ + let chip_ctx = ChipContext::construct(self); + layouter.assign_region( + || "addr table", + |mut region|{ + let mut offset_table : [u64;3]= [1,4,8]; + let offset = self.init_top_row(&mut region, None)?; + let offset = self.assign_heading_row( + &mut region, + offset, + 1, + table_rows.len(), + &chip_ctx, + &mut offset_table, + )?; + let offset = self.assign_block( + &mut region, + offset, + 1, + table_rows.len(), + table_rows.iter(), + &chip_ctx, + &mut offset_table, + )?; + assert!(offset < enabled_rows); + + self.padding_rows( + &mut region, + offset, + enabled_rows, + 2, + &chip_ctx, + &offset_table, + )?; + + Ok(()) + } + ) + } + +} + +#[cfg(test)] +mod tests { + + use halo2_proofs::{ + circuit::SimpleFloorPlanner, + dev::MockProver, + halo2curves::bn256::Fr, + plonk::Circuit, + }; + use super::*; + + #[derive(Clone, Debug)] + struct SeqTable (Vec); + + impl Circuit for SeqTable { + type Config = SeqInstTable; + type FloorPlanner = SimpleFloorPlanner; + fn without_witnesses(&self) -> Self { + unimplemented!() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + + let const_col = meta.fixed_column(); + meta.enable_constant(const_col); + + Self::Config::configure(meta) + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + + config.mock_assign( + &mut layouter, + &self.0, + 15, + )?; + + Ok(()) + } + } + + #[test] + fn seqinst_table_gates(){ + + // example comes from zstd's spec + let circuit = SeqTable( + AddressTableRow::mock_samples( + &[ + [1114, 11, 1111, 1, 4], + [1, 22, 1111, 1, 4], + 
[2225, 22, 2222, 1111, 1], + [1114, 111, 1111, 2222, 1111], + [3336, 33, 3333, 1111, 2222], + [2, 22, 1111, 3333, 2222], + [3, 33, 2222, 1111, 3333], + [3, 0, 2221, 2222, 1111], + [1, 0, 2222, 2221, 1111], + ], + ), + ); + + let k = 12; + let mock_prover = MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + + } +} \ No newline at end of file diff --git a/aggregator/src/aggregation/decoder/witgen.rs b/aggregator/src/aggregation/decoder/witgen.rs index e25b1744e3..c106952fc8 100644 --- a/aggregator/src/aggregation/decoder/witgen.rs +++ b/aggregator/src/aggregation/decoder/witgen.rs @@ -1,7 +1,16 @@ -use std::collections::BTreeMap; +#![allow(dead_code)] +#![allow(clippy::too_many_arguments)] use eth_types::Field; -use halo2_proofs::circuit::Value; +// use ethers_core::k256::pkcs8::der::Sequence; +use halo2_proofs::{circuit::Value, halo2curves::bn256::Fr}; +use revm_precompile::HashMap; +use revm_primitives::bitvec::ptr::write; +// use zkevm_circuits::witness; +// use zstd::zstd_safe::WriteBuf; + +// witgen_debug +use std::{io, io::Write}; mod params; pub use params::*; @@ -12,26 +21,23 @@ pub use types::{ZstdTag::*, *}; pub mod util; use util::{be_bits_to_value, increment_idx, le_bits_to_value, value_bits_le}; -const TAG_MAX_LEN: [(ZstdTag, u64); 13] = [ +const TAG_MAX_LEN: [(ZstdTag, u64); 8] = [ (FrameHeaderDescriptor, 1), (FrameContentSize, 8), (BlockHeader, 3), - (RawBlockBytes, 8388607), // (1 << 23) - 1 - (RleBlockBytes, 8388607), (ZstdBlockLiteralsHeader, 5), (ZstdBlockLiteralsRawBytes, 1048575), // (1 << 20) - 1 - (ZstdBlockLiteralsRleBytes, 1048575), - (ZstdBlockLiteralsHeader, 5), - (ZstdBlockFseCode, 128), - (ZstdBlockHuffmanCode, 128), // header_byte < 128 - (ZstdBlockJumpTable, 6), - (ZstdBlockLstream, 1000), // 1kB hard-limit + (ZstdBlockSequenceHeader, 4), + (ZstdBlockSequenceFseCode, 128), + (ZstdBlockSequenceData, 1048575), // (1 << 20) - 1 ]; -fn lookup_max_tag_len(tag: ZstdTag) -> u64 { +pub 
fn lookup_max_tag_len(tag: ZstdTag) -> u64 { TAG_MAX_LEN.iter().find(|record| record.0 == tag).unwrap().1 } +const CMOT_N: u64 = 31; + /// FrameHeaderDescriptor and FrameContentSize fn process_frame_header( src: &[u8], @@ -63,23 +69,14 @@ fn process_frame_header( _ => unreachable!("2-bit value"), }; - // FrameContentSize bytes are read in little-endian, hence its in reverse mode. let fcs_bytes = src .iter() .skip(byte_offset + 1) .take(fcs_tag_len) - // .rev() - .cloned() - .collect::>(); - let fcs_bytes_rev = src - .iter() - .skip(byte_offset + 1) - .take(fcs_tag_len) - .rev() .cloned() .collect::>(); let fcs = { - let fcs = fcs_bytes_rev + let fcs = fcs_bytes .iter() .fold(0u64, |acc, &byte| acc * 256u64 + (byte as u64)); match fcs_tag_len { @@ -89,7 +86,6 @@ fn process_frame_header( }; let fcs_tag_value_iter = fcs_bytes .iter() - .rev() .scan(Value::known(F::zero()), |acc, &byte| { *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); Some(*acc) @@ -127,6 +123,7 @@ fn process_frame_header( tag: ZstdTag::FrameHeaderDescriptor, tag_next: ZstdTag::FrameContentSize, max_tag_len: lookup_max_tag_len(ZstdTag::FrameHeaderDescriptor), + block_idx: 0, tag_len: 1, tag_idx: 1, tag_value: Value::known(F::from(*fhd_byte as u64)), @@ -150,15 +147,14 @@ fn process_frame_header( decoded_value_rlc: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow::default(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), + fse_data: FseDecodingRow::default(), }) .chain( - fcs_bytes_rev + fcs_bytes .iter() .zip(fcs_tag_value_iter) - .zip(fcs_value_rlcs.iter().rev()) - .zip(tag_rlc_iter.iter().rev()) + .zip(fcs_value_rlcs.iter()) + .zip(tag_rlc_iter.iter()) .enumerate() .map( |(i, (((&value_byte, tag_value_acc), _value_rlc), &tag_rlc_acc))| { @@ -166,6 +162,7 @@ fn process_frame_header( state: ZstdState { tag: ZstdTag::FrameContentSize, tag_next: ZstdTag::BlockHeader, + block_idx: 0, max_tag_len: 
lookup_max_tag_len(ZstdTag::FrameContentSize), tag_len: fcs_tag_len as u64, tag_idx: (i + 1) as u64, @@ -179,7 +176,7 @@ fn process_frame_header( byte_idx: (byte_offset + 2 + i) as u64, encoded_len: last_row.encoded_data.encoded_len, value_byte, - reverse: true, + reverse: false, reverse_idx: (fcs_tag_len - i) as u64, reverse_len: fcs_tag_len as u64, aux_1: *aux_1, @@ -194,8 +191,7 @@ fn process_frame_header( decoded_value_rlc: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow::default(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), + fse_data: FseDecodingRow::default(), } }, ), @@ -207,95 +203,81 @@ fn process_frame_header( type AggregateBlockResult = ( usize, Vec>, - bool, + BlockInfo, + SequenceInfo, Vec, Vec, Vec, - FseAuxiliaryTableData, - HuffmanCodesData, + [FseAuxiliaryTableData; 3], // 3 sequence section FSE tables ); fn process_block( src: &[u8], + block_idx: u64, byte_offset: usize, last_row: &ZstdWitnessRow, randomness: Value, ) -> AggregateBlockResult { let mut witness_rows = vec![]; - let (byte_offset, rows, last_block, block_type, block_size) = - process_block_header(src, byte_offset, last_row, randomness); + let (byte_offset, rows, block_info) = + process_block_header(src, block_idx, byte_offset, last_row, randomness); witness_rows.extend_from_slice(&rows); let last_row = rows.last().expect("last row expected to exist"); - let (_byte_offset, rows, literals, lstream_len, aux_data, fse_aux_table, huffman_codes) = - match block_type { - BlockType::RawBlock => process_block_raw( - src, - byte_offset, - last_row, - randomness, - block_size, - last_block, - ), - BlockType::RleBlock => process_block_rle( - src, - byte_offset, - last_row, - randomness, - block_size, - last_block, - ), + let (_byte_offset, rows, literals, lstream_len, aux_data, sequence_info, fse_aux_tables) = + match block_info.block_type { BlockType::ZstdCompressedBlock => process_block_zstd( src, + block_idx, byte_offset, last_row, 
randomness, - block_size, - last_block, + block_info.block_len, + block_info.is_last_block, ), - BlockType::Reserved => unreachable!("Reserved block type not expected"), + _ => unreachable!("BlockType::ZstdCompressedBlock expected"), }; witness_rows.extend_from_slice(&rows); ( byte_offset, witness_rows, - last_block, + block_info, + sequence_info, literals, lstream_len, aux_data, - fse_aux_table, - huffman_codes, + fse_aux_tables, ) } fn process_block_header( src: &[u8], + block_idx: u64, byte_offset: usize, last_row: &ZstdWitnessRow, randomness: Value, -) -> (usize, Vec>, bool, BlockType, usize) { +) -> (usize, Vec>, BlockInfo) { + let mut block_info = BlockInfo::default(); + block_info.block_idx = block_idx as usize; let bh_bytes = src .iter() .skip(byte_offset) .take(N_BLOCK_HEADER_BYTES) .cloned() .collect::>(); - let last_block = (bh_bytes[0] & 1) == 1; - let block_type = BlockType::from((bh_bytes[0] >> 1) & 3); - let block_size = + block_info.is_last_block = (bh_bytes[0] & 1) == 1; + block_info.block_type = BlockType::from((bh_bytes[0] >> 1) & 3); + block_info.block_len = (bh_bytes[2] as usize * 256 * 256 + bh_bytes[1] as usize * 256 + bh_bytes[0] as usize) >> 3; - let tag_next = match block_type { - BlockType::RawBlock => ZstdTag::RawBlockBytes, - BlockType::RleBlock => ZstdTag::RleBlockBytes, + let tag_next = match block_info.block_type { BlockType::ZstdCompressedBlock => ZstdTag::ZstdBlockLiteralsHeader, - _ => unreachable!("BlockType::Reserved unexpected"), + _ => unreachable!("BlockType::ZstdCompressedBlock expected"), }; let tag_value_iter = bh_bytes .iter() - .rev() .scan(Value::known(F::zero()), |acc, &byte| { *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); Some(*acc) @@ -336,15 +318,15 @@ fn process_block_header( byte_offset + N_BLOCK_HEADER_BYTES, bh_bytes .iter() - .rev() .zip(tag_value_iter) - .zip(tag_rlc_iter.iter().rev()) + .zip(tag_rlc_iter.iter()) .enumerate() .map( |(i, ((&value_byte, tag_value_acc), 
tag_rlc_acc))| ZstdWitnessRow { state: ZstdState { tag: ZstdTag::BlockHeader, tag_next, + block_idx, max_tag_len: lookup_max_tag_len(ZstdTag::BlockHeader), tag_len: N_BLOCK_HEADER_BYTES as u64, tag_idx: (i + 1) as u64, @@ -358,168 +340,17 @@ fn process_block_header( byte_idx: (byte_offset + i + 1) as u64, encoded_len: last_row.encoded_data.encoded_len, value_byte, - reverse: true, + reverse: false, value_rlc, ..Default::default() }, bitstream_read_data: BitstreamReadRow::default(), decoded_data: last_row.decoded_data.clone(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), - }, - ) - .collect::>(), - last_block, - block_type, - block_size, - ) -} - -fn process_raw_bytes( - src: &[u8], - byte_offset: usize, - last_row: &ZstdWitnessRow, - randomness: Value, - n_bytes: usize, - tag: ZstdTag, - tag_next: ZstdTag, -) -> (usize, Vec>) { - let value_rlc_iter = src.iter().skip(byte_offset).take(n_bytes).scan( - last_row.encoded_data.value_rlc, - |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }, - ); - let decoded_value_rlc_iter = src.iter().skip(byte_offset).take(n_bytes).scan( - last_row.decoded_data.decoded_value_rlc, - |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }, - ); - let tag_value_iter = - src.iter() - .skip(byte_offset) - .take(n_bytes) - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter - .clone() - .last() - .expect("Raw bytes must be of non-zero length"); - - ( - byte_offset + n_bytes, - src.iter() - .skip(byte_offset) - .take(n_bytes) - .zip(tag_value_iter) - .zip(value_rlc_iter) - .zip(decoded_value_rlc_iter) - .enumerate() - .map( - |(i, (((&value_byte, tag_value_acc), value_rlc), decoded_value_rlc))| { - ZstdWitnessRow { - state: ZstdState { - tag, - tag_next, - max_tag_len: lookup_max_tag_len(tag), - tag_len: n_bytes as 
u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc: Value::known(F::zero()), - tag_rlc_acc: Value::known(F::zero()), - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + i + 1) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - value_rlc, - reverse: false, - ..Default::default() - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len + (i as u64) + 1, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: value_byte, - decoded_value_rlc, - }, - bitstream_read_data: BitstreamReadRow::default(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), - } + fse_data: FseDecodingRow::default(), }, ) .collect::>(), - ) -} - -fn process_rle_bytes( - src: &[u8], - byte_offset: usize, - last_row: &ZstdWitnessRow, - randomness: Value, - n_bytes: usize, - tag: ZstdTag, - tag_next: ZstdTag, -) -> (usize, Vec>) { - let rle_byte = src[byte_offset]; - let value_rlc = - last_row.encoded_data.value_rlc * randomness + Value::known(F::from(rle_byte as u64)); - let decoded_value_rlc_iter = std::iter::repeat(rle_byte).take(n_bytes).scan( - last_row.decoded_data.decoded_value_rlc, - |acc, byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }, - ); - let tag_value = Value::known(F::from(rle_byte as u64)); - - ( - byte_offset + 1, - std::iter::repeat(rle_byte) - .take(n_bytes) - .zip(decoded_value_rlc_iter) - .enumerate() - .map(|(i, (value_byte, decoded_value_rlc))| ZstdWitnessRow { - state: ZstdState { - tag, - tag_next, - max_tag_len: lookup_max_tag_len(tag), - tag_len: n_bytes as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc: tag_value, - is_tag_change: i == 0, - tag_rlc: Value::known(F::zero()), - tag_rlc_acc: Value::known(F::zero()), - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + 1) as u64, - encoded_len: 
last_row.encoded_data.encoded_len, - value_byte, - reverse: false, - value_rlc, - ..Default::default() - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len_acc + (i as u64) + 1, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: value_byte, - decoded_value_rlc, - }, - bitstream_read_data: BitstreamReadRow::default(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), - }) - .collect::>(), + block_info, ) } @@ -529,276 +360,143 @@ type BlockProcessingResult = ( Vec, Vec, Vec, - FseAuxiliaryTableData, - HuffmanCodesData, + SequenceInfo, + [FseAuxiliaryTableData; 3], // 3 sequence section FSE tables ); -fn process_block_raw( - src: &[u8], - byte_offset: usize, - last_row: &ZstdWitnessRow, - randomness: Value, - block_size: usize, - last_block: bool, -) -> BlockProcessingResult { - let tag_next = if last_block { - ZstdTag::Null - } else { - ZstdTag::BlockHeader - }; - - let (byte_offset, rows) = process_raw_bytes( - src, - byte_offset, - last_row, - randomness, - block_size, - ZstdTag::RawBlockBytes, - tag_next, - ); - - let fse_aux_table = FseAuxiliaryTableData { - block_idx: 0, - table_kind: FseTableKind::LLT, - table_size: 0, - sym_to_states: BTreeMap::default(), - sym_to_sorted_states: BTreeMap::default(), - }; - let huffman_weights = HuffmanCodesData { - byte_offset: 0, - weights: vec![], - }; - - ( - byte_offset, - rows.clone(), - vec![], - vec![rows.len() as u64, 0, 0, 0], - vec![0, 0, 0, 0, 0, 0], - fse_aux_table, - huffman_weights, - ) -} - -fn process_block_rle( - src: &[u8], - byte_offset: usize, - last_row: &ZstdWitnessRow, - randomness: Value, - block_size: usize, - last_block: bool, -) -> BlockProcessingResult { - let tag_next = if last_block { - ZstdTag::Null - } else { - ZstdTag::BlockHeader - }; - - let (byte_offset, rows) = process_rle_bytes( - src, - byte_offset, - last_row, - randomness, - block_size, - 
ZstdTag::RleBlockBytes, - tag_next, - ); - - let fse_aux_table = FseAuxiliaryTableData { - block_idx: 0, - table_kind: FseTableKind::LLT, - table_size: 0, - sym_to_states: BTreeMap::default(), - sym_to_sorted_states: BTreeMap::default(), - }; - let huffman_weights = HuffmanCodesData { - byte_offset: 0, - weights: vec![], - }; - - ( - byte_offset, - rows.clone(), - vec![], - vec![rows.len() as u64, 0, 0, 0], - vec![0, 0, 0, 0, 0, 0], - fse_aux_table, - huffman_weights, - ) -} - type LiteralsBlockResult = (usize, Vec>, Vec, Vec, Vec); #[allow(unused_variables)] fn process_block_zstd( src: &[u8], + block_idx: u64, byte_offset: usize, last_row: &ZstdWitnessRow, randomness: Value, block_size: usize, last_block: bool, ) -> BlockProcessingResult { + let end_offset = byte_offset + block_size; let mut witness_rows = vec![]; // 1-5 bytes LiteralSectionHeader let literals_header_result: LiteralsHeaderProcessingResult = - process_block_zstd_literals_header::(src, byte_offset, last_row, randomness); + process_block_zstd_literals_header::(src, block_idx, byte_offset, last_row, randomness); let ( byte_offset, rows, - literals_block_type, + _literals_block_type, n_streams, regen_size, compressed_size, (branch, sf_max), ) = literals_header_result; - // let ( - // byte_offset, - // rows, - // literals_block_type, - // n_streams, - // regen_size, - // compressed_size, - // (branch, sf_max), - // ) = process_block_zstd_literals_header::(src, byte_offset, last_row, randomness); witness_rows.extend_from_slice(&rows); - let mut fse_aux_table = FseAuxiliaryTableData { - block_idx: 0, - table_kind: FseTableKind::LLT, - table_size: 0, - sym_to_states: BTreeMap::default(), - sym_to_sorted_states: BTreeMap::default(), - }; - let mut huffman_weights = HuffmanCodesData { - byte_offset: 0, - weights: vec![], - }; - - // Depending on the literals block type, decode literals section accordingly - let literals_block_result: LiteralsBlockResult = match literals_block_type { - BlockType::RawBlock => 
{ - let (byte_offset, rows) = process_raw_bytes( - src, - byte_offset, - rows.last().expect("last row expected to exist"), - randomness, - regen_size, - ZstdTag::ZstdBlockLiteralsRawBytes, - ZstdTag::ZstdBlockSequenceHeader, - ); - - ( - byte_offset, - rows.clone(), - vec![], - vec![rows.len() as u64, 0, 0, 0], - vec![0, 0, 0, 0], - ) - } - BlockType::RleBlock => { - let (byte_offset, rows) = process_rle_bytes( - src, - byte_offset, - rows.last().expect("last row expected to exist"), - randomness, - regen_size, - ZstdTag::ZstdBlockLiteralsRleBytes, - ZstdTag::ZstdBlockSequenceHeader, - ); - - ( - byte_offset, - rows.clone(), - vec![], - vec![rows.len() as u64, 0, 0, 0], - vec![0, 0, 0, 0], - ) - } - BlockType::ZstdCompressedBlock => { - let mut huffman_rows = vec![]; - - let ( - bytes_offset, - rows, - huffman_codes, - n_huffman_bytes, - huffman_byte_offset, - last_rlc, - huffman_idx, - fse_size, - fse_accuracy, - n_huffman_bitstream_bytes, - fse_aux_data, - ) = process_block_zstd_huffman_code( - src, - byte_offset, - rows.last().expect("last row must exist"), - randomness, - n_streams, - ); - huffman_rows.extend_from_slice(&rows); - fse_aux_table = fse_aux_data; - huffman_weights = huffman_codes.clone(); - - // Subtract huffman header (1-byte), len of huffman bytes and 6-byte jump table (if - // n_streams > 1) - let mut literal_stream_size = compressed_size - (n_huffman_bytes + 1); - if n_streams > 1 { - literal_stream_size -= 6; - } - // Start decoding the literal section - let mut stream_offset = bytes_offset; + let literals_block_result: LiteralsBlockResult = { + let last_row = rows.last().cloned().unwrap(); + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + let tag = ZstdTag::ZstdBlockLiteralsRawBytes; + let tag_next = ZstdTag::ZstdBlockSequenceHeader; + let literals = src[byte_offset..(byte_offset + 
regen_size)].to_vec(); + let value_rlc_iter = literals + .iter() + .scan(last_row.encoded_data.value_rlc, |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let decoded_value_rlc_iter = + literals + .iter() + .scan(last_row.decoded_data.decoded_value_rlc, |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value_iter = literals.iter().scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value = tag_value_iter.clone().last().expect("Literals must exist."); + let tag_rlc_iter = literals.iter().scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_rlc = tag_value_iter.clone().last().expect("Literals must exist."); - let (bytes_offset, rows, lstream_lens) = process_block_zstd_huffman_jump_table( - src, - stream_offset, - huffman_rows.last().expect("last row should exist"), - literal_stream_size, - n_streams, - randomness, - last_rlc, - ); - huffman_rows.extend_from_slice(&rows); - stream_offset = bytes_offset; - - let mut literals: Vec = vec![]; - - // for idx in 0..n_streams { - for (idx, l_len) in lstream_lens.iter().enumerate().take(n_streams) { - let (byte_offset, rows, symbols) = process_block_zstd_lstream( - src, - stream_offset, - *l_len as usize, - huffman_rows.last().expect("last row should exist"), - randomness, - idx, - &huffman_codes, - huffman_byte_offset, - ); - huffman_rows.extend_from_slice(&rows); - literals.extend_from_slice(&symbols); + ( + byte_offset + regen_size, + literals + .iter() + .zip(tag_value_iter) + .zip(decoded_value_rlc_iter) + .zip(tag_rlc_iter) + .enumerate() + .map( + |( + i, + (((&value_byte, tag_value_acc), decoded_value_rlc), + tag_rlc_acc, + ), + )| { + ZstdWitnessRow { + state: ZstdState { + tag, + tag_next, + block_idx, + max_tag_len: 
lookup_max_tag_len(tag), + tag_len: regen_size as u64, + tag_idx: (i + 1) as u64, + tag_value, + tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + value_rlc, + reverse: false, + ..Default::default() + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc: last_row.decoded_data.decoded_len + (i as u64) + 1, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: value_byte, + decoded_value_rlc, + }, + bitstream_read_data: BitstreamReadRow::default(), + fse_data: FseDecodingRow::default(), + } + }, + ) + .collect::>(), + literals.iter().map(|b| *b as u64).collect::>(), + vec![regen_size as u64, 0, 0, 0], + vec![0, 0, 0, 0], + ) + }; - stream_offset = byte_offset; - } + let (byte_offset, rows, literals, lstream_len, aux_data) = literals_block_result; + witness_rows.extend_from_slice(&rows); - ( - stream_offset, - huffman_rows, - literals, - lstream_lens, - vec![ - huffman_idx as u64, - fse_size, - fse_accuracy, - n_huffman_bitstream_bytes, - ], - ) - } - _ => unreachable!("Invalid literals section BlockType"), - }; - let (bytes_offset, rows, literals, lstream_len, aux_data) = literals_block_result; + let last_row = witness_rows.last().expect("last row expected to exist"); + let (bytes_offset, rows, fse_aux_tables, address_table_rows, original_inputs, sequence_info, sequence_exec_info) = + process_sequences::( + src, + block_idx, + byte_offset, + end_offset, + literals.clone(), + last_row, + last_block, + randomness, + ); witness_rows.extend_from_slice(&rows); ( @@ -816,407 +514,538 @@ fn process_block_zstd( branch, sf_max as u64, ], - fse_aux_table, - huffman_weights, + sequence_info, + fse_aux_tables, ) } -type LiteralsHeaderProcessingResult = ( +type SequencesProcessingResult = ( usize, Vec>, - BlockType, - usize, - usize, - usize, - (u64, 
bool), + [FseAuxiliaryTableData; 3], // LLT, MLT, CMOT + Vec, // Parsed sequence instructions + Vec, // Recovered original input + SequenceInfo, + Vec, ); -fn process_block_zstd_literals_header( +fn process_sequences( src: &[u8], + block_idx: u64, byte_offset: usize, + end_offset: usize, + literals: Vec, last_row: &ZstdWitnessRow, + last_block: bool, randomness: Value, -) -> LiteralsHeaderProcessingResult { - let lh_bytes = src - .iter() - .skip(byte_offset) - .take(N_MAX_LITERAL_HEADER_BYTES) - .cloned() - .collect::>(); +) -> SequencesProcessingResult { + // Initialize witness rows + let mut witness_rows: Vec> = vec![]; - let literals_block_type = BlockType::from(lh_bytes[0] & 0x3); - let size_format = (lh_bytes[0] >> 2) & 3; - let sf_max = size_format == 3; + // Other consistent values + let encoded_len = last_row.encoded_data.encoded_len; + let _decoded_data = last_row.decoded_data.clone(); - let [n_bits_fmt, n_bits_regen, n_bits_compressed, n_streams, n_bytes_header, branch]: [usize; - 6] = match literals_block_type { - BlockType::RawBlock | BlockType::RleBlock => match size_format { - 0b00 | 0b10 => [1, 5, 0, 1, 1, 0], - 0b01 => [2, 12, 0, 1, 2, 1], - 0b11 => [2, 20, 0, 1, 3, 2], - _ => unreachable!("size_format out of bound"), - }, - BlockType::ZstdCompressedBlock => match size_format { - 0b00 => [2, 10, 10, 1, 3, 3], - 0b01 => [2, 10, 10, 4, 3, 3], - 0b10 => [2, 14, 14, 4, 4, 4], - 0b11 => [2, 18, 18, 4, 5, 5], - _ => unreachable!("size_format out of bound"), - }, - _ => unreachable!("BlockType::Reserved unexpected or treeless literal section"), - }; - - // Bits for representing regenerated_size and compressed_size - let sizing_bits = &lh_bytes.clone().into_iter().fold(vec![], |mut acc, b| { - acc.extend(value_bits_le(b)); - acc - })[(2 + n_bits_fmt)..(n_bytes_header * N_BITS_PER_BYTE)]; + // First, process the sequence header + let mut sequence_info = SequenceInfo::default(); + sequence_info.block_idx = block_idx as usize; - let regen_size = 
le_bits_to_value(&sizing_bits[0..n_bits_regen]); - let compressed_size = - le_bits_to_value(&sizing_bits[n_bits_regen..(n_bits_regen + n_bits_compressed)]); + let byte0 = src + .get(byte_offset) + .expect("First byte of sequence header must exist.") + .clone(); + assert!(byte0 > 0u8, "Sequences can't be of 0 length"); - let tag_next = match literals_block_type { - BlockType::RawBlock => ZstdTag::ZstdBlockLiteralsRawBytes, - BlockType::RleBlock => ZstdTag::ZstdBlockLiteralsRleBytes, - BlockType::ZstdCompressedBlock => ZstdTag::ZstdBlockFseCode, - _ => unreachable!("BlockType::Reserved unexpected or treeless literal section"), + let (num_of_sequences, num_sequence_header_bytes) = if byte0 < 128 { + (byte0 as u64, 2usize) + } else { + let byte1 = src + .get(byte_offset + 1) + .expect("Next byte of sequence header must exist.") + .clone(); + if byte0 < 255 { + ((((byte0 - 128) as u64) << 8) + byte1 as u64, 3) + } else { + let byte2 = src + .get(byte_offset + 2) + .expect("Third byte of sequence header must exist.") + .clone(); + ((byte1 as u64) + ((byte2 as u64) << 8) + 0x7F00, 4) + } }; + sequence_info.num_sequences = num_of_sequences as usize; - let tag_value_iter = - lh_bytes - .iter() - .take(n_bytes_header) - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter - .clone() - .last() - .expect("LiteralsHeader expected"); - - let tag_rlc_iter = - lh_bytes - .iter() - .take(n_bytes_header) - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC expected"); - - let value_rlc_iter = - lh_bytes - .iter() - .take(n_bytes_header) - .scan(last_row.encoded_data.value_rlc, |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - - let multiplier = - 
(0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); - let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; - - ( - byte_offset + n_bytes_header, - lh_bytes - .iter() - .take(n_bytes_header) - .zip(tag_value_iter) - .zip(value_rlc_iter) - .zip(tag_rlc_iter) - .enumerate() - .map( - |(i, (((&value_byte, tag_value_acc), _v_rlc), tag_rlc_acc))| ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockLiteralsHeader, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockLiteralsHeader), - tag_len: n_bytes_header as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + i + 1) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - reverse: false, - value_rlc, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow::default(), - decoded_data: last_row.decoded_data.clone(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), - }, - ) - .collect::>(), - literals_block_type, - n_streams, - regen_size as usize, - compressed_size as usize, - (branch as u64, sf_max), - ) -} - -type HuffmanCodeProcessingResult = ( - usize, - Vec>, - HuffmanCodesData, - usize, - usize, - Value, - usize, - u64, - u64, - u64, - FseAuxiliaryTableData, -); - -fn process_block_zstd_huffman_code( - src: &[u8], - byte_offset: usize, - last_row: &ZstdWitnessRow, - randomness: Value, - n_streams: usize, -) -> HuffmanCodeProcessingResult { - // Preserve this value for later construction of HuffmanCodesDataTable - let huffman_code_byte_offset = byte_offset; + let compression_mode_byte = src + .get(byte_offset + num_sequence_header_bytes - 1) + .expect("Compression mode byte must exist.") + .clone(); + let mode_bits = value_bits_le(compression_mode_byte); - // Other consistent values - let encoded_len = last_row.encoded_data.encoded_len; - let 
decoded_data = last_row.decoded_data.clone(); + let literal_lengths_mode = mode_bits[6] + mode_bits[7] * 2; + let offsets_mode = mode_bits[4] + mode_bits[5] * 2; + let match_lengths_mode = mode_bits[2] + mode_bits[3] * 2; + let reserved = mode_bits[0] + mode_bits[1] * 2; - // Get the next tag - let tag_next = ZstdTag::ZstdBlockHuffmanCode; + assert!(reserved == 0, "Reserved bits must be 0"); - // Parse the header byte - let mut witness_rows: Vec> = vec![]; - let header_byte = src[byte_offset]; - assert!(header_byte < 128, "FSE encoded huffman weights assumed"); - let n_bytes = header_byte as usize; + // TODO: Treatment of other encoding modes + assert!( + literal_lengths_mode == 2 || literal_lengths_mode == 0, + "Only FSE_Compressed_Mode is allowed" + ); + assert!( + offsets_mode == 2 || offsets_mode == 0, + "Only FSE_Compressed_Mode is allowed" + ); + assert!( + match_lengths_mode == 2 || match_lengths_mode == 0, + "Only FSE_Compressed_Mode is allowed" + ); + sequence_info.compression_mode = [ + literal_lengths_mode > 0, + offsets_mode > 0, + match_lengths_mode > 0, + ]; let multiplier = (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; - // Add a witness row for Huffman header - let mut huffman_header_row: ZstdWitnessRow = ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockFseCode, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockFseCode), - tag_len: 0_u64, /* There's no information at this point about the length of FSE - * table bytes. So this value has to be modified later. 
*/ - tag_idx: 1_u64, - tag_value: Value::default(), // Must be changed after FSE table length is known - tag_value_acc: Value::default(), // Must be changed after FSE table length is known - is_tag_change: true, - tag_rlc: Value::known(F::zero()), // Must be changed after FSE table length is known - tag_rlc_acc: Value::known(F::zero()), // Must be changed after FSE table length is known - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + 1) as u64, - encoded_len, - value_byte: header_byte, - value_rlc, - reverse: false, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow { - bit_start_idx: 0usize, - bit_end_idx: 7usize, - bit_value: header_byte as u64, - is_zero_bit_read: false, - }, - decoded_data: decoded_data.clone(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), - }; - - // Recover the FSE table for generating Huffman weights - // TODO(ray): this part is redundant however to compile, we have added the required args to the - // ``reconstruct`` method. 
- let (n_fse_bytes, bit_boundaries, table) = - FseAuxiliaryTableData::reconstruct(src, 1, FseTableKind::LLT, byte_offset + 1) - .expect("Reconstructing FSE table should not fail."); - - // Witness generation - let accuracy_log = (src[byte_offset + 1] & 0b1111) + 5; - - let mut tag_value_iter = src.iter().skip(byte_offset).take(n_fse_bytes + 1).scan( - Value::known(F::zero()), - |acc, &byte| { + // Add witness rows for the sequence header + let sequence_header_start_offset = byte_offset; + let sequence_header_end_offset = byte_offset + num_sequence_header_bytes; + let tag_value_iter = src[sequence_header_start_offset..sequence_header_end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) - }, - ); + }); let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); - let mut tag_rlc_iter = src.iter().skip(byte_offset).take(n_fse_bytes + 1).scan( - Value::known(F::zero()), - |acc, &byte| { + let tag_rlc_iter = src[sequence_header_start_offset..sequence_header_end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) - }, - ); + }); let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); - // Backfill missing data on the huffman header row - huffman_header_row.state.tag_len = (n_fse_bytes + 1usize) as u64; - huffman_header_row.state.tag_value = tag_value; - huffman_header_row.state.tag_value_acc = - tag_value_iter.next().expect("Next value should exist"); - huffman_header_row.state.tag_rlc = tag_rlc; - huffman_header_row.state.tag_rlc_acc = tag_rlc_iter.next().expect("Next value expected"); - witness_rows.push(huffman_header_row); - - // Process bit boundaries into bitstream reader info - let mut decoded: u8 = 0; - let mut n_acc: usize = 0; - let mut current_tag_value_acc = Value::known(F::zero()); - let mut current_tag_rlc_acc = Value::known(F::zero()); - let mut 
last_byte_idx: i64 = 0; - let mut from_pos: (i64, i64) = (1, 0); - let mut to_pos: (i64, i64) = (0, 0); - - let bitstream_rows = bit_boundaries + let header_rows = src[sequence_header_start_offset..sequence_header_end_offset] .iter() + .zip(tag_value_iter) + .zip(tag_rlc_iter) .enumerate() - .map(|(sym, (bit_idx, value))| { - from_pos = if sym == 0 { (1, -1) } else { to_pos }; - - from_pos.1 += 1; - if from_pos.1 == 8 { - from_pos = (from_pos.0 + 1, 0); - } - from_pos.1 = (from_pos.1 as u64).rem_euclid(8) as i64; - - if from_pos.0 > last_byte_idx { - current_tag_value_acc = tag_value_iter.next().unwrap(); - current_tag_rlc_acc = tag_rlc_iter.next().unwrap(); - last_byte_idx = from_pos.0; - } + .map( + |(i, ((&value_byte, tag_value_acc), tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockSequenceHeader, + tag_next: ZstdTag::ZstdBlockSequenceFseCode, + block_idx, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceHeader), + tag_len: num_sequence_header_bytes as u64, + tag_idx: (i + 1) as u64, + tag_value, + tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (sequence_header_start_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + value_rlc, + reverse: false, + ..Default::default() + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc: last_row.decoded_data.decoded_len + (i as u64) + 1, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: value_byte, + decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + }, + bitstream_read_data: BitstreamReadRow::default(), + fse_data: FseDecodingRow::default(), + }, + ) + .collect::>(); - let to_byte_idx = (bit_idx - 1) / 8; - let mut to_bit_idx = bit_idx - to_byte_idx * (N_BITS_PER_BYTE as u32) - 1; + witness_rows.extend_from_slice(&header_rows); - if from_pos.0 < (to_byte_idx + 1) as i64 { - to_bit_idx += 8; - } + 
// Second, process the sequence tables (encoded using FSE) + let byte_offset = sequence_header_end_offset; + let fse_starting_byte_offset = byte_offset; - to_pos = ((to_byte_idx + 1) as i64, to_bit_idx as i64); + // Literal Length Table (LLT) + let (n_fse_bytes_llt, bit_boundaries_llt, table_llt) = FseAuxiliaryTableData::reconstruct( + src, + block_idx, + FseTableKind::LLT, + byte_offset, + literal_lengths_mode < 2, + ) + .expect("Reconstructing FSE-packed Literl Length (LL) table should not fail."); + let llt = table_llt.parse_state_table(); + let al_llt = if literal_lengths_mode > 0 { + bit_boundaries_llt + .first() + .expect("Accuracy Log should exist") + .1 + + 5 + } else { + 6 + }; - if sym > 0 && n_acc < (1 << accuracy_log) { - decoded = (sym - 1) as u8; - n_acc += (*value - 1) as usize; - } + // witgen_debug + let stdout = io::stdout(); + let mut handle = stdout.lock(); + // write!(handle, "bit_boundaries_llt: {:?}", bit_boundaries_llt).unwrap(); + // writeln!(handle).unwrap(); - ( - decoded, - from_pos.0 as usize, - from_pos.1 as usize, - to_pos.0 as usize, - to_pos.1 as usize, - *value, - current_tag_value_acc, - current_tag_rlc_acc, - 0, - n_acc, - ) - }) - .collect::, - Value, - usize, - usize, - )>>(); - - // Add witness rows for FSE representation bytes - for row in bitstream_rows { - witness_rows.push(ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockFseCode, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockFseCode), - tag_len: (n_fse_bytes + 1) as u64, - tag_idx: (row.1 + 1) as u64, // count the huffman header byte - tag_value, - tag_value_acc: row.6, - is_tag_change: false, - tag_rlc, - tag_rlc_acc: row.7, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + row.1 + 1) as u64, // count the huffman header byte - encoded_len, - value_byte: src[byte_offset + row.1], - value_rlc, - reverse: false, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow { - bit_start_idx: row.2, - bit_end_idx: row.4, - 
bit_value: row.5, - is_zero_bit_read: false, - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len_acc, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: row.0, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, - }, - huffman_data: HuffmanData::default(), - fse_data: FseTableRow { - state: 0, - symbol: 0, - baseline: 0, - num_bits: 0, - num_emitted: 0, - is_state_skipped: false, - }, - }); - } + // Cooked Match Offset Table (CMOT) + let byte_offset = byte_offset + n_fse_bytes_llt; + let (n_fse_bytes_cmot, bit_boundaries_cmot, table_cmot) = FseAuxiliaryTableData::reconstruct( + src, + block_idx, + FseTableKind::MOT, + byte_offset, + offsets_mode < 2, + ) + .expect("Reconstructing FSE-packed Cooked Match Offset (CMO) table should not fail."); + let cmot = table_cmot.parse_state_table(); + let al_cmot = if offsets_mode > 0 { + bit_boundaries_cmot + .first() + .expect("Accuracy Log should exist") + .1 + + 5 + } else { + 5 + }; - // Now start decoding the huffman weights using the actual Huffman code section - let tag_next = if n_streams > 1 { - ZstdTag::ZstdBlockJumpTable + // Match Length Table (MLT) + let byte_offset = byte_offset + n_fse_bytes_cmot; + let (n_fse_bytes_mlt, bit_boundaries_mlt, table_mlt) = FseAuxiliaryTableData::reconstruct( + src, + block_idx, + FseTableKind::MLT, + byte_offset, + match_lengths_mode < 2, + ) + .expect("Reconstructing FSE-packed Match Length (ML) table should not fail."); + let mlt = table_mlt.parse_state_table(); + let al_mlt = if match_lengths_mode > 0 { + bit_boundaries_mlt + .first() + .expect("Accuracy Log should exist") + .1 + + 5 } else { - ZstdTag::ZstdBlockLstream + 6 }; - // Update the last row - let last_row = witness_rows.last().expect("Last row exists"); - let multiplier = - (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); - let value_rlc = 
last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + // Add witness rows for the FSE tables + let mut last_row = header_rows.last().cloned().unwrap(); + for (idx, start_offset, end_offset, bit_boundaries, tag_len, table) in [ + ( + 0usize, + fse_starting_byte_offset, + fse_starting_byte_offset + n_fse_bytes_llt, + bit_boundaries_llt, + n_fse_bytes_llt as u64, + &table_llt, + ), + ( + 1usize, + fse_starting_byte_offset + n_fse_bytes_llt, + fse_starting_byte_offset + n_fse_bytes_llt + n_fse_bytes_cmot, + bit_boundaries_cmot, + n_fse_bytes_cmot as u64, + &table_cmot, + ), + ( + 2usize, + fse_starting_byte_offset + n_fse_bytes_llt + n_fse_bytes_cmot, + fse_starting_byte_offset + n_fse_bytes_llt + n_fse_bytes_cmot + n_fse_bytes_mlt, + bit_boundaries_mlt, + n_fse_bytes_mlt as u64, + &table_mlt, + ), + ] { + let mut tag_value_iter = + src[start_offset..end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); - // Bitstream processing state values - let mut num_emitted: usize = 0; - let n_huffman_code_bytes = n_bytes - n_fse_bytes; - let mut last_byte_idx: usize = 1; - let mut current_byte_idx: usize = 1; // byte_idx is 1-indexed - let mut current_bit_idx: usize = 0; + let mut tag_rlc_iter = + src[start_offset..end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); + + let mut decoded: u64 = 0; + let mut n_acc: usize = 0; + let mut n_emitted: usize = 0; + let mut current_tag_value_acc = Value::known(F::zero()); + let mut current_tag_rlc_acc = Value::known(F::zero()); + let mut last_byte_idx: i64 = 0; + let mut from_pos: (i64, i64) = (1, 0); + let mut to_pos: (i64, i64) = (0, 0); + let kind = table.table_kind; 
+ let mut next_symbol: i32 = -1; + let mut is_repeating_bit_boundary: HashMap = HashMap::new(); + + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + let mut last_symbol: i32 = 0; - // Construct the Huffman bitstream - let huffman_bitstream = src + let bitstream_rows = bit_boundaries + .iter() + .enumerate() + .map(|(bit_boundary_idx, (bit_idx, value_read, value_decoded))| { + // Calculate byte and bit positions. Increment allocators. + from_pos = if next_symbol == -1 { (1, -1) } else { to_pos }; + + from_pos.1 += 1; + if from_pos.1 == 8 { + from_pos = (from_pos.0 + 1, 0); + } + + from_pos.1 = (from_pos.1 as u64).rem_euclid(8) as i64; + + while from_pos.0 > last_byte_idx { + current_tag_value_acc = tag_value_iter.next().unwrap(); + current_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = from_pos.0; + } + + let to_byte_idx = (bit_idx - 1) / 8; + let mut to_bit_idx = bit_idx - to_byte_idx * (N_BITS_PER_BYTE as u32) - 1; + + if from_pos.0 < (to_byte_idx + 1) as i64 { + to_bit_idx += 8; + } + + to_pos = ((to_byte_idx + 1) as i64, to_bit_idx as i64); + + // Decide Fse decoding results + if bit_boundary_idx < 1 { + // Accuracy log bits + next_symbol += 1; + assert_eq!(value_read, value_decoded, "no varbit packing for AL bits"); + ( + 0, + n_emitted, + from_pos.0 as usize, + from_pos.1 as usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value_read, + *value_decoded, + current_tag_value_acc, + current_tag_rlc_acc, + n_acc, + // FseDecoder-specific witness values + kind as u64, + table.table_size as u64, + false, + false, + ) + } else if !is_repeating_bit_boundary.contains_key(&bit_boundary_idx) { + if n_acc >= (table.table_size as usize) { + // Trailing bits + assert_eq!(value_read, value_decoded, "no varbit packing for trailing bits"); + ( + last_symbol as u64, + n_emitted, + from_pos.0 as usize, + from_pos.1 as 
usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value_read, + *value_decoded, + current_tag_value_acc, + current_tag_rlc_acc, + n_acc, + // FseDecoder-specific witness values + kind as u64, + table.table_size as u64, + false, + true, + ) + } else { + // Regular decoding state + assert!(next_symbol >= 0); + decoded = next_symbol as u64; + n_emitted += 1; + last_symbol = next_symbol; + next_symbol += 1; + match *value_decoded { + 0 => { + // When a symbol has a value==0, it signifies a case of prob=-1 (or + // probability "less than 1"), where + // such symbols are allocated states from the + // end and retreating. Exactly 1 state is allocated in this case. + n_acc += 1; + } + 1 => { + let mut repeating_bit_boundary_idx = bit_boundary_idx + 1; + loop { + let repeating_bits = + bit_boundaries[repeating_bit_boundary_idx].1; + next_symbol += repeating_bits as i32; // skip symbols + is_repeating_bit_boundary + .insert(repeating_bit_boundary_idx, true); + + if repeating_bits < 3 { + break; + } else { + repeating_bit_boundary_idx += 1; + } + } + } + _ => { + n_acc += (*value_decoded - 1) as usize; + } + } + + ( + decoded, + n_emitted, + from_pos.0 as usize, + from_pos.1 as usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value_read, + *value_decoded, + current_tag_value_acc, + current_tag_rlc_acc, + n_acc, + // FseDecoder-specific witness values + kind as u64, + table.table_size as u64, + false, // repeating bits + false, // trailing bits + ) + } + } else { + // Repeating bits + let symbol = last_symbol as u64 + value_decoded; + last_symbol = symbol as i32; + assert_eq!(value_read, value_decoded, "no varbit packing for repeat-bits flag"); + ( + symbol, + n_emitted, + from_pos.0 as usize, + from_pos.1 as usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value_read, + *value_decoded, + current_tag_value_acc, + current_tag_rlc_acc, + n_acc, + // FseDecoder-specific witness values + kind as u64, + table.table_size as u64, + true, + false, + ) + } + }) + .collect::, + 
Value, + usize, + u64, + u64, + bool, + bool, + )>>(); + + // Transform bitstream rows into witness rows + for (j, row) in bitstream_rows.iter().enumerate() { + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockSequenceFseCode, + tag_next: if idx > 1 { + ZstdTag::ZstdBlockSequenceData + } else { + ZstdTag::ZstdBlockSequenceFseCode + }, + block_idx, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceFseCode), + tag_len, + tag_idx: row.2 as u64, + tag_value, + tag_value_acc: row.8, + is_tag_change: j == 0, + tag_rlc, + tag_rlc_acc: row.9, + }, + encoded_data: EncodedData { + byte_idx: (start_offset + row.2) as u64, + encoded_len, + value_byte: src[start_offset + row.2 - 1], + value_rlc, + reverse: false, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow { + bit_start_idx: row.3, + bit_end_idx: row.5, + bit_value: row.6, + is_zero_bit_read: false, + ..Default::default() + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc: last_row.decoded_data.decoded_len_acc, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: 0u8, + decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + }, + fse_data: FseDecodingRow { + table_kind: row.11, + table_size: row.12, + symbol: row.0, + num_emitted: row.1 as u64, + value_decoded: row.7, + probability_acc: row.10 as u64, + is_repeat_bits_loop: row.13, + is_trailing_bits: row.14, + }, + }); + } + last_row = witness_rows.last().cloned().unwrap(); + } + + // Reconstruct LLTV, CMOTV, and MLTV which specifies bit actions for a specific state + let lltv = SequenceFixedStateActionTable::reconstruct_lltv(); + let cmotv = SequenceFixedStateActionTable::reconstruct_cmotv(CMOT_N); + let mltv = SequenceFixedStateActionTable::reconstruct_mltv(); + + // Decode sequence bitstream + let byte_offset = byte_offset + n_fse_bytes_mlt; + let sequence_bitstream = &src[byte_offset..end_offset] .iter() - .skip(byte_offset + 
n_fse_bytes + 1) - .take(n_huffman_code_bytes) .rev() .clone() .flat_map(|v| { @@ -1226,58 +1055,79 @@ fn process_block_zstd_huffman_code( }) .collect::>(); - // Accumulators for Huffman code section - let mut value_rlc_iter = src - .iter() - .skip(byte_offset + n_fse_bytes + 1) - .take(n_huffman_code_bytes) - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }) + // Bitstream processing state values + let _num_emitted: usize = 0; + let n_sequence_data_bytes = end_offset - byte_offset; + let mut last_byte_idx: usize = 1; + let mut current_byte_idx: usize = 1; + let mut current_bit_idx: usize = 0; + + // Update the last row + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + + let value_rlc_iter = + &src[byte_offset..end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let mut value_rlc_iter = value_rlc_iter + .clone() .collect::>>() .into_iter() .rev(); - let mut tag_value_iter = src - .iter() - .skip(byte_offset + n_fse_bytes + 1) - .take(n_huffman_code_bytes) - .rev() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); + + let tag_value_iter = + &src[byte_offset..end_offset] + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); - let tag_rlc_iter = src - .iter() - .skip(byte_offset + n_fse_bytes + 1) - .take(n_huffman_code_bytes) - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); + let mut tag_value_iter = tag_value_iter + 
.clone() + .collect::>>() + .into_iter() + .rev(); + + let tag_rlc_iter = + &src[byte_offset..end_offset] + .iter() + .rev() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); - let mut tag_rlc_iter = tag_rlc_iter.collect::>>().into_iter().rev(); + let mut tag_rlc_iter = tag_rlc_iter + .clone() + .collect::>>() + .into_iter() + .rev(); let mut next_tag_value_acc = tag_value_iter.next().unwrap(); let next_value_rlc_acc = value_rlc_iter.next().unwrap(); let mut next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); let aux_1 = next_value_rlc_acc; - let aux_2 = witness_rows[witness_rows.len() - 1].encoded_data.value_rlc; - let mut padding_end_idx: usize = 0; - while huffman_bitstream[padding_end_idx] == 0 { + let mut padding_end_idx = 0; + while sequence_bitstream[padding_end_idx] == 0 { padding_end_idx += 1; } // Add a witness row for leading 0s and the sentinel 1-bit witness_rows.push(ZstdWitnessRow { state: ZstdState { - tag: ZstdTag::ZstdBlockHuffmanCode, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockHuffmanCode), - tag_len: n_huffman_code_bytes as u64, + tag: ZstdTag::ZstdBlockSequenceData, + tag_next: if last_block { ZstdTag::Null } else { ZstdTag::BlockHeader }, + block_idx, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceData), + tag_len: n_sequence_data_bytes as u64, tag_idx: 1_u64, tag_value, tag_value_acc: next_tag_value_acc, @@ -1286,83 +1136,206 @@ fn process_block_zstd_huffman_code( tag_rlc_acc: next_tag_rlc_acc, }, encoded_data: EncodedData { - byte_idx: (byte_offset + n_fse_bytes + 1 + current_byte_idx) as u64, + byte_idx: (byte_offset + current_byte_idx) as u64, encoded_len, - value_byte: src - [byte_offset + n_fse_bytes + 1 + n_huffman_code_bytes - current_byte_idx], + value_byte: src[byte_offset + current_byte_idx - 1], value_rlc, reverse: true, - reverse_len: 
n_huffman_code_bytes as u64, - reverse_idx: (n_huffman_code_bytes - (current_byte_idx - 1)) as u64, + reverse_len: n_sequence_data_bytes as u64, + reverse_idx: (n_sequence_data_bytes - (current_byte_idx - 1)) as u64, aux_1, - aux_2, + aux_2: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow { - bit_value: 1u64, bit_start_idx: 0usize, bit_end_idx: padding_end_idx, + bit_value: 1u64, is_zero_bit_read: false, + ..Default::default() }, - huffman_data: HuffmanData::default(), decoded_data: last_row.decoded_data.clone(), - fse_data: FseTableRow::default(), + fse_data: FseDecodingRow::default(), }); // Exclude the leading zero section - while huffman_bitstream[current_bit_idx] == 0 { + while sequence_bitstream[current_bit_idx] == 0 { (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); } // Exclude the sentinel 1-bit (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); - // Update accumulator + // Update accumulators if current_byte_idx > last_byte_idx { next_tag_value_acc = tag_value_iter.next().unwrap(); next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); last_byte_idx = current_byte_idx; } - // Now the actual weight-bearing bitstream starts - // The Huffman bitstream is decoded by two interleaved states reading the stream in alternating - // order. The FSE table for the two independent decoding strands are the same. - let mut color: usize = 0; // use 0, 1 (colors) to denote two alternating decoding strands. - let mut prev_baseline: [u64; 2] = [0, 0]; - let mut next_nb_to_read: [usize; 2] = [accuracy_log as usize, accuracy_log as usize]; - let mut decoded_weights: Vec = vec![]; - let mut fse_table_idx: u64 = 1; + // Now the actual data-bearing bitstream starts + // The sequence bitstream is interleaved by 6 bit processing strands. 
+ // The interleaving order is: CMOVBits, MLVBits, LLVBits, LLFBits, MLFBits, CMOFBits + let mut seq_idx: usize = 0; + let mut decoded_bitstring_values: Vec<(SequenceDataTag, u64)> = vec![]; + let mut raw_sequence_instructions: Vec<(usize, usize, usize)> = vec![]; // offset_state, match_length, literal_length + let mut curr_instruction: [usize; 3] = [0, 0, 0]; + + // Note: mode and order_idx produces 6 distinct decoding state + let mut mode: usize = 1; // use 0 or 1 to denote whether bitstream produces data or next decoding state + let mut order_idx: usize = 0; // use 0, 1, 2 to denote the order of decoded value within current mode + + let mut state_baselines: [usize; 3] = [0, 0, 0]; // 3 states for LL, ML, CMO + let mut decoding_baselines: [usize; 3] = [0, 0, 0]; // 3 decoding bl for CMO, ML, LL + + let data_tags = [ + SequenceDataTag::CookedMatchOffsetValue, + SequenceDataTag::MatchLengthValue, + SequenceDataTag::LiteralLengthValue, + SequenceDataTag::LiteralLengthFse, + SequenceDataTag::MatchLengthFse, + SequenceDataTag::CookedMatchOffsetFse, + ]; + let next_nb_to_read_for_states: [usize; 3] = + [al_llt as usize, al_mlt as usize, al_cmot as usize]; // Obtained from accuracy log + let next_nb_to_read_for_values: [usize; 3] = [0, 0, 0]; + let mut nb_switch = [next_nb_to_read_for_values, next_nb_to_read_for_states]; + let v_tables = [cmotv, mltv, lltv]; + let f_tables = [llt, mlt, cmot]; + + let mut is_init = true; + let mut nb = nb_switch[mode][order_idx]; + let bitstream_end_bit_idx = n_sequence_data_bytes * N_BITS_PER_BYTE; + let mut table_kind = 0u64; + let mut table_size = 0u64; + let mut last_states: [u64; 3] = [0, 0, 0]; + let mut last_symbols: [u64; 3] = [0, 0, 0]; + let mut current_decoding_state = 0u64; + + // witgen_debug + let stdout = io::stdout(); + let mut handle = stdout.lock(); + + while current_bit_idx + nb <= bitstream_end_bit_idx { + let is_tail = current_bit_idx == bitstream_end_bit_idx; + if is_tail { + assert!(nb == 0, "Can only read 0 bit at 
the very tail end of bitstream."); + // The byte idx has already been incremented to > n_sequence_bytes. + // But continuously reading 0 bits from the very tail end of the last byte is allowed. + // In this case, the byte_idx is restored to the last byte of the bitstream bytes. + if current_byte_idx > n_sequence_data_bytes { + current_byte_idx -= 1; + } + } - // Convert FSE auxiliary data into a state-indexed representation - let fse_state_table = table.clone().parse_state_table(); + // witgen_debug + // write!(handle, "current_byte_idx: {:?}, current_bit_idx: {:?}, nb: {:?}", current_byte_idx, current_bit_idx, nb).unwrap(); + // writeln!(handle).unwrap(); - while current_bit_idx + next_nb_to_read[color] <= (n_huffman_code_bytes) * N_BITS_PER_BYTE { - let nb = next_nb_to_read[color]; let bitstring_value = - be_bits_to_value(&huffman_bitstream[current_bit_idx..(current_bit_idx + nb)]); - let next_state = prev_baseline[color] + bitstring_value; + be_bits_to_value(&sequence_bitstream[current_bit_idx..(current_bit_idx + nb)]); - let from_bit_idx = current_bit_idx.rem_euclid(8); + let mut curr_baseline = 0; + if mode > 0 { + // For the initial baseline determination, ML and CMO positions are flipped. 
+ if is_init { + order_idx = [0, 2, 1][order_idx]; + } + + if order_idx < 1 { + seq_idx += 1; + } + + let new_decoded = (data_tags[mode * 3 + order_idx], bitstring_value); + decoded_bitstring_values.push(new_decoded); + + current_decoding_state = (mode * 3 + order_idx) as u64; + + table_kind = match new_decoded.0 { + SequenceDataTag::CookedMatchOffsetFse | SequenceDataTag::CookedMatchOffsetValue => table_cmot.table_kind as u64, + SequenceDataTag::MatchLengthFse | SequenceDataTag::MatchLengthValue => table_mlt.table_kind as u64, + SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => table_llt.table_kind as u64, + _ => unreachable!(), + }; + table_size = match new_decoded.0 { + SequenceDataTag::CookedMatchOffsetFse | SequenceDataTag::CookedMatchOffsetValue => table_cmot.table_size, + SequenceDataTag::MatchLengthFse | SequenceDataTag::MatchLengthValue => table_mlt.table_size, + SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => table_llt.table_size, + _ => unreachable!(), + }; + + // FSE state update step + curr_baseline = state_baselines[order_idx]; + let new_state = (curr_baseline as u64) + bitstring_value; + last_states[order_idx] = new_state; + let new_state_params = f_tables[order_idx] + .get(&new_state) + .expect("State should exist."); + let state_symbol = new_state_params.0; + last_symbols[order_idx] = state_symbol; + + let value_idx = 3 - order_idx - 1; + + // Update baseline and nb for next FSE state transition + state_baselines[order_idx] = new_state_params.1 as usize; + nb_switch[1][order_idx] = new_state_params.2 as usize; + + // Update baseline and nb for next value decoding + decoding_baselines[value_idx] = v_tables[value_idx].states_to_actions + [state_symbol as usize] + .1 + .0 as usize; + nb_switch[0][value_idx] = v_tables[value_idx].states_to_actions[state_symbol as usize] + .1 + .1 as usize; + + // Flip back the idx for first step + if is_init { + order_idx = [0, 2, 1][order_idx]; + } + } else { + let 
new_decoded = (data_tags[mode * 3 + order_idx], bitstring_value); + decoded_bitstring_values.push(new_decoded); + + current_decoding_state = (mode * 3 + order_idx) as u64; + + table_kind = match new_decoded.0 { + SequenceDataTag::CookedMatchOffsetFse | SequenceDataTag::CookedMatchOffsetValue => table_cmot.table_kind as u64, + SequenceDataTag::MatchLengthFse | SequenceDataTag::MatchLengthValue => table_mlt.table_kind as u64, + SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => table_llt.table_kind as u64, + _ => unreachable!(), + }; + table_size = match new_decoded.0 { + SequenceDataTag::CookedMatchOffsetFse | SequenceDataTag::CookedMatchOffsetValue => table_cmot.table_size, + SequenceDataTag::MatchLengthFse | SequenceDataTag::MatchLengthValue => table_mlt.table_size, + SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => table_llt.table_size, + _ => unreachable!(), + }; + + // Value decoding step + curr_baseline = decoding_baselines[order_idx]; + let new_value = (curr_baseline as u64) + bitstring_value; + curr_instruction[order_idx] = new_value as usize; + } + + // bitstream witness row data + let from_bit_idx = if !is_tail { + current_bit_idx.rem_euclid(8) + } else { + 7 + }; let to_bit_idx = if nb > 0 { from_bit_idx + (nb - 1) } else { from_bit_idx }; - // Lookup the FSE table row for the state - let fse_row = fse_state_table - .get(&{ next_state }) - .expect("next state should be in fse table"); - - // Decode the symbol - decoded_weights.push(fse_row.0 as u8); - num_emitted += 1; - // Add a witness row witness_rows.push(ZstdWitnessRow { state: ZstdState { - tag: ZstdTag::ZstdBlockHuffmanCode, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockHuffmanCode), - tag_len: (n_huffman_code_bytes) as u64, + tag: ZstdTag::ZstdBlockSequenceData, + tag_next: if last_block { ZstdTag::Null } else { ZstdTag::BlockHeader }, + block_idx, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceData), + tag_len: 
n_sequence_data_bytes as u64, tag_idx: current_byte_idx as u64, tag_value, tag_value_acc: next_tag_value_acc, @@ -1371,434 +1344,456 @@ fn process_block_zstd_huffman_code( tag_rlc_acc: next_tag_rlc_acc, }, encoded_data: EncodedData { - byte_idx: (byte_offset + n_fse_bytes + 1 + current_byte_idx) as u64, + byte_idx: (byte_offset + current_byte_idx) as u64, encoded_len, - value_byte: src - [byte_offset + n_fse_bytes + 1 + n_huffman_code_bytes - current_byte_idx], + // witgen_debug, idx overflow + // TODO(ray): This is a special case of the sequences data being a part of the + // "last block", hence the overflow. I have just re-used the "last" byte from the + // source data in such a case. + value_byte: if byte_offset + current_byte_idx - 1 < src.len() { + src[byte_offset + current_byte_idx - 1] + } else { + src.last().cloned().unwrap() + }, value_rlc, reverse: true, - reverse_len: n_huffman_code_bytes as u64, - reverse_idx: (n_huffman_code_bytes - (current_byte_idx - 1)) as u64, + reverse_len: n_sequence_data_bytes as u64, + reverse_idx: (n_sequence_data_bytes - (current_byte_idx - 1)) as u64, aux_1, - aux_2, + aux_2: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow { - bit_value: bitstring_value, bit_start_idx: from_bit_idx, bit_end_idx: to_bit_idx, + bit_value: bitstring_value, is_zero_bit_read: (nb == 0), + is_seq_init: is_init, + seq_idx, + states: last_states.clone(), + symbols: last_symbols.clone(), + values: [ + curr_instruction[2] as u64, + curr_instruction[1] as u64, + curr_instruction[0] as u64, + ], + baseline: curr_baseline as u64, + is_nil: false, + is_update_state: (current_decoding_state >= 3) as u64, }, - fse_data: FseTableRow { - state: next_state, - symbol: fse_row.0, - baseline: fse_row.1, - num_bits: fse_row.2, - num_emitted: num_emitted as u64, - // TODO(ray): pls check where to get this field from. 
- is_state_skipped: false, - }, - huffman_data: HuffmanData::default(), - decoded_data: decoded_data.clone(), + decoded_data: last_row.decoded_data.clone(), + fse_data: FseDecodingRow { + table_kind, + table_size, + ..Default::default() + } }); - // increment fse idx - fse_table_idx += 1; + let multi_byte_boundaries: [usize; 2] = [15, 23]; + let mut skipped_bits = 0usize; + + for boundary in multi_byte_boundaries { + if to_bit_idx >= boundary { + for _ in 0..N_BITS_PER_BYTE { + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + } + if current_byte_idx > last_byte_idx && current_byte_idx <= n_sequence_data_bytes { + next_tag_value_acc = tag_value_iter.next().unwrap(); + next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = current_byte_idx; + } + skipped_bits += N_BITS_PER_BYTE; + + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockSequenceData, + tag_next: if last_block { ZstdTag::Null } else { ZstdTag::BlockHeader }, + block_idx, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceData), + tag_len: n_sequence_data_bytes as u64, + tag_idx: current_byte_idx as u64, + tag_value, + tag_value_acc: next_tag_value_acc, + is_tag_change: false, + tag_rlc, + tag_rlc_acc: next_tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + current_byte_idx) as u64, + encoded_len, + // witgen_debug, idx overflow + // TODO(ray): This is a special case of the sequences data being a part of the + // "last block", hence the overflow. I have just re-used the "last" byte from the + // source data in such a case. 
+ value_byte: if byte_offset + current_byte_idx - 1 < src.len() { + src[byte_offset + current_byte_idx - 1] + } else { + src.last().cloned().unwrap() + }, + value_rlc, + reverse: true, + reverse_len: n_sequence_data_bytes as u64, + reverse_idx: (n_sequence_data_bytes - (current_byte_idx - 1)) as u64, + aux_1, + aux_2: Value::known(F::zero()), + }, + bitstream_read_data: BitstreamReadRow { + bit_start_idx: 0, + bit_end_idx: 0, + bit_value: 0, + is_zero_bit_read: false, + is_seq_init: false, + seq_idx, + states: last_states.clone(), + symbols: last_symbols.clone(), + values: [ + curr_instruction[2] as u64, + curr_instruction[1] as u64, + curr_instruction[0] as u64, + ], + baseline: curr_baseline as u64, + is_nil: true, + is_update_state: 0u64, + }, + decoded_data: last_row.decoded_data.clone(), + fse_data: FseDecodingRow { + table_kind, + table_size, + ..Default::default() + } + }) + } + } + + order_idx += 1; + if mode > 0 { + if order_idx > 2 { + is_init = false; + mode = 0; // switch to data mode + order_idx = 0; + } + } else { + if order_idx > 2 { + mode = 1; // switch to FSE mode + order_idx = 0; + + // Add the instruction + let new_instruction = ( + curr_instruction[0], + curr_instruction[1], + curr_instruction[2], + ); - // Advance byte and bit marks. 
Get next acc value if byte changes - for _ in 0..nb { + // witgen_debug + // write!(handle, "NewInstruction - idx: {:?}, Offset: {:?}, ML: {:?}, LLT: {:?}", raw_sequence_instructions.len(), new_instruction.0, new_instruction.1, new_instruction.2).unwrap(); + // writeln!(handle); + + raw_sequence_instructions.push(new_instruction); + } + } + + for _ in 0..(nb - skipped_bits) { (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); } - if current_byte_idx > last_byte_idx && current_byte_idx <= n_huffman_code_bytes { + if current_byte_idx > last_byte_idx && current_byte_idx <= n_sequence_data_bytes { next_tag_value_acc = tag_value_iter.next().unwrap(); next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); last_byte_idx = current_byte_idx; } - // Preparing for next state - prev_baseline[color] = fse_row.1; - next_nb_to_read[color] = fse_row.2 as usize; - - color = if color > 0 { 0 } else { 1 }; + if is_init { + // On the first step, ML and CMO are flipped + let true_idx = [0, 2, 1][order_idx]; + nb = nb_switch[mode][true_idx]; + } else { + nb = nb_switch[mode][order_idx]; + } } - // Construct HuffmanCodesTable - let huffman_codes = HuffmanCodesData { - byte_offset: (huffman_code_byte_offset + 1) as u64, - weights: decoded_weights - .into_iter() - .map(|w| FseSymbol::from(w as usize)) - .collect(), - }; - - // rlc after a reverse section - let mul = - (0..(n_huffman_code_bytes - 1)).fold(Value::known(F::one()), |acc, _| acc * randomness); - let new_value_rlc_init_value = aux_2 * mul + aux_1; + // Process raw sequence instructions + let mut address_table_rows: Vec = vec![]; + let mut literal_len_acc: usize = 0; + let mut repeated_offset: [usize; 3] = [1, 4, 8]; + + // witgen_debug + // for idx in 0..witness_rows.len() { + // if witness_rows[idx].state.tag == ZstdTag::ZstdBlockSequenceData + // && !witness_rows[idx].bitstream_read_data.is_seq_init + // { + // let seq_idx = witness_rows[idx].bitstream_read_data.seq_idx; + // if seq_idx > 0 { + 
// witness_rows[idx].bitstream_read_data.values = [ + // // literal length, match length and match offset. + // raw_sequence_instructions[seq_idx - 1].2 as u64, + // raw_sequence_instructions[seq_idx - 1].1 as u64, + // raw_sequence_instructions[seq_idx - 1].0 as u64, + // ]; + // } + // } + // } + + for (idx, inst) in raw_sequence_instructions.iter().enumerate() { + let actual_offset = if inst.0 > 3 { + inst.0 - 3 + } else { + let mut repeat_idx = inst.0; + if inst.2 == 0 { + repeat_idx += 1; + if repeat_idx > 3 { + repeat_idx = 1; + } + } - ( - byte_offset + 1 + n_fse_bytes + n_huffman_code_bytes, - witness_rows, - huffman_codes, - n_bytes, - huffman_code_byte_offset + 1, - new_value_rlc_init_value, - byte_offset + 1, - (1 << accuracy_log) as u64, - accuracy_log as u64, - n_huffman_code_bytes as u64, - table, // FSE table - ) -} + repeated_offset[repeat_idx] + } as u64; + + literal_len_acc += inst.2; + + address_table_rows.push(AddressTableRow { + s_padding: 0, + instruction_idx: idx as u64, + literal_length: inst.2 as u64, + cooked_match_offset: inst.0 as u64, + match_length: inst.1 as u64, + literal_length_acc: literal_len_acc as u64, + repeated_offset1: repeated_offset[0] as u64, + repeated_offset2: repeated_offset[1] as u64, + repeated_offset3: repeated_offset[2] as u64, + actual_offset, + }); -fn process_block_zstd_huffman_jump_table( - src: &[u8], - byte_offset: usize, - last_row: &ZstdWitnessRow, - literal_stream_size: usize, - n_streams: usize, - randomness: Value, - last_rlc: Value, -) -> (usize, Vec>, Vec) { - if n_streams <= 1 { - (byte_offset, vec![], vec![literal_stream_size as u64]) - } else { - // Note: The decompressed size of each stream is equal to (regen_size + 3) / 4 - // but the compressed bitstream length will be different. - // Jump table provides information on the length of first 3 bitstreams. 
+ // Update repeated offset + if inst.0 > 3 { + repeated_offset[2] = repeated_offset[1]; + repeated_offset[1] = repeated_offset[0]; + repeated_offset[0] = inst.0 - 3; + } else { + let mut repeat_idx = inst.0; + if inst.2 == 0 { + repeat_idx += 1; + if repeat_idx > 3 { + repeat_idx = 1; + } + } - let jt_bytes = src - .iter() - .skip(byte_offset) - .take(N_JUMP_TABLE_BYTES) - .cloned() - .map(|x| x as u64) - .collect::>(); - - let l1: u64 = jt_bytes[0] + jt_bytes[1] * 256; - let l2: u64 = jt_bytes[2] + jt_bytes[3] * 256; - let l3: u64 = jt_bytes[4] + jt_bytes[5] * 256; - let l4: u64 = (literal_stream_size as u64) - l1 - l2 - l3; - - let value_rlc_iter = - src.iter() - .skip(byte_offset) - .take(N_JUMP_TABLE_BYTES) - .scan(last_rlc, |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let multiplier = - (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); - let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + if repeat_idx == 2 { + let result = repeated_offset[1]; + repeated_offset[1] = repeated_offset[0]; + repeated_offset[0] = result; + } else if repeat_idx == 3 { + let result = repeated_offset[2]; + repeated_offset[2] = repeated_offset[1]; + repeated_offset[1] = repeated_offset[0]; + repeated_offset[0] = result; + } else { + // repeat 1 + } + }; + } - let tag_value_iter = src.iter().skip(byte_offset).take(N_JUMP_TABLE_BYTES).scan( - Value::known(F::zero()), - |acc, &byte| { - *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); - Some(*acc) - }, - ); - let tag_value = tag_value_iter - .clone() - .last() - .expect("Tag value must exist."); - let tag_rlc_iter = src.iter().skip(byte_offset).take(N_JUMP_TABLE_BYTES).scan( - Value::known(F::zero()), - |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }, - ); - let tag_rlc = tag_rlc_iter.clone().last().expect("Tag value must exist."); 
+ // Executing sequence instructions to acquire the original input. + // At this point, the address table rows are not padded. Paddings will be added as sequence + // instructions progress. + let mut recovered_inputs: Vec = vec![]; + let mut seq_exec_info: Vec = vec![]; + let mut current_literal_pos: usize = 0; + + for inst in address_table_rows.clone() { + let new_literal_pos = current_literal_pos + (inst.literal_length as usize); + if new_literal_pos > current_literal_pos { + let r = current_literal_pos..new_literal_pos; + seq_exec_info.push( + SequenceExec( + inst.instruction_idx as usize, + SequenceExecInfo::LiteralCopy(r.clone()), + ) + ); + recovered_inputs.extend_from_slice( + literals[r] + .iter() + .map(|&v| v as u8) + .collect::>() + .as_slice(), + ); + } - ( - byte_offset + N_JUMP_TABLE_BYTES, - src.iter() - .skip(byte_offset) - .take(N_JUMP_TABLE_BYTES) - .zip(tag_value_iter) - .zip(value_rlc_iter) - .zip(tag_rlc_iter) - .enumerate() - .map( - |(i, (((&value_byte, tag_value_acc), _v_rlc), tag_rlc_acc))| ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockJumpTable, - tag_next: ZstdTag::ZstdBlockLstream, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockJumpTable), - tag_len: N_JUMP_TABLE_BYTES as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + i + 1) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - value_rlc, - reverse: false, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow { - bit_start_idx: 0, - bit_end_idx: 7, - bit_value: value_byte as u64, - is_zero_bit_read: false, - }, - decoded_data: last_row.decoded_data.clone(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), - }, + let match_pos = recovered_inputs.len() - (inst.actual_offset as usize); + if inst.match_length > 0 { + let r = match_pos..(inst.match_length as usize + match_pos); + 
seq_exec_info.push( + SequenceExec( + inst.instruction_idx as usize, + SequenceExecInfo::BackRef(r.clone()), ) - .collect::>(), - vec![l1, l2, l3, l4], - ) + ); + let matched_bytes = Vec::from(&recovered_inputs[r]); + recovered_inputs.extend_from_slice(&matched_bytes.as_slice()); + } + current_literal_pos = new_literal_pos; } + + // Add remaining literal bytes + if current_literal_pos < literals.len() { + let r = current_literal_pos..literals.len(); + seq_exec_info.push( + SequenceExec( + sequence_info.num_sequences+1, + SequenceExecInfo::LiteralCopy(r.clone()), + ) + ); + recovered_inputs.extend_from_slice( + literals[r] + .iter() + .map(|&v| v as u8) + .collect::>() + .as_slice(), + ); + } + + ( + end_offset, + witness_rows, + [table_llt, table_cmot, table_mlt], + address_table_rows, + recovered_inputs, + sequence_info, + seq_exec_info, + ) } -#[allow(clippy::too_many_arguments)] -fn process_block_zstd_lstream( +type LiteralsHeaderProcessingResult = ( + usize, + Vec>, + BlockType, + usize, + usize, + usize, + (u64, bool), +); + +fn process_block_zstd_literals_header( src: &[u8], + block_idx: u64, byte_offset: usize, - len: usize, last_row: &ZstdWitnessRow, randomness: Value, - stream_idx: usize, - huffman_code: &HuffmanCodesData, - huffman_code_byte_offset: usize, -) -> (usize, Vec>, Vec) { - // Obtain literal stream bits (reversed). 
- let lstream_bits = src +) -> LiteralsHeaderProcessingResult { + let lh_bytes = src .iter() .skip(byte_offset) - .take(len) - .rev() - .clone() - .flat_map(|v| { - let mut bits = value_bits_le(*v); - bits.reverse(); - bits - }) + .take(N_MAX_LITERAL_HEADER_BYTES) + .cloned() .collect::>(); - // Bitstream processing state helper values - let mut witness_rows: Vec> = vec![]; - let mut last_byte_idx: usize = 1; - let mut current_byte_idx: usize = 1; - let mut current_bit_idx: usize = 0; - let mut decoded_len_acc = last_row.decoded_data.decoded_len_acc; - let mut decoded_rlc = last_row.decoded_data.decoded_value_rlc; + let literals_block_type = BlockType::from(lh_bytes[0] & 0x3); + let size_format = (lh_bytes[0] >> 2) & 3; + let sf_max = size_format == 3; - // accumulators - let aux_1 = last_row.encoded_data.value_rlc; - let multiplier = - (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); - let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + let [n_bits_fmt, n_bits_regen, n_bits_compressed, n_streams, n_bytes_header, branch]: [usize; + 6] = match literals_block_type { + BlockType::RawBlock => match size_format { + 0b00 | 0b10 => [1, 5, 0, 1, 1, 0], + 0b01 => [2, 12, 0, 1, 2, 1], + 0b11 => [2, 20, 0, 1, 3, 2], + _ => unreachable!("size_format out of bound"), + }, + _ => unreachable!("BlockType::* unexpected. 
Must be raw bytes for literals."), + }; - let mut tag_value_acc = - src.iter() - .skip(byte_offset) - .take(len) - .rev() + // Bits for representing regenerated_size and compressed_size + let sizing_bits = &lh_bytes.clone().into_iter().fold(vec![], |mut acc, b| { + acc.extend(value_bits_le(b)); + acc + })[(2 + n_bits_fmt)..(n_bytes_header * N_BITS_PER_BYTE)]; + + let regen_size = le_bits_to_value(&sizing_bits[0..n_bits_regen]); + let compressed_size = + le_bits_to_value(&sizing_bits[n_bits_regen..(n_bits_regen + n_bits_compressed)]); + + let tag_next = match literals_block_type { + BlockType::RawBlock => ZstdTag::ZstdBlockLiteralsRawBytes, + _ => unreachable!("BlockType::* unexpected. Must be raw bytes for literals."), + }; + + let tag_value_iter = + lh_bytes + .iter() + .take(n_bytes_header) .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); + *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); Some(*acc) }); - let tag_value = tag_value_acc.clone().last().expect("Tag value exists"); + let tag_value = tag_value_iter + .clone() + .last() + .expect("LiteralsHeader expected"); let tag_rlc_iter = - src.iter() - .skip(byte_offset) - .take(len) + lh_bytes + .iter() + .take(n_bytes_header) .scan(Value::known(F::zero()), |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) }); - let tag_rlc = tag_rlc_iter.clone().last().expect("Tag value exists"); - let mut tag_rlc_iter = tag_rlc_iter.collect::>>().into_iter().rev(); - - // Decide the next tag - let tag_next = match stream_idx { - 0..=2 => ZstdTag::ZstdBlockLstream, - 3 => ZstdTag::ZstdBlockSequenceHeader, - _ => unreachable!("stream_idx value out of range"), - }; - - let mut padding_end_idx = 0; - while lstream_bits[padding_end_idx] == 0 { - padding_end_idx += 1; - } - - let mut next_tag_value_acc = tag_value_acc.next().unwrap(); - let mut next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); - - // Add 
a witness row for leading 0s and sentinel 1-bit - witness_rows.push(ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockLstream, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockLstream), - tag_len: len as u64, - tag_idx: current_byte_idx as u64, - tag_value, - tag_value_acc: next_tag_value_acc, - is_tag_change: true, - tag_rlc, - tag_rlc_acc: next_tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + current_byte_idx) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte: src[byte_offset + len - current_byte_idx], - value_rlc, - // reverse specific values - reverse: true, - reverse_len: len as u64, - reverse_idx: (len - (current_byte_idx - 1)) as u64, - aux_1, - aux_2: tag_value, - }, - huffman_data: HuffmanData { - byte_offset: huffman_code_byte_offset as u64, - bit_value: 1u8, - stream_idx, - k: (0, padding_end_idx as u8), - }, - bitstream_read_data: BitstreamReadRow { - bit_value: 1u64, - bit_start_idx: 0usize, - bit_end_idx: padding_end_idx, - is_zero_bit_read: false, - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len_acc, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: 0, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, - }, - fse_data: FseTableRow::default(), - }); - - // Exclude the leading zero section - while lstream_bits[current_bit_idx] == 0 { - (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); - } - // Exclude the sentinel 1-bit - (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); - - // Update accumulator - if current_byte_idx > last_byte_idx { - next_tag_value_acc = tag_value_acc.next().unwrap(); - next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); - last_byte_idx = current_byte_idx; - } - - // Now the actual symbol-bearing bitstream starts - let (max_bitstring_len, huffman_bitstring_map) 
= huffman_code.parse_bitstring_map(); - let mut decoded_symbols: Vec = vec![]; - let mut bitstring_acc: String = String::from(""); - let mut cur_bitstring_len: usize = 0; - - while current_bit_idx < len * N_BITS_PER_BYTE { - if huffman_bitstring_map.contains_key(bitstring_acc.as_str()) { - let sym = *huffman_bitstring_map.get(bitstring_acc.as_str()).unwrap(); - decoded_symbols.push(sym); - - let from_byte_idx = current_byte_idx; - let from_bit_idx = current_bit_idx; - - // advance byte and bit marks to the last bit - for _ in 0..(cur_bitstring_len - 1) { - (current_byte_idx, current_bit_idx) = - increment_idx(current_byte_idx, current_bit_idx); - } - let end_bit_idx = if current_byte_idx > from_byte_idx { - current_bit_idx.rem_euclid(8) + 8 - } else { - current_bit_idx.rem_euclid(8) - }; - (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); - - decoded_len_acc += 1; - decoded_rlc = decoded_rlc * randomness + Value::known(F::from(sym)); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC expected"); - // Add a witness row for emitted symbol - witness_rows.push(ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockLstream, - tag_next, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockLstream), - tag_len: len as u64, - tag_idx: from_byte_idx as u64, - tag_value, - tag_value_acc: next_tag_value_acc, - is_tag_change: false, - tag_rlc, - tag_rlc_acc: next_tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + from_byte_idx) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte: src[byte_offset + len - from_byte_idx], - value_rlc, - // reverse specific values - reverse: true, - reverse_len: len as u64, - reverse_idx: (len - from_byte_idx + 1) as u64, - aux_1, - aux_2: tag_value, - }, - huffman_data: HuffmanData { - byte_offset: huffman_code_byte_offset as u64, - bit_value: u8::from_str_radix(bitstring_acc.as_str(), 2).unwrap(), - stream_idx, - k: (from_bit_idx.rem_euclid(8) as u8, 
end_bit_idx as u8), - }, - bitstream_read_data: BitstreamReadRow { - bit_value: u8::from_str_radix(bitstring_acc.as_str(), 2).unwrap() as u64, - bit_start_idx: from_bit_idx.rem_euclid(8), - bit_end_idx: end_bit_idx, - is_zero_bit_read: false, - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: sym as u8, - decoded_value_rlc: decoded_rlc, - }, - fse_data: FseTableRow::default(), + let value_rlc_iter = + lh_bytes + .iter() + .take(n_bytes_header) + .scan(last_row.encoded_data.value_rlc, |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) }); - // Update accumulator - if current_byte_idx > last_byte_idx && current_byte_idx <= len { - next_tag_value_acc = tag_value_acc.next().unwrap(); - next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); - last_byte_idx = current_byte_idx; - } - - // Reset decoding state - bitstring_acc = String::from(""); - cur_bitstring_len = 0; - } else { - if lstream_bits[current_bit_idx + cur_bitstring_len] > 0 { - bitstring_acc.push('1'); - } else { - bitstring_acc.push('0'); - } - cur_bitstring_len += 1; - - if cur_bitstring_len > max_bitstring_len as usize { - panic!("Reading bit len greater than max bitstring len not allowed."); - } - } - } + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; - (byte_offset + len, witness_rows, decoded_symbols) + ( + byte_offset + n_bytes_header, + lh_bytes + .iter() + .take(n_bytes_header) + .zip(tag_value_iter) + .zip(value_rlc_iter) + .zip(tag_rlc_iter) + .enumerate() + .map( + |(i, (((&value_byte, tag_value_acc), _v_rlc), tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockLiteralsHeader, + tag_next, + block_idx, + max_tag_len: 
lookup_max_tag_len(ZstdTag::ZstdBlockLiteralsHeader), + tag_len: n_bytes_header as u64, + tag_idx: (i + 1) as u64, + tag_value, + tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + reverse: false, + value_rlc, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow::default(), + decoded_data: last_row.decoded_data.clone(), + fse_data: FseDecodingRow::default(), + }, + ) + .collect::>(), + literals_block_type, + n_streams, + regen_size as usize, + compressed_size as usize, + (branch as u64, sf_max), + ) } /// Result for processing multiple blocks from compressed data @@ -1807,7 +1802,8 @@ pub type MultiBlockProcessResult = ( Vec, Vec, Vec, - Vec, + Vec, + Vec, ); /// Process a slice of bytes into decompression circuit witness rows @@ -1816,9 +1812,14 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR let mut literals: Vec = vec![]; let mut aux_data: Vec = vec![]; let mut fse_aux_tables: Vec = vec![]; - let mut huffman_codes: Vec = vec![]; + let mut block_info_arr: Vec = vec![]; + let mut sequence_info_arr: Vec = vec![]; let byte_offset = 0; + // witgen_debug + let stdout = io::stdout(); + let mut handle = stdout.lock(); + // FrameHeaderDescriptor and FrameContentSize let (byte_offset, rows) = process_frame_header::( src, @@ -1828,140 +1829,191 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR ); witness_rows.extend_from_slice(&rows); + let mut block_idx: u64 = 1; loop { let ( _byte_offset, rows, - last_block, + block_info, + sequence_info, new_literals, lstream_lens, pipeline_data, - fse_aux_table, - huffman_code, + new_fse_aux_tables, ) = process_block::( src, + block_idx, byte_offset, rows.last().expect("last row expected to exist"), randomness, ); + witness_rows.extend_from_slice(&rows); literals.extend_from_slice(&new_literals); 
aux_data.extend_from_slice(&lstream_lens); aux_data.extend_from_slice(&pipeline_data); - fse_aux_tables.push(fse_aux_table); - huffman_codes.push(huffman_code); + for fse_aux_table in new_fse_aux_tables { + fse_aux_tables.push(fse_aux_table); + } - if last_block { + block_info_arr.push(block_info); + sequence_info_arr.push(sequence_info); + + if block_info.is_last_block { // TODO: Recover this assertion after the sequence section decoding is completed. // assert!(byte_offset >= src.len()); break; + } else { + block_idx += 1; } } + // witgen_debug + // for (idx, row) in witness_rows.iter().enumerate() { + // write!( + // handle, + // "{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};", + // idx, + // row.state.tag, row.state.tag_next, row.state.block_idx, row.state.max_tag_len, + // row.state.tag_len, row.state.tag_idx, row.state.tag_value, row.state.tag_value_acc, + // row.state.is_tag_change, row.state.tag_rlc_acc, row.encoded_data.byte_idx, + // row.encoded_data.encoded_len, row.encoded_data.value_byte, row.encoded_data.reverse, + // row.encoded_data.reverse_idx, row.encoded_data.reverse_len, row.encoded_data.aux_1, + // row.encoded_data.aux_2, row.encoded_data.value_rlc, row.decoded_data.decoded_len, + // row.decoded_data.decoded_len_acc, row.decoded_data.total_decoded_len, + // row.decoded_data.decoded_byte, row.decoded_data.decoded_value_rlc, + // row.fse_data.table_kind, row.fse_data.table_size, row.fse_data.symbol, + // row.fse_data.num_emitted, row.fse_data.value_decoded, row.fse_data.probability_acc, + // row.fse_data.is_repeat_bits_loop, row.fse_data.is_trailing_bits, + // row.bitstream_read_data.bit_start_idx, + // row.bitstream_read_data.bit_end_idx, row.bitstream_read_data.bit_value, + // row.bitstream_read_data.is_nil, + // row.bitstream_read_data.is_zero_bit_read, + // 
row.bitstream_read_data.is_seq_init, + // row.bitstream_read_data.seq_idx, + // row.bitstream_read_data.states, + // row.bitstream_read_data.symbols, + // row.bitstream_read_data.values, + // row.bitstream_read_data.baseline, + // row.bitstream_read_data.is_update_state, + // ).unwrap(); + + // writeln!(handle).unwrap(); + // } + ( witness_rows, literals, aux_data, fse_aux_tables, - huffman_codes, + block_info_arr, + sequence_info_arr, ) } #[cfg(test)] mod tests { - use super::*; - - use eth_types::H256; - use ethers_core::utils::keccak256; - use halo2_proofs::halo2curves::bn256::Fr; - - use std::{ - fs::{self, File}, - io::Write, - }; - - #[test] - #[ignore] - fn compression_ratio() -> Result<(), std::io::Error> { - use csv::WriterBuilder; - - let get_compression_ratio = |data: &[u8]| -> Result<(u64, u64, H256), std::io::Error> { - let raw_len = data.len(); - let compressed = { - // compression level = 0 defaults to using level=3, which is zstd's default. - let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; - - // disable compression of literals, i.e. literals will be raw bytes. - encoder.set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( - zstd::zstd_safe::ParamSwitch::Disable, - ))?; - // set target block size to fit within a single block. - encoder - .set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024))?; - // do not include the checksum at the end of the encoded data. - encoder.include_checksum(false)?; - // do not include magic bytes at the start of the frame since we will have a single - // frame. - encoder.include_magicbytes(false)?; - // set source length, which will be reflected in the frame header. - encoder.set_pledged_src_size(Some(raw_len as u64))?; - // include the content size to know at decode time the expected size of decoded - // data. - encoder.include_contentsize(true)?; - - encoder.write_all(data)?; - encoder.finish()? 
- }; - let hash = keccak256(&compressed); - let compressed_len = compressed.len(); - Ok((raw_len as u64, compressed_len as u64, hash.into())) - }; - - let mut batch_files = fs::read_dir("./data")? - .map(|entry| entry.map(|e| e.path())) - .collect::, std::io::Error>>()?; - batch_files.sort(); - - let batches = batch_files - .iter() - .map(fs::read_to_string) - .filter_map(|data| data.ok()) - .map(|data| hex::decode(data.trim_end()).expect("Failed to decode hex data")) - .collect::>>(); - - let file = File::create("modified-ratio.csv")?; - let mut writer = WriterBuilder::new().from_writer(file); - - // Write headers to CSV - writer.write_record(["ID", "Len(input)", "Compression Ratio"])?; - - // Test and store results in CSV - for (i, batch) in batches.iter().enumerate() { - let (raw_len, compr_len, keccak_hash) = get_compression_ratio(batch)?; - println!( - "batch{:0>3}, raw_size={:6}, compr_size={:6}, compr_keccak_hash={:64x}", - i, raw_len, compr_len, keccak_hash - ); - - // Write input and result to CSV - let compr_ratio = raw_len as f64 / compr_len as f64; - writer.write_record(&[i.to_string(), raw_len.to_string(), compr_ratio.to_string()])?; - } - - // Flush the CSV writer - writer.flush()?; - - Ok(()) - } + // witgen_debug + // use super::*; + // use bitstream_io::write; + // use halo2_proofs::halo2curves::bn256::Fr; + // use serde_json::from_str; + + // witgen_debug + // use std::{ + // fs::{self, File}, + // io::{self, Write}, + // }; + + // witgen_debug + // #[test] + // #[ignore] + // fn compression_ratio() -> Result<(), std::io::Error> { + // use csv::WriterBuilder; + // use super::*; + + // let get_compression_ratio = |data: &[u8]| -> Result<(u64, u64, H256), std::io::Error> { + // let raw_len = data.len(); + // let compressed = { + // // compression level = 0 defaults to using level=3, which is zstd's default. + // let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; + + // // disable compression of literals, i.e. 
literals will be raw bytes. + // encoder.set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( + // zstd::zstd_safe::ParamSwitch::Disable, + // ))?; + // // set target block size to fit within a single block. + // encoder + // .set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024))?; + // // do not include the checksum at the end of the encoded data. + // encoder.include_checksum(false)?; + // // do not include magic bytes at the start of the frame since we will have a + // single // frame. + // encoder.include_magicbytes(false)?; + // // set source length, which will be reflected in the frame header. + // encoder.set_pledged_src_size(Some(raw_len as u64))?; + // // include the content size to know at decode time the expected size of decoded + // // data. + // encoder.include_contentsize(true)?; + + // encoder.write_all(data)?; + // encoder.finish()? + // }; + // let hash = keccak256(&compressed); + // let compressed_len = compressed.len(); + // Ok((raw_len as u64, compressed_len as u64, hash.into())) + // }; + + // let mut batch_files = fs::read_dir("./data")? 
+ // .map(|entry| entry.map(|e| e.path())) + // .collect::, std::io::Error>>()?; + // batch_files.sort(); + + // let batches = batch_files + // .iter() + // .map(fs::read_to_string) + // .filter_map(|data| data.ok()) + // .map(|data| hex::decode(data.trim_end()).expect("Failed to decode hex data")) + // .collect::>>(); + + // let file = File::create("modified-ratio.csv")?; + // let mut writer = WriterBuilder::new().from_writer(file); + + // // Write headers to CSV + // writer.write_record(["ID", "Len(input)", "Compression Ratio"])?; + + // // Test and store results in CSV + // for (i, batch) in batches.iter().enumerate() { + // let (raw_len, compr_len, keccak_hash) = get_compression_ratio(batch)?; + // println!( + // "batch{:0>3}, raw_size={:6}, compr_size={:6}, compr_keccak_hash={:64x}", + // i, raw_len, compr_len, keccak_hash + // ); + + // // Write input and result to CSV + // let compr_ratio = raw_len as f64 / compr_len as f64; + // writer.write_record(&[i.to_string(), raw_len.to_string(), compr_ratio.to_string()])?; + // } + + // // Flush the CSV writer + // writer.flush()?; + + // Ok(()) + // } #[test] fn batch_compression_zstd() -> Result<(), std::io::Error> { use halo2_proofs::halo2curves::bn256::Fr; - use hex::FromHex; + // witgen_debug + // use hex::FromHex; use super::*; - let raw = 
>::from_hex(r#"0100000000000231fb0000000064e588f7000000000000000000000000000000000000000000000000000000000000000000000000007a12000006000000000219f90216038510229a150083039bd49417afd0263d6909ba1f9a8eac697f76532365fb95880234e1a857498000b901a45ae401dc0000000000000000000000000000000000000000000000000000000064e58a1400000000000000000000000000000000000000000000000000000000000000400000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000e404e45aaf0000000000000000000000005300000000000000000000000000000000000004000000000000000000000000d9692f1748afee00face2da35242417dd05a86150000000000000000000000000000000000000000000000000000000000000bb8000000000000000000000000c3100d07a5997a7f9f9cdde967d396f9a2aed6a60000000000000000000000000000000000000000000000000234e1a8574980000000000000000000000000000000000000000000000000049032ac61d5dce9e600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083104ec1a053077484b4d7a88434c2d03c30c3c55bd3a82b259f339f1c0e1e1244189009c5a01c915dd14aed1b824bf610a95560e380ea3213f0bf345df3bddff1acaf7da84d000002d8f902d5068510229a1500830992fd94bbad0e891922a8a4a7e9c39d4cc0559117016fec87082b6be7f5b757b90264ac9650d800000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000004000000000000000000000000000000000000000000000000000000000000001e00000000000000000000000000000000000000000000000000000000000000164883164560000000000000000000000005300000000000000000000000000000000000004000000000000000000000000ffd2ece82f7959ae184d10fe17865d27b4f0fb9400000000000000000000000000000000000000000000000000000000000001f4fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffce9f6fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffcea0a0000000000000000000000000
0000000000000000000000000082b6be7f5b75700000000000000000000000000000000000000000000000000000000004c4b40000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006aea61ea08dd6e4834cd43a257ed52d9a31dd3b90000000000000000000000000000000000000000000000000000000064e58a1400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000412210e8a0000000000000000000000000000000000000000000000000000000083104ec2a0bc501c59bceb707d958423bad14c0d0daec84ad067f7e42209ad2cb8d904a55da00a04de4c79ed24b7a82d523b5de63c7ff68a3b7bb519546b3fe4ba8bc90a396600000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec2a037979a5225dd156f51abf9a8601e9156e1b1308c0474d69af98c55627886232ea048ac197295187e7ad48aa34cc37c2625434fa812449337732d8522014f4eacfc00000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec1a087269dbb9e987e5d58ecd3bcb724cbc4e6c843eb9095de16a25263aebfe06f5aa07f3ac49b6847ba51c5319174e51e088117742240f8555c5c1d77108cf0df90d700000137f9013480850f7eb06980830317329446ce46951d12710d85b
c4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec1a04abdb8572dcabf1996825de6f753124eed41c1292fcfdc4d9a90cb4f8a0f8ff1a06ef25857e2cc9d0fa8b6ecc03b4ba6ef6f3ec1515d570fcc9102e2aa653f347a00000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec2a0882202163cbb9a299709b443b663fbab459440deabfbe183e999c98c00ea80c2a010ecb1e5196f0b1ee3d067d9a158b47b1376706e42ce2e769cf8e986935781dd"#) - .expect("FromHex failure"); + // let raw = 
>::from_hex(r#"0100000000000231fb0000000064e588f7000000000000000000000000000000000000000000000000000000000000000000000000007a12000006000000000219f90216038510229a150083039bd49417afd0263d6909ba1f9a8eac697f76532365fb95880234e1a857498000b901a45ae401dc0000000000000000000000000000000000000000000000000000000064e58a1400000000000000000000000000000000000000000000000000000000000000400000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000e404e45aaf0000000000000000000000005300000000000000000000000000000000000004000000000000000000000000d9692f1748afee00face2da35242417dd05a86150000000000000000000000000000000000000000000000000000000000000bb8000000000000000000000000c3100d07a5997a7f9f9cdde967d396f9a2aed6a60000000000000000000000000000000000000000000000000234e1a8574980000000000000000000000000000000000000000000000000049032ac61d5dce9e600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083104ec1a053077484b4d7a88434c2d03c30c3c55bd3a82b259f339f1c0e1e1244189009c5a01c915dd14aed1b824bf610a95560e380ea3213f0bf345df3bddff1acaf7da84d000002d8f902d5068510229a1500830992fd94bbad0e891922a8a4a7e9c39d4cc0559117016fec87082b6be7f5b757b90264ac9650d800000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000004000000000000000000000000000000000000000000000000000000000000001e00000000000000000000000000000000000000000000000000000000000000164883164560000000000000000000000005300000000000000000000000000000000000004000000000000000000000000ffd2ece82f7959ae184d10fe17865d27b4f0fb9400000000000000000000000000000000000000000000000000000000000001f4fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffce9f6fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffcea0a0000000000000000000000000
0000000000000000000000000082b6be7f5b75700000000000000000000000000000000000000000000000000000000004c4b40000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006aea61ea08dd6e4834cd43a257ed52d9a31dd3b90000000000000000000000000000000000000000000000000000000064e58a1400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000412210e8a0000000000000000000000000000000000000000000000000000000083104ec2a0bc501c59bceb707d958423bad14c0d0daec84ad067f7e42209ad2cb8d904a55da00a04de4c79ed24b7a82d523b5de63c7ff68a3b7bb519546b3fe4ba8bc90a396600000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec2a037979a5225dd156f51abf9a8601e9156e1b1308c0474d69af98c55627886232ea048ac197295187e7ad48aa34cc37c2625434fa812449337732d8522014f4eacfc00000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec1a087269dbb9e987e5d58ecd3bcb724cbc4e6c843eb9095de16a25263aebfe06f5aa07f3ac49b6847ba51c5319174e51e088117742240f8555c5c1d77108cf0df90d700000137f9013480850f7eb06980830317329446ce46951d12710d85b
c4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec1a04abdb8572dcabf1996825de6f753124eed41c1292fcfdc4d9a90cb4f8a0f8ff1a06ef25857e2cc9d0fa8b6ecc03b4ba6ef6f3ec1515d570fcc9102e2aa653f347a00000137f9013480850f7eb06980830317329446ce46951d12710d85bc4fe10bb29c6ea501207787019945ca262000b8c4b2dd898a000000000000000000000000000000000000000000000000000000000000002000000000000000000000000065e4e8d7bd50191abfee6e5bcdc4d16ddfe9975e000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000083104ec2a0882202163cbb9a299709b443b663fbab459440deabfbe183e999c98c00ea80c2a010ecb1e5196f0b1ee3d067d9a158b47b1376706e42ce2e769cf8e986935781dd"#) + // .expect("FromHex failure"); + + // witgen_debug + let raw: Vec = String::from("Romeo and Juliet@Excerpt from Act 2, Scene 2@@JULIET@O Romeo, Romeo! wherefore art thou Romeo?@Deny thy father and refuse thy name;@Or, if thou wilt not, be but sworn my love,@And I'll no longer be a Capulet.@@ROMEO@[Aside] Shall I hear more, or shall I speak at this?@@JULIET@'Tis but thy name that is my enemy;@Thou art thyself, though not a Montague.@What's Montague? it is nor hand, nor foot,@Nor arm, nor face, nor any other part@Belonging to a man. O, be some other name!@What's in a name? that which we call a rose@By any other name would smell as sweet;@So Romeo would, were he not Romeo call'd,@Retain that dear perfection which he owes@Without that title. 
Romeo, doff thy name,@And for that name which is no part of thee@Take all myself.@@ROMEO@I take thee at thy word:@Call me but love, and I'll be new baptized;@Henceforth I never will be Romeo.@@JULIET@What man art thou that thus bescreen'd in night@So stumblest on my counsel?").as_bytes().to_vec(); + let compressed = { // compression level = 0 defaults to using level=3, which is zstd's default. let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; @@ -1986,176 +2038,14 @@ mod tests { encoder.finish()? }; - let (_witness_rows, _decoded_literals, _aux_data, _fse_aux_tables, _huffman_codes) = - process::(&compressed, Value::known(Fr::from(123456789))); - - Ok(()) - } - - // Verify correct interleaved decoding of FSE-coded Huffman Weights - // Example link: https://nigeltao.github.io/blog/2022/zstandard-part-5-fse.html - #[test] - fn interleaved_huffman_code_fse() -> Result<(), std::io::Error> { - // Input includes FSE table representation (normalized symbol frequencies) and the actual - // Huffman bitstream For structure reference: https://nigeltao.github.io/blog/2022/zstandard-part-2-structure.html - let input: [u8; 36] = [ - 0x23, 0x30, 0x6f, 0x9b, 0x03, 0x7d, 0xc7, 0x16, 0x0b, 0xbe, 0xc8, 0xf2, 0xd0, 0x22, - 0x4b, 0x6b, 0xbc, 0x54, 0x5d, 0xa9, 0xd4, 0x93, 0xef, 0xc4, 0x54, 0x96, 0xb2, 0xe2, - 0xa8, 0xa8, 0x24, 0x1c, 0x54, 0x40, 0x29, 0x01, - ]; - let ( - _byte_offset, _witness_rows, - huffman_codes, - _n_huffan_bytes, - _huffman_byte_offset, - _last_rlc, - _huffman_idx, - _fse_size, - _fse_accuracy, - _n_huffman_bitstream_bytes, - _fse_aux_data, - ) = process_block_zstd_huffman_code::( - &input, - 0, - &ZstdWitnessRow::init(0), - Value::known(Fr::from(123456789)), - 4, - ); - - let expected_weights: Vec = vec![ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 
2, 0, 0, 1, 1, 1, 1, 0, - 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 5, 3, 3, 3, 6, 3, 2, 4, 4, 0, 1, 4, 4, 5, 5, 2, 0, 4, 4, - 5, 3, 1, 3, 1, 3, - ] - .into_iter() - .map(FseSymbol::from) - .collect::>(); - - assert_eq!( - huffman_codes.weights, expected_weights, - "Huffman weights should be correctly decoded with interleaved states" - ); - - Ok(()) - } - - // Verify correct decoding of literal bitstream using a HuffmanCode table - // Example link: https://nigeltao.github.io/blog/2022/zstandard-part-4-huffman.html - #[test] - fn decode_literal_bitstream() -> Result<(), std::io::Error> { - let huffman_codes = HuffmanCodesData { - byte_offset: 0, - weights: vec![ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, - 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 5, 3, 3, 3, 6, 3, 2, 4, 4, 0, 1, 4, 4, 5, 5, - 2, 0, 4, 4, 5, 3, 1, 3, 1, 3, - ] - .into_iter() - .map(FseSymbol::from) - .collect::>(), - }; - - let lstream1: [u8; 85] = [ - 0xcc, 0x51, 0x73, 0x3a, 0x85, 0x9e, 0xf7, 0x59, 0xfc, 0xc5, 0xca, 0x6a, 0x7a, 0xd9, - 0x82, 0x9c, 0x65, 0xc5, 0x45, 0x92, 0xe3, 0x0d, 0xf3, 0xef, 0x71, 0xee, 0xdc, 0xd5, - 0xa2, 0xe3, 0x48, 0xad, 0xa3, 0xbc, 0x41, 0x7a, 0x3c, 0xaa, 0xd6, 0xeb, 0xd0, 0x77, - 0xea, 0xdc, 0x5d, 0x41, 0x06, 0x50, 0x1c, 0x49, 0x0f, 0x07, 0x10, 0x05, 0x88, 0x84, - 0x94, 0x02, 0xfc, 0x3c, 0xe3, 0x60, 0x25, 0xc0, 0xcb, 0x0c, 0xb8, 0xa9, 0x73, 0xbc, - 0x13, 0x77, 0xc6, 0xe2, 0x20, 0xed, 0x17, 0x7b, 0x12, 0xdc, 0x24, 0x5a, 0xdf, 0xb4, - 0x21, - ]; - - let (_byte_offset, _witness_rows, decoded_symbols) = process_block_zstd_lstream::( - &lstream1, - 0, - 85, - &ZstdWitnessRow::init(0), - Value::known(Fr::from(123456789)), - 1, - &huffman_codes, - 0, - ); - - let ascii_symbols: String = decoded_symbols - .iter() - .filter_map(|&s| char::from_u32(s as u32)) - .collect(); - let 
expected_decoded_ascii: String = String::from("Romeo and Juliet\nExcerpt from Act 2, Scene 2\n\nJULIET\nO ,! wherefore art thou?\nDeny thy fatherrefusename;\nOr, ifwilt not, be but sworn my l"); - - assert_eq!( - ascii_symbols, expected_decoded_ascii, - "Expect correct decoding" - ); - - Ok(()) - } - - #[test] - fn decode_literal_section() -> Result<(), std::io::Error> { - let encoded: [u8; 555] = [ - // 0x28, 0xb5, 0x2f, 0xfd, // magic numbers are removed - 0x60, // originally 0x64. unset the checksum bit. - 0xae, 0x02, 0x0d, 0x11, 0x00, 0x76, 0x62, 0x5e, 0x23, 0x30, 0x6f, 0x9b, 0x03, 0x7d, - 0xc7, 0x16, 0x0b, 0xbe, 0xc8, 0xf2, 0xd0, 0x22, 0x4b, 0x6b, 0xbc, 0x54, 0x5d, 0xa9, - 0xd4, 0x93, 0xef, 0xc4, 0x54, 0x96, 0xb2, 0xe2, 0xa8, 0xa8, 0x24, 0x1c, 0x54, 0x40, - 0x29, 0x01, 0x55, 0x00, 0x57, 0x00, 0x51, 0x00, 0xcc, 0x51, 0x73, 0x3a, 0x85, 0x9e, - 0xf7, 0x59, 0xfc, 0xc5, 0xca, 0x6a, 0x7a, 0xd9, 0x82, 0x9c, 0x65, 0xc5, 0x45, 0x92, - 0xe3, 0x0d, 0xf3, 0xef, 0x71, 0xee, 0xdc, 0xd5, 0xa2, 0xe3, 0x48, 0xad, 0xa3, 0xbc, - 0x41, 0x7a, 0x3c, 0xaa, 0xd6, 0xeb, 0xd0, 0x77, 0xea, 0xdc, 0x5d, 0x41, 0x06, 0x50, - 0x1c, 0x49, 0x0f, 0x07, 0x10, 0x05, 0x88, 0x84, 0x94, 0x02, 0xfc, 0x3c, 0xe3, 0x60, - 0x25, 0xc0, 0xcb, 0x0c, 0xb8, 0xa9, 0x73, 0xbc, 0x13, 0x77, 0xc6, 0xe2, 0x20, 0xed, - 0x17, 0x7b, 0x12, 0xdc, 0x24, 0x5a, 0xdf, 0xb4, 0x21, 0x9a, 0xcb, 0x8f, 0xc7, 0x58, - 0x54, 0x11, 0xa9, 0xf1, 0x47, 0x82, 0x9b, 0xba, 0x60, 0xb4, 0x92, 0x28, 0x0e, 0xfb, - 0x8b, 0x1e, 0x92, 0x23, 0x6a, 0xcf, 0xbf, 0xe5, 0x45, 0xb5, 0x7e, 0xeb, 0x81, 0xf1, - 0x78, 0x4b, 0xad, 0x17, 0x4d, 0x81, 0x9f, 0xbc, 0x67, 0xa7, 0x56, 0xee, 0xb4, 0xd9, - 0xe1, 0x95, 0x21, 0x66, 0x0c, 0x95, 0x83, 0x27, 0xde, 0xac, 0x37, 0x20, 0x91, 0x22, - 0x07, 0x0b, 0x91, 0x86, 0x94, 0x1a, 0x7b, 0xf6, 0x4c, 0xb0, 0xc0, 0xe8, 0x2e, 0x49, - 0x65, 0xd6, 0x34, 0x63, 0x0c, 0x88, 0x9b, 0x1c, 0x48, 0xca, 0x2b, 0x34, 0xa9, 0x6b, - 0x99, 0x3b, 0xee, 0x13, 0x3b, 0x7c, 0x93, 0x0b, 0xf7, 0x0d, 0x49, 0x69, 0x18, 0x57, - 0xbe, 0x3b, 0x64, 
0x45, 0x1d, 0x92, 0x63, 0x7f, 0xe8, 0xf9, 0xa1, 0x19, 0x7b, 0x7b, - 0x6e, 0xd8, 0xa3, 0x90, 0x23, 0x82, 0xf4, 0xa7, 0xce, 0xc8, 0xf8, 0x90, 0x15, 0xb3, - 0x14, 0xf4, 0x40, 0xe7, 0x02, 0x78, 0xd3, 0x17, 0x71, 0x23, 0xb1, 0x19, 0xad, 0x6b, - 0x49, 0xae, 0x13, 0xa4, 0x75, 0x38, 0x51, 0x47, 0x89, 0x67, 0xb0, 0x39, 0xb4, 0x53, - 0x86, 0xa4, 0xac, 0xaa, 0xa3, 0x34, 0x89, 0xca, 0x2e, 0xe9, 0xc1, 0xfe, 0xf2, 0x51, - 0xc6, 0x51, 0x73, 0xaa, 0xf7, 0x9d, 0x2d, 0xed, 0xd9, 0xb7, 0x4a, 0xb2, 0xb2, 0x61, - 0xe4, 0xef, 0x98, 0xf7, 0xc5, 0xef, 0x51, 0x9b, 0xd8, 0xdc, 0x60, 0x6c, 0x41, 0x76, - 0xaf, 0x78, 0x1a, 0x62, 0xb5, 0x4c, 0x1e, 0x21, 0x39, 0x9a, 0x5f, 0xac, 0x9d, 0xe0, - 0x62, 0xe8, 0xe9, 0x2f, 0x2f, 0x48, 0x02, 0x8d, 0x53, 0xc8, 0x91, 0xf2, 0x1a, 0xd2, - 0x7c, 0x0a, 0x7c, 0x48, 0xbf, 0xda, 0xa9, 0xe3, 0x38, 0xda, 0x34, 0xce, 0x76, 0xa9, - 0xda, 0x15, 0x91, 0xde, 0x21, 0xf5, 0x55, 0x46, 0xa8, 0x21, 0x9d, 0x51, 0xcc, 0x18, - 0x42, 0x44, 0x81, 0x8c, 0x94, 0xb4, 0x50, 0x1e, 0x20, 0x42, 0x82, 0x98, 0xc2, 0x3b, - 0x10, 0x48, 0xec, 0xa6, 0x39, 0x63, 0x13, 0xa7, 0x01, 0x94, 0x40, 0xff, 0x88, 0x0f, - 0x98, 0x07, 0x4a, 0x46, 0x38, 0x05, 0xa9, 0xcb, 0xf6, 0xc8, 0x21, 0x59, 0xaa, 0x38, - 0x45, 0xbf, 0x5c, 0xf8, 0x55, 0x9e, 0x9f, 0x04, 0xed, 0xc8, 0x03, 0x42, 0x2a, 0x4b, - 0xf6, 0x78, 0x7e, 0x23, 0x67, 0x15, 0xa2, 0x79, 0x29, 0xf4, 0x9b, 0x7e, 0x00, 0xbc, - 0x2f, 0x46, 0x96, 0x99, 0xea, 0xf1, 0xee, 0x1c, 0x6e, 0x06, 0x9c, 0xdb, 0xe4, 0x8c, - 0xc2, 0x05, 0xf7, 0x54, 0x51, 0x84, 0xc0, 0x33, 0x02, 0x01, 0xb1, 0x8c, 0x80, 0xdc, - 0x99, 0x8f, 0xcb, 0x46, 0xff, 0xd1, 0x25, 0xb5, 0xb6, 0x3a, 0xf3, 0x25, 0xbe, 0x85, - 0x50, 0x84, 0xf5, 0x86, 0x5a, 0x71, 0xf7, 0xbd, 0xa1, 0x4c, 0x52, 0x4f, 0x20, 0xa3, - 0x61, 0x23, 0x77, 0x12, 0xd3, 0xb1, 0x58, 0x75, 0x22, 0x01, 0x12, 0x70, 0xec, 0x14, - 0x91, 0xf9, 0x85, 0x61, 0xd5, 0x7e, 0x98, 0x84, 0xc9, 0x76, 0x84, 0xbc, 0xb8, 0xfe, - 0x4e, 0x53, 0xa5, 0x06, 0x82, 0x14, 0x95, 0x51, - ]; - - let (_witness_rows, decoded_literals, _aux_data, _fse_aux_tables, 
_huffman_codes) = - process::(&encoded, Value::known(Fr::from(123456789))); - - let decoded_literal_string: String = decoded_literals - .iter() - .filter_map(|&s| char::from_u32(s as u32)) - .collect(); - let expected_literal_string = String::from("Romeo and Juliet\nExcerpt from Act 2, Scene 2\n\nJULIET\nO ,! wherefore art thou?\nDeny thy fatherrefusename;\nOr, ifwilt not, be but sworn my love,\nAnd I'll no longera Capulet.\n\nROMEO\n[Aside] Shall I hear more, or sspeak at this?'Tis that isenemy;\nTyself,gh a Montague.\nWhat's? inor hand,foot,\nNor armaceany opart\nBeing to a man. Osome!in a?which we ca rose\nBy would smell as sweet;\nSo, were he'd,\nRetaindear perfectionhe owes\nWithoitle.dofffor oee\nTake mI t hy word:\nCebe new baptized;\nHencth I never will. manthus bescreen'dnightstumblest on my counsel?\n"); - - assert_eq!( - decoded_literal_string, expected_literal_string, - "Decode the correct literal string" - ); + _decoded_literals, + _aux_data, + _fse_aux_tables, + block_info_arr, + sequence_info_arr, + ) = process::(&compressed, Value::known(Fr::from(123456789))); Ok(()) } diff --git a/aggregator/src/aggregation/decoder/witgen/params.rs b/aggregator/src/aggregation/decoder/witgen/params.rs index 149c48db9d..1fa6fb9222 100644 --- a/aggregator/src/aggregation/decoder/witgen/params.rs +++ b/aggregator/src/aggregation/decoder/witgen/params.rs @@ -6,20 +6,12 @@ pub const N_BLOCK_HEADER_BYTES: usize = 3; /// Constants for zstd-compressed block pub const N_MAX_LITERAL_HEADER_BYTES: usize = 3; -/// Maximum bytes for the jump table -pub const N_JUMP_TABLE_BYTES: usize = 6; -/// Maximum bytes for the FSE representation -pub const N_MAX_LITERAL_FSE_BYTES: usize = 8; - -/// Maximum number of symbols (weights), i.e. symbol in [0, N_MAX_SYMBOLS). -pub const N_MAX_SYMBOLS: usize = 8; - -/// Number of bits used to represent the symbol in binary form. This will be used as a helper -/// gadget to form equality constraints over the symbol's value. 
-pub const N_BITS_SYMBOL: usize = 3; /// Number of bits used to represent the tag in binary form. pub const N_BITS_ZSTD_TAG: usize = 4; /// Number of bits in the repeat bits that follow value=1 in reconstructing FSE table. pub const N_BITS_REPEAT_FLAG: usize = 2; + +// we use offset window no more than = 22 +pub const CL_WINDOW_LIMIT : usize = 22; \ No newline at end of file diff --git a/aggregator/src/aggregation/decoder/witgen/types.rs b/aggregator/src/aggregation/decoder/witgen/types.rs index 71a8c7d25f..ae61698190 100644 --- a/aggregator/src/aggregation/decoder/witgen/types.rs +++ b/aggregator/src/aggregation/decoder/witgen/types.rs @@ -1,18 +1,18 @@ -use std::{ - collections::{BTreeMap, HashMap}, - io::Cursor, -}; +use std::{collections::BTreeMap, io::Cursor}; use bitstream_io::{BitRead, BitReader, LittleEndian}; use eth_types::Field; use gadgets::impl_expr; use halo2_proofs::{circuit::Value, plonk::Expression}; use itertools::Itertools; +use std::collections::HashMap; use strum_macros::EnumIter; +use crate::aggregation::decoder::tables::FseTable; + use super::{ params::N_BITS_PER_BYTE, - util::{bit_length, read_variable_bit_packing, smaller_powers_of_two, value_bits_le}, + util::{read_variable_bit_packing, smaller_powers_of_two, value_bits_le}, }; /// A read-only memory table (fixed table) for decompression circuit to verify that the next tag @@ -70,59 +70,9 @@ impl RomTagTableRow { } } -/// The symbol emitted by FSE table. This is also the weight in the canonical Huffman code. -#[derive(Clone, Copy, Debug, EnumIter, PartialEq, Eq, PartialOrd, Ord)] -pub enum FseSymbol { - /// Weight == 0. - S0 = 0, - /// Weight == 1. - S1, - /// Weight == 2. - S2, - /// Weight == 3. - S3, - /// Weight == 4. - S4, - /// Weight == 5. - S5, - /// Weight == 6. - S6, - /// Weight == 7. 
- S7, -} - -impl_expr!(FseSymbol); - -impl From for usize { - fn from(value: FseSymbol) -> Self { - value as usize - } -} - -impl From for u64 { - fn from(value: FseSymbol) -> Self { - value as u64 - } -} - -impl From for FseSymbol { - fn from(value: usize) -> Self { - match value { - 0 => Self::S0, - 1 => Self::S1, - 2 => Self::S2, - 3 => Self::S3, - 4 => Self::S4, - 5 => Self::S5, - 6 => Self::S6, - 7 => Self::S7, - _ => unreachable!("FseSymbol in [0, 8)"), - } - } -} - -#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy)] pub enum BlockType { + #[default] RawBlock = 0, RleBlock, ZstdCompressedBlock, @@ -141,6 +91,33 @@ impl From for BlockType { } } +#[derive(Debug, Default, Clone, Copy)] +pub struct BlockInfo { + pub block_idx: usize, + pub block_type: BlockType, + pub block_len: usize, + pub is_last_block: bool, +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct SequenceInfo { + pub block_idx: usize, + pub num_sequences: usize, + pub compression_mode: [bool; 3], +} + +/// The type for indicate each range in output bytes by sequence execution +#[derive(Debug, Clone)] +pub enum SequenceExecInfo { + LiteralCopy(std::ops::Range), + BackRef(std::ops::Range), + LastLiteralCopy, +} + +/// The type to describe an execution: (instruction_id, exec_info) +#[derive(Debug, Clone)] +pub struct SequenceExec (pub usize, pub SequenceExecInfo); + /// The type of Lstream. #[derive(Clone, Copy, Debug, EnumIter)] pub enum LstreamNum { @@ -184,26 +161,16 @@ pub enum ZstdTag { FrameContentSize, /// The block's header. BlockHeader, - /// Raw bytes. - RawBlockBytes, - /// Run-length encoded bytes. - RleBlockBytes, /// Zstd block's literals header. ZstdBlockLiteralsHeader, /// Zstd blocks might contain raw bytes. ZstdBlockLiteralsRawBytes, - /// Zstd blocks might contain rle bytes. - ZstdBlockLiteralsRleBytes, - /// Zstd block's huffman header and FSE code. - ZstdBlockFseCode, - /// Zstd block's huffman code. - ZstdBlockHuffmanCode, - /// Zstd block's jump table. 
- ZstdBlockJumpTable, - /// Literal stream. - ZstdBlockLstream, /// Beginning of sequence section. ZstdBlockSequenceHeader, + /// Zstd block's FSE code. + ZstdBlockSequenceFseCode, + /// sequence bitstream for recovering instructions + ZstdBlockSequenceData, } impl ZstdTag { @@ -214,17 +181,11 @@ impl ZstdTag { Self::FrameHeaderDescriptor => false, Self::FrameContentSize => false, Self::BlockHeader => false, - Self::RawBlockBytes => true, - Self::RleBlockBytes => true, Self::ZstdBlockLiteralsHeader => false, Self::ZstdBlockLiteralsRawBytes => false, - Self::ZstdBlockLiteralsRleBytes => false, - Self::ZstdBlockFseCode => false, - Self::ZstdBlockHuffmanCode => false, - Self::ZstdBlockJumpTable => false, - Self::ZstdBlockLstream => false, Self::ZstdBlockSequenceHeader => false, - // TODO: more tags + Self::ZstdBlockSequenceFseCode => false, + Self::ZstdBlockSequenceData => true, } } @@ -235,17 +196,11 @@ impl ZstdTag { Self::FrameHeaderDescriptor => false, Self::FrameContentSize => false, Self::BlockHeader => false, - Self::RawBlockBytes => true, - Self::RleBlockBytes => true, Self::ZstdBlockLiteralsHeader => true, Self::ZstdBlockLiteralsRawBytes => true, - Self::ZstdBlockLiteralsRleBytes => true, - Self::ZstdBlockFseCode => true, - Self::ZstdBlockHuffmanCode => true, - Self::ZstdBlockJumpTable => true, - Self::ZstdBlockLstream => true, Self::ZstdBlockSequenceHeader => true, - // TODO: more tags + Self::ZstdBlockSequenceFseCode => true, + Self::ZstdBlockSequenceData => true, } } @@ -254,19 +209,13 @@ impl ZstdTag { match self { Self::Null => false, Self::FrameHeaderDescriptor => false, - Self::FrameContentSize => true, - Self::BlockHeader => true, - Self::RawBlockBytes => false, - Self::RleBlockBytes => false, + Self::FrameContentSize => false, + Self::BlockHeader => false, Self::ZstdBlockLiteralsHeader => false, Self::ZstdBlockLiteralsRawBytes => false, - Self::ZstdBlockLiteralsRleBytes => false, - Self::ZstdBlockFseCode => false, - Self::ZstdBlockHuffmanCode => 
true, - Self::ZstdBlockJumpTable => false, - Self::ZstdBlockLstream => true, Self::ZstdBlockSequenceHeader => false, - // TODO: more tags + Self::ZstdBlockSequenceFseCode => false, + Self::ZstdBlockSequenceData => true, } } } @@ -280,7 +229,7 @@ impl From for usize { } /// FSE table variants that we observe in the sequences section. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] #[allow(clippy::upper_case_acronyms)] pub enum FseTableKind { /// Literal length FSE table. @@ -300,16 +249,11 @@ impl ToString for ZstdTag { Self::FrameHeaderDescriptor => "FrameHeaderDescriptor", Self::FrameContentSize => "FrameContentSize", Self::BlockHeader => "BlockHeader", - Self::RawBlockBytes => "RawBlockBytes", - Self::RleBlockBytes => "RleBlockBytes", Self::ZstdBlockLiteralsHeader => "ZstdBlockLiteralsHeader", Self::ZstdBlockLiteralsRawBytes => "ZstdBlockLiteralsRawBytes", - Self::ZstdBlockLiteralsRleBytes => "ZstdBlockLiteralsRleBytes", - Self::ZstdBlockFseCode => "ZstdBlockFseCode", - Self::ZstdBlockHuffmanCode => "ZstdBlockHuffmanCode", - Self::ZstdBlockJumpTable => "ZstdBlockJumpTable", - Self::ZstdBlockLstream => "ZstdBlockLstream", Self::ZstdBlockSequenceHeader => "ZstdBlockSequenceHeader", + Self::ZstdBlockSequenceFseCode => "ZstdBlockSequenceFseCode", + Self::ZstdBlockSequenceData => "ZstdBlockSequenceData", }) } } @@ -318,6 +262,7 @@ impl ToString for ZstdTag { pub struct ZstdState { pub tag: ZstdTag, pub tag_next: ZstdTag, + pub block_idx: u64, pub max_tag_len: u64, pub tag_len: u64, pub tag_idx: u64, @@ -334,6 +279,7 @@ impl Default for ZstdState { Self { tag: ZstdTag::Null, tag_next: ZstdTag::FrameHeaderDescriptor, + block_idx: 0, max_tag_len: 0, tag_len: 0, tag_idx: 0, @@ -390,137 +336,28 @@ pub struct DecodedData { pub decoded_value_rlc: Value, } -#[derive(Clone, Debug, Default)] -pub struct HuffmanData { - pub byte_offset: u64, - pub bit_value: u8, - pub stream_idx: usize, - pub k: (u8, u8), -} - -/// Witness to the HuffmanCodesTable. 
-#[derive(Clone, Debug)] -pub struct HuffmanCodesData { - /// The byte offset in the frame at which the FSE table is described. - pub byte_offset: u64, - /// A mapping of symbol to the weight assigned to it as per canonical Huffman coding. The - /// symbol is the raw byte that is encoded using a Huffman code and the weight assigned to it - /// is a symbol emitted by the corresponding FSE table. - pub weights: Vec, -} - -/// Denotes the tuple (max_bitstring_len, Map). -type ParsedCanonicalHuffmanCode = (u64, BTreeMap); -/// A representation indexed by bitstring (String) as key for decoding symbols specifically. -/// Huffman code decoding ensures prefix code, thus the explicit articulation of bitstring is -/// necessary. -type ParsedCanonicalHuffmanCodeBitstringMap = (u64, HashMap); - -impl HuffmanCodesData { - /// Reconstruct the bitstrings for each symbol based on the canonical Huffman code weights. The - /// returned value is tuple of max bitstring length and a map from symbol to its weight and bit - /// value. - pub fn parse_canonical(&self) -> ParsedCanonicalHuffmanCode { - let sum_weights: u64 = self - .weights - .iter() - .map(|&weight| { - let weight: usize = weight.into(); - if weight > 0 { - 1 << (weight - 1) - } else { - 0 - } - }) - .sum(); - - // Calculate the last symbol's weight and append it. 
- let max_bitstring_len = bit_length(sum_weights); - let nearest_pow2 = 1 << max_bitstring_len; - let last_weight = ((nearest_pow2 - sum_weights) as f64).log2() as u64; - let weights = self - .weights - .iter() - .map(|&weight| weight as u64) - .chain(std::iter::once(last_weight)) - .collect::>(); - - let mut sym_to_tuple = BTreeMap::new(); - let mut bit_value = 0; - for l in (0..=max_bitstring_len).rev() { - bit_value = (bit_value + 1) >> 1; - weights - .iter() - .enumerate() - .filter(|(_symbol, &weight)| max_bitstring_len - weight + 1 == l) - .for_each(|(symbol, &weight)| { - sym_to_tuple.insert(symbol as u64, (weight, bit_value)); - bit_value += 1; - }); - } - - // populate symbols that don't occur in the Huffman code. - weights - .iter() - .enumerate() - .filter(|(_, &weight)| weight == 0) - .for_each(|(sym, _)| { - sym_to_tuple.insert(sym as u64, (0, 0)); - }); - - (max_bitstring_len, sym_to_tuple) - } - - /// parse bit string map - pub fn parse_bitstring_map(&self) -> ParsedCanonicalHuffmanCodeBitstringMap { - let mut weights: Vec = self.weights.iter().map(|w| *w as usize).collect(); - let sum_weights: usize = weights - .iter() - .filter_map(|&w| if w > 0 { Some(1 << (w - 1)) } else { None }) - .sum(); - - let nearest_pow_2: usize = 1 << (sum_weights - 1).next_power_of_two().trailing_zeros(); - weights.push(f64::log2((nearest_pow_2 - sum_weights) as f64).ceil() as usize + 1); - let max_number_of_bits = nearest_pow_2.trailing_zeros() as usize; - let n = weights.len(); - - let bitstring_length: Vec = weights - .iter() - .map(|&w| { - if w != 0 { - max_number_of_bits - w + 1 - } else { - 0 - } - }) - .collect(); - - let mut bitstring_map = HashMap::new(); - let mut cur_bit_value = 0; - - for bit_len in (1..=max_number_of_bits).rev() { - cur_bit_value += 1; - cur_bit_value >>= 1; - - for (sym, b_len) in bitstring_length.iter().enumerate().take(n) { - if *b_len == bit_len { - bitstring_map.insert( - format!("{:0width$b}", cur_bit_value, width = bit_len), - sym as 
u64, - ); - cur_bit_value += 1; - } - } - } - - let max_bitstring_len = bitstring_map - .keys() - .map(|k| k.len()) - .max() - .expect("Keys have maximum len"); - - (max_bitstring_len as u64, bitstring_map) - } +/// FSE decoding data from witness generation +#[derive(Clone, Debug, Default, PartialEq)] +pub struct FseDecodingRow { + /// The FSE table that is being decoded. Possible values are: + /// - LLT = 1, MOT = 2, MLT = 3 + pub table_kind: u64, + /// The number of states in the FSE table. table_size == 1 << AL, where AL is the accuracy log + /// of the FSE table. + pub table_size: u64, + /// The symbol emitted by the FSE table at this state. + pub symbol: u64, + /// During FSE table decoding, keep track of the number of symbol emitted + pub num_emitted: u64, + /// The value decoded as per variable bit-packing. + pub value_decoded: u64, + /// An accumulator of the number of states allocated to each symbol as we decode the FSE table. + /// This is the normalised probability for the symbol. + pub probability_acc: u64, + /// Whether we are in the repeat bits loop. + pub is_repeat_bits_loop: bool, + /// Whether this row represents the 0-7 trailing bits that should be ignored. + pub is_trailing_bits: bool, } /// A single row in the FSE table. @@ -553,6 +390,196 @@ pub struct BitstreamReadRow { pub bit_value: u64, /// Whether 0 bit is read pub is_zero_bit_read: bool, + /// Indicator for when sequence data bitstream initial baselines are determined + pub is_seq_init: bool, + /// Idx of sequence instruction + pub seq_idx: usize, + /// The states (LLT, MLT, MOT) at this row + pub states: [u64; 3], + /// The symbols emitted at this state (LLT, MLT, MOT) + pub symbols: [u64; 3], + /// The values computed for literal length, match length and match offset. + pub values: [u64; 3], + /// The baseline value associated with this state. 
+ pub baseline: u64, + /// Whether current byte is completely covered in a multi-byte packing scheme + pub is_nil: bool, + /// Indicate which exact state is the bitstring value is for + /// 1. MOT Code to Value + /// 2. MLT Code to Value + /// 3. LLT Code to Value + /// 4. LLT FSE update + /// 5. MLT FSE update + /// 6. MOT FSE update + pub is_update_state: u64, +} + +/// Sequence data is interleaved with 6 bitstreams. Each producing a different type of value. +#[derive(Clone, Copy, Debug)] +pub enum SequenceDataTag { + Null = 0, + LiteralLengthFse, + MatchLengthFse, + CookedMatchOffsetFse, + LiteralLengthValue, + MatchLengthValue, + CookedMatchOffsetValue, +} + +/// A single row in the Address table. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct AddressTableRow { + /// Whether this row is padding for positional alignment with input + pub s_padding: u64, + /// Instruction Index + pub instruction_idx: u64, + /// Literal Length (directly decoded from sequence bitstream) + pub literal_length: u64, + /// Cooked Match Offset (directly decoded from sequence bitstream) + pub cooked_match_offset: u64, + /// Match Length (directly decoded from sequence bitstream) + pub match_length: u64, + /// Accumulation of literal length + pub literal_length_acc: u64, + /// Repeated offset 1 + pub repeated_offset1: u64, + /// Repeated offset 2 + pub repeated_offset2: u64, + /// Repeated offset 3 + pub repeated_offset3: u64, + /// The actual match offset derived from cooked match offset + pub actual_offset: u64, +} + +impl AddressTableRow { + + /// a debug helper, input datas in the form of example in + /// zstd spec: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets + /// i.e. 
[offset, literal, rep_1, rep_2, rep_3] + #[cfg(test)] + pub fn mock_samples(samples: &[[u64;5]]) -> Vec { + let mut ret = Vec::::new(); + + for sample in samples { + let mut new_item = Self { + cooked_match_offset: sample[0], + literal_length: sample[1], + repeated_offset1: sample[2], + repeated_offset2: sample[3], + repeated_offset3: sample[4], + actual_offset: sample[2], + ..Default::default() + }; + + if let Some(old_item) = ret.last() { + new_item.instruction_idx = old_item.instruction_idx + 1; + new_item.literal_length_acc = old_item.literal_length_acc + sample[1]; + } else { + new_item.literal_length_acc = sample[1]; + } + + ret.push(new_item); + } + + ret + } +} + +/// Data for BL and Number of Bits for a state in LLT, CMOT and MLT +#[derive(Clone, Debug)] +pub struct SequenceFixedStateActionTable { + /// Represent the state, BL and NB + pub states_to_actions: Vec<(u64, (u64, u64))>, +} + +impl SequenceFixedStateActionTable { + /// Reconstruct action state table for literal length recovery + pub fn reconstruct_lltv() -> Self { + let mut states_to_actions = vec![]; + + for idx in 0..=15 { + states_to_actions.push((idx as u64, (idx as u64, 0u64))) + } + + let rows: Vec<(u64, u64, u64)> = vec![ + (16, 16, 1), + (17, 18, 1), + (18, 20, 1), + (19, 22, 1), + (20, 24, 2), + (21, 28, 2), + (22, 32, 3), + (23, 40, 3), + (24, 48, 4), + (25, 64, 6), + (26, 128, 7), + (27, 256, 8), + (28, 512, 9), + (29, 1024, 10), + (30, 2048, 11), + (31, 4096, 12), + (32, 8192, 13), + (33, 16384, 14), + (34, 32768, 15), + (35, 65536, 16), + ]; + + for row in rows { + states_to_actions.push((row.0, (row.1, row.2))); + } + + Self { states_to_actions } + } + + /// Reconstruct action state table for match length recovery + pub fn reconstruct_mltv() -> Self { + let mut states_to_actions = vec![]; + + for idx in 0..=31 { + states_to_actions.push((idx as u64, (idx as u64 + 3, 0u64))) + } + + let rows: Vec<(u64, u64, u64)> = vec![ + (32, 35, 1), + (33, 37, 1), + (34, 39, 1), + (35, 41, 1), + 
(36, 43, 2), + (37, 47, 2), + (38, 51, 3), + (39, 59, 3), + (40, 67, 4), + (41, 83, 4), + (42, 99, 5), + (43, 131, 7), + (44, 259, 8), + (45, 515, 9), + (46, 1027, 10), + (47, 2051, 11), + (48, 4099, 12), + (49, 8195, 13), + (50, 16387, 14), + (51, 32771, 15), + (52, 65539, 16), + ]; + + for row in rows { + states_to_actions.push((row.0, (row.1, row.2))); + } + + Self { states_to_actions } + } + + /// Reconstruct action state table for offset recovery + pub fn reconstruct_cmotv(n: u64) -> Self { + let mut states_to_actions = vec![]; + + for idx in 0..=n { + states_to_actions.push((idx, ((1 << idx) as u64, idx))) + } + + Self { states_to_actions } + } } /// Data for the FSE table's witness values. @@ -571,10 +598,15 @@ pub struct FseTableData { pub struct FseAuxiliaryTableData { /// The block index in which this FSE table appears. pub block_idx: u64, + /// Indicates whether the table is pre-defined. + pub is_predefined: bool, /// The FSE table kind, variants are: LLT=1, MOT=2, MLT=3. pub table_kind: FseTableKind, /// The FSE table's size, i.e. 1 << AL (accuracy log). pub table_size: u64, + /// Normalized probability, + /// Used to indicate actual probability frequency of symbols, with 0 and -1 symbols present + pub normalised_probs: BTreeMap, /// A map from FseSymbol (weight) to states, also including fields for that state, for /// instance, the baseline and the number of bits to read from the FSE bitstream. /// @@ -589,7 +621,7 @@ pub struct FseAuxiliaryTableData { /// This representation makes it easy to look up decoded symbol from current state. /// Map. 
type FseStateMapping = BTreeMap; -type ReconstructedFse = (usize, Vec<(u32, u64)>, FseAuxiliaryTableData); +type ReconstructedFse = (usize, Vec<(u32, u64, u64)>, FseAuxiliaryTableData); impl FseAuxiliaryTableData { /// While we reconstruct an FSE table from a bitstream, we do not know before reconstruction @@ -605,11 +637,12 @@ impl FseAuxiliaryTableData { block_idx: u64, table_kind: FseTableKind, byte_offset: usize, + is_predefined: bool, ) -> std::io::Result { // construct little-endian bit-reader. let data = src.iter().skip(byte_offset).cloned().collect::>(); let mut reader = BitReader::endian(Cursor::new(&data), LittleEndian); - let mut bit_boundaries: Vec<(u32, u64)> = vec![]; + let mut bit_boundaries: Vec<(u32, u64, u64)> = vec![]; // number of bits read by the bit-reader from the bistream. let mut offset = 0; @@ -618,7 +651,7 @@ impl FseAuxiliaryTableData { offset += 4; reader.read::(offset)? + 5 }; - bit_boundaries.push((offset, accuracy_log as u64 - 5)); + bit_boundaries.push((offset, accuracy_log as u64 - 5, accuracy_log as u64 - 5)); let table_size = 1 << accuracy_log; //////////////////////////////////////////////////////////////////////////////////////// @@ -627,75 +660,111 @@ impl FseAuxiliaryTableData { let mut normalised_probs = BTreeMap::new(); let mut R = table_size; let mut symbol = 0; - while R > 0 { - // number of bits and value read from the variable bit-packed data. - // And update the total number of bits read so far. - let (n_bits_read, value) = read_variable_bit_packing(&data, offset, R + 1)?; - reader.skip(n_bits_read)?; - offset += n_bits_read; - bit_boundaries.push((offset, value)); - - // Number of states allocated to this symbol. - // - prob=-1 => 1 - // - prob=0 => 0 - // - prob>=1 => prob - let N = match value { - 0 => 1, - _ => value - 1, - }; - // When a symbol has a value==0, it signifies a case of prob=-1 (or probability "less - // than 1"), where such symbols are allocated states from the end and retreating. 
In - // such cases, we reset the FSE state, i.e. read accuracy_log number of bits from the - // bitstream with a baseline==0x00. - if value == 0 { - normalised_probs.insert(symbol, -1); - symbol += 1; + if is_predefined { + let predefined_frequencies = match table_kind { + FseTableKind::LLT => { + vec![ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1, + ] + } + FseTableKind::MOT => { + vec![ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, + -1, -1, -1, -1, + ] + } + FseTableKind::MLT => { + vec![ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, + -1, -1, -1, -1, + ] + } + _ => unreachable!("Invalid table type."), + }; + for (symbol, freq) in predefined_frequencies.into_iter().enumerate() { + normalised_probs.insert(symbol as u64, freq); } + } else { + while R > 0 { + // number of bits and value read from the variable bit-packed data. + // And update the total number of bits read so far. + let (n_bits_read, value_read, value_decoded) = read_variable_bit_packing(&data, offset, R + 1)?; + reader.skip(n_bits_read)?; + offset += n_bits_read; + bit_boundaries.push((offset, value_read, value_decoded)); + + // Number of states allocated to this symbol. + // - prob=-1 => 1 + // - prob=0 => 0 + // - prob>=1 => prob + let N = match value_decoded { + 0 => 1, + _ => value_decoded - 1, + }; + + // When a symbol has a value==0, it signifies a case of prob=-1 (or probability + // "less than 1"), where such symbols are allocated states from the + // end and retreating. In such cases, we reset the FSE state, i.e. + // read accuracy_log number of bits from the bitstream with a + // baseline==0x00. + if value_decoded == 0 { + normalised_probs.insert(symbol, -1); + symbol += 1; + } - // When a symbol has a value==1 (prob==0), it is followed by a 2-bits repeat flag. 
This - // repeat flag tells how many probabilities of zeroes follow the current one. It - // provides a number ranging from 0 to 3. If it is a 3, another 2-bits repeat flag - // follows, and so on. - if value == 1 { - normalised_probs.insert(symbol, 0); - symbol += 1; - loop { - let repeat_bits = reader.read::(2)?; - offset += 2; - bit_boundaries.push((offset, repeat_bits as u64)); - - for k in 0..repeat_bits { - normalised_probs.insert(symbol + (k as u64), 0); - } - symbol += repeat_bits as u64; + // When a symbol has a value==1 (prob==0), it is followed by a 2-bits repeat flag. + // This repeat flag tells how many probabilities of zeroes follow + // the current one. It provides a number ranging from 0 to 3. If it + // is a 3, another 2-bits repeat flag follows, and so on. + if value_decoded == 1 { + normalised_probs.insert(symbol, 0); + symbol += 1; + loop { + let repeat_bits = reader.read::(2)?; + offset += 2; + bit_boundaries.push((offset, repeat_bits as u64, repeat_bits as u64)); + + for k in 0..repeat_bits { + normalised_probs.insert(symbol + (k as u64), 0); + } + symbol += repeat_bits as u64; - if repeat_bits < 3 { - break; + if repeat_bits < 3 { + break; + } } } - } - // When a symbol has a value>1 (prob>=1), it is allocated that many number of states in - // the FSE table. - if value > 1 { - normalised_probs.insert(symbol, N as i32); - symbol += 1; - } + // When a symbol has a value>1 (prob>=1), it is allocated that many number of states + // in the FSE table. + if value_decoded > 1 { + normalised_probs.insert(symbol, N as i32); + symbol += 1; + } - // remove N slots from a total of R. - R -= N; + // remove N slots from a total of R. + R -= N; + } } // ignore any bits left to be read until byte-aligned. 
- let t = (((offset as usize) - 1) / N_BITS_PER_BYTE) + 1; + let t = if is_predefined { + 0 + } else { + (((offset as usize) - 1) / N_BITS_PER_BYTE) + 1 + }; // read the trailing section if t * N_BITS_PER_BYTE > (offset as usize) { let bits_remaining = t * N_BITS_PER_BYTE - offset as usize; + let trailing_value = reader.read::(bits_remaining as u32)? as u64; bit_boundaries.push(( offset + bits_remaining as u32, - reader.read::(bits_remaining as u32)? as u64, + trailing_value, + trailing_value, )); } @@ -716,11 +785,17 @@ impl FseAuxiliaryTableData { Ok(( t, - bit_boundaries, + if is_predefined { + vec![] + } else { + bit_boundaries + }, Self { block_idx, + is_predefined, table_kind, table_size, + normalised_probs, sym_to_states, sym_to_sorted_states, }, @@ -876,10 +951,8 @@ pub struct ZstdWitnessRow { pub encoded_data: EncodedData, /// Data on decompressed data pub decoded_data: DecodedData, - /// Huffman code bitstring marker that devides bitstream into symbol segments - pub huffman_data: HuffmanData, /// Fse decoding state transition data - pub fse_data: FseTableRow, + pub fse_data: FseDecodingRow, /// Bitstream reader pub bitstream_read_data: BitstreamReadRow, } @@ -894,8 +967,7 @@ impl ZstdWitnessRow { ..Default::default() }, decoded_data: DecodedData::default(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), + fse_data: FseDecodingRow::default(), bitstream_read_data: BitstreamReadRow::default(), } } @@ -917,7 +989,7 @@ mod tests { let src = vec![0xff, 0xff, 0xff, 0x30, 0x6f, 0x9b, 0x03, 0xff, 0xff, 0xff]; let (n_bytes, _bit_boundaries, table) = - FseAuxiliaryTableData::reconstruct(&src, 1, FseTableKind::LLT, 3)?; + FseAuxiliaryTableData::reconstruct(&src, 1, FseTableKind::LLT, 3, false)?; // TODO: assert equality for the entire table. // for now only comparing state/baseline/nb for S1, i.e. weight == 1. 
@@ -1035,105 +1107,13 @@ mod tests { 0x21, 0x9d, 0x51, 0xcc, 0x18, 0x42, 0x44, 0x81, 0x8c, 0x94, 0xb4, 0x50, 0x1e, ]; - let (n_bytes, _bit_boundaries, table) = - FseAuxiliaryTableData::reconstruct(&src, 1, FseTableKind::LLT, 0)?; - let parsed_state_map = table.parse_state_table(); + let (_n_bytes, _bit_boundaries, table) = + FseAuxiliaryTableData::reconstruct(&src, 0, FseTableKind::LLT, 0, false)?; + let _parsed_state_map = table.parse_state_table(); + // witgen_debug // TODO: assertions Ok(()) } - - #[test] - fn test_huffman_bitstring_reconstruction() -> std::io::Result<()> { - let weights = vec![ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 1, 0, - 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 5, 3, 3, 3, 6, 3, 2, 4, 4, 0, 1, 4, 4, 5, 5, 2, 0, 4, 4, - 5, 3, 1, 3, 1, 3, - ] - .into_iter() - .map(FseSymbol::from) - .collect::>(); - - let huffman_codes_data = HuffmanCodesData { - byte_offset: 0, - weights, - }; - - let (max_bitstring_len, bitstring_map) = huffman_codes_data.parse_bitstring_map(); - - let expected_bitstrings: [(&str, u64); 53] = [ - ("01001", 10), - ("110", 32), - ("00000000", 33), - ("0001100", 39), - ("001010", 44), - ("0001101", 46), - ("00000001", 50), - ("00000010", 58), - ("0001110", 59), - ("0001111", 63), - ("00000011", 65), - ("00000100", 66), - ("00000101", 67), - ("00000110", 68), - ("00000111", 69), - ("00001000", 72), - ("0010000", 73), - ("00001001", 74), - ("00001010", 76), - ("00001011", 77), - ("00001100", 78), - ("0010001", 79), - ("00001101", 82), - ("00001110", 83), - ("00001111", 84), - ("00010000", 85), - ("00010001", 87), - ("00010010", 91), - ("00010011", 93), - ("1000", 97), - ("001011", 98), - ("001100", 99), - ("001101", 100), - ("111", 101), - ("001110", 102), - ("0010010", 103), - ("01010", 104), - ("01011", 105), - 
("00010100", 107), - ("01100", 108), - ("01101", 109), - ("1001", 110), - ("1010", 111), - ("0010011", 112), - ("01110", 114), - ("01111", 115), - ("1011", 116), - ("001111", 117), - ("00010101", 118), - ("010000", 119), - ("00010110", 120), - ("010001", 121), - ("00010111", 122), - ]; - - assert_eq!(max_bitstring_len, 8, "max bitstring len is 8"); - assert_eq!( - expected_bitstrings.len(), - bitstring_map.len(), - "# of bitstring is the same" - ); - for pair in expected_bitstrings { - assert_eq!( - *bitstring_map.get(pair.0).unwrap(), - pair.1, - "bitstring mapping is correct" - ); - } - - Ok(()) - } } diff --git a/aggregator/src/aggregation/decoder/witgen/util.rs b/aggregator/src/aggregation/decoder/witgen/util.rs index 07cacb3f12..7e08820fe8 100644 --- a/aggregator/src/aggregation/decoder/witgen/util.rs +++ b/aggregator/src/aggregation/decoder/witgen/util.rs @@ -2,7 +2,7 @@ use bitstream_io::{ read::{BitRead, BitReader}, LittleEndian, }; -use std::io::{Cursor, Result}; +use std::io::{Cursor, Result, Read}; use super::N_BITS_PER_BYTE; @@ -15,7 +15,7 @@ use super::N_BITS_PER_BYTE; /// - v: the read value that is in the range 0..=r /// /// [doclink]: https://www.rfc-editor.org/rfc/rfc8478.txt -pub fn read_variable_bit_packing(src: &[u8], offset: u32, r: u64) -> Result<(u32, u64)> { +pub fn read_variable_bit_packing(src: &[u8], offset: u32, r: u64) -> Result<(u32, u64, u64)> { // construct a bit-reader. let mut reader = BitReader::endian(Cursor::new(&src), LittleEndian); @@ -25,9 +25,8 @@ pub fn read_variable_bit_packing(src: &[u8], offset: u32, r: u64) -> Result<(u32 // if there is no need for variable bit-packing, i.e. 
if the range is 0..=(2^k - 1) if r + 1 == max { - reader.skip(offset)?; let value = reader.read::(size)?; - return Ok((size, value)); + return Ok((size, value, value)); } // lo_pin denotes the pin where if the value read is below the pin, its considered a low value @@ -52,19 +51,18 @@ pub fn read_variable_bit_packing(src: &[u8], offset: u32, r: u64) -> Result<(u32 // // - value : the value denoted by size-bits. // - lo_value : the value denoted by the low (size-1)-bits. - reader.skip(offset)?; let value = reader.read::(size)?; let lo_value = value & ((1 << (size - 1)) - 1); Ok(if (0..lo_pin).contains(&lo_value) { - (size - 1, lo_value) + (size - 1, lo_value, lo_value) } else if (lo_pin..hi_pin_1).contains(&value) { - (size, value) + (size, value, value) } else if (hi_pin_1..hi_pin_2).contains(&value) { - (size - 1, value - hi_pin_1) + (size - 1, lo_value, value - hi_pin_1) } else { assert!((hi_pin_2..(1 << size)).contains(&value)); - (size, value - lo_pin) + (size, value, value - lo_pin) }) } @@ -85,18 +83,21 @@ pub fn smaller_powers_of_two(sum: u64, n: u64) -> (usize, Vec) { } let next_pow2 = 1 << bit_length(n); - let diff = next_pow2 - n; + let mut diff = (next_pow2 - n) as usize; let smallest_spot = sum / next_pow2; let smallest_exponent = (smallest_spot as f64).log2() as u64; - ( - diff as usize, - std::iter::repeat(smallest_exponent + 1) - .take(diff as usize) - .chain(std::iter::repeat(smallest_exponent)) - .take(n as usize) - .collect(), - ) + let pows: Vec = std::iter::repeat(smallest_exponent + 1) + .take(diff as usize) + .chain(std::iter::repeat(smallest_exponent)) + .take(n as usize) + .collect(); + + if diff >= pows.len() { + diff = 0; + } + + (diff, pows) } // Returns the number of bits needed to represent a u32 value in binary form. 
@@ -165,9 +166,9 @@ mod tests { let src = vec![0x30, 0x6f, 0x9b, 0x03]; let offset = 4; let range = 32; - let (n_bits, value_read) = read_variable_bit_packing(&src, offset, range)?; + let (n_bits, value_read, value_decoded) = read_variable_bit_packing(&src, offset, range)?; assert_eq!(n_bits, 5); - assert_eq!(value_read, 19); + assert_eq!(value_decoded, 19); // case 2: // read in little-endian order: @@ -179,17 +180,17 @@ mod tests { let src = vec![0b10000000]; let offset = 6; let range = 3; - let (n_bits, value_read) = read_variable_bit_packing(&src, offset, range)?; + let (n_bits, value_read, value_decoded) = read_variable_bit_packing(&src, offset, range)?; assert_eq!(n_bits, 2); - assert_eq!(value_read, 2); + assert_eq!(value_decoded, 2); // case 3: let src = vec![0b11000000]; let offset = 6; let range = 2; - let (n_bits, value_read) = read_variable_bit_packing(&src, offset, range)?; + let (n_bits, value_read, value_decoded) = read_variable_bit_packing(&src, offset, range)?; assert_eq!(n_bits, 2); - assert_eq!(value_read, 2); + assert_eq!(value_decoded, 2); Ok(()) } diff --git a/zkevm-circuits/src/table.rs b/zkevm-circuits/src/table.rs index 73b556d9a6..e501ff0b7f 100644 --- a/zkevm-circuits/src/table.rs +++ b/zkevm-circuits/src/table.rs @@ -3286,7 +3286,7 @@ impl BitwiseOpTable { || "BitwiseOp table", |mut region| { let mut offset = 0; - for op in [BitwiseOp::AND, BitwiseOp::OR, BitwiseOp::XOR] { + for op in [BitwiseOp::AND/*, BitwiseOp::OR, BitwiseOp::XOR*/] { for [lhs, rhs, out] in (0..256).flat_map(move |lhs| { (0..256).map(move |rhs| { [