diff --git a/zkevm-circuits/src/decompression_circuit.rs b/zkevm-circuits/src/decompression_circuit.rs index aef0159776..99767d25ed 100644 --- a/zkevm-circuits/src/decompression_circuit.rs +++ b/zkevm-circuits/src/decompression_circuit.rs @@ -8,12 +8,28 @@ mod test; use std::marker::PhantomData; +use crate::{ + evm_circuit::util::constraint_builder::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::{ + decompression::{ + BitstringAccumulationTable, BlockTypeRomTable, DecodedLiteralsTable, FseTable, + HuffmanCodesTable, LiteralsHeaderRomTable, LiteralsHeaderTable, TagRomTable, + }, + BitwiseOpTable, KeccakTable, LookupTable, Pow2Table, PowOfRandTable, RangeTable, + }, + util::{Challenges, SubCircuit, SubCircuitConfig}, + witness::{ + process, value_bits_le, Block, FseAuxiliaryTableData, HuffmanCodesData, LstreamNum, + ZstdTag, ZstdWitnessRow, N_BITS_PER_BYTE, N_BITS_ZSTD_TAG, N_BLOCK_HEADER_BYTES, + N_JUMP_TABLE_BYTES, + }, +}; use array_init::array_init; use eth_types::Field; use gadgets::{ binary_number::{BinaryNumberChip, BinaryNumberConfig}, - comparator::{ComparatorChip, ComparatorConfig}, - less_than::{LtChip, LtConfig}, + comparator::{ComparatorChip, ComparatorConfig, ComparatorInstruction}, + less_than::{LtChip, LtConfig, LtInstruction}, util::{and, not, select, sum, Expr}, }; use halo2_proofs::{ @@ -24,22 +40,6 @@ use halo2_proofs::{ poly::Rotation, }; -use crate::{ - evm_circuit::util::constraint_builder::{BaseConstraintBuilder, ConstrainBuilderCommon}, - table::{ - decompression::{ - BitstringAccumulationTable, BlockTypeRomTable, FseTable, HuffmanCodesTable, - LiteralsHeaderRomTable, LiteralsHeaderTable, TagRomTable, - }, - KeccakTable, LookupTable, Pow2Table, PowOfRandTable, RangeTable, - }, - util::{Challenges, SubCircuit, SubCircuitConfig}, - witness::{ - Block, LstreamNum, ZstdTag, N_BITS_PER_BYTE, N_BITS_ZSTD_TAG, N_BLOCK_HEADER_BYTES, - N_JUMP_TABLE_BYTES, - }, -}; - /// Tables, challenge API used to configure the Decompression circuit. 
pub struct DecompressionCircuitConfigArgs { /// Challenge API. @@ -52,8 +52,18 @@ pub struct DecompressionCircuitConfigArgs { pub bs_acc_table: BitstringAccumulationTable, /// Lookup table to get regenerated and compressed size from LiteralsHeader. pub literals_header_table: LiteralsHeaderTable, + /// Lookup table to validate decoded literal bytes. + pub decoded_literals_table: DecodedLiteralsTable, + /// Bitwise OP table. + pub bitwise_op_table: BitwiseOpTable, + /// RangeTable for [0, 4). + pub range4: RangeTable<4>, /// RangeTable for [0, 8). pub range8: RangeTable<8>, + /// RangeTable for [0, 16). + pub range16: RangeTable<16>, + /// RangeTable for [0, 64). + pub range64: RangeTable<64>, /// RangeTable for [0; 128). pub range128: RangeTable<128>, /// U8 table, i.e. RangeTable for [0, 1 << 8). @@ -118,6 +128,25 @@ pub struct DecompressionCircuitConfig { fse_decoder: FseDecoder, /// Literal stream tag related configs. lstream_config: LstreamConfig, + + /// Internal Tables + bitwise_op_table: BitwiseOpTable, + range4: RangeTable<4>, + range8: RangeTable<8>, + range16: RangeTable<16>, + range64: RangeTable<64>, + range128: RangeTable<128>, + range256: RangeTable<256>, + tag_rom_table: TagRomTable, + pow_rand_table: PowOfRandTable, + block_type_rom_table: BlockTypeRomTable, + pow2_table: Pow2Table, + literals_header_rom_table: LiteralsHeaderRomTable, + literals_header_table: LiteralsHeaderTable, + bitstring_accumulation_table: BitstringAccumulationTable, + fse_table: FseTable, + huffman_codes_table: HuffmanCodesTable, + decoded_literals_table: DecodedLiteralsTable, } /// Block level details are specified in these columns. @@ -131,7 +160,7 @@ pub struct BlockGadget { block_len: Column, /// Boolean column to mark whether or not this is the last block. is_last_block: Column, - /// Check: block_idx <= block_len. + // Check: block_idx <= block_len. 
idx_cmp_len: ComparatorConfig, } @@ -140,7 +169,7 @@ pub struct BlockGadget { pub struct TagGadget { /// The zstd tag at the current row. tag: Column, - /// Helper gadget to construct equality constraints against the current tag. + // Helper gadget to construct equality constraints against the current tag. tag_bits: BinaryNumberConfig, /// The tag that follows once the current tag is done processing. tag_next: Column, @@ -170,13 +199,13 @@ pub struct TagGadget { /// value, however the tag_rlc always uses the keccak randomness. tag_rlc_acc: Column, /// Helper gadget to check whether max_len < 0x20. - mlen_lt_0x20: LtConfig, + mlen_lt_0x20: LtConfig, /// A boolean column to indicate that tag has been changed on this row. is_tag_change: Column, - /// Check: tag_idx <= tag_len. - idx_cmp_len: ComparatorConfig, - /// Check: tag_len <= max_len. - len_cmp_max: ComparatorConfig, + // Check: tag_idx <= tag_len. + idx_cmp_len: ComparatorConfig, + // Check: tag_len <= max_len. + len_cmp_max: ComparatorConfig, /// Helper column to reduce the circuit degree. Set when tag == BlockHeader. is_block_header: Column, /// Helper column to reduce the circuit degree. Set when tag == LiteralsHeader. @@ -233,6 +262,10 @@ struct HuffmanConfig { /// could span over two bytes. #[derive(Clone, Debug)] pub struct BitstreamDecoder { + /// Boolean that is set for the special case that we don't read from the bitstream, i.e. we + /// read 0 number of bits. This case can only occur while processing the + /// tag=ZstdBlockHuffmanCode. + is_nil: Column, /// The bit-index where the bittsring begins. 0 <= bit_index_start < 8. bit_index_start: Column, /// The bit-index where the bitstring ends. 0 <= bit_index_end < 16. 
@@ -353,7 +386,12 @@ impl SubCircuitConfig for DecompressionCircuitConfig { huffman_codes_table, bs_acc_table, literals_header_table, + decoded_literals_table, + bitwise_op_table, + range4, range8, + range16, + range64, range128, range256, pow2_table, @@ -462,6 +500,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { let bitstream_decoder = { let bit_index_end = meta.advice_column(); BitstreamDecoder { + is_nil: meta.advice_column(), bit_index_start: meta.advice_column(), bit_index_end, bitstring_contained: ComparatorChip::configure( @@ -522,7 +561,11 @@ impl SubCircuitConfig for DecompressionCircuitConfig { let mut cb = BaseConstraintBuilder::default(); // Boolean columns. - for col in [is_padding, block_gadget.is_last_block] { + for col in [ + is_padding, + block_gadget.is_last_block, + bitstream_decoder.is_nil, + ] { cb.require_boolean( "Boolean column check", meta.query_advice(col, Rotation::cur()), @@ -546,7 +589,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { }; } degree_reduction_check!(tag_gadget.is_block_header, is_block_header(meta)); - degree_reduction_check!(tag_gadget.is_literals_section, is_zb_literals_header(meta)); + degree_reduction_check!(tag_gadget.is_literals_header, is_zb_literals_header(meta)); degree_reduction_check!(tag_gadget.is_fse_code, is_zb_fse_code(meta)); degree_reduction_check!(tag_gadget.is_huffman_code, is_zb_huffman_code(meta)); degree_reduction_check!(tag_gadget.is_lstream, is_zb_lstream(meta)); @@ -608,8 +651,9 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.require_boolean("every value bit is boolean", bit.expr()); } - let is_new_byte = meta.query_advice(byte_idx, Rotation::next()) - - meta.query_advice(byte_idx, Rotation::cur()); + let is_new_byte = meta.query_advice(byte_idx, Rotation::cur()) + - meta.query_advice(byte_idx, Rotation::prev()); + cb.require_boolean( "byte_idx' == byte_idx or byte_idx' == byte_idx + 1", is_new_byte.expr(), @@ -618,13 +662,13 @@ impl SubCircuitConfig for 
DecompressionCircuitConfig { cb.require_equal( "encoded length remains the same", meta.query_advice(encoded_len, Rotation::cur()), - meta.query_advice(encoded_len, Rotation::next()), + meta.query_advice(encoded_len, Rotation::prev()), ); cb.require_equal( "decoded length remains the same", meta.query_advice(decoded_len, Rotation::cur()), - meta.query_advice(decoded_len, Rotation::next()), + meta.query_advice(decoded_len, Rotation::prev()), ); cb.require_boolean( @@ -636,7 +680,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.condition(not::expr(is_new_byte.expr()), |cb| { cb.require_equal( "value_byte' == value_byte if not a new byte", - meta.query_advice(value_byte, Rotation::next()), + meta.query_advice(value_byte, Rotation::prev()), meta.query_advice(value_byte, Rotation::cur()), ); }); @@ -645,8 +689,10 @@ impl SubCircuitConfig for DecompressionCircuitConfig { // on the next row iff: // - tag_idx == tag_len // - byte_idx' == byte_idx + 1 + let is_next_new_byte = meta.query_advice(byte_idx, Rotation::next()) + - meta.query_advice(byte_idx, Rotation::cur()); let (_, tidx_eq_tlen) = tag_gadget.idx_cmp_len.expr(meta, None); - cb.condition(and::expr([tidx_eq_tlen, is_new_byte]), |cb| { + cb.condition(and::expr([tidx_eq_tlen, is_next_new_byte]), |cb| { cb.require_equal( "is_tag_change should be set", meta.query_advice(tag_gadget.is_tag_change, Rotation::next()), @@ -656,7 +702,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), - not::expr(meta.query_advice(is_padding, Rotation::cur())), + not::expr(meta.query_fixed(q_first, Rotation::cur())), ])) }); @@ -752,7 +798,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), - not::expr(meta.query_advice(is_padding, Rotation::cur())), + not::expr(meta.query_fixed(q_first, Rotation::cur())), meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), ])) }); @@ 
-797,6 +843,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { let tag_value_acc_prev = meta.query_advice(tag_gadget.tag_value_acc, Rotation::prev()); let value_byte_curr = meta.query_advice(value_byte, Rotation::cur()); + cb.require_equal( "tag_value calculation depending on whether new byte", meta.query_advice(tag_gadget.tag_value_acc, Rotation::cur()), @@ -812,7 +859,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.condition(not::expr(is_new_byte.expr()), |cb| { cb.require_equal( "tag_rlc_acc remains the same if not a new byte", - meta.query_advice(tag_gadget.tag_rlc_acc, Rotation::next()), + meta.query_advice(tag_gadget.tag_rlc_acc, Rotation::prev()), meta.query_advice(tag_gadget.tag_rlc_acc, Rotation::cur()), ); }); @@ -828,6 +875,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ); }, ); + let value_byte_prev = meta.query_advice(value_byte, Rotation::prev()); cb.condition(and::expr([is_new_byte, is_reverse]), |cb| { cb.require_equal( @@ -841,11 +889,12 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), - not::expr(meta.query_advice(is_padding, Rotation::cur())), + not::expr(meta.query_fixed(q_first, Rotation::cur())), not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), ])) }, ); + meta.lookup_any("DecompressionCircuit: randomness power tag_len", |meta| { let condition = and::expr([ meta.query_fixed(q_enable, Rotation::cur()), @@ -941,18 +990,15 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(byte_idx, Rotation::cur()), 1.expr(), ); - cb.require_equal( "tag == FrameHeaderDescriptor", meta.query_advice(tag_gadget.tag, Rotation::cur()), ZstdTag::FrameHeaderDescriptor.expr(), ); - cb.require_zero( "value_rlc starts at 0", meta.query_advice(value_rlc, Rotation::cur()), ); - cb.require_zero( "decoded_rlc initialises at 0", meta.query_advice(decoded_rlc, Rotation::cur()), @@ -1098,7 +1144,7 @@ impl SubCircuitConfig 
for DecompressionCircuitConfig { let fcs_flag1 = meta.query_advice(value_bits[6], Rotation::prev()); let fcs_tag_value = meta.query_advice(tag_gadget.tag_value, Rotation::cur()); let frame_content_size = select::expr( - and::expr([fcs_flag0, not::expr(fcs_flag1)]), + and::expr([not::expr(fcs_flag0), fcs_flag1]), 256.expr() + fcs_tag_value.expr(), fcs_tag_value, ); @@ -1107,7 +1153,6 @@ impl SubCircuitConfig for DecompressionCircuitConfig { frame_content_size, meta.query_advice(decoded_len, Rotation::cur()), ); - cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), @@ -1122,16 +1167,17 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ////////////////////////////////// ZstdTag::BlockHeader /////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// + // TODO: Block constraints will be examined later // Note: We only verify the 1st row of BlockHeader for tag_value. + + /* meta.create_gate("DecompressionCircuit: BlockHeader", |meta| { let mut cb = BaseConstraintBuilder::default(); - cb.require_equal( "tag_len == 3", meta.query_advice(tag_gadget.tag_len, Rotation::cur()), N_BLOCK_HEADER_BYTES.expr(), ); - // The lowest bit (as per little-endian representation) is whether the block is the // last block in the frame or not. // @@ -1160,9 +1206,8 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(block_gadget.idx, Rotation(N_BLOCK_HEADER_BYTES as i32)), 1.expr(), ); - - // For Raw/RLE blocks, the block_len is equal to the tag_len. These blocks appear with - // block type 00 or 01, i.e. the block_type_bit1 is 0. + // For Raw/RLE blocks, the block_len is equal to the tag_len. These blocks appear + // with block type 00 or 01, i.e. the block_type_bit1 is 0. 
cb.condition(not::expr(block_type_bit1), |cb| { cb.require_equal( "Raw/RLE blocks: tag_len == block_len", @@ -1173,7 +1218,6 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ), ); }); - // Validate that for an RLE block: value_byte == decoded_byte. cb.condition(block_type_bit0, |cb| { cb.require_equal( @@ -1182,28 +1226,28 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(decoded_byte, Rotation(N_BLOCK_HEADER_BYTES as i32)), ); }); - // If this wasn't the first block, then the previous block's last byte should have // block's idx == block length. // // This block is the first block iff the FrameContentSize tag precedes it. However we - // assume that the block_idx and block_len will be set to 0 for FrameContentSize as it - // is not part of a "block". + // assume that the block_idx and block_len will be set to 0 for FrameContentSize as + // it is not part of a "block". cb.require_equal( "block_idx::prev == block_len::prev", meta.query_advice(block_gadget.idx, Rotation::prev()), meta.query_advice(block_gadget.block_len, Rotation::prev()), ); - cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), meta.query_advice(tag_gadget.is_block_header, Rotation::cur()), ])) }); + */ + + /* meta.create_gate("DecompressionCircuit: while processing a block", |meta| { let mut cb = BaseConstraintBuilder::default(); - // If byte_idx increments, then block_gadet.idx should also increment. 
cb.require_equal( "idx in block increments if byte_idx increments", @@ -1212,40 +1256,37 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(byte_idx, Rotation::next()) - meta.query_advice(byte_idx, Rotation::cur()), ); - cb.require_equal( "block_len remains unchanged", meta.query_advice(block_gadget.block_len, Rotation::next()), meta.query_advice(block_gadget.block_len, Rotation::cur()), ); - cb.require_equal( "is_last_block remains unchanged", meta.query_advice(block_gadget.is_last_block, Rotation::next()), meta.query_advice(block_gadget.is_last_block, Rotation::cur()), ); - cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(block_gadget.is_block, Rotation::cur()), meta.query_advice(block_gadget.is_block, Rotation::next()), ])) }); + */ + + /* meta.create_gate("DecompressionCircuit: handle end of other blocks", |meta| { let mut cb = BaseConstraintBuilder::default(); - cb.require_equal( "tag_next depending on whether or not this is the last block", meta.query_advice(tag_gadget.tag_next, Rotation::cur()), ZstdTag::BlockHeader.expr(), ); - cb.require_equal( "block_idx == block_len", meta.query_advice(block_gadget.idx, Rotation::cur()), meta.query_advice(block_gadget.block_len, Rotation::cur()), ); - let (_, idx_eq_len) = block_gadget.idx_cmp_len.expr(meta, None); cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), @@ -1254,27 +1295,26 @@ impl SubCircuitConfig for DecompressionCircuitConfig { not::expr(meta.query_advice(block_gadget.is_last_block, Rotation::cur())), ])) }); + */ + + /* meta.create_gate("DecompressionCircuit: handle end of last block", |meta| { let mut cb = BaseConstraintBuilder::default(); - cb.require_equal( "tag_next depending on whether or not this is the last block", meta.query_advice(tag_gadget.tag_next, Rotation::cur()), ZstdTag::Null.expr(), ); - cb.require_equal( "decoded_len has been reached if last block", meta.query_advice(decoded_len_acc, Rotation::cur()), 
meta.query_advice(decoded_len, Rotation::cur()), ); - cb.require_equal( "byte idx has reached the encoded len", meta.query_advice(byte_idx, Rotation::cur()), meta.query_advice(encoded_len, Rotation::cur()), ); - cb.require_equal( "block can end only on Raw/Rle/TODO tag", sum::expr([ @@ -1284,13 +1324,11 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ]), 1.expr(), ); - cb.require_equal( "block_idx == block_len", meta.query_advice(block_gadget.idx, Rotation::cur()), meta.query_advice(block_gadget.block_len, Rotation::cur()), ); - let (_, idx_eq_len) = block_gadget.idx_cmp_len.expr(meta, None); cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), @@ -1299,6 +1337,9 @@ impl SubCircuitConfig for DecompressionCircuitConfig { idx_eq_len, ])) }); + */ + + /* meta.lookup( "DecompressionCircuit: BlockHeader (BlockSize == BlockHeader >> 3)", |meta| { @@ -1315,18 +1356,20 @@ impl SubCircuitConfig for DecompressionCircuitConfig { vec![(condition * range_value, range8.into())] }, ); + */ + meta.lookup_any( "DecompressionCircuit: lookup for tuple (block_type, tag_next)", |meta| { let condition = and::expr([ meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), meta.query_advice(tag_gadget.is_block_header, Rotation::cur()), + meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), ]); [ meta.query_advice(tag_gadget.tag, Rotation::cur()), - meta.query_advice(value_bits[6], Rotation(N_BLOCK_HEADER_BYTES as i32 - 1)), meta.query_advice(value_bits[5], Rotation(N_BLOCK_HEADER_BYTES as i32 - 1)), + meta.query_advice(value_bits[6], Rotation(N_BLOCK_HEADER_BYTES as i32 - 1)), meta.query_advice(tag_gadget.tag_next, Rotation::cur()), ] .into_iter() @@ -1402,8 +1445,8 @@ impl SubCircuitConfig for DecompressionCircuitConfig { |meta| { let mut cb = BaseConstraintBuilder::default(); - let block_type_bit0 = meta.query_advice(value_bits[7], Rotation::cur()); - let block_type_bit1 = meta.query_advice(value_bits[6], 
Rotation::cur()); + let block_type_bit0 = meta.query_advice(value_bits[0], Rotation::cur()); + let block_type_bit1 = meta.query_advice(value_bits[1], Rotation::cur()); cb.require_zero( "block type cannot be TREELESS, i.e. block_type == 3 not possible", block_type_bit0 * block_type_bit1, @@ -1416,18 +1459,19 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ])) }, ); + meta.lookup_any( "DecompressionCircuit: lookup for tuple (zstd_block_type, tag_next)", |meta| { let condition = and::expr([ meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), meta.query_advice(tag_gadget.is_literals_header, Rotation::cur()), + meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), ]); [ meta.query_advice(tag_gadget.tag, Rotation::cur()), - meta.query_advice(value_bits[7], Rotation::cur()), - meta.query_advice(value_bits[6], Rotation::cur()), + meta.query_advice(value_bits[1], Rotation::cur()), + meta.query_advice(value_bits[0], Rotation::cur()), meta.query_advice(tag_gadget.tag_next, Rotation::cur()), ] .into_iter() @@ -1436,6 +1480,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.lookup_any( "DecompressionCircuit: lookup for LiteralsHeader decomposition", |meta| { @@ -1445,10 +1490,10 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(tag_gadget.is_literals_header, Rotation::cur()), ]); [ - meta.query_advice(value_bits[7], Rotation::cur()), // block type bit0 - meta.query_advice(value_bits[6], Rotation::cur()), // block type bit1 - meta.query_advice(value_bits[5], Rotation::cur()), // size format bit0 - meta.query_advice(value_bits[4], Rotation::cur()), // size format bit1 + meta.query_advice(value_bits[0], Rotation::cur()), // block type bit0 + meta.query_advice(value_bits[1], Rotation::cur()), // block type bit1 + meta.query_advice(value_bits[2], Rotation::cur()), // size format bit0 + meta.query_advice(value_bits[3], Rotation::cur()), // size format 
bit1 meta.query_advice(tag_gadget.tag_len, Rotation::cur()), // num bytes header meta.query_advice(lstream_config.lstream_kind, Rotation::cur()), // 1 or 4 meta.query_advice(literals_header.branch, Rotation::cur()), // branch @@ -1460,55 +1505,65 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.lookup_any( "DecompressionCircuit: lookup for LiteralsHeader regen/compr size", |meta| { let condition = and::expr([ meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), meta.query_advice(tag_gadget.is_literals_header, Rotation::cur()), + meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), ]); // Which branch are we taking in the literals header decomposition. let branch = meta.query_advice(literals_header.branch, Rotation::cur()); // Is it the case of zstd compressed block, i.e. block type == 0b10. Since we - // already know that block type == 0b11 (TREELESS) will not occur, we can skip the - // check for not::expr(value_bits[7]). - let is_compressed = meta.query_advice(value_bits[6], Rotation::cur()); + // already know that block type == 0b11 (TREELESS) will not occur, we can skip + // the check for not::expr(value_bits[0]). + let is_compressed = meta.query_advice(value_bits[1], Rotation::cur()); // Is the size format == 0b11. let is_size_format_0b11 = meta.query_advice(literals_header.sf_max, Rotation::cur()); + let size_format_bit0 = meta.query_advice(value_bits[2], Rotation::cur()); + let size_format_bit1 = meta.query_advice(value_bits[3], Rotation::cur()); + // Literals header is at least 1 byte for all branches. 
let byte0 = meta.query_advice(value_byte, Rotation::cur()); + // Literals header is at least 2 bytes for: + // - zstd compressed block + // - size format 01 / 11 for Raw/RLE block let byte1 = select::expr( is_compressed.expr(), meta.query_advice(value_byte, Rotation(1)), select::expr( - meta.query_advice(value_bits[5], Rotation::cur()), + size_format_bit0.expr(), meta.query_advice(value_byte, Rotation(1)), 0.expr(), ), ); + // Literals header is at least 3 bytes for: + // - zstd compressed block + // - size format 11 for Raw/RLE block let byte2 = select::expr( is_compressed.expr(), meta.query_advice(value_byte, Rotation(2)), select::expr( - meta.query_advice(value_bits[5], Rotation::cur()), + is_size_format_0b11.expr(), meta.query_advice(value_byte, Rotation(2)), 0.expr(), ), ); + // Literals header is at least 4 bytes for: + // - zstd compressed block with size format 10 / 11 let byte3 = select::expr( - is_compressed.expr(), - select::expr( - meta.query_advice(value_bits[5], Rotation::cur()), - meta.query_advice(value_byte, Rotation(3)), - 0.expr(), - ), + is_compressed.expr() * size_format_bit1.expr(), + meta.query_advice(value_byte, Rotation(3)), 0.expr(), ); + // Literals header is 5 bytes for: + // - zstd compressed block with size format 11 let byte4 = select::expr( is_compressed * is_size_format_0b11, meta.query_advice(value_byte, Rotation(4)), @@ -1523,8 +1578,8 @@ impl SubCircuitConfig for DecompressionCircuitConfig { byte2, // byte2 byte3, // byte3 byte4, // byte4 - meta.query_advice(literals_header.regen_size, Rotation::cur()), // regenerated size - meta.query_advice(literals_header.compr_size, Rotation::cur()), // compressed size + meta.query_advice(literals_header.regen_size, Rotation::cur()), + meta.query_advice(literals_header.compr_size, Rotation::cur()), ] .into_iter() .zip(literals_header_table.table_exprs(meta)) @@ -1532,6 +1587,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.create_gate("DecompressionCircuit: 
LiteralsSection", |meta| { let mut cb = BaseConstraintBuilder::default(); @@ -1570,23 +1626,14 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.create_gate("DecompressionCircuit: ZstdBlock Raw bytes", |meta| { let mut cb = BaseConstraintBuilder::default(); - cb.require_equal( - "value_byte == decoded_byte", - meta.query_advice(value_byte, Rotation::cur()), - meta.query_advice(decoded_byte, Rotation::cur()), - ); - - cb.condition( - meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()), - |cb| { - cb.require_equal( - "tag_len == regen_size", - meta.query_advice(tag_gadget.tag_len, Rotation::cur()), - meta.query_advice(literals_header.regen_size, Rotation::prev()), - ); - }, - ); - + let is_first = meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()); + cb.condition(is_first, |cb| { + cb.require_equal( + "tag_len == regen_size", + meta.query_advice(tag_gadget.tag_len, Rotation::cur()), + meta.query_advice(literals_header.regen_size, Rotation::prev()), + ); + }); cb.require_equal( "byte_idx increments", meta.query_advice(byte_idx, Rotation::cur()), @@ -1607,32 +1654,20 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.create_gate("DecompressionCircuit: ZstdBlock RLE bytes", |meta| { let mut cb = BaseConstraintBuilder::default(); - cb.require_equal( - "value_byte == decoded_byte", - meta.query_advice(value_byte, Rotation::cur()), - meta.query_advice(decoded_byte, Rotation::cur()), - ); - - let is_tag_change = meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()); - cb.condition(is_tag_change.expr(), |cb| { + let is_first = meta.query_advice(tag_gadget.is_tag_change, Rotation::cur()); + cb.condition(is_first.expr(), |cb| { cb.require_equal( "tag_len == regen_size", meta.query_advice(tag_gadget.tag_len, Rotation::cur()), meta.query_advice(literals_header.regen_size, Rotation::prev()), ); }); - - cb.condition(not::expr(is_tag_change), |cb| { + cb.condition(not::expr(is_first), |cb| { cb.require_equal( "byte_idx remains the 
same", meta.query_advice(byte_idx, Rotation::cur()), meta.query_advice(byte_idx, Rotation::prev()), ); - cb.require_equal( - "decoded byte remains the same", - meta.query_advice(decoded_byte, Rotation::cur()), - meta.query_advice(decoded_byte, Rotation::prev()), - ); }); cb.gate(and::expr([ @@ -1664,6 +1699,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { let tag_len_fse_code = meta.query_advice(tag_gadget.tag_len, Rotation::cur()); let tag_len_huffman_code = meta.query_advice(huffman_tree_config.huffman_code_len, Rotation::cur()); + cb.require_equal( "huffman header value byte check", meta.query_advice(value_byte, Rotation::cur()) + 1.expr(), @@ -1754,6 +1790,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.create_gate( "DecompressionCircuit: ZstdBlockFseCode (fse code)", |meta| { @@ -1762,20 +1799,16 @@ impl SubCircuitConfig for DecompressionCircuitConfig { // The decoded symbol keeps incrementing in the FSE code reconstruction. Since // we've already done the check for the first symbol in the huffman header gate, we // only check for increments. 
- cb.require_equal( - "fse table reconstruction: decoded symbol increments", - meta.query_advice(bitstream_decoder.decoded_symbol, Rotation::cur()), - meta.query_advice(bitstream_decoder.decoded_symbol, Rotation::prev()) - + 1.expr(), - ); - cb.require_equal( - "number of states assigned so far is accumulated correctly", - meta.query_advice(fse_decoder.n_acc, Rotation::cur()) + 1.expr(), - meta.query_advice(fse_decoder.n_acc, Rotation::prev()) - + meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let is_last = meta.query_advice(tag_gadget.is_tag_change, Rotation::next()); + + cb.condition(not::expr(is_last.clone()), |cb| { + cb.require_equal( + "number of states assigned so far is accumulated correctly", + meta.query_advice(fse_decoder.n_acc, Rotation::cur()), + meta.query_advice(fse_decoder.n_acc, Rotation::prev()) + + (meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()) - 1.expr()), + ); + }); cb.condition(is_last, |cb| { cb.require_equal( "on the last row, accumulated number of symbols is the table size of FSE table", @@ -1788,139 +1821,11 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::prev())), ])) }, ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockFseCode (contained bitstream start)", - |meta| { - let (huffman_byte_offset, start, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), - not::expr(meta.query_advice(tag_gadget.is_tag_change, 
Rotation::cur())), - bitstream_decoder.is_contained(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(value_byte, Rotation::cur()), // byte value - bit_value, // bitstring value - 1.expr(), // bitstring length accumulator, starts at 1 - start, // bit index start - 1.expr(), // denotes that this bit index is a part of the bitstring - 1.expr(), // denotes that this bit index is a part of the bitstring - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_contained(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockFseCode (contained bitstream end)", - |meta| { - let (huffman_byte_offset, start, end, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), - bitstream_decoder.is_contained(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(value_byte, Rotation::cur()), // byte value - bit_value, // bitstring value - end.expr() - start + 1.expr(), // bitstring length - end, // bit index at end - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_contained(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - 
meta.lookup_any( - "DecompressionCircuit: ZstdBlockFseCode (spanned bitstream start)", - |meta| { - let (huffman_byte_offset, start, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), - bitstream_decoder.is_spanned(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(byte_idx, Rotation::next()), // byte index' - meta.query_advice(value_byte, Rotation::cur()), // byte value - meta.query_advice(value_byte, Rotation::next()), // byte value' - bit_value, // bitstring value - 1.expr(), // bitstring len acc - start, // bit index start - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_spanned(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockFseCode (spanned bitstring end)", - |meta| { - let (huffman_byte_offset, start, end, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), - bitstream_decoder.is_spanned(meta, None), - 
]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(byte_idx, Rotation::next()), // byte index' - meta.query_advice(value_byte, Rotation::cur()), // byte value - meta.query_advice(value_byte, Rotation::next()), // byte value' - bit_value, // bitstring value - end.expr() - start + 1.expr(), // bitstring length - end, // bit index at end - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_spanned(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); + meta.lookup_any( "DecompressionCircuit: ZstdBlockFseCode (symbol count check)", |meta| { @@ -1932,7 +1837,9 @@ impl SubCircuitConfig for DecompressionCircuitConfig { let condition = and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), // Exclude huffman header byte + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::prev())), // Exclude accuracy log bits + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::next())), // Exclude trailing bits ]); // The FSE table reconstruction follows a variable bit packing. However we know the // start and end bit index for the bitstring that was read. 
We read a value in the @@ -1951,6 +1858,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.create_gate("DecompressionCircuit: HuffmanTreeSection", |meta| { let mut cb = BaseConstraintBuilder::default(); @@ -1986,6 +1894,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { /////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////// ZstdTag::ZstdBlockHuffmanCode ///////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// + meta.create_gate( "DecompressionCircuit: ZstdBlockHuffmanCode (first row)", |meta| { @@ -1993,12 +1902,12 @@ impl SubCircuitConfig for DecompressionCircuitConfig { // - The first row of the HuffmanCode tag is the leading 0s and sentinel bit. // - The second row of the HuffmanCode tag is the reading of AL number of bits from - // the bitstream to find the initial state in the FSE table. - // - Only from the third row onwards, do we start emitting symbols (weights). - - cb.require_zero( - "num_emitted starts at 0 from the second row", + // the bitstream to find the initial state in the FSE table and emit the first + // symbol. + cb.require_equal( + "num_emitted starts at 1 from the second row", meta.query_advice(fse_decoder.num_emitted, Rotation::next()), + 1.expr(), ); // On the second row we read AL number of bits. 
@@ -2014,7 +1923,17 @@ impl SubCircuitConfig for DecompressionCircuitConfig { cb.require_equal( "init state of FSE table", meta.query_advice(bitstream_decoder.bit_value, Rotation::next()), - meta.query_advice(fse_decoder.state, Rotation(2)), + meta.query_advice(fse_decoder.state, Rotation::next()), + ); + + // Baseline conditions for FSE state transition + cb.require_zero( + "Current row baseline", + meta.query_advice(fse_decoder.baseline, Rotation::cur()), + ); + cb.require_zero( + "Previous row baseline", + meta.query_advice(fse_decoder.baseline, Rotation::prev()), ); let lstream_kind = meta.query_advice(lstream_config.lstream_kind, Rotation::cur()); @@ -2046,152 +1965,22 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(fse_decoder.num_emitted, Rotation::prev()) + 1.expr(), ); - // Check for state transition, except if we are on the last row of HuffmanCode. - let is_last_row = meta.query_advice(tag_gadget.is_tag_change, Rotation::next()); - let baseline = meta.query_advice(fse_decoder.baseline, Rotation::cur()); // baseline at state + let baseline = meta.query_advice(fse_decoder.baseline, Rotation(-2)); // baseline at state let bit_value = meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()); // bits read - cb.condition(not::expr(is_last_row), |cb| { - cb.require_equal( - "state' == baseline(state) + bit_value", - meta.query_advice(fse_decoder.state, Rotation::next()), - baseline + bit_value, - ); - }); + + cb.require_equal( + "state' == baseline(state) + bit_value (every other row)", + meta.query_advice(fse_decoder.state, Rotation::cur()), + baseline + bit_value, + ); cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::prev())), ])) }, ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockHuffmanCode (contained 
bitstream start)", - |meta| { - let (huffman_byte_offset, start, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), - bitstream_decoder.is_contained(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(value_byte, Rotation::cur()), // byte value - bit_value, // bitstring value - 1.expr(), // bitstring length accumulator, starts at 1 - start, // bit index start - 1.expr(), // denotes that this bit index is a part of the bitstring - 1.expr(), // denotes that this bit index is a part of the bitstring - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_contained(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockHuffmanCode (contained bitstream end)", - |meta| { - let (huffman_byte_offset, start, end, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), - bitstream_decoder.is_contained(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(value_byte, Rotation::cur()), // byte value - bit_value, // bitstring value - 
end.expr() - start + 1.expr(), // bitstring length - end, // bit index at end - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_contained(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockHuffmanCode (spanned bitstream start)", - |meta| { - let (huffman_byte_offset, start, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), - bitstream_decoder.is_spanned(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(byte_idx, Rotation::next()), // byte index' - meta.query_advice(value_byte, Rotation::cur()), // byte value - meta.query_advice(value_byte, Rotation::next()), // byte value' - bit_value, // bitstring value - 1.expr(), // bitstring len acc - start, // bit index start - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_spanned(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockHuffmanCode (spanned bitstring end)", - |meta| { - let (huffman_byte_offset, start, end, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), - 
meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), - bitstream_decoder.is_spanned(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(byte_idx, Rotation::next()), // byte index' - meta.query_advice(value_byte, Rotation::cur()), // byte value - meta.query_advice(value_byte, Rotation::next()), // byte value' - bit_value, // bitstring value - end.expr() - start + 1.expr(), // bitstring length - end, // bit index at end - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_spanned(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); // 1. We first read AL number of bits from the bitstream (say bit_value_init) and transition // to the state == bit_value_init. @@ -2216,18 +2005,40 @@ impl SubCircuitConfig for DecompressionCircuitConfig { // symbols (weights), where N is the total number of huffman symbols that are being encoded // in that Huffman table. As per the canonical Huffman code representation, we only need to // emit N - 1 weights and the weight of the last symbol can be calculated. + meta.lookup_any( "DecompressionCircuit: ZstdBlockHuffmanCode (fse table lookup)", |meta| { let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), + // TODO: Degree > 9 + // Comment q_enable out for now with the assumption that when is_huffman_code is on, q_enable must also be on. (perhaps constrain this?) 
+ // meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), - not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::prev())), + // TODO: Verify below exclusions + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), // Exclude leading 0s and sentinel 1 bit + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::next())), // Exclude the last row + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation(2))), // Exclude the second last row as max rotation is 2 + ]); - let start = meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()); - let end = meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()); - let num_bits = end - start + 1.expr(); + + // TODO: Verify that acquiring data for num_bits from bitstream_decoder targets 2 rows down, not the current row + + // let start = meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()); + // let end = meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()); + // let num_bits = select::expr( + // meta.query_advice(bitstream_decoder.is_nil, Rotation::cur()), + // 0.expr(), + // end - start + 1.expr(), + // ); + + let start = meta.query_advice(bitstream_decoder.bit_index_start, Rotation(2)); + let end = meta.query_advice(bitstream_decoder.bit_index_end, Rotation(2)); + let num_bits = select::expr( + meta.query_advice(bitstream_decoder.is_nil, Rotation(2)), + 0.expr(), + end - start + 1.expr(), + ); + [ meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), meta.query_advice(huffman_tree_config.fse_table_size, Rotation::cur()), @@ -2242,6 +2053,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.lookup_any( "DecompressionCircuit: ZstdBlockHuffmanCode (huffman codes table lookup)", |meta| { @@ -2253,7 +2065,7 @@ impl SubCircuitConfig for 
DecompressionCircuitConfig { ]); [ meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), + meta.query_advice(fse_decoder.num_emitted, Rotation::cur()) - 1.expr(), meta.query_advice(fse_decoder.symbol, Rotation::cur()), ] .into_iter() @@ -2262,6 +2074,7 @@ impl SubCircuitConfig for DecompressionCircuitConfig { .collect() }, ); + meta.lookup_any( "DecompressionCircuit: ZstdBlockHuffmanCode (num symbols in huffman code)", |meta| { @@ -2327,13 +2140,15 @@ impl SubCircuitConfig for DecompressionCircuitConfig { // // The HuffmanTreeDescriptionSize can be calculated as: // - HuffmanTreeDescriptionSize == byte_idx(JumpTable) - byte_idx(HuffmanTree) + cb.require_equal( "length of lstream4", meta.query_advice(lstream_config.len_lstream4, Rotation::cur()) + len1 + len2 + len3 - + meta.query_advice(byte_idx, Rotation::cur()), + + meta.query_advice(byte_idx, Rotation::cur()) + + 6.expr(), meta.query_advice(literals_header.compr_size, Rotation::cur()) + meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), ); @@ -2439,12 +2254,6 @@ impl SubCircuitConfig for DecompressionCircuitConfig { meta.query_advice(lstream_config.lstream, Rotation::prev()), ); - cb.require_equal( - "decoded byte is the decoded symbol", - meta.query_advice(decoded_byte, Rotation::cur()), - meta.query_advice(bitstream_decoder.decoded_symbol, Rotation::cur()), - ); - cb.gate(and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_lstream, Rotation::cur()), @@ -2527,133 +2336,109 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ])) }, ); + + // TODO: to be enabled once DecodedLiteralsTable has been assigned witness to. 
+ /* meta.lookup_any( - "DecompressionCircuit: ZstdBlockLstream (contained bitstream start)", + "DecompressionCircuit: ZstdBlockLstream (decoded literal byte)", |meta| { - let (huffman_byte_offset, start, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); let condition = and::expr([ meta.query_fixed(q_enable, Rotation::cur()), meta.query_advice(tag_gadget.is_lstream, Rotation::cur()), - bitstream_decoder.is_contained(meta, None), + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), ]); [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(value_byte, Rotation::cur()), // byte value - bit_value, // bitstring value - 1.expr(), // bitstring length accumulator, starts at 1 - start, // bit index start - 1.expr(), // denotes that this bit index is a part of the bitstring - 1.expr(), // denotes that this bit index is a part of the bitstring - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_contained(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockLstream (contained bitstream end)", - |meta| { - let (huffman_byte_offset, start, end, bit_value) = ( meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), - meta.query_advice(tag_gadget.is_lstream, Rotation::cur()), - bitstream_decoder.is_contained(meta, None), - ]); - [ - 
huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(value_byte, Rotation::cur()), // byte value - bit_value, // bitstring value - end.expr() - start + 1.expr(), // bitstring length - end, // bit index at end - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse + meta.query_advice(byte_idx, Rotation::cur()), + meta.query_advice(bitstream_decoder.decoded_symbol, Rotation::cur()), ] .into_iter() - .zip(bs_acc_table.table_exprs_contained(meta)) + .zip(decoded_literals_table.table_exprs(meta)) .map(|(value, table)| (condition.expr() * value, table)) .collect() }, ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockLstream (spanned bitstream start)", - |meta| { - let (huffman_byte_offset, start, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), + */ + + meta.lookup_any("DecompressionCircuit: bitstring (start)", |meta| { + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + // TODO: Make sure that both rows must be active witness rows and not paddings. 
+ // This condition also excludes the last row from lookup + meta.query_fixed(q_enable, Rotation::next()), + sum::expr([ + and::expr([ + meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), + ]), + meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), meta.query_advice(tag_gadget.is_lstream, Rotation::cur()), - bitstream_decoder.is_spanned(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(byte_idx, Rotation::next()), // byte index' - meta.query_advice(value_byte, Rotation::cur()), // byte value - meta.query_advice(value_byte, Rotation::next()), // byte value' - bit_value, // bitstring value - 1.expr(), // bitstring len acc - start, // bit index start - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_spanned(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - meta.lookup_any( - "DecompressionCircuit: ZstdBlockLstream (spanned bitstring end)", - |meta| { - let (huffman_byte_offset, start, end, bit_value) = ( - meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), - meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), - ); - let condition = and::expr([ - meta.query_fixed(q_enable, Rotation::cur()), + ]), + not::expr(meta.query_advice(bitstream_decoder.is_nil, Rotation::cur())), + ]); + let (huffman_byte_offset, bit_index_start, bit_value) = ( + meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), + meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), + meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), 
+ ); + [ + huffman_byte_offset, + meta.query_advice(byte_idx, Rotation::cur()), + meta.query_advice(byte_idx, Rotation::next()), + meta.query_advice(value_byte, Rotation::cur()), + meta.query_advice(value_byte, Rotation::next()), + bit_value, + 1.expr(), // bitstring_len at start + bit_index_start, + 1.expr(), // from_start + 1.expr(), // until_end + meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), + ] + .into_iter() + .zip(bs_acc_table.table_exprs(meta)) + .map(|(value, table)| (condition.expr() * value, table)) + .collect() + }); + meta.lookup_any("DecompressionCircuit: bitstring (end)", |meta| { + let condition = and::expr([ + meta.query_fixed(q_enable, Rotation::cur()), + // TODO: Make sure that both rows must be active witness rows and not paddings. + // This condition also excludes the last row from lookup + meta.query_fixed(q_enable, Rotation::next()), + sum::expr([ + and::expr([ + meta.query_advice(tag_gadget.is_fse_code, Rotation::cur()), + not::expr(meta.query_advice(tag_gadget.is_tag_change, Rotation::cur())), + ]), + meta.query_advice(tag_gadget.is_huffman_code, Rotation::cur()), meta.query_advice(tag_gadget.is_lstream, Rotation::cur()), - bitstream_decoder.is_spanned(meta, None), - ]); - [ - huffman_byte_offset, // huffman ID - meta.query_advice(byte_idx, Rotation::cur()), // byte index - meta.query_advice(byte_idx, Rotation::next()), // byte index' - meta.query_advice(value_byte, Rotation::cur()), // byte value - meta.query_advice(value_byte, Rotation::next()), // byte value' - bit_value, // bitstring value - end.expr() - start + 1.expr(), // bitstring length - end, // bit index at end - 1.expr(), // from start - 1.expr(), // to end - meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), // is reverse - ] - .into_iter() - .zip(bs_acc_table.table_exprs_spanned(meta)) - .map(|(value, table)| (condition.expr() * value, table)) - .collect() - }, - ); - + ]), + not::expr(meta.query_advice(bitstream_decoder.is_nil, Rotation::cur())), + ]); + 
let (huffman_byte_offset, bit_index_start, bit_index_end, bit_value) = ( + meta.query_advice(huffman_tree_config.huffman_tree_idx, Rotation::cur()), + meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), + meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), + meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), + ); + [ + huffman_byte_offset, + meta.query_advice(byte_idx, Rotation::cur()), + meta.query_advice(byte_idx, Rotation::next()), + meta.query_advice(value_byte, Rotation::cur()), + meta.query_advice(value_byte, Rotation::next()), + bit_value, + bit_index_end.expr() - bit_index_start + 1.expr(), // bitstring_len at end + bit_index_end, + 1.expr(), // from_start + 1.expr(), // until_end + meta.query_advice(tag_gadget.is_reverse, Rotation::cur()), + ] + .into_iter() + .zip(bs_acc_table.table_exprs(meta)) + .map(|(value, table)| (condition.expr() * value, table)) + .collect() + }); meta.create_gate("DecompressionCircuit: bitstream reader", |meta| { let mut cb = BaseConstraintBuilder::default(); @@ -2692,10 +2477,12 @@ impl SubCircuitConfig for DecompressionCircuitConfig { let is_strictly_contained = and::expr([ is_not_last.expr(), bitstream_decoder.is_strictly_contained(meta, None), + not::expr(meta.query_advice(bitstream_decoder.is_nil, Rotation::cur())), ]); let is_byte_aligned = and::expr([ is_not_last.expr(), bitstream_decoder.is_byte_aligned(meta, None), + not::expr(meta.query_advice(bitstream_decoder.is_nil, Rotation::cur())), ]); let is_spanned = and::expr([is_not_last.expr(), bitstream_decoder.is_spanned(meta, None)]); @@ -2727,6 +2514,39 @@ impl SubCircuitConfig for DecompressionCircuitConfig { ); }); + // Special case where we are reading no bits from the bitstream. This can only occur in + // case we are processing tag=ZstdBlockHuffmanCode. 
+ cb.condition( + meta.query_advice(bitstream_decoder.is_nil, Rotation::cur()), + |cb| { + cb.require_equal( + "0 # of bits read can only happen in ZstdBlockHuffmanCode", + meta.query_advice(tag_gadget.tag, Rotation::cur()), + ZstdTag::ZstdBlockHuffmanCode.expr(), + ); + cb.require_equal( + "bit_index_start == bit_index_end since no bit is read", + meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), + meta.query_advice(bitstream_decoder.bit_index_end, Rotation::cur()), + ); + cb.require_equal( + "bit_value == 0 since no bit is read", + meta.query_advice(bitstream_decoder.bit_value, Rotation::cur()), + 0.expr(), + ); + cb.require_equal( + "byte_idx' == byte_idx since no bit is read", + meta.query_advice(byte_idx, Rotation::next()), + meta.query_advice(byte_idx, Rotation::cur()), + ); + cb.require_equal( + "bit_index_start' == bit_index_start since no bit is read", + meta.query_advice(bitstream_decoder.bit_index_start, Rotation::next()), + meta.query_advice(bitstream_decoder.bit_index_start, Rotation::cur()), + ); + }, + ); + // if bitstring is spanned. cb.condition(is_spanned, |cb| { cb.require_equal( @@ -2773,13 +2593,594 @@ impl SubCircuitConfig for DecompressionCircuitConfig { bitstream_decoder, fse_decoder, lstream_config, + bitwise_op_table, + range4, + range8, + range16, + range64, + range128, + range256, + tag_rom_table, + pow_rand_table, + block_type_rom_table, + pow2_table, + literals_header_rom_table, + literals_header_table, + bitstring_accumulation_table: bs_acc_table, + fse_table, + huffman_codes_table, + decoded_literals_table, } } } +impl DecompressionCircuitConfig { + /// Assign witness to the decompression circuit. 
+ pub(crate) fn assign( + &self, + layouter: &mut impl Layouter, + witness_rows: Vec>, + aux_data: Vec, + fse_aux_tables: Vec, + huffman_codes: Vec, + challenges: &Challenges>, + ) -> Result<(), Error> { + let mut rand_pow: Vec> = vec![Value::known(F::one())]; + + self.bitstring_accumulation_table + .assign(layouter, &witness_rows)?; + self.fse_table.assign(layouter, fse_aux_tables)?; + self.huffman_codes_table.assign(layouter, huffman_codes)?; + + let literal_header_offset = witness_rows + .iter() + .find(|r| r.state.tag == ZstdTag::ZstdBlockLiteralsHeader) + .unwrap() + .encoded_data + .byte_idx; + let literal_bytes = witness_rows + .iter() + .filter(|&r| r.state.tag == ZstdTag::ZstdBlockLiteralsHeader) + .map(|r| r.encoded_data.value_byte) + .collect::>(); + + self.literals_header_table.assign( + layouter, + &[( + literal_header_offset, + literal_bytes.as_slice(), + aux_data[10], + aux_data[4], + aux_data[5], + )], + )?; + + // TODO: pass decoded literals along with boundaries (literal lengths calculated while + // applying the Sequences FSE tables). 
+ // self.decoded_literals_table.assign(layouter)?; + + layouter.assign_region( + || "Decompression table region", + |mut region| { + let mut last_byte_idx: usize = 0; + let mut value_rlc = Value::known(F::zero()); + + for (i, row) in witness_rows.iter().enumerate() { + let tag_len = row.state.tag_len as usize; + assert!(tag_len > 0); + + last_byte_idx = row.encoded_data.byte_idx as usize; + + while tag_len >= rand_pow.len() { + let tail = *rand_pow.last().expect("Tail exists"); + rand_pow.push(tail * challenges.keccak_input()); + } + + region.assign_fixed( + || "q_enable", + self.q_enable, + i, + || Value::known(F::one()), + )?; + region.assign_fixed( + || "q_first", + self.q_first, + i, + || Value::known(F::from((i == 0) as u64)), + )?; + region.assign_advice( + || "is_padding", + self.is_padding, + i, + || Value::known(F::zero()), + )?; + region.assign_advice( + || "byte_idx", + self.byte_idx, + i, + || Value::known(F::from(row.encoded_data.byte_idx)), + )?; + region.assign_advice( + || "encoded_len", + self.encoded_len, + i, + || Value::known(F::from(row.encoded_data.encoded_len)), + )?; + + if i > 0 && row.state.is_tag_change { + let prev_row = &witness_rows[i - 1]; + value_rlc = value_rlc * rand_pow[prev_row.state.tag_len as usize] + + prev_row.state.tag_rlc; + } + + region.assign_advice( + || "value_rlc", + self.value_rlc, + i, + || { + if i == 0 { + Value::known(F::zero()) + } else { + value_rlc + } + }, + )?; + + // Byte value and bits decomposition + region.assign_advice( + || "value_byte", + self.value_byte, + i, + || Value::known(F::from(row.encoded_data.value_byte as u64)), + )?; + let bits = value_bits_le(row.encoded_data.value_byte); + let is_reverse = row.encoded_data.reverse; + for (idx, col) in self.value_bits.iter().rev().enumerate() { + region.assign_advice( + || "value_bits", + *col, + i, + || { + Value::known(F::from( + (if is_reverse { + bits[idx] + } else { + bits[N_BITS_PER_BYTE - idx - 1] + }) as u64, + )) + }, + )?; + } + + // Decoded 
Data + region.assign_advice( + || "decoded_len", + self.decoded_len, + i, + || Value::known(F::from(row.decoded_data.decoded_len)), + )?; + region.assign_advice( + || "decoded_len_acc", + self.decoded_len_acc, + i, + || Value::known(F::from(row.decoded_data.decoded_len_acc)), + )?; + region.assign_advice( + || "decoded_byte", + self.decoded_byte, + i, + || Value::known(F::from(row.decoded_data.decoded_byte as u64)), + )?; + region.assign_advice( + || "decoded_rlc", + self.decoded_rlc, + i, + || row.decoded_data.decoded_value_rlc, + )?; + + // Block Gadget + let is_block = !(row.state.tag == ZstdTag::FrameHeaderDescriptor + || row.state.tag == ZstdTag::FrameContentSize + || row.state.tag == ZstdTag::BlockHeader) + as u64; + region.assign_advice( + || "block_gadget.is_block", + self.block_gadget.is_block, + i, + || Value::known(F::from(is_block)), + )?; + region.assign_advice( + || "block_gadget.block_idx", + self.block_gadget.idx, + i, + || Value::known(F::one()), + )?; + region.assign_advice( + || "block_gadget.block_len", + self.block_gadget.block_len, + i, + || Value::known(F::one()), + )?; + region.assign_advice( + || "block_gadget.is_last_block", + self.block_gadget.is_last_block, + i, + || Value::known(F::one()), + )?; + + let idx_cmp_len_chip = + ComparatorChip::construct(self.block_gadget.idx_cmp_len.clone()); + idx_cmp_len_chip.assign(&mut region, i, F::one(), F::one())?; + + // Tag Gadget + region.assign_advice( + || "tag_gadget.tag", + self.tag_gadget.tag, + i, + || Value::known(F::from(row.state.tag as u64)), + )?; + region.assign_advice( + || "tag_gadget.tag_next", + self.tag_gadget.tag_next, + i, + || Value::known(F::from(row.state.tag_next as u64)), + )?; + region.assign_advice( + || "tag_gadget.max_len", + self.tag_gadget.max_len, + i, + || Value::known(F::from(row.state.max_tag_len)), + )?; + region.assign_advice( + || "tag_gadget.tag_idx", + self.tag_gadget.tag_idx, + i, + || Value::known(F::from(row.state.tag_idx)), + )?; + region.assign_advice( + 
|| "tag_gadget.tag_len", + self.tag_gadget.tag_len, + i, + || Value::known(F::from(row.state.tag_len)), + )?; + region.assign_advice( + || "tag_gadget.is_reverse", + self.tag_gadget.is_reverse, + i, + || Value::known(F::from(row.encoded_data.reverse as u64)), + )?; + region.assign_advice( + || "tag_gadget.is_tag_change", + self.tag_gadget.is_tag_change, + i, + || Value::known(F::from(row.state.is_tag_change as u64)), + )?; + region.assign_advice( + || "tag_gadget.tag_value", + self.tag_gadget.tag_value, + i, + || row.state.tag_value, + )?; + region.assign_advice( + || "tag_gadget.tag_value_acc", + self.tag_gadget.tag_value_acc, + i, + || row.state.tag_value_acc, + )?; + region.assign_advice( + || "tag_gadget.tag_rlc", + self.tag_gadget.tag_rlc, + i, + || row.state.tag_rlc, + )?; + region.assign_advice( + || "tag_gadget.tag_rlc_acc", + self.tag_gadget.tag_rlc_acc, + i, + || row.state.tag_rlc_acc, + )?; + region.assign_advice( + || "tag_gadget.rand_pow_tag_len", + self.tag_gadget.rand_pow_tag_len, + i, + || rand_pow[tag_len], + )?; + + let tag_bits = BinaryNumberChip::construct(self.tag_gadget.tag_bits); + tag_bits.assign(&mut region, i, &row.state.tag)?; + + let idx_cmp_len_chip = + ComparatorChip::construct(self.tag_gadget.idx_cmp_len.clone()); + idx_cmp_len_chip.assign( + &mut region, + i, + F::from(row.state.tag_idx), + F::from(row.state.tag_len), + )?; + + let len_cmp_max_chip = + ComparatorChip::construct(self.tag_gadget.len_cmp_max.clone()); + len_cmp_max_chip.assign( + &mut region, + i, + F::from(row.state.tag_len), + F::from(row.state.max_tag_len), + )?; + + let max_tag_len = row.state.max_tag_len; + let mlen_lt_0x20_chip = LtChip::construct(self.tag_gadget.mlen_lt_0x20); + mlen_lt_0x20_chip.assign( + &mut region, + i, + F::from(max_tag_len), + F::from(0x20), + )?; + + let is_block_header = (row.state.tag == ZstdTag::BlockHeader) as u64; + let is_literals_header = + (row.state.tag == ZstdTag::ZstdBlockLiteralsHeader) as u64; + let is_fse_code = 
(row.state.tag == ZstdTag::ZstdBlockFseCode) as u64; + let is_huffman_code = (row.state.tag == ZstdTag::ZstdBlockHuffmanCode) as u64; + let is_lstream = (row.state.tag == ZstdTag::ZstdBlockLstream) as u64; + let is_jumptable = (row.state.tag == ZstdTag::ZstdBlockJumpTable) as u64; + let is_literals_section = is_literals_header + + is_fse_code + + is_huffman_code + + is_lstream + + is_jumptable; + let is_huffman_tree_section = + is_fse_code + is_huffman_code + is_jumptable + is_lstream; + + let is_output = row.state.tag.is_output() as u64; + region.assign_advice( + || "tag_gadget.is_output", + self.tag_gadget.is_output, + i, + || Value::known(F::from(is_output)), + )?; + + region.assign_advice( + || "tag_gadget.is_block_header", + self.tag_gadget.is_block_header, + i, + || Value::known(F::from(is_block_header)), + )?; + region.assign_advice( + || "tag_gadget.is_literals_header", + self.tag_gadget.is_literals_header, + i, + || Value::known(F::from(is_literals_header)), + )?; + region.assign_advice( + || "tag_gadget.is_lstream", + self.tag_gadget.is_lstream, + i, + || Value::known(F::from(is_lstream)), + )?; + region.assign_advice( + || "tag_gadget.is_fse_code", + self.tag_gadget.is_fse_code, + i, + || Value::known(F::from(is_fse_code)), + )?; + region.assign_advice( + || "tag_gadget.is_huffman_code", + self.tag_gadget.is_huffman_code, + i, + || Value::known(F::from(is_huffman_code)), + )?; + region.assign_advice( + || "tag_gadget.is_literals_section", + self.tag_gadget.is_literals_section, + i, + || Value::known(F::from(is_literals_section)), + )?; + region.assign_advice( + || "tag_gadget.is_huffman_tree_section", + self.tag_gadget.is_huffman_tree_section, + i, + || Value::known(F::from(is_huffman_tree_section)), + )?; + + // Literals Header + region.assign_advice( + || "literals_header.branch", + self.literals_header.branch, + i, + || Value::known(F::from(aux_data[10])), + )?; + region.assign_advice( + || "literals_header.sf_max", + self.literals_header.sf_max, + i, 
+ || Value::known(F::from(aux_data[11])), + )?; + region.assign_advice( + || "literals_header.regen_size", + self.literals_header.regen_size, + i, + || Value::known(F::from(aux_data[4])), + )?; + region.assign_advice( + || "literals_header.compr_size", + self.literals_header.compr_size, + i, + || Value::known(F::from(aux_data[5])), + )?; + + // Huffman Tree Config + region.assign_advice( + || "huffman_tree_config.huffman_tree_idx", + self.huffman_tree_config.huffman_tree_idx, + i, + || Value::known(F::from(aux_data[6])), + )?; + region.assign_advice( + || "huffman_tree_config.fse_table_size", + self.huffman_tree_config.fse_table_size, + i, + || Value::known(F::from(aux_data[7])), + )?; + region.assign_advice( + || "huffman_tree_config.fse_table_al", + self.huffman_tree_config.fse_table_al, + i, + || Value::known(F::from(aux_data[8])), + )?; + region.assign_advice( + || "huffman_tree_config.huffman_code_len", + self.huffman_tree_config.huffman_code_len, + i, + || Value::known(F::from(aux_data[9])), + )?; + + // Bitstream Decoder + region.assign_advice( + || "bitstream_decoder.bit_index_start", + self.bitstream_decoder.bit_index_start, + i, + || Value::known(F::from(row.bitstream_read_data.bit_start_idx as u64)), + )?; + region.assign_advice( + || "bitstream_decoder.bit_index_end", + self.bitstream_decoder.bit_index_end, + i, + || Value::known(F::from(row.bitstream_read_data.bit_end_idx as u64)), + )?; + region.assign_advice( + || "bitstream_decoder.bit_value", + self.bitstream_decoder.bit_value, + i, + || Value::known(F::from(row.bitstream_read_data.bit_value)), + )?; + region.assign_advice( + || "bitstream_decoder.is_nil", + self.bitstream_decoder.is_nil, + i, + || Value::known(F::from(row.bitstream_read_data.is_zero_bit_read as u64)), + )?; + + let bitstring_contained_chip = ComparatorChip::construct( + self.bitstream_decoder.bitstring_contained.clone(), + ); + bitstring_contained_chip.assign( + &mut region, + i, + F::from(row.bitstream_read_data.bit_end_idx as 
u64), + F::from(7u64), + )?; + + region.assign_advice( + || "bitstream_decoder.decoded_symbol", + self.bitstream_decoder.decoded_symbol, + i, + || Value::known(F::from(row.decoded_data.decoded_byte as u64)), + )?; + + // FSE Gadget + region.assign_advice( + || "fse_decoder.num_emitted", + self.fse_decoder.num_emitted, + i, + || Value::known(F::from(row.fse_data.num_emitted)), + )?; + region.assign_advice( + || "fse_decoder.n_acc", + self.fse_decoder.n_acc, + i, + || Value::known(F::from(row.fse_data.n_acc)), + )?; + region.assign_advice( + || "fse_decoder.state", + self.fse_decoder.state, + i, + || Value::known(F::from(row.fse_data.state)), + )?; + region.assign_advice( + || "fse_decoder.baseline", + self.fse_decoder.baseline, + i, + || Value::known(F::from(row.fse_data.baseline)), + )?; + region.assign_advice( + || "fse_decoder.symbol", + self.fse_decoder.symbol, + i, + || Value::known(F::from(row.fse_data.symbol)), + )?; + + // Lstream Config + let is_four_streams: u64 = if aux_data[2] > 0 { 1 } else { 0 }; + region.assign_advice( + || "lstream_config.lstream_kind", + self.lstream_config.lstream_kind, + i, + || Value::known(F::from(is_four_streams)), + )?; + region.assign_advice( + || "lstream_config.lstream", + self.lstream_config.lstream, + i, + || Value::known(F::from(row.huffman_data.stream_idx as u64)), + )?; + + let lstream_num_chip = + BinaryNumberChip::construct(self.lstream_config.lstream_num); + lstream_num_chip.assign(&mut region, i, &row.huffman_data.stream_idx.into())?; + + region.assign_advice( + || "lstream_config.len_lstream1", + self.lstream_config.len_lstream1, + i, + || Value::known(F::from(aux_data[0])), + )?; + region.assign_advice( + || "lstream_config.len_lstream2", + self.lstream_config.len_lstream2, + i, + || Value::known(F::from(aux_data[1])), + )?; + region.assign_advice( + || "lstream_config.len_lstream3", + self.lstream_config.len_lstream3, + i, + || Value::known(F::from(aux_data[2])), + )?; + region.assign_advice( + || 
"lstream_config.len_lstream4", + self.lstream_config.len_lstream4, + i, + || Value::known(F::from(aux_data[3])), + )?; + } + + // TODO: Should assign sequence section. Dummy row for sequencing section header as + // of now + region.assign_advice( + || "byte_idx", + self.byte_idx, + witness_rows.len(), + || Value::known(F::from((last_byte_idx + 1) as u64)), + )?; + region.assign_advice( + || "tag_gadget.is_tag_change", + self.tag_gadget.is_tag_change, + witness_rows.len(), + || Value::known(F::one()), + )?; + + Ok(()) + }, + )?; + + Ok(()) + } +} + /// The Decompression circuit decodes an instance of zstd compressed data. #[derive(Clone, Debug, Default)] pub struct DecompressionCircuit { + compressed_frames: Vec>, _data: PhantomData, } @@ -2796,10 +3197,31 @@ impl SubCircuit for DecompressionCircuit { fn synthesize_sub( &self, - _config: &Self::Config, - _challenges: &Challenges>, - _layouter: &mut impl Layouter, + config: &Self::Config, + challenges: &Challenges>, + layouter: &mut impl Layouter, ) -> Result<(), Error> { - Ok(()) + let mut witness_rows: Vec> = vec![]; + let mut data: Vec = vec![]; + let mut fse_aux_tables = vec![]; + let mut huffman_aux_data = vec![]; + + for idx in 0..self.compressed_frames.len() { + let (rows, _decoded_literals, aux_data, f_fse_aux_tables, huffman_codes) = + process::(&self.compressed_frames[idx], challenges.keccak_input()); + witness_rows.extend_from_slice(&rows); + data.extend_from_slice(&aux_data); + fse_aux_tables.extend_from_slice(&f_fse_aux_tables); + huffman_aux_data.extend_from_slice(&huffman_codes); + } + + config.assign( + layouter, + witness_rows, + data, + fse_aux_tables, + huffman_aux_data, + challenges, + ) } } diff --git a/zkevm-circuits/src/decompression_circuit/dev.rs b/zkevm-circuits/src/decompression_circuit/dev.rs index 1748963c7b..6308410122 100644 --- a/zkevm-circuits/src/decompression_circuit/dev.rs +++ b/zkevm-circuits/src/decompression_circuit/dev.rs @@ -10,7 +10,8 @@ use crate::{ }, table::{ 
decompression::{ - BitstringAccumulationTable, FseTable, HuffmanCodesTable, LiteralsHeaderTable, + BitstringAccumulationTable, DecodedLiteralsTable, FseTable, HuffmanCodesTable, + LiteralsHeaderTable, }, BitwiseOpTable, KeccakTable, Pow2Table, PowOfRandTable, RangeTable, }, @@ -19,6 +20,7 @@ use crate::{ impl Circuit for DecompressionCircuit { type Config = (DecompressionCircuitConfig, Challenges); + type FloorPlanner = SimpleFloorPlanner; #[cfg(feature = "circuit-params")] type Params = (); @@ -51,6 +53,8 @@ impl Circuit for DecompressionCircuit { range16, range64, ); + let decoded_literals_table = + DecodedLiteralsTable::construct(meta, challenge_exprs.clone(), range256); let config = DecompressionCircuitConfig::new( meta, @@ -60,7 +64,12 @@ impl Circuit for DecompressionCircuit { huffman_codes_table, bs_acc_table, literals_header_table, + decoded_literals_table, + bitwise_op_table, + range4, range8, + range16, + range64, range128, range256, pow2_table, @@ -78,6 +87,20 @@ impl Circuit for DecompressionCircuit { mut layouter: impl Layouter, ) -> Result<(), Error> { let challenges = &config.1.values(&layouter); + + config.0.bitwise_op_table.load(&mut layouter)?; + config.0.range4.load(&mut layouter)?; + config.0.range8.load(&mut layouter)?; + config.0.range16.load(&mut layouter)?; + config.0.range64.load(&mut layouter)?; + config.0.range128.load(&mut layouter)?; + config.0.range256.load(&mut layouter)?; + config.0.tag_rom_table.load(&mut layouter)?; + config.0.pow_rand_table.assign(&mut layouter, challenges)?; + config.0.block_type_rom_table.load(&mut layouter)?; + config.0.pow2_table.load(&mut layouter)?; + config.0.literals_header_rom_table.load(&mut layouter)?; + self.synthesize_sub(&config.0, challenges, &mut layouter) } } diff --git a/zkevm-circuits/src/decompression_circuit/test.rs b/zkevm-circuits/src/decompression_circuit/test.rs index d39272c34a..ae2e5ed43a 100644 --- a/zkevm-circuits/src/decompression_circuit/test.rs +++ 
b/zkevm-circuits/src/decompression_circuit/test.rs @@ -1,8 +1,7 @@ #[test] fn test_basic() { - use halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr}; - use crate::decompression_circuit::DecompressionCircuit; + use halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr}; let circuit = DecompressionCircuit::::default(); let mock_prover = MockProver::run(17, &circuit, vec![]); @@ -14,3 +13,74 @@ fn test_basic() { mock_prover.assert_satisfied_par(); } + +#[test] +fn test_work_example_decompression() { + use crate::decompression_circuit::DecompressionCircuit; + use halo2_proofs::{dev::MockProver, halo2curves::bn256::Fr}; + + let compressed: Vec = vec![ + // 0x28, 0xb5, 0x2f, 0xfd, // magic numbers are removed + 0x60, // Originally 0x64. unset the checksum bit. + 0xae, 0x02, // FrameContentSize + 0x0d, 0x11, 0x00, // BlockHeader + 0x76, 0x62, 0x5e, // ZstdBlockLiteralsHeader + 0x23, 0x30, 0x6f, 0x9b, 0x03, // ZstdBlockFseCode + // ZstdBlockHuffmanCode + 0x7d, 0xc7, 0x16, 0x0b, 0xbe, 0xc8, 0xf2, 0xd0, 0x22, 0x4b, 0x6b, 0xbc, 0x54, 0x5d, 0xa9, + 0xd4, 0x93, 0xef, 0xc4, 0x54, 0x96, 0xb2, 0xe2, 0xa8, 0xa8, 0x24, 0x1c, 0x54, 0x40, 0x29, + 0x01, // ZstdBlockJumpTable + 0x55, 0x00, 0x57, 0x00, 0x51, 0x00, // LStream1 + 0xcc, 0x51, 0x73, 0x3a, 0x85, 0x9e, 0xf7, 0x59, 0xfc, 0xc5, 0xca, 0x6a, 0x7a, 0xd9, 0x82, + 0x9c, 0x65, 0xc5, 0x45, 0x92, 0xe3, 0x0d, 0xf3, 0xef, 0x71, 0xee, 0xdc, 0xd5, 0xa2, 0xe3, + 0x48, 0xad, 0xa3, 0xbc, 0x41, 0x7a, 0x3c, 0xaa, 0xd6, 0xeb, 0xd0, 0x77, 0xea, 0xdc, 0x5d, + 0x41, 0x06, 0x50, 0x1c, 0x49, 0x0f, 0x07, 0x10, 0x05, 0x88, 0x84, 0x94, 0x02, 0xfc, 0x3c, + 0xe3, 0x60, 0x25, 0xc0, 0xcb, 0x0c, 0xb8, 0xa9, 0x73, 0xbc, 0x13, 0x77, 0xc6, 0xe2, 0x20, + 0xed, 0x17, 0x7b, 0x12, 0xdc, 0x24, 0x5a, 0xdf, 0xb4, 0x21, // LStream2 + 0x9a, 0xcb, 0x8f, 0xc7, 0x58, 0x54, 0x11, 0xa9, 0xf1, 0x47, 0x82, 0x9b, 0xba, 0x60, 0xb4, + 0x92, 0x28, 0x0e, 0xfb, 0x8b, 0x1e, 0x92, 0x23, 0x6a, 0xcf, 0xbf, 0xe5, 0x45, 0xb5, 0x7e, + 0xeb, 0x81, 0xf1, 0x78, 0x4b, 0xad, 0x17, 0x4d, 
0x81, 0x9f, 0xbc, 0x67, 0xa7, 0x56, 0xee, + 0xb4, 0xd9, 0xe1, 0x95, 0x21, 0x66, 0x0c, 0x95, 0x83, 0x27, 0xde, 0xac, 0x37, 0x20, 0x91, + 0x22, 0x07, 0x0b, 0x91, 0x86, 0x94, 0x1a, 0x7b, 0xf6, 0x4c, 0xb0, 0xc0, 0xe8, 0x2e, 0x49, + 0x65, 0xd6, 0x34, 0x63, 0x0c, 0x88, 0x9b, 0x1c, 0x48, 0xca, 0x2b, 0x34, + // LStream3 + 0xa9, 0x6b, 0x99, 0x3b, 0xee, 0x13, 0x3b, 0x7c, 0x93, 0x0b, 0xf7, 0x0d, 0x49, 0x69, 0x18, + 0x57, 0xbe, 0x3b, 0x64, 0x45, 0x1d, 0x92, 0x63, 0x7f, 0xe8, 0xf9, 0xa1, 0x19, 0x7b, 0x7b, + 0x6e, 0xd8, 0xa3, 0x90, 0x23, 0x82, 0xf4, 0xa7, 0xce, 0xc8, 0xf8, 0x90, 0x15, 0xb3, 0x14, + 0xf4, 0x40, 0xe7, 0x02, 0x78, 0xd3, 0x17, 0x71, 0x23, 0xb1, 0x19, 0xad, 0x6b, 0x49, 0xae, + 0x13, 0xa4, 0x75, 0x38, 0x51, 0x47, 0x89, 0x67, 0xb0, 0x39, 0xb4, 0x53, 0x86, 0xa4, 0xac, + 0xaa, 0xa3, 0x34, 0x89, 0xca, 0x2e, // LStream4 + 0xe9, 0xc1, 0xfe, 0xf2, 0x51, 0xc6, 0x51, 0x73, 0xaa, 0xf7, 0x9d, 0x2d, 0xed, 0xd9, 0xb7, + 0x4a, 0xb2, 0xb2, 0x61, 0xe4, 0xef, 0x98, 0xf7, 0xc5, 0xef, 0x51, 0x9b, 0xd8, 0xdc, 0x60, + 0x6c, 0x41, 0x76, 0xaf, 0x78, 0x1a, 0x62, 0xb5, 0x4c, 0x1e, 0x21, 0x39, 0x9a, 0x5f, 0xac, + 0x9d, 0xe0, 0x62, 0xe8, 0xe9, 0x2f, 0x2f, 0x48, 0x02, 0x8d, 0x53, 0xc8, 0x91, 0xf2, 0x1a, + 0xd2, 0x7c, 0x0a, 0x7c, 0x48, 0xbf, 0xda, 0xa9, 0xe3, 0x38, 0xda, 0x34, 0xce, 0x76, 0xa9, + 0xda, 0x15, 0x91, 0xde, 0x21, 0xf5, 0x55, // Sequence Section + 0x46, 0xa8, 0x21, 0x9d, 0x51, 0xcc, 0x18, 0x42, 0x44, 0x81, 0x8c, 0x94, 0xb4, 0x50, 0x1e, + 0x20, 0x42, 0x82, 0x98, 0xc2, 0x3b, 0x10, 0x48, 0xec, 0xa6, 0x39, 0x63, 0x13, 0xa7, 0x01, + 0x94, 0x40, 0xff, 0x88, 0x0f, 0x98, 0x07, 0x4a, 0x46, 0x38, 0x05, 0xa9, 0xcb, 0xf6, 0xc8, + 0x21, 0x59, 0xaa, 0x38, 0x45, 0xbf, 0x5c, 0xf8, 0x55, 0x9e, 0x9f, 0x04, 0xed, 0xc8, 0x03, + 0x42, 0x2a, 0x4b, 0xf6, 0x78, 0x7e, 0x23, 0x67, 0x15, 0xa2, 0x79, 0x29, 0xf4, 0x9b, 0x7e, + 0x00, 0xbc, 0x2f, 0x46, 0x96, 0x99, 0xea, 0xf1, 0xee, 0x1c, 0x6e, 0x06, 0x9c, 0xdb, 0xe4, + 0x8c, 0xc2, 0x05, 0xf7, 0x54, 0x51, 0x84, 0xc0, 0x33, 0x02, 0x01, 0xb1, 0x8c, 0x80, 0xdc, + 0x99, 
0x8f, 0xcb, 0x46, 0xff, 0xd1, 0x25, 0xb5, 0xb6, 0x3a, 0xf3, 0x25, 0xbe, 0x85, 0x50, + 0x84, 0xf5, 0x86, 0x5a, 0x71, 0xf7, 0xbd, 0xa1, 0x4c, 0x52, 0x4f, 0x20, 0xa3, 0x61, 0x23, + 0x77, 0x12, 0xd3, 0xb1, 0x58, 0x75, 0x22, 0x01, 0x12, 0x70, 0xec, 0x14, 0x91, 0xf9, 0x85, + 0x61, 0xd5, 0x7e, 0x98, 0x84, 0xc9, 0x76, 0x84, 0xbc, 0xb8, 0xfe, 0x4e, 0x53, 0xa5, 0x06, + 0x82, 0x14, 0x95, 0x51, + ]; + + let decompression_circuit = DecompressionCircuit:: { + compressed_frames: vec![compressed], + _data: Default::default(), + }; + + let mock_prover = MockProver::run(18, &decompression_circuit, vec![]); + + let mock_prover = mock_prover.unwrap(); + if let Err(errors) = mock_prover.verify_par() { + log::debug!("errors.len() = {}", errors.len()); + } + + mock_prover.assert_satisfied_par(); +} diff --git a/zkevm-circuits/src/table/decompression.rs b/zkevm-circuits/src/table/decompression.rs deleted file mode 100644 index a3a0f4ef76..0000000000 --- a/zkevm-circuits/src/table/decompression.rs +++ /dev/null @@ -1,2134 +0,0 @@ -//! Tables with constraints used for verification of zstd decoding from Huffman Codes and FSE -//! codes. 
- -use array_init::array_init; -use eth_types::Field; -use gadgets::{ - binary_number::{BinaryNumberChip, BinaryNumberConfig}, - comparator::{ComparatorChip, ComparatorConfig, ComparatorInstruction}, - impl_expr, - is_equal::{IsEqualChip, IsEqualConfig}, - util::{and, not, select, Expr}, -}; -use halo2_proofs::{ - circuit::{Layouter, Value}, - plonk::{Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed, VirtualCells}, - poly::Rotation, -}; -use strum::IntoEnumIterator; -use strum_macros::EnumIter; - -use crate::{ - evm_circuit::util::constraint_builder::{BaseConstraintBuilder, ConstrainBuilderCommon}, - table::BitwiseOp, - witness::{ - FseAuxiliaryTableData, FseSymbol, HuffmanCodesData, TagRomTableRow, ZstdTag, N_BITS_SYMBOL, - N_MAX_SYMBOLS, - }, -}; - -use super::{BitwiseOpTable, LookupTable, Pow2Table, RangeTable, U8Table}; - -/// An auxiliary table used to ensure that the FSE table was reconstructed appropriately. Contrary -/// to the FseTable where the state is incremental, in the Auxiliary table we club together rows by -/// symbol. Which means, we will have rows with symbol s0 (and varying, but not necessarily -/// incremental states) clubbed together, followed by symbol s1 and so on. -/// -/// | State | Symbol | Baseline | Nb | Baseline Mark | -/// |-------|--------|----------|-----|---------------| -/// | 0x00 | s0 | ... | ... | 0 | -/// | 0x01 | s0 | ... | ... | 0 | -/// | 0x02 | s0 | ... | ... | 0 | -/// | ... | s0 | ... | ... | ... | -/// | 0x1d | s0 | ... | ... | 0 | -/// | 0x03 | s1 -> | 0x10 | ... | 0 | -/// | 0x0c | s1 -> | 0x18 | ... | 0 | -/// | 0x11 | s1 -> | 0x00 | ... | 1 | -/// | 0x15 | s1 -> | 0x04 | ... | 1 | -/// | 0x1a | s1 -> | 0x08 | ... | 1 | -/// | 0x1e | s1 -> | 0x0c | ... | 1 | -/// | 0x08 | s2 | ... | ... | 0 | -/// | ... | ... | ... | ... | 0 | -/// | 0x09 | s6 | ... | ... | 0 | -/// -/// Above is a representation of this table. 
Primarily we are interested in verifying that: -/// - next state (for the same symbol) was assigned correctly -/// - the number of times this symbol appears is assigned correctly -/// -/// For more details, refer the [FSE reconstruction][doclink] section. -/// -/// [doclink]: https://nigeltao.github.io/blog/2022/zstandard-part-5-fse.html#fse-reconstruction -#[derive(Clone, Debug)] -pub struct FseTable { - /// Fixed column to denote whether the constraints will be enabled or not. - pub q_enabled: Column, - /// The byte offset within the data instance where the encoded FSE table begins. This is - /// 1-indexed, i.e. byte_offset == 1 at the first byte. - pub byte_offset: Column, - /// Helper gadget to know when we are done handling a single canonical Huffman code. - pub byte_offset_cmp: ComparatorConfig, - /// The size of the FSE table that starts at byte_offset. - pub table_size: Column, - /// Helper column for (table_size >> 1). - pub table_size_rs_1: Column, - /// Helper column for (table_size >> 3). - pub table_size_rs_3: Column, - /// Incremental index. - pub idx: Column, - /// The symbol (weight) assigned to this state. - pub symbol: Column, - /// Helper gadget to know whether the symbol is the same or not. - pub symbol_eq: IsEqualConfig, - /// Represents the number of times this symbol appears in the FSE table. This value does not - /// change while the symbol in the table remains the same. - pub symbol_count: Column, - /// An accumulator that resets to 1 each time we encounter a new symbol in the Auxiliary table - /// and increments by 1 while the symbol remains the same. On the row where symbol' != symbol - /// we have: symbol_count == symbol_count_acc. - pub symbol_count_acc: Column, - /// The state in FSE. In the Auxiliary table, it does not increment by 1. Instead, it follows: - /// - state'' == state + table_size_rs_1 + table_size_rs_3 + 3 - /// - state' == state'' & (table_size - 1) - /// - /// where state' is the next row's state. 
- pub state: Column, - /// Denotes the baseline field. - pub baseline: Column, - /// Helper column to mark the baseline observed at the last occurence of a symbol. - pub last_baseline: Column, - /// The number of bits to be read from bitstream at this state. - pub nb: Column, - /// The smaller power of two assigned to this state. The following must hold: - /// - 2 ^ nb == SPoT. - pub spot: Column, - /// An accumulator over SPoT value. - pub spot_acc: Column, - /// Helper column to remember the smallest spot for that symbol. - pub smallest_spot: Column, - /// Helper boolean column which is set only from baseline == 0x00. - pub baseline_mark: Column, -} - -impl FseTable { - /// Construct the auxiliary table for FSE codes. - pub fn construct( - meta: &mut ConstraintSystem, - bitwise_op_table: BitwiseOpTable, - pow2_table: Pow2Table, - range_table: RangeTable<8>, - u8_table: U8Table, - ) -> Self { - let q_enabled = meta.fixed_column(); - let byte_offset = meta.advice_column(); - let symbol = meta.advice_column(); - let spot = meta.advice_column(); - let smallest_spot = meta.advice_column(); - let table = Self { - q_enabled, - byte_offset, - byte_offset_cmp: ComparatorChip::configure( - meta, - |meta| meta.query_fixed(q_enabled, Rotation::cur()), - |meta| meta.query_advice(byte_offset, Rotation::cur()), - |meta| meta.query_advice(byte_offset, Rotation::next()), - u8_table.into(), - ), - table_size: meta.advice_column(), - table_size_rs_1: meta.advice_column(), - table_size_rs_3: meta.advice_column(), - idx: meta.advice_column(), - symbol, - symbol_eq: IsEqualChip::configure( - meta, - |meta| meta.query_fixed(q_enabled, Rotation::cur()), - |meta| meta.query_advice(symbol, Rotation::cur()), - |meta| meta.query_advice(symbol, Rotation::next()), - ), - symbol_count: meta.advice_column(), - symbol_count_acc: meta.advice_column(), - state: meta.advice_column(), - baseline: meta.advice_column(), - last_baseline: meta.advice_column(), - nb: meta.advice_column(), - spot, - 
spot_acc: meta.advice_column(), - smallest_spot, - baseline_mark: meta.advice_column(), - }; - - // All rows. - meta.create_gate("FseAuxiliaryTable: all rows", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - cb.require_boolean( - "baseline_mark == [0, 1]", - meta.query_advice(table.baseline_mark, Rotation::cur()), - ); - - let (gt, eq) = table.byte_offset_cmp.expr(meta, None); - cb.require_equal("byte offset is increasing", gt + eq, 1.expr()); - - cb.gate(meta.query_fixed(table.q_enabled, Rotation::cur())) - }); - - // Validate SPoT assignment: all rows. - meta.lookup_any("FseAuxiliaryTable: SPoT == 2 ^ Nb", |meta| { - let condition = meta.query_fixed(table.q_enabled, Rotation::cur()); - - [ - meta.query_advice(table.nb, Rotation::cur()), - meta.query_advice(table.spot, Rotation::cur()), - ] - .into_iter() - .zip(pow2_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - - // Constraints while traversing an FSE table. - meta.create_gate("FseAuxiliaryTable: table size and helper columns", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Table size, and the right-shifted helper values remain unchanged. - for col in [ - table.table_size, - table.table_size_rs_1, - table.table_size_rs_3, - ] { - cb.require_equal( - "while byte_offset' == byte_offset: table_size and helpers remain unchanged", - meta.query_advice(col, Rotation::next()), - meta.query_advice(col, Rotation::cur()), - ); - } - - // Index is incremental. 
- cb.require_equal( - "idx' == idx + 1", - meta.query_advice(table.idx, Rotation::next()), - meta.query_advice(table.idx, Rotation::cur()) + 1.expr(), - ); - - cb.require_boolean( - "symbol' == symbol or symbol' == symbol + 1", - meta.query_advice(table.symbol, Rotation::next()) - - meta.query_advice(table.symbol, Rotation::cur()), - ); - - let (_gt, eq) = table.byte_offset_cmp.expr(meta, None); - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - eq, - ])) - }); - - // Constraints for last row of an FSE table. - meta.create_gate("FseAuxiliaryTable: table shift right ops", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Constraint for table_size >> 1. - cb.require_boolean( - "table_size >> 1", - meta.query_advice(table.table_size, Rotation::cur()) - - (meta.query_advice(table.table_size_rs_1, Rotation::cur()) * 2.expr()), - ); - - // Constraint for idx == table_size. - cb.require_equal( - "idx == table_size", - meta.query_advice(table.idx, Rotation::cur()), - meta.query_advice(table.table_size, Rotation::cur()), - ); - - let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); - cb.gate(and::expr([ - meta.query_fixed(q_enabled, Rotation::cur()), - gt, - ])) - }); - - // Constraint for table_size >> 3. Only check on the last row. - meta.lookup("FseAuxiliaryTable: table shift right ops", |meta| { - let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); - let condition = and::expr([meta.query_fixed(q_enabled, Rotation::cur()), gt]); - - let range_value = meta.query_advice(table.table_size, Rotation::cur()) - - (meta.query_advice(table.table_size_rs_3, Rotation::cur()) * 8.expr()); - - vec![(condition * range_value, range_table.into())] - }); - - // Constraint for state' calculation. 
We wish to constrain: - // - // - state' == state'' & (table_size - 1) - // - state'' == state + (table_size >> 3) + (table_size >> 1) + 3 - meta.lookup_any("FseAuxiliaryTable: next state computation", |meta| { - let (_gt, eq) = table.byte_offset_cmp.expr(meta, None); - let condition = and::expr([meta.query_fixed(table.q_enabled, Rotation::cur()), eq]); - - let lhs = meta.query_advice(table.state, Rotation::cur()) - + meta.query_advice(table.table_size_rs_3, Rotation::cur()) - + meta.query_advice(table.table_size_rs_1, Rotation::cur()) - + 3.expr(); - let rhs = meta.query_advice(table.table_size, Rotation::cur()) - 1.expr(); - let output = meta.query_advice(table.state, Rotation::next()); - - [BitwiseOp::AND.expr(), lhs, rhs, output] - .into_iter() - .zip(bitwise_op_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - - // Constraints for same FSE table and same symbol. - meta.create_gate("FseAuxiliaryTable: symbol' == symbol", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Symbol's count remains unchanged while symbol remained unchanged. - cb.require_equal( - "if symbol' == symbol: symbol_count' == symbol_count", - meta.query_advice(table.symbol_count, Rotation::next()), - meta.query_advice(table.symbol_count, Rotation::cur()), - ); - - // SPoT at baseline == 0x00 remains unchanged over these rows. - cb.require_equal( - "if symbol' == symbol: smallest SPoT is unchanged", - meta.query_advice(table.smallest_spot, Rotation::next()), - meta.query_advice(table.smallest_spot, Rotation::cur()), - ); - - // last baseline remains unchanged over these rows. - cb.require_equal( - "if symbol' == symbol: last baseline is unchanged", - meta.query_advice(table.last_baseline, Rotation::next()), - meta.query_advice(table.last_baseline, Rotation::cur()), - ); - - // Symbol count accumulator increments. 
- cb.require_equal( - "if symbol' == symbol: symbol count accumulator increments", - meta.query_advice(table.symbol_count_acc, Rotation::next()), - meta.query_advice(table.symbol_count_acc, Rotation::cur()) + 1.expr(), - ); - - // SPoT accumulation. - cb.require_equal( - "SPoT_acc::next == SPoT_acc::cur + SPoT::next", - meta.query_advice(table.spot_acc, Rotation::next()), - meta.query_advice(table.spot_acc, Rotation::cur()) - + meta.query_advice(table.spot, Rotation::next()), - ); - - // baseline_mark can only transition from 0 to 1 once. - cb.require_boolean( - "baseline_mark transition", - meta.query_advice(table.baseline_mark, Rotation::next()) - - meta.query_advice(table.baseline_mark, Rotation::cur()), - ); - - let is_next_baseline_0x00 = meta.query_advice(table.baseline_mark, Rotation::next()) - - meta.query_advice(table.baseline_mark, Rotation::cur()); - cb.condition(is_next_baseline_0x00.expr(), |cb| { - cb.require_equal( - "baseline::next == 0x00", - meta.query_advice(table.baseline, Rotation::next()), - 0x00.expr(), - ); - }); - cb.condition(not::expr(is_next_baseline_0x00.expr()), |cb| { - cb.require_equal( - "baseline::next == baseline::cur + spot::cur", - meta.query_advice(table.baseline, Rotation::next()), - meta.query_advice(table.baseline, Rotation::cur()) - + meta.query_advice(table.spot, Rotation::cur()), - ); - }); - - let (_gt, eq) = table.byte_offset_cmp.expr(meta, None); - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - eq, - table.symbol_eq.expr(), - ])) - }); - - // Constraints when symbol changes in an FSE table, i.e. symbol' != symbol. - meta.create_gate("FseAuxiliaryTable: symbol' != symbol", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Constraint for idx == table_size. 
- cb.require_equal( - "symbol_count_acc == symbol_count", - meta.query_advice(table.symbol_count_acc, Rotation::cur()), - meta.query_advice(table.symbol_count, Rotation::cur()), - ); - - // SPoT accumulator == table_size at the end of processing the symbol. - cb.require_equal( - "SPoT_acc == table_size", - meta.query_advice(table.spot_acc, Rotation::cur()), - meta.query_advice(table.table_size, Rotation::cur()), - ); - - // The SPoT at baseline == 0x00 matches this SPoT. - cb.require_equal( - "last symbol occurrence => SPoT == SPoT at baseline 0x00", - meta.query_advice(table.smallest_spot, Rotation::cur()), - meta.query_advice(table.spot, Rotation::cur()), - ); - - // last baseline matches. - cb.require_equal( - "baseline == last_baseline", - meta.query_advice(table.baseline, Rotation::cur()), - meta.query_advice(table.last_baseline, Rotation::cur()), - ); - - cb.gate(and::expr([ - meta.query_fixed(q_enabled, Rotation::cur()), - not::expr(table.symbol_eq.expr()), - ])) - }); - - // Constraints for the first occurence of a particular symbol in the table. 
- meta.create_gate("FseAuxiliaryTable: new symbol", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - let is_baseline_marked = meta.query_advice(table.baseline_mark, Rotation::cur()); - cb.condition(is_baseline_marked.expr(), |cb| { - cb.require_equal( - "baseline == 0x00", - meta.query_advice(table.baseline, Rotation::cur()), - 0x00.expr(), - ); - }); - - cb.condition(not::expr(is_baseline_marked.expr()), |cb| { - cb.require_equal( - "baseline == last_baseline + smallest_spot", - meta.query_advice(table.baseline, Rotation::cur()), - meta.query_advice(table.last_baseline, Rotation::cur()) - + meta.query_advice(table.smallest_spot, Rotation::cur()), - ); - }); - - let symbol_prev = meta.query_advice(table.symbol, Rotation::prev()); - let symbol_cur = meta.query_advice(table.symbol, Rotation::cur()); - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - not::expr( - table - .symbol_eq - .expr_at(meta, Rotation::prev(), symbol_prev, symbol_cur), - ), - ])) - }); - - debug_assert!(meta.degree() <= 9); - - table - } - - /// Load witness. 
- pub fn dev_load( - &self, - layouter: &mut impl Layouter, - data: Vec, - ) -> Result<(), Error> { - layouter.assign_region( - || "FseAuxiliaryTable: dev load", - |mut region| { - let mut offset = 0; - for table in data.iter() { - let byte_offset = Value::known(F::from(table.byte_offset)); - let table_size = Value::known(F::from(table.table_size)); - let table_size_rs_1 = Value::known(F::from(table.table_size >> 1)); - let table_size_rs_3 = Value::known(F::from(table.table_size >> 3)); - for (&symbol, rows) in table.sym_to_states.iter() { - let symbol_count = rows.len() as u64; - let smallest_spot = rows - .iter() - .map(|fse_row| 1 << fse_row.num_bits) - .min() - .expect("symbol should have at least 1 row"); - let spot_acc_iter = rows.iter().scan(0, |spot_acc, fse_row| { - *spot_acc += 1 << fse_row.num_bits; - Some(*spot_acc) - }); - // TODO: byte_offset_cmp - // TODO: symbol_eq - // TODO: baseline_mark - // TODO: last_baseline - // TODO: q_enabled - for (i, (fse_row, spot_acc)) in rows.iter().zip(spot_acc_iter).enumerate() { - for (annotation, col, value) in [ - ("byte_offset", self.byte_offset, byte_offset), - ("table_size", self.table_size, table_size), - ("table_size_rs_1", self.table_size_rs_1, table_size_rs_1), - ("table_size_rs_3", self.table_size_rs_3, table_size_rs_3), - ("symbol", self.symbol, Value::known(F::from(symbol as u64))), - ( - "symbol_count", - self.symbol_count, - Value::known(F::from(symbol_count)), - ), - ( - "symbol_count_acc", - self.symbol_count_acc, - Value::known(F::from(i as u64 + 1)), - ), - ("state", self.state, Value::known(F::from(fse_row.state))), - ( - "baseline", - self.baseline, - Value::known(F::from(fse_row.baseline)), - ), - ("nb", self.nb, Value::known(F::from(fse_row.num_bits))), - ( - "spot", - self.spot, - Value::known(F::from(1 << fse_row.num_bits)), - ), - ( - "smallest_spot", - self.smallest_spot, - Value::known(F::from(smallest_spot)), - ), - ("spot_acc", self.spot_acc, Value::known(F::from(spot_acc))), - ("idx", 
self.idx, Value::known(F::from(fse_row.idx))), - ] { - region.assign_advice( - || format!("FseAuxiliaryTable: {}", annotation), - col, - offset, - || value, - )?; - } - offset += 1; - } - } - } - - Ok(()) - }, - ) - } -} - -impl FseTable { - /// Lookup table expressions for (state, symbol) tuple check. - pub fn table_exprs_state_check(&self, meta: &mut VirtualCells) -> Vec> { - vec![ - meta.query_advice(self.byte_offset, Rotation::cur()), - meta.query_advice(self.table_size, Rotation::cur()), - meta.query_advice(self.state, Rotation::cur()), - meta.query_advice(self.symbol, Rotation::cur()), - meta.query_advice(self.baseline, Rotation::cur()), - meta.query_advice(self.nb, Rotation::cur()), - ] - } - - /// Lookup table expressions for (symbol, symbol_count) tuple check. - pub fn table_exprs_symbol_count_check(&self, meta: &mut VirtualCells) -> Vec> { - vec![ - meta.query_advice(self.byte_offset, Rotation::cur()), - meta.query_advice(self.table_size, Rotation::cur()), - meta.query_advice(self.symbol, Rotation::cur()), - meta.query_advice(self.symbol_count, Rotation::cur()), - ] - } -} - -/// The Huffman codes table maps the canonical weights (symbols as per FseTable) to the Huffman -/// codes. -#[derive(Clone, Debug)] -pub struct HuffmanCodesTable { - /// Fixed column to denote whether the constraints will be enabled or not. - pub q_enabled: Column, - /// Fixed column to mark the first row in the table. - pub q_first: Column, - /// Set when this is the start of a new huffman code. - pub is_start: Column, - /// The byte offset within the data instance where the encoded FSE table begins. This is - /// 1-indexed, i.e. byte_offset == 1 at the first byte. - pub byte_offset: Column, - /// Helper gadget to know when we are done handling a single canonical Huffman code. - pub byte_offset_cmp: ComparatorConfig, - /// The byte that is being encoded by a Huffman code. - pub symbol: Column, - /// The weight assigned to this symbol as per the canonical Huffman code weights. 
- pub weight: Column, - /// A binary representation of the weight's value. - pub weight_bits: BinaryNumberConfig, - /// An accumulator over the weight values. - pub weight_acc: Column, - /// Helper column to denote 2 ^ (weight - 1). - pub pow2_weight: Column, - /// The sum of canonical Huffman code weights. This value does not change over the rows for a - /// specific Huffman code. - pub sum_weights: Column, - /// The maximum length of a bitstring as per this Huffman code. Again, this value does not - /// change over the rows for a specific Huffman code. - pub max_bitstring_len: Column, - /// As per Huffman coding, every symbol is mapped to a bit value, which is then represented in - /// binary form (padded) of length bitstring_len. - pub bit_value: Column, - /// The last seen bit_value for each symbol in this Huffman coding. - pub last_bit_values: [Column; N_MAX_SYMBOLS], - /// The last_bit_values assigned at the first row of a table. - pub first_lbvs: [Column; N_MAX_SYMBOLS], -} - -impl HuffmanCodesTable { - /// Construct the huffman codes table. 
- pub fn construct( - meta: &mut ConstraintSystem, - pow2_table: Pow2Table, - u8_table: U8Table, - ) -> Self { - let q_enabled = meta.fixed_column(); - let byte_offset = meta.advice_column(); - let weight = meta.advice_column(); - let table = Self { - q_enabled, - q_first: meta.fixed_column(), - byte_offset, - byte_offset_cmp: ComparatorChip::configure( - meta, - |meta| meta.query_fixed(q_enabled, Rotation::cur()), - |meta| meta.query_advice(byte_offset, Rotation::cur()), - |meta| meta.query_advice(byte_offset, Rotation::next()), - u8_table.into(), - ), - is_start: meta.advice_column(), - symbol: meta.advice_column(), - weight, - weight_bits: BinaryNumberChip::configure(meta, q_enabled, Some(weight.into())), - pow2_weight: meta.advice_column(), - weight_acc: meta.advice_column(), - sum_weights: meta.advice_column(), - max_bitstring_len: meta.advice_column(), - bit_value: meta.advice_column(), - last_bit_values: array_init(|_| meta.advice_column()), - first_lbvs: array_init(|_| meta.advice_column()), - }; - - // TODO: constrain is_start - - // All rows - meta.create_gate("HuffmanCodesTable: all rows", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - let (gt, eq) = table.byte_offset_cmp.expr(meta, None); - cb.require_equal("byte_offset' >= byte_offset", gt + eq, 1.expr()); - - // Weight == 0 implies the bit value is 0. - cb.condition( - table - .weight_bits - .value_equals(FseSymbol::S0, Rotation::cur())(meta), - |cb| { - cb.require_zero( - "bit value == 0", - meta.query_advice(table.bit_value, Rotation::cur()), - ); - }, - ); - - // Last bit value at weight == 0 is also 0. - cb.require_zero( - "last_bit_values[0] == 0", - meta.query_advice( - table.last_bit_values[FseSymbol::S0 as usize], - Rotation::cur(), - ), - ); - - cb.gate(meta.query_fixed(table.q_enabled, Rotation::cur())) - }); - - // The first row of the HuffmanCodesTable. 
- meta.create_gate("HuffmanCodesTable: first (fixed) row", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Canonical Huffman code starts with the weight of the first symbol, i.e. 0x00. - cb.require_equal( - "symbol == 0x00", - meta.query_advice(table.symbol, Rotation::cur()), - 0x00.expr(), - ); - - // Weight accumulation starts with the first weight. - cb.require_equal( - "weight_acc == 2^(weight - 1)", - meta.query_advice(table.weight_acc, Rotation::cur()), - meta.query_advice(table.pow2_weight, Rotation::cur()), - ); - - // Constrain the last bit_value of the maximum bitstring length. Maximum bitstring - // length implies weight == 1. - cb.require_zero( - "if first row: last_bit_values[1] == 0", - meta.query_advice( - table.last_bit_values[FseSymbol::S1 as usize], - Rotation::cur(), - ), - ); - - // Do an equality check for the last_bit_values at the first row. - for i in FseSymbol::iter() { - cb.require_equal( - "last bit value at the first row equality check", - meta.query_advice(table.last_bit_values[i as usize], Rotation::cur()), - meta.query_advice(table.first_lbvs[i as usize], Rotation::cur()), - ); - } - - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - meta.query_fixed(table.q_first, Rotation::cur()), - ])) - }); - - // While we are processing the weights of a particular canonical Huffman code - // representation, i.e. byte_offset == byte_offset'. - meta.create_gate( - "HuffmanCodesTable: traversing a canonical huffman coding table", - |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Sum of weights remains the same across all rows. - cb.require_equal( - "sum_weights' == sum_weights", - meta.query_advice(table.sum_weights, Rotation::next()), - meta.query_advice(table.sum_weights, Rotation::cur()), - ); - - // Maximum bitstring length remains the same across all rows. 
- cb.require_equal( - "max_bitstring_len' == max_bitstring_len", - meta.query_advice(table.max_bitstring_len, Rotation::next()), - meta.query_advice(table.max_bitstring_len, Rotation::cur()), - ); - - // The first row's last_bit_values remain the same. - for col in table.first_lbvs { - cb.require_equal( - "first_lbvs[i]' == first_lbvs[i]", - meta.query_advice(col, Rotation::next()), - meta.query_advice(col, Rotation::cur()), - ); - } - - // Weight accumulation is assigned correctly. - cb.require_equal( - "weight_acc' == weight_acc + 2^(weight - 1)", - meta.query_advice(table.weight_acc, Rotation::next()), - meta.query_advice(table.weight_acc, Rotation::cur()) - + meta.query_advice(table.pow2_weight, Rotation::next()), - ); - - // pow2_weight is assigned correctly for weight == 0. - cb.condition( - table - .weight_bits - .value_equals(FseSymbol::S0, Rotation::cur())(meta), - |cb| { - cb.require_zero( - "pow2_weight == 0 if weight == 0", - meta.query_advice(table.pow2_weight, Rotation::cur()), - ); - }, - ); - - // For all rows (except the first row of a canonical Huffman code representation, we - // want to ensure the last_bit_values was assigned correctly. - let is_start = meta.query_advice(table.is_start, Rotation::cur()); - cb.condition(not::expr(is_start.expr()), |cb| { - for (symbol, &last_bit_value) in - FseSymbol::iter().zip(table.last_bit_values.iter()) - { - cb.require_equal( - "last_bit_value_i::cur == last_bit_value::prev + (weight::cur == i)", - meta.query_advice(last_bit_value, Rotation::cur()), - meta.query_advice(last_bit_value, Rotation::prev()) - + table.weight_bits.value_equals(symbol, Rotation::cur())(meta), - ); - } - }); - - let (_gt, eq) = table.byte_offset_cmp.expr(meta, None); - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - eq, - ])) - }, - ); - - // For every row, we want the pow2_weight column to be assigned correctly. We want: - // - // pow2_weight == 2^(weight - 1). 
- // - // Note that this is valid only if weight > 0. For weight == 0, we want pow2_weight == 0. - meta.lookup_any("HuffmanCodesTable: pow2_weight assignment", |meta| { - let condition = and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - not::expr(table - .weight_bits - .value_equals(FseSymbol::S0, Rotation::cur())( - meta - )), - // TODO: add padding column. - ]); - - let exponent = meta.query_advice(table.weight, Rotation::cur()) - 1.expr(); - let exponentiation = meta.query_advice(table.pow2_weight, Rotation::cur()); - - [exponent, exponentiation] - .into_iter() - .zip(pow2_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - - // When we end processing a huffman code, i.e. the byte_offset changes. No need to check if - // the next row is padding or not. - meta.create_gate("HuffmanCodesTable: end of huffman code", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // The total sum of weights is in fact the accumulated weight. - cb.require_equal( - "sum_weights == weight_acc", - meta.query_advice(table.sum_weights, Rotation::cur()), - meta.query_advice(table.weight_acc, Rotation::cur()), - ); - - // We want to check the following: - // - // if lbv_1: The last bit_value for weight i on the first row. - // if lbv_2: The last bit_value for weight i+1 on the last row. - // - // then lbv_2 == (lbv_1 + 1) // 2 - // i.e. lbv_2 * 2 - lbv_1 is boolean. - // - // Note: we only do this check for weight > 0, hence we skip the FseSymbol::S0. 
- for i in [ - FseSymbol::S1, - FseSymbol::S2, - FseSymbol::S3, - FseSymbol::S4, - FseSymbol::S5, - FseSymbol::S6, - ] { - let i = i as usize; - let lbv_1 = meta.query_advice(table.first_lbvs[i], Rotation::cur()); - let lbv_2 = meta.query_advice(table.last_bit_values[i + 1], Rotation::cur()); - cb.require_boolean( - "last bit value check for weights i and i+1 on the first and last rows", - lbv_2 * 2.expr() - lbv_1, - ); - } - - let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - gt, - ])) - }); - - // The weight for the last symbol is assigned appropriately. The weight for the last - // symbol should satisfy: - // - // last_weight == log2(nearest_pow2 - sum_weights) + 1 - // where nearest_pow2 is the nearest power of 2 greater than the sum of weights so far. - // - // i.e. 2^(last_weight - 1) + sum_weights == 2^(max_bitstring_len) - meta.lookup_any("HuffmanCodesTable: weight of the last symbol", |meta| { - let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); - let condition = and::expr([meta.query_fixed(table.q_enabled, Rotation::cur()), gt]); - - let exponent = meta.query_advice(table.max_bitstring_len, Rotation::cur()); - let exponentiation = meta.query_advice(table.pow2_weight, Rotation::cur()) - + meta.query_advice(table.sum_weights, Rotation::prev()); - - [exponent, exponentiation] - .into_iter() - .zip(pow2_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - - // When we transition from one Huffman code to another, i.e. the byte_offset changes. We - // also check that the next row is not a padding row. - // - // TODO: add the padding column. - meta.create_gate("HuffmanCodesTable: new huffman code", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Marks the start of a new huffman code. 
- cb.require_equal( - "is_start == 1", - meta.query_advice(table.is_start, Rotation::next()), - 1.expr(), - ); - - // Canonical Huffman code starts with the weight of the first symbol, i.e. 0x00. - cb.require_equal( - "symbol == 0x00", - meta.query_advice(table.symbol, Rotation::next()), - 0x00.expr(), - ); - - // Weight accumulation starts with the first weight. - cb.require_equal( - "weight_acc == 2^(weight - 1)", - meta.query_advice(table.weight_acc, Rotation::next()), - meta.query_advice(table.pow2_weight, Rotation::next()), - ); - - // Constrain the last bit_value of the maximum bitstring length. Maximum bitstring - // length implies weight == 1. - cb.require_zero( - "if first row: last_bit_values[1] == 0", - meta.query_advice( - table.last_bit_values[FseSymbol::S1 as usize], - Rotation::next(), - ), - ); - - // Do an equality check for the last_bit_values at the first row. - for i in FseSymbol::iter() { - cb.require_equal( - "last bit value at the first row equality check", - meta.query_advice(table.last_bit_values[i as usize], Rotation::next()), - meta.query_advice(table.first_lbvs[i as usize], Rotation::next()), - ); - } - - let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - meta.query_fixed(table.q_enabled, Rotation::next()), - gt, - ])) - }); - - debug_assert!(meta.degree() <= 9); - - table - } - - /// Load witness to the huffman codes table: dev mode. 
- pub fn dev_load( - &self, - layouter: &mut impl Layouter, - data: Vec, - ) -> Result<(), Error> { - layouter.assign_region( - || "HuffmanCodesTable: dev load", - |mut region| { - let weight_bits = BinaryNumberChip::construct(self.weight_bits); - let mut offset = 0; - for code in data.iter() { - let byte_offset = Value::known(F::from(code.byte_offset)); - let (max_bitstring_len, sym_map) = code.parse_canonical(); - let max_bitstring_len = Value::known(F::from(max_bitstring_len)); - let sum_weights = Value::known(F::from( - sym_map - .values() - .map(|(weight, _bit_value)| weight) - .sum::(), - )); - let weight_acc_iter = sym_map.values().scan(0, |acc, (weight, _bit_value)| { - *acc += weight; - Some(*acc) - }); - - for (i, weight_acc) in weight_acc_iter.enumerate() { - region.assign_advice( - || "HuffmanCodesTable: weight_acc", - self.weight_acc, - offset + i, - || Value::known(F::from(weight_acc)), - )?; - } - for (&symbol, &(weight, bit_value)) in sym_map.iter() { - for (annotation, column, value) in [ - ("byte_offset", self.byte_offset, byte_offset), - ( - "max_bitstring_len", - self.max_bitstring_len, - max_bitstring_len, - ), - ("sum_weights", self.sum_weights, sum_weights), - ("symbol", self.symbol, Value::known(F::from(symbol))), - ("weight", self.weight, Value::known(F::from(weight))), - ( - "bit_value", - self.bit_value, - Value::known(F::from(bit_value)), - ), - ( - "pow2_weight", - self.pow2_weight, - Value::known(F::from(if weight > 0 { - (weight - 1).pow(2) - } else { - 0 - })), - ), - ] { - region.assign_advice( - || format!("HuffmanCodesTable: {annotation}"), - column, - offset, - || value, - )?; - } - let fse_symbol: FseSymbol = (weight as usize).into(); - weight_bits.assign(&mut region, offset, &fse_symbol)?; - - offset += 1; - } - - // TODO: assign last_bit_values - } - - // Assign the byte offset comparison gadget. - let cmp_chip = ComparatorChip::construct(self.byte_offset_cmp.clone()); - offset = 0; - - // if there is a single table. 
- if data.len() == 1 { - let byte_offset = data[0].byte_offset; - let n_rows = data[0].weights.len() + 1; - for _ in 0..n_rows - 1 { - cmp_chip.assign( - &mut region, - offset, - F::from(byte_offset), - F::from(byte_offset), - )?; - offset += 1; - } - cmp_chip.assign(&mut region, offset, F::from(byte_offset), F::zero())?; - } - - // if there are multiple tables. - if data.len() > 1 { - for window in data.windows(2) { - let byte_offset_1 = window[0].byte_offset; - let byte_offset_2 = window[1].byte_offset; - let n_rows = window[0].weights.len() + 1; - for _ in 0..n_rows - 1 { - cmp_chip.assign( - &mut region, - offset, - F::from(byte_offset_1), - F::from(byte_offset_1), - )?; - offset += 1; - } - cmp_chip.assign( - &mut region, - offset, - F::from(byte_offset_1), - F::from(byte_offset_2), - )?; - offset += 1; - } - // handle the last table. - if let Some(last_table) = data.last() { - let byte_offset = last_table.byte_offset; - let n_rows = last_table.weights.len() + 1; - for _ in 0..n_rows - 1 { - cmp_chip.assign( - &mut region, - offset, - F::from(byte_offset), - F::from(byte_offset), - )?; - offset += 1; - } - cmp_chip.assign(&mut region, offset, F::from(byte_offset), F::zero())?; - } - } - - Ok(()) - }, - ) - } -} - -impl HuffmanCodesTable { - /// Lookup the canonical weight assigned to a symbol in the Huffman code with the header at - /// the given byte_offset. - pub fn table_exprs_canonical_weight(&self, meta: &mut VirtualCells) -> Vec> { - vec![ - meta.query_advice(self.byte_offset, Rotation::cur()), - meta.query_advice(self.symbol, Rotation::cur()), - meta.query_advice(self.weight, Rotation::cur()), - ] - } - - /// Lookup the number of symbols that are present in the canonical representation of the - /// Huffman code. 
- pub fn table_exprs_weights_count(&self, meta: &mut VirtualCells) -> Vec> { - vec![ - meta.query_advice(self.byte_offset, Rotation::cur()), - meta.query_advice(self.symbol, Rotation::cur()), - // TODO: add is_last to mark the last row of a specific Huffman code. - ] - } -} - -/// An auxiliary table for the Huffman Codes. In Huffman coding a symbol (byte) is mapped to a -/// bitstring of particular length such that more frequently occuring symbols are mapped to -/// bitstrings of smaller lengths. -/// -/// We already have the symbols and their bit_value in the HuffmanCodesTable. However, we still -/// need to validate that the bit_value is in fact assigned correctly. Since bitstrings may not be -/// byte-aligned, i.e. a bitstring can span over 2 bytes (assuming a maximum bitstring length of 8) -/// we need to make sure that the bit_value is in fact the binary value represented by the bits of -/// that bitstring. -#[derive(Clone, Debug)] -pub struct BitstringAccumulationTable { - /// Fixed column to denote whether the constraints will be enabled or not. - pub q_enabled: Column, - /// The byte offset within the data instance where the encoded FSE table begins. This is - /// 1-indexed, i.e. byte_offset == 1 at the first byte. - pub byte_offset: Column, - /// The byte offset of byte_1 in the zstd encoded data. byte_idx' == byte_idx - /// while 0 <= bit_index < 15. At bit_index == 15, byte_idx' == byte_idx + 1. - pub byte_idx_1: Column, - /// The byte offset of byte_2 in the zstd encoded data. byte_idx' == byte_idx - /// while 0 <= bit_index < 15. At bit_index == 15, byte_idx' == byte_idx + 1. - /// - /// We also have byte_idx_2 == byte_idx_1 + 1. - pub byte_idx_2: Column, - /// The byte value at byte_idx_1. - pub byte_1: Column, - /// The byte value at byte_idx_2. - pub byte_2: Column, - /// The index within these 2 bytes, i.e. 0 <= bit_index <= 15. bit_index increments until its - /// 15 and then is reset to 0. 
Repeats while we finish bitstring accumulation of all bitstrings - /// used in the Huffman codes. - pub bit_index: Column, - /// Helper column to know the start of a new chunk of 2 bytes, this is a fixed column as well - /// as it is set only on bit_index == 0. - pub q_first: Column, - /// The bit at bit_index. Accumulation of bits from 0 <= bit_index <= 7 denotes byte_1. - /// Accumulation of 8 <= bit_index <= 15 denotes byte_2. - pub bit: Column, - /// The final value of the bit accumulation for the set bits. - pub bit_value: Column, - /// The length of the bitstring, i.e. the number of bits that were set. - pub bitstring_len: Column, - /// The accumulator over bits from is_start to is_end, i.e. while is_set == 1. - pub bit_value_acc: Column, - /// Boolean that is set from start of bit chunk to bit_index == 15. - pub from_start: Column, - /// Boolean that is set from bit_index == 0 to end of bit chunk. - pub until_end: Column, - /// Boolean to mark if the bitstring is a part of bytes that are read from front-to-back or - /// back-to-front. For the back-to-front case, the is_reverse boolean is set. - pub is_reverse: Column, -} - -impl BitstringAccumulationTable { - /// Construct the bitstring accumulation table. 
- pub fn construct(meta: &mut ConstraintSystem) -> Self { - let q_enabled = meta.fixed_column(); - let table = Self { - q_enabled, - byte_offset: meta.advice_column(), - byte_idx_1: meta.advice_column(), - byte_idx_2: meta.advice_column(), - byte_1: meta.advice_column(), - byte_2: meta.advice_column(), - bit_index: meta.fixed_column(), - q_first: meta.fixed_column(), - bit: meta.advice_column(), - bit_value: meta.advice_column(), - bitstring_len: meta.advice_column(), - bit_value_acc: meta.advice_column(), - from_start: meta.advice_column(), - until_end: meta.advice_column(), - is_reverse: meta.advice_column(), - }; - - meta.create_gate("BitstringAccumulationTable: bit_index == 0", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - let bits = (0..16) - .map(|i| meta.query_advice(table.bit, Rotation(i))) - .collect::>>(); - - cb.require_equal( - "byte1 is the binary accumulation of 0 <= bit_index <= 7", - meta.query_advice(table.byte_1, Rotation::cur()), - select::expr( - meta.query_advice(table.is_reverse, Rotation::cur()), - bits[7].expr() - + bits[6].expr() * 2.expr() - + bits[5].expr() * 4.expr() - + bits[4].expr() * 8.expr() - + bits[3].expr() * 16.expr() - + bits[2].expr() * 32.expr() - + bits[1].expr() * 64.expr() - + bits[0].expr() * 128.expr(), - bits[0].expr() - + bits[1].expr() * 2.expr() - + bits[2].expr() * 4.expr() - + bits[3].expr() * 8.expr() - + bits[4].expr() * 16.expr() - + bits[5].expr() * 32.expr() - + bits[6].expr() * 64.expr() - + bits[7].expr() * 128.expr(), - ), - ); - - cb.require_equal( - "byte2 is the binary accumulation of 8 <= bit_index <= 15", - meta.query_advice(table.byte_2, Rotation::cur()), - select::expr( - meta.query_advice(table.is_reverse, Rotation::cur()), - bits[15].expr() - + bits[14].expr() * 2.expr() - + bits[13].expr() * 4.expr() - + bits[12].expr() * 8.expr() - + bits[11].expr() * 16.expr() - + bits[10].expr() * 32.expr() - + bits[9].expr() * 64.expr() - + bits[8].expr() * 128.expr(), - bits[8].expr() - + 
bits[9].expr() * 2.expr() - + bits[10].expr() * 4.expr() - + bits[11].expr() * 8.expr() - + bits[12].expr() * 16.expr() - + bits[13].expr() * 32.expr() - + bits[14].expr() * 64.expr() - + bits[15].expr() * 128.expr(), - ), - ); - - cb.require_equal( - "byte2 follows byte2, i.e. byte_idx_2 == byte_idx_1 + 1", - meta.query_advice(table.byte_idx_2, Rotation::cur()), - meta.query_advice(table.byte_idx_1, Rotation::cur()) + 1.expr(), - ); - - cb.require_boolean( - "is_reverse is boolean", - meta.query_advice(table.is_reverse, Rotation::cur()), - ); - - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - meta.query_fixed(table.q_first, Rotation::cur()), - ])) - }); - - debug_assert!(meta.degree() <= 9); - - meta.create_gate("BitstringAccumulationTable: bit_index > 0", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Constrain columns that are unchanged from 0 < bit_idx < 16. - for col in [ - table.byte_offset, - table.byte_idx_1, - table.byte_idx_2, - table.byte_1, - table.byte_2, - table.bit_value, - table.is_reverse, - ] { - cb.require_equal( - "unchanged columns from 0 < bit_idx < 16", - meta.query_advice(col, Rotation::cur()), - meta.query_advice(col, Rotation::prev()), - ); - } - - cb.gate(and::expr([ - meta.query_fixed(table.q_enabled, Rotation::cur()), - not::expr(meta.query_fixed(table.q_first, Rotation::cur())), - ])) - }); - - debug_assert!(meta.degree() <= 9); - - // Consider a bit chunk from bit_index == 4 to bit_index == 9. 
We will have: - // - // | bit index | from start | until end | bitstring len | bit | bit value acc | - // |-----------|------------|-----------|---------------|-----|---------------| - // | 0 | 1 | 0 | 0 | 0 | 0 | - // | 1 | 1 | 0 | 0 | 0 | 0 | - // | 2 | 1 | 0 | 0 | 1 | 0 | - // | 3 | 1 | 0 | 0 | 0 | 0 | - // | 4 -> | 1 | 1 | 1 | 1 | 1 | - // | 5 -> | 1 | 1 | 2 | 0 | 1 | - // | 6 -> | 1 | 1 | 3 | 1 | 5 | - // | 7 -> | 1 | 1 | 4 | 1 | 13 | - // | 8 -> | 1 | 1 | 5 | 0 | 13 | - // | 9 -> | 1 | 1 | 6 | 1 | 45 | - // | 10 | 0 | 1 | 6 | 0 | 45 | - // | 11 | 0 | 1 | 6 | 0 | 45 | - // | 12 | 0 | 1 | 6 | 0 | 45 | - // | 13 | 0 | 1 | 6 | 1 | 45 | - // | 14 | 0 | 1 | 6 | 1 | 45 | - // | 15 | 0 | 1 | 6 | 0 | 45 | - // - // The bits for the bitstring are where from_start == until_end == 1. - meta.create_gate("BitstringAccumulationTable: bit value", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - // Columns from_start and until_end are boolean. - cb.require_boolean( - "from_start is boolean", - meta.query_advice(table.from_start, Rotation::cur()), - ); - cb.require_boolean( - "until_end is boolean", - meta.query_advice(table.until_end, Rotation::cur()), - ); - - // Column from_start transitions from 1 to 0 only once. - let is_first = meta.query_fixed(table.q_first, Rotation::cur()); - cb.condition(is_first.expr(), |cb| { - cb.require_equal( - "if q_first == True: from_start == 1", - meta.query_advice(table.from_start, Rotation::cur()), - 1.expr(), - ); - }); - cb.condition(not::expr(is_first.expr()), |cb| { - cb.require_boolean( - "from_start transitions from 1 to 0 only once", - meta.query_advice(table.from_start, Rotation::prev()) - - meta.query_advice(table.from_start, Rotation::cur()), - ); - }); - - // Column until_end transitions from 0 to 1 only once. 
- let is_last = meta.query_fixed(table.q_first, Rotation::next()); - cb.condition(is_last.expr(), |cb| { - cb.require_equal( - "if q_first::next == True: until_end == 1", - meta.query_advice(table.until_end, Rotation::cur()), - 1.expr(), - ); - }); - cb.condition(not::expr(is_last.expr()), |cb| { - cb.require_boolean( - "until_end transitions from 0 to 1 only once", - meta.query_advice(table.until_end, Rotation::next()) - - meta.query_advice(table.until_end, Rotation::cur()), - ); - }); - - // Constraints at meaningful bits. - let is_set = and::expr([ - meta.query_advice(table.from_start, Rotation::cur()), - meta.query_advice(table.until_end, Rotation::cur()), - ]); - cb.condition(is_first.expr() * is_set.expr(), |cb| { - cb.require_equal( - "if is_first && is_set: bit == bit_value_acc", - meta.query_advice(table.bit, Rotation::cur()), - meta.query_advice(table.bit_value_acc, Rotation::cur()), - ); - cb.require_equal( - "if is_first && is_set: bitstring_len == 1", - meta.query_advice(table.bitstring_len, Rotation::cur()), - 1.expr(), - ); - }); - cb.condition(not::expr(is_first) * is_set, |cb| { - cb.require_equal( - "is_set: bit_value_acc == bit_value_acc::prev * 2 + bit", - meta.query_advice(table.bit_value_acc, Rotation::cur()), - meta.query_advice(table.bit_value_acc, Rotation::prev()) * 2.expr() - + meta.query_advice(table.bit, Rotation::cur()), - ); - cb.require_equal( - "is_set: bitstring_len == bitstring_len::prev + 1", - meta.query_advice(table.bitstring_len, Rotation::cur()), - meta.query_advice(table.bitstring_len, Rotation::prev()) + 1.expr(), - ); - }); - - // Constraints at bits to be ignored (at the start). 
- let is_ignored = not::expr(meta.query_advice(table.until_end, Rotation::cur())); - cb.condition(is_ignored, |cb| { - cb.require_zero( - "while until_end == 0: bitstring_len == 0", - meta.query_advice(table.bitstring_len, Rotation::cur()), - ); - cb.require_zero( - "while until_end == 0: bit_value_acc == 0", - meta.query_advice(table.bit_value_acc, Rotation::cur()), - ); - }); - - // Constraints at bits to be ignored (towards the end). - let is_ignored = not::expr(meta.query_advice(table.from_start, Rotation::cur())); - cb.condition(is_ignored, |cb| { - cb.require_equal( - "bitstring_len unchanged at the last ignored bits", - meta.query_advice(table.bitstring_len, Rotation::cur()), - meta.query_advice(table.bitstring_len, Rotation::prev()), - ); - cb.require_equal( - "bit_value_acc unchanged at the last ignored bits", - meta.query_advice(table.bit_value_acc, Rotation::cur()), - meta.query_advice(table.bit_value_acc, Rotation::prev()), - ); - }); - - cb.gate(meta.query_fixed(table.q_enabled, Rotation::cur())) - }); - - debug_assert!(meta.degree() <= 9); - - table - } - - /// Load witness to the table: dev mode. - pub fn dev_load(&self, _layouter: &mut impl Layouter) -> Result<(), Error> { - // TODO - - Ok(()) - } -} - -impl BitstringAccumulationTable { - /// Lookup table expressions for a bitsteam completely contained within the bits of a single - /// byte in the encoded data. 
- pub fn table_exprs_contained( - &self, - meta: &mut VirtualCells, - ) -> Vec> { - vec![ - meta.query_advice(self.byte_offset, Rotation::cur()), - meta.query_advice(self.byte_idx_1, Rotation::cur()), - meta.query_advice(self.byte_1, Rotation::cur()), - meta.query_advice(self.bit_value, Rotation::cur()), - meta.query_advice(self.bitstring_len, Rotation::cur()), - meta.query_fixed(self.bit_index, Rotation::cur()), - meta.query_advice(self.from_start, Rotation::cur()), - meta.query_advice(self.until_end, Rotation::cur()), - meta.query_advice(self.is_reverse, Rotation::cur()), - ] - } - - /// Lookup table expressions for a bitstream that spans over 2 consequtive bytes in the - /// encoded data. - pub fn table_exprs_spanned(&self, meta: &mut VirtualCells) -> Vec> { - vec![ - meta.query_advice(self.byte_offset, Rotation::cur()), - meta.query_advice(self.byte_idx_1, Rotation::cur()), - meta.query_advice(self.byte_idx_2, Rotation::cur()), - meta.query_advice(self.byte_1, Rotation::cur()), - meta.query_advice(self.byte_2, Rotation::cur()), - meta.query_advice(self.bit_value, Rotation::cur()), - meta.query_advice(self.bitstring_len, Rotation::cur()), - meta.query_fixed(self.bit_index, Rotation::cur()), - meta.query_advice(self.from_start, Rotation::cur()), - meta.query_advice(self.until_end, Rotation::cur()), - meta.query_advice(self.is_reverse, Rotation::cur()), - ] - } -} - -/// Different branches that can be taken while calculating regenerated size and compressed size in -/// the Literals Header. -#[derive(Clone, Copy, Debug, EnumIter)] -pub enum LiteralsHeaderBranch { - /// Raw/RLE block type with size_format 00 or 10. - RawRle0, - /// Raw/RLE block type with size format 10. - RawRle1, - /// Raw/RLE block type with size format 11. - RawRle2, - /// Compressed block type with size format 00 or 01. - Compressed0, - /// Compressed block type with size format 10. - Compressed1, - /// Compressed block type with size format 11. 
- Compressed2, -} - -impl_expr!(LiteralsHeaderBranch); - -impl From for usize { - fn from(value: LiteralsHeaderBranch) -> Self { - value as usize - } -} - -/// Helper table to calculate regenerated and compressed size from the Literals Header. -#[derive(Clone, Debug)] -pub struct LiteralsHeaderTable { - /// Whether to enable. - pub q_enable: Column, - /// Byte offset at which this literals header is located. - pub byte_offset: Column, - /// The branch taken for this literals header. - pub branch: Column, - /// To identify the branch. - pub branch_bits: BinaryNumberConfig, - /// The first byte of the literals header. - pub byte0: Column, - /// The second byte. - pub byte1: Column, - /// The third byte. - pub byte2: Column, - /// The fourth byte. - pub byte3: Column, - /// The fifth byte. - pub byte4: Column, - /// byte0 >> 3. - pub byte0_rs_3: Column, - /// byte0 >> 4. - pub byte0_rs_4: Column, - /// byte1 >> 6. - pub byte1_rs_6: Column, - /// byte1 & 0b111111. - pub byte1_and_63: Column, - /// byte2 >> 2. - pub byte2_rs_2: Column, - /// byte2 >> 6. - pub byte2_rs_6: Column, - /// byte2 & 0b11. - pub byte2_and_3: Column, - /// byte2 & 0b111111. - pub byte2_and_63: Column, - /// Regenerated size. - pub regen_size: Column, - /// Compressed size. - pub compr_size: Column, -} - -impl LiteralsHeaderTable { - /// Construct and constrain the literals header table. 
- pub fn construct( - meta: &mut ConstraintSystem, - bitwise_op_table: BitwiseOpTable, - range4: RangeTable<4>, - range8: RangeTable<8>, - range16: RangeTable<16>, - range64: RangeTable<64>, - ) -> Self { - let q_enable = meta.fixed_column(); - let branch = meta.advice_column(); - let table = Self { - q_enable, - byte_offset: meta.advice_column(), - branch, - branch_bits: BinaryNumberChip::configure(meta, q_enable, Some(branch.into())), - byte0: meta.advice_column(), - byte1: meta.advice_column(), - byte2: meta.advice_column(), - byte3: meta.advice_column(), - byte4: meta.advice_column(), - byte0_rs_3: meta.advice_column(), - byte0_rs_4: meta.advice_column(), - byte1_rs_6: meta.advice_column(), - byte1_and_63: meta.advice_column(), - byte2_rs_2: meta.advice_column(), - byte2_rs_6: meta.advice_column(), - byte2_and_3: meta.advice_column(), - byte2_and_63: meta.advice_column(), - regen_size: meta.advice_column(), - compr_size: meta.advice_column(), - }; - - macro_rules! is_branch { - ($var:ident, $branch_variant:ident) => { - let $var = |meta: &mut VirtualCells| { - table - .branch_bits - .value_equals(LiteralsHeaderBranch::$branch_variant, Rotation::cur())( - meta - ) - }; - }; - } - - is_branch!(branch0, RawRle0); - is_branch!(branch1, RawRle1); - is_branch!(branch2, RawRle2); - is_branch!(branch3, Compressed0); - is_branch!(branch4, Compressed1); - is_branch!(branch5, Compressed2); - - meta.create_gate("LiteralsHeaderTable", |meta| { - let mut cb = BaseConstraintBuilder::default(); - - let byte0_rs_3 = meta.query_advice(table.byte0_rs_3, Rotation::cur()); - let byte0_rs_4 = meta.query_advice(table.byte0_rs_4, Rotation::cur()); - let byte1_ls_4 = meta.query_advice(table.byte1, Rotation::cur()) * 16.expr(); - let byte1_and_63_ls_4 = - meta.query_advice(table.byte1_and_63, Rotation::cur()) * 16.expr(); - let byte1_rs_6 = meta.query_advice(table.byte1_rs_6, Rotation::cur()); - let byte2_rs_2 = meta.query_advice(table.byte2_rs_2, Rotation::cur()); - let byte2_rs_6 = 
meta.query_advice(table.byte2_rs_6, Rotation::cur()); - let byte2_ls_2 = meta.query_advice(table.byte2, Rotation::cur()) * 4.expr(); - let byte2_ls_12 = meta.query_advice(table.byte2, Rotation::cur()) * 4096.expr(); - let byte2_and_3_ls_12 = - meta.query_advice(table.byte2_and_3, Rotation::cur()) * 4096.expr(); - let byte2_and_63_ls_12 = - meta.query_advice(table.byte2_and_63, Rotation::cur()) * 4096.expr(); - let byte3_ls_6 = meta.query_advice(table.byte3, Rotation::cur()) * 64.expr(); - let byte3_ls_2 = meta.query_advice(table.byte3, Rotation::cur()) * 4.expr(); - let byte4_ls_10 = meta.query_advice(table.byte4, Rotation::cur()) * 1024.expr(); - - // regen_size == lh_byte[0] >> 3. - // compr_size == 0. - cb.condition(branch0(meta), |cb| { - cb.require_equal( - "branch0: regenerated size", - meta.query_advice(table.regen_size, Rotation::cur()), - byte0_rs_3, - ); - cb.require_zero( - "branch0: compressed size", - meta.query_advice(table.compr_size, Rotation::cur()), - ); - for col in [table.byte1, table.byte2, table.byte3, table.byte4] { - cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur())); - } - }); - - // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4). - // compr_size == 0. - cb.condition(branch1(meta), |cb| { - cb.require_equal( - "branch1: regenerated size", - meta.query_advice(table.regen_size, Rotation::cur()), - byte0_rs_4.expr() + byte1_ls_4.expr(), - ); - cb.require_zero( - "branch1: compressed size", - meta.query_advice(table.compr_size, Rotation::cur()), - ); - for col in [table.byte2, table.byte3, table.byte4] { - cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur())); - } - }); - - // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4) + (lh_byte[2] << 12). - // compr_size == 0. 
- cb.condition(branch2(meta), |cb| { - cb.require_equal( - "branch2: regenerated size", - meta.query_advice(table.regen_size, Rotation::cur()), - byte0_rs_4.expr() + byte1_ls_4.expr() + byte2_ls_12, - ); - cb.require_zero( - "branch2: compressed size", - meta.query_advice(table.compr_size, Rotation::cur()), - ); - for col in [table.byte3, table.byte4] { - cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur())); - } - }); - - // regen_size == (lh_byte[0] >> 4) + ((lh_byte[1] & 0b111111) << 4). - // compr_size == (lh_byte[1] >> 6) + (lh_byte[2] << 2). - cb.condition(branch3(meta), |cb| { - cb.require_equal( - "branch3: regenerated size", - meta.query_advice(table.regen_size, Rotation::cur()), - byte0_rs_4.expr() + byte1_and_63_ls_4, - ); - cb.require_equal( - "branch3: compressed size", - meta.query_advice(table.compr_size, Rotation::cur()), - byte1_rs_6 + byte2_ls_2.expr(), - ); - for col in [table.byte3, table.byte4] { - cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur())); - } - }); - - // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4) + ((lh_byte[2] & 0b11) << 12). - // compr_size == (lh_byte[2] >> 2) + (lh_byte[3] << 6). - cb.condition(branch4(meta), |cb| { - cb.require_equal( - "branch4: regenerated size", - meta.query_advice(table.regen_size, Rotation::cur()), - byte0_rs_4.expr() + byte1_ls_4.expr() + byte2_and_3_ls_12, - ); - cb.require_equal( - "branch4: compressed size", - meta.query_advice(table.compr_size, Rotation::cur()), - byte2_rs_2 + byte3_ls_6, - ); - cb.require_zero( - "byte[i] == 0", - meta.query_advice(table.byte4, Rotation::cur()), - ); - }); - - // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4) + ((lh_byte[2] & 0b111111) << - // 12). compr_size == (lh_byte[2] >> 6) + (lh_byte[3] << 2) + (lh_byte[4] << - // 10). 
- cb.condition(branch5(meta), |cb| { - cb.require_equal( - "branch5: regenerated size", - meta.query_advice(table.regen_size, Rotation::cur()), - byte0_rs_4 + byte1_ls_4 + byte2_and_63_ls_12, - ); - cb.require_equal( - "branch5: compressed size", - meta.query_advice(table.compr_size, Rotation::cur()), - byte2_rs_6 + byte3_ls_2 + byte4_ls_10, - ); - }); - - cb.gate(meta.query_fixed(table.q_enable, Rotation::cur())) - }); - meta.lookup("LiteralsHeaderTable: byte0 >> 3", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - let range_value = meta.query_advice(table.byte0, Rotation::cur()) - - (meta.query_advice(table.byte0_rs_3, Rotation::cur()) * 8.expr()); - - vec![(condition * range_value, range8.into())] - }); - meta.lookup("LiteralsHeaderTable: byte0 >> 4", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - let range_value = meta.query_advice(table.byte0, Rotation::cur()) - - (meta.query_advice(table.byte0_rs_4, Rotation::cur()) * 16.expr()); - - vec![(condition * range_value, range16.into())] - }); - meta.lookup("LiteralsHeaderTable: byte1 >> 6", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - let range_value = meta.query_advice(table.byte1, Rotation::cur()) - - (meta.query_advice(table.byte1_rs_6, Rotation::cur()) * 64.expr()); - - vec![(condition * range_value, range64.into())] - }); - meta.lookup("LiteralsHeaderTable: byte2 >> 2", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - let range_value = meta.query_advice(table.byte2, Rotation::cur()) - - (meta.query_advice(table.byte2_rs_2, Rotation::cur()) * 4.expr()); - - vec![(condition * range_value, range4.into())] - }); - meta.lookup("LiteralsHeaderTable: byte2 >> 6", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - let range_value = meta.query_advice(table.byte2, Rotation::cur()) - - (meta.query_advice(table.byte2_rs_6, Rotation::cur()) * 64.expr()); - - 
vec![(condition * range_value, range64.into())] - }); - meta.lookup_any("LiteralsHeaderTable: byte1 & 63", |meta| { - let condition = and::expr([ - meta.query_fixed(table.q_enable, Rotation::cur()), - not::expr(branch0(meta)), - ]); - [ - BitwiseOp::AND.expr(), - meta.query_advice(table.byte1, Rotation::cur()), - 63.expr(), - meta.query_advice(table.byte1_and_63, Rotation::cur()), - ] - .into_iter() - .zip(bitwise_op_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - meta.lookup_any("LiteralsHeaderTable: byte2 & 3", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - [ - BitwiseOp::AND.expr(), - meta.query_advice(table.byte2, Rotation::cur()), - 3.expr(), - meta.query_advice(table.byte2_and_3, Rotation::cur()), - ] - .into_iter() - .zip(bitwise_op_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - meta.lookup_any("LiteralsHeaderTable: byte2 & 63", |meta| { - let condition = meta.query_fixed(table.q_enable, Rotation::cur()); - [ - BitwiseOp::AND.expr(), - meta.query_advice(table.byte2, Rotation::cur()), - 63.expr(), - meta.query_advice(table.byte2_and_63, Rotation::cur()), - ] - .into_iter() - .zip(bitwise_op_table.table_exprs(meta)) - .map(|(input, table)| (input * condition.clone(), table)) - .collect::>() - }); - - debug_assert!(meta.degree() <= 9); - - table - } - - /// Assign witness to the literals header table. - pub fn dev_load( - &self, - layouter: &mut impl Layouter, - literals_headers: Vec>, - ) -> Result<(), Error> { - layouter.assign_region( - || "LiteralsHeaderTable", - |_region| { - for (_i, header) in literals_headers.iter().enumerate() { - let _n_bytes_header = header.len(); - // TODO: make the appropriate assignment. 
- } - Ok(()) - }, - ) - } -} - -impl LookupTable for LiteralsHeaderTable { - fn columns(&self) -> Vec> { - vec![ - self.byte_offset.into(), - self.branch.into(), - self.byte0.into(), - self.byte1.into(), - self.byte2.into(), - self.byte3.into(), - self.byte4.into(), - self.regen_size.into(), - self.compr_size.into(), - ] - } - - fn annotations(&self) -> Vec { - vec![ - String::from("byte_offset"), - String::from("branch"), - String::from("byte0"), - String::from("byte1"), - String::from("byte2"), - String::from("byte3"), - String::from("byte4"), - String::from("regen_size"), - String::from("compr_size"), - ] - } -} - -/// Read-only Memory table for the Decompression circuit. This table allows us a lookup argument -/// from the Decompression circuit to check if a given row can occur depending on the row's tag, -/// next tag and tag length. -#[derive(Clone, Copy, Debug)] -pub struct TagRomTable { - /// Tag of the current field being decoded. - pub tag: Column, - /// Tag of the following field when the current field is finished decoding. - pub tag_next: Column, - /// The maximum length in terms of number of bytes that the current tag can take up. - pub max_len: Column, - /// Whether this tag outputs a decoded byte or not. - pub is_output: Column, - /// Whether this tag belongs to a ``block`` in zstd or not. - pub is_block: Column, - /// Whether this tag is processed back-to-front, i.e. in reverse order. - pub is_reverse: Column, -} - -impl LookupTable for TagRomTable { - fn columns(&self) -> Vec> { - vec![ - self.tag.into(), - self.tag_next.into(), - self.max_len.into(), - self.is_output.into(), - self.is_block.into(), - self.is_reverse.into(), - ] - } - - fn annotations(&self) -> Vec { - vec![ - String::from("tag"), - String::from("tag_next"), - String::from("max_len"), - String::from("is_output"), - String::from("is_block"), - String::from("is_reverse"), - ] - } -} - -impl TagRomTable { - /// Construct the ROM table. 
- pub fn construct(meta: &mut ConstraintSystem) -> Self { - Self { - tag: meta.fixed_column(), - tag_next: meta.fixed_column(), - max_len: meta.fixed_column(), - is_output: meta.fixed_column(), - is_block: meta.fixed_column(), - is_reverse: meta.fixed_column(), - } - } - - /// Load the ROM table. - pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { - layouter.assign_region( - || "Zstd ROM table", - |mut region| { - for (offset, row) in TagRomTableRow::rows().iter().enumerate() { - for (&column, (value, annotation)) in - >::fixed_columns(self).iter().zip( - row.values::() - .into_iter() - .zip(>::annotations(self).iter()), - ) - { - region.assign_fixed( - || format!("{annotation} at offset={offset}"), - column, - offset, - || value, - )?; - } - } - - Ok(()) - }, - ) - } -} - -/// Read-only Memory table for the Decompression circuit. This table allows us a lookup argument -/// from the Decompression circuit to check if the next tag is correct based on which block type we -/// have encountered in the block header. Block type is denoted by 2 bits in the block header. -#[derive(Clone, Copy, Debug)] -pub struct BlockTypeRomTable { - /// Current tag. - tag: Column, - /// Lower bit. - lo_bit: Column, - /// Higher bit. - hi_bit: Column, - /// Tag that follows. - tag_next: Column, -} - -impl LookupTable for BlockTypeRomTable { - fn columns(&self) -> Vec> { - vec![ - self.tag.into(), - self.lo_bit.into(), - self.hi_bit.into(), - self.tag_next.into(), - ] - } - - fn annotations(&self) -> Vec { - vec![ - String::from("tag"), - String::from("lo_bit"), - String::from("hi_bit"), - String::from("tag_next"), - ] - } -} - -impl BlockTypeRomTable { - /// Construct the ROM table. - pub fn construct(meta: &mut ConstraintSystem) -> Self { - Self { - tag: meta.fixed_column(), - lo_bit: meta.fixed_column(), - hi_bit: meta.fixed_column(), - tag_next: meta.fixed_column(), - } - } - - /// Load the ROM table. 
- pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { - layouter.assign_region( - || "Zstd BlockType ROM table", - |mut region| { - for (i, &(tag, lo_bit, hi_bit, tag_next)) in [ - (ZstdTag::BlockHeader, 0, 0, ZstdTag::RawBlockBytes), - (ZstdTag::BlockHeader, 0, 1, ZstdTag::RleBlockBytes), - (ZstdTag::BlockHeader, 1, 0, ZstdTag::ZstdBlockLiteralsHeader), - ( - ZstdTag::ZstdBlockLiteralsHeader, - 0, - 0, - ZstdTag::ZstdBlockLiteralsRawBytes, - ), - ( - ZstdTag::ZstdBlockLiteralsHeader, - 0, - 1, - ZstdTag::ZstdBlockLiteralsRleBytes, - ), - ( - ZstdTag::ZstdBlockLiteralsHeader, - 1, - 0, - ZstdTag::ZstdBlockHuffmanCode, - ), - ] - .iter() - .enumerate() - { - region.assign_fixed( - || "tag", - self.tag, - i, - || Value::known(F::from(tag as u64)), - )?; - region.assign_fixed( - || "lo_bit", - self.lo_bit, - i, - || Value::known(F::from(lo_bit)), - )?; - region.assign_fixed( - || "hi_bit", - self.hi_bit, - i, - || Value::known(F::from(hi_bit)), - )?; - region.assign_fixed( - || "tag_next", - self.tag_next, - i, - || Value::known(F::from(tag_next as u64)), - )?; - } - Ok(()) - }, - ) - } -} - -/// Read-only memory table for zstd block's literals header. -#[derive(Clone, Copy, Debug)] -pub struct LiteralsHeaderRomTable { - /// Block type first bit. - block_type_bit0: Column, - /// Block type second bit. - block_type_bit1: Column, - /// Size format first bit. - size_format_bit0: Column, - /// Size format second bit. - size_format_bit1: Column, - /// Number of bytes occupied by the literals header. - n_bytes_header: Column, - /// Number of literal streams to be decoded. - n_lstreams: Column, - /// The branch we take to decompose the literals header. 
There are a total of 7 branches that - /// can be used to decompose the literals header, namely: - /// - /// - block_type == Raw/RLE and size_format == 00 or 10 - /// - block_type == Raw/RLE and size_format == 01 - /// - block_type == Raw/RLE and size_format == 11 - /// - block_type == Compressed and size_format == 00 - /// - block_type == Compressed and size_format == 01 - /// - block_type == Compressed and size_format == 10 - /// - block_type == Compressed and size_format == 11 - branch: Column, - // size format == 0b11? - is_size_format_0b11: Column, -} - -impl LookupTable for LiteralsHeaderRomTable { - fn columns(&self) -> Vec> { - vec![ - self.block_type_bit0.into(), - self.block_type_bit1.into(), - self.size_format_bit0.into(), - self.size_format_bit1.into(), - self.n_bytes_header.into(), - self.n_lstreams.into(), - self.branch.into(), - self.is_size_format_0b11.into(), - ] - } - - fn annotations(&self) -> Vec { - vec![ - String::from("block_type_bit0"), - String::from("block_type_bit1"), - String::from("size_format_bit0"), - String::from("size_format_bit1"), - String::from("n_bytes_header"), - String::from("n_lstreams"), - String::from("branch"), - String::from("is_size_format_0b11"), - ] - } -} - -impl LiteralsHeaderRomTable { - /// Construct the ROM table. - pub fn construct(meta: &mut ConstraintSystem) -> Self { - Self { - block_type_bit0: meta.fixed_column(), - block_type_bit1: meta.fixed_column(), - size_format_bit0: meta.fixed_column(), - size_format_bit1: meta.fixed_column(), - n_bytes_header: meta.fixed_column(), - n_lstreams: meta.fixed_column(), - branch: meta.fixed_column(), - is_size_format_0b11: meta.fixed_column(), - } - } - - /// Load the ROM table. 
- pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { - layouter.assign_region( - || "LiteralsHeader ROM table", - |mut region| { - // Refer: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header - for (i, row) in [ - [0, 0, 0, 0, 1, 1, 0, 0], // Raw: 1 byte header - [0, 0, 0, 1, 1, 1, 0, 0], // Raw: 1 byte header - [0, 0, 1, 0, 2, 1, 1, 0], // Raw: 2 bytes header - [0, 0, 1, 1, 3, 1, 2, 1], // Raw: 3 bytes header - [1, 0, 0, 0, 1, 1, 0, 0], // RLE: 1 byte header - [1, 0, 0, 1, 1, 1, 0, 0], // RLE: 1 byte header - [1, 0, 1, 0, 2, 1, 1, 0], // RLE: 2 bytes header - [1, 0, 1, 1, 3, 1, 2, 1], // RLE: 3 bytes header - [0, 1, 0, 0, 3, 1, 3, 0], // Compressed: 3 bytes header - [0, 1, 1, 0, 3, 4, 4, 0], // Compressed: 3 bytes header - [0, 1, 0, 1, 4, 4, 5, 0], // Compressed: 4 bytes header - [0, 1, 1, 1, 5, 4, 6, 1], // Compressed: 5 bytes header - ] - .iter() - .enumerate() - { - for (&column, (&value, annotation)) in - >::fixed_columns(self).iter().zip( - row.iter() - .zip(>::annotations(self).iter()), - ) - { - region.assign_fixed( - || format!("{annotation} at offset={i}"), - column, - i, - || Value::known(F::from(value)), - )?; - } - } - - Ok(()) - }, - ) - } -} diff --git a/zkevm-circuits/src/table/decompression/bitstring_accumulation_table.rs b/zkevm-circuits/src/table/decompression/bitstring_accumulation_table.rs new file mode 100644 index 0000000000..0dee39192f --- /dev/null +++ b/zkevm-circuits/src/table/decompression/bitstring_accumulation_table.rs @@ -0,0 +1,557 @@ +use eth_types::Field; +use gadgets::util::{and, not, select, Expr}; +use halo2_proofs::{ + circuit::{Layouter, Value}, + plonk::{Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed}, + poly::Rotation, +}; + +use crate::{ + evm_circuit::util::constraint_builder::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::LookupTable, + witness::{value_bits_le, ZstdTag, ZstdWitnessRow}, +}; + +/// An auxiliary table for the 
Huffman Codes. In Huffman coding a symbol (byte) is mapped to a +/// bitstring of particular length such that more frequently occurring symbols are mapped to +/// bitstrings of smaller lengths. +/// +/// We already have the symbols and their bit_value in the HuffmanCodesTable. However, we still +/// need to validate that the bit_value is in fact assigned correctly. Since bitstrings may not be +/// byte-aligned, i.e. a bitstring can span over 2 bytes (assuming a maximum bitstring length of 8) +/// we need to make sure that the bit_value is in fact the binary value represented by the bits of +/// that bitstring. +#[derive(Clone, Debug)] +pub struct BitstringAccumulationTable { + /// Fixed column to denote whether the constraints will be enabled or not. + pub q_enabled: Column, + /// The byte offset within the data instance where the encoded FSE table begins. This is + /// 1-indexed, i.e. byte_offset == 1 at the first byte. + pub byte_offset: Column, + /// The byte offset of byte_1 in the zstd encoded data. byte_idx' == byte_idx + /// while 0 <= bit_index < 15. At bit_index == 15, byte_idx' == byte_idx + 1. + pub byte_idx_1: Column, + /// The byte offset of byte_2 in the zstd encoded data. byte_idx' == byte_idx + /// while 0 <= bit_index < 15. At bit_index == 15, byte_idx' == byte_idx + 1. + /// + /// We also have byte_idx_2 == byte_idx_1 or byte_idx_2 == byte_idx_1 + 1. + pub byte_idx_2: Column, + /// The byte value at byte_idx_1. + pub byte_1: Column, + /// The byte value at byte_idx_2. + pub byte_2: Column, + /// The index within these 2 bytes, i.e. 0 <= bit_index <= 15. bit_index increments up to + /// 15 and then is reset to 0. Repeats while we finish bitstring accumulation of all bitstrings + /// used in the Huffman codes. + pub bit_index: Column, + /// Helper column to know the start of a new chunk of 2 bytes, this is a fixed column as well + /// as it is set only on bit_index == 0. + pub q_first: Column, + /// The bit at bit_index.
Accumulation of bits from 0 <= bit_index <= 7 denotes byte_1. + /// Accumulation of 8 <= bit_index <= 15 denotes byte_2. + pub bit: Column, + /// The final value of the bit accumulation for the set bits. + pub bit_value: Column, + /// The length of the bitstring, i.e. the number of bits that were set. + pub bitstring_len: Column, + /// The accumulator over the bits of the bit chunk, i.e. while from_start == until_end == 1. + pub bit_value_acc: Column, + /// Boolean that is set from bit_index == 0 to the end of the bit chunk. + pub from_start: Column, + /// Boolean that is set from the start of the bit chunk to bit_index == 15. + pub until_end: Column, + /// Boolean to mark if the bitstring is a part of bytes that are read from front-to-back or + /// back-to-front. For the back-to-front case, the is_reverse boolean is set. + pub is_reverse: Column, +} + +impl BitstringAccumulationTable { + /// Construct the bitstring accumulation table. + pub fn construct(meta: &mut ConstraintSystem) -> Self { + let q_enabled = meta.fixed_column(); + let table = Self { + q_enabled, + byte_offset: meta.advice_column(), + byte_idx_1: meta.advice_column(), + byte_idx_2: meta.advice_column(), + byte_1: meta.advice_column(), + byte_2: meta.advice_column(), + bit_index: meta.fixed_column(), + q_first: meta.fixed_column(), + bit: meta.advice_column(), + bit_value: meta.advice_column(), + bitstring_len: meta.advice_column(), + bit_value_acc: meta.advice_column(), + from_start: meta.advice_column(), + until_end: meta.advice_column(), + is_reverse: meta.advice_column(), + }; + + meta.create_gate("BitstringAccumulationTable: bit_index == 0", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let bits = (0..16) + .map(|i| meta.query_advice(table.bit, Rotation(i))) + .collect::>>(); + + cb.require_equal( + "byte1 is the binary accumulation of 0 <= bit_index <= 7", + meta.query_advice(table.byte_1, Rotation::cur()), + select::expr( + meta.query_advice(table.is_reverse, Rotation::cur()), + bits[7].expr() +
bits[6].expr() * 2.expr() + + bits[5].expr() * 4.expr() + + bits[4].expr() * 8.expr() + + bits[3].expr() * 16.expr() + + bits[2].expr() * 32.expr() + + bits[1].expr() * 64.expr() + + bits[0].expr() * 128.expr(), + bits[0].expr() + + bits[1].expr() * 2.expr() + + bits[2].expr() * 4.expr() + + bits[3].expr() * 8.expr() + + bits[4].expr() * 16.expr() + + bits[5].expr() * 32.expr() + + bits[6].expr() * 64.expr() + + bits[7].expr() * 128.expr(), + ), + ); + + cb.require_equal( + "byte2 is the binary accumulation of 8 <= bit_index <= 15", + meta.query_advice(table.byte_2, Rotation::cur()), + select::expr( + meta.query_advice(table.is_reverse, Rotation::cur()), + bits[15].expr() + + bits[14].expr() * 2.expr() + + bits[13].expr() * 4.expr() + + bits[12].expr() * 8.expr() + + bits[11].expr() * 16.expr() + + bits[10].expr() * 32.expr() + + bits[9].expr() * 64.expr() + + bits[8].expr() * 128.expr(), + bits[8].expr() + + bits[9].expr() * 2.expr() + + bits[10].expr() * 4.expr() + + bits[11].expr() * 8.expr() + + bits[12].expr() * 16.expr() + + bits[13].expr() * 32.expr() + + bits[14].expr() * 64.expr() + + bits[15].expr() * 128.expr(), + ), + ); + + cb.require_boolean( + "is_reverse is boolean", + meta.query_advice(table.is_reverse, Rotation::cur()), + ); + + // TODO: Possibly exclude jump table bytes as they create a gap in byte_idx between + // huffman code and lstreams + cb.require_boolean( + "byte2 == byte1 or byte2 == byte1 + 1", + meta.query_advice(table.byte_idx_2, Rotation::cur()) + - meta.query_advice(table.byte_idx_1, Rotation::cur()), + ); + + cb.gate(and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + meta.query_fixed(table.q_first, Rotation::cur()), + ])) + }); + + debug_assert!(meta.degree() <= 9); + + meta.create_gate("BitstringAccumulationTable: bit_index > 0", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // Constrain columns that are unchanged from 0 < bit_idx < 16. 
+ for col in [ + table.byte_offset, + table.byte_idx_1, + table.byte_idx_2, + table.byte_1, + table.byte_2, + table.bit_value, + table.is_reverse, + ] { + cb.require_equal( + "unchanged columns from 0 < bit_idx < 16", + meta.query_advice(col, Rotation::cur()), + meta.query_advice(col, Rotation::prev()), + ); + } + + let is_last = meta.query_fixed(table.q_first, Rotation::next()); + cb.condition(is_last, |cb| { + cb.require_equal( + "byte_idx_1' == byte_idx_2", + meta.query_advice(table.byte_idx_1, Rotation::next()), + meta.query_advice(table.byte_idx_2, Rotation::cur()), + ); + }); + + cb.gate(and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + not::expr(meta.query_fixed(table.q_first, Rotation::cur())), + ])) + }); + + debug_assert!(meta.degree() <= 9); + + // Consider a bit chunk from bit_index == 4 to bit_index == 9. We will have: + // + // | bit index | from start | until end | bitstring len | bit | bit value acc | + // |-----------|------------|-----------|---------------|-----|---------------| + // | 0 | 1 | 0 | 0 | 0 | 0 | + // | 1 | 1 | 0 | 0 | 0 | 0 | + // | 2 | 1 | 0 | 0 | 1 | 0 | + // | 3 | 1 | 0 | 0 | 0 | 0 | + // | 4 -> | 1 | 1 | 1 | 1 | 1 | + // | 5 -> | 1 | 1 | 2 | 0 | 1 | + // | 6 -> | 1 | 1 | 3 | 1 | 5 | + // | 7 -> | 1 | 1 | 4 | 1 | 13 | + // | 8 -> | 1 | 1 | 5 | 0 | 13 | + // | 9 -> | 1 | 1 | 6 | 1 | 45 | + // | 10 | 0 | 1 | 6 | 0 | 45 | + // | 11 | 0 | 1 | 6 | 0 | 45 | + // | 12 | 0 | 1 | 6 | 0 | 45 | + // | 13 | 0 | 1 | 6 | 1 | 45 | + // | 14 | 0 | 1 | 6 | 1 | 45 | + // | 15 | 0 | 1 | 6 | 0 | 45 | + // + // The bits for the bitstring are where from_start == until_end == 1. + meta.create_gate("BitstringAccumulationTable: bit value", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // Columns from_start and until_end are boolean. 
+ cb.require_boolean( + "from_start is boolean", + meta.query_advice(table.from_start, Rotation::cur()), + ); + cb.require_boolean( + "until_end is boolean", + meta.query_advice(table.until_end, Rotation::cur()), + ); + + // Column from_start transitions from 1 to 0 only once. + let is_first = meta.query_fixed(table.q_first, Rotation::cur()); + cb.condition(is_first.expr(), |cb| { + cb.require_equal( + "if q_first == True: from_start == 1", + meta.query_advice(table.from_start, Rotation::cur()), + 1.expr(), + ); + }); + cb.condition(not::expr(is_first.expr()), |cb| { + cb.require_boolean( + "from_start transitions from 1 to 0 only once", + meta.query_advice(table.from_start, Rotation::prev()) + - meta.query_advice(table.from_start, Rotation::cur()), + ); + }); + + // Column until_end transitions from 0 to 1 only once. + let is_last = meta.query_fixed(table.q_first, Rotation::next()); + cb.condition(is_last.expr(), |cb| { + cb.require_equal( + "if q_first::next == True: until_end == 1", + meta.query_advice(table.until_end, Rotation::cur()), + 1.expr(), + ); + }); + cb.condition(not::expr(is_last.expr()), |cb| { + cb.require_boolean( + "until_end transitions from 0 to 1 only once", + meta.query_advice(table.until_end, Rotation::next()) + - meta.query_advice(table.until_end, Rotation::cur()), + ); + }); + + // Constraints at meaningful bits. 
+ let is_set = and::expr([ + meta.query_advice(table.from_start, Rotation::cur()), + meta.query_advice(table.until_end, Rotation::cur()), + ]); + cb.condition(is_first.expr() * is_set.expr(), |cb| { + cb.require_equal( + "if is_first && is_set: bit == bit_value_acc", + meta.query_advice(table.bit, Rotation::cur()), + meta.query_advice(table.bit_value_acc, Rotation::cur()), + ); + cb.require_equal( + "if is_first && is_set: bitstring_len == 1", + meta.query_advice(table.bitstring_len, Rotation::cur()), + 1.expr(), + ); + }); + cb.condition(not::expr(is_first) * is_set, |cb| { + cb.require_equal( + "is_set: bit_value_acc == bit_value_acc::prev * 2 + bit", + meta.query_advice(table.bit_value_acc, Rotation::cur()), + meta.query_advice(table.bit_value_acc, Rotation::prev()) * 2.expr() + + meta.query_advice(table.bit, Rotation::cur()), + ); + cb.require_equal( + "is_set: bitstring_len == bitstring_len::prev + 1", + meta.query_advice(table.bitstring_len, Rotation::cur()), + meta.query_advice(table.bitstring_len, Rotation::prev()) + 1.expr(), + ); + }); + + // Constraints at bits to be ignored (at the start). + let is_ignored = not::expr(meta.query_advice(table.until_end, Rotation::cur())); + cb.condition(is_ignored, |cb| { + cb.require_zero( + "while until_end == 0: bitstring_len == 0", + meta.query_advice(table.bitstring_len, Rotation::cur()), + ); + cb.require_zero( + "while until_end == 0: bit_value_acc == 0", + meta.query_advice(table.bit_value_acc, Rotation::cur()), + ); + }); + + // Constraints at bits to be ignored (towards the end). 
+ let is_ignored = not::expr(meta.query_advice(table.from_start, Rotation::cur())); + cb.condition(is_ignored, |cb| { + cb.require_equal( + "bitstring_len unchanged at the last ignored bits", + meta.query_advice(table.bitstring_len, Rotation::cur()), + meta.query_advice(table.bitstring_len, Rotation::prev()), + ); + cb.require_equal( + "bit_value_acc unchanged at the last ignored bits", + meta.query_advice(table.bit_value_acc, Rotation::cur()), + meta.query_advice(table.bit_value_acc, Rotation::prev()), + ); + }); + + cb.gate(meta.query_fixed(table.q_enabled, Rotation::cur())) + }); + + debug_assert!(meta.degree() <= 9); + + table + } + + /// Load witness to the table: dev mode. + pub fn assign( + &self, + layouter: &mut impl Layouter, + witness_rows: &[ZstdWitnessRow], + ) -> Result<(), Error> { + assert!(!witness_rows.is_empty()); + + // Get the byte at which FSE is described + // TODO: Determining huffman offset in a multi-block scenario. + let huffman_offset = witness_rows + .iter() + .find(|&r| r.state.tag == ZstdTag::ZstdBlockFseCode) + .unwrap() + .encoded_data + .byte_idx; + + // Extract bit accumulation-related info from the rows + let accumulation_rows = witness_rows + .iter() + .filter(|&r| { + r.state.tag == ZstdTag::ZstdBlockFseCode + || r.state.tag == ZstdTag::ZstdBlockHuffmanCode + || r.state.tag == ZstdTag::ZstdBlockJumpTable + || r.state.tag == ZstdTag::ZstdBlockLstream + }) + .map(|r| { + ( + r.encoded_data.byte_idx as usize, + r.encoded_data.value_byte as u64, + r.bitstream_read_data.bit_start_idx, + r.bitstream_read_data.bit_end_idx, + r.bitstream_read_data.bit_value, + r.state.tag.is_reverse() as u64, // is_reverse + ) + }) + .collect::>(); + + layouter.assign_region( + || "Bitstring Accumulation Table", + |mut region| { + let mut offset: usize = 0; + let mut last_byte_idx: usize = 0; + for rows in accumulation_rows.windows(2) { + let row = rows[0]; + let next_row = rows[1]; + let byte_1_bits = value_bits_le(row.1 as u8); + let byte_2_bits = 
value_bits_le(next_row.1 as u8); + let bits = if row.5 > 0 { + // reversed + [ + byte_1_bits.into_iter().rev().collect::>(), + byte_2_bits.into_iter().rev().collect::>(), + ] + .concat() + } else { + // not reversed + [byte_1_bits, byte_2_bits].concat() + }; + + let mut acc: u64 = 0; + let mut bitstring_len: u64 = 0; + + for (bit_idx, bit) in bits.into_iter().enumerate().take(16) { + region.assign_fixed( + || "q_enable", + self.q_enabled, + offset + bit_idx, + || Value::known(F::one()), + )?; + region.assign_advice( + || "byte_offset", + self.byte_offset, + offset + bit_idx, + || Value::known(F::from(huffman_offset)), + )?; + region.assign_advice( + || "byte_idx_1", + self.byte_idx_1, + offset + bit_idx, + || Value::known(F::from(row.0 as u64)), + )?; + region.assign_advice( + || "byte_idx_2", + self.byte_idx_2, + offset + bit_idx, + || Value::known(F::from(next_row.0 as u64)), + )?; + region.assign_advice( + || "byte_1", + self.byte_1, + offset + bit_idx, + || Value::known(F::from(row.1)), + )?; + region.assign_advice( + || "byte_2", + self.byte_2, + offset + bit_idx, + || Value::known(F::from(next_row.1)), + )?; + region.assign_fixed( + || "bit_index", + self.bit_index, + offset + bit_idx, + || Value::known(F::from(bit_idx as u64)), + )?; + region.assign_fixed( + || "q_first", + self.q_first, + offset + bit_idx, + || Value::known(F::from((bit_idx == 0) as u64)), + )?; + + if bit_idx >= row.2 && bit_idx <= row.3 { + acc = acc * 2 + (bit as u64); + bitstring_len += 1; + } + region.assign_advice( + || "bit", + self.bit, + offset + bit_idx, + || Value::known(F::from(bit as u64)), + )?; + region.assign_advice( + || "bit_value_acc", + self.bit_value_acc, + offset + bit_idx, + || Value::known(F::from(acc)), + )?; + region.assign_advice( + || "bit_value", + self.bit_value, + offset + bit_idx, + || Value::known(F::from(row.4)), + )?; + region.assign_advice( + || "bitstring_len", + self.bitstring_len, + offset + bit_idx, + || Value::known(F::from(bitstring_len)), + )?; + 
region.assign_advice( + || "from_start", + self.from_start, + offset + bit_idx, + || Value::known(F::from((bit_idx <= row.3) as u64)), + )?; + region.assign_advice( + || "until_end", + self.until_end, + offset + bit_idx, + || Value::known(F::from((bit_idx >= row.2) as u64)), + )?; + region.assign_advice( + || "is_reverse", + self.is_reverse, + offset + bit_idx, + || Value::known(F::from(row.5)), + )?; + } + + offset += 16; + last_byte_idx = next_row.0; + } + + region.assign_fixed( + || "q_first", + self.q_first, + offset, + || Value::known(F::one()), + )?; + region.assign_advice( + || "byte_idx_1", + self.byte_idx_1, + offset, + || Value::known(F::from(last_byte_idx as u64)), + )?; + + Ok(()) + }, + )?; + + Ok(()) + } +} + +impl LookupTable for BitstringAccumulationTable { + fn columns(&self) -> Vec> { + vec![ + self.byte_offset.into(), + self.byte_idx_1.into(), + self.byte_idx_2.into(), + self.byte_1.into(), + self.byte_2.into(), + self.bit_value.into(), + self.bitstring_len.into(), + self.bit_index.into(), + self.from_start.into(), + self.until_end.into(), + self.is_reverse.into(), + ] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("byte_offset"), + String::from("byte_idx_1"), + String::from("byte_idx_2"), + String::from("byte_1"), + String::from("byte_2"), + String::from("bit_value"), + String::from("bitstring_len"), + String::from("bit_index"), + String::from("from_start"), + String::from("until_end"), + String::from("is_reverse"), + ] + } +} diff --git a/zkevm-circuits/src/table/decompression/block_type_rom_table.rs b/zkevm-circuits/src/table/decompression/block_type_rom_table.rs new file mode 100644 index 0000000000..f74a5bad96 --- /dev/null +++ b/zkevm-circuits/src/table/decompression/block_type_rom_table.rs @@ -0,0 +1,115 @@ +use eth_types::Field; +use halo2_proofs::{ + circuit::{Layouter, Value}, + plonk::{Any, Column, ConstraintSystem, Error, Fixed}, +}; + +use crate::{table::LookupTable, witness::ZstdTag}; + +/// Read-only Memory table for 
the Decompression circuit. This table allows us a lookup argument +/// from the Decompression circuit to check if the next tag is correct based on which block type we +/// have encountered in the block header. Block type is denoted by 2 bits in the block header. +#[derive(Clone, Copy, Debug)] +pub struct BlockTypeRomTable { + /// Current tag. + tag: Column, + /// Lower bit. + lo_bit: Column, + /// Higher bit. + hi_bit: Column, + /// Tag that follows. + tag_next: Column, +} + +impl LookupTable for BlockTypeRomTable { + fn columns(&self) -> Vec> { + vec![ + self.tag.into(), + self.lo_bit.into(), + self.hi_bit.into(), + self.tag_next.into(), + ] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("tag"), + String::from("lo_bit"), + String::from("hi_bit"), + String::from("tag_next"), + ] + } +} + +impl BlockTypeRomTable { + /// Construct the ROM table. + pub fn construct(meta: &mut ConstraintSystem) -> Self { + Self { + tag: meta.fixed_column(), + lo_bit: meta.fixed_column(), + hi_bit: meta.fixed_column(), + tag_next: meta.fixed_column(), + } + } + + /// Load the ROM table. 
+ pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { + layouter.assign_region( + || "Zstd BlockType ROM table", + |mut region| { + for (i, &(tag, lo_bit, hi_bit, tag_next)) in [ + (ZstdTag::BlockHeader, 0, 0, ZstdTag::RawBlockBytes), + (ZstdTag::BlockHeader, 0, 1, ZstdTag::RleBlockBytes), + (ZstdTag::BlockHeader, 1, 0, ZstdTag::ZstdBlockLiteralsHeader), + ( + ZstdTag::ZstdBlockLiteralsHeader, + 0, + 0, + ZstdTag::ZstdBlockLiteralsRawBytes, + ), + ( + ZstdTag::ZstdBlockLiteralsHeader, + 0, + 1, + ZstdTag::ZstdBlockLiteralsRleBytes, + ), + ( + ZstdTag::ZstdBlockLiteralsHeader, + 1, + 0, + ZstdTag::ZstdBlockFseCode, + ), + ] + .iter() + .enumerate() + { + region.assign_fixed( + || "tag", + self.tag, + i, + || Value::known(F::from(tag as u64)), + )?; + region.assign_fixed( + || "lo_bit", + self.lo_bit, + i, + || Value::known(F::from(lo_bit)), + )?; + region.assign_fixed( + || "hi_bit", + self.hi_bit, + i, + || Value::known(F::from(hi_bit)), + )?; + region.assign_fixed( + || "tag_next", + self.tag_next, + i, + || Value::known(F::from(tag_next as u64)), + )?; + } + Ok(()) + }, + ) + } +} diff --git a/zkevm-circuits/src/table/decompression/decoded_literals_table.rs b/zkevm-circuits/src/table/decompression/decoded_literals_table.rs new file mode 100644 index 0000000000..5da15297d6 --- /dev/null +++ b/zkevm-circuits/src/table/decompression/decoded_literals_table.rs @@ -0,0 +1,172 @@ +use eth_types::Field; +use gadgets::{ + comparator::{ComparatorChip, ComparatorConfig}, + util::{and, not, Expr}, +}; +use halo2_proofs::{ + circuit::Layouter, + plonk::{Advice, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase}, + poly::Rotation, +}; + +use crate::{ + evm_circuit::util::constraint_builder::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::{LookupTable, RangeTable}, + util::Challenges, +}; + +/// Table that consists of every decoded literal byte. 
/// Table that consists of every decoded literal byte. Depending on the literals length from
/// sequences execution, we also accumulate RLC over contiguous bytes.
///
/// Lookups into this table expose `(huffman_byte_offset, byte_offset, decoded_byte)`; the
/// remaining columns are internal helpers for the RLC/length accumulation.
// NOTE(review): generic argument lists (e.g. on `Column`, `ComparatorConfig`, `Challenges`) are
// absent in this copy of the patch; confirm them against the upstream file.
#[derive(Clone, Debug)]
pub struct DecodedLiteralsTable {
    /// Fixed selector: every gate and the byte range lookup are conditioned on it.
    q_enable: Column,
    /// Fixed marker for the first row, where the accumulators are initialised.
    q_first: Column,
    /// Byte offset identifying the Huffman code a row belongs to.
    huffman_byte_offset: Column,
    /// Compares `huffman_byte_offset` on the previous row against the current row.
    huffman_byte_offset_cmp: ComparatorConfig,
    /// Byte offset of the row; stays equal or grows by one while the Huffman code is unchanged.
    byte_offset: Column,
    /// Boolean: when set on a row, the accumulators restart on the following row.
    is_boundary: Column,
    /// The decoded literal byte, range-checked against the `range256` table.
    decoded_byte: Column,
    /// Number of bytes accumulated since the last restart (1 on a restart row).
    decoded_literals_length: Column,
    /// RLC of the bytes accumulated since the last restart (second-phase advice).
    decoded_literals_rlc: Column,
}

impl DecodedLiteralsTable {
    /// Construct and constrain the decoded literals table.
    ///
    /// `challenges.keccak_input()` is the randomness of the RLC accumulator; `range256` backs
    /// both the comparator gadget and the range check on `decoded_byte`.
    pub fn construct(
        meta: &mut ConstraintSystem,
        challenges: Challenges>,
        range256: RangeTable<256>,
    ) -> Self {
        // These two columns are allocated up front because the comparator's closures capture
        // them before `table` exists.
        let q_enable = meta.fixed_column();
        let huffman_byte_offset = meta.advice_column();
        let table = Self {
            q_enable,
            q_first: meta.fixed_column(),
            huffman_byte_offset,
            huffman_byte_offset_cmp: ComparatorChip::configure(
                meta,
                |meta| meta.query_fixed(q_enable, Rotation::cur()),
                |meta| meta.query_advice(huffman_byte_offset, Rotation::prev()),
                |meta| meta.query_advice(huffman_byte_offset, Rotation::cur()),
                range256.into(),
            ),
            byte_offset: meta.advice_column(),
            is_boundary: meta.advice_column(),
            decoded_byte: meta.advice_column(),
            decoded_literals_length: meta.advice_column(),
            decoded_literals_rlc: meta.advice_column_in(SecondPhase),
        };

        // First row: the accumulators start from the row's own byte.
        meta.create_gate("DecodedLiteralsTable: first row", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            cb.require_equal(
                "init decoded literals RLC",
                meta.query_advice(table.decoded_literals_rlc, Rotation::cur()),
                meta.query_advice(table.decoded_byte, Rotation::cur()),
            );
            cb.require_equal(
                "init decoded literals length",
                meta.query_advice(table.decoded_literals_length, Rotation::cur()),
                1.expr(),
            );

            cb.gate(and::expr([
                meta.query_fixed(table.q_enable, Rotation::cur()),
                meta.query_fixed(table.q_first, Rotation::cur()),
            ]))
        });

        // Every enabled row.
        meta.create_gate("DecodedLiteralsTable: all rows", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            cb.require_boolean(
                "is_boundary is boolean",
                meta.query_advice(table.is_boundary, Rotation::cur()),
            );

            cb.gate(meta.query_fixed(table.q_enable, Rotation::cur()))
        });

        // Rows whose `huffman_byte_offset` equals the previous row's, i.e. rows that continue
        // the same Huffman code.
        meta.create_gate("DecodedLiteralsTable: instance of huffman code", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            // The difference to the previous row must be 0 or 1.
            cb.require_boolean(
                "byte_offset is increasing",
                meta.query_advice(table.byte_offset, Rotation::cur())
                    - meta.query_advice(table.byte_offset, Rotation::prev()),
            );

            // The boundary flag is read from the PREVIOUS row: it decides how the current row
            // relates to the accumulators it inherits.
            let crossed_boundary = meta.query_advice(table.is_boundary, Rotation::prev());

            // if not boundary, continue RLC.
            cb.condition(not::expr(crossed_boundary.expr()), |cb| {
                cb.require_equal(
                    "no boundary: continue decoded literals RLC",
                    meta.query_advice(table.decoded_literals_rlc, Rotation::cur()),
                    meta.query_advice(table.decoded_literals_rlc, Rotation::prev())
                        * challenges.keccak_input()
                        + meta.query_advice(table.decoded_byte, Rotation::cur()),
                );
                cb.require_equal(
                    "no boundary: continue decoded literals length",
                    meta.query_advice(table.decoded_literals_length, Rotation::cur()),
                    meta.query_advice(table.decoded_literals_length, Rotation::prev()) + 1.expr(),
                );
            });

            // if boundary, reset RLC.
            cb.condition(crossed_boundary.expr(), |cb| {
                cb.require_equal(
                    "crossed boundary: reset decoded literals RLC",
                    meta.query_advice(table.decoded_literals_rlc, Rotation::cur()),
                    meta.query_advice(table.decoded_byte, Rotation::cur()),
                );
                cb.require_equal(
                    "crossed boundary: reset decoded literals length",
                    meta.query_advice(table.decoded_literals_length, Rotation::cur()),
                    1.expr(),
                );
            });

            // Only the second comparator output (equality of previous and current
            // `huffman_byte_offset`) is used to enable this gate.
            let (_lt, huffman_code_unchanged) = table.huffman_byte_offset_cmp.expr(meta, None);
            cb.gate(and::expr([
                meta.query_fixed(table.q_enable, Rotation::cur()),
                huffman_code_unchanged,
            ]))
        });

        // Range check: on enabled rows `decoded_byte` must be present in `range256`.
        meta.lookup("DecodedLiteralsTable: decoded byte", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            vec![(
                condition * meta.query_advice(table.decoded_byte, Rotation::cur()),
                range256.into(),
            )]
        });

        table
    }

    /// Load witness to the table: dev mode.
    ///
    /// Not implemented yet: calling this panics via `unimplemented!()`.
    pub fn assign(&self, _layouter: &mut impl Layouter) -> Result<(), Error> {
        unimplemented!()
    }
}

impl LookupTable for DecodedLiteralsTable {
    /// Columns exposed to lookups: `huffman_byte_offset`, `byte_offset`, `decoded_byte`.
    fn columns(&self) -> Vec> {
        vec![
            self.huffman_byte_offset.into(),
            self.byte_offset.into(),
            self.decoded_byte.into(),
        ]
    }

    /// Names for the exposed columns, in the same order as `columns`.
    fn annotations(&self) -> Vec {
        vec![
            String::from("huffman_byte_offset"),
            String::from("byte_offset"),
            String::from("decoded_byte"),
        ]
    }
}
/// An auxiliary table used to ensure that the FSE table was reconstructed appropriately. Contrary
/// to the FseTable where the state is incremental, in the Auxiliary table we club together rows by
/// symbol. Which means, we will have rows with symbol s0 (and varying, but not necessarily
/// incremental states) clubbed together, followed by symbol s1 and so on.
///
/// | State | Symbol | Baseline | Nb  | Baseline Mark |
/// |-------|--------|----------|-----|---------------|
/// | 0x00  | s0     | ...      | ... | 0             |
/// | 0x01  | s0     | ...      | ... | 0             |
/// | 0x02  | s0     | ...      | ... | 0             |
/// | ...   | s0     | ...      | ... | ...           |
/// | 0x1d  | s0     | ...      | ... | 0             |
/// | 0x03  | s1 ->  | 0x10     | ... | 0             |
/// | 0x0c  | s1 ->  | 0x18     | ... | 0             |
/// | 0x11  | s1 ->  | 0x00     | ... | 1             |
/// | 0x15  | s1 ->  | 0x04     | ... | 1             |
/// | 0x1a  | s1 ->  | 0x08     | ... | 1             |
/// | 0x1e  | s1 ->  | 0x0c     | ... | 1             |
/// | 0x08  | s2     | ...      | ... | 0             |
/// | ...   | ...    | ...      | ... | 0             |
/// | 0x09  | s6     | ...      | ... | 0             |
///
/// Above is a representation of this table. Primarily we are interested in verifying that:
/// - next state (for the same symbol) was assigned correctly
/// - the number of times this symbol appears is assigned correctly
///
/// For more details, refer to the [FSE reconstruction][doclink] section.
///
/// [doclink]: https://nigeltao.github.io/blog/2022/zstandard-part-5-fse.html#fse-reconstruction
// NOTE(review): generic argument lists (e.g. on `Column`, `ComparatorConfig`, `IsEqualConfig`)
// are absent in this copy of the patch; confirm them against the upstream file.
#[derive(Clone, Debug)]
pub struct FseTable {
    /// Fixed column to denote whether the constraints will be enabled or not.
    pub q_enabled: Column,
    /// The byte offset within the data instance where the encoded FSE table begins. This is
    /// 1-indexed, i.e. byte_offset == 1 at the first byte.
    pub byte_offset: Column,
    /// Helper gadget comparing byte_offset on the current row with the next row. Its equality
    /// output marks rows that stay inside the same FSE table; its other output marks the last
    /// row of an FSE table.
    pub byte_offset_cmp: ComparatorConfig,
    /// The size of the FSE table that starts at byte_offset.
    pub table_size: Column,
    /// Helper column for (table_size >> 1).
    pub table_size_rs_1: Column,
    /// Helper column for (table_size >> 3).
    pub table_size_rs_3: Column,
    /// Incremental index.
    pub idx: Column,
    /// The symbol (weight) assigned to this state.
    pub symbol: Column,
    /// Helper gadget to know whether the symbol is the same or not (current row vs. next row).
    pub symbol_eq: IsEqualConfig,
    /// Represents the number of times this symbol appears in the FSE table. This value does not
    /// change while the symbol in the table remains the same.
    pub symbol_count: Column,
    /// An accumulator that resets to 1 each time we encounter a new symbol in the Auxiliary table
    /// and increments by 1 while the symbol remains the same. On the row where symbol' != symbol
    /// we have: symbol_count == symbol_count_acc.
    pub symbol_count_acc: Column,
    /// The state in FSE. In the Auxiliary table, it does not increment by 1. Instead, it follows:
    /// - state'' == state + table_size_rs_1 + table_size_rs_3 + 3
    /// - state'  == state'' & (table_size - 1)
    ///
    /// where state' is the next row's state.
    pub state: Column,
    /// Denotes the baseline field.
    pub baseline: Column,
    /// Helper column to mark the baseline observed at the last occurrence of a symbol.
    pub last_baseline: Column,
    /// The number of bits to be read from bitstream at this state.
    pub nb: Column,
    /// The smaller power of two assigned to this state. The following must hold:
    /// - 2 ^ nb == SPoT.
    pub spot: Column,
    /// An accumulator over SPoT value.
    pub spot_acc: Column,
    /// Helper column to remember the smallest spot for that symbol.
    pub smallest_spot: Column,
    /// Helper boolean column which is set only from baseline == 0x00.
    pub baseline_mark: Column,
}

impl FseTable {
    /// Construct the auxiliary table for FSE codes.
    ///
    /// Allocates the columns and registers every gate and lookup of the table:
    /// - `pow2_table` backs the `SPoT == 2 ^ Nb` lookup,
    /// - `bitwise_op_table` backs the AND in the next-state computation,
    /// - `range8` bounds the remainder of `table_size >> 3`,
    /// - `range256` is handed to the byte-offset comparator.
    pub fn construct(
        meta: &mut ConstraintSystem,
        bitwise_op_table: BitwiseOpTable,
        pow2_table: Pow2Table,
        range8: RangeTable<8>,
        range256: RangeTable<256>,
    ) -> Self {
        // Allocated up front: `q_enabled`, `byte_offset` and `symbol` are captured by the
        // gadget closures below before `table` exists.
        let q_enabled = meta.fixed_column();
        let byte_offset = meta.advice_column();
        let symbol = meta.advice_column();
        let spot = meta.advice_column();
        let smallest_spot = meta.advice_column();
        let table = Self {
            q_enabled,
            byte_offset,
            byte_offset_cmp: ComparatorChip::configure(
                meta,
                |meta| meta.query_fixed(q_enabled, Rotation::cur()),
                |meta| meta.query_advice(byte_offset, Rotation::cur()),
                |meta| meta.query_advice(byte_offset, Rotation::next()),
                range256.into(),
            ),
            table_size: meta.advice_column(),
            table_size_rs_1: meta.advice_column(),
            table_size_rs_3: meta.advice_column(),
            idx: meta.advice_column(),
            symbol,
            symbol_eq: IsEqualChip::configure(
                meta,
                |meta| meta.query_fixed(q_enabled, Rotation::cur()),
                |meta| meta.query_advice(symbol, Rotation::cur()),
                |meta| meta.query_advice(symbol, Rotation::next()),
            ),
            symbol_count: meta.advice_column(),
            symbol_count_acc: meta.advice_column(),
            state: meta.advice_column(),
            baseline: meta.advice_column(),
            last_baseline: meta.advice_column(),
            nb: meta.advice_column(),
            spot,
            spot_acc: meta.advice_column(),
            smallest_spot,
            baseline_mark: meta.advice_column(),
        };

        // All rows.
        meta.create_gate("FseAuxiliaryTable: all rows", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            cb.require_boolean(
                "baseline_mark == [0, 1]",
                meta.query_advice(table.baseline_mark, Rotation::cur()),
            );

            // Exactly one comparator output must hold, so byte_offset never decreases from
            // this row to the next.
            let (gt, eq) = table.byte_offset_cmp.expr(meta, None);
            cb.require_equal("byte offset is increasing", gt + eq, 1.expr());

            cb.gate(meta.query_fixed(table.q_enabled, Rotation::cur()))
        });

        // Validate SPoT assignment: all rows.
        meta.lookup_any("FseAuxiliaryTable: SPoT == 2 ^ Nb", |meta| {
            let condition = meta.query_fixed(table.q_enabled, Rotation::cur());

            [
                meta.query_advice(table.nb, Rotation::cur()),
                meta.query_advice(table.spot, Rotation::cur()),
            ]
            .into_iter()
            .zip(pow2_table.table_exprs(meta))
            .map(|(input, table)| (input * condition.clone(), table))
            .collect::>()
        });

        // Constraints while traversing an FSE table, i.e. byte_offset' == byte_offset.
        meta.create_gate("FseAuxiliaryTable: table size and helper columns", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            // Table size, and the right-shifted helper values remain unchanged.
            for col in [
                table.table_size,
                table.table_size_rs_1,
                table.table_size_rs_3,
            ] {
                cb.require_equal(
                    "while byte_offset' == byte_offset: table_size and helpers remain unchanged",
                    meta.query_advice(col, Rotation::next()),
                    meta.query_advice(col, Rotation::cur()),
                );
            }

            // Index is incremental.
            cb.require_equal(
                "idx' == idx + 1",
                meta.query_advice(table.idx, Rotation::next()),
                meta.query_advice(table.idx, Rotation::cur()) + 1.expr(),
            );

            // Symbols are grouped and ascending: the difference to the next row is 0 or 1.
            cb.require_boolean(
                "symbol' == symbol or symbol' == symbol + 1",
                meta.query_advice(table.symbol, Rotation::next())
                    - meta.query_advice(table.symbol, Rotation::cur()),
            );

            let (_gt, eq) = table.byte_offset_cmp.expr(meta, None);
            cb.gate(and::expr([
                meta.query_fixed(table.q_enabled, Rotation::cur()),
                eq,
            ]))
        });

        // Constraints for last row of an FSE table (byte_offset' > byte_offset).
        meta.create_gate("FseAuxiliaryTable: table shift right ops", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            // Constraint for table_size >> 1: the remainder table_size - 2 * table_size_rs_1
            // must be 0 or 1.
            cb.require_boolean(
                "table_size >> 1",
                meta.query_advice(table.table_size, Rotation::cur())
                    - (meta.query_advice(table.table_size_rs_1, Rotation::cur()) * 2.expr()),
            );

            // Constraint for idx == table_size.
            cb.require_equal(
                "idx == table_size",
                meta.query_advice(table.idx, Rotation::cur()),
                meta.query_advice(table.table_size, Rotation::cur()),
            );

            let (gt, _eq) = table.byte_offset_cmp.expr(meta, None);
            cb.gate(and::expr([
                meta.query_fixed(q_enabled, Rotation::cur()),
                gt,
            ]))
        });

        // Constraint for table_size >> 3. Only check on the last row: the remainder
        // table_size - 8 * table_size_rs_3 must be present in the range8 table.
        meta.lookup("FseAuxiliaryTable: table shift right ops", |meta| {
            let (gt, _eq) = table.byte_offset_cmp.expr(meta, None);
            let condition = and::expr([meta.query_fixed(q_enabled, Rotation::cur()), gt]);

            let range_value = meta.query_advice(table.table_size, Rotation::cur())
                - (meta.query_advice(table.table_size_rs_3, Rotation::cur()) * 8.expr());

            vec![(condition * range_value, range8.into())]
        });

        // Constraint for state' calculation. We wish to constrain:
        //
        // - state' == state'' & (table_size - 1)
        // - state'' == state + (table_size >> 3) + (table_size >> 1) + 3
        meta.lookup_any("FseAuxiliaryTable: next state computation", |meta| {
            let (_gt, eq) = table.byte_offset_cmp.expr(meta, None);
            let condition = and::expr([meta.query_fixed(table.q_enabled, Rotation::cur()), eq]);

            // lhs is state'', rhs is the mask (table_size - 1), output is the next row's state.
            let lhs = meta.query_advice(table.state, Rotation::cur())
                + meta.query_advice(table.table_size_rs_3, Rotation::cur())
                + meta.query_advice(table.table_size_rs_1, Rotation::cur())
                + 3.expr();
            let rhs = meta.query_advice(table.table_size, Rotation::cur()) - 1.expr();
            let output = meta.query_advice(table.state, Rotation::next());

            [BitwiseOp::AND.expr(), lhs, rhs, output]
                .into_iter()
                .zip(bitwise_op_table.table_exprs(meta))
                .map(|(input, table)| (input * condition.clone(), table))
                .collect::>()
        });

        // Constraints for same FSE table and same symbol.
        meta.create_gate("FseAuxiliaryTable: symbol' == symbol", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            // Symbol's count remains unchanged while symbol remained unchanged.
            cb.require_equal(
                "if symbol' == symbol: symbol_count' == symbol_count",
                meta.query_advice(table.symbol_count, Rotation::next()),
                meta.query_advice(table.symbol_count, Rotation::cur()),
            );

            // SPoT at baseline == 0x00 remains unchanged over these rows.
            cb.require_equal(
                "if symbol' == symbol: smallest SPoT is unchanged",
                meta.query_advice(table.smallest_spot, Rotation::next()),
                meta.query_advice(table.smallest_spot, Rotation::cur()),
            );

            // last baseline remains unchanged over these rows.
            cb.require_equal(
                "if symbol' == symbol: last baseline is unchanged",
                meta.query_advice(table.last_baseline, Rotation::next()),
                meta.query_advice(table.last_baseline, Rotation::cur()),
            );

            // Symbol count accumulator increments.
            cb.require_equal(
                "if symbol' == symbol: symbol count accumulator increments",
                meta.query_advice(table.symbol_count_acc, Rotation::next()),
                meta.query_advice(table.symbol_count_acc, Rotation::cur()) + 1.expr(),
            );

            // SPoT accumulation.
            cb.require_equal(
                "SPoT_acc::next == SPoT_acc::cur + SPoT::next",
                meta.query_advice(table.spot_acc, Rotation::next()),
                meta.query_advice(table.spot_acc, Rotation::cur())
                    + meta.query_advice(table.spot, Rotation::next()),
            );

            // baseline_mark can only transition from 0 to 1 once.
            cb.require_boolean(
                "baseline_mark transition",
                meta.query_advice(table.baseline_mark, Rotation::next())
                    - meta.query_advice(table.baseline_mark, Rotation::cur()),
            );

            // The 0 -> 1 transition of baseline_mark marks the row where the baseline restarts
            // at 0x00; on every other row the baseline grows by the current row's SPoT.
            let is_next_baseline_0x00 = meta.query_advice(table.baseline_mark, Rotation::next())
                - meta.query_advice(table.baseline_mark, Rotation::cur());
            cb.condition(is_next_baseline_0x00.expr(), |cb| {
                cb.require_equal(
                    "baseline::next == 0x00",
                    meta.query_advice(table.baseline, Rotation::next()),
                    0x00.expr(),
                );
            });
            cb.condition(not::expr(is_next_baseline_0x00.expr()), |cb| {
                cb.require_equal(
                    "baseline::next == baseline::cur + spot::cur",
                    meta.query_advice(table.baseline, Rotation::next()),
                    meta.query_advice(table.baseline, Rotation::cur())
                        + meta.query_advice(table.spot, Rotation::cur()),
                );
            });

            let (_gt, eq) = table.byte_offset_cmp.expr(meta, None);
            cb.gate(and::expr([
                meta.query_fixed(table.q_enabled, Rotation::cur()),
                eq,
                table.symbol_eq.expr(),
            ]))
        });

        // Constraints when symbol changes in an FSE table, i.e. symbol' != symbol.
        // Note: this gate is conditioned on q_enabled and the symbol comparison only, not on
        // the byte_offset comparator.
        meta.create_gate("FseAuxiliaryTable: symbol' != symbol", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            // On the last row of a symbol the accumulator has reached the symbol's count.
            cb.require_equal(
                "symbol_count_acc == symbol_count",
                meta.query_advice(table.symbol_count_acc, Rotation::cur()),
                meta.query_advice(table.symbol_count, Rotation::cur()),
            );

            // SPoT accumulator == table_size at the end of processing the symbol.
            cb.require_equal(
                "SPoT_acc == table_size",
                meta.query_advice(table.spot_acc, Rotation::cur()),
                meta.query_advice(table.table_size, Rotation::cur()),
            );

            // The SPoT at baseline == 0x00 matches this SPoT.
            cb.require_equal(
                "last symbol occurrence => SPoT == SPoT at baseline 0x00",
                meta.query_advice(table.smallest_spot, Rotation::cur()),
                meta.query_advice(table.spot, Rotation::cur()),
            );

            // last baseline matches.
            cb.require_equal(
                "baseline == last_baseline",
                meta.query_advice(table.baseline, Rotation::cur()),
                meta.query_advice(table.last_baseline, Rotation::cur()),
            );

            cb.gate(and::expr([
                meta.query_fixed(q_enabled, Rotation::cur()),
                not::expr(table.symbol_eq.expr()),
            ]))
        });

        // Constraints for the first occurrence of a particular symbol in the table, i.e. rows
        // where the previous row's symbol differs from the current one.
        meta.create_gate("FseAuxiliaryTable: new symbol", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            let is_baseline_marked = meta.query_advice(table.baseline_mark, Rotation::cur());
            cb.condition(is_baseline_marked.expr(), |cb| {
                cb.require_equal(
                    "baseline == 0x00",
                    meta.query_advice(table.baseline, Rotation::cur()),
                    0x00.expr(),
                );
            });

            cb.condition(not::expr(is_baseline_marked.expr()), |cb| {
                cb.require_equal(
                    "baseline == last_baseline + smallest_spot",
                    meta.query_advice(table.baseline, Rotation::cur()),
                    meta.query_advice(table.last_baseline, Rotation::cur())
                        + meta.query_advice(table.smallest_spot, Rotation::cur()),
                );
            });

            // Re-evaluate the symbol equality gadget one row up (prev vs. cur).
            let symbol_prev = meta.query_advice(table.symbol, Rotation::prev());
            let symbol_cur = meta.query_advice(table.symbol, Rotation::cur());
            cb.gate(and::expr([
                meta.query_fixed(table.q_enabled, Rotation::cur()),
                not::expr(
                    table
                        .symbol_eq
                        .expr_at(meta, Rotation::prev(), symbol_prev, symbol_cur),
                ),
            ]))
        });

        debug_assert!(meta.degree() <= 9);

        table
    }

    /// Load witness.
    ///
    /// Writes one row per `(symbol, state)` entry of every table in `data`, grouped by symbol
    /// in the iteration order of `sym_to_states`. Only the advice columns listed in the loop
    /// below are assigned; the items in the TODO list (including `q_enabled`) are not assigned
    /// yet.
    pub fn assign(
        &self,
        layouter: &mut impl Layouter,
        data: Vec,
    ) -> Result<(), Error> {
        layouter.assign_region(
            || "FseAuxiliaryTable: dev load",
            |mut region| {
                let mut offset = 0;
                for table in data.iter() {
                    // Values shared by every row of this FSE table.
                    let byte_offset = Value::known(F::from(table.byte_offset));
                    let table_size = Value::known(F::from(table.table_size));
                    let table_size_rs_1 = Value::known(F::from(table.table_size >> 1));
                    let table_size_rs_3 = Value::known(F::from(table.table_size >> 3));
                    for (&symbol, rows) in table.sym_to_states.iter() {
                        // Values shared by every row of this symbol.
                        let symbol_count = rows.len() as u64;
                        let smallest_spot = rows
                            .iter()
                            .map(|fse_row| 1 << fse_row.num_bits)
                            .min()
                            .expect("symbol should have at least 1 row");
                        // Running sum of 2^num_bits over the symbol's rows.
                        let spot_acc_iter = rows.iter().scan(0, |spot_acc, fse_row| {
                            *spot_acc += 1 << fse_row.num_bits;
                            Some(*spot_acc)
                        });
                        // TODO: byte_offset_cmp
                        // TODO: symbol_eq
                        // TODO: baseline_mark
                        // TODO: last_baseline
                        // TODO: q_enabled
                        for (i, (fse_row, spot_acc)) in rows.iter().zip(spot_acc_iter).enumerate() {
                            for (annotation, col, value) in [
                                ("byte_offset", self.byte_offset, byte_offset),
                                ("table_size", self.table_size, table_size),
                                ("table_size_rs_1", self.table_size_rs_1, table_size_rs_1),
                                ("table_size_rs_3", self.table_size_rs_3, table_size_rs_3),
                                ("symbol", self.symbol, Value::known(F::from(symbol as u64))),
                                (
                                    "symbol_count",
                                    self.symbol_count,
                                    Value::known(F::from(symbol_count)),
                                ),
                                (
                                    "symbol_count_acc",
                                    self.symbol_count_acc,
                                    Value::known(F::from(i as u64 + 1)),
                                ),
                                ("state", self.state, Value::known(F::from(fse_row.state))),
                                (
                                    "baseline",
                                    self.baseline,
                                    Value::known(F::from(fse_row.baseline)),
                                ),
                                ("nb", self.nb, Value::known(F::from(fse_row.num_bits))),
                                (
                                    "spot",
                                    self.spot,
                                    Value::known(F::from(1 << fse_row.num_bits)),
                                ),
                                (
                                    "smallest_spot",
                                    self.smallest_spot,
                                    Value::known(F::from(smallest_spot)),
                                ),
                                ("spot_acc", self.spot_acc, Value::known(F::from(spot_acc))),
                                ("idx", self.idx, Value::known(F::from(fse_row.idx))),
                            ] {
                                region.assign_advice(
                                    || format!("FseAuxiliaryTable: {}", annotation),
                                    col,
                                    offset,
                                    || value,
                                )?;
                            }
                            offset += 1;
                        }
                    }
                }

                Ok(())
            },
        )
    }
}

impl FseTable {
    /// Lookup table expressions for (state, symbol) tuple check.
    ///
    /// Current-row expressions, in order: `byte_offset`, `table_size`, `state`, `symbol`,
    /// `baseline`, `nb`.
    pub fn table_exprs_state_check(&self, meta: &mut VirtualCells) -> Vec> {
        vec![
            meta.query_advice(self.byte_offset, Rotation::cur()),
            meta.query_advice(self.table_size, Rotation::cur()),
            meta.query_advice(self.state, Rotation::cur()),
            meta.query_advice(self.symbol, Rotation::cur()),
            meta.query_advice(self.baseline, Rotation::cur()),
            meta.query_advice(self.nb, Rotation::cur()),
        ]
    }

    /// Lookup table expressions for (symbol, symbol_count) tuple check.
    ///
    /// Current-row expressions, in order: `byte_offset`, `table_size`, `symbol`,
    /// `symbol_count`.
    pub fn table_exprs_symbol_count_check(&self, meta: &mut VirtualCells) -> Vec> {
        vec![
            meta.query_advice(self.byte_offset, Rotation::cur()),
            meta.query_advice(self.table_size, Rotation::cur()),
            meta.query_advice(self.symbol, Rotation::cur()),
            meta.query_advice(self.symbol_count, Rotation::cur()),
        ]
    }
}
N_BITS_SYMBOL, N_MAX_SYMBOLS}, +}; + +/// The Huffman codes table maps the canonical weights (symbols as per FseTable) to the Huffman +/// codes. +#[derive(Clone, Debug)] +pub struct HuffmanCodesTable { + /// Fixed column to denote whether the constraints will be enabled or not. + pub q_enabled: Column, + /// Fixed column to mark the first row in the table. + pub q_first: Column, + /// Set when this is the start of a new huffman code. + pub is_start: Column, + /// The byte offset within the data instance where the encoded FSE table begins. This is + /// 1-indexed, i.e. byte_offset == 1 at the first byte. + pub byte_offset: Column, + /// Helper gadget to know when we are done handling a single canonical Huffman code. + pub byte_offset_cmp: ComparatorConfig, + /// The byte that is being encoded by a Huffman code. + pub symbol: Column, + /// The weight assigned to this symbol as per the canonical Huffman code weights. + pub weight: Column, + /// A binary representation of the weight's value. + pub weight_bits: BinaryNumberConfig, + /// An accumulator over the weight values. + pub weight_acc: Column, + /// Helper column to denote 2 ^ (weight - 1). + pub pow2_weight: Column, + /// The sum of canonical Huffman code weights. This value does not change over the rows for a + /// specific Huffman code. + pub sum_weights: Column, + /// The maximum length of a bitstring as per this Huffman code. Again, this value does not + /// change over the rows for a specific Huffman code. + pub max_bitstring_len: Column, + /// As per Huffman coding, every symbol is mapped to a bit value, which is then represented in + /// binary form (padded) of length bitstring_len. + pub bit_value: Column, + /// The last seen bit_value for each symbol in this Huffman coding. + pub last_bit_values: [Column; N_MAX_SYMBOLS], + /// The last_bit_values assigned at the first row of a table. + pub first_lbvs: [Column; N_MAX_SYMBOLS], +} + +impl HuffmanCodesTable { + /// Construct the huffman codes table. 
+ pub fn construct( + meta: &mut ConstraintSystem, + pow2_table: Pow2Table, + range256: RangeTable<256>, + ) -> Self { + let q_enabled = meta.fixed_column(); + let byte_offset = meta.advice_column(); + let weight = meta.advice_column(); + let table = Self { + q_enabled, + q_first: meta.fixed_column(), + byte_offset, + byte_offset_cmp: ComparatorChip::configure( + meta, + |meta| meta.query_fixed(q_enabled, Rotation::cur()), + |meta| meta.query_advice(byte_offset, Rotation::cur()), + |meta| meta.query_advice(byte_offset, Rotation::next()), + range256.into(), + ), + is_start: meta.advice_column(), + symbol: meta.advice_column(), + weight, + weight_bits: BinaryNumberChip::configure(meta, q_enabled, Some(weight.into())), + pow2_weight: meta.advice_column(), + weight_acc: meta.advice_column(), + sum_weights: meta.advice_column(), + max_bitstring_len: meta.advice_column(), + bit_value: meta.advice_column(), + last_bit_values: array_init(|_| meta.advice_column()), + first_lbvs: array_init(|_| meta.advice_column()), + }; + + // TODO: constrain is_start + + // All rows + meta.create_gate("HuffmanCodesTable: all rows", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let (gt, eq) = table.byte_offset_cmp.expr(meta, None); + cb.require_equal("byte_offset' >= byte_offset", gt + eq, 1.expr()); + + // Weight == 0 implies the bit value is 0. + cb.condition( + table + .weight_bits + .value_equals(FseSymbol::S0, Rotation::cur())(meta), + |cb| { + cb.require_zero( + "bit value == 0", + meta.query_advice(table.bit_value, Rotation::cur()), + ); + }, + ); + + // Last bit value at weight == 0 is also 0. + cb.require_zero( + "last_bit_values[0] == 0", + meta.query_advice( + table.last_bit_values[FseSymbol::S0 as usize], + Rotation::cur(), + ), + ); + + cb.gate(meta.query_fixed(table.q_enabled, Rotation::cur())) + }); + + // The first row of the HuffmanCodesTable. 
+ meta.create_gate("HuffmanCodesTable: first (fixed) row", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // Canonical Huffman code starts with the weight of the first symbol, i.e. 0x00. + cb.require_equal( + "symbol == 0x00", + meta.query_advice(table.symbol, Rotation::cur()), + 0x00.expr(), + ); + + // Weight accumulation starts with the first weight. + cb.require_equal( + "weight_acc == 2^(weight - 1)", + meta.query_advice(table.weight_acc, Rotation::cur()), + meta.query_advice(table.pow2_weight, Rotation::cur()), + ); + + // Constrain the last bit_value of the maximum bitstring length. Maximum bitstring + // length implies weight == 1. + cb.require_zero( + "if first row: last_bit_values[1] == 0", + meta.query_advice( + table.last_bit_values[FseSymbol::S1 as usize], + Rotation::cur(), + ), + ); + + // Do an equality check for the last_bit_values at the first row. + for i in FseSymbol::iter() { + cb.require_equal( + "last bit value at the first row equality check", + meta.query_advice(table.last_bit_values[i as usize], Rotation::cur()), + meta.query_advice(table.first_lbvs[i as usize], Rotation::cur()), + ); + } + + cb.gate(and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + meta.query_fixed(table.q_first, Rotation::cur()), + ])) + }); + + // While we are processing the weights of a particular canonical Huffman code + // representation, i.e. byte_offset == byte_offset'. + meta.create_gate( + "HuffmanCodesTable: traversing a canonical huffman coding table", + |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // Sum of weights remains the same across all rows. + cb.require_equal( + "sum_weights' == sum_weights", + meta.query_advice(table.sum_weights, Rotation::next()), + meta.query_advice(table.sum_weights, Rotation::cur()), + ); + + // Maximum bitstring length remains the same across all rows. 
+ cb.require_equal( + "max_bitstring_len' == max_bitstring_len", + meta.query_advice(table.max_bitstring_len, Rotation::next()), + meta.query_advice(table.max_bitstring_len, Rotation::cur()), + ); + + // The first row's last_bit_values remain the same. + for col in table.first_lbvs { + cb.require_equal( + "first_lbvs[i]' == first_lbvs[i]", + meta.query_advice(col, Rotation::next()), + meta.query_advice(col, Rotation::cur()), + ); + } + + // Weight accumulation is assigned correctly. + cb.require_equal( + "weight_acc' == weight_acc + 2^(weight - 1)", + meta.query_advice(table.weight_acc, Rotation::next()), + meta.query_advice(table.weight_acc, Rotation::cur()) + + meta.query_advice(table.pow2_weight, Rotation::next()), + ); + + // pow2_weight is assigned correctly for weight == 0. + cb.condition( + table + .weight_bits + .value_equals(FseSymbol::S0, Rotation::cur())(meta), + |cb| { + cb.require_zero( + "pow2_weight == 0 if weight == 0", + meta.query_advice(table.pow2_weight, Rotation::cur()), + ); + }, + ); + + // For all rows (except the first row of a canonical Huffman code representation, we + // want to ensure the last_bit_values was assigned correctly. + let is_start = meta.query_advice(table.is_start, Rotation::cur()); + cb.condition(not::expr(is_start.expr()), |cb| { + for (symbol, &last_bit_value) in + FseSymbol::iter().zip(table.last_bit_values.iter()) + { + cb.require_equal( + "last_bit_value_i::cur == last_bit_value::prev + (weight::cur == i)", + meta.query_advice(last_bit_value, Rotation::cur()), + meta.query_advice(last_bit_value, Rotation::prev()) + + table.weight_bits.value_equals(symbol, Rotation::cur())(meta), + ); + } + }); + + let (_gt, eq) = table.byte_offset_cmp.expr(meta, None); + cb.gate(and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + eq, + ])) + }, + ); + + // For every row, we want the pow2_weight column to be assigned correctly. We want: + // + // pow2_weight == 2^(weight - 1). 
+ // + // Note that this is valid only if weight > 0. For weight == 0, we want pow2_weight == 0. + meta.lookup_any("HuffmanCodesTable: pow2_weight assignment", |meta| { + let condition = and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + not::expr(table + .weight_bits + .value_equals(FseSymbol::S0, Rotation::cur())( + meta + )), + // TODO: add padding column. + ]); + + let exponent = meta.query_advice(table.weight, Rotation::cur()) - 1.expr(); + let exponentiation = meta.query_advice(table.pow2_weight, Rotation::cur()); + + [exponent, exponentiation] + .into_iter() + .zip(pow2_table.table_exprs(meta)) + .map(|(input, table)| (input * condition.clone(), table)) + .collect::>() + }); + + // When we end processing a huffman code, i.e. the byte_offset changes. No need to check if + // the next row is padding or not. + meta.create_gate("HuffmanCodesTable: end of huffman code", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // The total sum of weights is in fact the accumulated weight. + cb.require_equal( + "sum_weights == weight_acc", + meta.query_advice(table.sum_weights, Rotation::cur()), + meta.query_advice(table.weight_acc, Rotation::cur()), + ); + + // We want to check the following: + // + // if lbv_1: The last bit_value for weight i on the first row. + // if lbv_2: The last bit_value for weight i+1 on the last row. + // + // then lbv_2 == (lbv_1 + 1) // 2 + // i.e. lbv_2 * 2 - lbv_1 is boolean. + // + // Note: we only do this check for weight > 0, hence we skip the FseSymbol::S0. 
+ for i in [ + FseSymbol::S1, + FseSymbol::S2, + FseSymbol::S3, + FseSymbol::S4, + FseSymbol::S5, + FseSymbol::S6, + ] { + let i = i as usize; + let lbv_1 = meta.query_advice(table.first_lbvs[i], Rotation::cur()); + let lbv_2 = meta.query_advice(table.last_bit_values[i + 1], Rotation::cur()); + cb.require_boolean( + "last bit value check for weights i and i+1 on the first and last rows", + lbv_2 * 2.expr() - lbv_1, + ); + } + + let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); + cb.gate(and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + gt, + ])) + }); + + // The weight for the last symbol is assigned appropriately. The weight for the last + // symbol should satisfy: + // + // last_weight == log2(nearest_pow2 - sum_weights) + 1 + // where nearest_pow2 is the nearest power of 2 greater than the sum of weights so far. + // + // i.e. 2^(last_weight - 1) + sum_weights == 2^(max_bitstring_len) + meta.lookup_any("HuffmanCodesTable: weight of the last symbol", |meta| { + let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); + let condition = and::expr([meta.query_fixed(table.q_enabled, Rotation::cur()), gt]); + + let exponent = meta.query_advice(table.max_bitstring_len, Rotation::cur()); + let exponentiation = meta.query_advice(table.pow2_weight, Rotation::cur()) + + meta.query_advice(table.sum_weights, Rotation::prev()); + + [exponent, exponentiation] + .into_iter() + .zip(pow2_table.table_exprs(meta)) + .map(|(input, table)| (input * condition.clone(), table)) + .collect::>() + }); + + // When we transition from one Huffman code to another, i.e. the byte_offset changes. We + // also check that the next row is not a padding row. + // + // TODO: add the padding column. + meta.create_gate("HuffmanCodesTable: new huffman code", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // Marks the start of a new huffman code. 
+ cb.require_equal( + "is_start == 1", + meta.query_advice(table.is_start, Rotation::next()), + 1.expr(), + ); + + // Canonical Huffman code starts with the weight of the first symbol, i.e. 0x00. + cb.require_equal( + "symbol == 0x00", + meta.query_advice(table.symbol, Rotation::next()), + 0x00.expr(), + ); + + // Weight accumulation starts with the first weight. + cb.require_equal( + "weight_acc == 2^(weight - 1)", + meta.query_advice(table.weight_acc, Rotation::next()), + meta.query_advice(table.pow2_weight, Rotation::next()), + ); + + // Constrain the last bit_value of the maximum bitstring length. Maximum bitstring + // length implies weight == 1. + cb.require_zero( + "if first row: last_bit_values[1] == 0", + meta.query_advice( + table.last_bit_values[FseSymbol::S1 as usize], + Rotation::next(), + ), + ); + + // Do an equality check for the last_bit_values at the first row. + for i in FseSymbol::iter() { + cb.require_equal( + "last bit value at the first row equality check", + meta.query_advice(table.last_bit_values[i as usize], Rotation::next()), + meta.query_advice(table.first_lbvs[i as usize], Rotation::next()), + ); + } + + let (gt, _eq) = table.byte_offset_cmp.expr(meta, None); + cb.gate(and::expr([ + meta.query_fixed(table.q_enabled, Rotation::cur()), + meta.query_fixed(table.q_enabled, Rotation::next()), + gt, + ])) + }); + + debug_assert!(meta.degree() <= 9); + + table + } + + /// Load witness to the huffman codes table: dev mode. 
+ pub fn assign( + &self, + layouter: &mut impl Layouter, + data: Vec, + ) -> Result<(), Error> { + layouter.assign_region( + || "HuffmanCodesTable: dev load", + |mut region| { + let weight_bits = BinaryNumberChip::construct(self.weight_bits); + let mut offset = 0; + for code in data.iter() { + let byte_offset = Value::known(F::from(code.byte_offset)); + let (max_bitstring_len, sym_map) = code.parse_canonical(); + + let max_bitstring_len = Value::known(F::from(max_bitstring_len)); + let sum_weights = Value::known(F::from( + sym_map + .values() + .map(|(weight, _bit_value)| weight) + .sum::(), + )); + let weight_acc_iter = sym_map.values().scan(0, |acc, (weight, _bit_value)| { + *acc += weight; + Some(*acc) + }); + + for (i, weight_acc) in weight_acc_iter.enumerate() { + region.assign_advice( + || "HuffmanCodesTable: weight_acc", + self.weight_acc, + offset + i, + || Value::known(F::from(weight_acc)), + )?; + } + for (&symbol, &(weight, bit_value)) in sym_map.iter() { + for (annotation, column, value) in [ + ("byte_offset", self.byte_offset, byte_offset), + ( + "max_bitstring_len", + self.max_bitstring_len, + max_bitstring_len, + ), + ("sum_weights", self.sum_weights, sum_weights), + ("symbol", self.symbol, Value::known(F::from(symbol))), + ("weight", self.weight, Value::known(F::from(weight))), + ( + "bit_value", + self.bit_value, + Value::known(F::from(bit_value)), + ), + ( + "pow2_weight", + self.pow2_weight, + Value::known(F::from(if weight > 0 { + (weight - 1).pow(2) + } else { + 0 + })), + ), + ] { + region.assign_advice( + || format!("HuffmanCodesTable: {annotation}"), + column, + offset, + || value, + )?; + } + let fse_symbol: FseSymbol = (weight as usize).into(); + weight_bits.assign(&mut region, offset, &fse_symbol)?; + + offset += 1; + } + + // TODO: assign last_bit_values + } + + // Assign the byte offset comparison gadget. + let cmp_chip = ComparatorChip::construct(self.byte_offset_cmp.clone()); + offset = 0; + + // if there is a single table. 
+ if data.len() == 1 { + let byte_offset = data[0].byte_offset; + let n_rows = data[0].weights.len() + 1; + for _ in 0..n_rows - 1 { + cmp_chip.assign( + &mut region, + offset, + F::from(byte_offset), + F::from(byte_offset), + )?; + offset += 1; + } + cmp_chip.assign(&mut region, offset, F::from(byte_offset), F::zero())?; + } + + // if there are multiple tables. + if data.len() > 1 { + for window in data.windows(2) { + let byte_offset_1 = window[0].byte_offset; + let byte_offset_2 = window[1].byte_offset; + let n_rows = window[0].weights.len() + 1; + for _ in 0..n_rows - 1 { + cmp_chip.assign( + &mut region, + offset, + F::from(byte_offset_1), + F::from(byte_offset_1), + )?; + offset += 1; + } + cmp_chip.assign( + &mut region, + offset, + F::from(byte_offset_1), + F::from(byte_offset_2), + )?; + offset += 1; + } + // handle the last table. + if let Some(last_table) = data.last() { + let byte_offset = last_table.byte_offset; + let n_rows = last_table.weights.len() + 1; + for _ in 0..n_rows - 1 { + cmp_chip.assign( + &mut region, + offset, + F::from(byte_offset), + F::from(byte_offset), + )?; + offset += 1; + } + cmp_chip.assign(&mut region, offset, F::from(byte_offset), F::zero())?; + } + } + + Ok(()) + }, + ) + } +} + +impl HuffmanCodesTable { + /// Lookup the canonical weight assigned to a symbol in the Huffman code with the header at + /// the given byte_offset. + pub fn table_exprs_canonical_weight(&self, meta: &mut VirtualCells) -> Vec> { + vec![ + meta.query_advice(self.byte_offset, Rotation::cur()), + meta.query_advice(self.symbol, Rotation::cur()), + meta.query_advice(self.weight, Rotation::cur()), + ] + } + + /// Lookup the number of symbols that are present in the canonical representation of the + /// Huffman code. 
+ pub fn table_exprs_weights_count(&self, meta: &mut VirtualCells) -> Vec> { + vec![ + meta.query_advice(self.byte_offset, Rotation::cur()), + meta.query_advice(self.symbol, Rotation::cur()), + // TODO: add is_last to mark the last row of a specific Huffman code. + ] + } +} diff --git a/zkevm-circuits/src/table/decompression/literals_header_rom_table.rs b/zkevm-circuits/src/table/decompression/literals_header_rom_table.rs new file mode 100644 index 0000000000..c0e69a9d73 --- /dev/null +++ b/zkevm-circuits/src/table/decompression/literals_header_rom_table.rs @@ -0,0 +1,123 @@ +use eth_types::Field; +use halo2_proofs::{ + circuit::{Layouter, Value}, + plonk::{Any, Column, ConstraintSystem, Error, Fixed}, +}; + +use crate::table::LookupTable; + +/// Read-only memory table for zstd block's literals header. +#[derive(Clone, Copy, Debug)] +pub struct LiteralsHeaderRomTable { + /// Block type first bit. + block_type_bit0: Column, + /// Block type second bit. + block_type_bit1: Column, + /// Size format first bit. + size_format_bit0: Column, + /// Size format second bit. + size_format_bit1: Column, + /// Number of bytes occupied by the literals header. + n_bytes_header: Column, + /// Number of literal streams to be decoded. + n_lstreams: Column, + /// The branch we take to decompose the literals header. There are a total of 7 branches that + /// can be used to decompose the literals header, namely: + /// + /// - block_type == Raw/RLE and size_format == 00 or 10 + /// - block_type == Raw/RLE and size_format == 01 + /// - block_type == Raw/RLE and size_format == 11 + /// - block_type == Compressed and size_format == 00 or 01 + /// - block_type == Compressed and size_format == 10 + /// - block_type == Compressed and size_format == 11 + branch: Column, + // size format == 0b11? 
+ is_size_format_0b11: Column, +} + +impl LookupTable for LiteralsHeaderRomTable { + fn columns(&self) -> Vec> { + vec![ + self.block_type_bit0.into(), + self.block_type_bit1.into(), + self.size_format_bit0.into(), + self.size_format_bit1.into(), + self.n_bytes_header.into(), + self.n_lstreams.into(), + self.branch.into(), + self.is_size_format_0b11.into(), + ] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("block_type_bit0"), + String::from("block_type_bit1"), + String::from("size_format_bit0"), + String::from("size_format_bit1"), + String::from("n_bytes_header"), + String::from("n_lstreams"), + String::from("branch"), + String::from("is_size_format_0b11"), + ] + } +} + +impl LiteralsHeaderRomTable { + /// Construct the ROM table. + pub fn construct(meta: &mut ConstraintSystem) -> Self { + Self { + block_type_bit0: meta.fixed_column(), + block_type_bit1: meta.fixed_column(), + size_format_bit0: meta.fixed_column(), + size_format_bit1: meta.fixed_column(), + n_bytes_header: meta.fixed_column(), + n_lstreams: meta.fixed_column(), + branch: meta.fixed_column(), + is_size_format_0b11: meta.fixed_column(), + } + } + + /// Load the ROM table. 
+ pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { + layouter.assign_region( + || "LiteralsHeader ROM table", + |mut region| { + // Refer: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header + for (i, row) in [ + [0, 0, 0, 0, 1, 0, 0, 0], // Raw: 1 byte header + [0, 0, 0, 1, 1, 0, 0, 0], // Raw: 1 byte header + [0, 0, 1, 0, 2, 0, 1, 0], // Raw: 2 bytes header + [0, 0, 1, 1, 3, 0, 2, 1], // Raw: 3 bytes header + [1, 0, 0, 0, 1, 0, 0, 0], // RLE: 1 byte header + [1, 0, 0, 1, 1, 0, 0, 0], // RLE: 1 byte header + [1, 0, 1, 0, 2, 0, 1, 0], // RLE: 2 bytes header + [1, 0, 1, 1, 3, 0, 2, 1], // RLE: 3 bytes header + [0, 1, 0, 0, 3, 0, 3, 0], // Compressed: 3 bytes header + [0, 1, 1, 0, 3, 1, 3, 0], // Compressed: 3 bytes header + [0, 1, 0, 1, 4, 1, 4, 0], // Compressed: 4 bytes header + [0, 1, 1, 1, 5, 1, 5, 1], // Compressed: 5 bytes header + ] + .iter() + .enumerate() + { + for (&column, (&value, annotation)) in + >::fixed_columns(self).iter().zip( + row.iter() + .zip(>::annotations(self).iter()), + ) + { + region.assign_fixed( + || format!("{annotation} at offset={i}"), + column, + i, + || Value::known(F::from(value)), + )?; + } + } + + Ok(()) + }, + ) + } +} diff --git a/zkevm-circuits/src/table/decompression/literals_header_table.rs b/zkevm-circuits/src/table/decompression/literals_header_table.rs new file mode 100644 index 0000000000..666c936c67 --- /dev/null +++ b/zkevm-circuits/src/table/decompression/literals_header_table.rs @@ -0,0 +1,451 @@ +use eth_types::Field; +use gadgets::{ + binary_number::{BinaryNumberChip, BinaryNumberConfig}, + impl_expr, + util::{and, not, Expr}, +}; +use halo2_proofs::{ + circuit::{Layouter, Value}, + plonk::{Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed, VirtualCells}, + poly::Rotation, +}; +use strum_macros::EnumIter; + +use crate::{ + evm_circuit::util::constraint_builder::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::{BitwiseOp, 
BitwiseOpTable, LookupTable, RangeTable},
};

/// Different branches that can be taken while calculating regenerated size and compressed size in
/// the Literals Header.
#[derive(Clone, Copy, Debug, EnumIter)]
pub enum LiteralsHeaderBranch {
    /// Raw/RLE block type with size_format 00 or 10.
    RawRle0 = 0,
    /// Raw/RLE block type with size format 01.
    RawRle1,
    /// Raw/RLE block type with size format 11.
    RawRle2,
    /// Compressed block type with size format 00 or 01.
    Compressed0,
    /// Compressed block type with size format 10.
    Compressed1,
    /// Compressed block type with size format 11.
    Compressed2,
}

impl_expr!(LiteralsHeaderBranch);

impl From<u64> for LiteralsHeaderBranch {
    /// Maps the branch index `0..=5` to its variant.
    ///
    /// # Panics
    ///
    /// Panics for any value greater than 5.
    fn from(value: u64) -> Self {
        match value {
            0 => Self::RawRle0,
            1 => Self::RawRle1,
            2 => Self::RawRle2,
            3 => Self::Compressed0,
            4 => Self::Compressed1,
            5 => Self::Compressed2,
            _ => unreachable!("LiteralsHeaderBranch only from 0..=5"),
        }
    }
}

impl From<LiteralsHeaderBranch> for usize {
    /// Returns the branch's discriminant.
    fn from(value: LiteralsHeaderBranch) -> Self {
        value as usize
    }
}

/// Helper table to calculate regenerated and compressed size from the Literals Header.
#[derive(Clone, Debug)]
pub struct LiteralsHeaderTable {
    /// Whether to enable.
    pub q_enable: Column<Fixed>,
    /// Byte offset at which this literals header is located.
    pub byte_offset: Column<Advice>,
    /// The branch taken for this literals header.
    pub branch: Column<Advice>,
    /// To identify the branch.
    pub branch_bits: BinaryNumberConfig<LiteralsHeaderBranch, 3>,
    /// The first byte of the literals header.
    pub byte0: Column<Advice>,
    /// The second byte.
    pub byte1: Column<Advice>,
    /// The third byte.
    pub byte2: Column<Advice>,
    /// The fourth byte.
    pub byte3: Column<Advice>,
    /// The fifth byte.
    pub byte4: Column<Advice>,
    /// byte0 >> 3.
    pub byte0_rs_3: Column<Advice>,
    /// byte0 >> 4.
    pub byte0_rs_4: Column<Advice>,
    /// byte1 >> 6.
    pub byte1_rs_6: Column<Advice>,
    /// byte1 & 0b111111.
    pub byte1_and_63: Column<Advice>,
    /// byte2 >> 2.
    pub byte2_rs_2: Column<Advice>,
    /// byte2 >> 6.
    pub byte2_rs_6: Column<Advice>,
    /// byte2 & 0b11.
    pub byte2_and_3: Column<Advice>,
    /// byte2 & 0b111111.
    pub byte2_and_63: Column<Advice>,
    /// Regenerated size.
    pub regen_size: Column<Advice>,
    /// Compressed size.
    pub compr_size: Column<Advice>,
}

impl LiteralsHeaderTable {
    /// Construct and constrain the literals header table.
    ///
    /// Registers the following on `meta`:
    ///
    /// - One gate that ties `regen_size` / `compr_size` to the header bytes for each of the six
    ///   branches, and forces the bytes a branch does not use to zero.
    /// - Range lookups that check each `byteN_rs_K` helper column by requiring
    ///   `byteN - (byteN_rs_K << K)` to be in `[0, 2^K)`.
    /// - Bitwise-AND lookups into `bitwise_op_table` for the `byteN_and_M` helper columns.
    ///
    /// The order in which columns, gates and lookups are created below is part of the circuit
    /// layout and must not be changed casually.
    pub fn construct<F: Field>(
        meta: &mut ConstraintSystem<F>,
        bitwise_op_table: BitwiseOpTable,
        range4: RangeTable<4>,
        range8: RangeTable<8>,
        range16: RangeTable<16>,
        range64: RangeTable<64>,
    ) -> Self {
        let q_enable = meta.fixed_column();
        let branch = meta.advice_column();
        let table = Self {
            q_enable,
            byte_offset: meta.advice_column(),
            branch,
            branch_bits: BinaryNumberChip::configure(meta, q_enable, Some(branch.into())),
            byte0: meta.advice_column(),
            byte1: meta.advice_column(),
            byte2: meta.advice_column(),
            byte3: meta.advice_column(),
            byte4: meta.advice_column(),
            byte0_rs_3: meta.advice_column(),
            byte0_rs_4: meta.advice_column(),
            byte1_rs_6: meta.advice_column(),
            byte1_and_63: meta.advice_column(),
            byte2_rs_2: meta.advice_column(),
            byte2_rs_6: meta.advice_column(),
            byte2_and_3: meta.advice_column(),
            byte2_and_63: meta.advice_column(),
            regen_size: meta.advice_column(),
            compr_size: meta.advice_column(),
        };

        // Defines a closure `$var` that yields the "current row is on branch
        // `$branch_variant`" expression.
        macro_rules! is_branch {
            ($var:ident, $branch_variant:ident) => {
                let $var = |meta: &mut VirtualCells<F>| {
                    table
                        .branch_bits
                        .value_equals(LiteralsHeaderBranch::$branch_variant, Rotation::cur())(
                        meta
                    )
                };
            };
        }

        is_branch!(branch0, RawRle0);
        is_branch!(branch1, RawRle1);
        is_branch!(branch2, RawRle2);
        is_branch!(branch3, Compressed0);
        is_branch!(branch4, Compressed1);
        is_branch!(branch5, Compressed2);

        meta.create_gate("LiteralsHeaderTable", |meta| {
            let mut cb = BaseConstraintBuilder::default();

            // Naming: `_rs_K` is a right shift by K bits (a helper column), `_ls_K` is a left
            // shift by K bits (a multiplication by 2^K).
            let byte0_rs_3 = meta.query_advice(table.byte0_rs_3, Rotation::cur());
            let byte0_rs_4 = meta.query_advice(table.byte0_rs_4, Rotation::cur());
            let byte1_ls_4 = meta.query_advice(table.byte1, Rotation::cur()) * 16.expr();
            let byte1_and_63_ls_4 =
                meta.query_advice(table.byte1_and_63, Rotation::cur()) * 16.expr();
            let byte1_rs_6 = meta.query_advice(table.byte1_rs_6, Rotation::cur());
            let byte2_rs_2 = meta.query_advice(table.byte2_rs_2, Rotation::cur());
            let byte2_rs_6 = meta.query_advice(table.byte2_rs_6, Rotation::cur());
            let byte2_ls_2 = meta.query_advice(table.byte2, Rotation::cur()) * 4.expr();
            let byte2_ls_12 = meta.query_advice(table.byte2, Rotation::cur()) * 4096.expr();
            let byte2_and_3_ls_12 =
                meta.query_advice(table.byte2_and_3, Rotation::cur()) * 4096.expr();
            let byte2_and_63_ls_12 =
                meta.query_advice(table.byte2_and_63, Rotation::cur()) * 4096.expr();
            let byte3_ls_6 = meta.query_advice(table.byte3, Rotation::cur()) * 64.expr();
            let byte3_ls_2 = meta.query_advice(table.byte3, Rotation::cur()) * 4.expr();
            let byte4_ls_10 = meta.query_advice(table.byte4, Rotation::cur()) * 1024.expr();

            // regen_size == lh_byte[0] >> 3.
            // compr_size == 0.
            cb.condition(branch0(meta), |cb| {
                cb.require_equal(
                    "branch0: regenerated size",
                    meta.query_advice(table.regen_size, Rotation::cur()),
                    byte0_rs_3,
                );
                cb.require_zero(
                    "branch0: compressed size",
                    meta.query_advice(table.compr_size, Rotation::cur()),
                );
                for col in [table.byte1, table.byte2, table.byte3, table.byte4] {
                    cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur()));
                }
            });

            // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4).
            // compr_size == 0.
            cb.condition(branch1(meta), |cb| {
                cb.require_equal(
                    "branch1: regenerated size",
                    meta.query_advice(table.regen_size, Rotation::cur()),
                    byte0_rs_4.expr() + byte1_ls_4.expr(),
                );
                cb.require_zero(
                    "branch1: compressed size",
                    meta.query_advice(table.compr_size, Rotation::cur()),
                );
                for col in [table.byte2, table.byte3, table.byte4] {
                    cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur()));
                }
            });

            // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4) + (lh_byte[2] << 12).
            // compr_size == 0.
            cb.condition(branch2(meta), |cb| {
                cb.require_equal(
                    "branch2: regenerated size",
                    meta.query_advice(table.regen_size, Rotation::cur()),
                    byte0_rs_4.expr() + byte1_ls_4.expr() + byte2_ls_12,
                );
                cb.require_zero(
                    "branch2: compressed size",
                    meta.query_advice(table.compr_size, Rotation::cur()),
                );
                for col in [table.byte3, table.byte4] {
                    cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur()));
                }
            });

            // regen_size == (lh_byte[0] >> 4) + ((lh_byte[1] & 0b111111) << 4).
            // compr_size == (lh_byte[1] >> 6) + (lh_byte[2] << 2).
            cb.condition(branch3(meta), |cb| {
                cb.require_equal(
                    "branch3: regenerated size",
                    meta.query_advice(table.regen_size, Rotation::cur()),
                    byte0_rs_4.expr() + byte1_and_63_ls_4,
                );
                cb.require_equal(
                    "branch3: compressed size",
                    meta.query_advice(table.compr_size, Rotation::cur()),
                    byte1_rs_6 + byte2_ls_2.expr(),
                );
                for col in [table.byte3, table.byte4] {
                    cb.require_zero("byte[i] == 0", meta.query_advice(col, Rotation::cur()));
                }
            });

            // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4) + ((lh_byte[2] & 0b11) << 12).
            // compr_size == (lh_byte[2] >> 2) + (lh_byte[3] << 6).
            cb.condition(branch4(meta), |cb| {
                cb.require_equal(
                    "branch4: regenerated size",
                    meta.query_advice(table.regen_size, Rotation::cur()),
                    byte0_rs_4.expr() + byte1_ls_4.expr() + byte2_and_3_ls_12,
                );
                cb.require_equal(
                    "branch4: compressed size",
                    meta.query_advice(table.compr_size, Rotation::cur()),
                    byte2_rs_2 + byte3_ls_6,
                );
                cb.require_zero(
                    "byte[i] == 0",
                    meta.query_advice(table.byte4, Rotation::cur()),
                );
            });

            // regen_size == (lh_byte[0] >> 4) + (lh_byte[1] << 4)
            //               + ((lh_byte[2] & 0b111111) << 12).
            // compr_size == (lh_byte[2] >> 6) + (lh_byte[3] << 2) + (lh_byte[4] << 10).
            cb.condition(branch5(meta), |cb| {
                cb.require_equal(
                    "branch5: regenerated size",
                    meta.query_advice(table.regen_size, Rotation::cur()),
                    byte0_rs_4 + byte1_ls_4 + byte2_and_63_ls_12,
                );
                cb.require_equal(
                    "branch5: compressed size",
                    meta.query_advice(table.compr_size, Rotation::cur()),
                    byte2_rs_6 + byte3_ls_2 + byte4_ls_10,
                );
            });

            cb.gate(meta.query_fixed(table.q_enable, Rotation::cur()))
        });
        // Right-shift helpers: the remainder byteN - (byteN_rs_K * 2^K) must lie in [0, 2^K).
        meta.lookup("LiteralsHeaderTable: byte0 >> 3", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            let range_value = meta.query_advice(table.byte0, Rotation::cur())
                - (meta.query_advice(table.byte0_rs_3, Rotation::cur()) * 8.expr());

            vec![(condition * range_value, range8.into())]
        });
        meta.lookup("LiteralsHeaderTable: byte0 >> 4", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            let range_value = meta.query_advice(table.byte0, Rotation::cur())
                - (meta.query_advice(table.byte0_rs_4, Rotation::cur()) * 16.expr());

            vec![(condition * range_value, range16.into())]
        });
        meta.lookup("LiteralsHeaderTable: byte1 >> 6", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            let range_value = meta.query_advice(table.byte1, Rotation::cur())
                - (meta.query_advice(table.byte1_rs_6, Rotation::cur()) * 64.expr());

            vec![(condition * range_value, range64.into())]
        });
        meta.lookup("LiteralsHeaderTable: byte2 >> 2", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            let range_value = meta.query_advice(table.byte2, Rotation::cur())
                - (meta.query_advice(table.byte2_rs_2, Rotation::cur()) * 4.expr());

            vec![(condition * range_value, range4.into())]
        });
        meta.lookup("LiteralsHeaderTable: byte2 >> 6", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            let range_value = meta.query_advice(table.byte2, Rotation::cur())
                - (meta.query_advice(table.byte2_rs_6, Rotation::cur()) * 64.expr());

            vec![(condition * range_value, range64.into())]
        });
        // Bitwise-AND helpers, checked against the bitwise op table. The `byte1 & 63` lookup is
        // additionally disabled on branch0.
        meta.lookup_any("LiteralsHeaderTable: byte1 & 63", |meta| {
            let condition = and::expr([
                meta.query_fixed(table.q_enable, Rotation::cur()),
                not::expr(branch0(meta)),
            ]);
            [
                BitwiseOp::AND.expr(),
                meta.query_advice(table.byte1, Rotation::cur()),
                63.expr(),
                meta.query_advice(table.byte1_and_63, Rotation::cur()),
            ]
            .into_iter()
            .zip(bitwise_op_table.table_exprs(meta))
            .map(|(input, table)| (input * condition.clone(), table))
            .collect::<Vec<_>>()
        });
        meta.lookup_any("LiteralsHeaderTable: byte2 & 3", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            [
                BitwiseOp::AND.expr(),
                meta.query_advice(table.byte2, Rotation::cur()),
                3.expr(),
                meta.query_advice(table.byte2_and_3, Rotation::cur()),
            ]
            .into_iter()
            .zip(bitwise_op_table.table_exprs(meta))
            .map(|(input, table)| (input * condition.clone(), table))
            .collect::<Vec<_>>()
        });
        meta.lookup_any("LiteralsHeaderTable: byte2 & 63", |meta| {
            let condition = meta.query_fixed(table.q_enable, Rotation::cur());
            [
                BitwiseOp::AND.expr(),
                meta.query_advice(table.byte2, Rotation::cur()),
                63.expr(),
                meta.query_advice(table.byte2_and_63, Rotation::cur()),
            ]
            .into_iter()
            .zip(bitwise_op_table.table_exprs(meta))
            .map(|(input, table)| (input * condition.clone(), table))
            .collect::<Vec<_>>()
        });

        debug_assert!(meta.degree() <= 9);

        table
    }

    /// Assign witness to the literals header table.
+ pub fn assign( + &self, + layouter: &mut impl Layouter, + literals_headers: &[(u64, &[u8], u64, u64, u64)], /* (byte_offset, bytes, branch, + * regen_size, compr_size) */ + ) -> Result<(), Error> { + layouter.assign_region( + || "LiteralsHeaderTable", + |mut region| { + for (offset, &(byte_offset, header, branch, regen_size, compr_size)) in + literals_headers.iter().enumerate() + { + assert!(header.len() <= 5); + let [byte0, byte1, byte2, byte3, byte4] = [0, 1, 2, 3, 4] + .map(|i| header.get(i).cloned().map_or(0u64, |byte| byte as u64)); + region.assign_fixed( + || "q_enable", + self.q_enable, + offset, + || Value::known(F::one()), + )?; + for (col, value, annotation) in [ + (self.byte_offset, byte_offset, "byte_offset"), + (self.branch, branch, "branch"), + (self.byte0, byte0, "byte0"), + (self.byte1, byte1, "byte1"), + (self.byte2, byte2, "byte2"), + (self.byte3, byte3, "byte3"), + (self.byte4, byte4, "byte4"), + (self.byte0_rs_3, byte0 >> 3, "byte0_rs_3"), + (self.byte0_rs_4, byte0 >> 4, "byte0_rs_4"), + (self.byte1_rs_6, byte1 >> 6, "byte1_rs_6"), + (self.byte1_and_63, byte1 & 63, "byte1_and_63"), + (self.byte2_rs_2, byte2 >> 2, "byte2_rs_2"), + (self.byte2_rs_6, byte2 >> 6, "byte2_rs_6"), + (self.byte2_and_3, byte2 & 3, "byte2_and_3"), + (self.byte2_and_63, byte2 & 63, "byte2_and_63"), + (self.regen_size, regen_size, "regen_size"), + (self.compr_size, compr_size, "compr_size"), + ] { + region.assign_advice( + || annotation, + col, + offset, + || Value::known(F::from(value)), + )?; + } + let branch_chip = BinaryNumberChip::construct(self.branch_bits); + branch_chip.assign(&mut region, offset, &LiteralsHeaderBranch::from(branch))?; + } + + Ok(()) + }, + ) + } +} + +impl LookupTable for LiteralsHeaderTable { + fn columns(&self) -> Vec> { + vec![ + self.byte_offset.into(), + self.branch.into(), + self.byte0.into(), + self.byte1.into(), + self.byte2.into(), + self.byte3.into(), + self.byte4.into(), + self.regen_size.into(), + self.compr_size.into(), + ] + } + + 
fn annotations(&self) -> Vec { + vec![ + String::from("byte_offset"), + String::from("branch"), + String::from("byte0"), + String::from("byte1"), + String::from("byte2"), + String::from("byte3"), + String::from("byte4"), + String::from("regen_size"), + String::from("compr_size"), + ] + } +} diff --git a/zkevm-circuits/src/table/decompression/mod.rs b/zkevm-circuits/src/table/decompression/mod.rs new file mode 100644 index 0000000000..1672565453 --- /dev/null +++ b/zkevm-circuits/src/table/decompression/mod.rs @@ -0,0 +1,20 @@ +//! Tables with constraints used for verification of zstd decoding from Huffman Codes and FSE +//! codes. + +mod bitstring_accumulation_table; +mod block_type_rom_table; +mod decoded_literals_table; +mod fse_table; +mod huffman_codes_table; +mod literals_header_rom_table; +mod literals_header_table; +mod tag_rom_table; + +pub use bitstring_accumulation_table::BitstringAccumulationTable; +pub use block_type_rom_table::BlockTypeRomTable; +pub use decoded_literals_table::DecodedLiteralsTable; +pub use fse_table::FseTable; +pub use huffman_codes_table::HuffmanCodesTable; +pub use literals_header_rom_table::LiteralsHeaderRomTable; +pub use literals_header_table::{LiteralsHeaderBranch, LiteralsHeaderTable}; +pub use tag_rom_table::TagRomTable; diff --git a/zkevm-circuits/src/table/decompression/tag_rom_table.rs b/zkevm-circuits/src/table/decompression/tag_rom_table.rs new file mode 100644 index 0000000000..b83d2867d4 --- /dev/null +++ b/zkevm-circuits/src/table/decompression/tag_rom_table.rs @@ -0,0 +1,91 @@ +use eth_types::Field; +use halo2_proofs::{ + circuit::Layouter, + plonk::{Any, Column, ConstraintSystem, Error, Fixed}, +}; + +use crate::{table::LookupTable, witness::TagRomTableRow}; + +/// Read-only Memory table for the Decompression circuit. This table allows us a lookup argument +/// from the Decompression circuit to check if a given row can occur depending on the row's tag, +/// next tag and tag length. 
+#[derive(Clone, Copy, Debug)] +pub struct TagRomTable { + /// Tag of the current field being decoded. + pub tag: Column, + /// Tag of the following field when the current field is finished decoding. + pub tag_next: Column, + /// The maximum length in terms of number of bytes that the current tag can take up. + pub max_len: Column, + /// Whether this tag outputs a decoded byte or not. + pub is_output: Column, + /// Whether this tag belongs to a ``block`` in zstd or not. + pub is_block: Column, + /// Whether this tag is processed back-to-front, i.e. in reverse order. + pub is_reverse: Column, +} + +impl LookupTable for TagRomTable { + fn columns(&self) -> Vec> { + vec![ + self.tag.into(), + self.tag_next.into(), + self.max_len.into(), + self.is_output.into(), + self.is_block.into(), + self.is_reverse.into(), + ] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("tag"), + String::from("tag_next"), + String::from("max_len"), + String::from("is_output"), + String::from("is_block"), + String::from("is_reverse"), + ] + } +} + +impl TagRomTable { + /// Construct the ROM table. + pub fn construct(meta: &mut ConstraintSystem) -> Self { + Self { + tag: meta.fixed_column(), + tag_next: meta.fixed_column(), + max_len: meta.fixed_column(), + is_output: meta.fixed_column(), + is_block: meta.fixed_column(), + is_reverse: meta.fixed_column(), + } + } + + /// Load the ROM table. 
+ pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { + layouter.assign_region( + || "Zstd ROM table", + |mut region| { + for (offset, row) in TagRomTableRow::rows().iter().enumerate() { + for (&column, (value, annotation)) in + >::fixed_columns(self).iter().zip( + row.values::() + .into_iter() + .zip(>::annotations(self).iter()), + ) + { + region.assign_fixed( + || format!("{annotation} at offset={offset}"), + column, + offset, + || value, + )?; + } + } + + Ok(()) + }, + ) + } +} diff --git a/zkevm-circuits/src/witness.rs b/zkevm-circuits/src/witness.rs index ffa4602a32..de3f967fc9 100644 --- a/zkevm-circuits/src/witness.rs +++ b/zkevm-circuits/src/witness.rs @@ -39,7 +39,7 @@ pub use tx::Transaction; mod zstd; pub use zstd::{ - FseAuxiliaryTableData, FseSymbol, FseTableData, FseTableRow, HuffmanCodesData, LstreamNum, - TagRomTableRow, ZstdTag, N_BITS_PER_BYTE, N_BITS_SYMBOL, N_BITS_ZSTD_TAG, N_BLOCK_HEADER_BYTES, - N_JUMP_TABLE_BYTES, N_MAX_SYMBOLS, + process, util::value_bits_le, FseAuxiliaryTableData, FseSymbol, FseTableData, FseTableRow, + HuffmanCodesData, LstreamNum, TagRomTableRow, ZstdTag, ZstdWitnessRow, N_BITS_PER_BYTE, + N_BITS_SYMBOL, N_BITS_ZSTD_TAG, N_BLOCK_HEADER_BYTES, N_JUMP_TABLE_BYTES, N_MAX_SYMBOLS, }; diff --git a/zkevm-circuits/src/witness/zstd/mod.rs b/zkevm-circuits/src/witness/zstd/mod.rs index 244b61190a..ac79734b84 100644 --- a/zkevm-circuits/src/witness/zstd/mod.rs +++ b/zkevm-circuits/src/witness/zstd/mod.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use eth_types::Field; use halo2_proofs::circuit::Value; @@ -5,15 +7,35 @@ mod params; pub use params::*; mod types; -pub use types::*; +pub use types::{ZstdTag::*, *}; #[cfg(test)] mod tui; #[cfg(test)] use tui::draw_rows; -mod util; -use util::value_bits_le; +pub mod util; +use util::{be_bits_to_value, increment_idx, le_bits_to_value, value_bits_le}; + +const TAG_MAX_LEN: [(ZstdTag, u64); 13] = [ + (FrameHeaderDescriptor, 1), + (FrameContentSize, 8), + 
(BlockHeader, 3), + (RawBlockBytes, 8388607), // (1 << 23) - 1 + (RleBlockBytes, 8388607), + (ZstdBlockLiteralsHeader, 5), + (ZstdBlockLiteralsRawBytes, 1048575), // (1 << 20) - 1 + (ZstdBlockLiteralsRleBytes, 1048575), + (ZstdBlockLiteralsHeader, 5), + (ZstdBlockFseCode, 128), + (ZstdBlockHuffmanCode, 128), // header_byte < 128 + (ZstdBlockJumpTable, 6), + (ZstdBlockLstream, 1000), // 1kB hard-limit +]; + +fn lookup_max_tag_len(tag: ZstdTag) -> u64 { + TAG_MAX_LEN.iter().find(|record| record.0 == tag).unwrap().1 +} /// FrameHeaderDescriptor and FrameContentSize fn process_frame_header( @@ -48,6 +70,13 @@ fn process_frame_header( // FrameContentSize bytes are read in little-endian, hence its in reverse mode. let fcs_bytes = src + .iter() + .skip(byte_offset + 1) + .take(fcs_tag_len) + // .rev() + .cloned() + .collect::>(); + let fcs_bytes_rev = src .iter() .skip(byte_offset + 1) .take(fcs_tag_len) @@ -55,7 +84,7 @@ fn process_frame_header( .cloned() .collect::>(); let fcs = { - let fcs = fcs_bytes + let fcs = fcs_bytes_rev .iter() .fold(0u64, |acc, &byte| acc * 256u64 + (byte as u64)); match fcs_tag_len { @@ -65,8 +94,9 @@ fn process_frame_header( }; let fcs_tag_value_iter = fcs_bytes .iter() + .rev() .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); + *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); Some(*acc) }); let fcs_tag_value = fcs_tag_value_iter @@ -80,6 +110,16 @@ fn process_frame_header( Some(*acc) }) .collect::>>(); + + let tag_rlc_iter = fcs_bytes + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }) + .collect::>>(); + let tag_rlc = *(tag_rlc_iter.clone().last().expect("Tag RLC expected")); + let aux_1 = fcs_value_rlcs .last() .expect("FrameContentSize bytes expected"); @@ -91,16 +131,20 @@ fn process_frame_header( state: ZstdState { tag: ZstdTag::FrameHeaderDescriptor, 
tag_next: ZstdTag::FrameContentSize, + max_tag_len: lookup_max_tag_len(ZstdTag::FrameHeaderDescriptor), tag_len: 1, tag_idx: 1, tag_value: Value::known(F::from(*fhd_byte as u64)), tag_value_acc: Value::known(F::from(*fhd_byte as u64)), + is_tag_change: true, + tag_rlc: Value::known(F::from(*fhd_byte as u64)), + tag_rlc_acc: Value::known(F::from(*fhd_byte as u64)), }, encoded_data: EncodedData { byte_idx: (byte_offset + 1) as u64, encoded_len: last_row.encoded_data.encoded_len, value_byte: *fhd_byte, - value_rlc: fhd_value_rlc, + value_rlc: Value::known(F::zero()), ..Default::default() }, decoded_data: DecodedData { @@ -108,41 +152,56 @@ fn process_frame_header( decoded_len_acc: 0, total_decoded_len: last_row.decoded_data.total_decoded_len + fcs, decoded_byte: 0, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + decoded_value_rlc: Value::known(F::zero()), }, + bitstream_read_data: BitstreamReadRow::default(), huffman_data: HuffmanData::default(), fse_data: FseTableRow::default(), }) .chain( - fcs_bytes + fcs_bytes_rev .iter() .zip(fcs_tag_value_iter) .zip(fcs_value_rlcs.iter().rev()) + .zip(tag_rlc_iter.iter().rev()) .enumerate() .map( - |(i, ((&value_byte, tag_value_acc), &value_rlc))| ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::FrameContentSize, - tag_next: ZstdTag::BlockHeader, - tag_len: fcs_tag_len as u64, - tag_idx: (i + 1) as u64, - tag_value: fcs_tag_value, - tag_value_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + 2 + i) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - reverse: true, - reverse_idx: (fcs_tag_len - i) as u64, - reverse_len: fcs_tag_len as u64, - aux_1: *aux_1, - aux_2, - value_rlc, - }, - decoded_data: last_row.decoded_data.clone(), - huffman_data: HuffmanData::default(), - fse_data: FseTableRow::default(), + |(i, (((&value_byte, tag_value_acc), _value_rlc), &tag_rlc_acc))| { + ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::FrameContentSize, + tag_next: 
ZstdTag::BlockHeader, + max_tag_len: lookup_max_tag_len(ZstdTag::FrameContentSize), + tag_len: fcs_tag_len as u64, + tag_idx: (i + 1) as u64, + tag_value: fcs_tag_value, + tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + 2 + i) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + reverse: true, + reverse_idx: (fcs_tag_len - i) as u64, + reverse_len: fcs_tag_len as u64, + aux_1: *aux_1, + aux_2, + value_rlc: fhd_value_rlc, + }, + decoded_data: DecodedData { + decoded_len: fcs, + decoded_len_acc: 0, + total_decoded_len: last_row.decoded_data.total_decoded_len + fcs, + decoded_byte: 0, + decoded_value_rlc: Value::known(F::zero()), + }, + bitstream_read_data: BitstreamReadRow::default(), + huffman_data: HuffmanData::default(), + fse_data: FseTableRow::default(), + } }, ), ) @@ -150,12 +209,22 @@ fn process_frame_header( ) } +type AggregateBlockResult = ( + usize, + Vec>, + bool, + Vec, + Vec, + Vec, + FseAuxiliaryTableData, + HuffmanCodesData, +); fn process_block( src: &[u8], byte_offset: usize, last_row: &ZstdWitnessRow, randomness: Value, -) -> (usize, Vec>, bool) { +) -> AggregateBlockResult { let mut witness_rows = vec![]; let (byte_offset, rows, last_block, block_type, block_size) = @@ -163,36 +232,46 @@ fn process_block( witness_rows.extend_from_slice(&rows); let last_row = rows.last().expect("last row expected to exist"); - let (_byte_offset, rows) = match block_type { - BlockType::RawBlock => process_block_raw( - src, - byte_offset, - last_row, - randomness, - block_size, - last_block, - ), - BlockType::RleBlock => process_block_rle( - src, - byte_offset, - last_row, - randomness, - block_size, - last_block, - ), - BlockType::ZstdCompressedBlock => process_block_zstd( - src, - byte_offset, - last_row, - randomness, - block_size, - last_block, - ), - BlockType::Reserved => unreachable!("Reserved block type not expected"), - }; + let (_byte_offset, rows, literals, 
lstream_len, aux_data, fse_aux_table, huffman_codes) = + match block_type { + BlockType::RawBlock => process_block_raw( + src, + byte_offset, + last_row, + randomness, + block_size, + last_block, + ), + BlockType::RleBlock => process_block_rle( + src, + byte_offset, + last_row, + randomness, + block_size, + last_block, + ), + BlockType::ZstdCompressedBlock => process_block_zstd( + src, + byte_offset, + last_row, + randomness, + block_size, + last_block, + ), + BlockType::Reserved => unreachable!("Reserved block type not expected"), + }; witness_rows.extend_from_slice(&rows); - (byte_offset, witness_rows, last_block) + ( + byte_offset, + witness_rows, + last_block, + literals, + lstream_len, + aux_data, + fse_aux_table, + huffman_codes, + ) } fn process_block_header( @@ -219,12 +298,28 @@ fn process_block_header( _ => unreachable!("BlockType::Reserved unexpected"), }; - let tag_value_iter = bh_bytes.iter().scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); + let tag_value_iter = bh_bytes + .iter() + .rev() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); + Some(*acc) + }); let tag_value = tag_value_iter.clone().last().expect("BlockHeader expected"); + let tag_rlc_iter = bh_bytes + .iter() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }) + .collect::>>(); + let tag_rlc = *(tag_rlc_iter.clone().last().expect("Tag RLC expected")); + + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + // BlockHeader follows FrameContentSize which is processed in reverse order. 
// Hence value_rlc at the first BlockHeader byte will be calculated as: // @@ -234,7 +329,7 @@ fn process_block_header( let acc_start = last_row.encoded_data.aux_1 * randomness.map(|r| r.pow([last_row.encoded_data.reverse_len, 0, 0, 0])) + last_row.encoded_data.aux_2; - let value_rlcs = bh_bytes + let _value_rlcs = bh_bytes .iter() .scan(acc_start, |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); @@ -246,27 +341,33 @@ fn process_block_header( byte_offset + N_BLOCK_HEADER_BYTES, bh_bytes .iter() + .rev() .zip(tag_value_iter) - .zip(value_rlcs.iter()) + .zip(tag_rlc_iter.iter().rev()) .enumerate() .map( - |(i, ((&value_byte, tag_value_acc), &value_rlc))| ZstdWitnessRow { + |(i, ((&value_byte, tag_value_acc), tag_rlc_acc))| ZstdWitnessRow { state: ZstdState { tag: ZstdTag::BlockHeader, tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::BlockHeader), tag_len: N_BLOCK_HEADER_BYTES as u64, tag_idx: (i + 1) as u64, tag_value, tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc: *tag_rlc_acc, }, encoded_data: EncodedData { byte_idx: (byte_offset + i + 1) as u64, encoded_len: last_row.encoded_data.encoded_len, value_byte, - reverse: false, + reverse: true, value_rlc, ..Default::default() }, + bitstream_read_data: BitstreamReadRow::default(), decoded_data: last_row.decoded_data.clone(), huffman_data: HuffmanData::default(), fse_data: FseTableRow::default(), @@ -279,50 +380,47 @@ fn process_block_header( ) } -fn process_block_raw( +fn process_raw_bytes( src: &[u8], byte_offset: usize, last_row: &ZstdWitnessRow, randomness: Value, - block_size: usize, - last_block: bool, + n_bytes: usize, + tag: ZstdTag, + tag_next: ZstdTag, ) -> (usize, Vec>) { - let value_rlc_iter = src.iter().skip(byte_offset).take(block_size).scan( + let value_rlc_iter = src.iter().skip(byte_offset).take(n_bytes).scan( last_row.encoded_data.value_rlc, |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) }, ); - let 
decoded_value_rlc_iter = src.iter().skip(byte_offset).take(block_size).scan( + let decoded_value_rlc_iter = src.iter().skip(byte_offset).take(n_bytes).scan( last_row.decoded_data.decoded_value_rlc, |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) }, ); - let tag_value_iter = src.iter().skip(byte_offset).take(block_size).scan( - Value::known(F::zero()), - |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }, - ); + let tag_value_iter = + src.iter() + .skip(byte_offset) + .take(n_bytes) + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); let tag_value = tag_value_iter .clone() .last() .expect("Raw bytes must be of non-zero length"); - let tag_next = if last_block { - ZstdTag::Null - } else { - ZstdTag::BlockHeader - }; ( - byte_offset + block_size, + byte_offset + n_bytes, src.iter() .skip(byte_offset) - .take(block_size) + .take(n_bytes) .zip(tag_value_iter) .zip(value_rlc_iter) .zip(decoded_value_rlc_iter) @@ -331,12 +429,16 @@ fn process_block_raw( |(i, (((&value_byte, tag_value_acc), value_rlc), decoded_value_rlc))| { ZstdWitnessRow { state: ZstdState { - tag: ZstdTag::RawBlockBytes, + tag, tag_next, - tag_len: block_size as u64, + max_tag_len: lookup_max_tag_len(tag), + tag_len: n_bytes as u64, tag_idx: (i + 1) as u64, tag_value, tag_value_acc, + is_tag_change: i == 0, + tag_rlc: Value::known(F::zero()), + tag_rlc_acc: Value::known(F::zero()), }, encoded_data: EncodedData { byte_idx: (byte_offset + i + 1) as u64, @@ -353,6 +455,7 @@ fn process_block_raw( decoded_byte: value_byte, decoded_value_rlc, }, + bitstream_read_data: BitstreamReadRow::default(), huffman_data: HuffmanData::default(), fse_data: FseTableRow::default(), } @@ -362,18 +465,19 @@ fn process_block_raw( ) } -fn process_block_rle( +fn process_rle_bytes( src: &[u8], byte_offset: usize, last_row: &ZstdWitnessRow, randomness: Value, - 
block_size: usize, - last_block: bool, + n_bytes: usize, + tag: ZstdTag, + tag_next: ZstdTag, ) -> (usize, Vec>) { let rle_byte = src[byte_offset]; let value_rlc = last_row.encoded_data.value_rlc * randomness + Value::known(F::from(rle_byte as u64)); - let decoded_value_rlc_iter = std::iter::repeat(rle_byte).take(block_size).scan( + let decoded_value_rlc_iter = std::iter::repeat(rle_byte).take(n_bytes).scan( last_row.decoded_data.decoded_value_rlc, |acc, byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); @@ -381,26 +485,25 @@ fn process_block_rle( }, ); let tag_value = Value::known(F::from(rle_byte as u64)); - let tag_next = if last_block { - ZstdTag::Null - } else { - ZstdTag::BlockHeader - }; ( byte_offset + 1, std::iter::repeat(rle_byte) - .take(block_size) + .take(n_bytes) .zip(decoded_value_rlc_iter) .enumerate() .map(|(i, (value_byte, decoded_value_rlc))| ZstdWitnessRow { state: ZstdState { - tag: ZstdTag::RleBlockBytes, + tag, tag_next, - tag_len: block_size as u64, + max_tag_len: lookup_max_tag_len(tag), + tag_len: n_bytes as u64, tag_idx: (i + 1) as u64, tag_value, tag_value_acc: tag_value, + is_tag_change: i == 0, + tag_rlc: Value::known(F::zero()), + tag_rlc_acc: Value::known(F::zero()), }, encoded_data: EncodedData { byte_idx: (byte_offset + 1) as u64, @@ -417,6 +520,7 @@ fn process_block_rle( decoded_byte: value_byte, decoded_value_rlc, }, + bitstream_read_data: BitstreamReadRow::default(), huffman_data: HuffmanData::default(), fse_data: FseTableRow::default(), }) @@ -424,6 +528,108 @@ fn process_block_rle( ) } +type BlockProcessingResult = ( + usize, + Vec>, + Vec, + Vec, + Vec, + FseAuxiliaryTableData, + HuffmanCodesData, +); + +fn process_block_raw( + src: &[u8], + byte_offset: usize, + last_row: &ZstdWitnessRow, + randomness: Value, + block_size: usize, + last_block: bool, +) -> BlockProcessingResult { + let tag_next = if last_block { + ZstdTag::Null + } else { + ZstdTag::BlockHeader + }; + + let (byte_offset, rows) = 
process_raw_bytes( + src, + byte_offset, + last_row, + randomness, + block_size, + ZstdTag::RawBlockBytes, + tag_next, + ); + + let fse_aux_table = FseAuxiliaryTableData { + byte_offset: 0, + table_size: 0, + sym_to_states: BTreeMap::default(), + }; + let huffman_weights = HuffmanCodesData { + byte_offset: 0, + weights: vec![], + }; + + ( + byte_offset, + rows.clone(), + vec![], + vec![rows.len() as u64, 0, 0, 0], + vec![0, 0, 0, 0, 0, 0], + fse_aux_table, + huffman_weights, + ) +} + +fn process_block_rle( + src: &[u8], + byte_offset: usize, + last_row: &ZstdWitnessRow, + randomness: Value, + block_size: usize, + last_block: bool, +) -> BlockProcessingResult { + let tag_next = if last_block { + ZstdTag::Null + } else { + ZstdTag::BlockHeader + }; + + let (byte_offset, rows) = process_rle_bytes( + src, + byte_offset, + last_row, + randomness, + block_size, + ZstdTag::RleBlockBytes, + tag_next, + ); + + let fse_aux_table = FseAuxiliaryTableData { + byte_offset: 0, + table_size: 0, + sym_to_states: BTreeMap::default(), + }; + let huffman_weights = HuffmanCodesData { + byte_offset: 0, + weights: vec![], + }; + + ( + byte_offset, + rows.clone(), + vec![], + vec![rows.len() as u64, 0, 0, 0], + vec![0, 0, 0, 0, 0, 0], + fse_aux_table, + huffman_weights, + ) +} + +type LiteralsBlockResult = (usize, Vec>, Vec, Vec, Vec); + #[allow(unused_variables)] fn process_block_zstd( src: &[u8], @@ -432,32 +638,1183 @@ fn process_block_zstd( randomness: Value, block_size: usize, last_block: bool, -) -> (usize, Vec>) { - unimplemented!(); -} +) -> BlockProcessingResult { + let mut witness_rows = vec![]; + + // 1-5 bytes LiteralSectionHeader + let literals_header_result: LiteralsHeaderProcessingResult = + process_block_zstd_literals_header::(src, byte_offset, last_row, randomness); + let ( + byte_offset, + rows, + literals_block_type, + n_streams, + regen_size, + compressed_size, + (branch, sf_max), + ) = literals_header_result; + // let ( + // byte_offset, + // rows, + // 
literals_block_type, + // n_streams, + // regen_size, + // compressed_size, + // (branch, sf_max), + // ) = process_block_zstd_literals_header::(src, byte_offset, last_row, randomness); + + witness_rows.extend_from_slice(&rows); + let mut fse_aux_table = FseAuxiliaryTableData { + byte_offset: 0, + table_size: 0, + sym_to_states: BTreeMap::default(), + }; + let mut huffman_weights = HuffmanCodesData { + byte_offset: 0, + weights: vec![], + }; + + // Depending on the literals block type, decode literals section accordingly + let literals_block_result: LiteralsBlockResult = match literals_block_type { + BlockType::RawBlock => { + let (byte_offset, rows) = process_raw_bytes( + src, + byte_offset, + rows.last().expect("last row expected to exist"), + randomness, + regen_size, + ZstdTag::ZstdBlockLiteralsRawBytes, + ZstdTag::ZstdBlockSequenceHeader, + ); -fn process_block_zstd_literals_header() -> (usize, Vec>) { - unimplemented!(); + ( + byte_offset, + rows.clone(), + vec![], + vec![rows.len() as u64, 0, 0, 0], + vec![0, 0, 0, 0], + ) + } + BlockType::RleBlock => { + let (byte_offset, rows) = process_rle_bytes( + src, + byte_offset, + rows.last().expect("last row expected to exist"), + randomness, + regen_size, + ZstdTag::ZstdBlockLiteralsRleBytes, + ZstdTag::ZstdBlockSequenceHeader, + ); + + ( + byte_offset, + rows.clone(), + vec![], + vec![rows.len() as u64, 0, 0, 0], + vec![0, 0, 0, 0], + ) + } + BlockType::ZstdCompressedBlock => { + let mut huffman_rows = vec![]; + + let ( + bytes_offset, + rows, + huffman_codes, + n_huffman_bytes, + huffman_byte_offset, + last_rlc, + huffman_idx, + fse_size, + fse_accuracy, + n_huffman_bitstream_bytes, + fse_aux_data, + ) = process_block_zstd_huffman_code( + src, + byte_offset, + rows.last().expect("last row must exist"), + randomness, + n_streams, + ); + huffman_rows.extend_from_slice(&rows); + fse_aux_table = fse_aux_data; + huffman_weights = huffman_codes.clone(); + + // Subtract huffman header (1-byte), len of huffman bytes and 
6-byte jump table (if + // n_streams > 1) + let mut literal_stream_size = compressed_size - (n_huffman_bytes + 1); + if n_streams > 1 { + literal_stream_size -= 6; + } + + // Start decoding the literal section + let mut stream_offset = bytes_offset; + + let (bytes_offset, rows, lstream_lens) = process_block_zstd_huffman_jump_table( + src, + stream_offset, + huffman_rows.last().expect("last row should exist"), + literal_stream_size, + n_streams, + randomness, + last_rlc, + ); + huffman_rows.extend_from_slice(&rows); + stream_offset = bytes_offset; + + let mut literals: Vec = vec![]; + + // for idx in 0..n_streams { + for (idx, l_len) in lstream_lens.iter().enumerate().take(n_streams) { + let (byte_offset, rows, symbols) = process_block_zstd_lstream( + src, + stream_offset, + *l_len as usize, + huffman_rows.last().expect("last row should exist"), + randomness, + idx, + &huffman_codes, + huffman_byte_offset, + ); + huffman_rows.extend_from_slice(&rows); + literals.extend_from_slice(&symbols); + + stream_offset = byte_offset; + } + + ( + stream_offset, + huffman_rows, + literals, + lstream_lens, + vec![ + huffman_idx as u64, + fse_size, + fse_accuracy, + n_huffman_bitstream_bytes, + ], + ) + } + _ => unreachable!("Invalid literals section BlockType"), + }; + let (bytes_offset, rows, literals, lstream_len, aux_data) = literals_block_result; + witness_rows.extend_from_slice(&rows); + + ( + bytes_offset, + witness_rows, + literals, + lstream_len, + vec![ + regen_size as u64, + compressed_size as u64, + aux_data[0], + aux_data[1], + aux_data[2], + aux_data[3], + branch, + sf_max as u64, + ], + fse_aux_table, + huffman_weights, + ) } -fn process_block_zstd_fse() -> (usize, Vec>) { - unimplemented!() +type LiteralsHeaderProcessingResult = ( + usize, + Vec>, + BlockType, + usize, + usize, + usize, + (u64, bool), +); + +fn process_block_zstd_literals_header( + src: &[u8], + byte_offset: usize, + last_row: &ZstdWitnessRow, + randomness: Value, +) -> 
LiteralsHeaderProcessingResult { + let lh_bytes = src + .iter() + .skip(byte_offset) + .take(N_MAX_LITERAL_HEADER_BYTES) + .cloned() + .collect::>(); + + let literals_block_type = BlockType::from(lh_bytes[0] & 0x3); + let size_format = (lh_bytes[0] >> 2) & 3; + let sf_max = size_format == 3; + + let [n_bits_fmt, n_bits_regen, n_bits_compressed, n_streams, n_bytes_header, branch]: [usize; + 6] = match literals_block_type { + BlockType::RawBlock | BlockType::RleBlock => match size_format { + 0b00 | 0b10 => [1, 5, 0, 1, 1, 0], + 0b01 => [2, 12, 0, 1, 2, 1], + 0b11 => [2, 20, 0, 1, 3, 2], + _ => unreachable!("size_format out of bound"), + }, + BlockType::ZstdCompressedBlock => match size_format { + 0b00 => [2, 10, 10, 1, 3, 3], + 0b01 => [2, 10, 10, 4, 3, 3], + 0b10 => [2, 14, 14, 4, 4, 4], + 0b11 => [2, 18, 18, 4, 5, 5], + _ => unreachable!("size_format out of bound"), + }, + _ => unreachable!("BlockType::Reserved unexpected or treeless literal section"), + }; + + // Bits for representing regenerated_size and compressed_size + let sizing_bits = &lh_bytes.clone().into_iter().fold(vec![], |mut acc, b| { + acc.extend(value_bits_le(b)); + acc + })[(2 + n_bits_fmt)..(n_bytes_header * N_BITS_PER_BYTE)]; + + let regen_size = le_bits_to_value(&sizing_bits[0..n_bits_regen]); + let compressed_size = + le_bits_to_value(&sizing_bits[n_bits_regen..(n_bits_regen + n_bits_compressed)]); + + let tag_next = match literals_block_type { + BlockType::RawBlock => ZstdTag::ZstdBlockLiteralsRawBytes, + BlockType::RleBlock => ZstdTag::ZstdBlockLiteralsRleBytes, + BlockType::ZstdCompressedBlock => ZstdTag::ZstdBlockFseCode, + _ => unreachable!("BlockType::Reserved unexpected or treeless literal section"), + }; + + let tag_value_iter = + lh_bytes + .iter() + .take(n_bytes_header) + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value = tag_value_iter + .clone() + .last() + 
.expect("LiteralsHeader expected"); + + let tag_rlc_iter = + lh_bytes + .iter() + .take(n_bytes_header) + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC expected"); + + let value_rlc_iter = + lh_bytes + .iter() + .take(n_bytes_header) + .scan(last_row.encoded_data.value_rlc, |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + + ( + byte_offset + n_bytes_header, + lh_bytes + .iter() + .take(n_bytes_header) + .zip(tag_value_iter) + .zip(value_rlc_iter) + .zip(tag_rlc_iter) + .enumerate() + .map( + |(i, (((&value_byte, tag_value_acc), _v_rlc), tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockLiteralsHeader, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockLiteralsHeader), + tag_len: n_bytes_header as u64, + tag_idx: (i + 1) as u64, + tag_value, + tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + reverse: false, + value_rlc, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow::default(), + decoded_data: last_row.decoded_data.clone(), + huffman_data: HuffmanData::default(), + fse_data: FseTableRow::default(), + }, + ) + .collect::>(), + literals_block_type, + n_streams, + regen_size as usize, + compressed_size as usize, + (branch as u64, sf_max), + ) } -fn process_block_zstd_huffman_code() -> (usize, Vec>) { - unimplemented!(); +type HuffmanCodeProcessingResult = ( + usize, + Vec>, + HuffmanCodesData, + usize, + usize, + Value, + usize, + u64, + u64, + u64, + 
FseAuxiliaryTableData, +); + +fn process_block_zstd_huffman_code( + src: &[u8], + byte_offset: usize, + last_row: &ZstdWitnessRow, + randomness: Value, + n_streams: usize, +) -> HuffmanCodeProcessingResult { + // Preserve this value for later construction of HuffmanCodesDataTable + let huffman_code_byte_offset = byte_offset; + + // Other consistent values + let encoded_len = last_row.encoded_data.encoded_len; + let decoded_data = last_row.decoded_data.clone(); + + // Get the next tag + let tag_next = ZstdTag::ZstdBlockHuffmanCode; + + // Parse the header byte + let mut witness_rows: Vec> = vec![]; + let header_byte = src[byte_offset]; + assert!(header_byte < 128, "FSE encoded huffman weights assumed"); + let n_bytes = header_byte as usize; + + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + + // Add a witness row for Huffman header + let mut huffman_header_row: ZstdWitnessRow = ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockFseCode, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockFseCode), + tag_len: 0_u64, /* There's no information at this point about the length of FSE + * table bytes. So this value has to be modified later. 
*/ + tag_idx: 1_u64, + tag_value: Value::default(), // Must be changed after FSE table length is known + tag_value_acc: Value::default(), // Must be changed after FSE table length is known + is_tag_change: true, + tag_rlc: Value::known(F::zero()), // Must be changed after FSE table length is known + tag_rlc_acc: Value::known(F::zero()), // Must be changed after FSE table length is known + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + 1) as u64, + encoded_len, + value_byte: header_byte, + value_rlc, + reverse: false, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow { + bit_start_idx: 0usize, + bit_end_idx: 7usize, + bit_value: header_byte as u64, + is_zero_bit_read: false, + }, + decoded_data: decoded_data.clone(), + huffman_data: HuffmanData::default(), + fse_data: FseTableRow::default(), + }; + + // Recover the FSE table for generating Huffman weights + let (n_fse_bytes, bit_boundaries, table) = + FseAuxiliaryTableData::reconstruct(src, byte_offset + 1) + .expect("Reconstructing FSE table should not fail."); + + // Witness generation + let accuracy_log = (src[byte_offset + 1] & 0b1111) + 5; + + let mut tag_value_iter = src.iter().skip(byte_offset).take(n_fse_bytes + 1).scan( + Value::known(F::zero()), + |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }, + ); + let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); + + let mut tag_rlc_iter = src.iter().skip(byte_offset).take(n_fse_bytes + 1).scan( + Value::known(F::zero()), + |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }, + ); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); + + // Backfill missing data on the huffman header row + huffman_header_row.state.tag_len = (n_fse_bytes + 1usize) as u64; + huffman_header_row.state.tag_value = tag_value; + huffman_header_row.state.tag_value_acc = + tag_value_iter.next().expect("Next value should 
exist"); + huffman_header_row.state.tag_rlc = tag_rlc; + huffman_header_row.state.tag_rlc_acc = tag_rlc_iter.next().expect("Next value expected"); + witness_rows.push(huffman_header_row); + + // Process bit boundaries into bitstream reader info + let mut decoded: u8 = 0; + let mut n_acc: usize = 0; + let mut current_tag_value_acc = Value::known(F::zero()); + let mut current_tag_rlc_acc = Value::known(F::zero()); + let mut last_byte_idx: i64 = 0; + let mut from_pos: (i64, i64) = (1, 0); + let mut to_pos: (i64, i64) = (0, 0); + + let bitstream_rows = bit_boundaries + .iter() + .enumerate() + .map(|(sym, (bit_idx, value))| { + from_pos = if sym == 0 { (1, -1) } else { to_pos }; + + from_pos.1 += 1; + if from_pos.1 == 8 { + from_pos = (from_pos.0 + 1, 0); + } + from_pos.1 = (from_pos.1 as u64).rem_euclid(8) as i64; + + if from_pos.0 > last_byte_idx { + current_tag_value_acc = tag_value_iter.next().unwrap(); + current_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = from_pos.0; + } + + let to_byte_idx = (bit_idx - 1) / 8; + let mut to_bit_idx = bit_idx - to_byte_idx * (N_BITS_PER_BYTE as u32) - 1; + + if from_pos.0 < (to_byte_idx + 1) as i64 { + to_bit_idx += 8; + } + + to_pos = ((to_byte_idx + 1) as i64, to_bit_idx as i64); + + if sym > 0 && n_acc < (1 << accuracy_log) { + decoded = (sym - 1) as u8; + n_acc += (*value - 1) as usize; + } + + ( + decoded, + from_pos.0 as usize, + from_pos.1 as usize, + to_pos.0 as usize, + to_pos.1 as usize, + *value, + current_tag_value_acc, + current_tag_rlc_acc, + 0, + n_acc, + ) + }) + .collect::, + Value, + usize, + usize, + )>>(); + + // Add witness rows for FSE representation bytes + for row in bitstream_rows { + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockFseCode, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockFseCode), + tag_len: (n_fse_bytes + 1) as u64, + tag_idx: (row.1 + 1) as u64, // count the huffman header byte + tag_value, + tag_value_acc: row.6, + 
is_tag_change: false, + tag_rlc, + tag_rlc_acc: row.7, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + row.1 + 1) as u64, // count the huffman header byte + encoded_len, + value_byte: src[byte_offset + row.1], + value_rlc, + reverse: false, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow { + bit_start_idx: row.2, + bit_end_idx: row.4, + bit_value: row.5, + is_zero_bit_read: false, + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc: last_row.decoded_data.decoded_len_acc, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: row.0, + decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + }, + huffman_data: HuffmanData::default(), + fse_data: FseTableRow { + idx: 0, + state: 0, + symbol: 0, + baseline: 0, + num_bits: 0, + num_emitted: 0, + n_acc: row.9 as u64, + }, + }); + } + + // Now start decoding the huffman weights using the actual Huffman code section + let tag_next = if n_streams > 1 { + ZstdTag::ZstdBlockJumpTable + } else { + ZstdTag::ZstdBlockLstream + }; + + // Update the last row + let last_row = witness_rows.last().expect("Last row exists"); + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + + // Bitstream processing state values + let mut num_emitted: usize = 0; + let n_huffman_code_bytes = n_bytes - n_fse_bytes; + let mut last_byte_idx: usize = 1; + let mut current_byte_idx: usize = 1; // byte_idx is 1-indexed + let mut current_bit_idx: usize = 0; + + // Construct the Huffman bitstream + let huffman_bitstream = src + .iter() + .skip(byte_offset + n_fse_bytes + 1) + .take(n_huffman_code_bytes) + .rev() + .clone() + .flat_map(|v| { + let mut bits = value_bits_le(*v); + bits.reverse(); + bits + }) + .collect::>(); + + // Accumulators for Huffman code section + let mut value_rlc_iter = src + .iter() 
+ .skip(byte_offset + n_fse_bytes + 1) + .take(n_huffman_code_bytes) + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }) + .collect::>>() + .into_iter() + .rev(); + let mut tag_value_iter = src + .iter() + .skip(byte_offset + n_fse_bytes + 1) + .take(n_huffman_code_bytes) + .rev() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); + let tag_rlc_iter = src + .iter() + .skip(byte_offset + n_fse_bytes + 1) + .take(n_huffman_code_bytes) + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC must exist"); + let mut tag_rlc_iter = tag_rlc_iter.collect::>>().into_iter().rev(); + + let mut next_tag_value_acc = tag_value_iter.next().unwrap(); + let next_value_rlc_acc = value_rlc_iter.next().unwrap(); + let mut next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + + let aux_1 = next_value_rlc_acc; + let aux_2 = witness_rows[witness_rows.len() - 1].encoded_data.value_rlc; + + let mut padding_end_idx: usize = 0; + while huffman_bitstream[padding_end_idx] == 0 { + padding_end_idx += 1; + } + + // Add a witness row for leading 0s and the sentinel 1-bit + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockHuffmanCode, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockHuffmanCode), + tag_len: n_huffman_code_bytes as u64, + tag_idx: 1_u64, + tag_value, + tag_value_acc: next_tag_value_acc, + is_tag_change: true, + tag_rlc, + tag_rlc_acc: next_tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + n_fse_bytes + 1 + current_byte_idx) as u64, + encoded_len, + value_byte: src + [byte_offset + n_fse_bytes + 1 + n_huffman_code_bytes - current_byte_idx], + 
value_rlc, + reverse: true, + reverse_len: n_huffman_code_bytes as u64, + reverse_idx: (n_huffman_code_bytes - (current_byte_idx - 1)) as u64, + aux_1, + aux_2, + }, + bitstream_read_data: BitstreamReadRow { + bit_value: 1u64, + bit_start_idx: 0usize, + bit_end_idx: padding_end_idx, + is_zero_bit_read: false, + }, + huffman_data: HuffmanData::default(), + decoded_data: last_row.decoded_data.clone(), + fse_data: FseTableRow::default(), + }); + + // Exclude the leading zero section + while huffman_bitstream[current_bit_idx] == 0 { + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + } + // Exclude the sentinel 1-bit + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + + // Update accumulator + if current_byte_idx > last_byte_idx { + next_tag_value_acc = tag_value_iter.next().unwrap(); + next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = current_byte_idx; + } + + // Now the actual weight-bearing bitstream starts + // The Huffman bitstream is decoded by two interleaved states reading the stream in alternating + // order. The FSE table for the two independent decoding strands are the same. + let mut color: usize = 0; // use 0, 1 (colors) to denote two alternating decoding strands. 
+ let mut prev_baseline: [u64; 2] = [0, 0]; + let mut next_nb_to_read: [usize; 2] = [accuracy_log as usize, accuracy_log as usize]; + let mut decoded_weights: Vec = vec![]; + let mut fse_table_idx: u64 = 1; + + // Convert FSE auxiliary data into a state-indexed representation + let fse_state_table = table.clone().parse_state_table(); + + while current_bit_idx + next_nb_to_read[color] <= (n_huffman_code_bytes) * N_BITS_PER_BYTE { + let nb = next_nb_to_read[color]; + let bitstring_value = + be_bits_to_value(&huffman_bitstream[current_bit_idx..(current_bit_idx + nb)]); + let next_state = prev_baseline[color] + bitstring_value; + + let from_bit_idx = current_bit_idx.rem_euclid(8); + let to_bit_idx = if nb > 0 { + from_bit_idx + (nb - 1) + } else { + from_bit_idx + }; + + // Lookup the FSE table row for the state + let fse_row = fse_state_table + .get(&{ next_state }) + .expect("next state should be in fse table"); + + // Decode the symbol + decoded_weights.push(fse_row.0 as u8); + num_emitted += 1; + + // Add a witness row + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockHuffmanCode, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockHuffmanCode), + tag_len: (n_huffman_code_bytes) as u64, + tag_idx: current_byte_idx as u64, + tag_value, + tag_value_acc: next_tag_value_acc, + is_tag_change: false, + tag_rlc, + tag_rlc_acc: next_tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + n_fse_bytes + 1 + current_byte_idx) as u64, + encoded_len, + value_byte: src + [byte_offset + n_fse_bytes + 1 + n_huffman_code_bytes - current_byte_idx], + value_rlc, + reverse: true, + reverse_len: n_huffman_code_bytes as u64, + reverse_idx: (n_huffman_code_bytes - (current_byte_idx - 1)) as u64, + aux_1, + aux_2, + }, + bitstream_read_data: BitstreamReadRow { + bit_value: bitstring_value, + bit_start_idx: from_bit_idx, + bit_end_idx: to_bit_idx, + is_zero_bit_read: (nb == 0), + }, + fse_data: FseTableRow { + idx: 
fse_table_idx, + state: next_state, + symbol: fse_row.0, + baseline: fse_row.1, + num_bits: fse_row.2, + num_emitted: num_emitted as u64, + n_acc: 0, + }, + huffman_data: HuffmanData::default(), + decoded_data: decoded_data.clone(), + }); + + // increment fse idx + fse_table_idx += 1; + + // Advance byte and bit marks. Get next acc value if byte changes + for _ in 0..nb { + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + } + if current_byte_idx > last_byte_idx && current_byte_idx <= n_huffman_code_bytes { + next_tag_value_acc = tag_value_iter.next().unwrap(); + next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = current_byte_idx; + } + + // Preparing for next state + prev_baseline[color] = fse_row.1; + next_nb_to_read[color] = fse_row.2 as usize; + + color = if color > 0 { 0 } else { 1 }; + } + + // Construct HuffmanCodesTable + let huffman_codes = HuffmanCodesData { + byte_offset: (huffman_code_byte_offset + 1) as u64, + weights: decoded_weights + .into_iter() + .map(|w| FseSymbol::from(w as usize)) + .collect(), + }; + + // rlc after a reverse section + let mul = + (0..(n_huffman_code_bytes - 1)).fold(Value::known(F::one()), |acc, _| acc * randomness); + let new_value_rlc_init_value = aux_2 * mul + aux_1; + + ( + byte_offset + 1 + n_fse_bytes + n_huffman_code_bytes, + witness_rows, + huffman_codes, + n_bytes, + huffman_code_byte_offset + 1, + new_value_rlc_init_value, + byte_offset + 1, + (1 << accuracy_log) as u64, + accuracy_log as u64, + n_huffman_code_bytes as u64, + table, // FSE table + ) } -fn process_block_zstd_huffman_jump_table() -> (usize, Vec>) { - unimplemented!(); +fn process_block_zstd_huffman_jump_table( + src: &[u8], + byte_offset: usize, + last_row: &ZstdWitnessRow, + literal_stream_size: usize, + n_streams: usize, + randomness: Value, + last_rlc: Value, +) -> (usize, Vec>, Vec) { + if n_streams <= 1 { + (byte_offset, vec![], vec![literal_stream_size as u64]) + } else { + // Note: The 
decompressed size of each stream is equal to (regen_size + 3) / 4 + // but the compressed bitstream length will be different. + // Jump table provides information on the length of first 3 bitstreams. + + let jt_bytes = src + .iter() + .skip(byte_offset) + .take(N_JUMP_TABLE_BYTES) + .cloned() + .map(|x| x as u64) + .collect::>(); + + let l1: u64 = jt_bytes[0] + jt_bytes[1] * 256; + let l2: u64 = jt_bytes[2] + jt_bytes[3] * 256; + let l3: u64 = jt_bytes[4] + jt_bytes[5] * 256; + let l4: u64 = (literal_stream_size as u64) - l1 - l2 - l3; + + let value_rlc_iter = + src.iter() + .skip(byte_offset) + .take(N_JUMP_TABLE_BYTES) + .scan(last_rlc, |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + + let tag_value_iter = src.iter().skip(byte_offset).take(N_JUMP_TABLE_BYTES).scan( + Value::known(F::zero()), + |acc, &byte| { + *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); + Some(*acc) + }, + ); + let tag_value = tag_value_iter + .clone() + .last() + .expect("Tag value must exist."); + let tag_rlc_iter = src.iter().skip(byte_offset).take(N_JUMP_TABLE_BYTES).scan( + Value::known(F::zero()), + |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }, + ); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag value must exist."); + + ( + byte_offset + N_JUMP_TABLE_BYTES, + src.iter() + .skip(byte_offset) + .take(N_JUMP_TABLE_BYTES) + .zip(tag_value_iter) + .zip(value_rlc_iter) + .zip(tag_rlc_iter) + .enumerate() + .map( + |(i, (((&value_byte, tag_value_acc), _v_rlc), tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockJumpTable, + tag_next: ZstdTag::ZstdBlockLstream, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockJumpTable), + 
tag_len: N_JUMP_TABLE_BYTES as u64, + tag_idx: (i + 1) as u64, + tag_value, + tag_value_acc, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + value_rlc, + reverse: false, + ..Default::default() + }, + bitstream_read_data: BitstreamReadRow { + bit_start_idx: 0, + bit_end_idx: 7, + bit_value: value_byte as u64, + is_zero_bit_read: false, + }, + decoded_data: last_row.decoded_data.clone(), + huffman_data: HuffmanData::default(), + fse_data: FseTableRow::default(), + }, + ) + .collect::>(), + vec![l1, l2, l3, l4], + ) + } } -fn process_block_zstd_lstream() -> (usize, Vec>) { - unimplemented!(); +#[allow(clippy::too_many_arguments)] +fn process_block_zstd_lstream( + src: &[u8], + byte_offset: usize, + len: usize, + last_row: &ZstdWitnessRow, + randomness: Value, + stream_idx: usize, + huffman_code: &HuffmanCodesData, + huffman_code_byte_offset: usize, +) -> (usize, Vec>, Vec) { + // Obtain literal stream bits (reversed). 
+ let lstream_bits = src + .iter() + .skip(byte_offset) + .take(len) + .rev() + .clone() + .flat_map(|v| { + let mut bits = value_bits_le(*v); + bits.reverse(); + bits + }) + .collect::>(); + + // Bitstream processing state helper values + let mut witness_rows: Vec> = vec![]; + let mut last_byte_idx: usize = 1; + let mut current_byte_idx: usize = 1; + let mut current_bit_idx: usize = 0; + let mut decoded_len_acc = last_row.decoded_data.decoded_len_acc; + let mut decoded_rlc = last_row.decoded_data.decoded_value_rlc; + + // accumulators + let aux_1 = last_row.encoded_data.value_rlc; + let multiplier = + (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); + let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; + + let mut tag_value_acc = + src.iter() + .skip(byte_offset) + .take(len) + .rev() + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_value = tag_value_acc.clone().last().expect("Tag value exists"); + + let tag_rlc_iter = + src.iter() + .skip(byte_offset) + .take(len) + .scan(Value::known(F::zero()), |acc, &byte| { + *acc = *acc * randomness + Value::known(F::from(byte as u64)); + Some(*acc) + }); + let tag_rlc = tag_rlc_iter.clone().last().expect("Tag value exists"); + let mut tag_rlc_iter = tag_rlc_iter.collect::>>().into_iter().rev(); + + // Decide the next tag + let tag_next = match stream_idx { + 0..=2 => ZstdTag::ZstdBlockLstream, + 3 => ZstdTag::ZstdBlockSequenceHeader, + _ => unreachable!("stream_idx value out of range"), + }; + + let mut padding_end_idx = 0; + while lstream_bits[padding_end_idx] == 0 { + padding_end_idx += 1; + } + + let mut next_tag_value_acc = tag_value_acc.next().unwrap(); + let mut next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + + // Add a witness row for leading 0s and sentinel 1-bit + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: 
ZstdTag::ZstdBlockLstream, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockLstream), + tag_len: len as u64, + tag_idx: current_byte_idx as u64, + tag_value, + tag_value_acc: next_tag_value_acc, + is_tag_change: true, + tag_rlc, + tag_rlc_acc: next_tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + current_byte_idx) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte: src[byte_offset + len - current_byte_idx], + value_rlc, + // reverse specific values + reverse: true, + reverse_len: len as u64, + reverse_idx: (len - (current_byte_idx - 1)) as u64, + aux_1, + aux_2: tag_value, + }, + huffman_data: HuffmanData { + byte_offset: huffman_code_byte_offset as u64, + bit_value: 1u8, + stream_idx, + k: (0, padding_end_idx as u8), + }, + bitstream_read_data: BitstreamReadRow { + bit_value: 1u64, + bit_start_idx: 0usize, + bit_end_idx: padding_end_idx, + is_zero_bit_read: false, + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc: last_row.decoded_data.decoded_len_acc, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: 0, + decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + }, + fse_data: FseTableRow::default(), + }); + + // Exclude the leading zero section + while lstream_bits[current_bit_idx] == 0 { + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + } + // Exclude the sentinel 1-bit + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + + // Update accumulator + if current_byte_idx > last_byte_idx { + next_tag_value_acc = tag_value_acc.next().unwrap(); + next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = current_byte_idx; + } + + // Now the actual symbol-bearing bitstream starts + let (max_bitstring_len, huffman_bitstring_map) = huffman_code.parse_bitstring_map(); + let mut decoded_symbols: Vec = vec![]; + let mut bitstring_acc: String = 
String::from(""); + let mut cur_bitstring_len: usize = 0; + + while current_bit_idx < len * N_BITS_PER_BYTE { + if huffman_bitstring_map.contains_key(bitstring_acc.as_str()) { + let sym = *huffman_bitstring_map.get(bitstring_acc.as_str()).unwrap(); + decoded_symbols.push(sym); + + let from_byte_idx = current_byte_idx; + let from_bit_idx = current_bit_idx; + + // advance byte and bit marks to the last bit + for _ in 0..(cur_bitstring_len - 1) { + (current_byte_idx, current_bit_idx) = + increment_idx(current_byte_idx, current_bit_idx); + } + let end_bit_idx = if current_byte_idx > from_byte_idx { + current_bit_idx.rem_euclid(8) + 8 + } else { + current_bit_idx.rem_euclid(8) + }; + (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); + + decoded_len_acc += 1; + decoded_rlc = decoded_rlc * randomness + Value::known(F::from(sym)); + + // Add a witness row for emitted symbol + witness_rows.push(ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockLstream, + tag_next, + max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockLstream), + tag_len: len as u64, + tag_idx: from_byte_idx as u64, + tag_value, + tag_value_acc: next_tag_value_acc, + is_tag_change: false, + tag_rlc, + tag_rlc_acc: next_tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + from_byte_idx) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte: src[byte_offset + len - from_byte_idx], + value_rlc, + // reverse specific values + reverse: true, + reverse_len: len as u64, + reverse_idx: (len - from_byte_idx + 1) as u64, + aux_1, + aux_2: tag_value, + }, + huffman_data: HuffmanData { + byte_offset: huffman_code_byte_offset as u64, + bit_value: u8::from_str_radix(bitstring_acc.as_str(), 2).unwrap(), + stream_idx, + k: (from_bit_idx.rem_euclid(8) as u8, end_bit_idx as u8), + }, + bitstream_read_data: BitstreamReadRow { + bit_value: u8::from_str_radix(bitstring_acc.as_str(), 2).unwrap() as u64, + bit_start_idx: 
from_bit_idx.rem_euclid(8), + bit_end_idx: end_bit_idx, + is_zero_bit_read: false, + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + decoded_len_acc, + total_decoded_len: last_row.decoded_data.total_decoded_len, + decoded_byte: sym as u8, + decoded_value_rlc: decoded_rlc, + }, + fse_data: FseTableRow::default(), + }); + + // Update accumulator + if current_byte_idx > last_byte_idx && current_byte_idx <= len { + next_tag_value_acc = tag_value_acc.next().unwrap(); + next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); + last_byte_idx = current_byte_idx; + } + + // Reset decoding state + bitstring_acc = String::from(""); + cur_bitstring_len = 0; + } else { + if lstream_bits[current_bit_idx + cur_bitstring_len] > 0 { + bitstring_acc.push('1'); + } else { + bitstring_acc.push('0'); + } + cur_bitstring_len += 1; + + if cur_bitstring_len > max_bitstring_len as usize { + panic!("Reading bit len greater than max bitstring len not allowed."); + } + } + } + + (byte_offset + len, witness_rows, decoded_symbols) } -pub fn process(src: &[u8], randomness: Value) -> Vec> { +/// Result for processing multiple blocks from compressed data +pub type MultiBlockProcessResult = ( + Vec>, + Vec, + Vec, + Vec, + Vec, +); + +/// Process a slice of bytes into decompression circuit witness rows +pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessResult { let mut witness_rows = vec![]; + let mut literals: Vec = vec![]; + let mut aux_data: Vec = vec![]; + let mut fse_aux_tables: Vec = vec![]; + let mut huffman_codes: Vec = vec![]; let byte_offset = 0; // FrameHeaderDescriptor and FrameContentSize @@ -470,16 +1827,31 @@ pub fn process(src: &[u8], randomness: Value) -> Vec( + let ( + _byte_offset, + rows, + last_block, + new_literals, + lstream_lens, + pipeline_data, + fse_aux_table, + huffman_code, + ) = process_block::( src, byte_offset, rows.last().expect("last row expected to exist"), randomness, ); witness_rows.extend_from_slice(&rows); + 
literals.extend_from_slice(&new_literals); + aux_data.extend_from_slice(&lstream_lens); + aux_data.extend_from_slice(&pipeline_data); + fse_aux_tables.push(fse_aux_table); + huffman_codes.push(huffman_code); if last_block { - assert!(byte_offset >= src.len()); + // TODO: Recover this assertion after the sequence section decoding is completed. + // assert!(byte_offset >= src.len()); break; } } @@ -487,11 +1859,19 @@ pub fn process(src: &[u8], randomness: Value) -> Vec(&compressed, Value::known(Fr::from(123456789))); + let (_witness_rows, _decoded_literals, _aux_data, _fse_aux_tables, _huffman_codes) = + process::(&compressed, Value::known(Fr::from(123456789))); + + Ok(()) + } + + // Verify correct interleaved decoding of FSE-coded Huffman Weights + // Example link: https://nigeltao.github.io/blog/2022/zstandard-part-5-fse.html + #[test] + fn interleaved_huffman_code_fse() -> Result<(), std::io::Error> { + // Input includes FSE table representation (normalized symbol frequencies) and the actual + // Huffman bitstream For structure reference: https://nigeltao.github.io/blog/2022/zstandard-part-2-structure.html + let input: [u8; 36] = [ + 0x23, 0x30, 0x6f, 0x9b, 0x03, 0x7d, 0xc7, 0x16, 0x0b, 0xbe, 0xc8, 0xf2, 0xd0, 0x22, + 0x4b, 0x6b, 0xbc, 0x54, 0x5d, 0xa9, 0xd4, 0x93, 0xef, 0xc4, 0x54, 0x96, 0xb2, 0xe2, + 0xa8, 0xa8, 0x24, 0x1c, 0x54, 0x40, 0x29, 0x01, + ]; + + let ( + _byte_offset, + _witness_rows, + huffman_codes, + _n_huffan_bytes, + _huffman_byte_offset, + _last_rlc, + _huffman_idx, + _fse_size, + _fse_accuracy, + _n_huffman_bitstream_bytes, + _fse_aux_data, + ) = process_block_zstd_huffman_code::( + &input, + 0, + &ZstdWitnessRow::init(0), + Value::known(Fr::from(123456789)), + 4, + ); + + let expected_weights: Vec = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 
2, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 5, 3, 3, 3, 6, 3, 2, 4, 4, 0, 1, 4, 4, 5, 5, 2, 0, 4, 4, + 5, 3, 1, 3, 1, 3, + ] + .into_iter() + .map(FseSymbol::from) + .collect::>(); + + assert_eq!( + huffman_codes.weights, expected_weights, + "Huffman weights should be correctly decoded with interleaved states" + ); + + Ok(()) + } + + // Verify correct decoding of literal bitstream using a HuffmanCode table + // Example link: https://nigeltao.github.io/blog/2022/zstandard-part-4-huffman.html + #[test] + fn decode_literal_bitstream() -> Result<(), std::io::Error> { + let huffman_codes = HuffmanCodesData { + byte_offset: 0, + weights: vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, + 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 5, 3, 3, 3, 6, 3, 2, 4, 4, 0, 1, 4, 4, 5, 5, + 2, 0, 4, 4, 5, 3, 1, 3, 1, 3, + ] + .into_iter() + .map(FseSymbol::from) + .collect::>(), + }; + + let lstream1: [u8; 85] = [ + 0xcc, 0x51, 0x73, 0x3a, 0x85, 0x9e, 0xf7, 0x59, 0xfc, 0xc5, 0xca, 0x6a, 0x7a, 0xd9, + 0x82, 0x9c, 0x65, 0xc5, 0x45, 0x92, 0xe3, 0x0d, 0xf3, 0xef, 0x71, 0xee, 0xdc, 0xd5, + 0xa2, 0xe3, 0x48, 0xad, 0xa3, 0xbc, 0x41, 0x7a, 0x3c, 0xaa, 0xd6, 0xeb, 0xd0, 0x77, + 0xea, 0xdc, 0x5d, 0x41, 0x06, 0x50, 0x1c, 0x49, 0x0f, 0x07, 0x10, 0x05, 0x88, 0x84, + 0x94, 0x02, 0xfc, 0x3c, 0xe3, 0x60, 0x25, 0xc0, 0xcb, 0x0c, 0xb8, 0xa9, 0x73, 0xbc, + 0x13, 0x77, 0xc6, 0xe2, 0x20, 0xed, 0x17, 0x7b, 0x12, 0xdc, 0x24, 0x5a, 0xdf, 0xb4, + 0x21, + ]; + + let (_byte_offset, _witness_rows, decoded_symbols) = process_block_zstd_lstream::( + &lstream1, + 0, + 85, + &ZstdWitnessRow::init(0), + Value::known(Fr::from(123456789)), + 1, + &huffman_codes, + 0, + ); + + let ascii_symbols: String = decoded_symbols + .iter() + .filter_map(|&s| char::from_u32(s as u32)) + .collect(); + let 
expected_decoded_ascii: String = String::from("Romeo and Juliet\nExcerpt from Act 2, Scene 2\n\nJULIET\nO ,! wherefore art thou?\nDeny thy fatherrefusename;\nOr, ifwilt not, be but sworn my l"); + + assert_eq!( + ascii_symbols, expected_decoded_ascii, + "Expect correct decoding" + ); + + Ok(()) + } + + #[test] + fn decode_literal_section() -> Result<(), std::io::Error> { + let encoded: [u8; 555] = [ + // 0x28, 0xb5, 0x2f, 0xfd, // magic numbers are removed + 0x60, // originally 0x64. unset the checksum bit. + 0xae, 0x02, 0x0d, 0x11, 0x00, 0x76, 0x62, 0x5e, 0x23, 0x30, 0x6f, 0x9b, 0x03, 0x7d, + 0xc7, 0x16, 0x0b, 0xbe, 0xc8, 0xf2, 0xd0, 0x22, 0x4b, 0x6b, 0xbc, 0x54, 0x5d, 0xa9, + 0xd4, 0x93, 0xef, 0xc4, 0x54, 0x96, 0xb2, 0xe2, 0xa8, 0xa8, 0x24, 0x1c, 0x54, 0x40, + 0x29, 0x01, 0x55, 0x00, 0x57, 0x00, 0x51, 0x00, 0xcc, 0x51, 0x73, 0x3a, 0x85, 0x9e, + 0xf7, 0x59, 0xfc, 0xc5, 0xca, 0x6a, 0x7a, 0xd9, 0x82, 0x9c, 0x65, 0xc5, 0x45, 0x92, + 0xe3, 0x0d, 0xf3, 0xef, 0x71, 0xee, 0xdc, 0xd5, 0xa2, 0xe3, 0x48, 0xad, 0xa3, 0xbc, + 0x41, 0x7a, 0x3c, 0xaa, 0xd6, 0xeb, 0xd0, 0x77, 0xea, 0xdc, 0x5d, 0x41, 0x06, 0x50, + 0x1c, 0x49, 0x0f, 0x07, 0x10, 0x05, 0x88, 0x84, 0x94, 0x02, 0xfc, 0x3c, 0xe3, 0x60, + 0x25, 0xc0, 0xcb, 0x0c, 0xb8, 0xa9, 0x73, 0xbc, 0x13, 0x77, 0xc6, 0xe2, 0x20, 0xed, + 0x17, 0x7b, 0x12, 0xdc, 0x24, 0x5a, 0xdf, 0xb4, 0x21, 0x9a, 0xcb, 0x8f, 0xc7, 0x58, + 0x54, 0x11, 0xa9, 0xf1, 0x47, 0x82, 0x9b, 0xba, 0x60, 0xb4, 0x92, 0x28, 0x0e, 0xfb, + 0x8b, 0x1e, 0x92, 0x23, 0x6a, 0xcf, 0xbf, 0xe5, 0x45, 0xb5, 0x7e, 0xeb, 0x81, 0xf1, + 0x78, 0x4b, 0xad, 0x17, 0x4d, 0x81, 0x9f, 0xbc, 0x67, 0xa7, 0x56, 0xee, 0xb4, 0xd9, + 0xe1, 0x95, 0x21, 0x66, 0x0c, 0x95, 0x83, 0x27, 0xde, 0xac, 0x37, 0x20, 0x91, 0x22, + 0x07, 0x0b, 0x91, 0x86, 0x94, 0x1a, 0x7b, 0xf6, 0x4c, 0xb0, 0xc0, 0xe8, 0x2e, 0x49, + 0x65, 0xd6, 0x34, 0x63, 0x0c, 0x88, 0x9b, 0x1c, 0x48, 0xca, 0x2b, 0x34, 0xa9, 0x6b, + 0x99, 0x3b, 0xee, 0x13, 0x3b, 0x7c, 0x93, 0x0b, 0xf7, 0x0d, 0x49, 0x69, 0x18, 0x57, + 0xbe, 0x3b, 0x64, 
0x45, 0x1d, 0x92, 0x63, 0x7f, 0xe8, 0xf9, 0xa1, 0x19, 0x7b, 0x7b, + 0x6e, 0xd8, 0xa3, 0x90, 0x23, 0x82, 0xf4, 0xa7, 0xce, 0xc8, 0xf8, 0x90, 0x15, 0xb3, + 0x14, 0xf4, 0x40, 0xe7, 0x02, 0x78, 0xd3, 0x17, 0x71, 0x23, 0xb1, 0x19, 0xad, 0x6b, + 0x49, 0xae, 0x13, 0xa4, 0x75, 0x38, 0x51, 0x47, 0x89, 0x67, 0xb0, 0x39, 0xb4, 0x53, + 0x86, 0xa4, 0xac, 0xaa, 0xa3, 0x34, 0x89, 0xca, 0x2e, 0xe9, 0xc1, 0xfe, 0xf2, 0x51, + 0xc6, 0x51, 0x73, 0xaa, 0xf7, 0x9d, 0x2d, 0xed, 0xd9, 0xb7, 0x4a, 0xb2, 0xb2, 0x61, + 0xe4, 0xef, 0x98, 0xf7, 0xc5, 0xef, 0x51, 0x9b, 0xd8, 0xdc, 0x60, 0x6c, 0x41, 0x76, + 0xaf, 0x78, 0x1a, 0x62, 0xb5, 0x4c, 0x1e, 0x21, 0x39, 0x9a, 0x5f, 0xac, 0x9d, 0xe0, + 0x62, 0xe8, 0xe9, 0x2f, 0x2f, 0x48, 0x02, 0x8d, 0x53, 0xc8, 0x91, 0xf2, 0x1a, 0xd2, + 0x7c, 0x0a, 0x7c, 0x48, 0xbf, 0xda, 0xa9, 0xe3, 0x38, 0xda, 0x34, 0xce, 0x76, 0xa9, + 0xda, 0x15, 0x91, 0xde, 0x21, 0xf5, 0x55, 0x46, 0xa8, 0x21, 0x9d, 0x51, 0xcc, 0x18, + 0x42, 0x44, 0x81, 0x8c, 0x94, 0xb4, 0x50, 0x1e, 0x20, 0x42, 0x82, 0x98, 0xc2, 0x3b, + 0x10, 0x48, 0xec, 0xa6, 0x39, 0x63, 0x13, 0xa7, 0x01, 0x94, 0x40, 0xff, 0x88, 0x0f, + 0x98, 0x07, 0x4a, 0x46, 0x38, 0x05, 0xa9, 0xcb, 0xf6, 0xc8, 0x21, 0x59, 0xaa, 0x38, + 0x45, 0xbf, 0x5c, 0xf8, 0x55, 0x9e, 0x9f, 0x04, 0xed, 0xc8, 0x03, 0x42, 0x2a, 0x4b, + 0xf6, 0x78, 0x7e, 0x23, 0x67, 0x15, 0xa2, 0x79, 0x29, 0xf4, 0x9b, 0x7e, 0x00, 0xbc, + 0x2f, 0x46, 0x96, 0x99, 0xea, 0xf1, 0xee, 0x1c, 0x6e, 0x06, 0x9c, 0xdb, 0xe4, 0x8c, + 0xc2, 0x05, 0xf7, 0x54, 0x51, 0x84, 0xc0, 0x33, 0x02, 0x01, 0xb1, 0x8c, 0x80, 0xdc, + 0x99, 0x8f, 0xcb, 0x46, 0xff, 0xd1, 0x25, 0xb5, 0xb6, 0x3a, 0xf3, 0x25, 0xbe, 0x85, + 0x50, 0x84, 0xf5, 0x86, 0x5a, 0x71, 0xf7, 0xbd, 0xa1, 0x4c, 0x52, 0x4f, 0x20, 0xa3, + 0x61, 0x23, 0x77, 0x12, 0xd3, 0xb1, 0x58, 0x75, 0x22, 0x01, 0x12, 0x70, 0xec, 0x14, + 0x91, 0xf9, 0x85, 0x61, 0xd5, 0x7e, 0x98, 0x84, 0xc9, 0x76, 0x84, 0xbc, 0xb8, 0xfe, + 0x4e, 0x53, 0xa5, 0x06, 0x82, 0x14, 0x95, 0x51, + ]; + + let (_witness_rows, decoded_literals, _aux_data, _fse_aux_tables, 
_huffman_codes) = + process::(&encoded, Value::known(Fr::from(123456789))); + + let decoded_literal_string: String = decoded_literals + .iter() + .filter_map(|&s| char::from_u32(s as u32)) + .collect(); + let expected_literal_string = String::from("Romeo and Juliet\nExcerpt from Act 2, Scene 2\n\nJULIET\nO ,! wherefore art thou?\nDeny thy fatherrefusename;\nOr, ifwilt not, be but sworn my love,\nAnd I'll no longera Capulet.\n\nROMEO\n[Aside] Shall I hear more, or sspeak at this?'Tis that isenemy;\nTyself,gh a Montague.\nWhat's? inor hand,foot,\nNor armaceany opart\nBeing to a man. Osome!in a?which we ca rose\nBy would smell as sweet;\nSo, were he'd,\nRetaindear perfectionhe owes\nWithoitle.dofffor oee\nTake mI t hy word:\nCebe new baptized;\nHencth I never will. manthus bescreen'dnightstumblest on my counsel?\n"); + + assert_eq!( + decoded_literal_string, expected_literal_string, + "Decode the correct literal string" + ); Ok(()) } diff --git a/zkevm-circuits/src/witness/zstd/params.rs b/zkevm-circuits/src/witness/zstd/params.rs index 389625641e..1dec732d21 100644 --- a/zkevm-circuits/src/witness/zstd/params.rs +++ b/zkevm-circuits/src/witness/zstd/params.rs @@ -4,8 +4,12 @@ pub const N_BITS_PER_BYTE: usize = 8; /// Number of bytes used to specify block header. pub const N_BLOCK_HEADER_BYTES: usize = 3; -/// Number of bytes used in the Jump table. +/// Constants for zstd-compressed block +pub const N_MAX_LITERAL_HEADER_BYTES: usize = 3; +/// Maximum bytes for the jump table pub const N_JUMP_TABLE_BYTES: usize = 6; +/// Maximum bytes for the FSE representation +pub const N_MAX_LITERAL_FSE_BYTES: usize = 8; /// Maximum number of symbols (weights), i.e. symbol in [0, N_MAX_SYMBOLS). 
pub const N_MAX_SYMBOLS: usize = 8; diff --git a/zkevm-circuits/src/witness/zstd/types.rs b/zkevm-circuits/src/witness/zstd/types.rs index e061fcd325..9e0d722682 100644 --- a/zkevm-circuits/src/witness/zstd/types.rs +++ b/zkevm-circuits/src/witness/zstd/types.rs @@ -1,4 +1,7 @@ -use std::{collections::BTreeMap, io::Cursor}; +use std::{ + collections::{BTreeMap, HashMap}, + io::Cursor, +}; use bitstream_io::{BitRead, BitReader, LittleEndian}; use eth_types::Field; @@ -135,6 +138,7 @@ impl From for FseSymbol { } } +#[derive(Debug)] pub enum BlockType { RawBlock = 0, RleBlock, @@ -172,11 +176,22 @@ impl From for usize { value as usize } } +impl From for LstreamNum { + fn from(value: usize) -> LstreamNum { + match value { + 0 => LstreamNum::Lstream1, + 1 => LstreamNum::Lstream2, + 2 => LstreamNum::Lstream3, + 3 => LstreamNum::Lstream4, + _ => unreachable!("Wrong stream_idx"), + } + } +} impl_expr!(LstreamNum); /// Various tags that we can decode from a zstd encoded data. -#[derive(Clone, Copy, Debug, EnumIter)] +#[derive(Clone, Copy, Debug, EnumIter, PartialEq, Eq, Hash)] pub enum ZstdTag { /// Null should not occur. Null = 0, @@ -209,7 +224,8 @@ pub enum ZstdTag { } impl ZstdTag { - fn is_output(&self) -> bool { + /// Whether this tag produces an output or not. 
+ pub fn is_output(&self) -> bool { match self { Self::Null => false, Self::FrameHeaderDescriptor => false, @@ -218,18 +234,19 @@ impl ZstdTag { Self::RawBlockBytes => true, Self::RleBlockBytes => true, Self::ZstdBlockLiteralsHeader => false, - Self::ZstdBlockLiteralsRawBytes => true, - Self::ZstdBlockLiteralsRleBytes => true, + Self::ZstdBlockLiteralsRawBytes => false, + Self::ZstdBlockLiteralsRleBytes => false, Self::ZstdBlockFseCode => false, Self::ZstdBlockHuffmanCode => false, Self::ZstdBlockJumpTable => false, - Self::ZstdBlockLstream => true, + Self::ZstdBlockLstream => false, Self::ZstdBlockSequenceHeader => false, // TODO: more tags } } - fn is_block(&self) -> bool { + /// Whether this tag is a part of block or not. + pub fn is_block(&self) -> bool { match self { Self::Null => false, Self::FrameHeaderDescriptor => false, @@ -249,7 +266,8 @@ impl ZstdTag { } } - fn is_reverse(&self) -> bool { + /// Whether this tag is processed in back-to-front order. + pub fn is_reverse(&self) -> bool { match self { Self::Null => false, Self::FrameHeaderDescriptor => false, @@ -303,10 +321,15 @@ impl ToString for ZstdTag { pub struct ZstdState { pub tag: ZstdTag, pub tag_next: ZstdTag, + pub max_tag_len: u64, pub tag_len: u64, pub tag_idx: u64, pub tag_value: Value, pub tag_value_acc: Value, + pub is_tag_change: bool, + // Unlike tag_value, tag_rlc only uses challenge as multiplier + pub tag_rlc: Value, + pub tag_rlc_acc: Value, } impl Default for ZstdState { @@ -314,15 +337,19 @@ impl Default for ZstdState { Self { tag: ZstdTag::Null, tag_next: ZstdTag::FrameHeaderDescriptor, + max_tag_len: 0, tag_len: 0, tag_idx: 0, tag_value: Value::known(F::zero()), tag_value_acc: Value::known(F::zero()), + is_tag_change: false, + tag_rlc: Value::known(F::zero()), + tag_rlc_acc: Value::known(F::zero()), } } } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct EncodedData { pub byte_idx: u64, pub encoded_len: u64, @@ -341,6 +368,22 @@ impl EncodedData { } } +impl 
Default for EncodedData { + fn default() -> Self { + Self { + byte_idx: 0, + encoded_len: 0, + value_byte: 0, + reverse: false, + reverse_idx: 0, + reverse_len: 0, + aux_1: Value::known(F::zero()), + aux_2: Value::known(F::zero()), + value_rlc: Value::known(F::zero()), + } + } +} + #[derive(Clone, Debug, Default)] pub struct DecodedData { pub decoded_len: u64, @@ -354,6 +397,7 @@ pub struct DecodedData { pub struct HuffmanData { pub byte_offset: u64, pub bit_value: u8, + pub stream_idx: usize, pub k: (u8, u8), } @@ -370,6 +414,10 @@ pub struct HuffmanCodesData { /// Denotes the tuple (max_bitstring_len, Map). type ParsedCanonicalHuffmanCode = (u64, BTreeMap); +/// A representation indexed by bitstring (String) as key for decoding symbols specifically. +/// Huffman code decoding ensures prefix code, thus the explicit articulation of bitstring is +/// necessary. +type ParsedCanonicalHuffmanCodeBitstringMap = (u64, HashMap); impl HuffmanCodesData { /// Reconstruct the bitstrings for each symbol based on the canonical Huffman code weights. 
The @@ -425,6 +473,57 @@ impl HuffmanCodesData { (max_bitstring_len, sym_to_tuple) } + + /// Builds the map from each canonical Huffman bitstring to its symbol, returned together with the maximum bitstring length. + pub fn parse_bitstring_map(&self) -> ParsedCanonicalHuffmanCodeBitstringMap { + let mut weights: Vec = self.weights.iter().map(|w| *w as usize).collect(); + let sum_weights: usize = weights + .iter() + .filter_map(|&w| if w > 0 { Some(1 << (w - 1)) } else { None }) + .sum(); + + let nearest_pow_2: usize = 1 << (sum_weights - 1).next_power_of_two().trailing_zeros(); + weights.push(f64::log2((nearest_pow_2 - sum_weights) as f64).ceil() as usize + 1); + let max_number_of_bits = nearest_pow_2.trailing_zeros() as usize; + let n = weights.len(); + + let bitstring_length: Vec = weights + .iter() + .map(|&w| { + if w != 0 { + max_number_of_bits - w + 1 + } else { + 0 + } + }) + .collect(); + + let mut bitstring_map = HashMap::new(); + let mut cur_bit_value = 0; + + for bit_len in (1..=max_number_of_bits).rev() { + cur_bit_value += 1; + cur_bit_value >>= 1; + + for (sym, b_len) in bitstring_length.iter().enumerate().take(n) { + if *b_len == bit_len { + bitstring_map.insert( + format!("{:0width$b}", cur_bit_value, width = bit_len), + sym as u64, + ); + cur_bit_value += 1; + } + } + } + + let max_bitstring_len = bitstring_map + .keys() + .map(|k| k.len()) + .max() + .expect("Keys have maximum len"); + + (max_bitstring_len as u64, bitstring_map) + } } /// A single row in the FSE table. @@ -440,6 +539,23 @@ pub struct FseTableRow { pub num_bits: u64, /// The symbol emitted by the FSE table at this state. 
pub symbol: u64, + /// During FSE table decoding, keep track of the number of symbols emitted + pub num_emitted: u64, + /// During FSE table decoding, keep track of accumulated states assigned + pub n_acc: u64, +} + +// Used for tracking bit markers for non-byte-aligned bitstream decoding +#[derive(Clone, Debug, Default, PartialEq)] +pub struct BitstreamReadRow { + /// Start of the bit location within a byte [0, 8) + pub bit_start_idx: usize, + /// End of the bit location within a byte (0, 16) + pub bit_end_idx: usize, + /// The value of the bitstring + pub bit_value: u64, + /// Whether 0 bit is read + pub is_zero_bit_read: bool, } /// Data for the FSE table's witness values. @@ -467,6 +583,13 @@ pub struct FseAuxiliaryTableData { pub sym_to_states: BTreeMap>, } +/// Another form of Fse table that has state as key instead of the FseSymbol. +/// In decoding, symbols are emitted from state-chaining. +/// This representation makes it easy to look up decoded symbol from current state. +/// Maps each state to its (symbol, baseline, num_bits) tuple. +type FseStateMapping = BTreeMap; +type ReconstructedFse = (usize, Vec<(u32, u64)>, FseAuxiliaryTableData); + impl FseAuxiliaryTableData { #[allow(non_snake_case)] /// While we reconstruct an FSE table from a bitstream, we do not know before reconstruction @@ -476,10 +599,11 @@ impl FseAuxiliaryTableData { /// with the reconstructed FSE table. After processing the entire bitstream to reconstruct the /// FSE table, if the read bitstream was not byte aligned, then we discard the 1..8 bits from /// the last byte that we read from. - pub fn reconstruct(src: &[u8], byte_offset: usize) -> std::io::Result<(usize, Self)> { + pub fn reconstruct(src: &[u8], byte_offset: usize) -> std::io::Result { // construct little-endian bit-reader. let data = src.iter().skip(byte_offset).cloned().collect::>(); let mut reader = BitReader::endian(Cursor::new(&data), LittleEndian); + let mut bit_boundaries: Vec<(u32, u64)> = vec![]; // number of bits read by the bit-reader from the bistream. 
let mut offset = 0; @@ -488,6 +612,7 @@ impl FseAuxiliaryTableData { offset += 4; reader.read::(offset)? + 5 }; + bit_boundaries.push((offset, accuracy_log as u64 - 5)); let table_size = 1 << accuracy_log; let mut sym_to_states = BTreeMap::new(); @@ -538,6 +663,8 @@ impl FseAuxiliaryTableData { num_bits: nb, baseline, symbol: symbol.into(), + num_emitted: 0, + n_acc: 0, }) .collect(), ); @@ -545,6 +672,7 @@ impl FseAuxiliaryTableData { // update the total number of bits read so far. offset += n_bits_read; + bit_boundaries.push((offset, value)); // increment symbol. symbol = ((symbol as usize) + 1).into(); @@ -560,8 +688,18 @@ impl FseAuxiliaryTableData { // ignore any bits left to be read until byte-aligned. let t = (((offset as usize) - 1) / N_BITS_PER_BYTE) + 1; + // read the trailing section + if t * N_BITS_PER_BYTE > (offset as usize) { + let bits_remaining = t * N_BITS_PER_BYTE - offset as usize; + bit_boundaries.push(( + offset + bits_remaining as u32, + reader.read::(bits_remaining as u32)? as u64, + )); + } + Ok(( t, + bit_boundaries, Self { byte_offset: byte_offset as u64, table_size, @@ -569,18 +707,44 @@ impl FseAuxiliaryTableData { }, )) } + + /// Convert an FseAuxiliaryTableData into a state-mapped representation. + /// This makes it easier to lookup state-chaining during decoding. 
+ pub fn parse_state_table(&self) -> FseStateMapping { + let rows: Vec = self + .sym_to_states + .values() + .flat_map(|v| v.clone()) + .collect(); + let mut state_table: FseStateMapping = BTreeMap::new(); + + for row in rows { + state_table.insert(row.state, (row.symbol, row.baseline, row.num_bits)); + } + + state_table + } } #[derive(Clone, Debug)] +/// Row witness value for decompression circuit pub struct ZstdWitnessRow { + /// Current decoding state during Zstd decompression pub state: ZstdState, + /// Data on compressed data pub encoded_data: EncodedData, + /// Data on decompressed data pub decoded_data: DecodedData, + /// Huffman code bitstring marker that divides bitstream into symbol segments pub huffman_data: HuffmanData, + /// Fse decoding state transition data pub fse_data: FseTableRow, + /// Bitstream reader + pub bitstream_read_data: BitstreamReadRow, } impl ZstdWitnessRow { + /// Construct the first row of witnesses for decompression circuit pub fn init(src_len: usize) -> Self { Self { state: ZstdState::default(), @@ -591,6 +755,7 @@ impl ZstdWitnessRow { decoded_data: DecodedData::default(), huffman_data: HuffmanData::default(), fse_data: FseTableRow::default(), + bitstream_read_data: BitstreamReadRow::default(), } } } @@ -607,10 +772,12 @@ mod tests { // other bytes are garbage (for the purpose of this test case), and we want to make // sure FSE reconstruction ignores them. let src = vec![0xff, 0xff, 0xff, 0x30, 0x6f, 0x9b, 0x03, 0xff, 0xff, 0xff]; - let (n_bytes, table) = FseAuxiliaryTableData::reconstruct(&src, 3)?; + + let (n_bytes, _bit_boundaries, table) = FseAuxiliaryTableData::reconstruct(&src, 3)?; // TODO: assert equality for the entire table. // for now only comparing state/baseline/nb for S1, i.e. weight == 1. 
+ assert_eq!(n_bytes, 4); assert_eq!( table.sym_to_states.get(&FseSymbol::S1).cloned().unwrap(), @@ -630,10 +797,105 @@ mod tests { symbol: 1, baseline, num_bits, + num_emitted: 0, + n_acc: 0, }) .collect::>(), ); Ok(()) } + + #[test] + fn test_huffman_bitstring_reconstruction() -> std::io::Result<()> { + let weights = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 5, 3, 3, 3, 6, 3, 2, 4, 4, 0, 1, 4, 4, 5, 5, 2, 0, 4, 4, + 5, 3, 1, 3, 1, 3, + ] + .into_iter() + .map(FseSymbol::from) + .collect::>(); + + let huffman_codes_data = HuffmanCodesData { + byte_offset: 0, + weights, + }; + + let (max_bitstring_len, bitstring_map) = huffman_codes_data.parse_bitstring_map(); + + let expected_bitstrings: [(&str, u64); 53] = [ + ("01001", 10), + ("110", 32), + ("00000000", 33), + ("0001100", 39), + ("001010", 44), + ("0001101", 46), + ("00000001", 50), + ("00000010", 58), + ("0001110", 59), + ("0001111", 63), + ("00000011", 65), + ("00000100", 66), + ("00000101", 67), + ("00000110", 68), + ("00000111", 69), + ("00001000", 72), + ("0010000", 73), + ("00001001", 74), + ("00001010", 76), + ("00001011", 77), + ("00001100", 78), + ("0010001", 79), + ("00001101", 82), + ("00001110", 83), + ("00001111", 84), + ("00010000", 85), + ("00010001", 87), + ("00010010", 91), + ("00010011", 93), + ("1000", 97), + ("001011", 98), + ("001100", 99), + ("001101", 100), + ("111", 101), + ("001110", 102), + ("0010010", 103), + ("01010", 104), + ("01011", 105), + ("00010100", 107), + ("01100", 108), + ("01101", 109), + ("1001", 110), + ("1010", 111), + ("0010011", 112), + ("01110", 114), + ("01111", 115), + ("1011", 116), + ("001111", 117), + ("00010101", 118), + ("010000", 119), + ("00010110", 120), + ("010001", 121), + ("00010111", 122), + ]; + 
+ assert_eq!(max_bitstring_len, 8, "max bitstring len is 8"); + assert_eq!( + expected_bitstrings.len(), + bitstring_map.len(), + "# of bitstring is the same" + ); + for pair in expected_bitstrings { + assert_eq!( + *bitstring_map.get(pair.0).unwrap(), + pair.1, + "bitstring mapping is correct" + ); + } + + Ok(()) + } } diff --git a/zkevm-circuits/src/witness/zstd/util.rs b/zkevm-circuits/src/witness/zstd/util.rs index c9a3974602..07cacb3f12 100644 --- a/zkevm-circuits/src/witness/zstd/util.rs +++ b/zkevm-circuits/src/witness/zstd/util.rs @@ -117,6 +117,38 @@ pub fn value_bits_le(value_byte: u8) -> [u8; N_BITS_PER_BYTE] { .expect("expected N_BITS_PER_BYTE elements") } +pub fn le_bits_to_value(bits: &[u8]) -> u64 { + assert!(bits.len() <= 32); + let mut m: u64 = 1; + + bits.iter().fold(0, |mut acc, b| { + acc += (*b as u64) * m; + m *= 2; + acc + }) +} + +pub fn be_bits_to_value(bits: &[u8]) -> u64 { + assert!(bits.len() <= 32); + + bits.iter().fold(0, |mut acc, b| { + acc = acc * 2 + *b as u64; + acc + }) +} + +// Helper for bitstream delimitation: advances the bit index by one and bumps the byte index once the new bit index reaches current_byte_idx * N_BITS_PER_BYTE. +pub fn increment_idx(current_byte_idx: usize, current_bit_idx: usize) -> (usize, usize) { + let current_bit_idx = current_bit_idx + 1; + let mut current_byte_idx = current_byte_idx; + + if current_bit_idx >= current_byte_idx * N_BITS_PER_BYTE { + current_byte_idx += 1; + } + + (current_byte_idx, current_bit_idx) +} + #[cfg(test)] mod tests { use super::*;