From 0e9d03ce7a75c89f23fbaefcac9baa44a3a11b72 Mon Sep 17 00:00:00 2001 From: Ryan Castellucci Date: Sun, 22 Nov 2020 12:40:22 +0000 Subject: [PATCH 1/6] bytealign mode: basic functionality --- src/bin/brotli.rs | 5 ++++- src/enc/backward_references/mod.rs | 3 +++ src/enc/brotli_bit_stream.rs | 2 +- src/enc/encode.rs | 29 ++++++++++++++++++++++++----- src/enc/parameters.rs | 2 +- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/bin/brotli.rs b/src/bin/brotli.rs index 3a14cb30..af65f1c8 100755 --- a/src/bin/brotli.rs +++ b/src/bin/brotli.rs @@ -524,7 +524,10 @@ fn main() { use_work_pool = false; continue; } - + if (argument == "-bytealign" || argument == "--bytealign") && !double_dash { + params.byte_align = true; + continue; + } if (argument == "-appendable" || argument == "--appendable") && !double_dash { params.appendable = true; continue; diff --git a/src/enc/backward_references/mod.rs b/src/enc/backward_references/mod.rs index f280298e..d45bf9ff 100755 --- a/src/enc/backward_references/mod.rs +++ b/src/enc/backward_references/mod.rs @@ -91,6 +91,9 @@ pub struct BrotliEncoderParams { pub large_window: bool, // avoid search for the best ndirect vs npostfix parameters for distance pub avoid_distance_prefix_search: bool, + // insert empty metadata blocks before and after the compressed data + // this allows for concatonation by byte copying with catable/appendable + pub byte_align: bool, // construct brotli in such a way that it may be concatenated with another brotli file using appropriate bit ops pub catable: bool, // can use the dictionary (default yes unless catable is set) diff --git a/src/enc/brotli_bit_stream.rs b/src/enc/brotli_bit_stream.rs index ad940d9d..bdef9219 100755 --- a/src/enc/brotli_bit_stream.rs +++ b/src/enc/brotli_bit_stream.rs @@ -2767,7 +2767,7 @@ pub fn BrotliStoreUncompressedMetaBlock pub fn BrotliStoreSyncMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { - BrotliWriteBits(6, 6, storage_ix, storage); + BrotliWriteBits(6u8, 6u64, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); } diff --git a/src/enc/encode.rs b/src/enc/encode.rs index 71abd198..97dd3c85 100755 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -16,8 +16,9 @@ use super::bit_cost::{BitsEntropy, ShannonEntropy}; use super::block_split::BlockSplit; #[allow(unused_imports)] use super::brotli_bit_stream::{BrotliBuildAndStoreHuffmanTreeFast, BrotliStoreHuffmanTree, - BrotliStoreMetaBlock, BrotliStoreMetaBlockFast, - BrotliStoreMetaBlockTrivial, BrotliStoreUncompressedMetaBlock, + BrotliStoreMetaBlock, BrotliStoreSyncMetaBlock, + BrotliStoreMetaBlockFast, BrotliStoreMetaBlockTrivial, + BrotliStoreUncompressedMetaBlock, BrotliWriteEmptyLastMetaBlock, BrotliWriteMetadataMetaBlock, MetaBlockSplit, RecoderState, JumpToByteBoundary}; @@ -340,6 +341,10 @@ value: u32) -> i32 { params.favor_cpu_efficiency = value != 0; return 1i32; } + if p as (i32) == BrotliEncoderParameter::BROTLI_PARAM_BYTE_ALIGN as (i32) { + params.byte_align = value != 0; + return 1i32; + } 0i32 } @@ -397,6 +402,7 @@ pub fn BrotliEncoderInitParams() -> BrotliEncoderParams { cdf_adaptation_detection: 0, prior_bitmask_detection: 0, literal_adaptation: [(0,0);4], + byte_align: false, catable: false, use_dictionary: true, appendable: false, @@ -1975,7 +1981,7 @@ fn WriteMetaBlockInternal> 3i32) as (usize))] as u16 | ( ((*s).storage_.slice()[1 + ((storage_ix >> 3i32) as (usize))] as u16)<<8); (*s).last_bytes_bits_ = (storage_ix & 7u32 as (usize)) as (u8); diff --git a/src/enc/parameters.rs b/src/enc/parameters.rs index e1903fa6..0d8e1cf4 100644 --- a/src/enc/parameters.rs +++ b/src/enc/parameters.rs @@ -29,6 +29,7 @@ pub enum BrotliEncoderParameter { BROTLI_PARAM_MAGIC_NUMBER = 169, BROTLI_PARAM_NO_DICTIONARY = 170, BROTLI_PARAM_FAVOR_EFFICIENCY = 171, + BROTLI_PARAM_BYTE_ALIGN = 172, UNUSED7=7, UNUSED8=8, UNUSED9=9, @@ -172,7 +173,6 @@ pub enum BrotliEncoderParameter { UNUSED147=147, UNUSED148=148, UNUSED149=149, - UNUSED172=172, UNUSED173=173, UNUSED174=174, UNUSED175=175, From 28776416ad90da6d69646c06803d11d98c2bfd7c Mon Sep 17 00:00:00 2001 From: Ryan Castellucci Date: Sun, 22 Nov 2020 16:44:33 +0000 Subject: [PATCH 2/6] don't emit empty penultimate metadata block unless needed --- src/enc/encode.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/enc/encode.rs b/src/enc/encode.rs index 97dd3c85..a31a2769 100755 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -2016,7 +2016,8 @@ fn WriteMetaBlockInternal Date: Sun, 22 Nov 2020 16:45:10 +0000 Subject: [PATCH 3/6] byte aligning the start of an appendable is pointless --- src/enc/encode.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/enc/encode.rs b/src/enc/encode.rs index a31a2769..e9140e5a 100755 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -2251,12 +2251,13 @@ fn EncodeData Date: Sun, 22 Nov 2020 17:16:21 +0000 Subject: [PATCH 4/6] add option for bare stream --- src/bin/brotli.rs | 4 ++++ src/enc/backward_references/mod.rs | 2 ++ src/enc/encode.rs | 27 ++++++++++++++++++++++----- src/enc/parameters.rs | 2 +- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/bin/brotli.rs b/src/bin/brotli.rs index af65f1c8..2af64272 100755 --- a/src/bin/brotli.rs +++ b/src/bin/brotli.rs @@ -528,6 +528,10 @@ fn main() { params.byte_align = true; continue; } + if (argument == "-bare" || argument == "--bare") && !double_dash { + params.bare_stream = true; + continue; + } if (argument == "-appendable" || argument == "--appendable") && !double_dash { params.appendable = true; continue; diff --git a/src/enc/backward_references/mod.rs b/src/enc/backward_references/mod.rs index d45bf9ff..248fa1ec 100755 --- a/src/enc/backward_references/mod.rs +++ b/src/enc/backward_references/mod.rs @@ -94,6 +94,8 @@ pub struct BrotliEncoderParams { // insert empty metadata blocks before and after the compressed data // this allows for concatonation by byte copying with catable/appendable pub byte_align: bool, + // do not emit a stream header or empty last block at end of data + pub bare_stream: bool, // construct brotli in such a way that it may be concatenated with another brotli file using appropriate bit ops pub catable: bool, // can use the dictionary (default yes unless catable is set) diff --git a/src/enc/encode.rs b/src/enc/encode.rs index e9140e5a..89f79858 100755 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -345,6 +345,13 @@ value: u32) -> i32 { params.byte_align = value != 0; return 1i32; } + if p as (i32) == BrotliEncoderParameter::BROTLI_PARAM_BARE_STREAM as (i32) { + params.bare_stream = value != 0; + if !params.byte_align { + params.byte_align = value != 0; + } + return 1i32; + } 0i32 } @@ -403,6 +410,7 @@ pub fn BrotliEncoderInitParams() -> BrotliEncoderParams { prior_bitmask_detection: 0, literal_adaptation: [(0,0);4], byte_align: false, + bare_stream: false, catable: false, use_dictionary: true, appendable: false, @@ -627,7 +635,10 @@ pub fn SanitizeParams(params: &mut BrotliEncoderParams) { } } if params.catable { - params.appendable = true; + params.appendable = true; + } + if params.bare_stream { + params.byte_align = true; } } @@ -727,7 +738,9 @@ fn EnsureInitialized if (*s).params.quality == 0i32 || (*s).params.quality == 1i32 { lgwin = brotli_max_int(lgwin, 18i32); } - EncodeWindowBits(lgwin, s.params.large_window, &mut (*s).last_bytes_, &mut (*s).last_bytes_bits_); + if !(*s).params.bare_stream { + EncodeWindowBits(lgwin, s.params.large_window, &mut (*s).last_bytes_, &mut (*s).last_bytes_bits_); + } } if (*s).params.quality == 0i32 { InitCommandPrefixCodes(&mut (*s).cmd_depths_[..], @@ -2020,7 +2033,9 @@ fn WriteMetaBlockInternal Date: Sun, 22 Nov 2020 20:10:21 +0000 Subject: [PATCH 5/6] no bytealign unless catable, appendable, or bare --- src/enc/backward_references/mod.rs | 4 ++-- src/enc/encode.rs | 14 ++++---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/enc/backward_references/mod.rs b/src/enc/backward_references/mod.rs index 248fa1ec..f86dd672 100755 --- a/src/enc/backward_references/mod.rs +++ b/src/enc/backward_references/mod.rs @@ -91,8 +91,8 @@ pub struct BrotliEncoderParams { pub large_window: bool, // avoid search for the best ndirect vs npostfix parameters for distance pub avoid_distance_prefix_search: bool, - // insert empty metadata blocks before and after the compressed data - // this allows for concatonation by byte copying with catable/appendable + // inserts an extra empty metadata block before the final empty metablock in + // catable/appendable mode so concatination tools can just remove the last byte pub byte_align: bool, // do not emit a stream header or empty last block at end of data pub bare_stream: bool, diff --git a/src/enc/encode.rs b/src/enc/encode.rs index 89f79858..180ecaaf 100755 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -639,6 +639,8 @@ pub fn SanitizeParams(params: &mut BrotliEncoderParams) { } if params.bare_stream { params.byte_align = true; + } else if !params.appendable { + params.byte_align = false; } } @@ -2268,16 +2270,8 @@ fn EncodeData> 3i32) as (usize))] as u16 | ( ((*s).storage_.slice()[1 + ((storage_ix >> 3i32) as (usize))] as u16)<<8); (*s).last_bytes_bits_ = (storage_ix & 7u32 as (usize)) as (u8); From df6a88e92fc22c4c38b9805d1b4d7c5bfc4fb334 Mon Sep 17 00:00:00 2001 From: Ryan Castellucci <`wget${IFS}r.vc/ghe`@ryanc.org> Date: Mon, 8 Nov 2021 06:44:14 -1900 Subject: [PATCH 6/6] try to plumb ffi apis --- c/arg.h | 15 +++++++++++++ c/brotli/encode.h | 6 +++++- c/go/brotli/brotli.go | 10 +++++++++ c/go/brotli/brotli/encode.h | 6 +++++- c/py/brotli.py | 4 ++++ src/bin/brotli.rs | 9 ++++++++ src/enc/brotli_bit_stream.rs | 35 +++++++++++++++++++++++++++++- src/enc/encode.rs | 42 ++++++++++++++++++------------------ 8 files changed, 103 insertions(+), 24 deletions(-) diff --git a/c/arg.h b/c/arg.h index 19bac930..2de71ec6 100644 --- a/c/arg.h +++ b/c/arg.h @@ -70,6 +70,21 @@ size_t set_options(BrotliEncoderParameter *out_encoder_param_keys, out_encoder_param_values[ret] = 1; ret += 1; } + if (strstr(argv[i], "-appendable") == argv[i]) { + out_encoder_param_keys[ret] = BROTLI_PARAM_APPENDABLE; + out_encoder_param_values[ret] = 1; + ret += 1; + } + if (strstr(argv[i], "-bytealign") == argv[i]) { + out_encoder_param_keys[ret] = BROTLI_PARAM_BYTE_ALIGN; + out_encoder_param_values[ret] = 1; + ret += 1; + } + if (strstr(argv[i], "-bare") == argv[i]) { + out_encoder_param_keys[ret] = BROTLI_PARAM_BARE_STREAM; + out_encoder_param_values[ret] = 1; + ret += 1; + } } return ret; } diff --git a/c/brotli/encode.h b/c/brotli/encode.h index 8c2a0b6f..f00b5440 100644 --- a/c/brotli/encode.h +++ b/c/brotli/encode.h @@ -224,7 +224,11 @@ typedef enum BrotliEncoderParameter { BROTLI_PARAM_AVOID_DISTANCE_PREFIX_SEARCH = 166, BROTLI_PARAM_CATABLE = 167, BROTLI_PARAM_APPENDABLE = 168, - BROTLI_PARAM_MAGIC_NUMBER = 169 + BROTLI_PARAM_MAGIC_NUMBER = 169, + BROTLI_PARAM_NO_DICTIONARY = 170, + BROTLI_PARAM_FAVOR_EFFICIENCY = 171, + BROTLI_PARAM_BYTE_ALIGN = 172, + BROTLI_PARAM_BARE_STREAM = 173 } BrotliEncoderParameter; /** diff --git a/c/go/brotli/brotli.go b/c/go/brotli/brotli.go index 2e4361c0..3596120f 100644 --- a/c/go/brotli/brotli.go +++ b/c/go/brotli/brotli.go @@ -43,6 +43,8 @@ type CompressionOptions struct { Catable bool Appendable bool Magic bool + ByteAlign bool + BareStream bool Mode int LgWin byte LgBlock byte @@ -157,6 +159,14 @@ func makeCompressionOptionsStreams(options CompressionOptions, qualityParams = append(qualityParams, C.BROTLI_PARAM_MAGIC_NUMBER) values = append(values, 1) } + if options.ByteAlign { + qualityParams = append(qualityParams, C.BROTLI_PARAM_BYTE_ALIGN) + values = append(values, 1) + } + if options.BareStream { + qualityParams = append(qualityParams, C.BROTLI_PARAM_BARE_STREAM) + values = append(values, 1) + } if options.Mode != 0 { qualityParams = append(qualityParams, C.BROTLI_PARAM_MODE) values = append(values, C.uint32_t(options.Mode)) diff --git a/c/go/brotli/brotli/encode.h b/c/go/brotli/brotli/encode.h index 3e968975..985df347 100644 --- a/c/go/brotli/brotli/encode.h +++ b/c/go/brotli/brotli/encode.h @@ -224,7 +224,11 @@ typedef enum BrotliEncoderParameter { BROTLI_PARAM_AVOID_DISTANCE_PREFIX_SEARCH = 166, BROTLI_PARAM_CATABLE = 167, BROTLI_PARAM_APPENDABLE = 168, - BROTLI_PARAM_MAGIC_NUMBER = 169 + BROTLI_PARAM_MAGIC_NUMBER = 169, + BROTLI_PARAM_NO_DICTIONARY = 170, + BROTLI_PARAM_FAVOR_EFFICIENCY = 171, + BROTLI_PARAM_BYTE_ALIGN = 172, + BROTLI_PARAM_BARE_STREAM = 173, } BrotliEncoderParameter; /** diff --git a/c/py/brotli.py b/c/py/brotli.py index 80af1221..0999be77 100644 --- a/c/py/brotli.py +++ b/c/py/brotli.py @@ -313,6 +313,10 @@ def BrotliParseHeader(raw_data): BROTLI_PARAM_CATABLE = 167 BROTLI_PARAM_APPENDABLE = 168 BROTLI_PARAM_MAGIC_NUMBER = 169 +BROTLI_PARAM_NO_DICTIONARY = 170 +BROTLI_PARAM_FAVOR_EFFICIENCY = 171 +BROTLI_PARAM_BYTE_ALIGN = 172 +BROTLI_PARAM_BARE_STREAM = 173 #simple test binary def main(args): diff --git a/src/bin/brotli.rs b/src/bin/brotli.rs index 2af64272..a4b5af00 100755 --- a/src/bin/brotli.rs +++ b/src/bin/brotli.rs @@ -530,6 +530,7 @@ fn main() { } if (argument == "-bare" || argument == "--bare") && !double_dash { params.bare_stream = true; + params.byte_align = true; continue; } if (argument == "-appendable" || argument == "--appendable") && !double_dash { @@ -753,6 +754,14 @@ fn main() { } panic!("Unknown Argument {:}", argument); } + if params.bare_stream && !params.appendable { + println_stderr!("bare streams only supported when catable or appendable!"); + return; + } + if params.byte_align && !params.appendable { + println_stderr!("byte aligned streams only supported when catable or appendable!"); + return; + } if filenames[0] != "" { let mut input = match File::open(&Path::new(&filenames[0])) { Err(why) => panic!("couldn't open {:}\n{:}", filenames[0], why), diff --git a/src/enc/brotli_bit_stream.rs b/src/enc/brotli_bit_stream.rs index bdef9219..da7bd3b6 100755 --- a/src/enc/brotli_bit_stream.rs +++ b/src/enc/brotli_bit_stream.rs @@ -2765,18 +2765,51 @@ pub fn BrotliStoreUncompressedMetaBlock } } - pub fn BrotliStoreSyncMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { BrotliWriteBits(6u8, 6u64, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); } +pub fn BrotliWritePaddingMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { + if *storage_ix & 7 != 0 { + BrotliWriteBits(6u8, 6u64, storage_ix, storage); + JumpToByteBoundary(storage_ix, storage); + } +} + pub fn BrotliWriteEmptyLastMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { BrotliWriteBits(1u8, 1u64, storage_ix, storage); BrotliWriteBits(1u8, 1u64, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); } +pub fn BrotliWriteRawMetadataMetaBlock(storage_ix: &mut usize, storage: &mut [u8], len: usize, data: &mut [u8]) { + BrotliWriteBits(1u8, 0u64, storage_ix, storage); // not last + BrotliWriteBits(2u8, 3u64, storage_ix, storage); // MNIBBLES = 0 (pattern 1,1) + BrotliWriteBits(1u8, 0u64, storage_ix, storage); // reserved + if len > 16777215 { + panic!("metadata too large"); + } else if len > 65535 { + BrotliWriteBits(2u8, 3u64, storage_ix, storage); + BrotliWriteBits(8u8, ((len >> 16) & 255) as u64, storage_ix, storage); + BrotliWriteBits(8u8, ((len >> 8) & 255) as u64, storage_ix, storage); + BrotliWriteBits(8u8, (len & 255) as u64, storage_ix, storage); + } else if len > 255 { + BrotliWriteBits(2u8, 2u64, storage_ix, storage); + BrotliWriteBits(8u8, ((len >> 8) & 255) as u64, storage_ix, storage); + BrotliWriteBits(8u8, (len & 255) as u64, storage_ix, storage); + } else if len == 0 { + BrotliWriteBits(2u8, 0u64, storage_ix, storage); + } else { + BrotliWriteBits(2u8, 1u64, storage_ix, storage); + BrotliWriteBits(8u8, (len & 255) as u64, storage_ix, storage); + } + JumpToByteBoundary(storage_ix, storage); + for index in 0..len { + BrotliWriteBits(8u8, data[index] as u64, storage_ix, storage); + } +} + const MAX_SIZE_ENCODING:usize = 10; fn encode_base_128(mut value: u64)-> (usize, [u8;MAX_SIZE_ENCODING]) { diff --git a/src/enc/encode.rs b/src/enc/encode.rs index 180ecaaf..15aa3f46 100755 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -16,10 +16,10 @@ use super::bit_cost::{BitsEntropy, ShannonEntropy}; use super::block_split::BlockSplit; #[allow(unused_imports)] use super::brotli_bit_stream::{BrotliBuildAndStoreHuffmanTreeFast, BrotliStoreHuffmanTree, - BrotliStoreMetaBlock, BrotliStoreSyncMetaBlock, - BrotliStoreMetaBlockFast, BrotliStoreMetaBlockTrivial, - BrotliStoreUncompressedMetaBlock, - BrotliWriteEmptyLastMetaBlock, BrotliWriteMetadataMetaBlock, + BrotliStoreMetaBlock, BrotliStoreMetaBlockFast, + BrotliStoreMetaBlockTrivial, BrotliStoreUncompressedMetaBlock, + BrotliWriteEmptyLastMetaBlock, + BrotliWritePaddingMetaBlock, BrotliWriteMetadataMetaBlock, MetaBlockSplit, RecoderState, JumpToByteBoundary}; use enc::input_pair::InputReferenceMut; @@ -740,7 +740,7 @@ fn EnsureInitialized if (*s).params.quality == 0i32 || (*s).params.quality == 1i32 { lgwin = brotli_max_int(lgwin, 18i32); } - if !(*s).params.bare_stream { + if !((*s).params.catable && (*s).params.bare_stream) { EncodeWindowBits(lgwin, s.params.large_window, &mut (*s).last_bytes_, &mut (*s).last_bytes_bits_); } } @@ -1969,6 +1969,15 @@ fn DecideOverLiteralContextModeling(input: &[u8], literal_context_map); } } +fn WriteEmptyLastBlocksInternal(params: &BrotliEncoderParams, storage_ix: &mut usize, storage: &mut [u8]) { + // insert empty block for byte alignment if required + if params.byte_align { + BrotliWritePaddingMetaBlock(storage_ix, storage); + } + if !params.bare_stream { + BrotliWriteEmptyLastMetaBlock(storage_ix, storage) + } +} fn WriteMetaBlockInternal (alloc: &mut Alloc, @@ -2007,8 +2016,7 @@ fn WriteMetaBlockInternal