From edca2b4537b6be64b9598a0779e3c407e49cb75c Mon Sep 17 00:00:00 2001 From: MCOfficer Date: Thu, 4 Apr 2024 15:02:15 +0200 Subject: [PATCH 1/4] Commit generated protobuf code, only regenerate on-demand --- bitar/build.rs | 16 ++- bitar/proto/chunk_dictionary.proto | 6 ++ bitar/src/chunk_dictionary.rs | 156 +++++++++++++++++++++++++++++ bitar/src/lib.rs | 6 +- 4 files changed, 178 insertions(+), 6 deletions(-) create mode 100644 bitar/src/chunk_dictionary.rs diff --git a/bitar/build.rs b/bitar/build.rs index f33c1f4..abbe32f 100644 --- a/bitar/build.rs +++ b/bitar/build.rs @@ -1,3 +1,17 @@ fn main() { - prost_build::compile_protos(&["proto/chunk_dictionary.proto"], &["proto/"]).unwrap(); + println!("cargo:rerun-if-changed=proto/"); // Note: change this to cargo:: (double colon) once MSRV>=1.77 + + let output_file = std::path::PathBuf::from("src/chunk_dictionary.rs"); + let generated_file = + std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("chunk_dictionary.rs"); + + if !output_file.exists() { + prost_build::compile_protos(&["proto/chunk_dictionary.proto"], &["proto/"]).unwrap(); + + let content = format!( + "// THIS FILE IS AUTOMATICALLY GENERATED\n// EDIT ../proto/chunk_dictionary.proto INSTEAD\n\n{}", + std::fs::read_to_string(generated_file).unwrap() + ); + std::fs::write(output_file, content).unwrap(); + } } diff --git a/bitar/proto/chunk_dictionary.proto b/bitar/proto/chunk_dictionary.proto index 0561bfe..a8e048b 100644 --- a/bitar/proto/chunk_dictionary.proto +++ b/bitar/proto/chunk_dictionary.proto @@ -1,3 +1,9 @@ +/* + Note: When editing this file, delete `src/chunk_dictionary.rs` + and run `cargo build` to have the buildscript regenerate it. + `protoc` needs to be in your PATH. 
+ */ + syntax = "proto3"; package chunk_dictionary; diff --git a/bitar/src/chunk_dictionary.rs b/bitar/src/chunk_dictionary.rs new file mode 100644 index 0000000..bb86ef7 --- /dev/null +++ b/bitar/src/chunk_dictionary.rs @@ -0,0 +1,156 @@ +// THIS FILE IS AUTOMATICALLY GENERATED +// EDIT ../proto/chunk_dictionary.proto INSTEAD + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ChunkDescriptor { + /// Hash of (uncompressed) chunk + #[prost(bytes = "vec", tag = "1")] + pub checksum: ::prost::alloc::vec::Vec<u8>, + /// Chunk data placement in archive. + /// If the archive_size = source_size then the chunk is uncompresed. + #[prost(uint32, tag = "3")] + pub archive_size: u32, + #[prost(uint64, tag = "4")] + pub archive_offset: u64, + /// Size of uncompressed chunk data + #[prost(uint32, tag = "5")] + pub source_size: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ChunkerParameters { + #[prost(uint32, tag = "1")] + pub chunk_filter_bits: u32, + #[prost(uint32, tag = "2")] + pub min_chunk_size: u32, + /// max_chunk_size is also the fixed chunk size when FIXED_SIZE is set + #[prost(uint32, tag = "3")] + pub max_chunk_size: u32, + #[prost(uint32, tag = "4")] + pub rolling_hash_window_size: u32, + #[prost(uint32, tag = "5")] + pub chunk_hash_length: u32, + #[prost(enumeration = "chunker_parameters::ChunkingAlgorithm", tag = "6")] + pub chunking_algorithm: i32, +} +/// Nested message and enum types in `ChunkerParameters`. +pub mod chunker_parameters { + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum ChunkingAlgorithm { + Buzhash = 0, + Rollsum = 1, + FixedSize = 2, + } + impl ChunkingAlgorithm { + /// String value of the enum field names used in the ProtoBuf definition. 
+ /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + ChunkingAlgorithm::Buzhash => "BUZHASH", + ChunkingAlgorithm::Rollsum => "ROLLSUM", + ChunkingAlgorithm::FixedSize => "FIXED_SIZE", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option<Self> { + match value { + "BUZHASH" => Some(Self::Buzhash), + "ROLLSUM" => Some(Self::Rollsum), + "FIXED_SIZE" => Some(Self::FixedSize), + _ => None, + } + } + } +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ChunkCompression { + #[prost(enumeration = "chunk_compression::CompressionType", tag = "2")] + pub compression: i32, + #[prost(uint32, tag = "3")] + pub compression_level: u32, +} +/// Nested message and enum types in `ChunkCompression`. +pub mod chunk_compression { + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum CompressionType { + None = 0, + Lzma = 1, + Zstd = 2, + Brotli = 3, + } + impl CompressionType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + CompressionType::None => "NONE", + CompressionType::Lzma => "LZMA", + CompressionType::Zstd => "ZSTD", + CompressionType::Brotli => "BROTLI", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. 
+ pub fn from_str_name(value: &str) -> ::core::option::Option<Self> { + match value { + "NONE" => Some(Self::None), + "LZMA" => Some(Self::Lzma), + "ZSTD" => Some(Self::Zstd), + "BROTLI" => Some(Self::Brotli), + _ => None, + } + } + } +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ChunkDictionary { + /// Dictionary was created with this version + #[prost(string, tag = "1")] + pub application_version: ::prost::alloc::string::String, + /// Hash of the source file + #[prost(bytes = "vec", tag = "2")] + pub source_checksum: ::prost::alloc::vec::Vec<u8>, + /// Total size of the source file + #[prost(uint64, tag = "3")] + pub source_total_size: u64, + /// Chunker parameters used when building archive + #[prost(message, optional, tag = "4")] + pub chunker_params: ::core::option::Option<ChunkerParameters>, + /// Chunk compression used for all chunks in archive + #[prost(message, optional, tag = "5")] + pub chunk_compression: ::core::option::Option<ChunkCompression>, + /// Array of chunk descriptor indexes describing howto rebuild the source + #[prost(uint32, repeated, tag = "6")] + pub rebuild_order: ::prost::alloc::vec::Vec<u32>, + /// Chunk descriptors in order of first occurence in source file + #[prost(message, repeated, tag = "7")] + pub chunk_descriptors: ::prost::alloc::vec::Vec<ChunkDescriptor>, +} diff --git a/bitar/src/lib.rs b/bitar/src/lib.rs index 1419dfb..6e9d993 100644 --- a/bitar/src/lib.rs +++ b/bitar/src/lib.rs @@ -11,6 +11,7 @@ mod rolling_hash; pub mod api; pub mod archive_reader; +pub mod chunk_dictionary; pub mod chunker; pub mod header; @@ -26,8 +27,3 @@ pub use compression::{ Compression, CompressionAlgorithm, CompressionError, CompressionLevelOutOfRangeError, }; pub use hashsum::HashSum; - -pub mod chunk_dictionary { - #![allow(clippy::derive_partial_eq_without_eq)] - include!(concat!(env!("OUT_DIR"), "/chunk_dictionary.rs")); -} From 0005d255073e9d2f6fd7abc04b54daf3e162dfc9 Mon Sep 17 00:00:00 2001 From: MCOfficer Date: Thu, 4 Apr 2024 15:26:10 +0200 
Subject: [PATCH 2/4] rustfmt: skip generated code --- bitar/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/bitar/src/lib.rs b/bitar/src/lib.rs index 6e9d993..dbb94a0 100644 --- a/bitar/src/lib.rs +++ b/bitar/src/lib.rs @@ -11,6 +11,7 @@ mod rolling_hash; pub mod api; pub mod archive_reader; +#[rustfmt::skip] pub mod chunk_dictionary; pub mod chunker; pub mod header; From 30fa7896624a6898048300129b292830f1a6825c Mon Sep 17 00:00:00 2001 From: MCOfficer Date: Thu, 4 Apr 2024 23:06:56 +0200 Subject: [PATCH 3/4] remove protoc from CI --- .github/workflows/cd.yml | 4 ---- .github/workflows/rust.yml | 15 --------------- 2 files changed, 19 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 904380f..91da3a8 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -41,10 +41,6 @@ jobs: with: toolchain: stable targets: ${{ matrix.target }} - - name: Install protoc - uses: arduino/setup-protoc@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Enable static CRT linkage (Windows) if: ${{ matrix.os == 'windows-latest' }} run: | diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 9f65b4c..fb30199 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -20,11 +20,6 @@ jobs: with: toolchain: ${{ matrix.rust }} - - name: Install protoc - uses: arduino/setup-protoc@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: build run: cargo build -p bitar --verbose @@ -52,11 +47,6 @@ jobs: with: toolchain: ${{ matrix.rust }} - - name: Install protoc - uses: arduino/setup-protoc@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: build run: cargo build @@ -123,10 +113,5 @@ jobs: toolchain: ${{ matrix.rust }} components: clippy - - name: Install protoc - uses: arduino/setup-protoc@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Check lints run: cargo clippy --all -- -D warnings -A clippy::cognitive-complexity From 
ab50757454c62d3418696be792fe738619ae461a Mon Sep 17 00:00:00 2001 From: MCOfficer Date: Fri, 5 Apr 2024 10:35:37 +0200 Subject: [PATCH 4/4] remove cargo:rerun-if-changed --- bitar/build.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/bitar/build.rs b/bitar/build.rs index abbe32f..41d0fe2 100644 --- a/bitar/build.rs +++ b/bitar/build.rs @@ -1,6 +1,4 @@ fn main() { - println!("cargo:rerun-if-changed=proto/"); // Note: change this to cargo:: (double colon) once MSRV>=1.77 - let output_file = std::path::PathBuf::from("src/chunk_dictionary.rs"); let generated_file = std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("chunk_dictionary.rs");