From 4777016c5a826c310b19a8a2b6f9aef103ef8ba8 Mon Sep 17 00:00:00 2001
From: Egor Larionov
Date: Sat, 1 Oct 2022 23:21:01 -0700
Subject: [PATCH] Implement compression support in DataArrays

Each inline (non-appended) binary DataArray now supports compression in
the same way appended data did before.

This completes the fixes for all failing meshes from issue #21. The
remaining pygmsh tests are now enabled.
---
 src/xml.rs      | 345 +++++++++++++++++++++++++++---------------------
 tests/pygmsh.rs |   2 -
 2 files changed, 192 insertions(+), 155 deletions(-)

diff --git a/src/xml.rs b/src/xml.rs
index 8cad64f..09c8726 100644
--- a/src/xml.rs
+++ b/src/xml.rs
@@ -1921,6 +1921,49 @@ impl DataArray {
         DataArray { num_comp, ..self }
     }
 
+    /// Helper to extract possibly compressed binary data from a `String` into an `IOBuffer`.
+    fn extract_data(
+        ei: EncodingInfo,
+        scalar_type: ScalarType,
+        data: Data,
+    ) -> std::result::Result<IOBuffer, ValidationError> {
+        use model::IOBuffer;
+
+        let header_bytes = ei.header_type.size();
+        // Binary data in a data array (i.e. not in appended data) is always base64 encoded.
+        // It can also be compressed.
+        if matches!(ei.compressor, Compressor::None) {
+            // The leading header stores the number of data bytes that follow; skip it.
+            let bytes = base64::decode(data.into_string())?;
+            // eprintln!("{:?}", &bytes[..header_bytes]);
+            return Ok(IOBuffer::from_bytes(
+                &bytes[header_bytes..],
+                scalar_type.into(),
+                ei.byte_order,
+            )?);
+        }
+
+        // Temporary buffer used for decoding compressed types.
+        let mut buf = Vec::new();
+
+        let data_string = data.into_string();
+        let encoded_data = data_string.as_bytes();
+        let bytes = decode_and_decompress(
+            &mut buf,
+            base64_decode_buf,
+            to_b64,
+            encoded_data,
+            header_bytes,
+            ei,
+        )?;
+
+        Ok(IOBuffer::from_bytes(
+            bytes.as_slice(),
+            scalar_type.into(),
+            ei.byte_order,
+        )?)
+    }
+
     /// Convert this data array into a `model::FieldArray` type.
     ///
     /// The given arguments are the number of elements (not bytes) in the expected output
@@ -1946,7 +1989,6 @@
         //eprintln!("name = {:?}", &name);
 
         let num_elements = usize::try_from(num_comp).unwrap() * l;
-        let header_bytes = ei.header_type.size();
 
         let data = match format {
             DataArrayFormat::Appended => {
@@ -1966,14 +2008,7 @@
                 }
             }
             DataArrayFormat::Binary => {
-                // First byte gives the bytes
-                let bytes = base64::decode(data[0].clone().into_string())?;
-                // eprintln!("{:?}", &bytes[..header_bytes]);
-                let buf = IOBuffer::from_bytes(
-                    &bytes[header_bytes..],
-                    scalar_type.into(),
-                    ei.byte_order,
-                )?;
+                let buf = Self::extract_data(ei, scalar_type, data.into_iter().next().unwrap())?;
                 if buf.len() != num_elements {
                     return Err(ValidationError::DataArraySizeMismatch {
                         name,
@@ -2209,6 +2244,151 @@ pub enum Encoding {
     Raw,
 }
 
+/// Customized base64::decode function that accepts a buffer.
+fn base64_decode_buf<'a>(
+    input: &[u8],
+    buf: &'a mut Vec<u8>,
+) -> std::result::Result<&'a [u8], ValidationError> {
+    base64::decode_config_buf(
+        input,
+        base64::STANDARD.decode_allow_trailing_bits(true),
+        buf,
+    )?;
+    Ok(buf.as_slice())
+}
+
+/// Converts the number of target bytes to the number of chars in base64 encoding.
+fn to_b64(bytes: usize) -> usize {
+    4 * (bytes as f64 / 3.0).ceil() as usize
+    //(bytes * 4 + 1) / 3 + match bytes % 3 {
+    //    1 => 2, 2 => 1, _ => 0
+    //}
+}
+
+// Helper function to read a single header number, which depends on the encoding parameters.
+fn read_header_num<B: AsRef<[u8]>>(
+    header_buf: &mut std::io::Cursor<B>,
+    ei: EncodingInfo,
+) -> std::result::Result<usize, ValidationError> {
+    use byteorder::ReadBytesExt;
+    use byteorder::{BE, LE};
+    Ok(match ei.byte_order {
+        model::ByteOrder::LittleEndian => {
+            if ei.header_type == ScalarType::UInt64 {
+                header_buf.read_u64::<LE>()? as usize
+            } else {
+                header_buf.read_u32::<LE>()? as usize
+            }
+        }
+        model::ByteOrder::BigEndian => {
+            if ei.header_type == ScalarType::UInt64 {
+                header_buf.read_u64::<BE>()? as usize
+            } else {
+                header_buf.read_u32::<BE>()? as usize
+            }
+        }
+    })
+}
+
+/// Decodes and decompresses the given data, returning an owned `Vec` of bytes.
+// Allow unused variable warnings, which are fired when compression is disabled.
+#[allow(unused_variables)]
+fn decode_and_decompress<'a, D, B>(
+    buf: &'a mut Vec<u8>,
+    mut decode: D,
+    mut to_b64: B,
+    data: &'a [u8],
+    header_bytes: usize,
+    ei: EncodingInfo,
+) -> std::result::Result<Vec<u8>, ValidationError>
+where
+    D: for<'b> FnMut(&'b [u8], &'b mut Vec<u8>) -> std::result::Result<&'b [u8], ValidationError>,
+    B: FnMut(usize) -> usize,
+{
+    use std::io::Cursor;
+
+    // Compressed data has a more complex header.
+    // The data is organized as [nb][nu][np][nc_1]...[nc_nb][Data]
+    // Where
+    // [nb] = Number of blocks in the data array
+    // [nu] = Block size before compression
+    // [np] = Size of the last partial block before compression (zero if it is not needed)
+    // [nc_i] = Size in bytes of block i after compression
+    // See https://vtk.org/Wiki/VTK_XML_Formats for details.
+    // In this case we don't know how many bytes are in the data array so we must first read
+    // this information from a header.
+
+    // First we need to determine the number of blocks stored.
+    let num_blocks = {
+        let encoded_header = &data[0..to_b64(header_bytes)];
+        let decoded_header = decode(encoded_header, buf)?;
+        read_header_num(&mut Cursor::new(decoded_header), ei)?
+    };
+
+    let full_header_bytes = header_bytes * (3 + num_blocks); // nb + nu + np + sum_i nc_i
+    buf.clear();
+
+    let encoded_header = &data[0..to_b64(full_header_bytes)];
+    let decoded_header = decode(encoded_header, buf)?;
+    let mut header_cursor = Cursor::new(decoded_header);
+    let _nb = read_header_num(&mut header_cursor, ei); // We already know the number of blocks
+    let _nu = read_header_num(&mut header_cursor, ei);
+    let _np = read_header_num(&mut header_cursor, ei);
+    let nc_total = (0..num_blocks).fold(0, |acc, _| {
+        acc + read_header_num(&mut header_cursor, ei).unwrap_or(0)
+    });
+    let num_data_bytes = to_b64(nc_total);
+    let start = to_b64(full_header_bytes);
+    buf.clear();
+    let encoded_data = &data[start..start + num_data_bytes];
+    let decoded_data = decode(encoded_data, buf)?;
+
+    // Now that the data is decoded, what is left is to decompress it.
+    match ei.compressor {
+        Compressor::ZLib => {
+            #[cfg(not(feature = "flate2"))]
+            {
+                return Err(ValidationError::MissingCompressionLibrary(ei.compressor));
+            }
+            #[cfg(feature = "flate2")]
+            {
+                use std::io::Read;
+                let mut out = Vec::new();
+                let mut decoder = flate2::read::ZlibDecoder::new(decoded_data);
+                decoder.read_to_end(&mut out)?;
+                Ok(out)
+            }
+        }
+        Compressor::LZ4 => {
+            #[cfg(not(feature = "lz4"))]
+            {
+                return Err(ValidationError::MissingCompressionLibrary(ei.compressor));
+            }
+            #[cfg(feature = "lz4")]
+            {
+                Ok(lz4::decompress(decoded_data, num_data_bytes)?)
+            }
+        }
+        Compressor::LZMA => {
+            #[cfg(not(feature = "xz2"))]
+            {
+                return Err(ValidationError::MissingCompressionLibrary(ei.compressor));
+            }
+            #[cfg(feature = "xz2")]
+            {
+                use std::io::Read;
+                let mut out = Vec::new();
+                let mut decoder = xz2::read::XzDecoder::new(decoded_data);
+                decoder.read_to_end(&mut out)?;
+                Ok(out)
+            }
+        }
+        _ => {
+            unreachable!()
+        }
+    }
+}
+
 impl AppendedData {
     /// Extract the decompressed and unencoded raw bytes from appended data.
     ///
@@ -2223,14 +2403,6 @@ impl AppendedData {
         scalar_type: ScalarType,
         ei: EncodingInfo,
     ) -> std::result::Result<IOBuffer, ValidationError> {
-        // Convert number of target bytes to number of chars in base64 encoding.
-        fn to_b64(bytes: usize) -> usize {
-            4 * (bytes as f64 / 3.0).ceil() as usize
-            //(bytes * 4 + 1) / 3 + match bytes % 3 {
-            //    1 => 2, 2 => 1, _ => 0
-            //}
-        }
-
         let header_bytes = ei.header_type.size();
         let expected_num_bytes = num_elements * scalar_type.size();
         let mut start = offset;
@@ -2275,136 +2447,10 @@ impl AppendedData {
             };
         }
 
-        // Compressed data has a more complex header.
-        // The data is organized as [nb][nu][np][nc_1]...[nc_nb][Data]
-        // Where
-        // [nb] = Number of blocks in the data array
-        // [nu] = Block size before compression
-        // [np] = Size of the last partial block before compression (zero if it is not needed)
-        // [nc_i] = Size in bytes of block i after compression
-        // See https://vtk.org/Wiki/VTK_XML_Formats for details.
-        // In this case we dont know how many bytes are in the data array so we must first read
-        // this information from a header.
-
-        // Helper function to read a single header number, which depends on the encoding parameters.
-        fn read_header_num<B: AsRef<[u8]>>(
-            header_buf: &mut std::io::Cursor<B>,
-            ei: EncodingInfo,
-        ) -> std::result::Result<usize, ValidationError> {
-            use byteorder::ReadBytesExt;
-            use byteorder::{BE, LE};
-            Ok(match ei.byte_order {
-                model::ByteOrder::LittleEndian => {
-                    if ei.header_type == ScalarType::UInt64 {
-                        header_buf.read_u64::<LE>()? as usize
-                    } else {
-                        header_buf.read_u32::<LE>()? as usize
-                    }
-                }
-                model::ByteOrder::BigEndian => {
-                    if ei.header_type == ScalarType::UInt64 {
-                        header_buf.read_u64::<BE>()? as usize
-                    } else {
-                        header_buf.read_u32::<BE>()? as usize
-                    }
-                }
-            })
-        }
-
-        // Allow this warning which are fired when compression is disabled.
-        #[allow(unused_variables)]
-        fn get_data_slice<'a, D, B>(
-            buf: &'a mut Vec<u8>,
-            mut decode: D,
-            mut to_b64: B,
-            data: &'a [u8],
-            header_bytes: usize,
-            ei: EncodingInfo,
-        ) -> std::result::Result<Vec<u8>, ValidationError>
-        where
-            D: for<'b> FnMut(
-                &'b [u8],
-                &'b mut Vec<u8>,
-            ) -> std::result::Result<&'b [u8], ValidationError>,
-            B: FnMut(usize) -> usize,
-        {
-            use std::io::Cursor;
-
-            // First we need to determine the number of blocks stored.
-            let num_blocks = {
-                let encoded_header = &data[0..to_b64(header_bytes)];
-                let decoded_header = decode(encoded_header, buf)?;
-                read_header_num(&mut Cursor::new(decoded_header), ei)?
- }; - - let full_header_bytes = header_bytes * (3 + num_blocks); // nb + nu + np + sum_i nc_i - buf.clear(); - - let encoded_header = &data[0..to_b64(full_header_bytes)]; - let decoded_header = decode(encoded_header, buf)?; - let mut header_cursor = Cursor::new(decoded_header); - let _nb = read_header_num(&mut header_cursor, ei); // We already know the number of blocks - let _nu = read_header_num(&mut header_cursor, ei); - let _np = read_header_num(&mut header_cursor, ei); - let nc_total = (0..num_blocks).fold(0, |acc, _| { - acc + read_header_num(&mut header_cursor, ei).unwrap_or(0) - }); - let num_data_bytes = to_b64(nc_total); - let start = to_b64(full_header_bytes); - buf.clear(); - let encoded_data = &data[start..start + num_data_bytes]; - let decoded_data = decode(encoded_data, buf)?; - - // Now that the data is decoded, what is left is to decompress it. - match ei.compressor { - Compressor::ZLib => { - #[cfg(not(feature = "flate2"))] - { - return Err(ValidationError::MissingCompressionLibrary(ei.compressor)); - } - #[cfg(feature = "flate2")] - { - use std::io::Read; - let mut out = Vec::new(); - let mut decoder = flate2::read::ZlibDecoder::new(decoded_data); - decoder.read_to_end(&mut out)?; - Ok(out) - } - } - Compressor::LZ4 => { - #[cfg(not(feature = "lz4"))] - { - return Err(ValidationError::MissingCompressionLibrary(ei.compressor)); - } - #[cfg(feature = "lz4")] - { - Ok(lz4::decompress(decoded_data, num_data_bytes)?) - } - } - Compressor::LZMA => { - #[cfg(not(feature = "xz2"))] - { - return Err(ValidationError::MissingCompressionLibrary(ei.compressor)); - } - #[cfg(feature = "xz2")] - { - use std::io::Read; - let mut out = Vec::new(); - let mut decoder = xz2::read::XzDecoder::new(decoded_data); - decoder.read_to_end(&mut out)?; - Ok(out) - } - } - _ => { - unreachable!() - } - } - } - let out = match self.encoding { Encoding::Raw => { let mut buf = Vec::new(); - get_data_slice( + decode_and_decompress( &mut buf, |header, _| Ok(header), |x| x, @@ -2415,16 +2461,9 @@ impl AppendedData { } Encoding::Base64 => { let mut buf = Vec::new(); - get_data_slice( + decode_and_decompress( &mut buf, - |header, buf| { - base64::decode_config_buf( - header, - base64::STANDARD.decode_allow_trailing_bits(true), - buf, - )?; - Ok(buf.as_slice()) - }, + base64_decode_buf, to_b64, &self.data.0[offset..], header_bytes, diff --git a/tests/pygmsh.rs b/tests/pygmsh.rs index 80be71b..2b810d5 100644 --- a/tests/pygmsh.rs +++ b/tests/pygmsh.rs @@ -246,7 +246,6 @@ fn xml_ascii() -> Result { } #[test] -#[ignore] #[cfg(feature = "xz2")] fn xml_lzma() -> Result { let mut vtu = Vtk::import("./assets/pygmsh/lzma.vtu")?; @@ -268,7 +267,6 @@ fn xml_no_compression() -> Result { } #[test] -#[ignore] #[cfg(feature = "flate2")] fn xml_zlib() -> Result { let mut vtu = Vtk::import("./assets/pygmsh/zlib.vtu")?;
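
With the patch applied, inline-binary compressed files load through the same path as appended data. The following is a minimal smoke test, assuming the `flate2` feature is enabled and reusing the same asset path as the tests above (adjust the path for your own data):

use vtkio::Vtk;

fn main() {
    // zlib-compressed, inline-binary .vtu; before this patch it failed to parse.
    let _vtk = Vtk::import("./assets/pygmsh/zlib.vtu").expect("failed to import compressed VTU");
    println!("compressed inline DataArrays parsed successfully");
}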
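
For reference, the compressed-block header layout that `decode_and_decompress` parses is the one documented in its comments: [nb][nu][np][nc_1]...[nc_nb] followed by the compressed blocks. The standalone sketch below only illustrates that layout; the helper names (`b64_len`, `parse_block_header`) and sample values are hypothetical and not part of the crate, and it assumes 32-bit little-endian header values (the `UInt32` header type) with no base64 step.

// Standalone sketch of the [nb][nu][np][nc_1]...[nc_nb] header layout,
// assuming 32-bit little-endian header values.

/// Number of base64 characters needed to encode `bytes` bytes (4 chars per 3-byte group).
fn b64_len(bytes: usize) -> usize {
    4 * ((bytes + 2) / 3)
}

/// Parses (nb, nu, np, [nc_i]) from a raw little-endian block header.
fn parse_block_header(raw: &[u8]) -> (usize, usize, usize, Vec<usize>) {
    let word = |i: usize| {
        let s = &raw[4 * i..4 * i + 4];
        u32::from_le_bytes([s[0], s[1], s[2], s[3]]) as usize
    };
    let nb = word(0); // number of blocks
    let nu = word(1); // uncompressed size of each full block
    let np = word(2); // uncompressed size of the last partial block (0 if none)
    let nc = (0..nb).map(|i| word(3 + i)).collect(); // compressed block sizes
    (nb, nu, np, nc)
}

fn main() {
    // Two blocks of 32768 bytes, the last one partial (1000 bytes),
    // compressed down to 300 and 120 bytes respectively.
    let mut raw = Vec::new();
    for v in [2u32, 32768, 1000, 300, 120] {
        raw.extend_from_slice(&v.to_le_bytes());
    }
    let (nb, nu, np, nc) = parse_block_header(&raw);
    assert_eq!((nb, nu, np), (2, 32768, 1000));
    assert_eq!(nc, vec![300, 120]);
    // When base64 encoded, this 20-byte header occupies 4 * ceil(20 / 3) = 28 characters,
    // which is what `to_b64` computes in the patch above.
    assert_eq!(b64_len(raw.len()), 28);
}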