/// Dimensions info: an inclusive rectangular cell area.
///
/// Both corners are `(row, col)` pairs and both belong to the area, so a
/// single cell is described by `start == end`.
#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)]
pub struct Dimensions {
    /// start: (row, col)
    pub start: (u32, u32),
    /// end: (row, col)
    pub end: (u32, u32),
}

impl Dimensions {
    /// Creates dimensions info from a start position and an end position,
    /// each given as `(row, col)`.
    pub fn new(start: (u32, u32), end: (u32, u32)) -> Self {
        Self { start, end }
    }

    /// Returns `true` when the position `(row, col)` lies inside the area.
    ///
    /// Both the start and the end row/column are part of the area.
    pub fn contains(&self, row: u32, col: u32) -> bool {
        (self.start.0..=self.end.0).contains(&row) && (self.start.1..=self.end.1).contains(&col)
    }

    /// Returns the number of cells covered: `rows * cols`, bounds included.
    ///
    /// `end` is expected to be greater than or equal to `start` on both axes;
    /// otherwise the `u32` subtraction underflows.
    pub fn len(&self) -> u64 {
        // Widen to u64 *before* adding 1: a span of `u32::MAX` would otherwise
        // overflow in u32 arithmetic.
        let rows = u64::from(self.end.0 - self.start.0) + 1;
        let cols = u64::from(self.end.1 - self.start.1) + 1;
        // Only a full u32 x u32 area (2^64 cells) exceeds u64; saturate there.
        rows.saturating_mul(cols)
    }
}
parse_label_sst(r: &[u8], strings: &[String]) -> Result>, X Ok(None) } -struct Dimensions { - start: (u32, u32), - end: (u32, u32), -} - fn parse_dimensions(r: &[u8]) -> Result { let (rf, rl, cf, cl) = match r.len() { 10 => ( diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 94a9419..2aba53e 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -192,6 +192,8 @@ pub struct Xlsx { /// Pictures #[cfg(feature = "picture")] pictures: Option)>>, + /// Merged Regions: Name, Sheet, Merged Dimensions + merged_regions: Option>, } impl Xlsx { @@ -640,6 +642,67 @@ impl Xlsx { Ok(()) } + // sheets must be added before this is called!! + fn read_merged_regions(&mut self) -> Result<(), XlsxError> { + let mut regions = Vec::new(); + for (sheet_name, sheet_path) in &self.sheets { + // we need another mutable borrow of self.zip later so we enclose this borrow within braces + { + let mut xml = match xml_reader(&mut self.zip, &sheet_path) { + None => continue, + Some(x) => x?, + }; + let mut buf = Vec::new(); + loop { + buf.clear(); + match xml.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) if e.local_name() == QName(b"mergeCell").into() => { + if let Some(attr) = get_attribute(e.attributes(), QName(b"ref").into())? 
+ { + let dismension = get_dimension(attr)?; + regions.push(( + sheet_name.to_string(), + sheet_path.to_string(), + dismension, + )); + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + } + } + self.merged_regions = Some(regions); + Ok(()) + } + + /// Load the merged regions + pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> { + if self.merged_regions.is_none() { + self.read_merged_regions() + } else { + Ok(()) + } + } + + /// Get the merged regions of all the sheets + pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> { + self.merged_regions + .as_ref() + .expect("Merged Regions must be loaded before the are referenced") + } + + /// Get the merged regions by sheet name + pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> { + self.merged_regions() + .iter() + .filter(|s| (**s).0 == name) + .map(|(name, sheet, region)| (name, sheet, region)) + .collect() + } + /// Load the tables from pub fn load_tables(&mut self) -> Result<(), XlsxError> { if self.tables.is_none() { @@ -780,6 +843,7 @@ impl Reader for Xlsx { metadata: Metadata::default(), #[cfg(feature = "picture")] pictures: None, + merged_regions: None, }; xlsx.read_shared_strings()?; xlsx.read_styles()?; diff --git a/tests/merged_range.xls b/tests/merged_range.xls new file mode 100644 index 0000000..043cee0 Binary files /dev/null and b/tests/merged_range.xls differ diff --git a/tests/merged_range.xlsx b/tests/merged_range.xlsx new file mode 100644 index 0000000..24e467a Binary files /dev/null and b/tests/merged_range.xlsx differ diff --git a/tests/test.rs b/tests/test.rs index 5252def..4e09ff4 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -4,6 +4,7 @@ use calamine::{ Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx, }; use calamine::{CellErrorType::*, Data}; +use std::collections::BTreeSet; use std::io::Cursor; use std::sync::Once; @@ -1088,6 +1089,198 @@ fn issue_221() { ); } +#[test] +fn 
/// Checks that the merged regions of `tests/merged_range.xlsx` are read for
/// all sheets and that they can be filtered by sheet name.
#[test]
fn merged_regions_xlsx() {
    use calamine::Dimensions;

    type Region = (String, String, Dimensions);
    type Corners = ((u32, u32), (u32, u32));

    const SHEET1_PATH: &str = "xl/worksheets/sheet1.xml";
    const SHEET2_PATH: &str = "xl/worksheets/sheet2.xml";

    // Expected merged areas as (start, end) corners, each `(row, col)`.
    const SHEET1_AREAS: [Corners; 9] = [
        ((0, 0), (1, 0)), // A1:A2
        ((0, 1), (1, 1)), // B1:B2
        ((0, 2), (1, 3)), // C1:D2
        ((2, 2), (2, 3)), // C3:D3
        ((3, 2), (3, 3)), // C4:D4
        ((0, 4), (1, 4)), // E1:E2
        ((0, 5), (1, 5)), // F1:F2
        ((0, 6), (1, 6)), // G1:G2
        ((0, 7), (1, 7)), // H1:H2
    ];
    const SHEET2_AREAS: [Corners; 6] = [
        ((0, 0), (3, 0)), // A1:A4
        ((0, 1), (1, 1)), // B1:B2
        ((0, 2), (1, 3)), // C1:D2
        ((2, 2), (3, 3)), // C3:D4
        ((0, 4), (1, 4)), // E1:E2
        ((0, 5), (3, 7)), // F1:H4
    ];

    // Turns a table of corners into the owned entries expected for one sheet.
    fn expected(sheet: &str, path: &str, areas: &[Corners]) -> BTreeSet<Region> {
        areas
            .iter()
            .map(|&(start, end)| {
                (
                    sheet.to_string(),
                    path.to_string(),
                    Dimensions::new(start, end),
                )
            })
            .collect()
    }

    let path = format!("{}/tests/merged_range.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut excel: Xlsx<_> = open_workbook(&path).unwrap();
    excel.load_merged_regions().unwrap();

    let sheet1 = expected("Sheet1", SHEET1_PATH, &SHEET1_AREAS);
    let sheet2 = expected("Sheet2", SHEET2_PATH, &SHEET2_AREAS);

    // All sheets together.
    let every_region: BTreeSet<Region> = sheet1.union(&sheet2).cloned().collect();
    let loaded: BTreeSet<Region> = excel
        .merged_regions()
        .iter()
        .map(|(name, sheet, dims)| (name.to_string(), sheet.to_string(), *dims))
        .collect();
    assert_eq!(loaded, every_region);

    // Per-sheet filtering.
    for (sheet_name, wanted) in [("Sheet1", sheet1), ("Sheet2", sheet2)].iter() {
        let found: BTreeSet<Region> = excel
            .merged_regions_by_sheet(sheet_name)
            .iter()
            .map(|&(name, sheet, dims)| (name.to_string(), sheet.to_string(), *dims))
            .collect();
        assert_eq!(&found, wanted);
    }
}