Skip to content

Commit

Permalink
Merge pull request #405 from gy0801151351/merged_region
Browse files Browse the repository at this point in the history
Support to get merged region
  • Loading branch information
tafia authored Mar 8, 2024
2 parents 58c8ba2 + 5238efc commit 953d80e
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ target
Cargo.lock
*.bk
.vim
/.idea/
fuzz.xlsx
.idea
nyc.rs
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "calamine"
version = "0.24.0"
version = "0.25.0"
authors = ["Johann Tuffe <[email protected]>"]
repository = "https://github.com/tafia/calamine"
documentation = "https://docs.rs/calamine"
Expand Down
16 changes: 14 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,25 @@ impl fmt::Display for CellErrorType {
}
}

#[derive(Debug, PartialEq, Default, Clone, Copy)]
pub(crate) struct Dimensions {
/// Rectangular cell area described by two corner positions.
///
/// Both corners are stored as `(row, col)` pairs.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Dimensions {
    /// First corner of the area, as `(row, col)`.
    pub start: (u32, u32),
    /// Last corner of the area, as `(row, col)`.
    pub end: (u32, u32),
}

impl Dimensions {
/// create dimensions info with start position and end position
pub fn new(start: (u32, u32), end: (u32, u32)) -> Self {
Self { start, end }
}
/// Tells whether the cell at (`row`, `col`) lies inside these dimensions.
///
/// Both `start` and `end` are treated as inclusive bounds.
pub fn contains(&self, row: u32, col: u32) -> bool {
    let (first_row, first_col) = self.start;
    let (last_row, last_col) = self.end;
    let row_inside = first_row <= row && row <= last_row;
    let col_inside = first_col <= col && col <= last_col;
    row_inside && col_inside
}
/// Number of cells covered by these dimensions.
///
/// Both bounds are inclusive, so the result is
/// `(end.0 - start.0 + 1) * (end.1 - start.1 + 1)`.
///
/// Returns `0` when `end` lies before `start` on either axis (no cell can
/// satisfy `contains` in that case). The arithmetic is widened to `u64`
/// before adding one, so a span covering the whole `u32` range does not
/// overflow; the final product saturates at `u64::MAX`.
pub fn len(&self) -> u64 {
    // Inclusive length of one axis, computed in u64 to avoid u32 overflow.
    let span = |lo: u32, hi: u32| hi.checked_sub(lo).map_or(0, |d| u64::from(d) + 1);
    let rows = span(self.start.0, self.end.0);
    let cols = span(self.start.1, self.end.1);
    rows.saturating_mul(cols)
}
Expand Down
9 changes: 3 additions & 6 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ use crate::formats::{
use crate::utils::read_usize;
use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32};
use crate::vba::VbaProject;
use crate::{Cell, CellErrorType, Data, Metadata, Range, Reader, Sheet, SheetType, SheetVisible};
use crate::{
Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, Sheet, SheetType, SheetVisible,
};

#[derive(Debug)]
/// An enum to handle Xls specific errors
Expand Down Expand Up @@ -775,11 +777,6 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<Data>>, X
Ok(None)
}

// Private xls-only bounds type; it is the return type of `parse_dimensions`.
// NOTE(review): this diff removes it in favour of the crate-level
// `Dimensions` imported above.
struct Dimensions {
// presumably (row, col) like the crate-level type — confirm
start: (u32, u32),
// presumably (row, col) like the crate-level type — confirm
end: (u32, u32),
}

fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
let (rf, rl, cf, cl) = match r.len() {
10 => (
Expand Down
64 changes: 64 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ pub struct Xlsx<RS> {
/// Pictures
#[cfg(feature = "picture")]
pictures: Option<Vec<(String, Vec<u8>)>>,
/// Merged regions: (sheet name, sheet path, merged dimensions)
merged_regions: Option<Vec<(String, String, Dimensions)>>,
}

impl<RS: Read + Seek> Xlsx<RS> {
Expand Down Expand Up @@ -640,6 +642,67 @@ impl<RS: Read + Seek> Xlsx<RS> {
Ok(())
}

// Scans every sheet listed in `self.sheets` for `mergeCell` elements and
// caches their ranges, so sheets must be added before this is called!!
fn read_merged_regions(&mut self) -> Result<(), XlsxError> {
    let mut merged = Vec::new();
    for (name, path) in &self.sheets {
        // Sheets for which `xml_reader` yields no reader are skipped.
        let mut reader = match xml_reader(&mut self.zip, path) {
            Some(r) => r?,
            None => continue,
        };
        let mut buf = Vec::new();
        loop {
            buf.clear();
            match reader.read_event_into(&mut buf).map_err(XlsxError::Xml)? {
                Event::Start(ref e) if e.local_name() == QName(b"mergeCell").into() => {
                    // A `mergeCell` without a `ref` attribute is ignored.
                    if let Some(range) = get_attribute(e.attributes(), QName(b"ref").into())? {
                        let region = get_dimension(range)?;
                        merged.push((name.clone(), path.clone(), region));
                    }
                }
                Event::Eof => break,
                _ => {}
            }
        }
    }
    self.merged_regions = Some(merged);
    Ok(())
}

/// Load the merged regions of every sheet.
///
/// The regions are read once and cached: if they are already loaded this
/// returns `Ok(())` without reading anything again.
pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> {
    if self.merged_regions.is_some() {
        return Ok(());
    }
    self.read_merged_regions()
}

/// Get the merged regions of all the sheets.
///
/// Each entry is `(sheet name, sheet path, merged dimensions)`.
///
/// # Panics
///
/// Panics if the merged regions have not been loaded yet; call
/// `load_merged_regions` first.
pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> {
    self.merged_regions
        .as_ref()
        .expect("Merged Regions must be loaded before they are referenced")
}

/// Get the merged regions belonging to the sheet called `name`.
///
/// Each returned tuple borrows `(sheet name, sheet path, merged dimensions)`
/// from the loaded regions. Like `merged_regions`, this panics when the
/// regions have not been loaded.
pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> {
    let mut matching = Vec::new();
    for (sheet_name, sheet_path, region) in self.merged_regions() {
        if sheet_name.as_str() == name {
            matching.push((sheet_name, sheet_path, region));
        }
    }
    matching
}

/// Load the tables from
pub fn load_tables(&mut self) -> Result<(), XlsxError> {
if self.tables.is_none() {
Expand Down Expand Up @@ -780,6 +843,7 @@ impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {
metadata: Metadata::default(),
#[cfg(feature = "picture")]
pictures: None,
merged_regions: None,
};
xlsx.read_shared_strings()?;
xlsx.read_styles()?;
Expand Down
Binary file added tests/merged_range.xls
Binary file not shown.
Binary file added tests/merged_range.xlsx
Binary file not shown.
193 changes: 193 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use calamine::{
Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
};
use calamine::{CellErrorType::*, Data};
use std::collections::BTreeSet;
use std::io::Cursor;
use std::sync::Once;

Expand Down Expand Up @@ -1088,6 +1089,198 @@ fn issue_221() {
);
}

#[test]
fn merged_regions_xlsx() {
    use calamine::Dimensions;

    type Region = (String, String, Dimensions);

    /// Builds the expected regions of one sheet from `(start, end)` pairs.
    fn regions(sheet: &str, path: &str, spans: &[((u32, u32), (u32, u32))]) -> BTreeSet<Region> {
        spans
            .iter()
            .map(|&(start, end)| {
                (
                    sheet.to_string(),
                    path.to_string(),
                    Dimensions::new(start, end),
                )
            })
            .collect()
    }

    let path = format!("{}/tests/merged_range.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut excel: Xlsx<_> = open_workbook(&path).unwrap();
    excel.load_merged_regions().unwrap();

    // Expected data is declared once per sheet and reused by every assertion.
    let sheet1 = regions(
        "Sheet1",
        "xl/worksheets/sheet1.xml",
        &[
            ((0, 0), (1, 0)), // A1:A2
            ((0, 1), (1, 1)), // B1:B2
            ((0, 2), (1, 3)), // C1:D2
            ((2, 2), (2, 3)), // C3:D3
            ((3, 2), (3, 3)), // C4:D4
            ((0, 4), (1, 4)), // E1:E2
            ((0, 5), (1, 5)), // F1:F2
            ((0, 6), (1, 6)), // G1:G2
            ((0, 7), (1, 7)), // H1:H2
        ],
    );
    let sheet2 = regions(
        "Sheet2",
        "xl/worksheets/sheet2.xml",
        &[
            ((0, 0), (3, 0)), // A1:A4
            ((0, 1), (1, 1)), // B1:B2
            ((0, 2), (1, 3)), // C1:D2
            ((2, 2), (3, 3)), // C3:D4
            ((0, 4), (1, 4)), // E1:E2
            ((0, 5), (3, 7)), // F1:H4
        ],
    );

    // All sheets together must be exactly the union of the per-sheet sets.
    let loaded: BTreeSet<Region> = excel.merged_regions().iter().cloned().collect();
    let everything: BTreeSet<Region> = sheet1.union(&sheet2).cloned().collect();
    assert_eq!(loaded, everything);

    // `Dimensions` is `Copy`, so it is dereferenced rather than cloned.
    let by_sheet = |name: &str| -> BTreeSet<Region> {
        excel
            .merged_regions_by_sheet(name)
            .into_iter()
            .map(|(sheet, path, region)| (sheet.clone(), path.clone(), *region))
            .collect()
    };
    assert_eq!(by_sheet("Sheet1"), sheet1);
    assert_eq!(by_sheet("Sheet2"), sheet2);
}

#[test]
fn issue_252() {
setup();
Expand Down

0 comments on commit 953d80e

Please sign in to comment.