diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index b6435d20..d4207688 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -2,9 +2,11 @@ use quick_xml::{ events::{attributes::Attribute, BytesStart, Event}, name::QName, }; +use std::{borrow::Borrow, collections::HashMap}; use super::{ - get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader, + get_attribute, get_dimension, get_row, get_row_column, read_string, replace_cell_names, + Dimensions, XlReader, }; use crate::{ datatype::DataRef, @@ -23,6 +25,7 @@ pub struct XlsxCellReader<'a> { col_index: u32, buf: Vec, cell_buf: Vec, + formulas: Vec)>>, } impl<'a> XlsxCellReader<'a> { @@ -68,6 +71,7 @@ impl<'a> XlsxCellReader<'a> { col_index: 0, buf: Vec::with_capacity(1024), cell_buf: Vec::with_capacity(1024), + formulas: Vec::with_capacity(1024), }) } @@ -165,9 +169,103 @@ impl<'a> XlsxCellReader<'a> { self.cell_buf.clear(); match self.xml.read_event_into(&mut self.cell_buf) { Ok(Event::Start(ref e)) => { - if let Some(f) = read_formula(&mut self.xml, e)? { - value = Some(f); + let formula = read_formula(&mut self.xml, e)?; + if let Some(f) = formula.borrow() { + value = Some(f.clone()); } + match get_attribute(e.attributes(), QName(b"t")) { + Ok(Some(b"shared")) => { + // shared formula + let mut offset_map: HashMap<(u32, u32), (i64, i64)> = + HashMap::new(); + // shared index + let shared_index = + match get_attribute(e.attributes(), QName(b"si"))? { + Some(res) => match std::str::from_utf8(res) { + Ok(res) => match usize::from_str_radix(res, 10) + { + Ok(res) => res, + Err(e) => { + return Err(XlsxError::ParseInt(e)); + } + }, + Err(_) => { + return Err(XlsxError::Unexpected( + "si attribute must be a number", + )); + } + }, + None => { + return Err(XlsxError::Unexpected( + "si attribute is mandatory if it is shared", + )); + } + }; + // shared reference + match get_attribute(e.attributes(), QName(b"ref"))? 
{ + Some(res) => { + // orignal reference formula + let reference = get_dimension(res)?; + if reference.start.0 != reference.end.0 { + for i in + 0..=(reference.end.0 - reference.start.0) + { + offset_map.insert( + ( + reference.start.0 + i, + reference.start.1, + ), + ( + (reference.start.0 as i64 + - pos.0 as i64 + + i as i64), + 0, + ), + ); + } + } else if reference.start.1 != reference.end.1 { + for i in + 0..=(reference.end.1 - reference.start.1) + { + offset_map.insert( + ( + reference.start.0, + reference.start.1 + i, + ), + ( + 0, + (reference.start.1 as i64 + - pos.1 as i64 + + i as i64), + ), + ); + } + } + + if let Some(f) = formula.borrow() { + while self.formulas.len() < shared_index { + self.formulas.push(None); + } + self.formulas + .push(Some((f.clone(), offset_map))); + } + value = formula; + } + None => { + // calculated formula + if let Some(Some((f, offset_map))) = + self.formulas.get(shared_index) + { + if let Some(offset) = offset_map.get(&*&pos) { + value = + Some(replace_cell_names(f, *offset)?); + } + } + } + }; + } + _ => {} + }; } Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")), diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 4104e5ee..a90c9332 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -1117,6 +1117,130 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), Ok(()) } +/// check if a char vector is a valid cell name +/// column name must be between A and XFD, +/// last char must be digit +fn valid_cell_name(name: &[char]) -> bool { + if name.is_empty() { + return false; + } + if name.len() < 2 { + return false; + } + if name.len() > 3 { + if name[3].is_ascii_alphabetic() { + return false; + } + if name[2].is_alphabetic() { + if "YZ".contains(name[0]) { + return false; + } else if name[0] == 'X' { + if name[1] == 'F' { + if !"ABCD".contains(name[2]) { + return false; + }; + } else if !"ABCDE".contains(name[1]) { + return false; + } + } + } + 
} + match name.last() { + Some(c) => c.is_ascii_digit(), + _ => false, + } +} + +/// advance the cell name by the offset +fn replace_cell(name: &[char], offset: (i64, i64)) -> Result, XlsxError> { + let cell = get_row_column( + name.into_iter() + .map(|c| *c as u8) + .collect::>() + .as_slice(), + )?; + coordinate_to_name(( + (cell.0 as i64 + offset.0) as u32, + (cell.1 as i64 + offset.1) as u32, + )) +} + +/// advance all valid cell names in the string by the offset +fn replace_cell_names(s: &str, offset: (i64, i64)) -> Result { + let mut res: Vec = Vec::new(); + let mut cell: Vec = Vec::new(); + let mut is_cell_row = false; + let mut in_quote = false; + for c in s.chars() { + if c == '"' { + in_quote = !in_quote; + } + if in_quote { + res.push(c as u8); + continue; + } + if c.is_ascii_alphabetic() { + if is_cell_row { + // two cell not possible stick togather in formula + res.extend(cell.iter().map(|c| *c as u8)); + cell.clear(); + is_cell_row = false; + } + cell.push(c); + } else if c.is_ascii_digit() { + is_cell_row = true; + cell.push(c); + } else { + if valid_cell_name(cell.as_ref()) { + res.extend(replace_cell(cell.as_ref(), offset)?); + } else { + res.extend(cell.iter().map(|c| *c as u8)); + } + cell.clear(); + is_cell_row = false; + res.push(c as u8); + } + } + if !cell.is_empty() { + if valid_cell_name(cell.as_ref()) { + res.extend(replace_cell(cell.as_ref(), offset)?); + } else { + res.extend(cell.iter().map(|c| *c as u8)); + } + } + match String::from_utf8(res) { + Ok(s) => Ok(s), + Err(_) => Err(XlsxError::Unexpected("fail to convert cell name")), + } +} + +/// Convert the integer to Excelsheet column title. +/// If the column number not in 1~16384, an Error is returned. 
+pub(crate) fn column_number_to_name(num: u32) -> Result, XlsxError> { + if num >= MAX_COLUMNS { + return Err(XlsxError::Unexpected("column number overflow")); + } + let mut col: Vec = Vec::new(); + let mut num = num + 1; + while num > 0 { + let integer = ((num - 1) % 26 + 65) as u8; + col.push(integer); + num = (num - 1) / 26; + } + col.reverse(); + Ok(col) +} + +/// Convert a cell coordinate to Excelsheet cell name. +/// If the column number not in 1~16384, an Error is returned. +pub(crate) fn coordinate_to_name(cell: (u32, u32)) -> Result, XlsxError> { + let cell = &[ + column_number_to_name(cell.1)?, + (cell.0 + 1).to_string().into_bytes(), + ]; + Ok(cell.concat()) +} + #[cfg(test)] mod tests { use super::*; @@ -1178,4 +1302,39 @@ mod tests { CellErrorType::Value ); } + + #[test] + fn test_column_number_to_name() { + assert_eq!(column_number_to_name(0).unwrap(), b"A"); + assert_eq!(column_number_to_name(25).unwrap(), b"Z"); + assert_eq!(column_number_to_name(26).unwrap(), b"AA"); + assert_eq!(column_number_to_name(27).unwrap(), b"AB"); + assert_eq!(column_number_to_name(MAX_COLUMNS - 1).unwrap(), b"XFD"); + } + + #[test] + fn test_coordinate_to_name() { + assert_eq!(coordinate_to_name((0, 0)).unwrap(), b"A1"); + assert_eq!( + coordinate_to_name((MAX_ROWS - 1, MAX_COLUMNS - 1)).unwrap(), + b"XFD1048576" + ); + } + + #[test] + fn test_replace_cell_names() { + assert_eq!(replace_cell_names("A1", (1, 0)).unwrap(), "A2".to_owned()); + assert_eq!( + replace_cell_names("CONCATENATE(A1, \"a\")", (1, 0)).unwrap(), + "CONCATENATE(A2, \"a\")".to_owned() + ); + assert_eq!( + replace_cell_names( + "A1 is a cell, B1 is another, also C107, but XFE123 is not and \"A3\" in quote wont change.", + (1, 0) + ) + .unwrap(), + "A2 is a cell, B2 is another, also C108, but XFE123 is not and \"A3\" in quote wont change.".to_owned() + ); + } } diff --git a/tests/issue_391.xlsx b/tests/issue_391.xlsx new file mode 100644 index 00000000..f8b63979 Binary files /dev/null and 
/// Regression test for issue 391: every cell covered by a shared formula must
/// report its own formula text, not only the cell that carries the definition.
///
/// The fixture's `Sheet1` is expected to hold `A1+1` .. `A6+1` in rows 1..=6 of
/// column 0.
#[test]
fn issue_391_shared_formula() {
    setup();

    let workbook_path = format!("{}/tests/issue_391.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut workbook: Xlsx<_> = open_workbook(&workbook_path).unwrap();

    // Build the expected formula range one row at a time, starting at row 1.
    let formulas = ["A1+1", "A2+1", "A3+1", "A4+1", "A5+1", "A6+1"];
    let mut expected = Range::<String>::new((1, 0), (6, 0));
    for (row, formula) in (1u32..).zip(formulas.iter()) {
        expected.set_value((row, 0), formula.to_string());
    }

    let actual = workbook.worksheet_formula("Sheet1").unwrap();
    assert_eq!(expected.start(), actual.start());
    assert_eq!(expected.end(), actual.end());
    assert!(expected.cells().eq(actual.cells()));
}