Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add shared formula logic #418

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ zip = { version = "0.6", default-features = false, features = ["deflate"] }
chrono = { version = "0.4", features = [
"serde",
], optional = true, default-features = false }
regex = "1.10"
ling7334 marked this conversation as resolved.
Show resolved Hide resolved

[dev-dependencies]
glob = "0.3"
Expand Down
156 changes: 153 additions & 3 deletions src/xlsx/cells_reader.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,37 @@
use std::collections::HashMap;

use quick_xml::{
events::{attributes::Attribute, BytesStart, Event},
name::QName,
};
use regex::{Captures, Regex};

use super::{
get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader,
coordinate_to_name, get_attribute, get_dimension, get_row, get_row_column, read_string,
Dimensions, XlReader,
};
use crate::{
datatype::DataRef,
formats::{format_excel_f64_ref, CellFormat},
Cell, XlsxError,
};

/// Fallible counterpart of a regex "replace all".
///
/// Walks every non-overlapping match of `re` in `haystack`, copies the text between matches
/// unchanged and substitutes each match with the string produced by `replacement`.
/// The first `Err` returned by `replacement` aborts the walk and is returned to the caller.
fn replace_all<E>(
    re: &Regex,
    haystack: &str,
    replacement: impl Fn(&Captures) -> Result<String, E>,
) -> Result<String, E> {
    let mut output = String::with_capacity(haystack.len());
    // Byte offset just past the previously handled match.
    let mut cursor = 0;
    for captures in re.captures_iter(haystack) {
        // Group 0 is the whole match.
        let whole = captures.get(0).unwrap();
        let untouched = &haystack[cursor..whole.start()];
        output.push_str(untouched);
        let substituted = replacement(&captures)?;
        output.push_str(&substituted);
        cursor = whole.end();
    }
    // Tail after the last match (or the entire input when nothing matched).
    output.push_str(&haystack[cursor..]);
    Ok(output)
}
/// An xlsx Cell Iterator
pub struct XlsxCellReader<'a> {
xml: XlReader<'a>,
Expand All @@ -23,6 +43,7 @@ pub struct XlsxCellReader<'a> {
col_index: u32,
buf: Vec<u8>,
cell_buf: Vec<u8>,
formulas: Vec<Option<(String, HashMap<String, (i64, i64)>)>>,
}

impl<'a> XlsxCellReader<'a> {
Expand Down Expand Up @@ -68,6 +89,7 @@ impl<'a> XlsxCellReader<'a> {
col_index: 0,
buf: Vec::with_capacity(1024),
cell_buf: Vec::with_capacity(1024),
formulas: Vec::with_capacity(1024),
})
}

Expand Down Expand Up @@ -165,8 +187,136 @@ impl<'a> XlsxCellReader<'a> {
self.cell_buf.clear();
match self.xml.read_event_into(&mut self.cell_buf) {
Ok(Event::Start(ref e)) => {
if let Some(f) = read_formula(&mut self.xml, e)? {
value = Some(f);
match get_attribute(e.attributes(), QName(b"t")).unwrap_or(None) {
Some(b"shared") => {
// shared formula
let mut offset_map: HashMap<String, (i64, i64)> =
HashMap::new();
// get shared formula index
let shared_index =
match get_attribute(e.attributes(), QName(b"si"))? {
Some(res) => match std::str::from_utf8(res) {
Ok(res) => match u32::from_str_radix(res, 10) {
Ok(res) => res,
Err(e) => {
return Err(XlsxError::ParseInt(e));
}
},
Err(_) => {
return Err(XlsxError::Unexpected(
"si attribute must be a number",
));
}
},
None => {
return Err(XlsxError::Unexpected(
"si attribute is mandatory if it is shared",
));
}
};
// get shared formula reference
let shared_ref =
match get_attribute(e.attributes(), QName(b"ref"))? {
Some(res) => {
let reference = get_dimension(res)?;
if reference.start.0 != reference.end.0 {
for i in 0..=(reference.end.0
- reference.start.0)
{
offset_map.insert(
coordinate_to_name((
reference.start.0 + i,
reference.start.1,
))?,
(
(reference.start.0 as i64
- pos.0 as i64
+ i as i64),
0,
),
);
}
} else if reference.start.1 != reference.end.1 {
for i in 0..=(reference.end.1
- reference.start.1)
{
offset_map.insert(
coordinate_to_name((
reference.start.0,
reference.start.1 + i,
))?,
(
0,
(reference.start.1 as i64
- pos.1 as i64
+ i as i64),
),
);
}
}
Some(reference)
}
None => None,
};

if let Some(f) = read_formula(&mut self.xml, e)? {
value = Some(f.clone());
if shared_ref.is_some() {
// original shared formula
while self.formulas.len() < shared_index as usize {
self.formulas.push(None);
}
self.formulas.push(Some((f, offset_map)));
}
}
if shared_ref.is_none() {
// shared formula
let cell_regex =
Regex::new(r"\b[A-Z]{1,3}\d+\b").unwrap();
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not build it in a loop.

if let Some((f, offset)) =
self.formulas[shared_index as usize].clone()
{
if let Some((row, col)) =
offset.get(&coordinate_to_name(pos)?)
{
let replacement =
|caps: &Captures| -> Result<String, String> {
match get_row_column(caps[0].as_bytes()) {
Ok(cell) => {
match coordinate_to_name((
(cell.0 as i64 + *row) as u32,
(cell.1 as i64 + *col) as u32,
)) {
Ok(name) => Ok(name),
Err(e) => {
Err(e.to_string())
}
}
}
Err(e) => Err(e.to_string()),
}
};

match replace_all(
&cell_regex,
f.as_str(),
&replacement,
) {
Ok(s) => {
value = Some(s);
}
Err(_) => {}
}
}
};
};
}
_ => {
// good old formula
if let Some(f) = read_formula(&mut self.xml, e)? {
value = Some(f);
}
}
}
}
Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break,
Expand Down
44 changes: 44 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,32 @@ fn check_for_password_protected<RS: Read + Seek>(reader: &mut RS) -> Result<(),
Ok(())
}

/// Convert a zero-based column index to its Excel column title
/// (`0` -> `A`, `25` -> `Z`, `26` -> `AA`).
///
/// # Errors
///
/// Returns `XlsxError::Unexpected` when `num` is not below `MAX_COLUMNS`.
pub(crate) fn column_number_to_name(num: u32) -> Result<String, XlsxError> {
    if num >= MAX_COLUMNS {
        return Err(XlsxError::Unexpected("column number overflow"));
    }
    // Column titles are a bijective base-26 numeral: work on the one-based index and
    // emit the least significant letter first, then reverse.
    let mut letters: Vec<u8> = Vec::new();
    let mut rest = num + 1;
    while rest > 0 {
        letters.push(((rest - 1) % 26 + 65) as u8);
        rest = (rest - 1) / 26;
    }
    letters.reverse();
    String::from_utf8(letters).map_err(|_| XlsxError::Unexpected("not valid utf8"))
}

/// Convert a zero-based `(row, column)` coordinate to its Excel cell name,
/// e.g. `(0, 0)` -> `A1`.
///
/// # Errors
///
/// Propagates the error from `column_number_to_name` when the column index is out of range.
pub(crate) fn coordinate_to_name(cell: (u32, u32)) -> Result<String, XlsxError> {
    let (row, col) = cell;
    let column = column_number_to_name(col)?;
    // Widen before converting to the one-based row number so `u32::MAX` cannot overflow.
    Ok(format!("{}{}", column, u64::from(row) + 1))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1174,4 +1200,22 @@ mod tests {
CellErrorType::Value
);
}

#[test]
fn test_column_number_to_name() {
assert_eq!(column_number_to_name(0).unwrap(), "A");
assert_eq!(column_number_to_name(25).unwrap(), "Z");
assert_eq!(column_number_to_name(26).unwrap(), "AA");
assert_eq!(column_number_to_name(27).unwrap(), "AB");
assert_eq!(column_number_to_name(MAX_COLUMNS - 1).unwrap(), "XFD");
}

#[test]
fn test_position_to_title() {
assert_eq!(coordinate_to_name((0, 0)).unwrap(), "A1");
assert_eq!(
coordinate_to_name((1048_575, MAX_COLUMNS - 1)).unwrap(),
"XFD1048576"
);
}
}
Binary file added tests/issue_391.xlsx
Binary file not shown.
23 changes: 21 additions & 2 deletions tests/test.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String};
use calamine::{
open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Reader,
Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range,
Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
};
use calamine::{CellErrorType::*, Data};
use std::collections::BTreeSet;
Expand Down Expand Up @@ -1878,3 +1878,22 @@ fn issue_401_empty_tables() {
let tables = excel.table_names();
assert!(tables.is_empty());
}

/// Regression test for issue 391: cells that only reference a shared formula must yield
/// the formula text shifted to their own position.
#[test]
fn issue_391_shared_formula() {
    setup();

    let path = format!("{}/tests/issue_391.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut excel: Xlsx<_> = open_workbook(&path).unwrap();

    // Expected formulas for rows 1..=6 of column 0; a fixed array is enough, no heap
    // allocation is needed just to iterate.
    let formulas = ["A1+1", "A2+1", "A3+1", "A4+1", "A5+1", "A6+1"];
    let mut expect = Range::<std::string::String>::new((1, 0), (6, 0));
    for (i, formula) in formulas.iter().enumerate() {
        expect.set_value((1 + i as u32, 0), formula.to_string());
    }

    let res = excel.worksheet_formula("Sheet1").unwrap();
    assert_eq!(expect.start(), res.start());
    assert_eq!(expect.end(), res.end());
    assert!(expect.cells().eq(res.cells()));
}