From bbffee37cd32f9aedf91591bd590c9a8196126c0 Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:23:22 +0800 Subject: [PATCH 1/3] feat: helper function --- src/xlsx/mod.rs | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 4104e5e..a90c933 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -1117,6 +1117,130 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), Ok(()) } +/// check if a char vector is a valid cell name +/// column name must be between A and XFD, +/// last char must be digit +fn valid_cell_name(name: &[char]) -> bool { + if name.is_empty() { + return false; + } + if name.len() < 2 { + return false; + } + if name.len() > 3 { + if name[3].is_ascii_alphabetic() { + return false; + } + if name[2].is_alphabetic() { + if "YZ".contains(name[0]) { + return false; + } else if name[0] == 'X' { + if name[1] == 'F' { + if !"ABCD".contains(name[2]) { + return false; + }; + } else if !"ABCDE".contains(name[1]) { + return false; + } + } + } + } + match name.last() { + Some(c) => c.is_ascii_digit(), + _ => false, + } +} + +/// advance the cell name by the offset +fn replace_cell(name: &[char], offset: (i64, i64)) -> Result, XlsxError> { + let cell = get_row_column( + name.into_iter() + .map(|c| *c as u8) + .collect::>() + .as_slice(), + )?; + coordinate_to_name(( + (cell.0 as i64 + offset.0) as u32, + (cell.1 as i64 + offset.1) as u32, + )) +} + +/// advance all valid cell names in the string by the offset +fn replace_cell_names(s: &str, offset: (i64, i64)) -> Result { + let mut res: Vec = Vec::new(); + let mut cell: Vec = Vec::new(); + let mut is_cell_row = false; + let mut in_quote = false; + for c in s.chars() { + if c == '"' { + in_quote = !in_quote; + } + if in_quote { + res.push(c as u8); + continue; + } + if c.is_ascii_alphabetic() { + if is_cell_row { + // two cell not possible stick togather in formula + res.extend(cell.iter().map(|c| *c as u8)); + cell.clear(); + is_cell_row = false; + } + cell.push(c); + } else if c.is_ascii_digit() { + is_cell_row = true; + cell.push(c); + } else { + if valid_cell_name(cell.as_ref()) { + res.extend(replace_cell(cell.as_ref(), offset)?); + } else { + res.extend(cell.iter().map(|c| *c as u8)); + } + cell.clear(); + is_cell_row = false; + res.push(c as u8); + } + } + if !cell.is_empty() { + if valid_cell_name(cell.as_ref()) { + res.extend(replace_cell(cell.as_ref(), offset)?); + } else { + res.extend(cell.iter().map(|c| *c as u8)); + } + } + match String::from_utf8(res) { + Ok(s) => Ok(s), + Err(_) => Err(XlsxError::Unexpected("fail to convert cell name")), + } +} + +/// Convert the integer to Excelsheet column title. +/// If the column number not in 1~16384, an Error is returned. +pub(crate) fn column_number_to_name(num: u32) -> Result, XlsxError> { + if num >= MAX_COLUMNS { + return Err(XlsxError::Unexpected("column number overflow")); + } + let mut col: Vec = Vec::new(); + let mut num = num + 1; + while num > 0 { + let integer = ((num - 1) % 26 + 65) as u8; + col.push(integer); + num = (num - 1) / 26; + } + col.reverse(); + Ok(col) +} + +/// Convert a cell coordinate to Excelsheet cell name. +/// If the column number not in 1~16384, an Error is returned. +pub(crate) fn coordinate_to_name(cell: (u32, u32)) -> Result, XlsxError> { + let cell = &[ + column_number_to_name(cell.1)?, + (cell.0 + 1).to_string().into_bytes(), + ]; + Ok(cell.concat()) +} + #[cfg(test)] mod tests { use super::*; @@ -1178,4 +1302,39 @@ mod tests { CellErrorType::Value ); } + + #[test] + fn test_column_number_to_name() { + assert_eq!(column_number_to_name(0).unwrap(), b"A"); + assert_eq!(column_number_to_name(25).unwrap(), b"Z"); + assert_eq!(column_number_to_name(26).unwrap(), b"AA"); + assert_eq!(column_number_to_name(27).unwrap(), b"AB"); + assert_eq!(column_number_to_name(MAX_COLUMNS - 1).unwrap(), b"XFD"); + } + + #[test] + fn test_coordinate_to_name() { + assert_eq!(coordinate_to_name((0, 0)).unwrap(), b"A1"); + assert_eq!( + coordinate_to_name((MAX_ROWS - 1, MAX_COLUMNS - 1)).unwrap(), + b"XFD1048576" + ); + } + + #[test] + fn test_replace_cell_names() { + assert_eq!(replace_cell_names("A1", (1, 0)).unwrap(), "A2".to_owned()); + assert_eq!( + replace_cell_names("CONCATENATE(A1, \"a\")", (1, 0)).unwrap(), + "CONCATENATE(A2, \"a\")".to_owned() + ); + assert_eq!( + replace_cell_names( + "A1 is a cell, B1 is another, also C107, but XFE123 is not and \"A3\" in quote wont change.", + (1, 0) + ) + .unwrap(), + "A2 is a cell, B2 is another, also C108, but XFE123 is not and \"A3\" in quote wont change.".to_owned() + ); + } } From c32f3d07a45a7d678ecf88c714a03433ecc16c25 Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:23:37 +0800 Subject: [PATCH 2/3] feat: add shared formula logic --- src/xlsx/cells_reader.rs | 104 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index b6435d2..d420768 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -2,9 +2,11 @@ use quick_xml::{ events::{attributes::Attribute, BytesStart, Event}, name::QName, }; +use std::{borrow::Borrow, collections::HashMap}; use super::{ - get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader, + get_attribute, get_dimension, get_row, get_row_column, read_string, replace_cell_names, + Dimensions, XlReader, }; use crate::{ datatype::DataRef, @@ -23,6 +25,7 @@ pub struct XlsxCellReader<'a> { col_index: u32, buf: Vec, cell_buf: Vec, + formulas: Vec)>>, } impl<'a> XlsxCellReader<'a> { @@ -68,6 +71,7 @@ impl<'a> XlsxCellReader<'a> { col_index: 0, buf: Vec::with_capacity(1024), cell_buf: Vec::with_capacity(1024), + formulas: Vec::with_capacity(1024), }) } @@ -165,9 +169,103 @@ impl<'a> XlsxCellReader<'a> { self.cell_buf.clear(); match self.xml.read_event_into(&mut self.cell_buf) { Ok(Event::Start(ref e)) => { - if let Some(f) = read_formula(&mut self.xml, e)? { - value = Some(f); + let formula = read_formula(&mut self.xml, e)?; + if let Some(f) = formula.borrow() { + value = Some(f.clone()); } + match get_attribute(e.attributes(), QName(b"t")) { + Ok(Some(b"shared")) => { + // shared formula + let mut offset_map: HashMap<(u32, u32), (i64, i64)> = + HashMap::new(); + // shared index + let shared_index = + match get_attribute(e.attributes(), QName(b"si"))? { + Some(res) => match std::str::from_utf8(res) { + Ok(res) => match usize::from_str_radix(res, 10) + { + Ok(res) => res, + Err(e) => { + return Err(XlsxError::ParseInt(e)); + } + }, + Err(_) => { + return Err(XlsxError::Unexpected( + "si attribute must be a number", + )); + } + }, + None => { + return Err(XlsxError::Unexpected( + "si attribute is mandatory if it is shared", + )); + } + }; + // shared reference + match get_attribute(e.attributes(), QName(b"ref"))? { + Some(res) => { + // orignal reference formula + let reference = get_dimension(res)?; + if reference.start.0 != reference.end.0 { + for i in + 0..=(reference.end.0 - reference.start.0) + { + offset_map.insert( + ( + reference.start.0 + i, + reference.start.1, + ), + ( + (reference.start.0 as i64 + - pos.0 as i64 + + i as i64), + 0, + ), + ); + } + } else if reference.start.1 != reference.end.1 { + for i in + 0..=(reference.end.1 - reference.start.1) + { + offset_map.insert( + ( + reference.start.0, + reference.start.1 + i, + ), + ( + 0, + (reference.start.1 as i64 + - pos.1 as i64 + + i as i64), + ), + ); + } + } + + if let Some(f) = formula.borrow() { + while self.formulas.len() < shared_index { + self.formulas.push(None); + } + self.formulas + .push(Some((f.clone(), offset_map))); + } + value = formula; + } + None => { + // calculated formula + if let Some(Some((f, offset_map))) = + self.formulas.get(shared_index) + { + if let Some(offset) = offset_map.get(&*&pos) { + value = + Some(replace_cell_names(f, *offset)?); + } + } + } + }; + } + _ => {} + }; } Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")), From d0b1eee0e21f93e2f0c0667120d166c02562273e Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:23:50 +0800 Subject: [PATCH 3/3] test: add shared formula testcase --- tests/issue_391.xlsx | Bin 0 -> 8051 bytes tests/test.rs | 23 +++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 tests/issue_391.xlsx diff --git a/tests/issue_391.xlsx b/tests/issue_391.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f8b6397954058137a4078eefdda57af674973482 GIT binary patch literal 8051 zcmeHMg5UF6opK)S`@U~qWzyJUMbO4;~&p%9%0Dve|0Du^P zfovk@u{SqDMpz7N@xAq<~T?|k((eb0|w{i5z7P_n^e#h9#mP$Nj|N&xntr(rEKWS<_j49)oEBD!ZulWrh^p;(;GFj{R-@@B-42IuN2AK9F%_@&kj3NTz zm{klpeKxIgM>qmQ#~LNUc<5-@&vCWCVr(;<*O$4Q< zp)t6b%a2q-PXA-sQ#k`S>oaXQN#61h$3^gz2qBk{vptxr9fuK>mt0uz;R=HoQR#M- zB(Hlwr3}1A;U+#d2$6X|exLxf{*jZ7dOQrr2%@PWQVtuDlNPSFAUAHVpZovi;(u`h z{_WN)lGHW3d2k|NN>`En@ad%l0$Eir8KrhQ-N0bwMf^r^5hLYN7Yh}EE=3s1!ystj z)!@RCIQV-%-RU~8B8G@WjG@V^G9vxL#RHw4*)>hkrDCmz&~y4^`ZVLAiVv$NG@iY@ z)@82H?jiGt7{M#g%QpT)1L_Co?M3PVdFp#`K z+<)eYmy_#rD<`MtKU>#dIfI1gUx+CG-CIS{m`V>1qJD?M2);T_R?*XE?a2_HvY#Ny ziaNY6Rvw5Ax;Tx9HX%zp`8f1esM~*qTdt|S`dtjZ+J=>u)?FqH9OfpjW31)O^el9O z9?^#R55ZW>jC0eo1`7tqnMXuMLP`BxAtZ)h542VINYT|5sCYM&$tLqfhNgzJv%>bY z9S0B1i@T_`^1P!1%4v5OObAARUpIb0t!G8T#wI%O@13cIGv@jS2HU%TPrQ5ejh;VL zoTkf&KljMUSve1q{oLbafpPL0e=&-Yyt!~3XVTFI{qRyIkD}j^O(Qn;aP5g7q0iyu zcPGR||9e^8>AASS_EYMh008)iGZ2jVJ69@_)#K-R2qIvq7c?9zVaWt_!5HN_Z=!Ye z%G368!TfZjH&GxvB)w}`4x>u@wE5|H@-F#h2K;2M{IXnsgFHKZh6I?=lM+nWl=4+~ zoF-CpBAbY&g{`#?4a*lr6E+K5spOi;iJ!^EM2_XQzb{*FGaN$!gcC0Wzf-f#`HGaj z*W>>=IK2}M!?+e0=T_Vbbf@K5GP&1O_By!oZY3HqS`y)apQQ z=@AxHq|3MH%gSWT0cWwkFU8&vp#Fq`yrg>E7|RY?`5sjpL~uUnL{!g6&QWcUs=F_2 zDz>J6l)7CNT`2ED;|amhA_abcK;KFs%?f7evkx;HrS+v${%BZ#N7#erjISZ-=F-iX zlA?8>{{p4SmLuvwJSO?I*h1sJ_WRx+z3!^60ZGLT>7nakEznmM7yHQ^a%ST|b+=*s z+A&&94{6U#yL!A4^P0;lYj7ZAvDYQk5A#h;{H~Z>N(-dteT!r8YYZ^cZW4I$%3*S8$FX_I;Yrtx6Ze4v z#|PMWX+P5KVHK7E5Qf0=8aMn<91JiTE*Arf&{%)faLS$wg z?%c(rgc>52$_tg|Z(8s)`arTtMhj2@s+7#C?mH^XQFW-zOH5z)AWQL5ZBd@`;nq9B z0q?!E7Qo@JU@1fXI3rOOkKMB0*0ZTvGp-8eq_VJyqIL0d&a$WGQ!ym9XoOlh`vr(H zS?}OLRK2{ubZD*#5}FFeKJ}$Fv+kC)WQo*!xvEP2aY{tCVOLg)*i*a*9=@&Bb4T31 z{qD(TniqH4godMhFR96hWB!e6`{H9)!$$$d^%HH6#*U!*^{3SUUC{akIB*<)=7t0S z{=oAwHfC=U*3>2;_2F6m&d2rk*>BV3Xy!*2E31nKU&h`((E7Tv@D?A9PJROMJp#_j z`*ce7=id?dg8N{Vl1%}e4rGNC#z@L<(KJ)t)Aq9oY()kY`)M#-ipE8skkDs_2I)Ii zgG9BJGgj{D5EOtCtwJbcoZ;vJ3Ai9yPJV!uX_PucLCf1Mg$HjW)!0_6v^(hhXeu=t zWB#2|eOgB_=8AoiKb54$fN8^$=DYH8nOrG9&%^z#fo=;;Y6(%FGq>etSKB!=nE|zC zzw>T`pa6L5(I)pwm;aT=%*j*n*#qvti}hZouC76e*cOc5Bd6RB?~9G|W{aDH&(<+; zjc7$te8^7?p_$Y>us8SY@b2w*}Ex|278+alxD7&}O#jQjC<1>Sp;bD~B&a7D5f`@V%IM1dIb)3yvhoU{h3 zjq%uO1ciz;e=^my>@faV9r}K5vQsm?_NCe#k%sd{x!A3KYB3xJlCjEcWumP)%`>_b zR#!(J%P<)K5pE(p0X+qz4tEFzyXSGDHcOj&s3?T=v&2j#4Gq1@fPGHVAk=3^nRin1 zRZHSOxlXCO{1IEXAaFqiT-rM`o9aj=+7|Qpk`m$6eT|;l9!WZYi^Dzib@)W4MGB+& zvWslb0d{yk}CCQiP#1N+pdD>O0dp>u{4 zG0q}~!eF0r##wCCH(FUY!&QT6LH9nlS^m9Y@<@-aXZn;@{)5f)5*+IRf&-`J(`AcU zPuAZhvd%xaX}f<)K`)06w!&}ntoCf=Z!|C`OlWeFmE9g@)NW3&ub`mAuBI zDxnAj7dVj`$~@|_fcsrC8w{T4*wwIH!B0P+dKNi!hr5tpSe!Bj=(o0_X!MnhOrocV zV^zhSPax{$+fq5@Mb9t$v(wVV5V~lSMzqlVYNlCaqx0s(u-942eUog|*b=+OF|qER z*jRZ_U!0Wk1^Lx9*kCKcWq=}{itdTa@>*7bMeHcTbX0LcWkx_2)KUIfS^cS%(wA)M zX4lf%(^@le-^69lV+&(^&)X&KcpzPA`&pP{aD$A+cDlYqq3c^RaxIdUcdKT4TGYwkUuQbrh~0pr~XJk zd0vDIqd^JPWW6bup(4?HA%kW|!=N&oR)1SX@9cYb5R9c>0|0@oVQse^0ZMwr0^sN*h@ z8V7xJdEn`UEdX8HJtN3hdzUiLVtv|Z;RSEZkXnE_+k^G*N2-lY*GDmO((ZfcjWXrS z-_aoYP<~b)sAK&UR+wWcSvYt(pX_2}W;JA+imhh0G}3(BDJ}Hby|j*e`Yj<7NQ6PT zM~7m`C%7p7p)F3VcyhcrDS>(KbNKHSjh`xY8a_h*8r}f_?)_3UZuYje?rz+F41YAT zl$3?&g`d&}I{>|4p-%G4lt5GZcny&3#m7k~eY^wVkSkN~r;dP!uEc3(#7WrQ{b`*+ zPi|`KO2|GXAvF+|4LzljPm%th$JQoV(iJ2j?cmq!oZPIN@Sv~ABD zM{d?CR8MI+g{925gH8PJ(oLM<#`U+{Mez@58rc#w&wmjFjf&Bx@38NP8}l7ou7hQ! zXw}L+4|?<2fWX6A`<`~^S%bL4TjvG9!KLl+RBB8od2pGoVe`YSin056 z70EFhAY<3S_>HT|4c0I;yk7rISVd>+=a>gqy}%$J{2-qfUD;pT&6l%(e8&npiI9rB z63&q*U!$CC{D$+i;E*Bqy>Ipb>)W}qW!zJ&%hpFT>^|lK-eE{_&q-E~byYqcy}#B^ z7ppx(|D7z(i`sTbLy!m+A%<}OB$1oD56IT-rv!SfYw~=7ha`|@2O)vl>z0I7LwD)gcUmPR_f@TFInF~eIz_eA zw)JP}Yt{G)HE;Vx%h=f)tjHz#VyX|}FrDqV&?OkT8wPF4$o_Kc6NM;FxSEb%0cG*3 zW+nJFo^Cxu6K|~kj73stkEJk6lIsh<@KlZ;=<+&8-u9t7LlL5#UqaihBVQzP7&MDP zaF?+1=N0*l3k5T?yko~R%uy3Nfb-wy%>b+u9wVx!N!v*rn4DuufJ@QWtaO%(EV(6( z`lE$9G$~<+C~YiIU)_s#_A73u9q8fIjkKn2Fw=WRO;yeY8N6uRR!>p|rfrF;(p8gg zsEIlg*@tB+Ke^ewcbcwm(u9b{tc5vD*JZN}dvgKz4oR@~UTNQ9wNIdl!wom!p_EYD zgyX|<3O=xEFq0{f@n|;B>+-~G`ovYJp4wkp&eIUuUwXEgZ|h_1(l!~5m8XL5 zN<^EJgy{m?JTbY(X|GHa?U!v^VDIKR@5Cs=c!^^bKl^|>D0;@ai=4Fa#LZtidpuC_ zf`s}!(C)0>f(I#?Upjv`1pC7@4F<}S>0DKU3q0X24l@@G?R=uUk&|OMd0#Xu z*3mIl1-t{+H0M1C$R&roIv-6eet2BjUe9ChTo|@Se&bVrQ?s~8;!=Y#8}rz6An9>N zg^&ziySbBKFJEl;wncJfK=-I8w_5IdtICSt5ka{+l=I7a&Te8z$;t@pc&@8O-_gD` z(F++Da>aKb4YGGuff=X@M7>$(pnznOfpEXWK?8b5ZD1#nLrMizx|*}V>9d{jDJ*s5 z6K>^%TH=i>|IM!7IsuDS;mnl?;u0W^68uSEYfF%|oW12!$DbSq#lORP|I;f8g=x8Z zZ(w9%+#@@;gFzyyBEo@Df6^}&&E+&CAs}bzi;XyF7jOw zC`>VSW}kjc)iD|HU4@r>wE=0Sgfcio^9#dr-~pJ?+ZNP zsxMM9;vU23fgP(Fm4rg)Z&M*%e5b^$kUyFIb_@*|0W_wL$=j(INUM_J_67N0~D|1JNum5_lg7zOhd!3eU2A#MdWCt8o79GruPwJe1{~ zE>(A{Cv_IgRRl}NnQIu0Sm{|d!%PIn>WXI(eSmaFHgP--F)X}(#D8*FWaVrFxFrQ^ zgoU|AP6;b^f&Ka}wJuE*_M1kJ3ggSNg^6qt8O~Zd$H>+hk^vYC`1@MAk+$8p6L5+1km~_J2%9Q275h|0HeIZXS{s zD|dd#4fti*EcM|yptJkA(i&>}V-GCpgFYt6w_p2Bdd}cQ#KUuwVYz;8nC+NJ|Eatl#A0?9dB!a4uo44*6xESQDzKQ3#C8CVI~)a z!6PhpdXPxGl%2oP?opJt;6s+IyL(wyP;v?Dda%UP7u^<-toey1$k}?MfEG=ANIH+- z$b_8YO@^;aRil?LPNPHlT*?OX)aJIRyav;(2p+;G^onX77GA4g-lLQ1g)Nk%2Tr$q zT&hIPr%w*W#9kC?>fdj`h%9eCT5$escIWVhG~lda{~U;66aqJ`mgaf3!Yl4{|xZY^_ahaKgSfrCI7ZMa~t?ikNWSxR)pC4&yMx& zIJce4Uy-&EtGKs(%-i7GM%OR!Io5A&uiF7`zh(Ri044m_)&K3C<93wWGV@myWRgFk z{1TnFp|>0SFK9REAJBia``hS$_RwEg03eqP0Jz;zZ=?TNxc-i|Ll}quLjP69v{dgP TW)=XzLVWxYwQh~-=db?*wYcEN literal 0 HcmV?d00001 diff --git a/tests/test.rs b/tests/test.rs index 4e09ff4..8370d13 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,7 +1,7 @@ use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String}; use calamine::{ - open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Reader, - Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx, + open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range, + Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx, }; use calamine::{CellErrorType::*, Data}; use std::collections::BTreeSet; @@ -1878,3 +1878,22 @@ fn issue_401_empty_tables() { let tables = excel.table_names(); assert!(tables.is_empty()); } + +#[test] +fn issue_391_shared_formula() { + setup(); + + let path = format!("{}/tests/issue_391.xlsx", env!("CARGO_MANIFEST_DIR")); + let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); + let mut expect = Range::::new((1, 0), (6, 0)); + for (i, cell) in vec!["A1+1", "A2+1", "A3+1", "A4+1", "A5+1", "A6+1"] + .iter() + .enumerate() + { + expect.set_value((1 + i as u32, 0), cell.to_string()); + } + let res = excel.worksheet_formula("Sheet1").unwrap(); + assert_eq!(expect.start(), res.start()); + assert_eq!(expect.end(), res.end()); + assert!(expect.cells().eq(res.cells())); +}