From e5c23b4be6297096230e055acbba129ab1115c62 Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:33:37 +0800 Subject: [PATCH 1/7] feat: add position_to_title helper function --- src/xlsx/mod.rs | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 2aba53e4..664b5f8a 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -1113,6 +1113,31 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), Ok(()) } +/// Convert the integer to Excelsheet column title. +/// If the column number not in 1~16384, an Error is returned. +pub(crate) fn column_number_to_name(num: u32) -> Result { + if num < 1 || num > MAX_COLUMNS { + return Err(XlsxError::Unexpected("column number overflow")); + } + let mut col: Vec = Vec::new(); + let mut num = num; + while num > 0 { + let integer: u8 = (num as u8 - 1) % 26 + 65; + col.push(integer); + num = (num - 1) / 26; + } + col.reverse(); + match String::from_utf8(col) { + Ok(s) => Ok(s), + Err(_) => Err(XlsxError::NumericColumn(num as u8)), + } +} + +pub(crate) fn position_to_title(cell: (u32, u32)) -> Result { + let col = column_number_to_name(cell.0)?; + Ok(format!("{col}{}", cell.1 + 1).to_owned()) +} + #[cfg(test)] mod tests { use super::*; @@ -1174,4 +1199,24 @@ mod tests { CellErrorType::Value ); } + + #[test] + fn test_column_number_to_name() { + assert_eq!(column_number_to_name(1).unwrap(), String::from("A")); + assert_eq!(column_number_to_name(37).unwrap(), String::from("AK")); + assert_eq!( + column_number_to_name(MAX_COLUMNS - 1).unwrap(), + String::from("XNU") + ); + } + + #[test] + fn test_position_to_title() { + assert_eq!(position_to_title((1, 1)).unwrap(), String::from("A1")); + assert_eq!(position_to_title((37, 1)).unwrap(), String::from("AK1")); + assert_eq!( + position_to_title((MAX_COLUMNS - 1, 1)).unwrap(), + String::from("XNU1") + ); + } } From 7553e005bd5ac82602cd9ff5dbf3cee40347c0ee Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:37:07 +0800 Subject: [PATCH 2/7] feat: add shared formula logic --- Cargo.toml | 1 + src/xlsx/cells_reader.rs | 113 ++++++++++++++++++++++++++++++++++++++- tests/issue_391.xlsx | Bin 0 -> 8051 bytes tests/test.rs | 23 +++++++- 4 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 tests/issue_391.xlsx diff --git a/Cargo.toml b/Cargo.toml index a52f85b8..5f43a84e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ zip = { version = "0.6", default-features = false, features = ["deflate"] } chrono = { version = "0.4", features = [ "serde", ], optional = true, default-features = false } +regex = "1.10" [dev-dependencies] glob = "0.3" diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index b6435d20..e92f4bcd 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -1,10 +1,14 @@ +use std::collections::HashMap; + use quick_xml::{ events::{attributes::Attribute, BytesStart, Event}, name::QName, }; +use regex::Regex; use super::{ - get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader, + get_attribute, get_dimension, get_row, get_row_column, position_to_title, read_string, + Dimensions, XlReader, }; use crate::{ datatype::DataRef, @@ -23,6 +27,7 @@ pub struct XlsxCellReader<'a> { col_index: u32, buf: Vec, cell_buf: Vec, + formulas: Vec)>>, } impl<'a> XlsxCellReader<'a> { @@ -68,6 +73,7 @@ impl<'a> XlsxCellReader<'a> { col_index: 0, buf: Vec::with_capacity(1024), cell_buf: Vec::with_capacity(1024), + formulas: Vec::with_capacity(1024), }) } @@ -165,8 +171,111 @@ impl<'a> XlsxCellReader<'a> { self.cell_buf.clear(); match self.xml.read_event_into(&mut self.cell_buf) { Ok(Event::Start(ref e)) => { + let mut offset_map: HashMap = HashMap::new(); + let mut shared_index = None; + let mut shared_ref = None; + let shared = + get_attribute(e.attributes(), QName(b"t")).unwrap_or(None); + match shared { + Some(b"shared") => { + shared_index = Some( + String::from_utf8( + get_attribute(e.attributes(), QName(b"si"))? + .unwrap() + .to_vec(), + ) + .unwrap() + .parse::()?, + ); + match get_attribute(e.attributes(), QName(b"ref"))? { + Some(res) => { + let reference = get_dimension(res)?; + if reference.start.0 != reference.end.0 { + for i in + 0..=(reference.end.0 - reference.start.0) + { + offset_map.insert( + position_to_title(( + reference.start.0 + i, + reference.start.1, + ))?, + ( + (reference.start.0 as i64 + - pos.0 as i64 + + i as i64) + as i32, + 0, + ), + ); + } + } else if reference.start.1 != reference.end.1 { + for i in + 0..=(reference.end.1 - reference.start.1) + { + offset_map.insert( + position_to_title(( + reference.start.0, + reference.start.1 + i, + ))?, + ( + 0, + (reference.start.1 as i64 + - pos.1 as i64 + + i as i64) + as i32, + ), + ); + } + } + shared_ref = Some(reference); + } + None => {} + } + } + _ => {} + } if let Some(f) = read_formula(&mut self.xml, e)? { - value = Some(f); + value = Some(f.clone()); + if shared_index.is_some() && shared_ref.is_some() { + // original shared formula + while self.formulas.len() < shared_index.unwrap() as usize { + self.formulas.push(None); + } + self.formulas.push(Some((f, offset_map))); + } + } + if shared_index.is_some() && shared_ref.is_none() { + // shared formula + let cell_regex = Regex::new(r"[A-Z]+[0-9]+").unwrap(); + if let Some((f, offset)) = + self.formulas[shared_index.unwrap() as usize].clone() + { + let cells = cell_regex + .find_iter(f.as_str()) + .map(|x| get_row_column(x.as_str().as_bytes())); + let mut template = cell_regex + .replace_all(f.as_str(), r"\uffff") + .into_owned(); + let ffff_regex = Regex::new(r"\\uffff").unwrap(); + let (row, col) = + offset.get(&position_to_title(pos)?).unwrap(); + for res in cells { + match res { + Ok(cell) => { + // calculate new formula cell pos + let name = position_to_title(( + (cell.0 as i64 + *col as i64) as u32, + (cell.1 as i64 + *row as i64 + 1) as u32, + ))?; + template = ffff_regex + .replace(&template, name.as_str()) + .into_owned(); + } + Err(_) => {} + }; + } + value = Some(template.clone()); + }; } } Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, diff --git a/tests/issue_391.xlsx b/tests/issue_391.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f8b6397954058137a4078eefdda57af674973482 GIT binary patch literal 8051 zcmeHMg5UF6opK)S`@U~qWzyJUMbO4;~&p%9%0Dve|0Du^P zfovk@u{SqDMpz7N@xAq<~T?|k((eb0|w{i5z7P_n^e#h9#mP$Nj|N&xntr(rEKWS<_j49)oEBD!ZulWrh^p;(;GFj{R-@@B-42IuN2AK9F%_@&kj3NTz zm{klpeKxIgM>qmQ#~LNUc<5-@&vCWCVr(;<*O$4Q< zp)t6b%a2q-PXA-sQ#k`S>oaXQN#61h$3^gz2qBk{vptxr9fuK>mt0uz;R=HoQR#M- zB(Hlwr3}1A;U+#d2$6X|exLxf{*jZ7dOQrr2%@PWQVtuDlNPSFAUAHVpZovi;(u`h z{_WN)lGHW3d2k|NN>`En@ad%l0$Eir8KrhQ-N0bwMf^r^5hLYN7Yh}EE=3s1!ystj z)!@RCIQV-%-RU~8B8G@WjG@V^G9vxL#RHw4*)>hkrDCmz&~y4^`ZVLAiVv$NG@iY@ z)@82H?jiGt7{M#g%QpT)1L_Co?M3PVdFp#`K z+<)eYmy_#rD<`MtKU>#dIfI1gUx+CG-CIS{m`V>1qJD?M2);T_R?*XE?a2_HvY#Ny ziaNY6Rvw5Ax;Tx9HX%zp`8f1esM~*qTdt|S`dtjZ+J=>u)?FqH9OfpjW31)O^el9O z9?^#R55ZW>jC0eo1`7tqnMXuMLP`BxAtZ)h542VINYT|5sCYM&$tLqfhNgzJv%>bY z9S0B1i@T_`^1P!1%4v5OObAARUpIb0t!G8T#wI%O@13cIGv@jS2HU%TPrQ5ejh;VL zoTkf&KljMUSve1q{oLbafpPL0e=&-Yyt!~3XVTFI{qRyIkD}j^O(Qn;aP5g7q0iyu zcPGR||9e^8>AASS_EYMh008)iGZ2jVJ69@_)#K-R2qIvq7c?9zVaWt_!5HN_Z=!Ye z%G368!TfZjH&GxvB)w}`4x>u@wE5|H@-F#h2K;2M{IXnsgFHKZh6I?=lM+nWl=4+~ zoF-CpBAbY&g{`#?4a*lr6E+K5spOi;iJ!^EM2_XQzb{*FGaN$!gcC0Wzf-f#`HGaj z*W>>=IK2}M!?+e0=T_Vbbf@K5GP&1O_By!oZY3HqS`y)apQQ z=@AxHq|3MH%gSWT0cWwkFU8&vp#Fq`yrg>E7|RY?`5sjpL~uUnL{!g6&QWcUs=F_2 zDz>J6l)7CNT`2ED;|amhA_abcK;KFs%?f7evkx;HrS+v${%BZ#N7#erjISZ-=F-iX zlA?8>{{p4SmLuvwJSO?I*h1sJ_WRx+z3!^60ZGLT>7nakEznmM7yHQ^a%ST|b+=*s z+A&&94{6U#yL!A4^P0;lYj7ZAvDYQk5A#h;{H~Z>N(-dteT!r8YYZ^cZW4I$%3*S8$FX_I;Yrtx6Ze4v z#|PMWX+P5KVHK7E5Qf0=8aMn<91JiTE*Arf&{%)faLS$wg z?%c(rgc>52$_tg|Z(8s)`arTtMhj2@s+7#C?mH^XQFW-zOH5z)AWQL5ZBd@`;nq9B z0q?!E7Qo@JU@1fXI3rOOkKMB0*0ZTvGp-8eq_VJyqIL0d&a$WGQ!ym9XoOlh`vr(H zS?}OLRK2{ubZD*#5}FFeKJ}$Fv+kC)WQo*!xvEP2aY{tCVOLg)*i*a*9=@&Bb4T31 z{qD(TniqH4godMhFR96hWB!e6`{H9)!$$$d^%HH6#*U!*^{3SUUC{akIB*<)=7t0S z{=oAwHfC=U*3>2;_2F6m&d2rk*>BV3Xy!*2E31nKU&h`((E7Tv@D?A9PJROMJp#_j z`*ce7=id?dg8N{Vl1%}e4rGNC#z@L<(KJ)t)Aq9oY()kY`)M#-ipE8skkDs_2I)Ii zgG9BJGgj{D5EOtCtwJbcoZ;vJ3Ai9yPJV!uX_PucLCf1Mg$HjW)!0_6v^(hhXeu=t zWB#2|eOgB_=8AoiKb54$fN8^$=DYH8nOrG9&%^z#fo=;;Y6(%FGq>etSKB!=nE|zC zzw>T`pa6L5(I)pwm;aT=%*j*n*#qvti}hZouC76e*cOc5Bd6RB?~9G|W{aDH&(<+; zjc7$te8^7?p_$Y>us8SY@b2w*}Ex|278+alxD7&}O#jQjC<1>Sp;bD~B&a7D5f`@V%IM1dIb)3yvhoU{h3 zjq%uO1ciz;e=^my>@faV9r}K5vQsm?_NCe#k%sd{x!A3KYB3xJlCjEcWumP)%`>_b zR#!(J%P<)K5pE(p0X+qz4tEFzyXSGDHcOj&s3?T=v&2j#4Gq1@fPGHVAk=3^nRin1 zRZHSOxlXCO{1IEXAaFqiT-rM`o9aj=+7|Qpk`m$6eT|;l9!WZYi^Dzib@)W4MGB+& zvWslb0d{yk}CCQiP#1N+pdD>O0dp>u{4 zG0q}~!eF0r##wCCH(FUY!&QT6LH9nlS^m9Y@<@-aXZn;@{)5f)5*+IRf&-`J(`AcU zPuAZhvd%xaX}f<)K`)06w!&}ntoCf=Z!|C`OlWeFmE9g@)NW3&ub`mAuBI zDxnAj7dVj`$~@|_fcsrC8w{T4*wwIH!B0P+dKNi!hr5tpSe!Bj=(o0_X!MnhOrocV zV^zhSPax{$+fq5@Mb9t$v(wVV5V~lSMzqlVYNlCaqx0s(u-942eUog|*b=+OF|qER z*jRZ_U!0Wk1^Lx9*kCKcWq=}{itdTa@>*7bMeHcTbX0LcWkx_2)KUIfS^cS%(wA)M zX4lf%(^@le-^69lV+&(^&)X&KcpzPA`&pP{aD$A+cDlYqq3c^RaxIdUcdKT4TGYwkUuQbrh~0pr~XJk zd0vDIqd^JPWW6bup(4?HA%kW|!=N&oR)1SX@9cYb5R9c>0|0@oVQse^0ZMwr0^sN*h@ z8V7xJdEn`UEdX8HJtN3hdzUiLVtv|Z;RSEZkXnE_+k^G*N2-lY*GDmO((ZfcjWXrS z-_aoYP<~b)sAK&UR+wWcSvYt(pX_2}W;JA+imhh0G}3(BDJ}Hby|j*e`Yj<7NQ6PT zM~7m`C%7p7p)F3VcyhcrDS>(KbNKHSjh`xY8a_h*8r}f_?)_3UZuYje?rz+F41YAT zl$3?&g`d&}I{>|4p-%G4lt5GZcny&3#m7k~eY^wVkSkN~r;dP!uEc3(#7WrQ{b`*+ zPi|`KO2|GXAvF+|4LzljPm%th$JQoV(iJ2j?cmq!oZPIN@Sv~ABD zM{d?CR8MI+g{925gH8PJ(oLM<#`U+{Mez@58rc#w&wmjFjf&Bx@38NP8}l7ou7hQ! zXw}L+4|?<2fWX6A`<`~^S%bL4TjvG9!KLl+RBB8od2pGoVe`YSin056 z70EFhAY<3S_>HT|4c0I;yk7rISVd>+=a>gqy}%$J{2-qfUD;pT&6l%(e8&npiI9rB z63&q*U!$CC{D$+i;E*Bqy>Ipb>)W}qW!zJ&%hpFT>^|lK-eE{_&q-E~byYqcy}#B^ z7ppx(|D7z(i`sTbLy!m+A%<}OB$1oD56IT-rv!SfYw~=7ha`|@2O)vl>z0I7LwD)gcUmPR_f@TFInF~eIz_eA zw)JP}Yt{G)HE;Vx%h=f)tjHz#VyX|}FrDqV&?OkT8wPF4$o_Kc6NM;FxSEb%0cG*3 zW+nJFo^Cxu6K|~kj73stkEJk6lIsh<@KlZ;=<+&8-u9t7LlL5#UqaihBVQzP7&MDP zaF?+1=N0*l3k5T?yko~R%uy3Nfb-wy%>b+u9wVx!N!v*rn4DuufJ@QWtaO%(EV(6( z`lE$9G$~<+C~YiIU)_s#_A73u9q8fIjkKn2Fw=WRO;yeY8N6uRR!>p|rfrF;(p8gg zsEIlg*@tB+Ke^ewcbcwm(u9b{tc5vD*JZN}dvgKz4oR@~UTNQ9wNIdl!wom!p_EYD zgyX|<3O=xEFq0{f@n|;B>+-~G`ovYJp4wkp&eIUuUwXEgZ|h_1(l!~5m8XL5 zN<^EJgy{m?JTbY(X|GHa?U!v^VDIKR@5Cs=c!^^bKl^|>D0;@ai=4Fa#LZtidpuC_ zf`s}!(C)0>f(I#?Upjv`1pC7@4F<}S>0DKU3q0X24l@@G?R=uUk&|OMd0#Xu z*3mIl1-t{+H0M1C$R&roIv-6eet2BjUe9ChTo|@Se&bVrQ?s~8;!=Y#8}rz6An9>N zg^&ziySbBKFJEl;wncJfK=-I8w_5IdtICSt5ka{+l=I7a&Te8z$;t@pc&@8O-_gD` z(F++Da>aKb4YGGuff=X@M7>$(pnznOfpEXWK?8b5ZD1#nLrMizx|*}V>9d{jDJ*s5 z6K>^%TH=i>|IM!7IsuDS;mnl?;u0W^68uSEYfF%|oW12!$DbSq#lORP|I;f8g=x8Z zZ(w9%+#@@;gFzyyBEo@Df6^}&&E+&CAs}bzi;XyF7jOw zC`>VSW}kjc)iD|HU4@r>wE=0Sgfcio^9#dr-~pJ?+ZNP zsxMM9;vU23fgP(Fm4rg)Z&M*%e5b^$kUyFIb_@*|0W_wL$=j(INUM_J_67N0~D|1JNum5_lg7zOhd!3eU2A#MdWCt8o79GruPwJe1{~ zE>(A{Cv_IgRRl}NnQIu0Sm{|d!%PIn>WXI(eSmaFHgP--F)X}(#D8*FWaVrFxFrQ^ zgoU|AP6;b^f&Ka}wJuE*_M1kJ3ggSNg^6qt8O~Zd$H>+hk^vYC`1@MAk+$8p6L5+1km~_J2%9Q275h|0HeIZXS{s zD|dd#4fti*EcM|yptJkA(i&>}V-GCpgFYt6w_p2Bdd}cQ#KUuwVYz;8nC+NJ|Eatl#A0?9dB!a4uo44*6xESQDzKQ3#C8CVI~)a z!6PhpdXPxGl%2oP?opJt;6s+IyL(wyP;v?Dda%UP7u^<-toey1$k}?MfEG=ANIH+- z$b_8YO@^;aRil?LPNPHlT*?OX)aJIRyav;(2p+;G^onX77GA4g-lLQ1g)Nk%2Tr$q zT&hIPr%w*W#9kC?>fdj`h%9eCT5$escIWVhG~lda{~U;66aqJ`mgaf3!Yl4{|xZY^_ahaKgSfrCI7ZMa~t?ikNWSxR)pC4&yMx& zIJce4Uy-&EtGKs(%-i7GM%OR!Io5A&uiF7`zh(Ri044m_)&K3C<93wWGV@myWRgFk z{1TnFp|>0SFK9REAJBia``hS$_RwEg03eqP0Jz;zZ=?TNxc-i|Ll}quLjP69v{dgP TW)=XzLVWxYwQh~-=db?*wYcEN literal 0 HcmV?d00001 diff --git a/tests/test.rs b/tests/test.rs index 4e09ff45..8370d13d 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,7 +1,7 @@ use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String}; use calamine::{ - open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Reader, - Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx, + open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range, + Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx, }; use calamine::{CellErrorType::*, Data}; use std::collections::BTreeSet; @@ -1878,3 +1878,22 @@ fn issue_401_empty_tables() { let tables = excel.table_names(); assert!(tables.is_empty()); } + +#[test] +fn issue_391_shared_formula() { + setup(); + + let path = format!("{}/tests/issue_391.xlsx", env!("CARGO_MANIFEST_DIR")); + let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); + let mut expect = Range::::new((1, 0), (6, 0)); + for (i, cell) in vec!["A1+1", "A2+1", "A3+1", "A4+1", "A5+1", "A6+1"] + .iter() + .enumerate() + { + expect.set_value((1 + i as u32, 0), cell.to_string()); + } + let res = excel.worksheet_formula("Sheet1").unwrap(); + assert_eq!(expect.start(), res.start()); + assert_eq!(expect.end(), res.end()); + assert!(expect.cells().eq(res.cells())); +} From 9c4e7cccbb7566e3d8d9150a201883c957997c4c Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:51:42 +0800 Subject: [PATCH 3/7] test: fix test issue --- src/xlsx/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 664b5f8a..7e127762 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -1212,11 +1212,11 @@ mod tests { #[test] fn test_position_to_title() { - assert_eq!(position_to_title((1, 1)).unwrap(), String::from("A1")); - assert_eq!(position_to_title((37, 1)).unwrap(), String::from("AK1")); + assert_eq!(position_to_title((1, 0)).unwrap(), String::from("A1")); + assert_eq!(position_to_title((37, 1)).unwrap(), String::from("AK2")); assert_eq!( - position_to_title((MAX_COLUMNS - 1, 1)).unwrap(), - String::from("XNU1") + position_to_title((MAX_COLUMNS - 1, 1_000_000)).unwrap(), + String::from("XNU1000001") ); } } From 6cd6a7f4d7160c0f2cd95eb0a95e34867c17694c Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Thu, 4 Apr 2024 10:14:21 +0800 Subject: [PATCH 4/7] refactor: neat way to replace cell names --- src/xlsx/cells_reader.rs | 57 +++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index e92f4bcd..cd6b061f 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -4,7 +4,7 @@ use quick_xml::{ events::{attributes::Attribute, BytesStart, Event}, name::QName, }; -use regex::Regex; +use regex::{Captures, Regex}; use super::{ get_attribute, get_dimension, get_row, get_row_column, position_to_title, read_string, @@ -16,6 +16,22 @@ use crate::{ Cell, XlsxError, }; +fn replace_all( + re: &Regex, + haystack: &str, + replacement: impl Fn(&Captures) -> Result, +) -> Result { + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for caps in re.captures_iter(haystack) { + let m = caps.get(0).unwrap(); + new.push_str(&haystack[last_match..m.start()]); + new.push_str(&replacement(&caps)?); + last_match = m.end(); + } + new.push_str(&haystack[last_match..]); + Ok(new) +} /// An xlsx Cell Iterator pub struct XlsxCellReader<'a> { xml: XlReader<'a>, @@ -250,31 +266,30 @@ impl<'a> XlsxCellReader<'a> { if let Some((f, offset)) = self.formulas[shared_index.unwrap() as usize].clone() { - let cells = cell_regex - .find_iter(f.as_str()) - .map(|x| get_row_column(x.as_str().as_bytes())); - let mut template = cell_regex - .replace_all(f.as_str(), r"\uffff") - .into_owned(); - let ffff_regex = Regex::new(r"\\uffff").unwrap(); let (row, col) = offset.get(&position_to_title(pos)?).unwrap(); - for res in cells { - match res { - Ok(cell) => { - // calculate new formula cell pos - let name = position_to_title(( - (cell.0 as i64 + *col as i64) as u32, - (cell.1 as i64 + *row as i64 + 1) as u32, - ))?; - template = ffff_regex - .replace(&template, name.as_str()) - .into_owned(); + let replacement = + |caps: &Captures| -> Result { + match get_row_column(caps[0].as_bytes()) { + Ok(cell) => { + match position_to_title(( + (cell.0 as i64 + *col as i64) as u32, + (cell.1 as i64 + *row as i64 + 1) + as u32, + )) { + Ok(name) => Ok(name), + Err(_) => Err("invalid cell reference"), + } + } + Err(_) => Err("invalid cell reference"), } - Err(_) => {} }; + match replace_all(&cell_regex, f.as_str(), &replacement) { + Ok(s) => { + value = Some(s); + } + Err(_) => {} } - value = Some(template.clone()); }; } } From f18a4a40d2c0092af3e0002becbaec9038f980f7 Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:45:56 +0800 Subject: [PATCH 5/7] fix: fix boundry issue and add function hint --- src/xlsx/mod.rs | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 7e127762..240f56fe 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -1116,26 +1116,27 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), /// Convert the integer to Excelsheet column title. /// If the column number not in 1~16384, an Error is returned. pub(crate) fn column_number_to_name(num: u32) -> Result { - if num < 1 || num > MAX_COLUMNS { + if num >= MAX_COLUMNS { return Err(XlsxError::Unexpected("column number overflow")); } let mut col: Vec = Vec::new(); - let mut num = num; + let mut num = num + 1; while num > 0 { - let integer: u8 = (num as u8 - 1) % 26 + 65; + let integer = ((num - 1) % 26 + 65) as u8; col.push(integer); num = (num - 1) / 26; } col.reverse(); match String::from_utf8(col) { Ok(s) => Ok(s), - Err(_) => Err(XlsxError::NumericColumn(num as u8)), + Err(_) => Err(XlsxError::Unexpected("not valid utf8")), } } -pub(crate) fn position_to_title(cell: (u32, u32)) -> Result { - let col = column_number_to_name(cell.0)?; - Ok(format!("{col}{}", cell.1 + 1).to_owned()) +/// Convert a cell coordinate to Excelsheet cell name. +/// If the column number not in 1~16384, an Error is returned. +pub(crate) fn coordinate_to_name(cell: (u32, u32)) -> Result { + Ok(format!("{}{}", column_number_to_name(cell.1)?, cell.0 + 1,)) } #[cfg(test)] @@ -1202,21 +1203,19 @@ mod tests { #[test] fn test_column_number_to_name() { - assert_eq!(column_number_to_name(1).unwrap(), String::from("A")); - assert_eq!(column_number_to_name(37).unwrap(), String::from("AK")); - assert_eq!( - column_number_to_name(MAX_COLUMNS - 1).unwrap(), - String::from("XNU") - ); + assert_eq!(column_number_to_name(0).unwrap(), "A"); + assert_eq!(column_number_to_name(25).unwrap(), "Z"); + assert_eq!(column_number_to_name(26).unwrap(), "AA"); + assert_eq!(column_number_to_name(27).unwrap(), "AB"); + assert_eq!(column_number_to_name(MAX_COLUMNS - 1).unwrap(), "XFD"); } #[test] fn test_position_to_title() { - assert_eq!(position_to_title((1, 0)).unwrap(), String::from("A1")); - assert_eq!(position_to_title((37, 1)).unwrap(), String::from("AK2")); + assert_eq!(coordinate_to_name((0, 0)).unwrap(), "A1"); assert_eq!( - position_to_title((MAX_COLUMNS - 1, 1_000_000)).unwrap(), - String::from("XNU1000001") + coordinate_to_name((1048_575, MAX_COLUMNS - 1)).unwrap(), + "XFD1048576" ); } } From 875c7bbfea166eed58b73b593d4f4f08a3f7d1d0 Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:49:12 +0800 Subject: [PATCH 6/7] fix: change offset type i64 to eliminate type conversion, refator to eliminate unwrap and return proper error --- src/xlsx/cells_reader.rs | 217 ++++++++++++++++++++++----------------- 1 file changed, 122 insertions(+), 95 deletions(-) diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index cd6b061f..556a8931 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -7,7 +7,7 @@ use quick_xml::{ use regex::{Captures, Regex}; use super::{ - get_attribute, get_dimension, get_row, get_row_column, position_to_title, read_string, + coordinate_to_name, get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader, }; use crate::{ @@ -43,7 +43,7 @@ pub struct XlsxCellReader<'a> { col_index: u32, buf: Vec, cell_buf: Vec, - formulas: Vec)>>, + formulas: Vec)>>, } impl<'a> XlsxCellReader<'a> { @@ -187,110 +187,137 @@ impl<'a> XlsxCellReader<'a> { self.cell_buf.clear(); match self.xml.read_event_into(&mut self.cell_buf) { Ok(Event::Start(ref e)) => { - let mut offset_map: HashMap = HashMap::new(); - let mut shared_index = None; - let mut shared_ref = None; - let shared = - get_attribute(e.attributes(), QName(b"t")).unwrap_or(None); - match shared { + match get_attribute(e.attributes(), QName(b"t")).unwrap_or(None) { Some(b"shared") => { - shared_index = Some( - String::from_utf8( - get_attribute(e.attributes(), QName(b"si"))? - .unwrap() - .to_vec(), - ) - .unwrap() - .parse::()?, - ); - match get_attribute(e.attributes(), QName(b"ref"))? { - Some(res) => { - let reference = get_dimension(res)?; - if reference.start.0 != reference.end.0 { - for i in - 0..=(reference.end.0 - reference.start.0) - { - offset_map.insert( - position_to_title(( - reference.start.0 + i, - reference.start.1, - ))?, - ( - (reference.start.0 as i64 - - pos.0 as i64 - + i as i64) - as i32, - 0, - ), - ); + // shared formula + let mut offset_map: HashMap = + HashMap::new(); + // get shared formula index + let shared_index = + match get_attribute(e.attributes(), QName(b"si"))? { + Some(res) => match std::str::from_utf8(res) { + Ok(res) => match u32::from_str_radix(res, 10) { + Ok(res) => res, + Err(e) => { + return Err(XlsxError::ParseInt(e)); + } + }, + Err(_) => { + return Err(XlsxError::Unexpected( + "si attribute must be a number", + )); } - } else if reference.start.1 != reference.end.1 { - for i in - 0..=(reference.end.1 - reference.start.1) - { - offset_map.insert( - position_to_title(( - reference.start.0, - reference.start.1 + i, - ))?, - ( - 0, - (reference.start.1 as i64 - - pos.1 as i64 - + i as i64) - as i32, - ), - ); + }, + None => { + return Err(XlsxError::Unexpected( + "si attribute is mandatory if it is shared", + )); + } + }; + // get shared formula reference + let shared_ref = + match get_attribute(e.attributes(), QName(b"ref"))? { + Some(res) => { + let reference = get_dimension(res)?; + if reference.start.0 != reference.end.0 { + for i in 0..=(reference.end.0 + - reference.start.0) + { + offset_map.insert( + coordinate_to_name(( + reference.start.0 + i, + reference.start.1, + ))?, + ( + (reference.start.0 as i64 + - pos.0 as i64 + + i as i64), + 0, + ), + ); + } + } else if reference.start.1 != reference.end.1 { + for i in 0..=(reference.end.1 + - reference.start.1) + { + offset_map.insert( + coordinate_to_name(( + reference.start.0, + reference.start.1 + i, + ))?, + ( + 0, + (reference.start.1 as i64 + - pos.1 as i64 + + i as i64), + ), + ); + } } + Some(reference) } - shared_ref = Some(reference); + None => None, + }; + + if let Some(f) = read_formula(&mut self.xml, e)? { + value = Some(f.clone()); + if let (si, true) = (shared_index, shared_ref.is_some()) + { + // original shared formula + while self.formulas.len() < si as usize { + self.formulas.push(None); + } + self.formulas.push(Some((f, offset_map))); } - None => {} } - } - _ => {} - } - if let Some(f) = read_formula(&mut self.xml, e)? { - value = Some(f.clone()); - if shared_index.is_some() && shared_ref.is_some() { - // original shared formula - while self.formulas.len() < shared_index.unwrap() as usize { - self.formulas.push(None); - } - self.formulas.push(Some((f, offset_map))); - } - } - if shared_index.is_some() && shared_ref.is_none() { - // shared formula - let cell_regex = Regex::new(r"[A-Z]+[0-9]+").unwrap(); - if let Some((f, offset)) = - self.formulas[shared_index.unwrap() as usize].clone() - { - let (row, col) = - offset.get(&position_to_title(pos)?).unwrap(); - let replacement = - |caps: &Captures| -> Result { - match get_row_column(caps[0].as_bytes()) { - Ok(cell) => { - match position_to_title(( - (cell.0 as i64 + *col as i64) as u32, - (cell.1 as i64 + *row as i64 + 1) - as u32, - )) { - Ok(name) => Ok(name), - Err(_) => Err("invalid cell reference"), + if let (si, true) = (shared_index, shared_ref.is_none()) { + // shared formula + let cell_regex = + Regex::new(r"\b[A-Z]{1,3}\d+\b").unwrap(); + if let Some((f, offset)) = + self.formulas[si as usize].clone() + { + if let Some((row, col)) = + offset.get(&coordinate_to_name(pos)?) + { + let replacement = + |caps: &Captures| -> Result { + match get_row_column(caps[0].as_bytes()) { + Ok(cell) => { + match coordinate_to_name(( + (cell.0 as i64 + *row) as u32, + (cell.1 as i64 + *col) as u32, + )) { + Ok(name) => Ok(name), + Err(e) => { + Err(e.to_string()) + } + } + } + Err(e) => Err(e.to_string()), + } + }; + + match replace_all( + &cell_regex, + f.as_str(), + &replacement, + ) { + Ok(s) => { + value = Some(s); } + Err(_) => {} } - Err(_) => Err("invalid cell reference"), } }; - match replace_all(&cell_regex, f.as_str(), &replacement) { - Ok(s) => { - value = Some(s); - } - Err(_) => {} + }; + } + _ => { + // good old formula + if let Some(f) = read_formula(&mut self.xml, e)? { + value = Some(f); } - }; + } } } Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, From b92aca20059e98133699002e42881db63e56bdce Mon Sep 17 00:00:00 2001 From: Flynn <1845797+ling7334@users.noreply.github.com> Date: Wed, 10 Apr 2024 15:33:17 +0800 Subject: [PATCH 7/7] refactor: eliminate redundant assignment --- src/xlsx/cells_reader.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index 556a8931..5bac833b 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -261,21 +261,20 @@ impl<'a> XlsxCellReader<'a> { if let Some(f) = read_formula(&mut self.xml, e)? { value = Some(f.clone()); - if let (si, true) = (shared_index, shared_ref.is_some()) - { + if shared_ref.is_some() { // original shared formula - while self.formulas.len() < si as usize { + while self.formulas.len() < shared_index as usize { self.formulas.push(None); } self.formulas.push(Some((f, offset_map))); } } - if let (si, true) = (shared_index, shared_ref.is_none()) { + if shared_ref.is_none() { // shared formula let cell_regex = Regex::new(r"\b[A-Z]{1,3}\d+\b").unwrap(); if let Some((f, offset)) = - self.formulas[si as usize].clone() + self.formulas[shared_index as usize].clone() { if let Some((row, col)) = offset.get(&coordinate_to_name(pos)?)