From edf916507f93f3f7ba3df1df2126176914056f7b Mon Sep 17 00:00:00 2001 From: Wouter Doppenberg Date: Tue, 3 Sep 2024 17:10:33 +0200 Subject: [PATCH 1/6] Refactor table metadata retrieval for reuse Extracted table metadata retrieval into a separate function `get_table_meta` for code reuse and clarity. This change simplifies the `table_by_name` function and introduces a new `table_by_name_ref` function that also leverages the shared `get_table_meta` functionality. --- src/lib.rs | 2 +- src/xlsx/mod.rs | 73 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c2896edf..6354079e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -468,7 +468,7 @@ impl Range { // search bounds let row_start = cells.first().unwrap().pos.0; let row_end = cells.last().unwrap().pos.0; - let mut col_start = std::u32::MAX; + let mut col_start = u32::MAX; let mut col_end = 0; for c in cells.iter().map(|c| c.pos.1) { if c < col_start { diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 452dcaa0..b5506642 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -680,6 +680,28 @@ impl Xlsx { Ok(()) } + fn get_table_meta(&self, table_name: &str) -> Result { + let match_table_meta = self + .tables + .as_ref() + .expect("Tables must be loaded before they are referenced") + .iter() + .find(|(table, ..)| table == table_name) + .ok_or_else(|| XlsxError::TableNotFound(table_name.into()))?; + + let name = match_table_meta.0.to_owned(); + let sheet_name = match_table_meta.1.clone(); + let columns = match_table_meta.2.clone(); + let dimensions = (match_table_meta.3.start, match_table_meta.3.end); + + Ok(TableMetadata { + name, + sheet_name, + columns, + dimensions, + }) + } + /// Load the merged regions pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> { if self.merged_regions.is_none() { @@ -735,23 +757,39 @@ impl Xlsx { .collect() } - /// Get the table by name + /// Get the table by name (owned) // TODO: If retrieving multiple tables from a single sheet, get tables by sheet will be more efficient pub fn table_by_name(&mut self, table_name: &str) -> Result, XlsxError> { - let match_table_meta = self - .tables - .as_ref() - .expect("Tables must be loaded before they are referenced") - .iter() - .find(|(table, ..)| table == table_name) - .ok_or_else(|| XlsxError::TableNotFound(table_name.into()))?; - let name = match_table_meta.0.to_owned(); - let sheet_name = match_table_meta.1.clone(); - let columns = match_table_meta.2.clone(); - let start_dim = match_table_meta.3.start; - let end_dim = match_table_meta.3.end; + let TableMetadata { + name, + sheet_name, + columns, + dimensions, + } = self.get_table_meta(table_name)?; + let (start_dim, end_dim) = dimensions; let range = self.worksheet_range(&sheet_name)?; let tbl_rng = range.range(start_dim, end_dim); + + Ok(Table { + name, + sheet_name, + columns, + data: tbl_rng, + }) + } + + /// Get the table by name (ref) + pub fn table_by_name_ref(&mut self, table_name: &str) -> Result, XlsxError> { + let TableMetadata { + name, + sheet_name, + columns, + dimensions, + } = self.get_table_meta(table_name)?; + let (start_dim, end_dim) = dimensions; + let range = self.worksheet_range_ref(&sheet_name)?; + let tbl_rng = range.range(start_dim, end_dim); + Ok(Table { name, sheet_name, @@ -810,6 +848,15 @@ impl Xlsx { } } +type Dimension = (u32, u32); + +struct TableMetadata { + name: String, + sheet_name: String, + columns: Vec, + dimensions: (Dimension, Dimension), +} + struct InnerTableMetadata { display_name: String, ref_cells: String, From 6f10fcea7380fa050451a635e6a074d8a10bc0c8 Mon Sep 17 00:00:00 2001 From: Wouter Doppenberg Date: Tue, 3 Sep 2024 17:55:28 +0200 Subject: [PATCH 2/6] Tests; owned data method; duplication removed --- src/lib.rs | 5 +++++ src/xlsx/mod.rs | 18 ++++++++------- tests/test.rs | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6354079e..3350037d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -955,6 +955,11 @@ impl Table { pub fn data(&self) -> &Range { &self.data } + + /// Get an owned range representing the data from the table (excludes column headers) + pub fn data_owned(self) -> Range { + self.data + } } /// A helper function to deserialize cell values as `i64`, diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index b5506642..8c7cae49 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -680,6 +680,7 @@ impl Xlsx { Ok(()) } + #[inline] fn get_table_meta(&self, table_name: &str) -> Result { let match_table_meta = self .tables @@ -692,7 +693,10 @@ impl Xlsx { let name = match_table_meta.0.to_owned(); let sheet_name = match_table_meta.1.clone(); let columns = match_table_meta.2.clone(); - let dimensions = (match_table_meta.3.start, match_table_meta.3.end); + let dimensions = Dimensions { + start: match_table_meta.3.start, + end: match_table_meta.3.end, + }; Ok(TableMetadata { name, @@ -766,9 +770,9 @@ impl Xlsx { columns, dimensions, } = self.get_table_meta(table_name)?; - let (start_dim, end_dim) = dimensions; + let Dimensions { start, end } = dimensions; let range = self.worksheet_range(&sheet_name)?; - let tbl_rng = range.range(start_dim, end_dim); + let tbl_rng = range.range(start, end); Ok(Table { name, @@ -786,9 +790,9 @@ impl Xlsx { columns, dimensions, } = self.get_table_meta(table_name)?; - let (start_dim, end_dim) = dimensions; + let Dimensions { start, end } = dimensions; let range = self.worksheet_range_ref(&sheet_name)?; - let tbl_rng = range.range(start_dim, end_dim); + let tbl_rng = range.range(start, end); Ok(Table { name, @@ -848,13 +852,11 @@ impl Xlsx { } } -type Dimension = (u32, u32); - struct TableMetadata { name: String, sheet_name: String, columns: Vec, - dimensions: (Dimension, Dimension), + dimensions: Dimensions, } struct InnerTableMetadata { diff --git a/tests/test.rs b/tests/test.rs index 460af0d1..6a1cf07a 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -584,6 +584,66 @@ fn table() { assert_eq!(data.get((0, 1)), Some(&Float(12.5))); assert_eq!(data.get((1, 1)), Some(&Float(64.0))); xls.worksheet_range_at(0).unwrap().unwrap(); + + // Check if owned data works + let owned_data = table.data_owned(); + + assert_eq!( + owned_data.get((0, 0)), + Some(&String("something".to_owned())) + ); + assert_eq!(owned_data.get((1, 0)), Some(&String("else".to_owned()))); + assert_eq!(owned_data.get((0, 1)), Some(&Float(12.5))); + assert_eq!(owned_data.get((1, 1)), Some(&Float(64.0))); +} + +#[test] +fn table_by_ref() { + let mut xls: Xlsx<_> = wb("temperature-table.xlsx"); + xls.load_tables().unwrap(); + let table_names = xls.table_names(); + assert_eq!(table_names[0], "Temperature"); + assert_eq!(table_names[1], "OtherTable"); + let table = xls + .table_by_name_ref("Temperature") + .expect("Parsing table's sheet should not error"); + assert_eq!(table.name(), "Temperature"); + assert_eq!(table.columns()[0], "label"); + assert_eq!(table.columns()[1], "value"); + let data = table.data(); + assert_eq!( + data.get((0, 0)) + .expect("Could not get data from table ref."), + &DataRef::SharedString("celsius") + ); + assert_eq!( + data.get((1, 0)) + .expect("Could not get data from table ref."), + &DataRef::SharedString("fahrenheit") + ); + assert_eq!( + data.get((0, 1)) + .expect("Could not get data from table ref."), + &DataRef::Float(22.2222) + ); + assert_eq!( + data.get((1, 1)) + .expect("Could not get data from table ref."), + &DataRef::Float(72.0) + ); + // Check the second table + let table = xls + .table_by_name("OtherTable") + .expect("Parsing table's sheet should not error"); + assert_eq!(table.name(), "OtherTable"); + assert_eq!(table.columns()[0], "label2"); + assert_eq!(table.columns()[1], "value2"); + let data = table.data(); + assert_eq!(data.get((0, 0)), Some(&String("something".to_owned()))); + assert_eq!(data.get((1, 0)), Some(&String("else".to_owned()))); + assert_eq!(data.get((0, 1)), Some(&Float(12.5))); + assert_eq!(data.get((1, 1)), Some(&Float(64.0))); + xls.worksheet_range_at(0).unwrap().unwrap(); } #[test] From a536d368cb38fa6d96d0558e46606f71a0a80548 Mon Sep 17 00:00:00 2001 From: Wouter Doppenberg Date: Tue, 3 Sep 2024 18:06:31 +0200 Subject: [PATCH 3/6] Refactor test to use `table_by_name_ref` and `DataRef` assertions. --- tests/test.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index 6a1cf07a..8fb42ea4 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -633,16 +633,16 @@ fn table_by_ref() { ); // Check the second table let table = xls - .table_by_name("OtherTable") + .table_by_name_ref("OtherTable") .expect("Parsing table's sheet should not error"); assert_eq!(table.name(), "OtherTable"); assert_eq!(table.columns()[0], "label2"); assert_eq!(table.columns()[1], "value2"); let data = table.data(); - assert_eq!(data.get((0, 0)), Some(&String("something".to_owned()))); - assert_eq!(data.get((1, 0)), Some(&String("else".to_owned()))); - assert_eq!(data.get((0, 1)), Some(&Float(12.5))); - assert_eq!(data.get((1, 1)), Some(&Float(64.0))); + assert_eq!(data.get((0, 0)).expect("Could not get data from table ref."), &DataRef::SharedString("something")); + assert_eq!(data.get((1, 0)).expect("Could not get data from table ref."), &DataRef::SharedString("else")); + assert_eq!(data.get((0, 1)).expect("Could not get data from table ref."), &DataRef::Float(12.5)); + assert_eq!(data.get((1, 1)).expect("Could not get data from table ref."), &DataRef::Float(64.0)); xls.worksheet_range_at(0).unwrap().unwrap(); } From 854d025d2cc96b7e09d8a80092dd734e1d432994 Mon Sep 17 00:00:00 2001 From: Wouter Doppenberg Date: Tue, 3 Sep 2024 18:07:04 +0200 Subject: [PATCH 4/6] formatting --- tests/test.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index 8fb42ea4..10b454fc 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -639,10 +639,26 @@ fn table_by_ref() { assert_eq!(table.columns()[0], "label2"); assert_eq!(table.columns()[1], "value2"); let data = table.data(); - assert_eq!(data.get((0, 0)).expect("Could not get data from table ref."), &DataRef::SharedString("something")); - assert_eq!(data.get((1, 0)).expect("Could not get data from table ref."), &DataRef::SharedString("else")); - assert_eq!(data.get((0, 1)).expect("Could not get data from table ref."), &DataRef::Float(12.5)); - assert_eq!(data.get((1, 1)).expect("Could not get data from table ref."), &DataRef::Float(64.0)); + assert_eq!( + data.get((0, 0)) + .expect("Could not get data from table ref."), + &DataRef::SharedString("something") + ); + assert_eq!( + data.get((1, 0)) + .expect("Could not get data from table ref."), + &DataRef::SharedString("else") + ); + assert_eq!( + data.get((0, 1)) + .expect("Could not get data from table ref."), + &DataRef::Float(12.5) + ); + assert_eq!( + data.get((1, 1)) + .expect("Could not get data from table ref."), + &DataRef::Float(64.0) + ); xls.worksheet_range_at(0).unwrap().unwrap(); } From 7f6ef63bfaae72cab050f822977bcc90b22b1971 Mon Sep 17 00:00:00 2001 From: Wouter Doppenberg Date: Tue, 3 Sep 2024 21:09:02 +0200 Subject: [PATCH 5/6] Swapped `into_data` for `From` implementation --- src/lib.rs | 11 ++++++----- tests/test.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3350037d..12da3e02 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -932,13 +932,13 @@ impl<'a, T: 'a + CellType> DoubleEndedIterator for Rows<'a, T> { impl<'a, T: 'a + CellType> ExactSizeIterator for Rows<'a, T> {} /// Struct with the key elements of a table -pub struct Table { +pub struct Table { pub(crate) name: String, pub(crate) sheet_name: String, pub(crate) columns: Vec, pub(crate) data: Range, } -impl Table { +impl Table { /// Get the name of the table pub fn name(&self) -> &str { &self.name @@ -955,10 +955,11 @@ impl Table { pub fn data(&self) -> &Range { &self.data } +} - /// Get an owned range representing the data from the table (excludes column headers) - pub fn data_owned(self) -> Range { - self.data +impl From> for Range { + fn from(table: Table) -> Range { + table.data } } diff --git a/tests/test.rs b/tests/test.rs index 10b454fc..2f751890 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -586,7 +586,7 @@ fn table() { xls.worksheet_range_at(0).unwrap().unwrap(); // Check if owned data works - let owned_data = table.data_owned(); + let owned_data: Range = table.into(); assert_eq!( owned_data.get((0, 0)), @@ -660,6 +660,34 @@ fn table_by_ref() { &DataRef::Float(64.0) ); xls.worksheet_range_at(0).unwrap().unwrap(); + + // Check if owned data works + let owned_data: Range = table.into(); + + assert_eq!( + owned_data + .get((0, 0)) + .expect("Could not get data from table ref."), + &DataRef::SharedString("something") + ); + assert_eq!( + owned_data + .get((1, 0)) + .expect("Could not get data from table ref."), + &DataRef::SharedString("else") + ); + assert_eq!( + owned_data + .get((0, 1)) + .expect("Could not get data from table ref."), + &DataRef::Float(12.5) + ); + assert_eq!( + owned_data + .get((1, 1)) + .expect("Could not get data from table ref."), + &DataRef::Float(64.0) + ); } #[test] From e9d6dee1adafa7659815dd4ebbbce4efefe3a4f7 Mon Sep 17 00:00:00 2001 From: Wouter Doppenberg Date: Mon, 16 Sep 2024 08:58:43 +0200 Subject: [PATCH 6/6] Removed trait bounds to satisfy MR feedback; Fixed test --- src/lib.rs | 4 ++-- tests/test.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 12da3e02..3365096b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -932,13 +932,13 @@ impl<'a, T: 'a + CellType> DoubleEndedIterator for Rows<'a, T> { impl<'a, T: 'a + CellType> ExactSizeIterator for Rows<'a, T> {} /// Struct with the key elements of a table -pub struct Table { +pub struct Table { pub(crate) name: String, pub(crate) sheet_name: String, pub(crate) columns: Vec, pub(crate) data: Range, } -impl Table { +impl Table { /// Get the name of the table pub fn name(&self) -> &str { &self.name diff --git a/tests/test.rs b/tests/test.rs index 2f751890..0c0da6b0 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -659,7 +659,6 @@ fn table_by_ref() { .expect("Could not get data from table ref."), &DataRef::Float(64.0) ); - xls.worksheet_range_at(0).unwrap().unwrap(); // Check if owned data works let owned_data: Range = table.into();