Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add option to set header row #453

Merged
merged 15 commits into from
Oct 8, 2024
Merged
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ chrono = { version = "0.4", features = [
[dev-dependencies]
glob = "0.3"
env_logger = "0.11"
rstest = { version = "0.21.0", default-features = false }
serde_derive = "1.0"
sha256 = "1.3"

Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pub use crate::errors::Error;
pub use crate::ods::{Ods, OdsError};
pub use crate::xls::{Xls, XlsError, XlsOptions};
pub use crate::xlsb::{Xlsb, XlsbError};
pub use crate::xlsx::{Xlsx, XlsxError};
pub use crate::xlsx::{Xlsx, XlsxError, XlsxOptions};

use crate::vba::VbaProject;

Expand Down
46 changes: 46 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,29 @@ pub struct Xlsx<RS> {
pictures: Option<Vec<(String, Vec<u8>)>>,
/// Merged Regions: Name, Sheet, Merged Dimensions
merged_regions: Option<Vec<(String, String, Dimensions)>>,
/// Reader options
options: XlsxOptions,
}

/// Xlsx reader options
#[derive(Debug, Default)]
pub struct XlsxOptions {
/// By default, calamine skips empty rows until a nonempty row is found
pub keep_first_empty_rows: bool,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a more generic approach would be to have the option to set header row?
By default None means that it'll skip all empty rows (like today) and then we could have with_header_row(self, row: u32)?
I understand that >90% of use cases are the first row but it still feels better and not necessarily more complicated to give the option to the users?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perfect will do that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}

impl XlsxOptions {
/// Create a new XlsxOptions
pub fn new() -> Self {
Self::default()
}

/// Avoid skipping first empty rows
pub fn with_keep_first_empty_rows(self, keep_first_empty_rows: bool) -> Self {
Self {
keep_first_empty_rows,
}
}
}

impl<RS: Read + Seek> Xlsx<RS> {
Expand Down Expand Up @@ -850,6 +873,14 @@ impl<RS: Read + Seek> Xlsx<RS> {
}
}

impl<RS> Xlsx<RS> {
/// Set reader options
pub fn with_options(mut self, options: XlsxOptions) -> Self {
self.options = options;
self
}
}

impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {
type Error = XlsxError;

Expand All @@ -867,6 +898,7 @@ impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {
#[cfg(feature = "picture")]
pictures: None,
merged_regions: None,
options: XlsxOptions::default(),
};
xlsx.read_shared_strings()?;
xlsx.read_styles()?;
Expand Down Expand Up @@ -947,6 +979,7 @@ impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {

impl<RS: Read + Seek> ReaderRef<RS> for Xlsx<RS> {
fn worksheet_range_ref<'a>(&'a mut self, name: &str) -> Result<Range<DataRef<'a>>, XlsxError> {
let keep_first_empty_rows = self.options.keep_first_empty_rows;
let mut cell_reader = match self.worksheet_cells_reader(name) {
Ok(reader) => reader,
Err(XlsxError::NotAWorksheet(typ)) => {
Expand All @@ -971,6 +1004,19 @@ impl<RS: Read + Seek> ReaderRef<RS> for Xlsx<RS> {
Err(e) => return Err(e),
}
}

// If the first cell doesn't start at row 0, we add an empty cell
// at row 0 but still at the same column as the first cell
if keep_first_empty_rows && cells.first().map_or(false, |c| c.pos.0 != 0) {
cells.insert(
0,
Cell {
pos: (0, cells.first().expect("cells should not be empty").pos.1),
val: DataRef::Empty,
},
);
}

Ok(Range::from_sparse(cells))
}
}
Expand Down
Binary file added tests/keep-empty-rows.xlsx
Binary file not shown.
Binary file added tests/temperature-in-middle.xlsx
Binary file not shown.
39 changes: 38 additions & 1 deletion tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Flo
use calamine::{
open_workbook, open_workbook_auto, DataRef, DataType, Dimensions, ExcelDateTime,
ExcelDateTimeType, Ods, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible, Xls, Xlsb,
Xlsx,
Xlsx, XlsxOptions,
};
use calamine::{CellErrorType::*, Data};
use rstest::rstest;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::{BufReader, Cursor};
Expand Down Expand Up @@ -1786,3 +1787,39 @@ fn test_ref_xlsb() {
]
);
}

#[rstest]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't know about rstest. It seems neat.

#[case("keep-empty-rows.xlsx", false, (2, 0), (9, 3), &[Empty, Empty, String("Note 1".to_string()), Empty], 32)]
#[case("keep-empty-rows.xlsx", true, (0, 0), (9, 3), &[Empty, Empty, Empty, Empty], 40)]
#[case("temperature.xlsx", false, (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)]
#[case("temperature.xlsx", true, (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)]
#[case("temperature-in-middle.xlsx", false, (3, 1), (5, 2), &[String("label".to_string()), String("value".to_string())], 6)]
#[case("temperature-in-middle.xlsx", true, (0, 1), (5, 2), &[Empty, Empty], 12)]
fn keep_first_empty_rows_xlsx(
#[case] fixture_path: &str,
#[case] keep_first_empty_rows: bool,
#[case] expected_start: (u32, u32),
#[case] expected_end: (u32, u32),
#[case] expected_first_row: &[Data],
#[case] expected_total_cells: usize,
) {
let excel: Xlsx<_> = wb(fixture_path);
assert_eq!(
excel.sheets_metadata(),
&[Sheet {
name: "Sheet1".to_string(),
typ: SheetType::WorkSheet,
visible: SheetVisible::Visible
},]
);

// By default empty cells are skipped so the first row is skipped
let range = excel
.with_options(XlsxOptions::new().with_keep_first_empty_rows(keep_first_empty_rows))
.worksheet_range("Sheet1")
.unwrap();
assert_eq!(range.start(), Some(expected_start));
assert_eq!(range.end(), Some(expected_end));
assert_eq!(range.rows().next().unwrap(), expected_first_row,);
assert_eq!(range.cells().count(), expected_total_cells);
}