Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add option to set header row #453

Merged
merged 15 commits into from
Oct 8, 2024
Prev Previous commit
Next Next commit
refactor: implement properly header_row for xlsx
  • Loading branch information
PrettyWood committed Sep 18, 2024
commit dba5b60aea363561b5a4645c03d378d862707490
66 changes: 46 additions & 20 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
@@ -989,28 +989,54 @@ impl<RS: Read + Seek> ReaderRef<RS> for Xlsx<RS> {
if len < 100_000 {
cells.reserve(len as usize);
}
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => cells.push(cell),
Ok(None) => break,
Err(e) => return Err(e),

// If `header_row` is set, we only keep non-empty cells located at or after the `header_row`.
if let Some(header_row) = header_row {
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => {
if cell.pos.0 >= header_row {
cells.push(cell);
}
}
Ok(None) => break,
Err(e) => return Err(e),
}
}
}

// If the first cell doesn't start at row 0, we add an empty cell
// at row 0 but still at the same column as the first cell
if header_row == Some(0) && cells.first().map_or(false, |c| c.pos.0 != 0) {
cells.insert(
0,
Cell {
pos: (0, cells.first().expect("cells should not be empty").pos.1),
val: DataRef::Empty,
},
);
// If `header_row` is set and the first non-empty cell is not at the `header_row`, we add
// an empty cell at the beginning with row `header_row` and same column as the first non-empty cell.
if cells.first().map_or(false, |c| c.pos.0 != header_row) {
cells.insert(
header_row as usize,
Cell {
pos: (
header_row,
cells.first().expect("cells should not be empty").pos.1,
),
val: DataRef::Empty,
},
);
}
// If `header_row` is not specified (default), the header row is the row of the first non-empty cell.
} else {
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => {
cells.push(cell);
}
Ok(None) => break,
Err(e) => return Err(e),
}
}
}

Ok(Range::from_sparse(cells))
3 changes: 2 additions & 1 deletion tests/test.rs
Original file line number Diff line number Diff line change
@@ -1800,6 +1800,7 @@ fn test_ref_xlsb() {
#[rstest]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't know about rstest. It seems neat.

#[case("header-row.xlsx", None, (2, 0), (9, 3), &[Empty, Empty, String("Note 1".to_string()), Empty], 32)]
#[case("header-row.xlsx", Some(0), (0, 0), (9, 3), &[Empty, Empty, Empty, Empty], 40)]
#[case("header-row.xlsx", Some(8), (8, 0), (9, 3), &[String("Columns".to_string()), String("Column A".to_string()), String("Column B".to_string()), String("Column C".to_string())], 8)]
#[case("temperature.xlsx", None, (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)]
#[case("temperature.xlsx", Some(0), (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)]
#[case("temperature-in-middle.xlsx", None, (3, 1), (5, 2), &[String("label".to_string()), String("value".to_string())], 6)]
@@ -1829,7 +1830,7 @@ fn header_row_xlsx(
.unwrap();
assert_eq!(range.start(), Some(expected_start));
assert_eq!(range.end(), Some(expected_end));
assert_eq!(range.rows().next().unwrap(), expected_first_row,);
assert_eq!(range.rows().next().unwrap(), expected_first_row);
assert_eq!(range.cells().count(), expected_total_cells);
}