diff --git a/src/auto.rs b/src/auto.rs index b1d28fd..824abf1 100644 --- a/src/auto.rs +++ b/src/auto.rs @@ -3,8 +3,8 @@ use crate::errors::Error; use crate::vba::VbaProject; use crate::{ - open_workbook, open_workbook_from_rs, Data, DataRef, Metadata, Ods, Range, Reader, ReaderRef, - Xls, Xlsb, Xlsx, + open_workbook, open_workbook_from_rs, Data, DataRef, HeaderRow, Metadata, Ods, Range, Reader, + ReaderRef, Xls, Xlsb, Xlsx, }; use std::borrow::Cow; use std::fs::File; @@ -85,7 +85,7 @@ where Err(Error::Msg("Sheets must be created from a Path")) } - fn with_header_row(&mut self, header_row: Option) -> &mut Self { + fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self { match self { Sheets::Xls(ref mut e) => { e.with_header_row(header_row); diff --git a/src/lib.rs b/src/lib.rs index 7dc815c..f884932 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -215,6 +215,23 @@ pub struct Sheet { pub visible: SheetVisible, } +/// Row to use as header +/// By default, the first non-empty row is used as header +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum HeaderRow { + /// First non-empty row + FirstNonEmptyRow, + /// Index of the header row + Row(u32), +} + +impl Default for HeaderRow { + fn default() -> Self { + HeaderRow::FirstNonEmptyRow + } +} + // FIXME `Reader` must only be seek `Seek` for `Xls::xls`. Because of the present API this limits // the kinds of readers (other) data in formats can be read from. /// A trait to share spreadsheets reader functions across different `FileType`s @@ -230,7 +247,7 @@ where /// Set header row (i.e. 
first row to be read) - /// If `header_row` is `None`, the first non-empty row will be used as header row - fn with_header_row(&mut self, header_row: Option) -> &mut Self; + /// If `header_row` is `HeaderRow::FirstNonEmptyRow` (the default), the first non-empty row will be used as header row + fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self; /// Gets `VbaProject` fn vba_project(&mut self) -> Option, Self::Error>>; diff --git a/src/ods.rs b/src/ods.rs index d18c462..aa34a9b 100644 --- a/src/ods.rs +++ b/src/ods.rs @@ -16,7 +16,7 @@ use zip::read::{ZipArchive, ZipFile}; use zip::result::ZipError; use crate::vba::VbaProject; -use crate::{Data, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible}; +use crate::{Data, DataType, HeaderRow, Metadata, Range, Reader, Sheet, SheetType, SheetVisible}; use std::marker::PhantomData; const MIMETYPE: &[u8] = b"application/vnd.oasis.opendocument.spreadsheet"; @@ -64,8 +64,9 @@ pub enum OdsError { /// Ods reader options #[derive(Debug, Default)] +#[non_exhaustive] struct OdsOptions { - pub header_row: Option, + pub header_row: HeaderRow, } from_err!(std::io::Error, OdsError, Io); @@ -173,7 +174,7 @@ where }) } - fn with_header_row(&mut self, header_row: Option) -> &mut Self { + fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self { self.options.header_row = header_row; self } @@ -197,15 +198,17 @@ where .0 .to_owned(); - // If a header_row is defined, adjust the range - if let Some(header_row) = self.options.header_row { - if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) { - return Ok(sheet.range((header_row, start.1), end)); + match self.options.header_row { + HeaderRow::FirstNonEmptyRow => Ok(sheet), + HeaderRow::Row(header_row_idx) => { + // If `header_row` is a row index, adjust the range + if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) { + Ok(sheet.range((header_row_idx, start.1), end)) + } else { + Ok(sheet) + } } } - - // Return the original range if no header row is set - Ok(sheet) } fn worksheets(&mut self) -> Vec<(String, Range)> { diff --git a/src/xls.rs b/src/xls.rs 
index 761b6cc..a04fcbe 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -17,7 +17,8 @@ use crate::utils::read_usize; use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32}; use crate::vba::VbaProject; use crate::{ - Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, Sheet, SheetType, SheetVisible, + Cell, CellErrorType, Data, Dimensions, HeaderRow, Metadata, Range, Reader, Sheet, SheetType, + SheetVisible, }; #[derive(Debug)] @@ -136,9 +137,8 @@ pub struct XlsOptions { /// /// [code page]: https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers pub force_codepage: Option, - /// Index of the header row - /// If not set, the first non-empty row is considered the header row - pub header_row: Option, + /// Row to use as header + pub header_row: HeaderRow, } struct SheetData { @@ -234,7 +234,7 @@ impl Reader for Xls { Self::new_with_options(reader, XlsOptions::default()) } - fn with_header_row(&mut self, header_row: Option) -> &mut Self { + fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self { self.options.header_row = header_row; self } @@ -255,15 +255,17 @@ impl Reader for Xls { .map(|r| r.range.clone()) .ok_or_else(|| XlsError::WorksheetNotFound(name.into()))?; - // If a header_row is defined, adjust the range - if let Some(header_row) = self.options.header_row { - if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) { - return Ok(sheet.range((header_row, start.1), end)); + match self.options.header_row { + HeaderRow::FirstNonEmptyRow => Ok(sheet), + HeaderRow::Row(header_row_idx) => { + // If `header_row` is a row index, adjust the range + if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) { + Ok(sheet.range((header_row_idx, start.1), end)) + } else { + Ok(sheet) + } } } - - // Return the original range if no header row is set - Ok(sheet) } fn worksheets(&mut self) -> Vec<(String, Range)> { diff --git a/src/xlsb/mod.rs b/src/xlsb/mod.rs index 492ea29..3de08a4 100644 --- 
a/src/xlsb/mod.rs +++ b/src/xlsb/mod.rs @@ -20,7 +20,9 @@ use crate::datatype::DataRef; use crate::formats::{builtin_format_by_code, detect_custom_number_format, CellFormat}; use crate::utils::{push_column, read_f64, read_i32, read_u16, read_u32, read_usize}; use crate::vba::VbaProject; -use crate::{Cell, Data, Metadata, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible}; +use crate::{ + Cell, Data, HeaderRow, Metadata, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible, +}; /// A Xlsb specific error #[derive(Debug)] @@ -130,8 +132,9 @@ impl std::error::Error for XlsbError { /// Xlsb reader options #[derive(Debug, Default)] +#[non_exhaustive] struct XlsbOptions { - pub header_row: Option, + pub header_row: HeaderRow, } /// A Xlsb reader @@ -468,7 +471,7 @@ impl Reader for Xlsb { Ok(xlsb) } - fn with_header_row(&mut self, header_row: Option) -> &mut Self { + fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self { self.options.header_row = header_row; self } @@ -541,52 +544,56 @@ impl ReaderRef for Xlsb { cells.reserve(len as usize); } - // If `header_row` is set, we only add non-empty cells after the `header_row`. - if let Some(header_row) = header_row { - loop { - match cell_reader.next_cell() { - Ok(Some(Cell { - val: DataRef::Empty, - .. - })) => (), - Ok(Some(cell)) => { - if cell.pos.0 >= header_row { - cells.push(cell); - } + match header_row { + HeaderRow::FirstNonEmptyRow => { + // the header row is the row of the first non-empty cell + loop { + match cell_reader.next_cell() { + Ok(Some(Cell { + val: DataRef::Empty, + .. + })) => (), + Ok(Some(cell)) => cells.push(cell), + Ok(None) => break, + Err(e) => return Err(e), } - Ok(None) => break, - Err(e) => return Err(e), } } + HeaderRow::Row(header_row_idx) => { + // If `header_row` is a row index, we only add non-empty cells after this index. + loop { + match cell_reader.next_cell() { + Ok(Some(Cell { + val: DataRef::Empty, + .. 
+ })) => (), + Ok(Some(cell)) => { + if cell.pos.0 >= header_row_idx { + cells.push(cell); + } + } + Ok(None) => break, + Err(e) => return Err(e), + } + } - // If `header_row` is set and the first non-empty cell is not at the `header_row`, we add - // an empty cell at the beginning with row `header_row` and same column as the first non-empty cell. - if cells.first().map_or(false, |c| c.pos.0 != header_row) { - cells.insert( - header_row as usize, - Cell { - pos: ( - header_row, - cells.first().expect("cells should not be empty").pos.1, - ), - val: DataRef::Empty, - }, - ); - } - // If `header_row` is not specified (default), the header row is the row of the first non-empty cell. - } else { - loop { - match cell_reader.next_cell() { - Ok(Some(Cell { - val: DataRef::Empty, - .. - })) => (), - Ok(Some(cell)) => cells.push(cell), - Ok(None) => break, - Err(e) => return Err(e), + // If `header_row` is set and the first non-empty cell is not at the `header_row`, we add + // an empty cell at the beginning with row `header_row` and same column as the first non-empty cell. 
+ if cells.first().map_or(false, |c| c.pos.0 != header_row_idx) { + cells.insert( + 0, // prepend: `header_row_idx` is a row number, not a position in `cells` + Cell { + pos: ( + header_row_idx, + cells.first().expect("cells should not be empty").pos.1, + ), + val: DataRef::Empty, + }, + ); } } } + Ok(Range::from_sparse(cells)) } } diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 7d5d484..78d1ae9 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -18,8 +18,8 @@ use crate::datatype::DataRef; use crate::formats::{builtin_format_by_id, detect_custom_number_format, CellFormat}; use crate::vba::VbaProject; use crate::{ - Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, ReaderRef, Sheet, SheetType, - SheetVisible, Table, + Cell, CellErrorType, Data, Dimensions, HeaderRow, Metadata, Range, Reader, ReaderRef, Sheet, + SheetType, SheetVisible, Table, }; pub use cells_reader::XlsxCellReader; @@ -203,8 +203,9 @@ pub struct Xlsx { /// Xlsx reader options #[derive(Debug, Default)] +#[non_exhaustive] struct XlsxOptions { - pub header_row: Option, + pub header_row: HeaderRow, } impl Xlsx { @@ -884,7 +885,7 @@ impl Reader for Xlsx { Ok(xlsx) } - fn with_header_row(&mut self, header_row: Option) -> &mut Self { + fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self { self.options.header_row = header_row; self } @@ -973,49 +974,52 @@ impl ReaderRef for Xlsx { cells.reserve(len as usize); } - // If `header_row` is set, we only add non-empty cells after the `header_row`. - if let Some(header_row) = header_row { - loop { - match cell_reader.next_cell() { - Ok(Some(Cell { - val: DataRef::Empty, - .. - })) => (), - Ok(Some(cell)) => { - if cell.pos.0 >= header_row { - cells.push(cell); - } + match header_row { + HeaderRow::FirstNonEmptyRow => { + // the header row is the row of the first non-empty cell + loop { + match cell_reader.next_cell() { + Ok(Some(Cell { + val: DataRef::Empty, + .. 
+ })) => (), + Ok(Some(cell)) => cells.push(cell), + Ok(None) => break, + Err(e) => return Err(e), } - Ok(None) => break, - Err(e) => return Err(e), } } + HeaderRow::Row(header_row_idx) => { + // If `header_row` is a row index, we only add non-empty cells after this index. + loop { + match cell_reader.next_cell() { + Ok(Some(Cell { + val: DataRef::Empty, + .. + })) => (), + Ok(Some(cell)) => { + if cell.pos.0 >= header_row_idx { + cells.push(cell); + } + } + Ok(None) => break, + Err(e) => return Err(e), + } + } - // If `header_row` is set and the first non-empty cell is not at the `header_row`, we add - // an empty cell at the beginning with row `header_row` and same column as the first non-empty cell. - if cells.first().map_or(false, |c| c.pos.0 != header_row) { - cells.insert( - header_row as usize, - Cell { - pos: ( - header_row, - cells.first().expect("cells should not be empty").pos.1, - ), - val: DataRef::Empty, - }, - ); - } - // If `header_row` is not specified (default), the header row is the row of the first non-empty cell. - } else { - loop { - match cell_reader.next_cell() { - Ok(Some(Cell { - val: DataRef::Empty, - .. - })) => (), - Ok(Some(cell)) => cells.push(cell), - Ok(None) => break, - Err(e) => return Err(e), + // If `header_row` is set and the first non-empty cell is not at the `header_row`, we add + // an empty cell at the beginning with row `header_row` and same column as the first non-empty cell. 
+ if cells.first().map_or(false, |c| c.pos.0 != header_row_idx) { + cells.insert( + 0, // prepend: `header_row_idx` is a row number, not a position in `cells` + Cell { + pos: ( + header_row_idx, + cells.first().expect("cells should not be empty").pos.1, + ), + val: DataRef::Empty, + }, + ); } } } diff --git a/tests/test.rs b/tests/test.rs index fb69cc9..95a4643 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,8 +1,8 @@ use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, Int, String}; use calamine::{ open_workbook, open_workbook_auto, DataRef, DataType, Dimensions, ExcelDateTime, - ExcelDateTimeType, Ods, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible, Xls, Xlsb, - Xlsx, + ExcelDateTimeType, HeaderRow, Ods, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible, + Xls, Xlsb, Xlsx, }; use calamine::{CellErrorType::*, Data}; use rstest::rstest; @@ -1798,16 +1798,16 @@ fn test_ref_xlsb() { } #[rstest] -#[case("header-row.xlsx", None, (2, 0), (9, 3), &[Empty, Empty, String("Note 1".to_string()), Empty], 32)] -#[case("header-row.xlsx", Some(0), (0, 0), (9, 3), &[Empty, Empty, Empty, Empty], 40)] -#[case("header-row.xlsx", Some(8), (8, 0), (9, 3), &[String("Columns".to_string()), String("Column A".to_string()), String("Column B".to_string()), String("Column C".to_string())], 8)] -#[case("temperature.xlsx", None, (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)] -#[case("temperature.xlsx", Some(0), (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)] -#[case("temperature-in-middle.xlsx", None, (3, 1), (5, 2), &[String("label".to_string()), String("value".to_string())], 6)] -#[case("temperature-in-middle.xlsx", Some(0), (0, 1), (5, 2), &[Empty, Empty], 12)] +#[case("header-row.xlsx", HeaderRow::FirstNonEmptyRow, (2, 0), (9, 3), &[Empty, Empty, String("Note 1".to_string()), Empty], 32)] +#[case("header-row.xlsx", HeaderRow::Row(0), (0, 0), (9, 3), &[Empty, Empty, Empty, Empty], 40)] +#[case("header-row.xlsx", 
HeaderRow::Row(8), (8, 0), (9, 3), &[String("Columns".to_string()), String("Column A".to_string()), String("Column B".to_string()), String("Column C".to_string())], 8)] +#[case("temperature.xlsx", HeaderRow::FirstNonEmptyRow, (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)] +#[case("temperature.xlsx", HeaderRow::Row(0), (0, 0), (2, 1), &[String("label".to_string()), String("value".to_string())], 6)] +#[case("temperature-in-middle.xlsx", HeaderRow::FirstNonEmptyRow, (3, 1), (5, 2), &[String("label".to_string()), String("value".to_string())], 6)] +#[case("temperature-in-middle.xlsx", HeaderRow::Row(0), (0, 1), (5, 2), &[Empty, Empty], 12)] fn test_header_row_xlsx( #[case] fixture_path: &str, - #[case] header_row: Option, + #[case] header_row: HeaderRow, #[case] expected_start: (u32, u32), #[case] expected_end: (u32, u32), #[case] expected_first_row: &[Data], @@ -1837,11 +1837,11 @@ fn test_header_row_xlsx( fn test_read_twice_with_different_header_rows() { let mut xlsx: Xlsx<_> = wb("any_sheets.xlsx"); let _ = xlsx - .with_header_row(Some(2)) + .with_header_row(HeaderRow::Row(2)) .worksheet_range("Visible") .unwrap(); let _ = xlsx - .with_header_row(Some(1)) + .with_header_row(HeaderRow::Row(1)) .worksheet_range("Visible") .unwrap(); } @@ -1882,7 +1882,7 @@ fn test_header_row_xlsb() { assert_eq!(range.rows().nth(1).unwrap(), &second_line); let range = xlsb - .with_header_row(Some(1)) + .with_header_row(HeaderRow::Row(1)) .worksheet_range("Sheet1") .unwrap(); assert_eq!(range.start(), Some((1, 0))); @@ -1926,7 +1926,7 @@ fn test_header_row_xls() { assert_eq!(range.rows().nth(1).unwrap(), &second_line); let range = xls - .with_header_row(Some(1)) + .with_header_row(HeaderRow::Row(1)) .worksheet_range("Sheet1") .unwrap(); assert_eq!(range.start(), Some((1, 0))); @@ -1956,7 +1956,7 @@ fn test_header_row_ods() { assert_eq!(range.rows().nth(2).unwrap(), &third_line); let range = ods - .with_header_row(Some(2)) + 
.with_header_row(HeaderRow::Row(2)) .worksheet_range("Sheet1") .unwrap(); assert_eq!(range.start(), Some((2, 0)));