Skip to content

Commit

Permalink
refactor: switch to enum for header row
Browse files Browse the repository at this point in the history
  • Loading branch information
PrettyWood committed Oct 7, 2024
1 parent 0d63b39 commit b6f91e0
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 126 deletions.
6 changes: 3 additions & 3 deletions src/auto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
use crate::errors::Error;
use crate::vba::VbaProject;
use crate::{
open_workbook, open_workbook_from_rs, Data, DataRef, Metadata, Ods, Range, Reader, ReaderRef,
Xls, Xlsb, Xlsx,
open_workbook, open_workbook_from_rs, Data, DataRef, HeaderRow, Metadata, Ods, Range, Reader,
ReaderRef, Xls, Xlsb, Xlsx,
};
use std::borrow::Cow;
use std::fs::File;
Expand Down Expand Up @@ -85,7 +85,7 @@ where
Err(Error::Msg("Sheets must be created from a Path"))
}

fn with_header_row(&mut self, header_row: Option<u32>) -> &mut Self {
fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
match self {
Sheets::Xls(ref mut e) => {
e.with_header_row(header_row);
Expand Down
19 changes: 18 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,23 @@ pub struct Sheet {
pub visible: SheetVisible,
}

/// Row to use as header.
///
/// By default, the first non-empty row is used as header.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum HeaderRow {
    /// Use the first non-empty row as header (this is the default).
    #[default]
    FirstNonEmptyRow,
    /// Use the row at the given index as header.
    Row(u32),
}

// FIXME `Reader` only needs to be `Seek` for `Xls::xls`. Because of the present API this limits
// the kinds of readers that data in other formats can be read from.
/// A trait to share spreadsheets reader functions across different `FileType`s
Expand All @@ -230,7 +247,7 @@ where

/// Set header row (i.e. first row to be read)
/// If `header_row` is `HeaderRow::FirstNonEmptyRow`, the first non-empty row will be used as header row
fn with_header_row(&mut self, header_row: Option<u32>) -> &mut Self;
fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self;

/// Gets `VbaProject`
fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, Self::Error>>;
Expand Down
23 changes: 13 additions & 10 deletions src/ods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use zip::read::{ZipArchive, ZipFile};
use zip::result::ZipError;

use crate::vba::VbaProject;
use crate::{Data, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible};
use crate::{Data, DataType, HeaderRow, Metadata, Range, Reader, Sheet, SheetType, SheetVisible};
use std::marker::PhantomData;

const MIMETYPE: &[u8] = b"application/vnd.oasis.opendocument.spreadsheet";
Expand Down Expand Up @@ -64,8 +64,9 @@ pub enum OdsError {

/// Ods reader options
#[derive(Debug, Default)]
#[non_exhaustive]
struct OdsOptions {
pub header_row: Option<u32>,
pub header_row: HeaderRow,
}

from_err!(std::io::Error, OdsError, Io);
Expand Down Expand Up @@ -173,7 +174,7 @@ where
})
}

fn with_header_row(&mut self, header_row: Option<u32>) -> &mut Self {
fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
self.options.header_row = header_row;
self
}
Expand All @@ -197,15 +198,17 @@ where
.0
.to_owned();

// If a header_row is defined, adjust the range
if let Some(header_row) = self.options.header_row {
if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
return Ok(sheet.range((header_row, start.1), end));
match self.options.header_row {
HeaderRow::FirstNonEmptyRow => Ok(sheet),
HeaderRow::Row(header_row_idx) => {
// If `header_row` is a row index, adjust the range
if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
Ok(sheet.range((header_row_idx, start.1), end))
} else {
Ok(sheet)
}
}
}

// Return the original range if no header row is set
Ok(sheet)
}

fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
Expand Down
26 changes: 14 additions & 12 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ use crate::utils::read_usize;
use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32};
use crate::vba::VbaProject;
use crate::{
Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, Sheet, SheetType, SheetVisible,
Cell, CellErrorType, Data, Dimensions, HeaderRow, Metadata, Range, Reader, Sheet, SheetType,
SheetVisible,
};

#[derive(Debug)]
Expand Down Expand Up @@ -136,9 +137,8 @@ pub struct XlsOptions {
///
/// [code page]: https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
pub force_codepage: Option<u16>,
/// Index of the header row
/// If not set, the first non-empty row is considered the header row
pub header_row: Option<u32>,
/// Row to use as header
pub header_row: HeaderRow,
}

struct SheetData {
Expand Down Expand Up @@ -234,7 +234,7 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
Self::new_with_options(reader, XlsOptions::default())
}

fn with_header_row(&mut self, header_row: Option<u32>) -> &mut Self {
fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
self.options.header_row = header_row;
self
}
Expand All @@ -255,15 +255,17 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
.map(|r| r.range.clone())
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))?;

// If a header_row is defined, adjust the range
if let Some(header_row) = self.options.header_row {
if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
return Ok(sheet.range((header_row, start.1), end));
match self.options.header_row {
HeaderRow::FirstNonEmptyRow => Ok(sheet),
HeaderRow::Row(header_row_idx) => {
// If `header_row` is a row index, adjust the range
if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
Ok(sheet.range((header_row_idx, start.1), end))
} else {
Ok(sheet)
}
}
}

// Return the original range if no header row is set
Ok(sheet)
}

fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
Expand Down
91 changes: 49 additions & 42 deletions src/xlsb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ use crate::datatype::DataRef;
use crate::formats::{builtin_format_by_code, detect_custom_number_format, CellFormat};
use crate::utils::{push_column, read_f64, read_i32, read_u16, read_u32, read_usize};
use crate::vba::VbaProject;
use crate::{Cell, Data, Metadata, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible};
use crate::{
Cell, Data, HeaderRow, Metadata, Range, Reader, ReaderRef, Sheet, SheetType, SheetVisible,
};

/// A Xlsb specific error
#[derive(Debug)]
Expand Down Expand Up @@ -130,8 +132,9 @@ impl std::error::Error for XlsbError {

/// Xlsb reader options
#[derive(Debug, Default)]
#[non_exhaustive]
struct XlsbOptions {
pub header_row: Option<u32>,
pub header_row: HeaderRow,
}

/// A Xlsb reader
Expand Down Expand Up @@ -468,7 +471,7 @@ impl<RS: Read + Seek> Reader<RS> for Xlsb<RS> {
Ok(xlsb)
}

fn with_header_row(&mut self, header_row: Option<u32>) -> &mut Self {
fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
self.options.header_row = header_row;
self
}
Expand Down Expand Up @@ -541,52 +544,56 @@ impl<RS: Read + Seek> ReaderRef<RS> for Xlsb<RS> {
cells.reserve(len as usize);
}

// If `header_row` is set, we only add non-empty cells after the `header_row`.
if let Some(header_row) = header_row {
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => {
if cell.pos.0 >= header_row {
cells.push(cell);
}
match header_row {
HeaderRow::FirstNonEmptyRow => {
// the header row is the row of the first non-empty cell
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => cells.push(cell),
Ok(None) => break,
Err(e) => return Err(e),
}
Ok(None) => break,
Err(e) => return Err(e),
}
}
HeaderRow::Row(header_row_idx) => {
// If `header_row` is a row index, we only add non-empty cells at or after this index.
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => {
if cell.pos.0 >= header_row_idx {
cells.push(cell);
}
}
Ok(None) => break,
Err(e) => return Err(e),
}
}

// If `header_row` is set and the first non-empty cell is not at the `header_row`, we add
// an empty cell at the beginning with row `header_row` and same column as the first non-empty cell.
if cells.first().map_or(false, |c| c.pos.0 != header_row) {
cells.insert(
header_row as usize,
Cell {
pos: (
header_row,
cells.first().expect("cells should not be empty").pos.1,
),
val: DataRef::Empty,
},
);
}
// If `header_row` is not specified (default), the header row is the row of the first non-empty cell.
} else {
loop {
match cell_reader.next_cell() {
Ok(Some(Cell {
val: DataRef::Empty,
..
})) => (),
Ok(Some(cell)) => cells.push(cell),
Ok(None) => break,
Err(e) => return Err(e),
// If `header_row` is set and the first non-empty cell is not at the `header_row`, we add
// an empty cell at the beginning with row `header_row` and same column as the first non-empty cell.
if cells.first().map_or(false, |c| c.pos.0 != header_row_idx) {
cells.insert(
header_row_idx as usize,
Cell {
pos: (
header_row_idx,
cells.first().expect("cells should not be empty").pos.1,
),
val: DataRef::Empty,
},
);
}
}
}

Ok(Range::from_sparse(cells))
}
}
Expand Down
Loading

0 comments on commit b6f91e0

Please sign in to comment.