Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missing text #462

Closed
wants to merge 11 commits into from
5 changes: 5 additions & 0 deletions src/cfb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,11 @@ pub struct XlsEncoding {
}

impl XlsEncoding {
/// Builds an `XlsEncoding` backed by `encoding_rs::UTF_16LE`, independent of
/// any workbook code page.
pub fn unicode() -> XlsEncoding {
    let encoding = encoding_rs::UTF_16LE;
    XlsEncoding { encoding }
}
pub fn from_codepage(codepage: u16) -> Result<XlsEncoding, CfbError> {
let e = codepage::to_encoding(codepage).ok_or(CfbError::CodePageNotFound(codepage))?;
Ok(XlsEncoding { encoding: e })
Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ impl<T: CellType> Range<T> {
///
/// panics when a `Cell` row is lower than the first `Cell` row or
/// bigger than the last `Cell` row.
pub fn from_sparse(cells: Vec<Cell<T>>) -> Range<T> {
pub fn from_sparse(mut cells: Vec<Cell<T>>) -> Range<T> {
cells.sort_by_key(|cell| (cell.pos.0, cell.pos.1));
if cells.is_empty() {
Range::empty()
} else {
Expand Down
62 changes: 38 additions & 24 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ impl<RS: Read + Seek> Xls<RS> {
let cch = r.data[3] as usize;
let cce = read_u16(&r.data[4..]) as usize;
let mut name = String::new();
read_unicode_string_no_cch(&encoding, &r.data[14..], &cch, &mut name);
read_unicode_string_no_cch(&r.data[14..], &cch, &mut name);
let rgce = &r.data[r.data.len() - cce..];
let formula = parse_defined_names(rgce)?;
defined_names.push((name, formula));
Expand Down Expand Up @@ -450,24 +450,27 @@ impl<RS: Read + Seek> Xls<RS> {
// it will appear in 0x0207 record coming next
cells.push(Cell::new(fmla_pos, val));
}
let fmla = parse_formula(
&r.data[20..],
&fmla_sheet_names,
&defined_names,
&xtis,
&encoding,
)
.unwrap_or_else(|e| {
debug!("{}", e);
format!(
"Unrecognised formula \
let fmla =
parse_formula(&r.data[20..], &fmla_sheet_names, &defined_names, &xtis)
.unwrap_or_else(|e| {
debug!("{}", e);
format!(
"Unrecognised formula \
for cell ({}, {}): {:?}",
row, col, e
)
});
row, col, e
)
});
formulas.push(Cell::new(fmla_pos, fmla));
}
_ => (),
// tests/high_byte_string.xls contains a record type that
// cannot be found in the "By Number" 2.3.2 table
0x00D6 => {
let Ok(s) = parse_label(r.data, &encoding, biff) else {
continue;
};
cells.extend(s);
}
_ => {}
}
}
let range = Range::from_sparse(cells);
Expand Down Expand Up @@ -756,14 +759,18 @@ fn parse_short_string(

/// XLUnicodeString [MS-XLS 2.5.294]
fn parse_string(r: &[u8], encoding: &XlsEncoding, biff: Biff) -> Result<String, XlsError> {
if r.len() < 4 {
if r.len() < 2 {
return Err(XlsError::Len {
typ: "string",
expected: 4,
expected: 2,
found: r.len(),
});
}
let cch = read_u16(r) as usize;
if cch == 0 {
// tests/high_byte_string.xls
return Ok(String::new());
}

let (high_byte, start) = match biff {
Biff::Biff2 | Biff::Biff3 | Biff::Biff4 | Biff::Biff5 => (None, 2),
Expand Down Expand Up @@ -819,7 +826,7 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<Data>>, X
}

fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
let (rf, rl, cf, cl) = match r.len() {
let (rf, rl, mut cf, cl) = match r.len() {
10 => (
read_u16(&r[0..2]) as u32,
read_u16(&r[2..4]) as u32,
Expand All @@ -840,6 +847,12 @@ fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
});
}
};
// 2.5.53 ColU must be <= 0xFF, if larger, reasonable to assume
// starts at 0
// tests/OOM_alloc2.xls
if 0xFF < cf || cl < cf {
cf = 0;
}
if 1 <= rl && 1 <= cl {
Ok(Dimensions {
start: (rf, cf),
Expand Down Expand Up @@ -984,8 +997,10 @@ fn read_dbcs(
Ok(s)
}

fn read_unicode_string_no_cch(encoding: &XlsEncoding, buf: &[u8], len: &usize, s: &mut String) {
encoding.decode_to(&buf[1..=*len], *len, s, Some(buf[0] & 0x1 != 0));
/// Decodes `len` characters from `buf` into `s`, where `buf[0]` is a flags
/// byte and the character data starts at `buf[1]`.
///
/// Bit 0 of the flags byte is forwarded to the decoder as the "high byte"
/// indicator. Decoding always uses `XlsEncoding::unicode()`.
///
/// Returns the second element of the tuple produced by `decode_to`; the
/// formula parser uses it to advance past the string payload
/// (presumably the number of bytes consumed — confirm against `decode_to`).
///
/// Panics if `buf` is empty.
fn read_unicode_string_no_cch(buf: &[u8], len: &usize, s: &mut String) -> usize {
    let decoder = XlsEncoding::unicode();
    let payload = &buf[1..];
    let high_byte = buf[0] & 0x1 != 0;
    let decoded = decoder.decode_to(payload, *len, s, Some(high_byte));
    decoded.1
}

struct Record<'a> {
Expand Down Expand Up @@ -1126,7 +1141,6 @@ fn parse_formula(
sheets: &[String],
names: &[(String, String)],
xtis: &[Xti],
encoding: &XlsEncoding,
) -> Result<String, XlsError> {
let mut stack = Vec::new();
let mut formula = String::with_capacity(rgce.len());
Expand Down Expand Up @@ -1245,9 +1259,9 @@ fn parse_formula(
stack.push(formula.len());
formula.push('\"');
let cch = rgce[0] as usize;
read_unicode_string_no_cch(encoding, &rgce[1..], &cch, &mut formula);
let l = read_unicode_string_no_cch(&rgce[1..], &cch, &mut formula);
formula.push('\"');
rgce = &rgce[2 + cch..];
rgce = &rgce[2 + l..];
}
0x18 => {
rgce = &rgce[5..];
Expand Down
Binary file added tests/OOM_alloc.xls
Binary file not shown.
Binary file added tests/OOM_alloc2.xls
Binary file not shown.
Binary file added tests/high_byte_string.xls
Binary file not shown.
22 changes: 22 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1900,6 +1900,28 @@ fn test_ref_xlsb() {
);
}

// Regression test for `high_byte_string.xls`. Without the `#[test]` attribute
// the harness would never execute this function.
#[test]
fn test_high_byte_strings_and_unicode_strings_without_reserved_tags() {
    // The file contains XLUnicodeString values with cch = 0 that do not have a
    // reserved byte tag, as well as record types that do not seem to be
    // present in the spec.
    let mut xls: Xls<_> = wb("high_byte_string.xls");
    for (_, ws) in xls.worksheets() {
        for (row, _, cell) in ws.used_cells() {
            // Every used cell on row index 3 must carry this exact text
            // (the trailing space is intentional).
            if row == 3 {
                assert_eq!(
                    cell.as_string().unwrap(),
                    "Inside FERC's Gas Market Report monthly bidweek price file. "
                );
            }
        }
    }
}

// Opening each fixture must return normally. The file names suggest these
// inputs previously caused oversized allocations (not verifiable from here).
#[test]
fn test_oom_allocation() {
    for fixture in ["OOM_alloc.xls", "OOM_alloc2.xls"] {
        let _xls: Xls<_> = wb(fixture);
    }
}

#[rstest]
#[case("single-empty.ods")]
#[case("multi-empty.ods")]
Expand Down