Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #374 #375

Merged
merged 5 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 87 additions & 14 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ impl<RS: Read + Seek> Xls<RS> {
let mut xtis = Vec::new();
let mut formats = BTreeMap::new();
let mut xfs = Vec::new();
let mut biff = Biff::Biff8; // Binary Interchange File Format (BIFF) version
let codepage = self.options.force_codepage.unwrap_or(1200);
let mut encoding = XlsEncoding::from_codepage(codepage)?;
#[cfg(feature = "picture")]
Expand Down Expand Up @@ -300,10 +301,15 @@ impl<RS: Read + Seek> Xls<RS> {
}
// RRTabId
0x0085 => {
let (pos, sheet) = parse_sheet_metadata(&mut r, &encoding)?;
let (pos, sheet) = parse_sheet_metadata(&mut r, &encoding, biff)?;
self.metadata.sheets.push(sheet.clone());
sheet_names.push((pos, sheet.name)); // BoundSheet8
}
// BOF
0x0809 => {
let bof = parse_bof(&mut r)?;
biff = bof.biff;
}
0x0018 => {
// Lbl for defined_names
let cch = r.data[3] as usize;
Expand Down Expand Up @@ -387,11 +393,11 @@ impl<RS: Read + Seek> Xls<RS> {
}
//0x0201 => cells.push(parse_blank(r.data)?), // 513: Blank
0x0203 => cells.push(parse_number(r.data, &self.formats, self.is_1904)?), // 515: Number
0x0204 => cells.extend(parse_label(r.data, &encoding)?), // 516: Label [MS-XLS 2.4.148]
0x0205 => cells.push(parse_bool_err(r.data)?), // 517: BoolErr
0x0204 => cells.extend(parse_label(r.data, &encoding, biff)?), // 516: Label [MS-XLS 2.4.148]
0x0205 => cells.push(parse_bool_err(r.data)?), // 517: BoolErr
0x0207 => {
// 519 String (formula value)
let val = DataType::String(parse_string(r.data, &encoding)?);
let val = DataType::String(parse_string(r.data, &encoding, biff)?);
cells.push(Cell::new(fmla_pos, val))
}
0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk
Expand Down Expand Up @@ -455,10 +461,57 @@ impl<RS: Read + Seek> Xls<RS> {
}
}

/// https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/4d6a3d1e-d7c5-405f-bbae-d01e9cb79366
struct Bof {
/// Binary Interchange File Format
biff: Biff,
}

/// https://www.loc.gov/preservation/digital/formats/fdd/fdd000510.shtml#notes
#[derive(Clone, Copy)]
enum Biff {
Biff2,
Biff3,
Biff4,
Biff5,
Biff8,
// Used by MS-XLSB Workbook(2.1.7.61) or Worksheet(2.1.7.61) which are not supported yet.
// Biff12,
}

/// BOF [MS-XLS] 2.4.21
fn parse_bof(r: &mut Record<'_>) -> Result<Bof, XlsError> {
let mut dt = 0;
let biff_version = read_u16(&r.data[..2]);

if r.data.len() >= 4 {
dt = read_u16(&r.data[2..]);
};

let biff = match biff_version {
0x0200 | 0x0002 | 0x0007 => Biff::Biff2,
0x0300 => Biff::Biff3,
0x0400 => Biff::Biff4,
0x0500 => Biff::Biff5,
0x0600 => Biff::Biff8,
0 => {
if dt == 0x1000 {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't able to find an example of biff5 file that would trigger this logic. Please let me know if you'd like me to remove this code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify: I'm talking about removing this code since, if we comment it out, as you've suggested earlier, we'd have no use for dt variable. Then I guess we'll need to comment out more code. I agree that following the spec is the best option, especially with no evidence that we'll ever encounter files that can trigger this logic.

Biff::Biff5
} else {
Biff::Biff8
}
}
_ => Biff::Biff8,
};

Ok(Bof { biff })
}

/// BoundSheet8 [MS-XLS 2.4.28]
fn parse_sheet_metadata(
r: &mut Record<'_>,
encoding: &XlsEncoding,
biff: Biff,
) -> Result<(usize, Sheet), XlsError> {
let pos = read_u32(r.data) as usize;
let visible = match r.data[4] & 0b0011_1111 {
Expand All @@ -485,7 +538,7 @@ fn parse_sheet_metadata(
}
};
r.data = &r.data[6..];
let name = parse_short_string(r, encoding)?;
let name = parse_short_string(r, encoding, biff)?;
let sheet_name = name
.as_bytes()
.iter()
Expand Down Expand Up @@ -628,24 +681,35 @@ fn rk_num(rk: &[u8], formats: &[CellFormat], is_1904: bool) -> DataType {
}

/// ShortXLUnicodeString [MS-XLS 2.5.240]
fn parse_short_string(r: &mut Record<'_>, encoding: &XlsEncoding) -> Result<String, XlsError> {
fn parse_short_string(
r: &mut Record<'_>,
encoding: &XlsEncoding,
biff: Biff,
) -> Result<String, XlsError> {
if r.data.len() < 2 {
return Err(XlsError::Len {
typ: "short string",
expected: 2,
found: r.data.len(),
});
}

let cch = r.data[0] as usize;
let high_byte = r.data[1] & 0x1 != 0;
r.data = &r.data[2..];
r.data = &r.data[1..];
let mut high_byte = None;

if matches!(biff, Biff::Biff8) {
high_byte = Some(r.data[0] & 0x1 != 0);
r.data = &r.data[1..];
}

let mut s = String::with_capacity(cch);
let _ = encoding.decode_to(r.data, cch, &mut s, Some(high_byte));
let _ = encoding.decode_to(r.data, cch, &mut s, high_byte);
Ok(s)
}

/// XLUnicodeString [MS-XLS 2.5.294]
fn parse_string(r: &[u8], encoding: &XlsEncoding) -> Result<String, XlsError> {
fn parse_string(r: &[u8], encoding: &XlsEncoding, biff: Biff) -> Result<String, XlsError> {
if r.len() < 4 {
return Err(XlsError::Len {
typ: "string",
Expand All @@ -654,13 +718,22 @@ fn parse_string(r: &[u8], encoding: &XlsEncoding) -> Result<String, XlsError> {
});
}
let cch = read_u16(r) as usize;
let high_byte = r[2] & 0x1 != 0;

let (high_byte, start) = match biff {
Biff::Biff2 | Biff::Biff3 | Biff::Biff4 | Biff::Biff5 => (None, 2),
_ => (Some(r[2] & 0x1 != 0), 3),
};

let mut s = String::with_capacity(cch);
let _ = encoding.decode_to(&r[3..], cch, &mut s, Some(high_byte));
let _ = encoding.decode_to(&r[start..], cch, &mut s, high_byte);
Ok(s)
}

fn parse_label(r: &[u8], encoding: &XlsEncoding) -> Result<Option<Cell<DataType>>, XlsError> {
fn parse_label(
r: &[u8],
encoding: &XlsEncoding,
biff: Biff,
) -> Result<Option<Cell<DataType>>, XlsError> {
if r.len() < 6 {
return Err(XlsError::Len {
typ: "label",
Expand All @@ -673,7 +746,7 @@ fn parse_label(r: &[u8], encoding: &XlsEncoding) -> Result<Option<Cell<DataType>
let _ixfe = read_u16(&r[4..]);
return Ok(Some(Cell::new(
(row as u32, col as u32),
DataType::String(parse_string(&r[6..], encoding)?),
DataType::String(parse_string(&r[6..], encoding, biff)?),
)));
}

Expand Down
Binary file added tests/biff5_write.xls
Binary file not shown.
20 changes: 20 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1488,3 +1488,23 @@ fn any_sheets_ods() {
]
);
}

#[test]
fn issue_374() {
let path = format!("{}/tests/biff5_write.xls", env!("CARGO_MANIFEST_DIR"));
let mut workbook: Xls<_> = open_workbook(path).unwrap();

let first_sheet_name = workbook.sheet_names().first().unwrap().to_owned();

assert_eq!("SheetJS", first_sheet_name);

let range = workbook
.worksheet_range(&first_sheet_name)
.unwrap()
.unwrap();

let second_row = range.rows().nth(1).unwrap();
let cell_text = second_row.get(3).unwrap().to_string();

assert_eq!("sheetjs", cell_text);
}