Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding in ability to read merge cells from xls and xlsx files. #437

Merged
merged 1 commit into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,15 @@ pub struct XlsOptions {
pub force_codepage: Option<u16>,
}

/// Everything stored per worksheet after an xls workbook has been parsed.
struct SheetData {
/// Cell values of the sheet.
range: Range<Data>,
/// Formulas of the sheet, as strings.
formula: Range<String>,
/// Merged-cell regions collected while parsing the sheet.
merge_cells: Vec<Dimensions>,
}

/// A struct representing an old xls format file (CFB)
pub struct Xls<RS> {
sheets: BTreeMap<String, (Range<Data>, Range<String>)>,
sheets: BTreeMap<String, SheetData>,
vba: Option<VbaProject>,
metadata: Metadata,
marker: PhantomData<RS>,
Expand Down Expand Up @@ -204,6 +210,19 @@ impl<RS: Read + Seek> Xls<RS> {

Ok(xls)
}

/// Gets the merged-cell regions of the worksheet called `name`.
///
/// Returns `None` when no sheet is stored under that name; otherwise a
/// clone of the sheet's merge cell dimensions.
pub fn worksheet_merge_cells(&self, name: &str) -> Option<Vec<Dimensions>> {
    let sheet = self.sheets.get(name)?;

    Some(sheet.merge_cells.clone())
}

/// Gets the merged-cell regions of the nth worksheet.
///
/// Shortcut for looking up the nth sheet name in the workbook metadata and
/// then fetching that sheet's merge cell dimensions. Returns `None` when
/// the metadata has no nth sheet or no sheet data is stored under its name.
pub fn worksheet_merge_cells_at(&self, n: usize) -> Option<Vec<Dimensions>> {
    self.metadata()
        .sheets
        .get(n)
        .and_then(|sheet| self.worksheet_merge_cells(&sheet.name))
}
}

impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
Expand All @@ -225,22 +244,22 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
/// Returns a clone of the cell value range of the sheet called `name`,
/// or `XlsError::WorksheetNotFound` when no sheet is stored under that name.
fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, XlsError> {
self.sheets
.get(name)
.map(|r| r.0.clone())
.map(|r| r.range.clone())
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))
}

/// Returns every stored sheet as a `(name, cell value range)` pair;
/// both the name and the range are copied out of `self.sheets`.
fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
self.sheets
.iter()
.map(|(name, (data, _))| (name.to_owned(), data.clone()))
.map(|(name, sheet)| (name.to_owned(), sheet.range.clone()))
.collect()
}

/// Returns a clone of the formula range of the sheet called `name`,
/// or `XlsError::WorksheetNotFound` when no sheet is stored under that name.
fn worksheet_formula(&mut self, name: &str) -> Result<Range<String>, XlsError> {
self.sheets
.get(name)
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))
.map(|r| r.1.clone())
.map(|r| r.formula.clone())
}

#[cfg(feature = "picture")]
Expand Down Expand Up @@ -390,6 +409,7 @@ impl<RS: Read + Seek> Xls<RS> {
let mut cells = Vec::new();
let mut formulas = Vec::new();
let mut fmla_pos = (0, 0);
let mut merge_cells = Vec::new();
for record in records {
let r = record?;
match r.typ {
Expand All @@ -412,7 +432,8 @@ impl<RS: Read + Seek> Xls<RS> {
0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk
0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst
0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk
0x000A => break, // 10: EOF,
0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells
0x000A => break, // 10: EOF,
0x0006 => {
// 6: Formula
if r.data.len() < 20 {
Expand Down Expand Up @@ -452,7 +473,14 @@ impl<RS: Read + Seek> Xls<RS> {
}
let range = Range::from_sparse(cells);
let formula = Range::from_sparse(formulas);
sheets.insert(name, (range, formula));
sheets.insert(
name,
SheetData {
range,
formula,
merge_cells,
},
);
}

self.sheets = sheets;
Expand Down Expand Up @@ -629,6 +657,26 @@ fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result<Cell<Data
))
}

fn parse_merge_cells(r: &[u8], merge_cells: &mut Vec<Dimensions>) -> Result<(), XlsError> {
let count = read_u16(r);

for i in 0..count {
let offset: usize = (2 + i * 8).into();

let rf = read_u16(&r[offset + 0..]);
let rl = read_u16(&r[offset + 2..]);
let cf = read_u16(&r[offset + 4..]);
let cl = read_u16(&r[offset + 6..]);

merge_cells.push(Dimensions {
start: (rf.into(), cf.into()),
end: (rl.into(), cl.into()),
})
}

Ok(())
}

fn parse_mul_rk(
r: &[u8],
cells: &mut Vec<Cell<Data>>,
Expand Down
80 changes: 80 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,55 @@ impl<RS: Read + Seek> Xlsx<RS> {
data: tbl_rng,
})
}

/// Gets the merged-cell regions of the worksheet called `name`.
///
/// The sheet's XML is scanned for the first `mergeCells` element and the
/// ranges listed inside it are returned. A sheet without such an element
/// yields an empty vector.
///
/// Returns `None` when no sheet has that name, or when `xml_reader` yields
/// no reader for the sheet's path.
///
/// # Errors
///
/// Returns the error from opening or reading the sheet XML. Errors raised
/// while parsing the `mergeCells` element are also returned instead of
/// being reported as an empty list.
pub fn worksheet_merge_cells(
    &mut self,
    name: &str,
) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    let (_, path) = self.sheets.iter().find(|(n, _)| n == name)?;
    let xml = xml_reader(&mut self.zip, path);

    xml.map(|xml| {
        let mut xml = xml?;
        let mut buffer = Vec::new();

        loop {
            buffer.clear();

            match xml.read_event_into(&mut buffer) {
                Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCells" => {
                    // Only the first `mergeCells` element is read; a parse
                    // failure inside it is surfaced to the caller.
                    return read_merge_cells(&mut xml);
                }
                // End of document without a `mergeCells` element.
                Ok(Event::Eof) => return Ok(Vec::new()),
                Err(e) => return Err(XlsxError::Xml(e)),
                _ => (),
            }
        }
    })
}

/// Gets the merged-cell regions of the nth worksheet.
///
/// Shortcut for looking up the nth sheet name in the workbook metadata and
/// then reading that sheet's merge cell dimensions. Returns `None` when
/// the metadata has no nth sheet.
pub fn worksheet_merge_cells_at(
    &mut self,
    n: usize,
) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    let sheet = self.metadata().sheets.get(n)?;
    // The name is copied so the metadata borrow ends before `self` is
    // borrowed mutably below.
    let name = sheet.name.clone();

    self.worksheet_merge_cells(&name)
}
}

struct InnerTableMetadata {
Expand Down Expand Up @@ -1117,6 +1166,37 @@ fn check_for_password_protected<RS: Read + Seek>(reader: &mut RS) -> Result<(),
Ok(())
}

/// Collects the `ref` range of every `mergeCell` element until the closing
/// `mergeCells` tag is reached.
///
/// # Errors
///
/// Returns an error when reading an XML event or an attribute fails, when
/// a `ref` value is rejected by `get_dimension`, or when the document ends
/// before the closing `mergeCells` tag.
fn read_merge_cells(xml: &mut XlReader<'_>) -> Result<Vec<Dimensions>, XlsxError> {
    let mut merge_cells = Vec::new();
    // One buffer reused for every event instead of a fresh allocation per
    // iteration.
    let mut buffer = Vec::new();

    loop {
        buffer.clear();

        match xml.read_event_into(&mut buffer) {
            Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCell" => {
                for attribute in event.attributes() {
                    let attribute = attribute.map_err(XlsxError::XmlAttr)?;

                    if attribute.key == QName(b"ref") {
                        merge_cells.push(get_dimension(&attribute.value)?);

                        // Only the first `ref` attribute of an element is used.
                        break;
                    }
                }
            }
            Ok(Event::End(event)) if event.local_name().as_ref() == b"mergeCells" => break,
            // Name the element that was left unclosed rather than passing
            // an empty string.
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("mergeCells")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }

    Ok(merge_cells)
}

/// check if a char vector is a valid cell name
/// column name must be between A and XFD,
/// last char must be digit
Expand Down
Binary file added tests/merge_cells.xls
Binary file not shown.
Binary file added tests/merge_cells.xlsx
Binary file not shown.
36 changes: 34 additions & 2 deletions tests/test.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String};
use calamine::{
open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range,
Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
open_workbook, open_workbook_auto, DataType, Dimensions, ExcelDateTime, ExcelDateTimeType, Ods,
Range, Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
};
use calamine::{CellErrorType::*, Data};
use std::collections::BTreeSet;
Expand Down Expand Up @@ -1402,6 +1402,38 @@ fn issue_271() -> Result<(), calamine::Error> {
Ok(())
}

// Merged regions of the first sheet of the xlsx fixture are reported in order.
#[test]
fn issue_305_merge_cells() {
    let path = format!("{}/tests/merge_cells.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut workbook: Xlsx<_> = open_workbook(&path).unwrap();

    let expected = vec![
        Dimensions::new((0, 0), (0, 1)),
        Dimensions::new((1, 0), (3, 0)),
        Dimensions::new((1, 1), (3, 3)),
    ];
    let actual = workbook.worksheet_merge_cells_at(0).unwrap().unwrap();

    assert_eq!(actual, expected);
}

// Merged regions of the first sheet of the xls fixture are reported in order.
#[test]
fn issue_305_merge_cells_xls() {
    let path = format!("{}/tests/merge_cells.xls", env!("CARGO_MANIFEST_DIR"));
    let workbook: Xls<_> = open_workbook(&path).unwrap();

    let expected = vec![
        Dimensions::new((0, 0), (0, 1)),
        Dimensions::new((1, 0), (3, 0)),
        Dimensions::new((1, 1), (3, 3)),
    ];
    let actual = workbook.worksheet_merge_cells_at(0).unwrap();

    assert_eq!(actual, expected);
}

// cargo test --features picture
#[test]
#[cfg(feature = "picture")]
Expand Down