diff --git a/src/xls.rs b/src/xls.rs index 7b4b7bb..f837bd7 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -139,9 +139,15 @@ pub struct XlsOptions { pub force_codepage: Option, } +struct SheetData { + range: Range, + formula: Range, + merge_cells: Vec, +} + /// A struct representing an old xls format file (CFB) pub struct Xls { - sheets: BTreeMap, Range)>, + sheets: BTreeMap, vba: Option, metadata: Metadata, marker: PhantomData, @@ -204,6 +210,19 @@ impl Xls { Ok(xls) } + + /// Gets the merge cell dimensions of the worksheet called `name`, or `None` if no sheet has that name + pub fn worksheet_merge_cells(&self, name: &str) -> Option> { + self.sheets.get(name).map(|r| r.merge_cells.clone()) + } + + /// Gets the merge cell dimensions of the nth worksheet. Shortcut for getting the nth + /// sheet_name, then the merge cells of the corresponding worksheet. + pub fn worksheet_merge_cells_at(&self, n: usize) -> Option> { + let sheet = self.metadata().sheets.get(n)?; + + self.worksheet_merge_cells(&sheet.name) + } } impl Reader for Xls { @@ -225,14 +244,14 @@ impl Reader for Xls { fn worksheet_range(&mut self, name: &str) -> Result, XlsError> { self.sheets .get(name) - .map(|r| r.0.clone()) + .map(|r| r.range.clone()) .ok_or_else(|| XlsError::WorksheetNotFound(name.into())) } fn worksheets(&mut self) -> Vec<(String, Range)> { self.sheets .iter() - .map(|(name, (data, _))| (name.to_owned(), data.clone())) + .map(|(name, sheet)| (name.to_owned(), sheet.range.clone())) .collect() } @@ -240,7 +259,7 @@ impl Reader for Xls { self.sheets .get(name) .ok_or_else(|| XlsError::WorksheetNotFound(name.into())) - .map(|r| r.1.clone()) + .map(|r| r.formula.clone()) } #[cfg(feature = "picture")] @@ -390,6 +409,7 @@ impl Xls { let mut cells = Vec::new(); let mut formulas = Vec::new(); let mut fmla_pos = (0, 0); + let mut merge_cells = Vec::new(); for record in records { let r = record?; match r.typ { @@ -412,7 +432,8 @@ impl Xls { 0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk 0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst 0x00BD =>
parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk - 0x000A => break, // 10: EOF, + 0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells + 0x000A => break, // 10: EOF, 0x0006 => { // 6: Formula if r.data.len() < 20 { @@ -452,7 +473,14 @@ impl Xls { } let range = Range::from_sparse(cells); let formula = Range::from_sparse(formulas); - sheets.insert(name, (range, formula)); + sheets.insert( + name, + SheetData { + range, + formula, + merge_cells, + }, + ); } self.sheets = sheets; @@ -629,6 +657,26 @@ fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result) -> Result<(), XlsError> { + let count = read_u16(r); + + for i in 0..count { + let offset: usize = (2 + i * 8).into(); + + let rf = read_u16(&r[offset + 0..]); + let rl = read_u16(&r[offset + 2..]); + let cf = read_u16(&r[offset + 4..]); + let cl = read_u16(&r[offset + 6..]); + + merge_cells.push(Dimensions { + start: (rf.into(), cf.into()), + end: (rl.into(), cl.into()), + }) + } + + Ok(()) +} + fn parse_mul_rk( r: &[u8], cells: &mut Vec>, diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 7586360..b82d062 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -757,6 +757,55 @@ impl Xlsx { data: tbl_rng, }) } + + /// Gets the merge cell dimensions of the worksheet called `name` + pub fn worksheet_merge_cells( + &mut self, + name: &str, + ) -> Option, XlsxError>> { + let (_, path) = self.sheets.iter().find(|(n, _)| n == name)?; + let xml = xml_reader(&mut self.zip, path); + + xml.map(|xml| { + let mut xml = xml?; + let mut merge_cells = Vec::new(); + let mut buffer = Vec::new(); + + loop { + buffer.clear(); + + match xml.read_event_into(&mut buffer) { + Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCells" => { + if let Ok(cells) = read_merge_cells(&mut xml) { + merge_cells = cells; + } + + break; + } + Ok(Event::Eof) => break, + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + + Ok(merge_cells) + }) + } + + /// Gets the merge cell dimensions of the nth worksheet.
Shortcut for getting the nth + /// sheet_name, then the merge cells of the corresponding worksheet. + pub fn worksheet_merge_cells_at( + &mut self, + n: usize, + ) -> Option, XlsxError>> { + let name = self + .metadata() + .sheets + .get(n) + .map(|sheet| sheet.name.clone())?; + + self.worksheet_merge_cells(&name) + } } struct InnerTableMetadata { @@ -1117,6 +1166,37 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), Ok(()) } +fn read_merge_cells(xml: &mut XlReader<'_>) -> Result, XlsxError> { + let mut merge_cells = Vec::new(); + + loop { + let mut buffer = Vec::new(); + + match xml.read_event_into(&mut buffer) { + Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCell" => { + for attribute in event.attributes() { + let attribute = attribute.map_err(XlsxError::XmlAttr)?; + + if attribute.key == QName(b"ref") { + let dimensions = get_dimension(&attribute.value)?; + merge_cells.push(dimensions); + + break; + } + } + } + Ok(Event::End(event)) if event.local_name().as_ref() == b"mergeCells" => { + break; + } + Ok(Event::Eof) => return Err(XlsxError::XmlEof("")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + + Ok(merge_cells) +} + /// check if a char vector is a valid cell name /// column name must be between A and XFD, /// last char must be digit diff --git a/tests/merge_cells.xls b/tests/merge_cells.xls new file mode 100644 index 0000000..5af5df7 Binary files /dev/null and b/tests/merge_cells.xls differ diff --git a/tests/merge_cells.xlsx b/tests/merge_cells.xlsx new file mode 100644 index 0000000..5c78406 Binary files /dev/null and b/tests/merge_cells.xlsx differ diff --git a/tests/test.rs b/tests/test.rs index 05d6e53..d66f68b 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,7 +1,7 @@ use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String}; use calamine::{ - open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range, - Reader, Sheet, SheetType, SheetVisible, Xls,
Xlsb, Xlsx, + open_workbook, open_workbook_auto, DataType, Dimensions, ExcelDateTime, ExcelDateTimeType, Ods, + Range, Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx, }; use calamine::{CellErrorType::*, Data}; use std::collections::BTreeSet; @@ -1402,6 +1402,38 @@ fn issue_271() -> Result<(), calamine::Error> { Ok(()) } +#[test] +fn issue_305_merge_cells() { + let path = format!("{}/tests/merge_cells.xlsx", env!("CARGO_MANIFEST_DIR")); + let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); + let merge_cells = excel.worksheet_merge_cells_at(0).unwrap().unwrap(); + + assert_eq!( + merge_cells, + vec![ + Dimensions::new((0, 0), (0, 1)), + Dimensions::new((1, 0), (3, 0)), + Dimensions::new((1, 1), (3, 3)) + ] + ); +} + +#[test] +fn issue_305_merge_cells_xls() { + let path = format!("{}/tests/merge_cells.xls", env!("CARGO_MANIFEST_DIR")); + let excel: Xls<_> = open_workbook(&path).unwrap(); + let merge_cells = excel.worksheet_merge_cells_at(0).unwrap(); + + assert_eq!( + merge_cells, + vec![ + Dimensions::new((0, 0), (0, 1)), + Dimensions::new((1, 0), (3, 0)), + Dimensions::new((1, 1), (3, 3)) + ] + ); +} + // cargo test --features picture #[test] #[cfg(feature = "picture")]