diff --git a/src/xls.rs b/src/xls.rs index 7b4b7bb..06f6250 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -141,7 +141,7 @@ pub struct XlsOptions { /// A struct representing an old xls format file (CFB) pub struct Xls { - sheets: BTreeMap, Range)>, + sheets: BTreeMap, Range, Vec)>, vba: Option, metadata: Metadata, marker: PhantomData, @@ -204,6 +204,19 @@ impl Xls { Ok(xls) } + + /// Gets the merge cell dimensions of the worksheet called `name` (a clone of the stored list), or `None` when no sheet has that name + pub fn worksheet_merge_cells(&mut self, name: &str) -> Option> { + self.sheets.get(name).map(|r| r.2.clone()) + } + + /// Gets the merge cell dimensions of the nth worksheet. Shortcut for getting the nth + /// sheet_name, then the corresponding merge cells; `None` when there is no nth sheet name. + pub fn worksheet_merge_cells_at(&mut self, n: usize) -> Option> { + let name = self.sheet_names().get(n)?.to_string(); + + self.worksheet_merge_cells(&name) + } } impl Reader for Xls { @@ -232,7 +245,7 @@ impl Reader for Xls { fn worksheets(&mut self) -> Vec<(String, Range)> { self.sheets .iter() - .map(|(name, (data, _))| (name.to_owned(), data.clone())) + .map(|(name, (data, _, _))| (name.to_owned(), data.clone())) .collect() } @@ -390,6 +403,7 @@ impl Xls { let mut cells = Vec::new(); let mut formulas = Vec::new(); let mut fmla_pos = (0, 0); + let mut merge_cells = Vec::new(); for record in records { let r = record?; match r.typ { @@ -412,7 +426,8 @@ impl Xls { 0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk 0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst 0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk - 0x000A => break, // 10: EOF, + 0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells + 0x000A => break, // 10: EOF, 0x0006 => { // 6: Formula if r.data.len() < 20 { @@ -452,7 +467,7 @@ impl Xls { } let range = Range::from_sparse(cells); let formula = Range::from_sparse(formulas); - sheets.insert(name, (range, formula)); + sheets.insert(name, (range, formula, merge_cells)); } self.sheets = sheets; @@ 
-629,6 +644,26 @@ fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result) -> Result<(), XlsError> { /* Reads a count from the start of `r`, then for each entry reads four 16-bit values at byte offsets 2 + i * 8 (+0, +2, +4, +6) and pushes one Dimensions with start (rf, cf) and end (rl, cl); presumably first/last row and first/last column - TODO confirm. NOTE(review): `r` is sliced at these offsets with no length check, so a truncated record would panic on the slice; `i * 8` is also computed before the conversion to usize - verify it cannot overflow for large counts. */ + let count = read_u16(r); + + for i in 0..count { + let offset: usize = (2 + i * 8).into(); + + let rf = read_u16(&r[offset + 0..]); + let rl = read_u16(&r[offset + 2..]); + let cf = read_u16(&r[offset + 4..]); + let cl = read_u16(&r[offset + 6..]); + + merge_cells.push(Dimensions { + start: (rf.into(), cf.into()), + end: (rl.into(), cl.into()), + }) + } + + Ok(()) +} + fn parse_mul_rk( r: &[u8], cells: &mut Vec>, diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 7586360..a54280b 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -757,6 +757,52 @@ impl Xlsx { data: tbl_rng, }) } + + /// Gets the merge cell dimensions of the worksheet called `name`; `None` when no sheet has that name or `xml_reader` yields `None`. An empty list is returned when no `mergeCells` element is found before EOF. NOTE(review): `xml_result.unwrap()` panics on a reader error, and an `Err` from `read_merge_cells` is dropped (empty list returned) - consider propagating both. + pub fn worksheet_merge_cells( + &mut self, + name: &str, + ) -> Option, XlsxError>> { + let xml = match self.sheets.iter().find(|&&(ref n, _)| n == name) { + Some(&(_, ref path)) => xml_reader(&mut self.zip, path), + None => return None, + }; + + xml.map(|xml_result| { + let mut xml = xml_result.unwrap(); + let mut merge_cells = Vec::new(); + let mut buf = Vec::new(); + + loop { + buf.clear(); + match xml.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) => match e.local_name().as_ref() { + b"mergeCells" => { + if let Ok(cells) = read_merge_cells(&mut xml) { + merge_cells = cells; + } + break; + } + _ => (), + }, + Ok(Event::Eof) => break, + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + Ok(merge_cells) + }) + } + + /// Gets the merge cell dimensions of the nth worksheet. Shortcut for getting the nth + /// sheet_name, then the corresponding merge cells; `None` when there is no nth sheet name. 
+ pub fn worksheet_merge_cells_at( + &mut self, + n: usize, + ) -> Option, XlsxError>> { + let name = self.sheet_names().get(n)?.to_string(); + self.worksheet_merge_cells(&name) + } } struct InnerTableMetadata { @@ -1117,6 +1163,54 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), Ok(()) } +fn read_merge_cells(xml: &mut XlReader<'_>) -> Result, XlsxError> { /* Collects one entry per `mergeCell` start tag by passing its `ref` attribute to get_dimension, stopping at the closing `mergeCells` tag. Reaching EOF before that tag, a reader error, a malformed attribute, or a `ref` that get_dimension rejects is returned as an error. Only `Event::Start` is matched, so this relies on the reader reporting self-closing `mergeCell` elements as start events - TODO confirm against xml_reader's configuration. */ + let mut buf = Vec::new(); + let mut merge_cells = Vec::new(); + loop { + buf.clear(); + match xml.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"mergeCell" => { + for a in e.attributes() { + match a.map_err(XlsxError::XmlAttr)? { + Attribute { + key: QName(b"ref"), + value: v, + } => { + match get_dimension(&v) { + Ok(d) => merge_cells.push(d), + Err(e) => return Err(e), + } + break; + } + _ => (), + } + } + } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"mergeCells" => { + break; + } + Ok(Event::Eof) => return Err(XlsxError::XmlEof("")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + + Ok(merge_cells) +} + +#[test] +fn test_dimensions() { + assert_eq!(get_row_column(b"A1").unwrap(), (0, 0)); + assert_eq!(get_row_column(b"C107").unwrap(), (106, 2)); + assert_eq!( + get_dimension(b"C2:D35").unwrap(), + Dimensions { + start: (1, 2), + end: (34, 3) + } + ); +} + /// check if a char vector is a valid cell name /// column name must be between A and XFD, /// last char must be digit diff --git a/tests/merge_cells.xls b/tests/merge_cells.xls new file mode 100644 index 0000000..5af5df7 Binary files /dev/null and b/tests/merge_cells.xls differ diff --git a/tests/merge_cells.xlsx b/tests/merge_cells.xlsx new file mode 100644 index 0000000..5c78406 Binary files /dev/null and b/tests/merge_cells.xlsx differ diff --git a/tests/test.rs b/tests/test.rs index 05d6e53..8655f74 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -32,6 +32,33 @@ macro_rules! range_eq { }; } +macro_rules! 
merge_cells_eq { + ($merge_cells:expr, $right:expr) => { /* For every index i of `$right`, asserts that both tuple fields of `$merge_cells[i].start` and `.end` equal `$right[i][0]` and `$right[i][1]`. NOTE(review): only `$right` is iterated, so extra entries in `$merge_cells` are never compared, and a shorter `$merge_cells` fails with an index panic rather than an assertion message - consider asserting equal lengths first. */ + for (i, item) in $right.iter().enumerate() { + assert_eq!( + $merge_cells[i].start.0, item[0].0, + "Mismatch at position ({})", + i + ); + assert_eq!( + $merge_cells[i].start.1, item[0].1, + "Mismatch at position ({})", + i + ); + assert_eq!( + $merge_cells[i].end.0, item[1].0, + "Mismatch at position ({})", + i + ); + assert_eq!( + $merge_cells[i].end.1, item[1].1, + "Mismatch at position ({})", + i + ); + } + }; +} + #[test] fn issue_2() { setup(); @@ -1402,6 +1429,30 @@ fn issue_271() -> Result<(), calamine::Error> { Ok(()) } +#[test] +fn issue_305_merge_cells() { + let path = format!("{}/tests/merge_cells.xlsx", env!("CARGO_MANIFEST_DIR")); + let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); + let merge_cells = excel.worksheet_merge_cells_at(0).unwrap().unwrap(); + + merge_cells_eq!( + merge_cells, + [[(0, 0), (0, 1)], [(1, 0), (3, 0)], [(1, 1), (3, 3)]] + ); +} + +#[test] +fn issue_305_merge_cells_xls() { + let path = format!("{}/tests/merge_cells.xls", env!("CARGO_MANIFEST_DIR")); + let mut excel: Xls<_> = open_workbook(&path).unwrap(); + let merge_cells = excel.worksheet_merge_cells_at(0).unwrap(); + + merge_cells_eq!( + merge_cells, + [[(0, 0), (0, 1)], [(1, 0), (3, 0)], [(1, 1), (3, 3)]] + ); +} + // cargo test --features picture #[test] #[cfg(feature = "picture")]