Skip to content

Commit

Permalink
Adding in ability to read merge cells from xls and xlsx files.
Browse files Browse the repository at this point in the history
  • Loading branch information
tspayne87 authored and Andrii Hetman committed May 24, 2024
1 parent 1e65739 commit 6b25dbc
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 8 deletions.
60 changes: 54 additions & 6 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,15 @@ pub struct XlsOptions {
pub force_codepage: Option<u16>,
}

/// Everything parsed for a single worksheet, stored per sheet name in `Xls::sheets`.
struct SheetData {
/// Cell values of the sheet, built from the sparse list of parsed cells.
range: Range<Data>,
/// Formula text of the sheet, built from the sparse list of parsed formulas.
formula: Range<String>,
/// Merged-cell regions collected from the sheet's merge-cells records.
merge_cells: Vec<Dimensions>,
}

/// A struct representing an old xls format file (CFB)
pub struct Xls<RS> {
sheets: BTreeMap<String, (Range<Data>, Range<String>)>,
sheets: BTreeMap<String, SheetData>,
vba: Option<VbaProject>,
metadata: Metadata,
marker: PhantomData<RS>,
Expand Down Expand Up @@ -204,6 +210,19 @@ impl<RS: Read + Seek> Xls<RS> {

Ok(xls)
}

/// Returns the merged-cell regions of the worksheet called `name`.
///
/// Yields `None` when no sheet is stored under that name; otherwise a copy
/// of the regions recorded for that sheet.
pub fn worksheet_merge_cells(&self, name: &str) -> Option<Vec<Dimensions>> {
    let sheet = self.sheets.get(name)?;
    Some(sheet.merge_cells.clone())
}

/// Returns the merged-cell regions of the nth worksheet.
///
/// Shortcut for looking up the nth sheet name in the metadata and passing it
/// to `worksheet_merge_cells`. Yields `None` when `n` is out of range or no
/// sheet is stored under that name.
pub fn worksheet_merge_cells_at(&self, n: usize) -> Option<Vec<Dimensions>> {
    self.metadata()
        .sheets
        .get(n)
        .and_then(|sheet| self.worksheet_merge_cells(&sheet.name))
}
}

impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
Expand All @@ -225,22 +244,22 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, XlsError> {
self.sheets
.get(name)
.map(|r| r.0.clone())
.map(|r| r.range.clone())
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))
}

fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
self.sheets
.iter()
.map(|(name, (data, _))| (name.to_owned(), data.clone()))
.map(|(name, sheet)| (name.to_owned(), sheet.range.clone()))
.collect()
}

fn worksheet_formula(&mut self, name: &str) -> Result<Range<String>, XlsError> {
self.sheets
.get(name)
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))
.map(|r| r.1.clone())
.map(|r| r.formula.clone())
}

#[cfg(feature = "picture")]
Expand Down Expand Up @@ -390,6 +409,7 @@ impl<RS: Read + Seek> Xls<RS> {
let mut cells = Vec::new();
let mut formulas = Vec::new();
let mut fmla_pos = (0, 0);
let mut merge_cells = Vec::new();
for record in records {
let r = record?;
match r.typ {
Expand All @@ -412,7 +432,8 @@ impl<RS: Read + Seek> Xls<RS> {
0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk
0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst
0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk
0x000A => break, // 10: EOF,
0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells
0x000A => break, // 10: EOF,
0x0006 => {
// 6: Formula
if r.data.len() < 20 {
Expand Down Expand Up @@ -452,7 +473,14 @@ impl<RS: Read + Seek> Xls<RS> {
}
let range = Range::from_sparse(cells);
let formula = Range::from_sparse(formulas);
sheets.insert(name, (range, formula));
sheets.insert(
name,
SheetData {
range,
formula,
merge_cells,
},
);
}

self.sheets = sheets;
Expand Down Expand Up @@ -629,6 +657,26 @@ fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result<Cell<Data
))
}

fn parse_merge_cells(r: &[u8], merge_cells: &mut Vec<Dimensions>) -> Result<(), XlsError> {
let count = read_u16(r);

for i in 0..count {
let offset: usize = (2 + i * 8).into();

let rf = read_u16(&r[offset + 0..]);
let rl = read_u16(&r[offset + 2..]);
let cf = read_u16(&r[offset + 4..]);
let cl = read_u16(&r[offset + 6..]);

merge_cells.push(Dimensions {
start: (rf.into(), cf.into()),
end: (rl.into(), cl.into()),
})
}

Ok(())
}

fn parse_mul_rk(
r: &[u8],
cells: &mut Vec<Cell<Data>>,
Expand Down
80 changes: 80 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,55 @@ impl<RS: Read + Seek> Xlsx<RS> {
data: tbl_rng,
})
}

/// Gets the merged-cell regions of the worksheet called `name`.
///
/// Returns `None` when no sheet is named `name`, or when `xml_reader` yields
/// `None` for the sheet's path. Otherwise the sheet XML is scanned up to its
/// `mergeCells` element:
///
/// * `Some(Ok(regions))` with the parsed regions, or an empty vector when the
///   end of the document is reached without a `mergeCells` element;
/// * `Some(Err(_))` when the reader cannot be opened, the XML cannot be read,
///   or the `mergeCells` element fails to parse.
pub fn worksheet_merge_cells(
    &mut self,
    name: &str,
) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    let (_, path) = self.sheets.iter().find(|(n, _)| n == name)?;
    let xml = xml_reader(&mut self.zip, path);

    xml.map(|xml| {
        let mut xml = xml?;
        let mut buffer = Vec::new();

        loop {
            buffer.clear();

            match xml.read_event_into(&mut buffer) {
                Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCells" => {
                    // Propagate parse failures: a malformed `mergeCells`
                    // element must not be reported as "no merged cells".
                    return read_merge_cells(&mut xml);
                }
                Ok(Event::Eof) => return Ok(Vec::new()),
                Err(e) => return Err(XlsxError::Xml(e)),
                _ => (),
            }
        }
    })
}

/// Gets the merged-cell regions of the nth worksheet.
///
/// Shortcut for looking up the nth sheet name in the metadata and passing it
/// to `worksheet_merge_cells`. Returns `None` when `n` is out of range.
pub fn worksheet_merge_cells_at(
    &mut self,
    n: usize,
) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    // Clone the name so the borrow of the metadata ends before `self` is
    // borrowed mutably by the lookup below.
    let sheet_name = self.metadata().sheets.get(n)?.name.clone();
    self.worksheet_merge_cells(&sheet_name)
}
}

struct InnerTableMetadata {
Expand Down Expand Up @@ -1117,6 +1166,37 @@ fn check_for_password_protected<RS: Read + Seek>(reader: &mut RS) -> Result<(),
Ok(())
}

/// Reads the children of a `mergeCells` element and returns one `Dimensions`
/// per `mergeCell` that carries a `ref` attribute.
///
/// Expects `xml` to be positioned just after the opening `mergeCells` tag and
/// consumes events up to and including the matching closing tag.
///
/// # Errors
///
/// * `XlsxError::XmlAttr` when an attribute of a `mergeCell` cannot be read;
/// * whatever `get_dimension` returns for an invalid `ref` value;
/// * `XlsxError::XmlEof` when the document ends before `mergeCells` closes;
/// * `XlsxError::Xml` for any other reader error.
fn read_merge_cells(xml: &mut XlReader<'_>) -> Result<Vec<Dimensions>, XlsxError> {
    let mut merge_cells = Vec::new();
    // One buffer reused for every event instead of a fresh allocation per
    // iteration.
    let mut buffer = Vec::new();

    loop {
        buffer.clear();

        match xml.read_event_into(&mut buffer) {
            // `mergeCell` is normally self-closing; accept it both as a start
            // tag and as an empty element so the result does not depend on
            // whether the reader expands empty elements.
            Ok(Event::Start(event)) | Ok(Event::Empty(event))
                if event.local_name().as_ref() == b"mergeCell" =>
            {
                for attribute in event.attributes() {
                    let attribute = attribute.map_err(XlsxError::XmlAttr)?;

                    if attribute.key == QName(b"ref") {
                        merge_cells.push(get_dimension(&attribute.value)?);
                        break;
                    }
                }
            }
            Ok(Event::End(event)) if event.local_name().as_ref() == b"mergeCells" => break,
            // Name the element that was still open so the error is actionable.
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("mergeCells")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }

    Ok(merge_cells)
}

/// check if a char vector is a valid cell name
/// column name must be between A and XFD,
/// last char must be digit
Expand Down
Binary file added tests/merge_cells.xls
Binary file not shown.
Binary file added tests/merge_cells.xlsx
Binary file not shown.
36 changes: 34 additions & 2 deletions tests/test.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String};
use calamine::{
open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range,
Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
open_workbook, open_workbook_auto, DataType, Dimensions, ExcelDateTime, ExcelDateTimeType, Ods,
Range, Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
};
use calamine::{CellErrorType::*, Data};
use std::collections::BTreeSet;
Expand Down Expand Up @@ -1402,6 +1402,38 @@ fn issue_271() -> Result<(), calamine::Error> {
Ok(())
}

// The first sheet of tests/merge_cells.xlsx must report its three merged regions.
#[test]
fn issue_305_merge_cells() {
    let path = format!("{}/tests/merge_cells.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut excel: Xlsx<_> = open_workbook(&path).unwrap();

    let expected = vec![
        Dimensions::new((0, 0), (0, 1)),
        Dimensions::new((1, 0), (3, 0)),
        Dimensions::new((1, 1), (3, 3)),
    ];
    let merge_cells = excel.worksheet_merge_cells_at(0).unwrap().unwrap();

    assert_eq!(merge_cells, expected);
}

// The first sheet of tests/merge_cells.xls must report its three merged regions.
#[test]
fn issue_305_merge_cells_xls() {
    let path = format!("{}/tests/merge_cells.xls", env!("CARGO_MANIFEST_DIR"));
    let excel: Xls<_> = open_workbook(&path).unwrap();

    let expected = vec![
        Dimensions::new((0, 0), (0, 1)),
        Dimensions::new((1, 0), (3, 0)),
        Dimensions::new((1, 1), (3, 3)),
    ];
    let merge_cells = excel.worksheet_merge_cells_at(0).unwrap();

    assert_eq!(merge_cells, expected);
}

// cargo test --features picture
#[test]
#[cfg(feature = "picture")]
Expand Down

0 comments on commit 6b25dbc

Please sign in to comment.