Skip to content

Commit

Permalink
Adding in ability to read merge cells from xls and xlsx files.
Browse files Browse the repository at this point in the history
  • Loading branch information
tspayne87 authored and Andrii Hetman committed May 19, 2024
1 parent 1e65739 commit b4158e2
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 4 deletions.
48 changes: 44 additions & 4 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ pub struct XlsOptions {

/// A struct representing an old xls format file (CFB)
pub struct Xls<RS> {
sheets: BTreeMap<String, (Range<Data>, Range<String>)>,
sheets: BTreeMap<String, (Range<Data>, Range<String>, Vec<Dimensions>)>,
vba: Option<VbaProject>,
metadata: Metadata,
marker: PhantomData<RS>,
Expand Down Expand Up @@ -204,6 +204,24 @@ impl<RS: Read + Seek> Xls<RS> {

Ok(xls)
}

/// Returns the merged-cell regions stored for the worksheet called `name`.
///
/// Yields `None` when no loaded sheet has that name; otherwise the stored
/// regions are cloned and handed back wrapped in `Ok`.
pub fn worksheet_merge_cells(
    &mut self,
    name: &str,
) -> Option<Result<Vec<Dimensions>, XlsError>> {
    // Third tuple slot holds the merged regions collected while parsing the sheet.
    let (_, _, merged) = self.sheets.get(name)?;
    Some(Ok(merged.clone()))
}

/// Returns the merged-cell regions of the `n`th worksheet.
///
/// Shortcut for looking up the `n`th sheet name and passing it to
/// `worksheet_merge_cells`. Yields `None` when there is no sheet name at index `n`.
pub fn worksheet_merge_cells_at(
    &mut self,
    n: usize,
) -> Option<Result<Vec<Dimensions>, XlsError>> {
    let sheet_name = match self.sheet_names().get(n) {
        Some(found) => found.to_string(),
        None => return None,
    };
    self.worksheet_merge_cells(&sheet_name)
}
}

impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
Expand Down Expand Up @@ -232,7 +250,7 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
self.sheets
.iter()
.map(|(name, (data, _))| (name.to_owned(), data.clone()))
.map(|(name, (data, _, _))| (name.to_owned(), data.clone()))
.collect()
}

Expand Down Expand Up @@ -390,6 +408,7 @@ impl<RS: Read + Seek> Xls<RS> {
let mut cells = Vec::new();
let mut formulas = Vec::new();
let mut fmla_pos = (0, 0);
let mut merge_cells = Vec::new();
for record in records {
let r = record?;
match r.typ {
Expand All @@ -412,7 +431,8 @@ impl<RS: Read + Seek> Xls<RS> {
0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk
0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst
0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk
0x000A => break, // 10: EOF,
0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells
0x000A => break, // 10: EOF,
0x0006 => {
// 6: Formula
if r.data.len() < 20 {
Expand Down Expand Up @@ -452,7 +472,7 @@ impl<RS: Read + Seek> Xls<RS> {
}
let range = Range::from_sparse(cells);
let formula = Range::from_sparse(formulas);
sheets.insert(name, (range, formula));
sheets.insert(name, (range, formula, merge_cells));
}

self.sheets = sheets;
Expand Down Expand Up @@ -629,6 +649,26 @@ fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result<Cell<Data
))
}

fn parse_merge_cells(r: &[u8], merge_cells: &mut Vec<Dimensions>) -> Result<(), XlsError> {
let count = read_u16(r);

for i in 0..count {
let offset: usize = (2 + i * 8).into();

let rf = read_u16(&r[offset + 0..]);
let rl = read_u16(&r[offset + 2..]);
let cf = read_u16(&r[offset + 4..]);
let cl = read_u16(&r[offset + 6..]);

merge_cells.push(Dimensions {
start: (rf.into(), cf.into()),
end: (rl.into(), cl.into()),
})
}

Ok(())
}

fn parse_mul_rk(
r: &[u8],
cells: &mut Vec<Cell<Data>>,
Expand Down
94 changes: 94 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,52 @@ impl<RS: Read + Seek> Xlsx<RS> {
data: tbl_rng,
})
}

/// Gets the merged-cell dimensions of the worksheet called `name`.
///
/// Returns `None` when no sheet has that name or when `xml_reader` yields no
/// reader for the sheet's path. A sheet without a `mergeCells` element produces
/// an empty vector.
///
/// # Errors
///
/// Returns the error from opening the sheet XML, any XML read error, and any
/// error reported by `read_merge_cells` while parsing the `mergeCells` element.
pub fn worksheet_merge_cells(
    &mut self,
    name: &str,
) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    let xml = match self.sheets.iter().find(|&&(ref n, _)| n == name) {
        Some(&(_, ref path)) => xml_reader(&mut self.zip, path),
        None => return None,
    };

    xml.map(|xml_result| {
        // Propagate a failure to open the sheet instead of panicking on it.
        let mut xml = xml_result?;
        let mut buf = Vec::new();

        loop {
            buf.clear();
            match xml.read_event_into(&mut buf) {
                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"mergeCells" => {
                    // Only the first `mergeCells` element is read; its parse
                    // errors are reported rather than silently dropped.
                    return read_merge_cells(&mut xml);
                }
                Ok(Event::Eof) => return Ok(Vec::new()),
                Err(e) => return Err(XlsxError::Xml(e)),
                _ => (),
            }
        }
    })
}

/// Gets the merged-cell dimensions of the `n`th worksheet.
///
/// Shortcut for looking up the `n`th sheet name and passing it to
/// `worksheet_merge_cells`. Yields `None` when there is no sheet name at index `n`.
pub fn worksheet_merge_cells_at(
    &mut self,
    n: usize,
) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    let sheet_name = match self.sheet_names().get(n) {
        Some(found) => found.to_string(),
        None => return None,
    };
    self.worksheet_merge_cells(&sheet_name)
}
}

struct InnerTableMetadata {
Expand Down Expand Up @@ -1117,6 +1163,54 @@ fn check_for_password_protected<RS: Read + Seek>(reader: &mut RS) -> Result<(),
Ok(())
}

fn read_merge_cells(xml: &mut XlReader<'_>) -> Result<Vec<Dimensions>, XlsxError> {
let mut buf = Vec::new();
let mut merge_cells = Vec::new();
loop {
buf.clear();
match xml.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"mergeCell" => {
for a in e.attributes() {
match a.map_err(XlsxError::XmlAttr)? {
Attribute {
key: QName(b"ref"),
value: v,
} => {
match get_dimension(&v) {
Ok(d) => merge_cells.push(d),
Err(e) => return Err(e),
}
break;
}
_ => (),
}
}
}
Ok(Event::End(ref e)) if e.local_name().as_ref() == b"mergeCells" => {
break;
}
Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
Err(e) => return Err(XlsxError::Xml(e)),
_ => (),
}
}

Ok(merge_cells)
}

/// Checks that cell references and ranges are converted to zero-based
/// `(row, column)` coordinates.
#[test]
fn test_dimensions() {
    let top_left = get_row_column(b"A1").unwrap();
    assert_eq!(top_left, (0, 0));

    let far_cell = get_row_column(b"C107").unwrap();
    assert_eq!(far_cell, (106, 2));

    let expected_range = Dimensions {
        start: (1, 2),
        end: (34, 3),
    };
    assert_eq!(get_dimension(b"C2:D35").unwrap(), expected_range);
}

/// check if a char vector is a valid cell name
/// column name must be between A and XFD,
/// last char must be digit
Expand Down
Binary file added tests/merge_cells.xls
Binary file not shown.
Binary file added tests/merge_cells.xlsx
Binary file not shown.
51 changes: 51 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,33 @@ macro_rules! range_eq {
};
}

/// Asserts that a list of merged regions equals the expected `[start, end]` pairs.
///
/// `$merge_cells` is an indexable collection whose items expose `start` and `end`
/// tuples; `$right` is an array of `[(start.0, start.1), (end.0, end.1)]` entries.
/// The number of regions and every coordinate must match.
macro_rules! merge_cells_eq {
    ($merge_cells:expr, $right:expr) => {{
        // Bind both arguments once so their expressions are not re-evaluated per assertion.
        let actual = &$merge_cells;
        let expected = &$right;

        // Comparing only the expected entries would let surplus regions pass unnoticed
        // and report missing ones as a bare index-out-of-bounds panic.
        assert_eq!(actual.len(), expected.len(), "Merge cell count mismatch");

        for (i, item) in expected.iter().enumerate() {
            assert_eq!(
                actual[i].start.0, item[0].0,
                "Mismatch at position ({})",
                i
            );
            assert_eq!(
                actual[i].start.1, item[0].1,
                "Mismatch at position ({})",
                i
            );
            assert_eq!(
                actual[i].end.0, item[1].0,
                "Mismatch at position ({})",
                i
            );
            assert_eq!(
                actual[i].end.1, item[1].1,
                "Mismatch at position ({})",
                i
            );
        }
    }};
}

#[test]
fn issue_2() {
setup();
Expand Down Expand Up @@ -1402,6 +1429,30 @@ fn issue_271() -> Result<(), calamine::Error> {
Ok(())
}

/// Issue 305: checks the merged regions read from the first sheet of
/// `tests/merge_cells.xlsx`.
#[test]
fn issue_305_merge_cells() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let workbook_path = format!("{}/tests/merge_cells.xlsx", manifest_dir);
    let mut workbook: Xlsx<_> = open_workbook(&workbook_path).unwrap();
    let regions = workbook.worksheet_merge_cells_at(0).unwrap().unwrap();

    merge_cells_eq!(
        regions,
        [[(0, 0), (0, 1)], [(1, 0), (3, 0)], [(1, 1), (3, 3)]]
    );
}

/// Issue 305: checks the merged regions read from the first sheet of
/// `tests/merge_cells.xls`.
#[test]
fn issue_305_merge_cells_xls() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let workbook_path = format!("{}/tests/merge_cells.xls", manifest_dir);
    let mut workbook: Xls<_> = open_workbook(&workbook_path).unwrap();
    let regions = workbook.worksheet_merge_cells_at(0).unwrap().unwrap();

    merge_cells_eq!(
        regions,
        [[(0, 0), (0, 1)], [(1, 0), (3, 0)], [(1, 1), (3, 3)]]
    );
}

// cargo test --features picture
#[test]
#[cfg(feature = "picture")]
Expand Down

0 comments on commit b4158e2

Please sign in to comment.