Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement handling of merged cells for .xlsx workbooks #226

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 68 additions & 3 deletions src/xlsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::borrow::Cow;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::{Read, Seek};
use std::iter;
use std::str::FromStr;

use log::warn;
Expand All @@ -12,7 +13,7 @@ use zip::read::{ZipArchive, ZipFile};
use zip::result::ZipError;

use crate::vba::VbaProject;
use crate::{Cell, CellErrorType, DataType, Metadata, Range, Reader, Table};
use crate::{Cell, CellErrorType, CellType, DataType, Metadata, Range, Reader, Table};

type XlsReader<'a> = XmlReader<BufReader<ZipFile<'a>>>;

Expand Down Expand Up @@ -606,6 +607,7 @@ where
{
let mut cells = Vec::new();
let mut buf = Vec::new();
let mut merge_cells = None;
'xml: loop {
buf.clear();
match xml.read_event(&mut buf) {
Expand All @@ -631,7 +633,16 @@ where
}
b"sheetData" => {
read_data(&strings, &formats, &mut xml, &mut cells)?;
break;
}
b"mergeCells" => {
let merge_count: usize = std::str::from_utf8(
get_attribute(e.attributes(), b"count")?
.ok_or(XlsxError::XmlEof("count"))?,
)
.unwrap_or("0")
.parse()?;

merge_cells = Some(read_merge_cells(&mut xml, merge_count)?);
}
_ => (),
}
Expand All @@ -641,7 +652,14 @@ where
_ => (),
}
}
Ok(Range::from_sparse(cells))

let mut range = Range::from_sparse(cells);

if let Some(ref merge_cells) = merge_cells {
write_merge_cells(merge_cells, &mut range)?;
}

Ok(range)
}

impl<RS: Read + Seek> Reader for Xlsx<RS> {
Expand Down Expand Up @@ -943,6 +961,53 @@ fn read_sheet_data(
})
}

/// Reads the children of a `mergeCells` element and collects the dimension of
/// every `mergeCell` entry found.
///
/// The reader is expected to be positioned just after the opening
/// `mergeCells` tag; events are consumed until the matching end tag.
///
/// `merge_count` is only used as a capacity hint for the returned vector. The
/// number of entries actually returned is determined by the XML content.
///
/// # Errors
///
/// * `XlsxError::XmlEof("ref")` if a `mergeCell` element has no `ref` attribute.
/// * `XlsxError::XmlEof("mergeCells")` if the document ends before the
///   closing `mergeCells` tag.
/// * `XlsxError::Xml` for any error reported by the underlying XML reader.
/// * Whatever `get_attribute` / `get_dimension` report for malformed input.
fn read_merge_cells(
    xml: &mut XlsReader<'_>,
    merge_count: usize,
) -> Result<Vec<Dimensions>, XlsxError> {
    // `merge_count` is parsed from the workbook and therefore untrusted: a
    // bogus value such as `usize::MAX` would make `Vec::with_capacity` panic
    // (capacity overflow) or abort on allocation failure. Cap the hint; the
    // vector still grows on demand if the sheet really has more merges.
    const MAX_PREALLOCATED_MERGE_CELLS: usize = 1024;

    let mut buf = Vec::new();
    let mut merge_dimensions =
        Vec::with_capacity(merge_count.min(MAX_PREALLOCATED_MERGE_CELLS));

    loop {
        buf.clear();

        match xml.read_event(&mut buf) {
            Ok(Event::Start(ref e)) if e.local_name() == b"mergeCell" => {
                let merge_ref =
                    get_attribute(e.attributes(), b"ref")?.ok_or(XlsxError::XmlEof("ref"))?;
                merge_dimensions.push(get_dimension(merge_ref)?);
            }
            // The closing tag of the container ends the scan.
            Ok(Event::End(ref e)) if e.local_name() == b"mergeCells" => {
                return Ok(merge_dimensions);
            }
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("mergeCells")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }
}

/// Propagates the value stored at the `start` position of each merged region
/// to every position covered by that region.
///
/// For each entry in `merge_cells`, the value found at `start` is cloned and
/// written to all positions from `start` to `end` inclusive (the `start`
/// position itself is rewritten with its own value). Positions are visited
/// with the first tuple component in the outer loop and the second in the
/// inner loop.
///
/// # Errors
///
/// Returns `XlsxError::Unexpected` when `range` has no value at the `start`
/// position of a merged region. Regions processed before the failing one
/// have already been written to `range`.
fn write_merge_cells<T>(merge_cells: &[Dimensions], range: &mut Range<T>) -> Result<(), XlsxError>
where
    T: CellType,
{
    for &Dimensions { start, end } in merge_cells {
        // Take an owned copy first so the shared borrow of `range` ends
        // before the region is written to.
        let fill = match range.get_value(start) {
            Some(value) => value.clone(),
            None => {
                return Err(XlsxError::Unexpected(
                    "expected start cell of merge range to be present",
                ))
            }
        };

        for major in start.0..=end.0 {
            for position in iter::repeat(major).zip(start.1..=end.1) {
                range.set_value(position, fill.clone());
            }
        }
    }

    Ok(())
}

// This tries to detect number formats that are definitely date/time formats.
// This is definitely not perfect!
fn is_custom_date_format(format: &str) -> bool {
Expand Down