Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: named table data as DataRef type #464

Merged
merged 6 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ impl<T: CellType> Range<T> {
// search bounds
let row_start = cells.first().unwrap().pos.0;
let row_end = cells.last().unwrap().pos.0;
let mut col_start = std::u32::MAX;
let mut col_start = u32::MAX;
let mut col_end = 0;
for c in cells.iter().map(|c| c.pos.1) {
if c < col_start {
Expand Down Expand Up @@ -932,13 +932,13 @@ impl<'a, T: 'a + CellType> DoubleEndedIterator for Rows<'a, T> {
impl<'a, T: 'a + CellType> ExactSizeIterator for Rows<'a, T> {}

/// Struct with the key elements of a table
pub struct Table<T> {
pub struct Table<T: CellType> {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is better to refrain from adding constraints on the structs, the impl are enough, more generic and more future proof.

pub(crate) name: String,
pub(crate) sheet_name: String,
pub(crate) columns: Vec<String>,
pub(crate) data: Range<T>,
}
impl<T> Table<T> {
impl<T: CellType> Table<T> {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, it is unnecessary I think.

/// Get the name of the table
pub fn name(&self) -> &str {
&self.name
Expand All @@ -957,6 +957,12 @@ impl<T> Table<T> {
}
}

impl<T: CellType> From<Table<T>> for Range<T> {
fn from(table: Table<T>) -> Range<T> {
table.data
}
}

/// A helper function to deserialize cell values as `i64`,
/// useful when cells may also contain invalid values (i.e. strings).
/// It applies the [`as_i64`] method to the cell value, and returns
Expand Down
77 changes: 63 additions & 14 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,32 @@ impl<RS: Read + Seek> Xlsx<RS> {
Ok(())
}

#[inline]
fn get_table_meta(&self, table_name: &str) -> Result<TableMetadata, XlsxError> {
let match_table_meta = self
.tables
.as_ref()
.expect("Tables must be loaded before they are referenced")
.iter()
.find(|(table, ..)| table == table_name)
.ok_or_else(|| XlsxError::TableNotFound(table_name.into()))?;

let name = match_table_meta.0.to_owned();
let sheet_name = match_table_meta.1.clone();
let columns = match_table_meta.2.clone();
let dimensions = Dimensions {
start: match_table_meta.3.start,
end: match_table_meta.3.end,
};

Ok(TableMetadata {
name,
sheet_name,
columns,
dimensions,
})
}

/// Load the merged regions
pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> {
if self.merged_regions.is_none() {
Expand Down Expand Up @@ -735,23 +761,39 @@ impl<RS: Read + Seek> Xlsx<RS> {
.collect()
}

/// Get the table by name
/// Get the table by name (owned)
// TODO: If retrieving multiple tables from a single sheet, get tables by sheet will be more efficient
pub fn table_by_name(&mut self, table_name: &str) -> Result<Table<Data>, XlsxError> {
let match_table_meta = self
.tables
.as_ref()
.expect("Tables must be loaded before they are referenced")
.iter()
.find(|(table, ..)| table == table_name)
.ok_or_else(|| XlsxError::TableNotFound(table_name.into()))?;
let name = match_table_meta.0.to_owned();
let sheet_name = match_table_meta.1.clone();
let columns = match_table_meta.2.clone();
let start_dim = match_table_meta.3.start;
let end_dim = match_table_meta.3.end;
let TableMetadata {
name,
sheet_name,
columns,
dimensions,
} = self.get_table_meta(table_name)?;
let Dimensions { start, end } = dimensions;
let range = self.worksheet_range(&sheet_name)?;
let tbl_rng = range.range(start_dim, end_dim);
let tbl_rng = range.range(start, end);

Ok(Table {
name,
sheet_name,
columns,
data: tbl_rng,
})
}

/// Get the table by name (ref)
pub fn table_by_name_ref(&mut self, table_name: &str) -> Result<Table<DataRef>, XlsxError> {
let TableMetadata {
name,
sheet_name,
columns,
dimensions,
} = self.get_table_meta(table_name)?;
let Dimensions { start, end } = dimensions;
let range = self.worksheet_range_ref(&sheet_name)?;
let tbl_rng = range.range(start, end);

Ok(Table {
name,
sheet_name,
Expand Down Expand Up @@ -810,6 +852,13 @@ impl<RS: Read + Seek> Xlsx<RS> {
}
}

struct TableMetadata {
name: String,
sheet_name: String,
columns: Vec<String>,
dimensions: Dimensions,
}

struct InnerTableMetadata {
display_name: String,
ref_cells: String,
Expand Down
104 changes: 104 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,110 @@ fn table() {
assert_eq!(data.get((0, 1)), Some(&Float(12.5)));
assert_eq!(data.get((1, 1)), Some(&Float(64.0)));
xls.worksheet_range_at(0).unwrap().unwrap();

// Check if owned data works
let owned_data: Range<Data> = table.into();

assert_eq!(
owned_data.get((0, 0)),
Some(&String("something".to_owned()))
);
assert_eq!(owned_data.get((1, 0)), Some(&String("else".to_owned())));
assert_eq!(owned_data.get((0, 1)), Some(&Float(12.5)));
assert_eq!(owned_data.get((1, 1)), Some(&Float(64.0)));
}

#[test]
fn table_by_ref() {
let mut xls: Xlsx<_> = wb("temperature-table.xlsx");
xls.load_tables().unwrap();
let table_names = xls.table_names();
assert_eq!(table_names[0], "Temperature");
assert_eq!(table_names[1], "OtherTable");
let table = xls
.table_by_name_ref("Temperature")
.expect("Parsing table's sheet should not error");
assert_eq!(table.name(), "Temperature");
assert_eq!(table.columns()[0], "label");
assert_eq!(table.columns()[1], "value");
let data = table.data();
assert_eq!(
data.get((0, 0))
.expect("Could not get data from table ref."),
&DataRef::SharedString("celsius")
);
assert_eq!(
data.get((1, 0))
.expect("Could not get data from table ref."),
&DataRef::SharedString("fahrenheit")
);
assert_eq!(
data.get((0, 1))
.expect("Could not get data from table ref."),
&DataRef::Float(22.2222)
);
assert_eq!(
data.get((1, 1))
.expect("Could not get data from table ref."),
&DataRef::Float(72.0)
);
// Check the second table
let table = xls
.table_by_name_ref("OtherTable")
.expect("Parsing table's sheet should not error");
assert_eq!(table.name(), "OtherTable");
assert_eq!(table.columns()[0], "label2");
assert_eq!(table.columns()[1], "value2");
let data = table.data();
assert_eq!(
data.get((0, 0))
.expect("Could not get data from table ref."),
&DataRef::SharedString("something")
);
assert_eq!(
data.get((1, 0))
.expect("Could not get data from table ref."),
&DataRef::SharedString("else")
);
assert_eq!(
data.get((0, 1))
.expect("Could not get data from table ref."),
&DataRef::Float(12.5)
);
assert_eq!(
data.get((1, 1))
.expect("Could not get data from table ref."),
&DataRef::Float(64.0)
);
xls.worksheet_range_at(0).unwrap().unwrap();

// Check if owned data works
let owned_data: Range<DataRef> = table.into();

assert_eq!(
owned_data
.get((0, 0))
.expect("Could not get data from table ref."),
&DataRef::SharedString("something")
);
assert_eq!(
owned_data
.get((1, 0))
.expect("Could not get data from table ref."),
&DataRef::SharedString("else")
);
assert_eq!(
owned_data
.get((0, 1))
.expect("Could not get data from table ref."),
&DataRef::Float(12.5)
);
assert_eq!(
owned_data
.get((1, 1))
.expect("Could not get data from table ref."),
&DataRef::Float(64.0)
);
}

#[test]
Expand Down
Loading