Skip to content

Commit

Permalink
add a read_parquet test
Browse files Browse the repository at this point in the history
  • Loading branch information
dougbrn committed May 14, 2024
1 parent f43551a commit 4b98df8
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions tests/dask_nested/test_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import dask_nested as dn


def test_read_parquet(test_dataset, tmp_path):
    """Test the reproducibility of read_parquet.

    The base columns ("a", "b") and the flattened "nested" column of
    ``test_dataset`` are written to separate parquet locations, read back
    with ``dn.read_parquet``, re-assembled via ``add_nested``, and compared
    against the original dataset.

    Parameters
    ----------
    test_dataset
        Fixture supplying the reference dataset; it is used here with
        columns "a" and "b" and a nested column named "nested".
        (Presumably defined in the test suite's conftest -- confirm.)
    tmp_path
        Directory under which the two parquet outputs are written
        (presumably pytest's built-in ``tmp_path`` fixture).
    """

    # Setup a temporary directory for files
    nested_save_path = tmp_path / "nested"
    test_save_path = tmp_path / "test_dataset"

    # Save Nested to Parquet (flattened first, index written alongside)
    flat_nested = test_dataset.nested.nest.to_flat()
    flat_nested.to_parquet(nested_save_path, write_index=True)

    # Save Base to Parquet
    test_dataset[["a", "b"]].to_parquet(test_save_path, write_index=True)

    # Now read both pieces back, requesting divisions so they can be
    # compared with the original's below
    base = dn.read_parquet(test_save_path, calculate_divisions=True)
    nested = dn.read_parquet(nested_save_path, calculate_divisions=True)

    # Re-attach the flat data as the "nested" column of the base frame
    base = base.add_nested(nested, "nested")

    # Check the loaded dataset against the original
    assert base.divisions == test_dataset.divisions  # equal divisions
    assert base.compute().equals(test_dataset.compute())  # equal data

    # Check the flat nested datasets. The expected frame must come from the
    # ORIGINAL dataset: deriving both sides from `base` would compare the
    # reloaded data with itself and the assertion could never fail.
    base_nested_flat = base.nested.nest.to_flat().compute()
    test_nested_flat = test_dataset.nested.nest.to_flat().compute()
    assert base_nested_flat.equals(test_nested_flat)

0 comments on commit 4b98df8

Please sign in to comment.