Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rename to nested-dask #11

Merged
merged 3 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# dask-nested
# nested-dask

[![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/)

[![PyPI](https://img.shields.io/pypi/v/dask-nested?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/dask-nested/)
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/dask-nested/smoke-test.yml)](https://github.com/lincc-frameworks/dask-nested/actions/workflows/smoke-test.yml)
[![Codecov](https://codecov.io/gh/lincc-frameworks/dask-nested/branch/main/graph/badge.svg)](https://codecov.io/gh/lincc-frameworks/dask-nested)
[![Read The Docs](https://img.shields.io/readthedocs/dask-nested)](https://dask-nested.readthedocs.io/)
[![Benchmarks](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/dask-nested/asv-main.yml?label=benchmarks)](https://lincc-frameworks.github.io/dask-nested/)
[![PyPI](https://img.shields.io/pypi/v/nested-dask?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/nested-dask/)
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/nested-dask/smoke-test.yml)](https://github.com/lincc-frameworks/nested-dask/actions/workflows/smoke-test.yml)
[![Codecov](https://codecov.io/gh/lincc-frameworks/nested-dask/branch/main/graph/badge.svg)](https://codecov.io/gh/lincc-frameworks/nested-dask)
[![Read The Docs](https://img.shields.io/readthedocs/nested-dask)](https://nested-dask.readthedocs.io/)
[![Benchmarks](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/nested-dask/asv-main.yml?label=benchmarks)](https://lincc-frameworks.github.io/nested-dask/)

This project was automatically generated using the LINCC-Frameworks
[python-project-template](https://github.com/lincc-frameworks/python-project-template).
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
// you know what you are doing.
"version": 1,
// The name of the project being benchmarked.
"project": "dask-nested",
"project": "nested-dask",
// The project's homepage.
"project_url": "https://github.com/lincc-frameworks/dask-nested",
"project_url": "https://github.com/lincc-frameworks/nested-dask",
// The URL or local path of the source code repository for the
// project being benchmarked.
"repo": "..",
Expand All @@ -32,7 +32,7 @@
// variable.
"environment_type": "virtualenv",
// the base URL to show a commit for the project.
"show_commit_url": "https://github.com/lincc-frameworks/dask-nested/commit/",
"show_commit_url": "https://github.com/lincc-frameworks/nested-dask/commit/",
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": [
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
For more information on writing benchmarks:
https://asv.readthedocs.io/en/stable/writing_benchmarks.html."""

import dask_nested as dn
import nested_dask as nd
import nested_pandas as npd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -31,8 +31,8 @@ def _generate_benchmark_data(add_nested=True):
layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index()

# Convert to Dask
base_nf = dn.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=5)
layer_nf = dn.NestedFrame.from_nested_pandas(layer_nf).repartition(npartitions=50)
base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=5)
layer_nf = nd.NestedFrame.from_nested_pandas(layer_nf).repartition(npartitions=50)

# Return based on add_nested
if add_nested:
Expand All @@ -47,8 +47,8 @@ class NestedFrameAddNested:

n_base = 100
layer_size = 1000
base_nf = dn.NestedFrame
layer_nf = dn.NestedFrame
base_nf = nd.NestedFrame
layer_nf = nd.NestedFrame

def setup(self):
"""Set up the benchmark environment"""
Expand All @@ -70,7 +70,7 @@ def peakmem_run(self):
class NestedFrameReduce:
"""Benchmark the NestedFrame.reduce function"""

nf = dn.NestedFrame
nf = nd.NestedFrame

def setup(self):
"""Set up the benchmark environment"""
Expand All @@ -93,7 +93,7 @@ def peakmem_run(self):
class NestedFrameQuery:
"""Benchmark the NestedFrame.query function"""

nf = dn.NestedFrame
nf = nd.NestedFrame

def setup(self):
"""Set up the benchmark environment"""
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "dask-nested"
project = "nested-dask"
copyright = "2023, lincc-frameworks"
author = "lincc-frameworks"
release = version("dask-nested")
release = version("nested-dask")
# for example take major/minor
version = ".".join(release.split(".")[:2])

Expand Down
4 changes: 2 additions & 2 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
.. dask_nested documentation main file.
.. nested-dask documentation main file.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.

Welcome to dask_nested's documentation!
Welcome to nested-dask's documentation!
========================================================================================

Dev Guide - Getting Started
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[project]
name = "dask-nested"
name = "nested-dask"
license = {file = "LICENSE"}
readme = "README.md"
authors = [
Expand All @@ -25,7 +25,7 @@ dependencies = [
]

[project.urls]
"Source Code" = "https://github.com/lincc-frameworks/dask-nested"
"Source Code" = "https://github.com/lincc-frameworks/nested-dask"

# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
[project.optional-dependencies]
Expand All @@ -47,7 +47,7 @@ requires = [
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]
write_to = "src/dask_nested/_version.py"
write_to = "src/nested_dask/_version.py"

[tool.pytest.ini_options]
testpaths = [
Expand Down Expand Up @@ -107,7 +107,7 @@ ignore = [
"UP028", # Allow yield in for loop
]
[tool.setuptools.package-data]
dask_nested = ["py.typed"]
nested_dask = ["py.typed"]

[tool.coverage.run]
omit=["src/dask_nested/_version.py"]
omit=["src/nested_dask/_version.py"]
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions src/dask_nested/core.py → src/nested_dask/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def from_dask_dataframe(cls, df) -> NestedFrame:

Returns
-------
`dask_nested.NestedFrame`
`nested_dask.NestedFrame`
"""
return df.map_partitions(npd.NestedFrame)

Expand Down Expand Up @@ -163,7 +163,7 @@ def add_nested(self, nested, name, how="outer") -> NestedFrame: # type: ignore[

Returns
-------
`dask_nested.NestedFrame`
`nested_dask.NestedFrame`
"""
nested = nested.map_partitions(lambda x: pack_flat(x)).rename(name)
return self.join(nested, how=how)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from nested_pandas import datasets

import dask_nested as dn
import nested_dask as nd


def generate_data(n_base, n_layer, npartitions=1, seed=None) -> dn.NestedFrame:
def generate_data(n_base, n_layer, npartitions=1, seed=None) -> nd.NestedFrame:
"""Generates a toy dataset.

Docstring copied from nested-pandas.
Expand All @@ -28,15 +28,15 @@ def generate_data(n_base, n_layer, npartitions=1, seed=None) -> dn.NestedFrame:

Examples
--------
>>> import dask_nested as dn
>>> dn.datasets.generate_data(10,100)
>>> dn.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200})
>>> import nested_dask as nd
>>> nd.datasets.generate_data(10,100)
>>> nd.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200})
"""

# Use nested-pandas generator
base_nf = datasets.generate_data(n_base, n_layer, seed=seed)

# Convert to dask-nested
base_nf = dn.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions)
# Convert to nested-dask
base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions)

return base_nf
File renamed without changes.
File renamed without changes.
20 changes: 10 additions & 10 deletions tests/dask_nested/conftest.py → tests/nested_dask/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dask_nested as dn
import nested_dask as nd
import nested_pandas as npd
import numpy as np
import pytest
Expand All @@ -23,10 +23,10 @@ def test_dataset():
}
layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index()

base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)
base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)

return base_dn.add_nested(layer_dn, "nested")
return base_nd.add_nested(layer_nd, "nested")


@pytest.fixture
Expand All @@ -53,10 +53,10 @@ def test_dataset_with_nans():
}
layer_nf = npd.NestedFrame(data=layer_data).set_index("index")

base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)
base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)

return base_dn.add_nested(layer_dn, "nested")
return base_nd.add_nested(layer_nd, "nested")


@pytest.fixture
Expand All @@ -78,7 +78,7 @@ def test_dataset_no_add_nested():
}
layer_nf = npd.NestedFrame(data=layer_data).set_index("index")

base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)
base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5)
layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10)

return (base_dn, layer_dn)
return (base_nd, layer_nd)
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import dask_nested as dn
import nested_dask as nd


def test_generate_data():
"""test the dataset generator function"""

# test the seed
generate_1 = dn.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_2 = dn.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_3 = dn.datasets.generate_data(10, 100, npartitions=2, seed=2)
generate_1 = nd.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_2 = nd.datasets.generate_data(10, 100, npartitions=2, seed=1)
generate_3 = nd.datasets.generate_data(10, 100, npartitions=2, seed=2)

assert generate_1.compute().equals(generate_2.compute())
assert not generate_1.compute().equals(generate_3.compute())
Expand Down
6 changes: 3 additions & 3 deletions tests/dask_nested/test_io.py → tests/nested_dask/test_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dask_nested as dn
import nested_dask as nd


def test_read_parquet(test_dataset, tmp_path):
Expand All @@ -16,8 +16,8 @@ def test_read_parquet(test_dataset, tmp_path):
test_dataset[["a", "b"]].to_parquet(test_save_path, write_index=True)

# Now read
base = dn.read_parquet(test_save_path, calculate_divisions=True)
nested = dn.read_parquet(nested_save_path, calculate_divisions=True)
base = nd.read_parquet(test_save_path, calculate_divisions=True)
nested = nd.read_parquet(nested_save_path, calculate_divisions=True)

base = base.add_nested(nested, "nested")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import dask_nested as dn
import nested_dask as nd
import numpy as np
import pytest
from nested_pandas.series.dtype import NestedDtype
Expand Down Expand Up @@ -31,7 +31,7 @@ def test_add_nested(test_dataset_no_add_nested):
base_with_nested = base.add_nested(layer, "nested")

# Check that the result is a nestedframe
assert isinstance(base_with_nested, dn.NestedFrame)
assert isinstance(base_with_nested, nd.NestedFrame)

# Check that there's a new nested column with the correct dtype
assert "nested" in base_with_nested.columns
Expand Down Expand Up @@ -109,7 +109,7 @@ def test_to_parquet_combined(test_dataset, tmp_path):
test_dataset.to_parquet(test_save_path, by_layer=False)

# load back from parquet
loaded_dataset = dn.read_parquet(test_save_path, calculate_divisions=True)
loaded_dataset = nd.read_parquet(test_save_path, calculate_divisions=True)
# TODO: file a bug for this and investigate
loaded_dataset = loaded_dataset.reset_index().set_index("index")

Expand All @@ -131,8 +131,8 @@ def test_to_parquet_by_layer(test_dataset, tmp_path):
test_dataset.to_parquet(test_save_path, by_layer=True, write_index=True)

# load back from parquet
loaded_base = dn.read_parquet(test_save_path / "base", calculate_divisions=True)
loaded_nested = dn.read_parquet(test_save_path / "nested", calculate_divisions=True)
loaded_base = nd.read_parquet(test_save_path / "base", calculate_divisions=True)
loaded_nested = nd.read_parquet(test_save_path / "nested", calculate_divisions=True)

loaded_dataset = loaded_base.add_nested(loaded_nested, "nested")

Expand Down