From d93cdf5b7ad0628863c1515dec8464c33fb09902 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 21 May 2024 10:29:56 -0700 Subject: [PATCH 1/3] rename to nested-dask --- README.md | 12 +++++------ benchmarks/asv.conf.json | 6 +++--- benchmarks/benchmarks.py | 14 ++++++------- docs/conf.py | 4 ++-- pyproject.toml | 4 ++-- src/{dask_nested => nested_dask}/__init__.py | 0 src/{dask_nested => nested_dask}/accessor.py | 0 src/{dask_nested => nested_dask}/backends.py | 0 src/{dask_nested => nested_dask}/core.py | 4 ++-- .../datasets/__init__.py | 0 .../datasets/generation.py | 12 +++++------ src/{dask_nested => nested_dask}/io.py | 0 src/{dask_nested => nested_dask}/py.typed | 0 .../{dask_nested => nested_dask}/conftest.py | 20 +++++++++---------- .../test_accessor.py | 0 .../test_datasets.py | 8 ++++---- tests/{dask_nested => nested_dask}/test_io.py | 6 +++--- .../test_nestedframe.py | 10 +++++----- 18 files changed, 50 insertions(+), 50 deletions(-) rename src/{dask_nested => nested_dask}/__init__.py (100%) rename src/{dask_nested => nested_dask}/accessor.py (100%) rename src/{dask_nested => nested_dask}/backends.py (100%) rename src/{dask_nested => nested_dask}/core.py (99%) rename src/{dask_nested => nested_dask}/datasets/__init__.py (100%) rename src/{dask_nested => nested_dask}/datasets/generation.py (80%) rename src/{dask_nested => nested_dask}/io.py (100%) rename src/{dask_nested => nested_dask}/py.typed (100%) rename tests/{dask_nested => nested_dask}/conftest.py (80%) rename tests/{dask_nested => nested_dask}/test_accessor.py (100%) rename tests/{dask_nested => nested_dask}/test_datasets.py (66%) rename tests/{dask_nested => nested_dask}/test_io.py (86%) rename tests/{dask_nested => nested_dask}/test_nestedframe.py (94%) diff --git a/README.md b/README.md index 4a79caf..e2dccef 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -# dask-nested +# nested-dask [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) -[![PyPI](https://img.shields.io/pypi/v/dask-nested?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/dask-nested/) -[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/dask-nested/smoke-test.yml)](https://github.com/lincc-frameworks/dask-nested/actions/workflows/smoke-test.yml) -[![Codecov](https://codecov.io/gh/lincc-frameworks/dask-nested/branch/main/graph/badge.svg)](https://codecov.io/gh/lincc-frameworks/dask-nested) -[![Read The Docs](https://img.shields.io/readthedocs/dask-nested)](https://dask-nested.readthedocs.io/) -[![Benchmarks](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/dask-nested/asv-main.yml?label=benchmarks)](https://lincc-frameworks.github.io/dask-nested/) +[![PyPI](https://img.shields.io/pypi/v/nested-dask?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/nested-dask/) +[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/nested-dask/smoke-test.yml)](https://github.com/lincc-frameworks/nested-dask/actions/workflows/smoke-test.yml) +[![Codecov](https://codecov.io/gh/lincc-frameworks/nested-dask/branch/main/graph/badge.svg)](https://codecov.io/gh/lincc-frameworks/nested-dask) +[![Read The Docs](https://img.shields.io/readthedocs/nested-dask)](https://nested-dask.readthedocs.io/) +[![Benchmarks](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/nested-dask/asv-main.yml?label=benchmarks)](https://lincc-frameworks.github.io/nested-dask/) This project was automatically generated using the LINCC-Frameworks [python-project-template](https://github.com/lincc-frameworks/python-project-template). diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index 0c45d53..26d93fa 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -3,9 +3,9 @@ // you know what you are doing. "version": 1, // The name of the project being benchmarked. - "project": "dask-nested", + "project": "nested-dask", // The project's homepage. - "project_url": "https://github.com/lincc-frameworks/dask-nested", + "project_url": "https://github.com/lincc-frameworks/nested-dask", // The URL or local path of the source code repository for the // project being benchmarked. "repo": "..", @@ -32,7 +32,7 @@ // variable. "environment_type": "virtualenv", // the base URL to show a commit for the project. - "show_commit_url": "https://github.com/lincc-frameworks/dask-nested/commit/", + "show_commit_url": "https://github.com/lincc-frameworks/nested-dask/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. "pythons": [ diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index c07f397..2978651 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -3,7 +3,7 @@ For more information on writing benchmarks: https://asv.readthedocs.io/en/stable/writing_benchmarks.html.""" -import dask_nested as dn +import nested_dask as nd import nested_pandas as npd import numpy as np import pandas as pd @@ -31,8 +31,8 @@ def _generate_benchmark_data(add_nested=True): layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index() # Convert to Dask - base_nf = dn.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=5) - layer_nf = dn.NestedFrame.from_nested_pandas(layer_nf).repartition(npartitions=50) + base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=5) + layer_nf = nd.NestedFrame.from_nested_pandas(layer_nf).repartition(npartitions=50) # Return based on add_nested if add_nested: @@ -47,8 +47,8 @@ class NestedFrameAddNested: n_base = 100 layer_size = 1000 - base_nf = dn.NestedFrame - layer_nf = dn.NestedFrame + base_nf = nd.NestedFrame + layer_nf = nd.NestedFrame def setup(self): """Set up the benchmark environment""" @@ -70,7 +70,7 @@ def peakmem_run(self): class NestedFrameReduce: """Benchmark the NestedFrame.reduce function""" - nf = dn.NestedFrame + nf = nd.NestedFrame def setup(self): """Set up the benchmark environment""" @@ -93,7 +93,7 @@ def peakmem_run(self): class NestedFrameQuery: """Benchmark the NestedFrame.query function""" - nf = dn.NestedFrame + nf = nd.NestedFrame def setup(self): """Set up the benchmark environment""" diff --git a/docs/conf.py b/docs/conf.py index 4109e2a..0f7b5c4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,10 +14,10 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "dask-nested" +project = "nested-dask" copyright = "2023, lincc-frameworks" author = "lincc-frameworks" -release = version("dask-nested") +release = version("nested-dask") # for example take major/minor version = ".".join(release.split(".")[:2]) diff --git a/pyproject.toml b/pyproject.toml index bf8e911..d5ac265 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "dask-nested" +name = "nested-dask" license = {file = "LICENSE"} readme = "README.md" authors = [ @@ -25,7 +25,7 @@ dependencies = [ ] [project.urls] -"Source Code" = "https://github.com/lincc-frameworks/dask-nested" +"Source Code" = "https://github.com/lincc-frameworks/nested-dask" # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes) [project.optional-dependencies] diff --git a/src/dask_nested/__init__.py b/src/nested_dask/__init__.py similarity index 100% rename from src/dask_nested/__init__.py rename to src/nested_dask/__init__.py diff --git a/src/dask_nested/accessor.py b/src/nested_dask/accessor.py similarity index 100% rename from src/dask_nested/accessor.py rename to src/nested_dask/accessor.py diff --git a/src/dask_nested/backends.py b/src/nested_dask/backends.py similarity index 100% rename from src/dask_nested/backends.py rename to src/nested_dask/backends.py diff --git a/src/dask_nested/core.py b/src/nested_dask/core.py similarity index 99% rename from src/dask_nested/core.py rename to src/nested_dask/core.py index a93b3da..20006de 100644 --- a/src/dask_nested/core.py +++ b/src/nested_dask/core.py @@ -108,7 +108,7 @@ def from_dask_dataframe(cls, df) -> NestedFrame: Returns ------- - `dask_nested.NestedFrame` + `nested_dask.NestedFrame` """ return df.map_partitions(npd.NestedFrame) @@ -163,7 +163,7 @@ def add_nested(self, nested, name, how="outer") -> NestedFrame: # type: ignore[ Returns ------- - `dask_nested.NestedFrame` + `nested_dask.NestedFrame` """ nested = nested.map_partitions(lambda x: pack_flat(x)).rename(name) return self.join(nested, how=how) diff --git a/src/dask_nested/datasets/__init__.py b/src/nested_dask/datasets/__init__.py similarity index 100% rename from src/dask_nested/datasets/__init__.py rename to src/nested_dask/datasets/__init__.py diff --git a/src/dask_nested/datasets/generation.py b/src/nested_dask/datasets/generation.py similarity index 80% rename from src/dask_nested/datasets/generation.py rename to src/nested_dask/datasets/generation.py index d5b296f..70f1754 100644 --- a/src/dask_nested/datasets/generation.py +++ b/src/nested_dask/datasets/generation.py @@ -1,9 +1,9 @@ from nested_pandas import datasets -import dask_nested as dn +import nested_dask as nd -def generate_data(n_base, n_layer, npartitions=1, seed=None) -> dn.NestedFrame: +def generate_data(n_base, n_layer, npartitions=1, seed=None) -> nd.NestedFrame: """Generates a toy dataset. Docstring copied from nested-pandas. @@ -28,15 +28,15 @@ def generate_data(n_base, n_layer, npartitions=1, seed=None) -> dn.NestedFrame: Examples -------- - >>> import dask_nested as dn - >>> dn.datasets.generate_data(10,100) - >>> dn.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200}) + >>> import nested_dask as nd + >>> nd.datasets.generate_data(10,100) + >>> nd.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200}) """ # Use nested-pandas generator base_nf = datasets.generate_data(n_base, n_layer, seed=seed) # Convert to dask-nested - base_nf = dn.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions) + base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions) return base_nf diff --git a/src/dask_nested/io.py b/src/nested_dask/io.py similarity index 100% rename from src/dask_nested/io.py rename to src/nested_dask/io.py diff --git a/src/dask_nested/py.typed b/src/nested_dask/py.typed similarity index 100% rename from src/dask_nested/py.typed rename to src/nested_dask/py.typed diff --git a/tests/dask_nested/conftest.py b/tests/nested_dask/conftest.py similarity index 80% rename from tests/dask_nested/conftest.py rename to tests/nested_dask/conftest.py index 30b2196..778553b 100644 --- a/tests/dask_nested/conftest.py +++ b/tests/nested_dask/conftest.py @@ -1,4 +1,4 @@ -import dask_nested as dn +import nested_dask as nd import nested_pandas as npd import numpy as np import pytest @@ -23,10 +23,10 @@ def test_dataset(): } layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index() - base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5) - layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10) + base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5) + layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10) - return base_dn.add_nested(layer_dn, "nested") + return base_nd.add_nested(layer_nd, "nested") @pytest.fixture @@ -53,10 +53,10 @@ def test_dataset_with_nans(): } layer_nf = npd.NestedFrame(data=layer_data).set_index("index") - base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5) - layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10) + base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5) + layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10) - return base_dn.add_nested(layer_dn, "nested") + return base_nd.add_nested(layer_nd, "nested") @pytest.fixture @@ -78,7 +78,7 @@ def test_dataset_no_add_nested(): } layer_nf = npd.NestedFrame(data=layer_data).set_index("index") - base_dn = dn.NestedFrame.from_nested_pandas(base_nf, npartitions=5) - layer_dn = dn.NestedFrame.from_nested_pandas(layer_nf, npartitions=10) + base_nd = nd.NestedFrame.from_nested_pandas(base_nf, npartitions=5) + layer_nd = nd.NestedFrame.from_nested_pandas(layer_nf, npartitions=10) - return (base_dn, layer_dn) + return (base_nd, layer_nd) diff --git a/tests/dask_nested/test_accessor.py b/tests/nested_dask/test_accessor.py similarity index 100% rename from tests/dask_nested/test_accessor.py rename to tests/nested_dask/test_accessor.py diff --git a/tests/dask_nested/test_datasets.py b/tests/nested_dask/test_datasets.py similarity index 66% rename from tests/dask_nested/test_datasets.py rename to tests/nested_dask/test_datasets.py index b174d27..26f9ad7 100644 --- a/tests/dask_nested/test_datasets.py +++ b/tests/nested_dask/test_datasets.py @@ -1,13 +1,13 @@ -import dask_nested as dn +import nested_dask as nd def test_generate_data(): """test the dataset generator function""" # test the seed - generate_1 = dn.datasets.generate_data(10, 100, npartitions=2, seed=1) - generate_2 = dn.datasets.generate_data(10, 100, npartitions=2, seed=1) - generate_3 = dn.datasets.generate_data(10, 100, npartitions=2, seed=2) + generate_1 = nd.datasets.generate_data(10, 100, npartitions=2, seed=1) + generate_2 = nd.datasets.generate_data(10, 100, npartitions=2, seed=1) + generate_3 = nd.datasets.generate_data(10, 100, npartitions=2, seed=2) assert generate_1.compute().equals(generate_2.compute()) assert not generate_1.compute().equals(generate_3.compute()) diff --git a/tests/dask_nested/test_io.py b/tests/nested_dask/test_io.py similarity index 86% rename from tests/dask_nested/test_io.py rename to tests/nested_dask/test_io.py index d7f11ec..c02c051 100644 --- a/tests/dask_nested/test_io.py +++ b/tests/nested_dask/test_io.py @@ -1,4 +1,4 @@ -import dask_nested as dn +import nested_dask as nd def test_read_parquet(test_dataset, tmp_path): @@ -16,8 +16,8 @@ def test_read_parquet(test_dataset, tmp_path): test_dataset[["a", "b"]].to_parquet(test_save_path, write_index=True) # Now read - base = dn.read_parquet(test_save_path, calculate_divisions=True) - nested = dn.read_parquet(nested_save_path, calculate_divisions=True) + base = nd.read_parquet(test_save_path, calculate_divisions=True) + nested = nd.read_parquet(nested_save_path, calculate_divisions=True) base = base.add_nested(nested, "nested") diff --git a/tests/dask_nested/test_nestedframe.py b/tests/nested_dask/test_nestedframe.py similarity index 94% rename from tests/dask_nested/test_nestedframe.py rename to tests/nested_dask/test_nestedframe.py index 5dfed86..0dfa3bb 100644 --- a/tests/dask_nested/test_nestedframe.py +++ b/tests/nested_dask/test_nestedframe.py @@ -1,4 +1,4 @@ -import dask_nested as dn +import nested_dask as nd import numpy as np import pytest from nested_pandas.series.dtype import NestedDtype @@ -31,7 +31,7 @@ def test_add_nested(test_dataset_no_add_nested): base_with_nested = base.add_nested(layer, "nested") # Check that the result is a nestedframe - assert isinstance(base_with_nested, dn.NestedFrame) + assert isinstance(base_with_nested, nd.NestedFrame) # Check that there's a new nested column with the correct dtype assert "nested" in base_with_nested.columns @@ -109,7 +109,7 @@ def test_to_parquet_combined(test_dataset, tmp_path): test_dataset.to_parquet(test_save_path, by_layer=False) # load back from parquet - loaded_dataset = dn.read_parquet(test_save_path, calculate_divisions=True) + loaded_dataset = nd.read_parquet(test_save_path, calculate_divisions=True) # todo: file bug for this and investigate loaded_dataset = loaded_dataset.reset_index().set_index("index") @@ -131,8 +131,8 @@ def test_to_parquet_by_layer(test_dataset, tmp_path): test_dataset.to_parquet(test_save_path, by_layer=True, write_index=True) # load back from parquet - loaded_base = dn.read_parquet(test_save_path / "base", calculate_divisions=True) - loaded_nested = dn.read_parquet(test_save_path / "nested", calculate_divisions=True) + loaded_base = nd.read_parquet(test_save_path / "base", calculate_divisions=True) + loaded_nested = nd.read_parquet(test_save_path / "nested", calculate_divisions=True) loaded_dataset = loaded_base.add_nested(loaded_nested, "nested") From 24b4e2e90f56e5633363b02e44accefaceae5416 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 21 May 2024 10:37:21 -0700 Subject: [PATCH 2/3] add a few missing refs --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d5ac265..81dcc76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ requires = [ build-backend = "setuptools.build_meta" [tool.setuptools_scm] -write_to = "src/dask_nested/_version.py" +write_to = "src/nested_dask/_version.py" [tool.pytest.ini_options] testpaths = [ @@ -107,7 +107,7 @@ ignore = [ "UP028", # Allow yield in for loop ] [tool.setuptools.package-data] -dask_nested = ["py.typed"] +nested_dask = ["py.typed"] [tool.coverage.run] -omit=["src/dask_nested/_version.py"] +omit=["src/nested_dask/_version.py"] From 3456f85b61c17597c90fd4668b44c8c9ff1352dc Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 21 May 2024 11:50:36 -0700 Subject: [PATCH 3/3] two more fixes --- docs/index.rst | 4 ++-- src/nested_dask/datasets/generation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 0a28bf5..30f7c54 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,8 @@ -.. dask_nested documentation main file. +.. nested_dask documentation main file. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to dask_nested's documentation! +Welcome to nested-dask's documentation! ======================================================================================== Dev Guide - Getting Started diff --git a/src/nested_dask/datasets/generation.py b/src/nested_dask/datasets/generation.py index 70f1754..2d4b9ee 100644 --- a/src/nested_dask/datasets/generation.py +++ b/src/nested_dask/datasets/generation.py @@ -36,7 +36,7 @@ def generate_data(n_base, n_layer, npartitions=1, seed=None) -> nd.NestedFrame: # Use nested-pandas generator base_nf = datasets.generate_data(n_base, n_layer, seed=seed) - # Convert to dask-nested + # Convert to nested-dask base_nf = nd.NestedFrame.from_nested_pandas(base_nf).repartition(npartitions=npartitions) return base_nf