From 4bc8517adde6319aced8975249d51789a13e8a31 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 26 Nov 2024 13:43:27 +0000 Subject: [PATCH] Rewrite Cubed intro docs (#622) * Rewrite Cubed intro docs * Add 'scalable' more prominently * Rewrite top-level README --- README.md | 22 ++++++++------------ docs/conf.py | 4 +++- docs/index.md | 48 ++++++++++++++++++++++++++----------------- docs/requirements.txt | 3 ++- pyproject.toml | 2 +- 5 files changed, 43 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 4e5782f1..0766e349 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,14 @@ # Cubed -## Bounded-memory serverless distributed N-dimensional array processing +## Scalable out-of-core array processing in Python -Cubed is a distributed N-dimensional array library implemented in Python using bounded-memory serverless processing and Zarr for storage. +Cubed is a Python library for scalable out-of-core multi-dimensional array processing with bounded memory. -- Implements the [Python Array API standard](https://data-apis.org/array-api/latest/) (see [coverage status](./api_status.md)) -- Guaranteed maximum memory usage for standard array functions -- Follows [Dask Array](https://docs.dask.org/en/stable/array.html)'s chunked array API (`map_blocks`, `map_overlap`, `rechunk`, `apply_gufunc`, etc) -- [Zarr](https://zarr.readthedocs.io/en/stable/) for persistent and intermediate storage -- Multiple serverless runtimes: Python (in-process), [Lithops](https://lithops-cloud.github.io/), [Modal](https://modal.com/), [Apache Beam](https://beam.apache.org/) -- Integration with [Xarray](https://xarray.dev/) via [cubed-xarray](https://github.com/xarray-contrib/cubed-xarray) +- Cubed provides NumPy and Xarray APIs for processing your multi-dimensional array data +- Cubed is a drop-in replacement for Dask's Array API +- Cubed will tell you if your computation would run out of memory *before* running it +- Cubed is designed to be robust to failures and will reliably complete a computation +- Cubed can process hundreds of GB of array data on your laptop using all available cores +- Cubed is horizontally scalable and stateless, and can scale to multi-TB datasets in the cloud [Documentation](https://cubed-dev.github.io/cubed/) - -### Articles - -[Cubed: Bounded-memory serverless array processing in xarray](https://xarray.dev/blog/cubed-xarray) - -[Optimizing Cubed](https://medium.com/pangeo/optimizing-cubed-7a0b8f65f5b7) diff --git a/docs/conf.py b/docs/conf.py index 96fc62f8..b1b62c60 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,14 +33,16 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - "myst_parser", + "myst_nb", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.githubpages", "sphinx.ext.napoleon", + "sphinx_design", ] autodoc_mock_imports = ["modal"] autodoc_typehints = "none" +myst_enable_extensions = ["colon_fence"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/index.md b/docs/index.md index 2ca39d72..dbbf0c25 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,23 +1,34 @@ -# Cubed +# Cubed: Scalable out-of-core array processing in Python -## Bounded-memory serverless distributed N-dimensional array processing +Cubed is a Python library for scalable out-of-core multi-dimensional array processing with bounded memory. -Cubed is a distributed N-dimensional array library implemented in Python using bounded-memory serverless processing and Zarr for storage. +::::{grid} 2 +:gutter: 2 -- Implements the [Python Array API standard](https://data-apis.org/array-api/latest/) -- Guaranteed maximum memory usage for standard array functions -- Follows [Dask Array](https://docs.dask.org/en/stable/array.html)'s chunked array API (`map_blocks`, `map_overlap`, `rechunk`, `apply_gufunc`, etc) -- [Zarr](https://zarr.readthedocs.io/en/stable/) for storage -- Multiple serverless runtimes: Python (in-process), [Lithops](https://lithops-cloud.github.io/), [Modal](https://modal.com/), [Apache Beam](https://beam.apache.org/) -- Integration with [Xarray](https://xarray.dev/) via [cubed-xarray](https://github.com/xarray-contrib/cubed-xarray) - -## Documentation +:::{grid-item-card} Familiar API +Cubed provides NumPy and Xarray APIs for processing your multi-dimensional array data +::: +:::{grid-item-card} Dask replacement +Cubed is a drop-in replacement for Dask's Array API +::: +:::{grid-item-card} Predictable memory usage +Cubed will tell you if your computation would run out of memory *before* running it +::: +:::{grid-item-card} Reliable +Cubed is designed to be robust to failures and will reliably complete a computation +::: +:::{grid-item-card} Run locally +Cubed can process hundreds of GB of array data on your laptop using all available cores +::: +:::{grid-item-card} Scale in the cloud +Cubed is horizontally scalable and stateless, and can scale to multi-TB datasets in the cloud +::: +:::: ```{toctree} ---- -maxdepth: 2 -caption: For users ---- +:hidden: +:maxdepth: 2 +:caption: For users getting-started/index user-guide/index Intro slides @@ -31,10 +42,9 @@ articles ``` ```{toctree} ---- -maxdepth: 2 -caption: For developers ---- +:hidden: +:maxdepth: 2 +:caption: For developers design operations computation diff --git a/docs/requirements.txt b/docs/requirements.txt index 6f8d4809..c9f6531e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -19,4 +19,5 @@ zarr # docs sphinx-book-theme sphinx-autobuild -myst-parser +sphinx-design +myst-nb diff --git a/pyproject.toml b/pyproject.toml index 2f077005..25f9d1c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ {name = "Tom White", email = "tom.e.white@gmail.com"}, ] license = {text = "Apache License 2.0"} -description = "Bounded-memory serverless distributed N-dimensional array processing" +description = "Scalable out-of-core array processing in Python" readme = {file = "README.md", content-type = "text/markdown"} classifiers = [ "Development Status :: 4 - Beta",