Rewrite Cubed intro docs (#622)

* Rewrite Cubed intro docs
* Add 'scalable' more prominently
* Rewrite top-level README
cubed-dev · Nov 26, 2024 · 4bc8517 · 4bc8517
1 parent 003bf92
commit 4bc8517
Show file tree

Hide file tree

Showing 5 changed files with 43 additions and 36 deletions.
diff --git a/README.md b/README.md
@@ -1,20 +1,14 @@
 # Cubed
 
-## Bounded-memory serverless distributed N-dimensional array processing
+## Scalable out-of-core array processing in Python
 
-Cubed is a distributed N-dimensional array library implemented in Python using bounded-memory serverless processing and Zarr for storage.
+Cubed is a Python library for scalable out-of-core multi-dimensional array processing with bounded memory.
 
-- Implements the [Python Array API standard](https://data-apis.org/array-api/latest/) (see [coverage status](./api_status.md))
-- Guaranteed maximum memory usage for standard array functions
-- Follows [Dask Array](https://docs.dask.org/en/stable/array.html)'s chunked array API (`map_blocks`, `map_overlap`, `rechunk`, `apply_gufunc`, etc)
-- [Zarr](https://zarr.readthedocs.io/en/stable/) for persistent and intermediate storage
-- Multiple serverless runtimes: Python (in-process), [Lithops](https://lithops-cloud.github.io/), [Modal](https://modal.com/), [Apache Beam](https://beam.apache.org/)
-- Integration with [Xarray](https://xarray.dev/) via [cubed-xarray](https://github.com/xarray-contrib/cubed-xarray)
+- Cubed provides NumPy and Xarray APIs for processing your multi-dimensional array data
+- Cubed is a drop-in replacement for Dask's Array API
+- Cubed will tell you if your computation would run out of memory *before* running it
+- Cubed is designed to be robust to failures and will reliably complete a computation
+- Cubed can process hundreds of GB of array data on your laptop using all available cores
+- Cubed is horizontally scalable and stateless, and can scale to multi-TB datasets in the cloud
 
 [Documentation](https://cubed-dev.github.io/cubed/)
-
-### Articles
-
-[Cubed: Bounded-memory serverless array processing in xarray](https://xarray.dev/blog/cubed-xarray)
-
-[Optimizing Cubed](https://medium.com/pangeo/optimizing-cubed-7a0b8f65f5b7)
diff --git a/docs/conf.py b/docs/conf.py
@@ -33,14 +33,16 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    "myst_parser",
+    "myst_nb",
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
     "sphinx.ext.githubpages",
     "sphinx.ext.napoleon",
+    "sphinx_design",
 ]
 autodoc_mock_imports = ["modal"]
 autodoc_typehints = "none"
+myst_enable_extensions = ["colon_fence"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]

diff --git a/docs/index.md b/docs/index.md
@@ -1,23 +1,34 @@
-# Cubed
+# Cubed: Scalable out-of-core array processing in Python
 
-## Bounded-memory serverless distributed N-dimensional array processing
+Cubed is a Python library for scalable out-of-core multi-dimensional array processing with bounded memory.
 
-Cubed is a distributed N-dimensional array library implemented in Python using bounded-memory serverless processing and Zarr for storage.
+::::{grid} 2
+:gutter: 2
 
-- Implements the [Python Array API standard](https://data-apis.org/array-api/latest/)
-- Guaranteed maximum memory usage for standard array functions
-- Follows [Dask Array](https://docs.dask.org/en/stable/array.html)'s chunked array API (`map_blocks`, `map_overlap`, `rechunk`, `apply_gufunc`, etc)
-- [Zarr](https://zarr.readthedocs.io/en/stable/) for storage
-- Multiple serverless runtimes: Python (in-process), [Lithops](https://lithops-cloud.github.io/), [Modal](https://modal.com/), [Apache Beam](https://beam.apache.org/)
-- Integration with [Xarray](https://xarray.dev/) via [cubed-xarray](https://github.com/xarray-contrib/cubed-xarray)
-
-## Documentation
+:::{grid-item-card}  Familiar API
+Cubed provides NumPy and Xarray APIs for processing your multi-dimensional array data
+:::
+:::{grid-item-card}  Dask replacement
+Cubed is a drop-in replacement for Dask's Array API
+:::
+:::{grid-item-card}  Predictable memory usage
+Cubed will tell you if your computation would run out of memory *before* running it
+:::
+:::{grid-item-card}  Reliable
+Cubed is designed to be robust to failures and will reliably complete a computation
+:::
+:::{grid-item-card}  Run locally
+Cubed can process hundreds of GB of array data on your laptop using all available cores
+:::
+:::{grid-item-card}  Scale in the cloud
+Cubed is horizontally scalable and stateless, and can scale to multi-TB datasets in the cloud
+:::
+::::
 
 ```{toctree}
----
-maxdepth: 2
-caption: For users
----
+:hidden:
+:maxdepth: 2
+:caption: For users
 getting-started/index
 user-guide/index
 Intro slides <https://cubed-dev.github.io/cubed/cubed-intro.slides.html>
@@ -31,10 +42,9 @@ articles
 ```
 
 ```{toctree}
----
-maxdepth: 2
-caption: For developers
----
+:hidden:
+:maxdepth: 2
+:caption: For developers
 design
 operations
 computation

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -19,4 +19,5 @@ zarr
 # docs
 sphinx-book-theme
 sphinx-autobuild
-myst-parser
+sphinx-design
+myst-nb
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ authors = [
     {name = "Tom White", email = "[email protected]"},
 ]
 license = {text = "Apache License 2.0"}
-description = "Bounded-memory serverless distributed N-dimensional array processing"
+description = "Scalable out-of-core array processing in Python"
 readme = {file = "README.md", content-type = "text/markdown"}
 classifiers = [
     "Development Status :: 4 - Beta",