Improve docs (#8)

theislab · Nov 21, 2024 · fdc2eb6 · fdc2eb6
1 parent 4d9d01a
commit fdc2eb6
Show file tree

Hide file tree

Showing 4 changed files with 109 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -25,8 +25,22 @@ Unsupervised Deep Disentangled Representation of Single-Cell Omics
 
 Please refer to the [documentation][link-docs]. In particular, the
 
--   [API documentation][link-api]
--   [Tutorials][link-tutorials]
+-   [Tutorials][link-tutorials], especially
+    -   [A demo](https://drvi.readthedocs.io/latest/notebooks/general_pipeline.html) of how to train DRVI and interpret the latent dimensions.
+-   [API documentation][link-api], especially
+    -   [DRVI Model](https://drvi.readthedocs.io/latest/api/generated/drvi.model.DRVI.html)
+    -   [DRVI utility functions (tools)](https://drvi.readthedocs.io/latest/api/tools.html)
+    -   [DRVI plotting functions](https://drvi.readthedocs.io/latest/api/plotting.html)
+
+## System requirements
+
+We recommend running DRVI on a recent Linux distribution.
+DRVI is actively tested on the latest LTS version of Ubuntu (currently 24.04 LTS).
+
+[//]: # "TODO: remove ubuntu version later"
+
+For optimal performance, we highly recommend using a GPU with CUDA capabilities.
+CPU-only systems are supported, but training on them is considerably slower.
 
 ## Installation
 
@@ -35,7 +49,9 @@ Python installed, we recommend installing [Mambaforge](https://github.com/conda-
 
 There are several options to install drvi:
 
-1. Install the latest release of `drvi-py` from [PyPI][link-pypi]:
+[//]: # "TODO: remove install time!"
+
+1. Install the latest release of `drvi-py` from [PyPI][link-pypi], which should take around two minutes:
 
 ```bash
 pip install drvi-py
@@ -47,6 +63,11 @@ pip install drvi-py
 pip install git+https://github.com/theislab/drvi.git@main
 ```
 
+Please be sure to install a version of [PyTorch][pytorch-home] that is compatible with your GPU.
+Dependencies are installed automatically; if needed, see `pyproject.toml` for the supported version of each dependency.
+
+[pytorch-home]: https://pytorch.org/
+
 ## Release notes
 
 See the [changelog][changelog].
@@ -66,6 +87,11 @@ If DRVI is helpful in your research, please consider citing the following paper:
 > **Unsupervised deep disentangled representation of single-cell omics.**
 > bioRxiv 2024.11.06.622266 (2024) [doi:10.1101/2024.11.06.622266](https://doi.org/10.1101/2024.11.06.622266).
 
+## Reproducibility
+
+Code, notebooks, and instructions to reproduce the results from the paper are available at the [reproducibility repository][repr-repo].
+
+[repr-repo]: https://github.com/theislab/drvi_reproducibility
 [issue-tracker]: https://github.com/theislab/drvi/issues
 [changelog]: https://drvi.readthedocs.io/latest/changelog.html
 [link-docs]: https://drvi.readthedocs.io

diff --git a/docs/notebooks/general_pipeline.ipynb b/docs/notebooks/general_pipeline.ipynb
@@ -142,7 +142,13 @@
     "# Check if the file exists\n",
     "if [ ! -f tmp/immune_all.h5ad ]; then\n",
     "  # Download the file if it does not exist\n",
-    "  wget -O tmp/immune_all.h5ad https://figshare.com/ndownloader/files/25717328\n",
+    "  { # try\n",
+    "      wget -O tmp/immune_all.h5ad https://figshare.com/ndownloader/files/25717328\n",
+    "      #save your output\n",
+    "  } || \\\n",
+    "  { # catch\n",
+    "      curl -L https://figshare.com/ndownloader/files/25717328 -o tmp/immune_all.h5ad\n",
+    "  }\n",
     "  echo \"File downloaded successfully.\"\n",
     "else\n",
     "  echo \"File already exists.\"\n",
@@ -480,12 +486,38 @@
     }
    ],
    "source": [
+    "# For cpu training you should add the following line to the model.train parameters:\n",
+    "# accelerator=\"cpu\", devices=1,\n",
+    "#\n",
+    "# For mps acceleration on macbooks, add the following line to the model.train parameters:\n",
+    "# accelerator=\"mps\", devices=1,\n",
+    "#\n",
+    "# For gpu training don't provide any additional parameter.\n",
+    "# More details here: https://lightning.ai/docs/pytorch/stable/accelerators/gpu_basic.html\n",
+    "\n",
+    "n_epochs = 400\n",
+    "\n",
     "# train the model\n",
     "model.train(\n",
-    "    max_epochs=400,\n",
+    "    max_epochs=n_epochs,\n",
     "    early_stopping=False,\n",
     "    early_stopping_patience=20,\n",
-    ")"
+    "    # mps\n",
+    "    # accelerator=\"mps\", devices=1,\n",
+    "    # cpu\n",
+    "    # accelerator=\"cpu\", devices=1,\n",
+    "    # gpu: no additional parameter\n",
+    "    #\n",
+    "    # No need to provide `plan_kwargs` if n_epochs >= 400.\n",
+    "    plan_kwargs={\n",
+    "        \"n_epochs_kl_warmup\": n_epochs,\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "# Runtime:\n",
+    "# The runtime for CPU laptop (M1) is 208 minutes\n",
+    "# The runtime for Macbook gpu (M1) is 64 minutes\n",
+    "# The runtime for GPU (A100) is 17 minutes"
    ]
   },
   {
@@ -2751,9 +2783,9 @@
    "formats": "ipynb,py:light"
   },
   "kernelspec": {
-   "display_name": "drvi",
+   "display_name": "drvi-repr",
    "language": "python",
-   "name": "drvi"
+   "name": "drvi-repr"
   },
   "language_info": {
    "codemirror_mode": {
@@ -2765,7 +2797,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.10.15"
   }
  },
  "nbformat": 4,

diff --git a/docs/notebooks/general_pipeline.py b/docs/notebooks/general_pipeline.py
@@ -6,11 +6,11 @@
 #       extension: .py
 #       format_name: light
 #       format_version: '1.5'
-#       jupytext_version: 1.15.2
+#       jupytext_version: 1.16.4
 #   kernelspec:
-#     display_name: drvi
+#     display_name: drvi-repr
 #     language: python
-#     name: drvi
+#     name: drvi-repr
 # ---
 
 # # General training and interpretability pipeline
@@ -61,7 +61,13 @@
 # # Check if the file exists
 # if [ ! -f tmp/immune_all.h5ad ]; then
 #   # Download the file if it does not exist
-#   wget -O tmp/immune_all.h5ad https://figshare.com/ndownloader/files/25717328
+#   { # try
+#       wget -O tmp/immune_all.h5ad https://figshare.com/ndownloader/files/25717328
+#       #save your output
+#   } || \
+#   { # catch
+#       curl -L https://figshare.com/ndownloader/files/25717328 -o tmp/immune_all.h5ad
+#   }
 #   echo "File downloaded successfully."
 # else
 #   echo "File already exists."
@@ -121,15 +127,42 @@
     decoder_dims=[128, 128],
 )
 model
-# -
+
+# +
+# For cpu training you should add the following line to the model.train parameters:
+# accelerator="cpu", devices=1,
+#
+# For mps acceleration on macbooks, add the following line to the model.train parameters:
+# accelerator="mps", devices=1,
+#
+# For gpu training don't provide any additional parameter.
+# More details here: https://lightning.ai/docs/pytorch/stable/accelerators/gpu_basic.html
+
+n_epochs = 400
 
 # train the model
 model.train(
-    max_epochs=400,
+    max_epochs=n_epochs,
     early_stopping=False,
     early_stopping_patience=20,
+    # mps
+    # accelerator="mps", devices=1,
+    # cpu
+    # accelerator="cpu", devices=1,
+    # gpu: no additional parameter
+    #
+    # No need to provide `plan_kwargs` if n_epochs >= 400.
+    plan_kwargs={
+        "n_epochs_kl_warmup": n_epochs,
+    },
 )
 
+# Runtime:
+# The runtime for CPU laptop (M1) is 208 minutes
+# The runtime for Macbook gpu (M1) is 64 minutes
+# The runtime for GPU (A100) is 17 minutes
+# -
+
 # Save the model
 model.save("tmp/drvi_general_pipeline_immune_128", overwrite=True)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -68,7 +68,8 @@ dev = [
     "twine>=4.0.2",
 ]
 doc = [
-    "merlin-dataloader==23.8.0",
+    # Disabled for now because the NVIDIA servers return 404
+    # "merlin-dataloader==23.8.0",
     "docutils>=0.8,!=0.18.*,!=0.19.*",
     "sphinx>=4",
     "sphinx-book-theme>=1.0.0",
@@ -89,7 +90,7 @@ test = [
 ]
 tutorials = [
     "leidenalg",
-    "gprofiler",
+    "gprofiler-official==0.3.5",
 ]
 merlin = [
     "merlin-dataloader==23.8.0",