From 95f0a33e377eff36bf2b20f25748489dbdb5e5b2 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 19 Sep 2024 11:46:22 -0400 Subject: [PATCH 01/13] DOC v24.12 Updates [skip ci] --- .github/workflows/build.yaml | 10 +++---- .github/workflows/pr.yaml | 12 ++++----- .github/workflows/test.yaml | 2 +- VERSION | 2 +- ci/build_docs.sh | 2 +- .../all_cuda-114_arch-x86_64.yaml | 14 +++++----- .../all_cuda-118_arch-x86_64.yaml | 14 +++++----- .../all_cuda-125_arch-x86_64.yaml | 14 +++++----- dependencies.yaml | 26 +++++++++---------- docs/source/explicit_comms.rst | 2 +- pyproject.toml | 10 +++---- 11 files changed, 54 insertions(+), 54 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 67bbd027..3d097bcd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -72,7 +72,7 @@ jobs: wheel-publish: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 76014652..0e20bdaf 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,26 +18,26 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 conda-python-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -46,7 +46,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request # Package is pure Python and only ever requires one build. diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1a0e7d87..631a6173 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/VERSION b/VERSION index 7c7ba044..af28c42b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.10.00 +24.12.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 42103004..7850211e 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -23,7 +23,7 @@ rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ dask-cuda -export RAPIDS_VERSION_NUMBER="24.10" +export RAPIDS_VERSION_NUMBER="24.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build Python docs" diff --git a/conda/environments/all_cuda-114_arch-x86_64.yaml b/conda/environments/all_cuda-114_arch-x86_64.yaml index 3cfd9cb2..3c327ff0 100644 --- a/conda/environments/all_cuda-114_arch-x86_64.yaml +++ b/conda/environments/all_cuda-114_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.4 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -25,13 +25,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-114_arch-x86_64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b7b99751..3931f3bf 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.8 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -25,13 +25,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- 
rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 652a8f0c..760ae971 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -11,10 +11,10 @@ dependencies: - cuda-nvcc-impl - cuda-nvrtc - cuda-version=12.5 -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -26,13 +26,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 9e6b3a10..59ac8c01 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -158,7 +158,7 @@ dependencies: - numpy>=1.23,<3.0a0 - pandas>=1.3 - pynvml>=11.0.0,<11.5 - - rapids-dask-dependency==24.10.*,>=0.0.0a0 + - rapids-dask-dependency==24.12.*,>=0.0.0a0 - zict>=2.0.0 test_python: common: @@ -168,13 +168,13 @@ dependencies: - pytest-cov - output_types: [conda] packages: - - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 - - &dask_cudf_unsuffixed dask-cudf==24.10.*,>=0.0.0a0 - - distributed-ucxx==0.40.*,>=0.0.0a0 - - &kvikio_unsuffixed kvikio==24.10.*,>=0.0.0a0 - - &ucx_py_unsuffixed ucx-py==0.40.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 + - &dask_cudf_unsuffixed dask-cudf==24.12.*,>=0.0.0a0 + - distributed-ucxx==0.41.*,>=0.0.0a0 + - &kvikio_unsuffixed kvikio==24.12.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 - ucx-proc=*=gpu - - ucxx==0.40.*,>=0.0.0a0 + - ucxx==0.41.*,>=0.0.0a0 specific: - output_types: conda matrices: @@ -194,16 +194,16 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.10.*,>=0.0.0a0 - - dask-cudf-cu12==24.10.*,>=0.0.0a0 - - ucx-py-cu12==0.40.*,>=0.0.0a0 + - cudf-cu12==24.12.*,>=0.0.0a0 + - dask-cudf-cu12==24.12.*,>=0.0.0a0 + - ucx-py-cu12==0.41.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.10.*,>=0.0.0a0 - - dask-cudf-cu11==24.10.*,>=0.0.0a0 - - ucx-py-cu11==0.40.*,>=0.0.0a0 + - cudf-cu11==24.12.*,>=0.0.0a0 + - dask-cudf-cu11==24.12.*,>=0.0.0a0 + - ucx-py-cu11==0.41.*,>=0.0.0a0 - matrix: packages: - *cudf_unsuffixed diff --git a/docs/source/explicit_comms.rst b/docs/source/explicit_comms.rst index af317056..db621977 100644 --- a/docs/source/explicit_comms.rst +++ b/docs/source/explicit_comms.rst @@ -14,4 +14,4 @@ Usage In order to use explicit-comms in Dask/Distributed automatically, simply define the environment variable ``DASK_EXPLICIT_COMMS=True`` or setting the ``"explicit-comms"`` key in the `Dask configuration `_. 
-It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance.
+It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance.
diff --git a/pyproject.toml b/pyproject.toml
index 730225ad..fcf57276 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "numpy>=1.23,<3.0a0",
     "pandas>=1.3",
     "pynvml>=11.0.0,<11.5",
-    "rapids-dask-dependency==24.10.*,>=0.0.0a0",
+    "rapids-dask-dependency==24.12.*,>=0.0.0a0",
     "zict>=2.0.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
@@ -50,12 +50,12 @@ docs = [
     "sphinx-rtd-theme>=0.5.1",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`.
 test = [
-    "cudf==24.10.*,>=0.0.0a0",
-    "dask-cudf==24.10.*,>=0.0.0a0",
-    "kvikio==24.10.*,>=0.0.0a0",
+    "cudf==24.12.*,>=0.0.0a0",
+    "dask-cudf==24.12.*,>=0.0.0a0",
+    "kvikio==24.12.*,>=0.0.0a0",
     "pytest",
     "pytest-cov",
-    "ucx-py==0.40.*,>=0.0.0a0",
+    "ucx-py==0.41.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]

From 93a1ee23a43563f33fba8a5a8761c03ccef25a1c Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Wed, 9 Oct 2024 18:12:23 +0200
Subject: [PATCH 02/13] Limit output of pytest durations (#1393)

Durations output was previously increased to show all tests, which
helped us debug timeouts. Those timeouts have since become much less
frequent, so limiting the report to the 50 longest-running tests keeps
log lengths down; we may remove the option entirely if it remains
unimportant.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/dask-cuda/pull/1393
---
 ci/test_python.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/test_python.sh b/ci/test_python.sh
index 78330a40..32c0d940 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -52,7 +52,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \
 UCX_MEMTYPE_CACHE=n \
 timeout 60m pytest \
   -vv \
-  --durations=0 \
+  --durations=50 \
   --capture=no \
   --cache-clear \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda.xml" \
@@ -73,7 +73,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \
 UCX_MEMTYPE_CACHE=n \
 timeout 30m pytest \
   -vv \
-  --durations=0 \
+  --durations=50 \
   --capture=no \
   --cache-clear \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda-legacy.xml" \

From f775d883c1149b00a462a041cf6589f9081aa4fb Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Thu, 10 Oct 2024 12:59:31 -0500
Subject: [PATCH 03/13] make conda installs in CI stricter (#1395)

Contributes to https://github.com/rapidsai/build-planning/issues/106

Proposes specifying the RAPIDS version in `conda install` calls that
install CI artifacts, to reduce the risk of CI jobs picking up
artifacts from other releases.
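
As a sketch of the pattern (this mirrors the change to `ci/build_docs.sh`
in the diff below; `rapids-version` is the CI helper used there to print
the full RAPIDS version being built):

```
# before: any available dask-cuda package could be picked up
rapids-mamba-retry install \
  --channel "${PYTHON_CHANNEL}" \
  dask-cuda

# after: pin to the exact version produced by this CI run
RAPIDS_VERSION="$(rapids-version)"
rapids-mamba-retry install \
  --channel "${PYTHON_CHANNEL}" \
  "dask-cuda=${RAPIDS_VERSION}"
```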
Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/dask-cuda/pull/1395
---
 ci/build_docs.sh             | 7 ++++---
 ci/release/update-version.sh | 1 -
 ci/test_python.sh            | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 7850211e..58da36c7 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -5,6 +5,8 @@ set -euo pipefail
 rapids-logger "Create test conda environment"
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION="$(rapids-version)"
+
 rapids-dependency-file-generator \
   --output conda \
   --file-key docs \
@@ -21,9 +23,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 
 rapids-mamba-retry install \
   --channel "${PYTHON_CHANNEL}" \
-  dask-cuda
+  "dask-cuda=${RAPIDS_VERSION}"
 
-export RAPIDS_VERSION_NUMBER="24.12"
 export RAPIDS_DOCS_DIR="$(mktemp -d)"
 
 rapids-logger "Build Python docs"
@@ -33,4 +34,4 @@ mkdir -p "${RAPIDS_DOCS_DIR}/dask-cuda/"html
 mv _html/* "${RAPIDS_DOCS_DIR}/dask-cuda/html"
 popd
 
-rapids-upload-docs
+RAPIDS_VERSION_NUMBER="$(rapids-version-major-minor)" rapids-upload-docs
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 2dbe504c..b229d280 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -68,7 +68,6 @@ done
 for FILE in .github/workflows/*.yaml; do
   sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
 done
-sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
 
 # Docs referencing source code
 find docs/source/ -type f -name *.rst -print0 | while IFS= read -r -d '' filename; do
diff --git a/ci/test_python.sh b/ci/test_python.sh
index 32c0d940..33914172 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -5,6 +5,8 @@ set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION="$(rapids-version)"
+
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
@@ -29,7 +31,7 @@ rapids-print-env
 
 rapids-mamba-retry install \
   --channel "${PYTHON_CHANNEL}" \
-  dask-cuda
+  "dask-cuda=${RAPIDS_VERSION}"
 
 rapids-logger "Check GPU usage"
 nvidia-smi

From 8d88006a6a064165e8408dcb9c288059c6f98a7f Mon Sep 17 00:00:00 2001
From: Vibhu Jawa
Date: Sat, 12 Oct 2024 13:51:38 -0600
Subject: [PATCH 04/13] Enable Pytorch to share same memory pool as RMM via cli (#1392)

This PR closes: https://github.com/rapidsai/dask-cuda/issues/1281

Usage example:
```
from dask_cuda import LocalCUDACluster
from dask.distributed import Client

cluster = LocalCUDACluster(rmm_allocator_external_lib_list=["torch", "cupy"])
client = Client(cluster)
```

Verify that it is working:
```
def get_torch_allocator():
    import torch
    return torch.cuda.get_allocator_backend()

client.run(get_torch_allocator)
```
```
{'tcp://127.0.0.1:37167': 'pluggable',
 'tcp://127.0.0.1:38749': 'pluggable',
 'tcp://127.0.0.1:43109': 'pluggable',
 'tcp://127.0.0.1:44259': 'pluggable',
 'tcp://127.0.0.1:44953': 'pluggable',
 'tcp://127.0.0.1:45087': 'pluggable',
 'tcp://127.0.0.1:45623': 'pluggable',
 'tcp://127.0.0.1:45847': 'pluggable'}
```

Without it, the reported backend is `native`.

Context: This helps NeMo-Curator achieve more stable use of PyTorch
together with dask-cuda.

CC: @pentschev
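
For reference, a minimal sketch of the same setup from the command line,
using the `--set-rmm-allocator-for-libs` flag added in this PR (assuming
the `dask cuda worker` entrypoint and a scheduler already running at
`SCHEDULER_ADDRESS`):
```
dask cuda worker "${SCHEDULER_ADDRESS}" --set-rmm-allocator-for-libs "torch,cupy"
```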
Authors: - Vibhu Jawa (https://github.com/VibhuJawa) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1392 --- dask_cuda/cli.py | 14 ++++++- dask_cuda/cuda_worker.py | 2 + dask_cuda/local_cuda_cluster.py | 22 +++++++++++ dask_cuda/plugins.py | 67 +++++++++++++++++++++++++++++++++ dask_cuda/utils.py | 11 ++++++ 5 files changed, 115 insertions(+), 1 deletion(-) diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index a8c6d972..8101f020 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -13,7 +13,7 @@ from distributed.utils import import_term from .cuda_worker import CUDAWorker -from .utils import print_cluster_config +from .utils import CommaSeparatedChoice, print_cluster_config logger = logging.getLogger(__name__) @@ -164,6 +164,16 @@ def cuda(): incompatible with RMM pools and managed memory, trying to enable both will result in failure.""", ) +@click.option( + "--set-rmm-allocator-for-libs", + "rmm_allocator_external_lib_list", + type=CommaSeparatedChoice(["cupy", "torch"]), + default=None, + show_default=True, + help=""" + Set RMM as the allocator for external libraries. Provide a comma-separated + list of libraries to set, e.g., "torch,cupy".""", +) @click.option( "--rmm-release-threshold", default=None, @@ -351,6 +361,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, @@ -425,6 +436,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, diff --git a/dask_cuda/cuda_worker.py b/dask_cuda/cuda_worker.py index 3e03ed29..30c14450 100644 --- a/dask_cuda/cuda_worker.py +++ b/dask_cuda/cuda_worker.py @@ -47,6 +47,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -231,6 +232,7 @@ def del_pid_file(): release_threshold=rmm_release_threshold, log_directory=rmm_log_directory, track_allocations=rmm_track_allocations, + external_lib_list=rmm_allocator_external_lib_list, ), PreImport(pre_import), CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats), diff --git a/dask_cuda/local_cuda_cluster.py b/dask_cuda/local_cuda_cluster.py index c037223b..7a24df43 100644 --- a/dask_cuda/local_cuda_cluster.py +++ b/dask_cuda/local_cuda_cluster.py @@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster): The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also incompatible with RMM pools and managed memory. Trying to enable both will result in an exception. + rmm_allocator_external_lib_list: str, list or None, default None + List of external libraries for which to set RMM as the allocator. + Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string + (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``). + If ``None``, no external libraries will use RMM as their allocator. rmm_release_threshold: int, str or None, default None When ``rmm.async is True`` and the pool size grows beyond this value, unused memory held by the pool will be released at the next synchronization point. 
@@ -231,6 +236,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -265,6 +271,19 @@ def __init__( n_workers = len(CUDA_VISIBLE_DEVICES) if n_workers < 1: raise ValueError("Number of workers cannot be less than 1.") + + if rmm_allocator_external_lib_list is not None: + if isinstance(rmm_allocator_external_lib_list, str): + rmm_allocator_external_lib_list = [ + v.strip() for v in rmm_allocator_external_lib_list.split(",") + ] + elif not isinstance(rmm_allocator_external_lib_list, list): + raise ValueError( + "rmm_allocator_external_lib_list must be either a comma-separated " + "string or a list of strings. Examples: 'torch,cupy' " + "or ['torch', 'cupy']" + ) + # Set nthreads=1 when parsing mem_limit since it only depends on n_workers logger = logging.getLogger(__name__) self.memory_limit = parse_memory_limit( @@ -284,6 +303,8 @@ def __init__( self.rmm_managed_memory = rmm_managed_memory self.rmm_async = rmm_async self.rmm_release_threshold = rmm_release_threshold + self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list + if rmm_pool_size is not None or rmm_managed_memory or rmm_async: try: import rmm # noqa F401 @@ -437,6 +458,7 @@ def new_worker_spec(self): release_threshold=self.rmm_release_threshold, log_directory=self.rmm_log_directory, track_allocations=self.rmm_track_allocations, + external_lib_list=self.rmm_allocator_external_lib_list, ), PreImport(self.pre_import), CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats), diff --git a/dask_cuda/plugins.py b/dask_cuda/plugins.py index 122f93ff..cd1928af 100644 --- a/dask_cuda/plugins.py +++ b/dask_cuda/plugins.py @@ -1,5 +1,6 @@ import importlib import os +from typing import Callable, Dict from distributed import WorkerPlugin @@ -39,6 +40,7 @@ def __init__( release_threshold, log_directory, track_allocations, + external_lib_list, ): if initial_pool_size is None and maximum_pool_size is not None: raise ValueError( @@ -61,6 +63,7 @@ def __init__( self.logging = log_directory is not None self.log_directory = log_directory self.rmm_track_allocations = track_allocations + self.external_lib_list = external_lib_list def setup(self, worker=None): if self.initial_pool_size is not None: @@ -123,6 +126,70 @@ def setup(self, worker=None): mr = rmm.mr.get_current_device_resource() rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr)) + if self.external_lib_list is not None: + for lib in self.external_lib_list: + enable_rmm_memory_for_library(lib) + + +def enable_rmm_memory_for_library(lib_name: str) -> None: + """Enable RMM memory pool support for a specified third-party library. + + This function allows the given library to utilize RMM's memory pool if it supports + integration with RMM. The library name is passed as a string argument, and if the + library is compatible, its memory allocator will be configured to use RMM. + + Parameters + ---------- + lib_name : str + The name of the third-party library to enable RMM memory pool support for. + Supported libraries are "cupy" and "torch". + + Raises + ------ + ValueError + If the library name is not supported or does not have RMM integration. + ImportError + If the required library is not installed. 
+ """ + + # Mapping of supported libraries to their respective setup functions + setup_functions: Dict[str, Callable[[], None]] = { + "torch": _setup_rmm_for_torch, + "cupy": _setup_rmm_for_cupy, + } + + if lib_name not in setup_functions: + supported_libs = ", ".join(setup_functions.keys()) + raise ValueError( + f"The library '{lib_name}' is not supported for RMM integration. " + f"Supported libraries are: {supported_libs}." + ) + + # Call the setup function for the specified library + setup_functions[lib_name]() + + +def _setup_rmm_for_torch() -> None: + try: + import torch + except ImportError as e: + raise ImportError("PyTorch is not installed.") from e + + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + +def _setup_rmm_for_cupy() -> None: + try: + import cupy + except ImportError as e: + raise ImportError("CuPy is not installed.") from e + + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + class PreImport(WorkerPlugin): def __init__(self, libraries): diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py index ff4dbbae..74596fe2 100644 --- a/dask_cuda/utils.py +++ b/dask_cuda/utils.py @@ -9,6 +9,7 @@ from multiprocessing import cpu_count from typing import Optional +import click import numpy as np import pynvml import toolz @@ -764,3 +765,13 @@ def get_rmm_memory_resource_stack(mr) -> list: if isinstance(mr, rmm.mr.StatisticsResourceAdaptor): return mr.allocation_counts["current_bytes"] return None + + +class CommaSeparatedChoice(click.Choice): + def convert(self, value, param, ctx): + values = [v.strip() for v in value.split(",")] + for v in values: + if v not in self.choices: + choices_str = ", ".join(f"'{c}'" for c in self.choices) + self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})") + return values From dfcd399171cdaca93155fe7a1f47812db63c780c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 14 Oct 2024 19:06:16 +0200 Subject: [PATCH 05/13] Reenable UCXX in CI (#1396) UCXX CI tests had been previously disabled due to instabilities, see https://github.com/rapidsai/dask-cuda/pull/1270#issuecomment-1806295358, it should now be much more resilient so we should reenable them in preparation for the permanent migration to UCXX. 
Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/dask-cuda/pull/1396
---
 ci/test_python.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/test_python.sh b/ci/test_python.sh
index 33914172..18dd88cf 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -52,7 +52,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \
 UCXPY_IFNAME=eth0 \
 UCX_WARN_UNUSED_ENV_VARS=n \
 UCX_MEMTYPE_CACHE=n \
-timeout 60m pytest \
+timeout 90m pytest \
   -vv \
   --durations=50 \
   --capture=no \
@@ -62,7 +62,7 @@ timeout 90m pytest \
   --cov=dask_cuda \
   --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage.xml" \
   --cov-report=term \
-  tests -k "not ucxx"
+  tests
 popd
 
 rapids-logger "pytest explicit-comms (legacy dd)"
@@ -73,7 +73,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \
 UCXPY_IFNAME=eth0 \
 UCX_WARN_UNUSED_ENV_VARS=n \
 UCX_MEMTYPE_CACHE=n \
-timeout 30m pytest \
+timeout 60m pytest \
   -vv \
   --durations=50 \
   --capture=no \
@@ -83,7 +83,7 @@ timeout 60m pytest \
   --cov=dask_cuda \
   --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage-legacy.xml" \
   --cov-report=term \
-  tests/test_explicit_comms.py -k "not ucxx"
+  tests/test_explicit_comms.py
 popd
 
 rapids-logger "Run local benchmark (dask-expr)"

From 0f78f5d23029313ecb3647faca6c28933b52d130 Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Tue, 22 Oct 2024 23:39:51 +0200
Subject: [PATCH 06/13] Ignore legacy Dask dataframe warnings (#1397)

Ignore the legacy Dask DataFrame warnings, introduced in
https://github.com/dask/dask/pull/11437, stating that the legacy
implementation is soon going to be removed. The warning is only raised
when `DASK_DATAFRAME__QUERY_PLANNING=False`.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Richard (Rick) Zamora (https://github.com/rjzamora)
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/dask-cuda/pull/1397
---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index fcf57276..2266fb5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -128,6 +128,9 @@ filterwarnings = [
     # is enabled in both dask-cudf and dask-cuda.
     # See: https://github.com/rapidsai/dask-cuda/issues/1311
     "ignore:Dask DataFrame implementation is deprecated:DeprecationWarning",
+    # Dask now loudly throws warnings: https://github.com/dask/dask/pull/11437
+    # When the legacy implementation is removed we can remove this warning and stop running pytests with `DASK_DATAFRAME__QUERY_PLANNING=False`
+    "ignore:The legacy Dask DataFrame implementation is deprecated and will be removed in a future version.*:FutureWarning",
 ]
 
 [tool.rapids-build-backend]

From 4639a968bcbf9837085be5c8df40ef27d00bf009 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 23 Oct 2024 14:12:46 -0500
Subject: [PATCH 07/13] remove unnecessary cmake and sccache configuration (#1400)

Contributes to https://github.com/rapidsai/build-planning/issues/108

This is a pure Python project, so it doesn't need any CMake or `sccache`
configuration. This proposes removing that configuration to simplify the
build scripts a bit.

It also proposes updating the `rapids-dependency-file-generator`
pre-commit hook to its latest version, something I'm trying to roll out
across RAPIDS as part of
https://github.com/rapidsai/build-planning/issues/108.
Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/dask-cuda/pull/1400
---
 .pre-commit-config.yaml | 2 +-
 ci/build_python.sh      | 4 ----
 ci/build_wheel.sh       | 3 +--
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4707492a..a2202df3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
     hooks:
       - id: verify-alpha-spec
   - repo: https://github.com/rapidsai/dependency-file-generator
-    rev: v1.13.11
+    rev: v1.16.0
    hooks:
       - id: rapids-dependency-file-generator
         args: ["--clean"]
diff --git a/ci/build_python.sh b/ci/build_python.sh
index 48cece32..c12a0dde 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -5,12 +5,8 @@ set -euo pipefail
 
 rapids-configure-conda-channels
 
-source rapids-configure-sccache
-
 source rapids-date-string
 
-export CMAKE_GENERATOR=Ninja
-
 rapids-print-env
 
 rapids-generate-version > ./VERSION
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 828972dc..91c57231 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -3,11 +3,10 @@
 
 set -euo pipefail
 
-source rapids-configure-sccache
 source rapids-date-string
 
 rapids-generate-version > ./VERSION
 
-python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
+python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check
 
 RAPIDS_PY_WHEEL_NAME="dask-cuda" rapids-upload-wheels-to-s3 dist

From fc80d43bf22db405fe123be8324aaee7978d4956 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Fri, 1 Nov 2024 13:02:58 -0500
Subject: [PATCH 08/13] Switch pytest `traceback` to `native` (#1389)

In cudf & cuml we have observed ~10% and ~20% speedups, respectively, of
pytest suite execution by switching the pytest traceback to `--native`:

```
currently: 102474 passed, 2117 skipped, 902 xfailed in 892.16s (0:14:52)
--tb=short: 102474 passed, 2117 skipped, 902 xfailed in 898.99s (0:14:58)
--tb=no: 102474 passed, 2117 skipped, 902 xfailed in 815.98s (0:13:35)
--tb=native: 102474 passed, 2117 skipped, 902 xfailed in 820.92s (0:13:40)
```

This PR makes a similar change to the `dask-cuda` repo.

xref: https://github.com/rapidsai/cudf/pull/16851

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Peter Andreas Entschev (https://github.com/pentschev)

URL: https://github.com/rapidsai/dask-cuda/pull/1389
---
 dask_cuda/tests/pytest.ini | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 dask_cuda/tests/pytest.ini

diff --git a/dask_cuda/tests/pytest.ini b/dask_cuda/tests/pytest.ini
new file mode 100644
index 00000000..7b0a9f29
--- /dev/null
+++ b/dask_cuda/tests/pytest.ini
@@ -0,0 +1,4 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+[pytest]
+addopts = --tb=native

From 233376d8f111e2571f745e6f31729db9bc2183ac Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Tue, 5 Nov 2024 16:37:40 +0100
Subject: [PATCH 09/13] Add warmup runs and profile all iterations to benchmarks (#1402)

Add support for initial warmup runs in benchmarks and allow profiling
all iterations or just the last one. This is technically a breaking
change, since `--profile` now profiles all iterations, while the new
`--profile-last` option profiles only the last one, as `--profile` used
to.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Mads R. B. 
Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/dask-cuda/pull/1402 --- dask_cuda/benchmarks/common.py | 21 ++++++++++++----- dask_cuda/benchmarks/local_cudf_groupby.py | 13 +++-------- dask_cuda/benchmarks/local_cudf_merge.py | 8 +------ dask_cuda/benchmarks/local_cudf_shuffle.py | 13 +++-------- dask_cuda/benchmarks/local_cupy.py | 15 ++++-------- .../benchmarks/local_cupy_map_overlap.py | 15 ++++-------- dask_cuda/benchmarks/utils.py | 23 ++++++++++++++++++- 7 files changed, 52 insertions(+), 56 deletions(-) diff --git a/dask_cuda/benchmarks/common.py b/dask_cuda/benchmarks/common.py index 7f48d4fa..49676fee 100644 --- a/dask_cuda/benchmarks/common.py +++ b/dask_cuda/benchmarks/common.py @@ -1,3 +1,4 @@ +import contextlib from argparse import Namespace from functools import partial from typing import Any, Callable, List, Mapping, NamedTuple, Optional, Tuple @@ -7,7 +8,7 @@ import pandas as pd import dask -from distributed import Client +from distributed import Client, performance_report from dask_cuda.benchmarks.utils import ( address_to_index, @@ -87,12 +88,20 @@ def run_benchmark(client: Client, args: Namespace, config: Config): If ``args.profile`` is set, the final run is profiled. """ + results = [] - for _ in range(max(1, args.runs) - 1): - res = config.bench_once(client, args, write_profile=None) - results.append(res) - results.append(config.bench_once(client, args, write_profile=args.profile)) - return results + for _ in range(max(0, args.warmup_runs)): + config.bench_once(client, args, write_profile=None) + + ctx = contextlib.nullcontext() + if args.profile is not None: + ctx = performance_report(filename=args.profile) + with ctx: + for _ in range(max(1, args.runs) - 1): + res = config.bench_once(client, args, write_profile=None) + results.append(res) + results.append(config.bench_once(client, args, write_profile=args.profile_last)) + return results def gather_bench_results(client: Client, args: Namespace, config: Config): diff --git a/dask_cuda/benchmarks/local_cudf_groupby.py b/dask_cuda/benchmarks/local_cudf_groupby.py index f094ff18..a9e7d833 100644 --- a/dask_cuda/benchmarks/local_cudf_groupby.py +++ b/dask_cuda/benchmarks/local_cudf_groupby.py @@ -98,10 +98,9 @@ def bench_once(client, args, write_profile=None): "False": False, }.get(args.shuffle, args.shuffle) - if write_profile is None: - ctx = contextlib.nullcontext() - else: - ctx = performance_report(filename=args.profile) + ctx = contextlib.nullcontext() + if write_profile is not None: + ctx = performance_report(filename=write_profile) with ctx: t1 = clock() @@ -260,12 +259,6 @@ def parse_args(): "type": str, "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')", }, - { - "name": "--runs", - "default": 3, - "type": int, - "help": "Number of runs", - }, ] return parse_benchmark_args( diff --git a/dask_cuda/benchmarks/local_cudf_merge.py b/dask_cuda/benchmarks/local_cudf_merge.py index e2b03520..6ebe005a 100644 --- a/dask_cuda/benchmarks/local_cudf_merge.py +++ b/dask_cuda/benchmarks/local_cudf_merge.py @@ -190,7 +190,7 @@ def bench_once(client, args, write_profile=None): if args.backend == "explicit-comms": ctx1 = dask.config.set(explicit_comms=True) if write_profile is not None: - ctx2 = performance_report(filename=args.profile) + ctx2 = performance_report(filename=write_profile) with ctx1: with ctx2: @@ -346,12 +346,6 @@ def parse_args(): "action": "store_true", "help": "Don't shuffle the keys of the left (base) dataframe.", }, - { - "name": "--runs", - "default": 3, - "type": int, - 
"help": "Number of runs", - }, { "name": [ "-s", diff --git a/dask_cuda/benchmarks/local_cudf_shuffle.py b/dask_cuda/benchmarks/local_cudf_shuffle.py index 25f42e59..3a0955c4 100644 --- a/dask_cuda/benchmarks/local_cudf_shuffle.py +++ b/dask_cuda/benchmarks/local_cudf_shuffle.py @@ -121,10 +121,9 @@ def create_data( def bench_once(client, args, write_profile=None): data_processed, df = create_data(client, args) - if write_profile is None: - ctx = contextlib.nullcontext() - else: - ctx = performance_report(filename=args.profile) + ctx = contextlib.nullcontext() + if write_profile is not None: + ctx = performance_report(filename=write_profile) with ctx: if args.backend in {"dask", "dask-noop"}: @@ -228,12 +227,6 @@ def parse_args(): "type": str, "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')", }, - { - "name": "--runs", - "default": 3, - "type": int, - "help": "Number of runs", - }, { "name": "--ignore-index", "action": "store_true", diff --git a/dask_cuda/benchmarks/local_cupy.py b/dask_cuda/benchmarks/local_cupy.py index c9c8fe1c..ba88db30 100644 --- a/dask_cuda/benchmarks/local_cupy.py +++ b/dask_cuda/benchmarks/local_cupy.py @@ -141,12 +141,11 @@ def bench_once(client, args, write_profile=None): chunksize = x.chunksize data_processed = sum(arg.nbytes for arg in func_args) - # Execute the operations to benchmark - if args.profile is not None and write_profile is not None: - ctx = performance_report(filename=args.profile) - else: - ctx = contextlib.nullcontext() + ctx = contextlib.nullcontext() + if write_profile is not None: + ctx = performance_report(filename=write_profile) + # Execute the operations to benchmark with ctx: rng = start_range(message=args.operation, color="purple") result = func(*func_args) @@ -297,12 +296,6 @@ def parse_args(): "type": int, "help": "Chunk size (default 2500).", }, - { - "name": "--runs", - "default": 3, - "type": int, - "help": "Number of runs (default 3).", - }, { "name": [ "-b", diff --git a/dask_cuda/benchmarks/local_cupy_map_overlap.py b/dask_cuda/benchmarks/local_cupy_map_overlap.py index 8b975a24..ecefa52a 100644 --- a/dask_cuda/benchmarks/local_cupy_map_overlap.py +++ b/dask_cuda/benchmarks/local_cupy_map_overlap.py @@ -42,12 +42,11 @@ def bench_once(client, args, write_profile=None): data_processed = x.nbytes - # Execute the operations to benchmark - if args.profile is not None and write_profile is not None: - ctx = performance_report(filename=args.profile) - else: - ctx = contextlib.nullcontext() + ctx = contextlib.nullcontext() + if write_profile is not None: + ctx = performance_report(filename=write_profile) + # Execute the operations to benchmark with ctx: result = x.map_overlap(mean_filter, args.kernel_size, shape=ks) if args.backend == "dask-noop": @@ -168,12 +167,6 @@ def parse_args(): "type": int, "help": "Kernel size, 2*k+1, in each dimension (default 1)", }, - { - "name": "--runs", - "default": 3, - "type": int, - "help": "Number of runs", - }, { "name": [ "-b", diff --git a/dask_cuda/benchmarks/utils.py b/dask_cuda/benchmarks/utils.py index de7e2ae1..4f87a025 100644 --- a/dask_cuda/benchmarks/utils.py +++ b/dask_cuda/benchmarks/utils.py @@ -323,7 +323,16 @@ def parse_benchmark_args( metavar="PATH", default=None, type=str, - help="Write dask profile report (E.g. dask-report.html)", + help="Write dask profile report (E.g. dask-report.html) on all " + "iterations (excluding warmup).", + ) + parser.add_argument( + "--profile-last", + metavar="PATH", + default=None, + type=str, + help="Write dask profile report (E.g. 
dask-report.html) on last " + "iteration only.", ) # See save_benchmark_data for more information parser.add_argument( @@ -344,6 +353,18 @@ def parse_benchmark_args( type=parse_bytes, help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')", ) + parser.add_argument( + "--runs", + default=3, + type=int, + help="Number of runs", + ) + parser.add_argument( + "--warmup-runs", + default=1, + type=int, + help="Number of warmup runs", + ) for args in args_list: name = args.pop("name") From 9e7a926bc305f32aa0463a44eb96d4494fe55fc0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Nov 2024 09:13:11 -0600 Subject: [PATCH 10/13] enforce wheel size limits, README formatting in CI (#1404) Contributes to https://github.com/rapidsai/build-planning/issues/110 Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI. * checks on wheel size (compressed), - *to be sure they're under PyPI limits* - *and to prompt discussion on PRs that significantly increase wheel sizes* * checks on README formatting - *to ensure they'll render properly as the PyPI project homepages* - *e.g. like how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/* Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/dask-cuda/pull/1404 --- ci/build_wheel.sh | 1 + ci/validate_wheel.sh | 18 ++++++++++++++++++ pyproject.toml | 8 ++++++++ 3 files changed, 27 insertions(+) create mode 100755 ci/validate_wheel.sh diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 91c57231..760e46e3 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -8,5 +8,6 @@ source rapids-date-string rapids-generate-version > ./VERSION python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check +./ci/validate_wheel.sh dist RAPIDS_PY_WHEEL_NAME="dask-cuda" rapids-upload-wheels-to-s3 dist diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 00000000..60a80fce --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +wheel_dir_relative_path=$1 + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/pyproject.toml b/pyproject.toml index 2266fb5b..7025ca4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,3 +152,11 @@ exclude = [ "docs.*", "tests.*", ] + +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' From af1a2f7f20000796965653a8cb3d799cefd0c58d Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 20 Nov 2024 18:42:17 +0100 Subject: [PATCH 11/13] Disable UCXX tests in CI (#1406) Temporarily disable UCXX tests in CI due to some non-deterministic failures during code freeze phase. They will be reenabled after 24.12 release. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/dask-cuda/pull/1406 --- ci/test_python.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 18dd88cf..319efef2 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -62,7 +62,7 @@ timeout 90m pytest \ --cov=dask_cuda \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage.xml" \ --cov-report=term \ - tests + tests -k "not ucxx" popd rapids-logger "pytest explicit-comms (legacy dd)" @@ -83,7 +83,7 @@ timeout 60m pytest \ --cov=dask_cuda \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage-legacy.xml" \ --cov-report=term \ - tests/test_explicit_comms.py + tests/test_explicit_comms.py -k "not ucxx" popd rapids-logger "Run local benchmark (dask-expr)" From 075f8beb4098359ac72f88b88ae89621e41f5774 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 20 Nov 2024 19:49:30 +0000 Subject: [PATCH 12/13] Update PyNVML and set upper pin (#1130) Handling the str vs. bytes discrepancy should have been covered by the changes in #1118. Authors: - Lawrence Mitchell (https://github.com/wence-) - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - https://github.com/jakirkham URL: https://github.com/rapidsai/dask-cuda/pull/1130 --- conda/environments/all_cuda-114_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/environments/all_cuda-114_arch-x86_64.yaml b/conda/environments/all_cuda-114_arch-x86_64.yaml index 3c327ff0..c7b20c69 100644 --- a/conda/environments/all_cuda-114_arch-x86_64.yaml +++ b/conda/environments/all_cuda-114_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - numpydoc>=1.1.0 - pandas>=1.3 - pre-commit -- pynvml>=11.0.0,<11.5 +- pynvml>=11.0.0,<12.0.0a0 - pytest - pytest-cov - python>=3.10,<3.13 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 3931f3bf..9fd24d4e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - numpydoc>=1.1.0 - pandas>=1.3 - pre-commit -- pynvml>=11.0.0,<11.5 +- pynvml>=11.0.0,<12.0.0a0 - pytest - pytest-cov - python>=3.10,<3.13 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 760ae971..cd7c1679 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - numpydoc>=1.1.0 - pandas>=1.3 - pre-commit -- pynvml>=11.0.0,<11.5 +- pynvml>=11.0.0,<12.0.0a0 - pytest - pytest-cov - python>=3.10,<3.13 diff --git a/dependencies.yaml b/dependencies.yaml index 59ac8c01..fa6a56e0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -157,7 +157,7 @@ dependencies: - numba>=0.57 - numpy>=1.23,<3.0a0 - pandas>=1.3 - - pynvml>=11.0.0,<11.5 + - pynvml>=11.0.0,<12.0.0a0 - rapids-dask-dependency==24.12.*,>=0.0.0a0 - zict>=2.0.0 test_python: diff --git a/pyproject.toml b/pyproject.toml index 7025ca4e..f6332875 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "numba>=0.57", "numpy>=1.23,<3.0a0", "pandas>=1.3", - "pynvml>=11.0.0,<11.5", + 
"pynvml>=11.0.0,<12.0.0a0", "rapids-dask-dependency==24.12.*,>=0.0.0a0", "zict>=2.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. From e68afed3983bc652f05e91661887f071ddf72ff1 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 11 Dec 2024 13:11:43 -0500 Subject: [PATCH 13/13] Update Changelog [skip ci] --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8c992fb..3b0d08d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,29 @@ +# dask-cuda 24.12.00 (11 Dec 2024) + +## 🚨 Breaking Changes + +- Add warmup runs and profile all iterations to benchmarks ([#1402](https://github.com/rapidsai/dask-cuda/pull/1402)) [@pentschev](https://github.com/pentschev) + +## 🐛 Bug Fixes + +- Disable UCXX tests in CI ([#1406](https://github.com/rapidsai/dask-cuda/pull/1406)) [@pentschev](https://github.com/pentschev) +- Ignore legacy Dask dataframe warnings ([#1397](https://github.com/rapidsai/dask-cuda/pull/1397)) [@pentschev](https://github.com/pentschev) +- Reenable UCXX in CI ([#1396](https://github.com/rapidsai/dask-cuda/pull/1396)) [@pentschev](https://github.com/pentschev) + +## 🚀 New Features + +- Enable Pytorch to share same memory pool as RMM via cli ([#1392](https://github.com/rapidsai/dask-cuda/pull/1392)) [@VibhuJawa](https://github.com/VibhuJawa) + +## 🛠️ Improvements + +- enforce wheel size limits, README formatting in CI ([#1404](https://github.com/rapidsai/dask-cuda/pull/1404)) [@jameslamb](https://github.com/jameslamb) +- Add warmup runs and profile all iterations to benchmarks ([#1402](https://github.com/rapidsai/dask-cuda/pull/1402)) [@pentschev](https://github.com/pentschev) +- remove unnecessary cmake and sccache configuration ([#1400](https://github.com/rapidsai/dask-cuda/pull/1400)) [@jameslamb](https://github.com/jameslamb) +- make conda installs in CI stricter ([#1395](https://github.com/rapidsai/dask-cuda/pull/1395)) [@jameslamb](https://github.com/jameslamb) +- Limit output of pytest durations ([#1393](https://github.com/rapidsai/dask-cuda/pull/1393)) [@pentschev](https://github.com/pentschev) +- Switch pytest `traceback` to `native` ([#1389](https://github.com/rapidsai/dask-cuda/pull/1389)) [@galipremsagar](https://github.com/galipremsagar) +- Update PyNVML and set upper pin ([#1130](https://github.com/rapidsai/dask-cuda/pull/1130)) [@wence-](https://github.com/wence-) + # dask-cuda 24.10.00 (9 Oct 2024) ## 🚨 Breaking Changes