Skip to content

Commit

Permalink
loosen pytorch pin, remove unnecessary stuff in test_python.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Oct 17, 2024
1 parent 3772c8f commit c312058
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 154 deletions.
186 changes: 38 additions & 148 deletions ci/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Generate Python testing dependencies"
rapids-dependency-file-generator \
--output conda \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n test

# Temporarily allow unbound variables for conda activation.
set +u
conda activate test
set -u

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
Expand All @@ -29,153 +24,48 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"

rapids-print-env

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcugraph \
pylibcugraph \
cugraph \
nx-cugraph \
cugraph-service-server \
cugraph-service-client

rapids-logger "Check GPU usage"
nvidia-smi

# RAPIDS_DATASET_ROOT_DIR is used by test scripts
export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
pushd "${RAPIDS_DATASET_ROOT_DIR}"
./get_test_data.sh --benchmark
popd

EXITCODE=0
trap "EXITCODE=1" ERR
set +e


# Test runs that include tests that use dask require
# --import-mode=append. Those tests start a LocalCUDACluster that inherits
# changes from pytest's modifications to PYTHONPATH (which defaults to
# prepending source tree paths to PYTHONPATH). This causes the
# LocalCUDACluster subprocess to import cugraph from the source tree instead of
# the install location, and in most cases, the source tree does not have
# extensions built in-place and will result in ImportErrors.
#
# FIXME: TEMPORARILY disable MG PropertyGraph tests (experimental) and
# bulk sampler IO tests (hangs in CI)
if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
# we are only testing with a single CUDA version
# because of PyTorch and RAPIDS compatibility problems
rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_dgl

# activate test_cugraph_dgl environment for dgl
set +u
conda activate test_cugraph_dgl
set -u
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel conda-forge \
--channel dglteam/label/cu118 \
--channel nvidia \
libcugraph \
pylibcugraph \
pylibcugraphops \
cugraph \
cugraph-dgl \
'dgl>=1.1.0.cu*,<=2.0.0.cu*' \
'pytorch>=2.0' \
'cuda-version=11.8'

rapids-print-env

rapids-logger "pytest cugraph_dgl (single GPU)"
./ci/run_cugraph_dgl_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-dgl.xml" \
--cov-config=../../.coveragerc \
--cov=cugraph_dgl \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-dgl-coverage.xml" \
--cov-report=term

# Switch back to the main test environment
set +u
conda deactivate
conda activate test
set -u
else
rapids-logger "skipping cugraph_dgl pytest on ARM64"
fi
else
rapids-logger "skipping cugraph_dgl pytest on CUDA!=11.8"
fi

if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_pyg

# Temporarily allow unbound variables for conda activation.
set +u
conda activate test_cugraph_pyg
set -u

# TODO re-enable logic once CUDA 12 is testable
#if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
CONDA_CUDA_VERSION="11.8"
PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html"
#else
# CONDA_CUDA_VERSION="12.1"
# PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html"
#fi

# Will automatically install built dependencies of cuGraph-PyG
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel pytorch \
--channel pyg \
--channel nvidia \
"cugraph-pyg" \
"pytorch=2.1.0" \
"pytorch-cuda=${CONDA_CUDA_VERSION}"

# Install pyg dependencies (which requires pip)

pip install \
ogb \
tensordict

pip install \
pyg_lib \
torch_scatter \
torch_sparse \
-f ${PYG_URL}

rapids-print-env

rapids-logger "pytest cugraph_pyg (single GPU)"
# rmat is not tested because of multi-GPU testing
./ci/run_cugraph_pyg_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \
--cov-config=../../.coveragerc \
--cov=cugraph_pyg \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-pyg-coverage.xml" \
--cov-report=term

# Switch back to the main test environment
set +u
conda deactivate
conda activate test
set -u
else
rapids-logger "skipping cugraph_pyg pytest on ARM64"
fi
if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
rapids-mamba-retry env create --yes -f env.yaml -n test_pylibwholegraph

# Temporarily allow unbound variables for conda activation.
set +u
conda activate test_pylibwholegraph
set -u

# Will automatically install built dependencies of pylibwholegraph
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel pytorch \
'mkl<2024.1.0' \
"pylibwholegraph=${RAPIDS_VERSION}" \
'pytorch::pytorch>=2.3,<2.4' \
'ogb'

rapids-print-env

rapids-logger "Check GPU usage"
nvidia-smi

rapids-logger "pytest pylibwholegraph (single GPU)"
./ci/run_pylibwholegraph_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-pylibwholegraph.xml" \
--cov-config=../../.coveragerc \
--cov=cugraph_pyg \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/pylibwholegraph-coverage.xml" \
--cov-report=term

# Deactivate the test_pylibwholegraph environment
set +u
conda deactivate
set -u
else
rapids-logger "skipping cugraph_pyg pytest on CUDA!=11.8"
rapids-logger "skipping pylibwholegraph pytest on ARM64"
fi


rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dependencies:
- pytest-forked
- pytest-xdist
- pytorch-cuda=11.8
- pytorch::pytorch>=2.0,<2.2.0a0
- pytorch::pytorch>=2.0,<2.4.0a0
- raft-dask==24.12.*,>=0.0.0a0
- recommonmark
- rmm==24.12.*,>=0.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-121_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies:
- pytest-forked
- pytest-xdist
- pytorch-cuda=12.1
- pytorch::pytorch>=2.0,<2.2.0a0
- pytorch::pytorch>=2.0,<2.4.0a0
- raft-dask==24.12.*,>=0.0.0a0
- recommonmark
- rmm==24.12.*,>=0.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-124_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies:
- pytest-forked
- pytest-xdist
- pytorch-cuda=12.1
- pytorch::pytorch>=2.0,<2.2.0a0
- pytorch::pytorch>=2.0,<2.4.0a0
- raft-dask==24.12.*,>=0.0.0a0
- recommonmark
- rmm==24.12.*,>=0.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ dependencies:
common:
- output_types: [conda]
packages:
- pytorch::pytorch>=2.0,<2.2.0a0
- pytorch::pytorch>=2.0,<2.4.0a0
- torchdata
- pydantic
specific:
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ dependencies:
- pytest-xdist
- pytorch-cuda=11.8
- pytorch::pytorch>=2.0
- pytorch::pytorch>=2.0,<2.2.0a0
- pytorch::pytorch>=2.0,<2.4.0a0
- scipy
- tensordict>=0.1.2
- torchdata
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- pytest-xdist
- pytorch-cuda=11.8
- pytorch::pytorch>=2.0
- pytorch::pytorch>=2.0,<2.2.0a0
- pytorch::pytorch>=2.0,<2.4.0a0
- scipy
- tensordict>=0.1.2
- torchdata
Expand Down

0 comments on commit c312058

Please sign in to comment.