forked from horovod/horovod
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Jenkinsfile.ppc64le
72 lines (69 loc) · 2.9 KB
/
Jenkinsfile.ppc64le
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// Declarative pipeline that builds Horovod and runs its parallel TensorFlow and
// PyTorch unit tests inside a ppc64le GPU Docker container. The outcome is
// reported to GitHub through setBuildStatus(), defined at the bottom of this file.
pipeline {
    options {
        // Keep at most the 30 most recent builds.
        buildDiscarder(logRotator(numToKeepStr: '30'))
        // Abort the run once it exceeds 10 minutes. An aborted run is reported
        // to GitHub by the `aborted` handler in the post section below.
        timeout(time: 10, unit: 'MINUTES')
    }
    agent {
        docker {
            alwaysPull true
            // WMLCE 1.7.0 has CUDA 10.2, NCCL 2.5.6, TensorFlow 2.1.0, and PyTorch 1.3.1
            image 'tensorflowppc64le/tensorflow-ppc64le:osuosl-ubuntu-horovod-wlmce1.7.0-py3.7-ppc64le'
            args '--cap-add=SYS_PTRACE --shm-size=256g'
            label 'power8-gpu'
            registryCredentialsId 'TensorFlow'
        }
    }
    stages {
        // Mark the commit as pending before any real work starts.
        stage ('UPDATE_GITHUB_STATUS') {
            steps {
                setBuildStatus("ppc64le Build/Tests Pending", "PENDING")
            }
        }
        // Fetch submodules, activate the conda environment and pip-install
        // Horovod with PyTorch and TensorFlow enabled and MXNet and Gloo disabled.
        // The script is a single-quoted Groovy string, so ${CONDA_INIT} and
        // ${CONDA_ENV} are expanded by bash, not by Groovy (presumably provided
        // by the container image; they are not defined in this file).
        // `set -xe` is enabled only after the conda setup lines, so a failure in
        // those earlier lines does not stop the script by itself.
        // The script text is kept flush-left so the string passed to `sh` is unchanged.
        stage ('BUILD_HOROVOD') {
            steps {
                sh '''#!/usr/bin/env bash
git submodule update --init --recursive
. ${CONDA_INIT}
conda activate ${CONDA_ENV}
conda install -y cmake make
set -xe
HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 \
HOROVOD_CUDA_HOME=$CONDA_PREFIX HOROVOD_GPU_OPERATIONS=NCCL MAKEFLAGS="-j1" \
pip install -v . --no-cache-dir --no-deps
'''
            }
        }
        // Run the test suites through horovodrun: TensorFlow on two processes
        // (excluding 'multi_gpu'), the 'multi_gpu' TensorFlow tests on one
        // process, then PyTorch on two processes.
        stage ('TEST_HOROVOD') {
            steps {
                ansiColor('xterm') {
                    sh '''#!/usr/bin/env bash
. ${CONDA_INIT}
conda activate ${CONDA_ENV}
set -xe
# TensorFlow unit tests
horovodrun -n 2 -H localhost:2 --mpi-args="-pami_noib" pytest -k 'not multi_gpu' -v -s test/parallel/test_tensorflow.py
# Container has only 2 GPUs, so run the 'multi_gpu' test seperatly on one process
horovodrun -n 1 -H localhost:1 --mpi-args="-pami_noib" pytest -k 'multi_gpu' -v -s test/parallel/test_tensorflow.py
# PyTorch unit tests
horovodrun -n 2 -H localhost:2 --mpi-args="-pami_noib" pytest -v -s test/parallel/test_torch.py
'''
                }
            }
        }
    } // end of stages
    // Resolve the GitHub status that the first stage set to PENDING.
    post {
        success {
            setBuildStatus("ppc64le Build/Tests Passed", "SUCCESS")
        }
        failure {
            setBuildStatus("ppc64le Build/Tests Failed", "FAILURE")
        }
        unstable {
            setBuildStatus("ppc64le Build/Tests Failed", "FAILURE")
        }
        // A run stopped by the timeout option or cancelled by a user ends as
        // ABORTED. Without this handler the commit status would stay PENDING.
        aborted {
            setBuildStatus("ppc64le Build/Tests Aborted", "ERROR")
        }
    }
} //end of pipeline
/**
 * Publishes a commit status to GitHub under the 'ppc64le-checks' context,
 * linking to this build's console log.
 *
 * @param message text sent as the status description
 * @param state   status value handed to githubNotify (e.g. PENDING, SUCCESS, FAILURE)
 */
void setBuildStatus(String message, String state) {
    // Console URL of the current build; BRANCH_NAME and BUILD_NUMBER are
    // interpolated when the method is called.
    String consoleUrl = "https://powerci.osuosl.org/job/Horovod_PPC64LE_GPU_PIPELINE/view/change-requests/job/${BRANCH_NAME}/${BUILD_NUMBER}/console"
    githubNotify(
        context: 'ppc64le-checks',
        description: message,
        status: state,
        targetUrl: consoleUrl
    )
}