forked from NVIDIA/NeMo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
133 lines (113 loc) · 5.17 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# syntax=docker/dockerfile:experimental
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.03-py3

# Stage `nemo-deps`: the base image plus every NeMo dependency, but not NeMo
# itself. Dependencies are installed first so their layers stay cached when
# only NeMo sources change; build with `--target nemo-deps` to get a
# development image.
FROM ${BASE_IMAGE} AS nemo-deps

# Keep apt-get from stopping the build to ask interactive questions.
ENV DEBIAN_FRONTEND=noninteractive

# System packages needed by the audio/text tooling built below. The apt lists
# are removed in the same layer so they do not add to the image size.
RUN apt-get update \
    && apt-get install -y \
        ffmpeg \
        libfreetype6 \
        libsndfile1 \
        python-dev \
        python-setuptools \
        sox \
        swig \
    && rm -rf /var/lib/apt/lists/*
# Remove the sacrebleu and torchtext copies that ship with the base container;
# torchtext is rebuilt from source further down.
RUN pip uninstall --yes sacrebleu torchtext
# Build torchaudio from source with its bundled SoX support.
# Keep the release branch in step with the PyTorch version of the base image.
WORKDIR /tmp/torchaudio_build
RUN git clone --branch release/0.7 --depth 1 https://github.com/pytorch/audio.git \
    && (cd audio && BUILD_SOX=1 python setup.py install) \
    && rm -r audio
# Build torchtext v0.8.1 from source, including its git submodules.
# TODO: remove when 21.04 container is released
WORKDIR /tmp/torchtext_build
RUN git clone --branch v0.8.1 https://github.com/pytorch/text.git \
    && ( cd text \
         && git submodule update --init --recursive \
         && python setup.py clean install ) \
    && rm -r text
# build RNN-T loss
# Clones HawkAaron/warp-transducer at a pinned commit, patches its CMakeLists.txt,
# builds the native library with cmake/make, then installs the PyTorch binding.
# Everything runs in a single RUN so the `cd` and `export` state carries from
# step to step; the exported variables exist only for this instruction's shell.
WORKDIR /workspace/deps/rnnt
# COMMIT_SHA pins the checkout so the build does not follow upstream HEAD
RUN COMMIT_SHA=f546575109111c455354861a0567c8aa794208a2 && \
git clone https://github.com/HawkAaron/warp-transducer && \
cd warp-transducer && \
git checkout $COMMIT_SHA && \
# disable old compile flags (compute_30 arch)
# (the sed turns the matching set(...) line into a CMake comment by prefixing `#`)
sed -i 's/set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_30,code=sm_30 -O2")/#set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_30,code=sm_30 -O2")/g' CMakeLists.txt && \
# enable Cuda 11 compilation if necessary
# (rewrites the compute_75/sm_75 gencode line to compute_80/sm_80; a no-op if that line is absent)
sed -i 's/set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_75,code=sm_75")/set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_80,code=sm_80")/g' CMakeLists.txt && \
# build loss function
mkdir build && \
cd build && \
cmake .. && \
make VERBOSE=1 && \
# set env flags
# (exported after the cmake/make step, so they only affect the setup.py install below;
# WARP_RNNT_PATH is the build directory created above)
export CUDA_HOME="/usr/local/cuda" && \
export WARP_RNNT_PATH=`pwd` && \
export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME && \
export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH" && \
export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH && \
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH && \
export CFLAGS="-I$CUDA_HOME/include $CFLAGS" && \
# install pytorch binding
cd ../pytorch_binding && \
python3 setup.py install && \
# drop directories not needed after install (paths are relative to pytorch_binding)
rm -rf ../tests test ../tensorflow_binding
# install nemo dependencies
# The contents of the `requirements` directory are copied into /tmp/nemo and
# every requirements*.txt file found there is installed with pip.
WORKDIR /tmp/nemo
COPY requirements .
# Iterate over the glob directly (not over `ls` output) and quote "$f" so file
# names are never word-split. `|| exit 1` aborts on the first failing install;
# without it the loop's exit status is that of the last file only, and an
# earlier failure would be silently ignored by the build.
RUN for f in requirements*.txt; do \
        pip install --disable-pip-version-check --no-cache-dir -r "$f" || exit 1; \
    done
# install nemo_text_processing dependencies
# The setup script is copied into the current WORKDIR under the name
# nemo_text_processing_setup.sh, so the RUN step must invoke it by that name.
COPY nemo_text_processing/setup.sh nemo_text_processing_setup.sh
RUN bash nemo_text_processing_setup.sh
# Install TensorRT tooling from the NVIDIA/TensorRT repository:
# onnx-graphsurgeon (ONNX graph optimizer) and pytorch-quantization
# (PyTorch quantization support). The build directory is deleted afterwards.
WORKDIR /tmp/trt_build
RUN git clone https://github.com/NVIDIA/TensorRT.git \
    && (cd TensorRT/tools/onnx-graphsurgeon && python setup.py install) \
    && (cd TensorRT/tools/pytorch-quantization && python setup.py install) \
    && rm -fr /tmp/trt_build
# Stage `nemo-src`: an otherwise empty image holding the whole build context,
# so a later stage can mount the sources.
FROM scratch AS nemo-src
COPY . /
# start building the final container
FROM nemo-deps as nemo
# Re-declare BASE_IMAGE inside this stage. An ARG defined before the first FROM
# is only visible to FROM lines; without this re-declaration ${BASE_IMAGE}
# expands to an empty string in the RUN below. Declaring it without a value
# picks up the global default (or the --build-arg override).
ARG BASE_IMAGE
ARG NEMO_VERSION=1.0.0rc1
# Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container
# version information as runtime environment variable for introspection purposes
# (the exports are appended to /root/.bashrc).
RUN /usr/bin/test -n "$NEMO_VERSION" && \
    /bin/echo "export NEMO_VERSION=${NEMO_VERSION}" >> /root/.bashrc && \
    /bin/echo "export BASE_IMAGE=${BASE_IMAGE}" >> /root/.bashrc
# Install NeMo (with the `all` extra) from the source tree provided by the
# `nemo-src` stage. The --mount makes that stage's files available at /tmp/nemo
# for this RUN instruction only, so this step does not copy the sources into an
# image layer of its own.
# The `python -c` imports are a smoke test: the build fails if the asr, nlp or
# tts collections, or nemo_text_processing.text_normalization, cannot be imported.
# NOTE(review): the imports run with /tmp/nemo as the working directory, so they
# may resolve against the mounted source tree rather than the installed package — confirm.
RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]" && \
python -c "import nemo.collections.asr as nemo_asr" && \
python -c "import nemo.collections.nlp as nemo_nlp" && \
python -c "import nemo.collections.tts as nemo_tts" && \
python -c "import nemo_text_processing.text_normalization as text_normalization"
# Bring numba up to the newest release published on the `numba` conda channel.
# TODO: Remove once 21.04 container is base container
RUN conda update --yes --channel numba numba
# Ship scripts, examples, tests and tutorials to the end user under
# /workspace/nemo (destinations below are relative to this WORKDIR).
WORKDIR /workspace/nemo
COPY scripts ./scripts
COPY examples ./examples
COPY tests ./tests
COPY tutorials ./tutorials
# COPY README.rst LICENSE /workspace/nemo/

# Create an executable helper script that launches JupyterLab, listening on
# all interfaces, without opening a browser and with root allowed.
RUN printf "#!/bin/bash\njupyter lab --no-browser --allow-root --ip=0.0.0.0" >> start-jupyter.sh \
    && chmod +x start-jupyter.sh