diff --git a/examples/deepspeech/Dockerfile b/examples/deepspeech/Dockerfile
deleted file mode 100644
index a5e9f3ca..00000000
--- a/examples/deepspeech/Dockerfile
+++ /dev/null
@@ -1,81 +0,0 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
-RUN apt-get update
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        curl \
-        wget \
-        git \
-        python \
-        python-dev \
-        python-pip \
-        python-wheel \
-        python-numpy \
-        libcurl3-dev \
-        ca-certificates \
-        gcc \
-        sox \
-        libsox-fmt-mp3 \
-        htop \
-        nano \
-        swig \
-        cmake \
-        libboost-all-dev \
-        zlib1g-dev \
-        libbz2-dev \
-        liblzma-dev \
-        locales \
-        pkg-config \
-        libsox-dev
-
-
-RUN apt-get install -y python3 python3-pip
-RUN pip3 install 'tensorflow-gpu==1.6.0' pandas python_speech_features pyxdg progressbar2 scipy
-
-RUN git clone https://github.com/mozilla/DeepSpeech.git
-WORKDIR /DeepSpeech/
-RUN git reset --hard e00bfd0f413912855eb2312bc1efe3bd2b023b25
-
-# GPU Environment Setup
-ENV TF_NEED_CUDA 1
-ENV CUDA_TOOLKIT_PATH /usr/local/cuda
-ENV CUDA_PKG_VERSION 9-0=9.0.176-1
-ENV CUDA_VERSION 9.0.176
-ENV TF_CUDA_VERSION 9.0
-ENV TF_CUDNN_VERSION 7.4.1
-ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu/
-ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
-
-# Common Environment Setup
-ENV TF_BUILD_CONTAINER_TYPE GPU
-ENV TF_BUILD_OPTIONS OPT
-ENV TF_BUILD_DISABLE_GCP 1
-ENV TF_BUILD_ENABLE_XLA 0
-ENV TF_BUILD_PYTHON_VERSION PYTHON2
-ENV TF_BUILD_IS_OPT OPT
-ENV TF_BUILD_IS_PIP PIP
-
-# Other Parameters
-ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma
-ENV TF_NEED_GCP 0
-ENV TF_NEED_HDFS 0
-ENV TF_NEED_JEMALLOC 1
-ENV TF_NEED_OPENCL 0
-ENV TF_CUDA_CLANG 0
-ENV TF_NEED_MKL 0
-ENV TF_ENABLE_XLA 0
-
-ENV GIT_LFS_SKIP_SMUDGE=1
-
-RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
-RUN cp /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h
-
-# Set library paths
-ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/
-WORKDIR /DeepSpeech/
-
-RUN python3 util/taskcluster.py --arch gpu --target native_client/ --branch=v0.2.0
-RUN python3 bin/import_ldc93s1.py ldc93s1
-RUN wget https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/deepspeech_benchmarking.patch
-RUN git apply deepspeech_benchmarking.patch
-ENV LD_LIBRARY_PATH=native_client/
-
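For reference, the removed image was consumed roughly as follows — a minimal sketch that mirrors the Docker section of the old README below, assuming the image is built from the directory that contained this Dockerfile and tagged `deepspeech`:

```bash
# build the benchmark image (CUDA 9.0 / cuDNN 7 base, DeepSpeech pinned to e00bfd0f)
docker build -t deepspeech .
# run a short benchmark inside the container; flags mirror the old README examples
docker run deepspeech python3 ./DeepSpeech.py \
    --train_files=ldc93s1/ldc93s1.csv --dev_files=ldc93s1/ldc93s1.csv \
    --test_files=ldc93s1/ldc93s1.csv --log_level=3 --benchmark_steps=10
```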
diff --git a/examples/deepspeech/README.md b/examples/deepspeech/README.md
index 1ff1c90b..e331182f 100644
--- a/examples/deepspeech/README.md
+++ b/examples/deepspeech/README.md
@@ -1,204 +1,8 @@
-# DeepSpeech benchmarks
+# DeepSpeech test application

-In this example we provide experimental results and reproduction steps for benchmarking the training performance of
-[Baidu's Deep Speech](https://arxiv.org/abs/1412.5567) recurrent neural network for automatic speech recognition.
+The distributed DeepSpeech training application that was used as a test
+application and requirement provider for TensorHive is now in a
+[separate repository](https://github.com/roscisz/dnn_training_benchmarks/tree/master/TensorFlowV1_DeepSpeech_ldc93s1).
-The example is based on [Project DeepSpeech by Mozilla](https://github.com/mozilla/DeepSpeech), an open-source
-TensorFlow implementation that supports distributed training using Distributed TensorFlow.
-
-## Table of contents
-- [x] [Installation instructions](#installation)
-- [ ] [Instructions for running the benchmarks](#running-the-benchmarks)
-    - [x] [Manually](#manually)
-    - [x] [Using run-cluster.sh](#run-clustersh)
-    - [x] [Using Docker](#docker)
-    - [x] [Using Kubernetes](#kubernetes)
-    - [ ] [Using TensorHive](#tensorhive)
-- [ ] [Experimental results](#experimental-results):
-    - [x] [Batch size influence on training performance on various GPUs](#batch-size)
-    - [x] [Scalability on multiple GPUs](#multigpu-scalability)
-    - [ ] Scalability in a distributed setting
-
-## Installation
-
-In this section we describe the DeepSpeech installation steps used in our setup.
-For detailed instructions on running the DeepSpeech training, go to the
-[DeepSpeech project site](https://github.com/mozilla/DeepSpeech).
-
-### Prerequisites
-
-* GNU/Linux
-* Python 3, Pip3, git, wget
-* CUDA 9.0 with CuDNN 7
-* TensorFlow 1.6.0
-* Python packages: pandas, python_speech_features, pyxdg, progressbar2
-
-The environment can be set up, for example, using nvidia-docker as follows:
-
-```bash
-nvidia-docker pull nvidia/cuda:9.0-cudnn7-devel
-nvidia-docker run -it nvidia/cuda:9.0-cudnn7-devel
-apt-get update
-apt-get install -y python3 python3-pip git wget
-pip3 install 'tensorflow-gpu==1.6.0' pandas python_speech_features pyxdg progressbar2 scipy
-```
-
-### Installing DeepSpeech
-
-**Clone the proper version of DeepSpeech**
-```bash
-git clone https://github.com/mozilla/DeepSpeech.git
-cd DeepSpeech/
-git reset --hard e00bfd0f413912855eb2312bc1efe3bd2b023b25
-```
-Note: if you have git-lfs installed, you can disable it for the benchmarks using the environment variable GIT_LFS_SKIP_SMUDGE=1.
-
-**Download native libraries**
-```bash
-python3 util/taskcluster.py --arch gpu --target native_client/ --branch=v0.2.0
-```
-
-**Download a small dataset**
-```bash
-python3 bin/import_ldc93s1.py ldc93s1
-```
-
-### Applying the benchmarking patch
-
-```bash
-wget https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/deepspeech_benchmarking.patch
-git apply deepspeech_benchmarking.patch
-```
-
-## Running the benchmarks
-
-In this section we describe the steps to reproduce the [experimental results](#experimental-results),
-assuming that the DeepSpeech training program is installed and the benchmarking patch is applied.
-
-### Manually
-
-To run the benchmark, specify the number of "global steps" to be benchmarked using the `benchmark_steps` parameter:
-
-```bash
-LD_LIBRARY_PATH=native_client/ CUDA_VISIBLE_DEVICES=0 python3 ./DeepSpeech.py --train_files=ldc93s1/ldc93s1.csv --dev_files=ldc93s1/ldc93s1.csv --test_files=ldc93s1/ldc93s1.csv --log_level=3 --benchmark_steps=10
-```
-
-**Testing batch size on one GPU**
-
-To check the performance for various batch sizes, modify the `train_batch_size` parameter. For example, to use a batch size of 128, run:
-
-```bash
-LD_LIBRARY_PATH=native_client/ CUDA_VISIBLE_DEVICES=0 python3 ./DeepSpeech.py --train_files=ldc93s1/ldc93s1.csv --dev_files=ldc93s1/ldc93s1.csv --test_files=ldc93s1/ldc93s1.csv --log_level=3 --benchmark_steps=10 --train_batch_size=128
-```
-
-**Testing scalability on many GPUs**
-
-***In-graph replication***
-
-To check the performance of parallel training on multiple GPUs, modify the CUDA_VISIBLE_DEVICES environment variable.
-For example, to use GPUs 1 and 2, set CUDA_VISIBLE_DEVICES=1,2; to use all GPUs in a 4-GPU system, set CUDA_VISIBLE_DEVICES=0,1,2,3.
-The in-graph replication method for data-parallel, synchronized training implemented in
-Mozilla DeepSpeech will be used.
-
-### run-cluster.sh
-
-The Mozilla DeepSpeech implementation supports distributed training using Distributed TensorFlow with a
-parameter server and worker processes. The `bin/run-cluster.sh` script is helpful for configuring and running
-these processes on a single machine. The script accepts an argument in the p:w:g format, where p denotes the
-number of parameter servers, w denotes the number of worker processes and g denotes the number of
-GPUs used by each worker process.
-
-For example, running our benchmark on the distributed training
-application with 1 parameter server and 4 workers, each using 1 GPU, requires the following command:
-
-```bash
-LD_LIBRARY_PATH=native_client/ bin/run-cluster.sh 1:4:1 --script="python3 DeepSpeech.py" --train_files=ldc93s1/ldc93s1.csv --dev_files=ldc93s1/ldc93s1.csv --test_files=ldc93s1/ldc93s1.csv --train_batch_size=64 --epoch=1000 --benchmark_warmup_steps=10 --benchmark_steps=10 --log_level=3 --noshow_progressbar
-```
-
-It should be noted that distributed training introduces a startup overhead, so increasing the number of
-warmup steps can be necessary to collect reliable results.
-
-### Docker
-
-We provide a Dockerfile that allows building and running the benchmark as a Docker image:
-
-```bash
-docker build -t deepspeech .
-```
-
-If there is a need to share the image between distributed machines, the repository has to be given
-in the image tag, and the image has to be pushed to a Docker repository:
-
-```bash
-docker build -t <repository>/deepspeech .
-docker push <repository>/deepspeech
-```
-
-Now, the benchmark can be executed in a Docker container, so that no dependencies need to be installed
-on the host machine:
-
-```bash
-docker run deepspeech python3 ./DeepSpeech.py --train_files=ldc93s1/ldc93s1.csv --dev_files=ldc93s1/ldc93s1.csv --test_files=ldc93s1/ldc93s1.csv --log_level=3 --benchmark_steps=10 --train_batch_size=128
-```
-
-### Kubernetes
-
-In order to enqueue the benchmark in a Kubernetes installation
-[(for example microk8s)](https://gist.github.com/PiotrowskiD/07a57ad0f21e2b4de78454d02b34865c),
-create a suitable deployment file (an example is provided in ds.yaml) and create the resource:
-
-```bash
-kubectl create -f ds.yaml
-```
-
-The status of the resulting Pod, its detailed description and its logs can be fetched as follows:
-
-```bash
-kubectl get pod
-kubectl describe pod ds
-kubectl logs ds
-```
-
-Unfortunately, Kubernetes doesn't take other processes using the GPUs into account, which leads to conflicts
-if somebody else runs their jobs manually. Because the CUDA_VISIBLE_DEVICES environment variable is used inside
-the container, it can only choose from the GPUs that Kubernetes assigns to the container. So, to deploy our
-training to GPU number 3, we would have to set the GPU limit to 4 and set CUDA_VISIBLE_DEVICES to "3". That is,
-if the GPUs are all on a single node; if they were on different nodes, we could use
-[node labels and selectors](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes/).
-
-
-### TensorHive
-
-## Experimental results
-
-### Batch size
-
-As expected, performance increases proportionally to the batch size, up to a limit that depends on GPU memory capacity:
-
-![batch_size_v100](https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/img/batch_size_v100.png)
-![batch_size_gtx1060](https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/img/batch_size_gtx1060.png)
-
-Although using Distributed TensorFlow with Parameter Servers allows distributed training on multiple nodes, it
-should be noted that communication with the Parameter Server introduces significant overhead compared to the
-in-graph replication method:
-
-![batch_size_v100_distributed](https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/img/batch_size_v100_distributed.png)
-
-### MultiGPU scalability
-
-The following results show training performance on an NVIDIA® DGX Station™, depending on the choice of utilized
-GPUs. The results are marked with the IDs of the used GPUs; for example, '013' means that CUDA_VISIBLE_DEVICES
-was set to 0,1,3.
-
-![multigpu_128](https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/img/multigpu_128.png)
-
-Interestingly, when two GPUs are utilized, it matters exactly which GPUs are used. For example, combining
-GPUs 0 and 1, or 2 and 3, results in worse performance. This is probably related to the interconnects between
-the GPUs.
-
-The overhead of the Distributed TensorFlow implementation is also visible in the multi-GPU setup:
-
-![multigpu_128_distributed](https://raw.githubusercontent.com/roscisz/TensorHive/master/examples/deepspeech/img/multigpu_128_distributed.png)
-
-The results show that in the investigated setup it is better to run many processes, each utilizing a single GPU,
-than to run one process utilizing multiple GPUs.
+See the [TensorHive examples directory](https://github.com/roscisz/TensorHive/tree/master/examples) for
+examples of how the TensorHive task execution module can be used with various training applications.
\ No newline at end of file
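The deleted `deepspeech_benchmarking.patch` below is what produced the utt/sec figures above: it adds the `benchmark_steps`, `benchmark_log_steps` and `benchmark_warmup_steps` flags and a `BenchmarkHook` that times global steps. A quick sanity check of the number the hook reports, using illustrative figures rather than measured ones:

```python
# The hook reports speed = timed_steps * batch_size / elapsed_seconds.
# E.g. 10 timed steps at train_batch_size=64 finishing in 8.0 s:
steps, batch_size, elapsed = 10, 64, 8.0  # illustrative numbers only
print('B total utt/sec: {:.2f}'.format(steps * batch_size / elapsed))  # -> 80.00
```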
diff --git a/examples/deepspeech/deepspeech_benchmarking.patch b/examples/deepspeech/deepspeech_benchmarking.patch
deleted file mode 100644
index a937c16a..00000000
--- a/examples/deepspeech/deepspeech_benchmarking.patch
+++ /dev/null
@@ -1,181 +0,0 @@
-diff --git a/DeepSpeech.py b/DeepSpeech.py
-index 8e1b529..c5e26a2 100755
---- a/DeepSpeech.py
-+++ b/DeepSpeech.py
-@@ -22,12 +22,14 @@ from six.moves import zip, range, filter, urllib, BaseHTTPServer
- from tensorflow.python.tools import freeze_graph
- from threading import Thread, Lock
- from util.audio import audiofile_to_input_vector
-+from util.benchmark import BenchmarkHook
- from util.feeding import DataSet, ModelFeeder
- from util.gpu import get_available_gpus
- from util.shared_lib import check_cupti
- from util.text import sparse_tensor_value_to_texts, wer, levenshtein, Alphabet, ndarray_to_text
- from xdg import BaseDirectory as xdg
- import numpy as np
-+import signal
-
-
- # Importer
-@@ -160,6 +162,11 @@ tf.app.flags.DEFINE_string ('one_shot_infer', '', 'one-shot inferen
-
- tf.app.flags.DEFINE_string ('initialize_from_frozen_model', '', 'path to frozen model to initialize from. This behaves like a checkpoint, loading the weights from the frozen model and starting training with those weights. The optimizer parameters aren\'t restored, so remember to adjust the learning rate.')
-
-+# Benchmarking
-+tf.app.flags.DEFINE_integer ('benchmark_steps', 0, 'number of benchmark steps - if 0, normal training will be run')
-+tf.app.flags.DEFINE_integer ('benchmark_log_steps', 1, 'period of benchmark logging in steps - if 0, only final result will be displayed')
-+tf.app.flags.DEFINE_integer ('benchmark_warmup_steps', 1, 'number of warmup steps before benchmarking, works only if benchmark_steps > 0')
-+
- for var in ['b1', 'h1', 'b2', 'h2', 'b3', 'h3', 'b5', 'h5', 'b6', 'h6']:
-     tf.app.flags.DEFINE_float('%s_stddev' % var, None, 'standard deviation to use when initialising %s' % var)
-
-@@ -228,6 +235,9 @@ def initialize_globals():
-     if len(FLAGS.checkpoint_dir) == 0:
-         FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech','checkpoints'))
-
-+    if FLAGS.benchmark_steps > 0:
-+        FLAGS.checkpoint_dir = None
-+
-     # Set default summary dir
-     if len(FLAGS.summary_dir) == 0:
-         FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech','summaries'))
-@@ -483,7 +493,7 @@ def decode_with_lm(inputs, sequence_length, beam_width=100,
-         custom_op_module.ctc_beam_search_decoder_with_lm(
-             inputs, sequence_length, beam_width=beam_width,
-             model_path=FLAGS.lm_binary_path, trie_path=FLAGS.lm_trie_path, alphabet_path=FLAGS.alphabet_config_path,
--            lm_weight=FLAGS.lm_weight, word_count_weight=FLAGS.word_count_weight, valid_word_count_weight=FLAGS.valid_word_count_weight,
-+            lm_weight=FLAGS.lm_weight, valid_word_count_weight=FLAGS.valid_word_count_weight,
-             top_paths=top_paths, merge_repeated=merge_repeated))
-
-     return (
-@@ -1322,7 +1332,8 @@ class TrainingCoordinator(object):
-                     return str
-                 if status == 204: # We use 204 (no content) to indicate end of training
-                     return default
--            except urllib.error.HTTPError as error:
-+            except Exception:
-+                os.kill(os.getpid(), signal.SIGTERM)
-                 log_traffic('Problem reaching coordinator - url: %s, HTTP code: %d' % (url, error.code))
-                 pass
-             time.sleep(10)
-@@ -1449,6 +1460,7 @@ def send_token_to_ps(session, kill=False):
-             log_debug('Sending %s token to ps %d...' % (kind, index))
-             session.run(enqueue, feed_dict={ token_placeholder: token })
-             log_debug('Sent %s token to ps %d.' % (kind, index))
-+    os.kill(os.getpid(), signal.SIGTERM)
-
- def train(server=None):
-     r'''
-@@ -1551,14 +1563,21 @@ def train(server=None):
-         hooks.append(optimizer.make_session_run_hook(is_chief))
-
-     # Hook to save TensorBoard summaries
--    if FLAGS.summary_secs > 0:
-+    if FLAGS.summary_secs > 0 and FLAGS.benchmark_steps == 0:
-         hooks.append(tf.train.SummarySaverHook(save_secs=FLAGS.summary_secs, output_dir=FLAGS.summary_dir, summary_op=merge_all_summaries_op))
-
-     # Hook wih number of checkpoint files to save in checkpoint_dir
--    if FLAGS.train and FLAGS.max_to_keep > 0:
-+    if FLAGS.train and FLAGS.max_to_keep > 0 and FLAGS.checkpoint_dir is not None:
-         saver = tf.train.Saver(max_to_keep=FLAGS.max_to_keep)
-         hooks.append(tf.train.CheckpointSaverHook(checkpoint_dir=FLAGS.checkpoint_dir, save_secs=FLAGS.checkpoint_secs, saver=saver))
-
-+    chief_only_hooks = []
-+
-+    if FLAGS.benchmark_steps > 0:
-+        chief_only_hooks.append(BenchmarkHook(FLAGS.benchmark_steps, FLAGS.benchmark_warmup_steps,
-+                                              FLAGS.benchmark_log_steps, global_step, len(available_devices) *
-+                                              max(1, FLAGS.replicas_to_agg) * FLAGS.train_batch_size))
-+
-     if len(FLAGS.initialize_from_frozen_model) > 0:
-         with tf.gfile.FastGFile(FLAGS.initialize_from_frozen_model, 'rb') as fin:
-             graph_def = tf.GraphDef()
-@@ -1595,8 +1614,10 @@ def train(server=None):
-     with tf.train.MonitoredTrainingSession(master='' if server is None else server.target,
-                                            is_chief=is_chief,
-                                            hooks=hooks,
-+                                           chief_only_hooks=chief_only_hooks,
-                                            checkpoint_dir=FLAGS.checkpoint_dir,
-                                            save_checkpoint_secs=None, # already taken care of by a hook
-+                                           save_summaries_secs=None, save_summaries_steps=None,
-                                            config=session_config) as session:
-         if len(FLAGS.initialize_from_frozen_model) > 0:
-             log_info('Initializing from frozen model: {}'.format(FLAGS.initialize_from_frozen_model))
-@@ -1840,6 +1861,7 @@ def main(_) :
-                     log_debug('Got a kill switch token from worker %i.' % abs(token + 1))
-                     break
-                 log_debug('Got a stop token from worker %i.' % token)
-+            os.kill(os.getpid(), signal.SIGTERM)
-             log_debug('Session closed.')
-         elif FLAGS.job_name == 'worker':
-             # We are a worker and therefore we have to do some work.
-@@ -1864,7 +1886,7 @@ def main(_) :
-         do_single_file_inference(FLAGS.one_shot_infer)
-
-     # Stopping the coordinator
--    COORD.stop()
-+    COORD.stop(wait_for_running_epochs=(FLAGS.benchmark_steps == 0))
-
- if __name__ == '__main__' :
-     tf.app.run()
-diff --git a/util/benchmark.py b/util/benchmark.py
-index 7dca193..de94dbc 100644
---- a/util/benchmark.py
-+++ b/util/benchmark.py
-@@ -3,6 +3,56 @@
-
- from __future__ import absolute_import, division, print_function
-
-+import time
-+import tensorflow as tf
-+
-+
-+class BenchmarkHook(tf.train.SessionRunHook):
-+    def __init__(self, steps, warmup_steps, log_steps, global_step_tensor, batch_size):
-+        self.steps = steps
-+        self.warmup_steps = warmup_steps
-+        self.log_steps = log_steps
-+        self.global_step_tensor = global_step_tensor
-+        self.batch_size = batch_size
-+
-+        self.start_time = None
-+        self.last_time = None
-+        self.start_global_step = None
-+        self.benchmark_global_step = None
-+        self.benchmarking = False
-+
-+    def before_run(self, run_context):
-+        return tf.train.SessionRunArgs(self.global_step_tensor)
-+
-+    def after_run(self, run_context, run_values):
-+        current_global_step = run_values.results
-+
-+        if self.start_global_step is None:
-+            self.start_global_step = current_global_step
-+            self.benchmark_global_step = self.start_global_step + self.warmup_steps
-+            print('B Starting warm up')
-+        elif current_global_step >= self.benchmark_global_step:
-+            if not self.benchmarking:
-+                print('B Done warm up')
-+                if self.log_steps != 0:
-+                    print('B Step\tutt/sec')
-+                self.last_time = self.start_time = time.time()
-+                self.benchmarking = True
-+            else:
-+                current_time = time.time()
-+                if self.log_steps != 0 and not (current_global_step - self.benchmark_global_step) % self.log_steps:
-+                    speed = self.log_steps * self.batch_size / (current_time - self.last_time)
-+                    self.last_time = current_time
-+                    print('B {}\t{:.2f}'.format(current_global_step - self.benchmark_global_step, speed))
-+
-+                if current_global_step - self.benchmark_global_step == self.steps:
-+                    speed = self.steps * self.batch_size / (current_time - self.start_time)
-+                    print('-' * 64)
-+                    print('B total utt/sec: {:.2f}'.format(speed))
-+                    print('-' * 64)
-+                    run_context.request_stop()
-+
-+
- def keep_only_digits(s):
-     r'''
-     local helper to just keep digits
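Outside of DeepSpeech, the `BenchmarkHook` added by the patch above can be exercised on its own. A minimal sketch, assuming TensorFlow 1.x and that the patched `util/benchmark.py` is importable; the toy one-op graph is an assumption for illustration:

```python
import tensorflow as tf  # TF 1.x, as used by the patched code
from util.benchmark import BenchmarkHook  # the module patched in above

# Toy graph: incrementing the global step stands in for a real training op.
global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)

# 10 warmup steps, then time 10 steps at an effective batch of 64 utterances;
# the hook calls run_context.request_stop() once the timed steps are done.
hook = BenchmarkHook(10, 10, 1, global_step, 64)

with tf.train.MonitoredTrainingSession(chief_only_hooks=[hook]) as session:
    while not session.should_stop():
        session.run(train_op)
```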
diff --git a/examples/deepspeech/ds.yaml b/examples/deepspeech/ds.yaml
deleted file mode 100644
index e6f66bbb..00000000
--- a/examples/deepspeech/ds.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ds
-spec:
-  containers:
-  - args:
-    - python3
-    - ./DeepSpeech.py
-    - --train_files=ldc93s1/ldc93s1.csv
-    - --dev_files=ldc93s1/ldc93s1.csv
-    - --test_files=ldc93s1/ldc93s1.csv
-    - --log_level=0
-    - --train_batch_size=32
-    - --benchmark_steps=30
-    - --notest
-    name: ds-container
-    image: <repository>/deepspeech
-    env:
-    - name: CUDA_VISIBLE_DEVICES
-      value: "0"
-    resources:
-      limits:
-        nvidia.com/gpu: 1
-  restartPolicy: Never
diff --git a/examples/deepspeech/img/batch_size_gtx1060.png b/examples/deepspeech/img/batch_size_gtx1060.png
deleted file mode 100644
index a579fcff..00000000
Binary files a/examples/deepspeech/img/batch_size_gtx1060.png and /dev/null differ
diff --git a/examples/deepspeech/img/batch_size_v100.png b/examples/deepspeech/img/batch_size_v100.png
deleted file mode 100644
index 2c27773f..00000000
Binary files a/examples/deepspeech/img/batch_size_v100.png and /dev/null differ
diff --git a/examples/deepspeech/img/batch_size_v100_distributed.png b/examples/deepspeech/img/batch_size_v100_distributed.png
deleted file mode 100644
index 303534c0..00000000
Binary files a/examples/deepspeech/img/batch_size_v100_distributed.png and /dev/null differ
diff --git a/examples/deepspeech/img/multigpu_128.png b/examples/deepspeech/img/multigpu_128.png
deleted file mode 100644
index 86738372..00000000
Binary files a/examples/deepspeech/img/multigpu_128.png and /dev/null differ
diff --git a/examples/deepspeech/img/multigpu_128_distributed.png b/examples/deepspeech/img/multigpu_128_distributed.png
deleted file mode 100644
index 71d78f7e..00000000
Binary files a/examples/deepspeech/img/multigpu_128_distributed.png and /dev/null differ
diff --git a/examples/deepspeech/img/multigpu_64.png b/examples/deepspeech/img/multigpu_64.png
deleted file mode 100644
index e2d305fc..00000000
Binary files a/examples/deepspeech/img/multigpu_64.png and /dev/null differ
diff --git a/examples/deepspeech/template-ds.sh b/examples/deepspeech/template-ds.sh
deleted file mode 100644
index acf59268..00000000
--- a/examples/deepspeech/template-ds.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-if [ -z $1 ]; then
-    TBS=2
-else
-    TBS=$1
-fi
-
-if [ -z $2 ]; then
-    CVD='0'
-else
-    CVD=$2
-fi
-
-if [ -z $3 ]; then
-    GPUS=1
-else
-    GPUS=$3
-fi
-
-echo `rm ds.yaml`
-echo `touch ds.yaml`
-
-cat <<EOT >> ds.yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ds
-spec:
-  containers:
-  - args:
-    - python3
-    - ./DeepSpeech.py
-    - --train_files=ldc93s1/ldc93s1.csv
-    - --dev_files=ldc93s1/ldc93s1.csv
-    - --test_files=ldc93s1/ldc93s1.csv
-    - --log_level=0
-    - --train_batch_size=$TBS
-    - --benchmark_steps=30
-    - --notest
-    name: ds-container
-    image: <repository>/deepspeech
-    env:
-    - name: CUDA_VISIBLE_DEVICES
-      value: "$CVD"
-    resources:
-      limits:
-        nvidia.com/gpu: $GPUS
-  restartPolicy: Never
-EOT
-echo `kubectl create -f ds.yaml`
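For the record, the removed `template-ds.sh` generated `ds.yaml` from three positional arguments and submitted it with `kubectl`; a usage sketch with illustrative argument values:

```bash
# args: train batch size, CUDA_VISIBLE_DEVICES, nvidia.com/gpu limit.
# Pin the job to GPU 3 on a 4-GPU node: request all 4 GPUs, then select "3",
# since CUDA_VISIBLE_DEVICES can only choose among the GPUs Kubernetes assigns.
bash template-ds.sh 64 "3" 4
```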