diff --git a/.circleci/scripts/pull_image_daemonset.yaml b/.circleci/scripts/pull_image_daemonset.yaml index a6ca9fe1b01..d44265a4e93 100644 --- a/.circleci/scripts/pull_image_daemonset.yaml +++ b/.circleci/scripts/pull_image_daemonset.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: pull-docker-daemonset - image: determinedai/pytorch-ngc:0.35.0 + image: determinedai/pytorch-ngc:0.35.1 command: ["/bin/bash"] args: ["echo", "test"] resources: diff --git a/docs/model-dev-guide/prepare-container/custom-env.rst b/docs/model-dev-guide/prepare-container/custom-env.rst index 0614b65700a..a29cedaef9c 100644 --- a/docs/model-dev-guide/prepare-container/custom-env.rst +++ b/docs/model-dev-guide/prepare-container/custom-env.rst @@ -114,9 +114,9 @@ Default Images - - Environment - File Name - - CPUs - - ``determinedai/pytorch-ngc:0.35.0`` + - ``determinedai/pytorch-ngc:0.35.1`` - - NVIDIA GPUs - - ``determinedai/pytorch-ngc:0.35.0`` + - ``determinedai/pytorch-ngc:0.35.1`` - - AMD GPUs - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` @@ -155,7 +155,7 @@ Example Dockerfile that installs custom ``conda``-, ``pip``-, and ``apt``-based .. code:: bash # Determined Image - FROM determinedai/tensorflow-ngc:0.35.0 + FROM determinedai/tensorflow-ngc:0.35.1 # Custom Configuration RUN apt-get update && \ @@ -216,7 +216,7 @@ environments using :ref:`custom images `: .. code:: bash # Determined Image - FROM determinedai/pytorch-ngc:0.35.0 + FROM determinedai/pytorch-ngc:0.35.1 # Create a virtual environment RUN conda create -n myenv python=3.8 diff --git a/docs/model-dev-guide/prepare-container/tensorflow-support.rst b/docs/model-dev-guide/prepare-container/tensorflow-support.rst index c7082176ccb..663ae5b37ec 100644 --- a/docs/model-dev-guide/prepare-container/tensorflow-support.rst +++ b/docs/model-dev-guide/prepare-container/tensorflow-support.rst @@ -20,7 +20,7 @@ Determined supports both TensorFlow 1 and 2. 
The version of TensorFlow used for experiment is controlled by the configured container image. Determined provides prebuilt Docker images that include TensorFlow 2+, 1.15, and 2.8, respectively: -- ``determinedai/tensorflow-ngc:0.35.0`` +- ``determinedai/tensorflow-ngc:0.35.1`` - ``determinedai/environments:cuda-10.2-pytorch-1.7-tf-1.15-gpu-0.21.2`` - ``determinedai/environments:cuda-11.2-tf-2.8-gpu-0.29.1`` diff --git a/docs/reference/deploy/helm-config-reference.rst b/docs/reference/deploy/helm-config-reference.rst index db74e9c48dd..d074155e6a3 100644 --- a/docs/reference/deploy/helm-config-reference.rst +++ b/docs/reference/deploy/helm-config-reference.rst @@ -197,13 +197,13 @@ - ``cpuImage``: Sets the default Docker image for all non-GPU tasks. If a Docker image is specified in the :ref:`experiment config ` this default is overriden. - Defaults to: ``determinedai/pytorch-ngc:0.35.0``. + Defaults to: ``determinedai/pytorch-ngc:0.35.1``. - ``startupHook``: An optional inline script that will be executed as part of task set up. - ``gpuImage``: Sets the default Docker image for all GPU tasks. If a Docker image is specified in the :ref:`experiment config ` this default is overriden. Defaults - to: ``determinedai/pytorch-ngc:0.35.0``. + to: ``determinedai/pytorch-ngc:0.35.1``. - ``logPolicies``: Sets log policies for trials. For details, visit :ref:`log_policies `. diff --git a/docs/reference/deploy/master-config-reference.rst b/docs/reference/deploy/master-config-reference.rst index 29c163734d7..d9b68147869 100644 --- a/docs/reference/deploy/master-config-reference.rst +++ b/docs/reference/deploy/master-config-reference.rst @@ -89,12 +89,12 @@ configure different container images for NVIDIA GPU tasks using the ``cuda`` key Determined 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using the ``rocm`` key. Default values: -- ``determinedai/pytorch-ngc:0.35.0`` for NVIDIA GPUs and for CPUs. 
+- ``determinedai/pytorch-ngc:0.35.1`` for NVIDIA GPUs and for CPUs. - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm. For TensorFlow users, we provide an image that must be referenced in the experiment configuration: -- ``determinedai/tensorflow-ngc:0.35.0`` for NVIDIA GPUs and for CPUs. +- ``determinedai/tensorflow-ngc:0.35.1`` for NVIDIA GPUs and for CPUs. ``environment_variables`` ========================= diff --git a/docs/reference/experiment-config-reference.rst b/docs/reference/experiment-config-reference.rst index e51de71addb..5f0eba74554 100644 --- a/docs/reference/experiment-config-reference.rst +++ b/docs/reference/experiment-config-reference.rst @@ -1353,12 +1353,12 @@ Optional. The Docker image to use when executing the workload. This image must b container images for NVIDIA GPU tasks using ``cuda`` key (``gpu`` prior to 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using ``rocm`` key. Default values: -- ``determinedai/pytorch-ngc:0.35.0`` for NVIDIA GPUs and for CPUs. +- ``determinedai/pytorch-ngc:0.35.1`` for NVIDIA GPUs and for CPUs. - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm. For TensorFlow users, we provide an image that must be referenced in the experiment configuration: -- ``determinedai/tensorflow-ngc:0.35.0`` for NVIDIA GPUs and for CPUs. +- ``determinedai/tensorflow-ngc:0.35.1`` for NVIDIA GPUs and for CPUs. 
When the cluster is configured with :ref:`resource_manager.type: slurm ` and ``container_run_type: singularity``, images are executed using diff --git a/docs/reference/job-config-reference.rst b/docs/reference/job-config-reference.rst index 3c66ef43d13..8f3c23f50fc 100644 --- a/docs/reference/job-config-reference.rst +++ b/docs/reference/job-config-reference.rst @@ -45,13 +45,13 @@ The following configuration settings are supported: different container images for NVIDIA GPU tasks using ``cuda`` key (``gpu`` prior to 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using ``rocm`` key. Default values: - - ``determinedai/pytorch-ngc:0.35.0`` for NVIDIA GPUs and for CPUs. + - ``determinedai/pytorch-ngc:0.35.1`` for NVIDIA GPUs and for CPUs. - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm. For TensorFlow users, we provide an image that must be referenced in the experiment configuration: - - ``determinedai/tensorflow-ngc:0.35.0`` for NVIDIA GPUs and for CPUs. + - ``determinedai/tensorflow-ngc:0.35.1`` for NVIDIA GPUs and for CPUs. - ``force_pull_image``: Forcibly pull the image from the Docker registry and bypass the Docker cache. Defaults to ``false``. diff --git a/docs/setup-cluster/deploy-cluster/slurm/singularity.rst b/docs/setup-cluster/deploy-cluster/slurm/singularity.rst index ed827a6d3d8..f9dc67fad07 100644 --- a/docs/setup-cluster/deploy-cluster/slurm/singularity.rst +++ b/docs/setup-cluster/deploy-cluster/slurm/singularity.rst @@ -30,9 +30,9 @@ by default in this version of Determined are described below. 
- - Environment - File Name - - CPUs - - ``determinedai/pytorch-ngc:0.35.0`` + - ``determinedai/pytorch-ngc:0.35.1`` - - NVIDIA GPUs - - ``determinedai/pytorch-ngc:0.35.0`` + - ``determinedai/pytorch-ngc:0.35.1`` - - AMD GPUs - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512`` diff --git a/docs/setup-cluster/gcp/install-gcp.rst b/docs/setup-cluster/gcp/install-gcp.rst index 12e0ba5a2cd..13ae2c9800d 100644 --- a/docs/setup-cluster/gcp/install-gcp.rst +++ b/docs/setup-cluster/gcp/install-gcp.rst @@ -406,5 +406,5 @@ This command line will spin up a cluster of up to 2 A100s in the ``us-central1-c --compute-agent-instance-type a2-highgpu-1g --gpu-num 1 \ --gpu-type nvidia-tesla-a100 \ --region us-central1 --zone us-central1-c \ - --gpu-env-image determinedai/pytorch-ngc:0.35.0 \ - --cpu-env-image determinedai/pytorch-ngc:0.35.0 + --gpu-env-image determinedai/pytorch-ngc:0.35.1 \ + --cpu-env-image determinedai/pytorch-ngc:0.35.1 diff --git a/docs/setup-cluster/slurm/singularity.rst b/docs/setup-cluster/slurm/singularity.rst index 4c9866b02b4..4e1fefee8d8 100644 --- a/docs/setup-cluster/slurm/singularity.rst +++ b/docs/setup-cluster/slurm/singularity.rst @@ -30,9 +30,9 @@ by default in this version of Determined are described below. - - Environment - File Name - - CPUs - - ``determinedai/pytorch-ngc:0.35.0`` + - ``determinedai/pytorch-ngc:0.35.1`` - - NVIDIA GPUs - - ``determinedai/pytorch-ngc:0.35.0`` + - ``determinedai/pytorch-ngc:0.35.1`` - - AMD GPUs - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512`` diff --git a/docs/setup-cluster/slurm/slurm-requirements.rst b/docs/setup-cluster/slurm/slurm-requirements.rst index edc878defdc..cb2da4d4cee 100644 --- a/docs/setup-cluster/slurm/slurm-requirements.rst +++ b/docs/setup-cluster/slurm/slurm-requirements.rst @@ -510,7 +510,7 @@ platform. There may be additional per-user configuration that is required. .. 
code:: bash - image=determinedai/pytorch-ngc:0.35.0 + image=determinedai/pytorch-ngc:0.35.1 cd /shared/enroot/images enroot import docker://$image enroot create /shared/enroot/images/${image//[\/:]/\+}.sqsh diff --git a/e2e_tests/tests/config.py b/e2e_tests/tests/config.py index 012783852cf..fcaaba488ad 100644 --- a/e2e_tests/tests/config.py +++ b/e2e_tests/tests/config.py @@ -14,12 +14,12 @@ MAX_TRIAL_BUILD_SECS = 90 -DEFAULT_TF2_CPU_IMAGE = "determinedai/tensorflow-ngc:0.35.0" -DEFAULT_TF2_GPU_IMAGE = "determinedai/tensorflow-ngc:0.35.0" -DEFAULT_PT_CPU_IMAGE = "determinedai/pytorch-tensorflow-cpu:0.35.0" -DEFAULT_PT_GPU_IMAGE = "determinedai/pytorch-tensorflow-cuda:0.35.0" -DEFAULT_PT2_CPU_IMAGE = "determinedai/pytorch-ngc:0.35.0" -DEFAULT_PT2_GPU_IMAGE = "determinedai/pytorch-ngc:0.35.0" +DEFAULT_TF2_CPU_IMAGE = "determinedai/tensorflow-ngc:0.35.1" +DEFAULT_TF2_GPU_IMAGE = "determinedai/tensorflow-ngc:0.35.1" +DEFAULT_PT_CPU_IMAGE = "determinedai/pytorch-tensorflow-cpu:0.35.1" +DEFAULT_PT_GPU_IMAGE = "determinedai/pytorch-tensorflow-cuda:0.35.1" +DEFAULT_PT2_CPU_IMAGE = "determinedai/pytorch-ngc:0.35.1" +DEFAULT_PT2_GPU_IMAGE = "determinedai/pytorch-ngc:0.35.1" TF2_CPU_IMAGE = os.environ.get("TF2_CPU_IMAGE") or DEFAULT_TF2_CPU_IMAGE TF2_GPU_IMAGE = os.environ.get("TF2_GPU_IMAGE") or DEFAULT_TF2_GPU_IMAGE diff --git a/e2e_tests/tests/fixtures/ports-proxy/config.yaml b/e2e_tests/tests/fixtures/ports-proxy/config.yaml index 9f7fc75dcbd..46ef51ca9ef 100644 --- a/e2e_tests/tests/fixtures/ports-proxy/config.yaml +++ b/e2e_tests/tests/fixtures/ports-proxy/config.yaml @@ -23,7 +23,7 @@ max_restarts: 0 # Hardcode the image because the new image has a bug. TODO fix this when the image bug is fixed. 
environment: - image: determinedai/pytorch-tensorflow-cpu:0.35.0 + image: determinedai/pytorch-tensorflow-cpu:0.35.1 proxy_ports: - proxy_port: 8000 proxy_tcp: false diff --git a/examples/computer_vision/iris_tf_keras/adaptive.yaml b/examples/computer_vision/iris_tf_keras/adaptive.yaml index 57729333694..55505fc5aca 100644 --- a/examples/computer_vision/iris_tf_keras/adaptive.yaml +++ b/examples/computer_vision/iris_tf_keras/adaptive.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc:0.35.0 - gpu: determinedai/tensorflow-ngc:0.35.0 + cpu: determinedai/tensorflow-ngc:0.35.1 + gpu: determinedai/tensorflow-ngc:0.35.1 hyperparameters: learning_rate: type: log diff --git a/examples/computer_vision/iris_tf_keras/const.yaml b/examples/computer_vision/iris_tf_keras/const.yaml index b1bb67d49ed..9bb1e008c8f 100644 --- a/examples/computer_vision/iris_tf_keras/const.yaml +++ b/examples/computer_vision/iris_tf_keras/const.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc:0.35.0 - gpu: determinedai/tensorflow-ngc:0.35.0 + cpu: determinedai/tensorflow-ngc:0.35.1 + gpu: determinedai/tensorflow-ngc:0.35.1 hyperparameters: learning_rate: 1.0e-4 learning_rate_decay: 1.0e-6 diff --git a/examples/computer_vision/iris_tf_keras/distributed.yaml b/examples/computer_vision/iris_tf_keras/distributed.yaml index 74cc69cb0e1..240c50ad171 100644 --- a/examples/computer_vision/iris_tf_keras/distributed.yaml +++ b/examples/computer_vision/iris_tf_keras/distributed.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc:0.35.0 - gpu: determinedai/tensorflow-ngc:0.35.0 + cpu: determinedai/tensorflow-ngc:0.35.1 + gpu: determinedai/tensorflow-ngc:0.35.1 hyperparameters: learning_rate: 1.0e-4 learning_rate_decay: 1.0e-6 diff --git 
a/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml b/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml index efe3caeda43..87c226df307 100644 --- a/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml +++ b/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc:0.35.0 - gpu: determinedai/tensorflow-ngc:0.35.0 + cpu: determinedai/tensorflow-ngc:0.35.1 + gpu: determinedai/tensorflow-ngc:0.35.1 resources: slots_per_trial: 8 resource_pool: defq_GPU_cancelable diff --git a/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml b/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml index bde4a544c61..14e031b96c9 100644 --- a/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml +++ b/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc:0.35.0 - gpu: determinedai/tensorflow-ngc:0.35.0 + cpu: determinedai/tensorflow-ngc:0.35.1 + gpu: determinedai/tensorflow-ngc:0.35.1 resources: slots_per_trial: 8 resource_pool: defq_GPU_hipri diff --git a/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml b/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml index 299d946031c..0f523c70492 100644 --- a/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml +++ b/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml @@ -2,7 +2,7 @@ name: torchvision dsat core_api max_restarts: 0 environment: image: - gpu: determinedai/pytorch-ngc:0.35.0 + gpu: determinedai/pytorch-ngc:0.35.1 resources: slots_per_trial: 2 shm_size: 4294967296 # 4 GiB. 
diff --git a/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml b/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml index ea54624db2e..d4b64df27d3 100644 --- a/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml +++ b/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml @@ -2,7 +2,7 @@ name: torchvision dsat deepspeed_trial max_restarts: 0 environment: image: - gpu: determinedai/pytorch-ngc:0.35.0 + gpu: determinedai/pytorch-ngc:0.35.1 resources: slots_per_trial: 2 shm_size: 4294967296 # 4 GiB. diff --git a/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml b/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml index 0bb6363a2d4..9cfa650eb60 100644 --- a/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml +++ b/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml @@ -6,7 +6,7 @@ environment: # You may need to modify this to match your network configuration. - NCCL_SOCKET_IFNAME=ens,eth,ib image: - gpu: determinedai/pytorch-ngc:0.35.0 + gpu: determinedai/pytorch-ngc:0.35.1 resources: slots_per_trial: 2 searcher: diff --git a/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml b/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml index 3f7cd015fa5..e83da16fb21 100644 --- a/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml +++ b/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml @@ -6,7 +6,7 @@ environment: # You may need to modify this to match your network configuration. 
- NCCL_SOCKET_IFNAME=ens,eth,ib image: - gpu: determinedai/pytorch-ngc:0.35.0 + gpu: determinedai/pytorch-ngc:0.35.1 resources: slots_per_trial: 2 searcher: diff --git a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json index 21906f6eb78..048133d9e60 100644 --- a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json +++ b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json @@ -39,8 +39,8 @@ }, "force_pull_image": false, "image": { - "cpu": "determinedai/tensorflow-ngc:0.35.0", - "cuda": "determinedai/tensorflow-ngc:0.35.0", + "cpu": "determinedai/tensorflow-ngc:0.35.1", + "cuda": "determinedai/tensorflow-ngc:0.35.1", "rocm": "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" }, "pod_spec": null, diff --git a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json index cc0e162f057..7b31164deec 100644 --- a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json +++ b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json @@ -38,8 +38,8 @@ }, "force_pull_image": false, "image": { - "cpu": "determinedai/tensorflow-ngc:0.35.0", - "cuda": "determinedai/tensorflow-ngc:0.35.0", + "cpu": "determinedai/tensorflow-ngc:0.35.1", + "cuda": "determinedai/tensorflow-ngc:0.35.1", "rocm": "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" }, "pod_spec": null, diff --git a/harness/tests/fixtures/checkpoint.json b/harness/tests/fixtures/checkpoint.json index 2ae2c914a6c..2b7ba93c47a 100644 --- a/harness/tests/fixtures/checkpoint.json +++ b/harness/tests/fixtures/checkpoint.json @@ -69,8 +69,8 @@ }, "force_pull_image":false, "image":{ - "cpu":"determinedai/pytorch-ngc:0.35.0", - 
"cuda":"determinedai/pytorch-ngc:0.35.0", + "cpu":"determinedai/pytorch-ngc:0.35.1", + "cuda":"determinedai/pytorch-ngc:0.35.1", "rocm":"determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" }, "pod_spec":null, diff --git a/helm/charts/determined/values.yaml b/helm/charts/determined/values.yaml index 62daf30819e..a03032a23d5 100644 --- a/helm/charts/determined/values.yaml +++ b/helm/charts/determined/values.yaml @@ -31,8 +31,8 @@ defaultImages: kubeSchedulerPreemption: "determinedai/kube-scheduler:0.17.0" # default images for CPU and GPU environments - cpuImage: "determinedai/pytorch-ngc:0.35.0" - gpuImage: "determinedai/pytorch-ngc:0.35.0" + cpuImage: "determinedai/pytorch-ngc:0.35.1" + gpuImage: "determinedai/pytorch-ngc:0.35.1" # Install Determined enterprise edition. enterpriseEdition: false diff --git a/master/pkg/schemas/expconf/const.go b/master/pkg/schemas/expconf/const.go index 0d066ed60e4..ee90bb2d1dc 100644 --- a/master/pkg/schemas/expconf/const.go +++ b/master/pkg/schemas/expconf/const.go @@ -8,7 +8,7 @@ const ( // Default task environment docker image names. 
const ( - CPUImage = "determinedai/pytorch-ngc:0.35.0" - CUDAImage = "determinedai/pytorch-ngc:0.35.0" + CPUImage = "determinedai/pytorch-ngc:0.35.1" + CUDAImage = "determinedai/pytorch-ngc:0.35.1" ROCMImage = "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" ) diff --git a/model_hub/Makefile b/model_hub/Makefile index 562e1088ccc..63e149e8351 100644 --- a/model_hub/Makefile +++ b/model_hub/Makefile @@ -5,7 +5,7 @@ SHORT_GIT_HASH := $(shell git rev-parse --short HEAD) ARTIFACTS_DIR := /tmp/artifacts # Model-hub library environments will be built on top of the default GPU and CPU images in master/pkg/model/defaults.go -DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda:0.35.0 +DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda:0.35.1 ############REMINDER############ # When bumping third-party library versions, remember to bump versions in diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 2b603b177da..6ae5a107403 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -47,8 +47,8 @@ environment_variables: {} force_pull_image: false image: - cpu: determinedai/pytorch-ngc:0.35.0 - cuda: determinedai/pytorch-ngc:0.35.0 + cpu: determinedai/pytorch-ngc:0.35.1 + cuda: determinedai/pytorch-ngc:0.35.1 rocm: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512 pod_spec: null ports: diff --git a/tools/scripts/bumpenvs.yaml b/tools/scripts/bumpenvs.yaml index 04ee3fe131c..26543b65aba 100644 --- a/tools/scripts/bumpenvs.yaml +++ b/tools/scripts/bumpenvs.yaml @@ -14,7 +14,7 @@ deepspeed_0_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deeps old: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-079eb6d} deepspeed_0_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-0.31.1, old: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-0.30.1} 
-deepspeed_gpt_neox_0_hashed: {new: determinedai/deepspeed-cuda-gpt-neox:0.35.0, old: determinedai/deepspeed-cuda-gpt-neox:5432424} +deepspeed_gpt_neox_0_hashed: {new: determinedai/deepspeed-cuda-gpt-neox:0.35.1, old: determinedai/deepspeed-cuda-gpt-neox:0.35.0} deepspeed_gpu_0_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-2196775, old: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-f66cbce} deepspeed_gpu_0_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-0.29.1, @@ -81,7 +81,8 @@ pytorch10_tf27_rocm50_0_hashed: {new: determinedai/environments:rocm-5.0-pytorch old: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-bf9480b} pytorch10_tf27_rocm50_0_versioned: {new: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4, old: determinedai/environments-dev:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4} -pytorch13_tf210_rocm56_0_hashed: {new: determinedai/environments:0.35.0, old: determinedai/environments-dev:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-5432424} +pytorch13_tf210_rocm56_0_hashed: {new: determinedai/environments-dev:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-5432424, + old: determinedai/environments-dev:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-5432424} pytorch13_tf210_rocm56_0_versioned: {new: determinedai/environments-dev:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-0.33.1, old: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-0.33.1} pytorch13_tf210_rocm56_1_hashed: {new: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-2196775, @@ -94,25 +95,26 @@ pytorch19_tf25_rocm_0_versioned: {new: determinedai/environments:rocm-5.0-pytorc old: determinedai/environments:rocm-4.2-pytorch-1.9-tf-2.5-rocm-0.18.5} pytorch19_tf25_rocm_1_hashed: {new: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-096d730} pytorch19_tf25_rocm_1_versioned: {new: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.19.4} 
-pytorch20_tf210_rocm56_0_hashed: {new: determinedai/environments:0.35.0, old: determinedai/environments-dev:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-5432424} +pytorch20_tf210_rocm56_0_hashed: {new: determinedai/environments-dev:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-5432424, + old: determinedai/environments-dev:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-5432424} pytorch20_tf210_rocm56_0_versioned: {new: determinedai/environments-dev:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.33.1, old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.33.1} pytorch20_tf210_rocm56_1_hashed: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-2196775, old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-f66cbce} pytorch20_tf210_rocm56_1_versioned: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.29.1, old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.27.1} -pytorch_cpu_0_hashed: {new: determinedai/pytorch-cpu:0.35.0, old: determinedai/pytorch-cpu-dev:5432424} -pytorch_cpu_1_hashed: {new: determinedai/pytorch-cpu-hpc:0.35.0, old: determinedai/pytorch-cpu-hpc-dev:5432424} -pytorch_cuda_0_hashed: {new: determinedai/pytorch-cuda:0.35.0, old: determinedai/pytorch-cuda-dev:5432424} -pytorch_cuda_1_hashed: {new: determinedai/pytorch-cuda-hpc:0.35.0, old: determinedai/pytorch-cuda-hpc-dev:5432424} -pytorch_ngc_hashed: {new: determinedai/pytorch-ngc:0.35.0, old: determinedai/pytorch-ngc-dev:5432424} -pytorch_ngc_hpc_hashed: {new: determinedai/pytorch-ngc-hpc:0.35.0, old: determinedai/pytorch-ngc-hpc-dev:5432424} -tensorflow_cpu_0_hashed: {new: determinedai/pytorch-tensorflow-cpu:0.35.0, old: determinedai/pytorch-tensorflow-cpu-dev:5432424} -tensorflow_cpu_1_hashed: {new: determinedai/pytorch-tensorflow-cpu-hpc:0.35.0, old: determinedai/pytorch-tensorflow-cpu-hpc-dev:5432424} -tensorflow_cuda_0_hashed: {new: determinedai/pytorch-tensorflow-cuda:0.35.0, old: 
determinedai/pytorch-tensorflow-cuda-dev:5432424} -tensorflow_cuda_1_hashed: {new: determinedai/pytorch-tensorflow-cuda-hpc:0.35.0, old: determinedai/pytorch-tensorflow-cuda-hpc-dev:5432424} -tensorflow_ngc_hashed: {new: determinedai/tensorflow-ngc:0.35.0, old: determinedai/tensorflow-ngc-dev:5432424} -tensorflow_ngc_hpc_hashed: {new: determinedai/tensorflow-ngc-hpc:0.35.0, old: determinedai/tensorflow-ngc-hpc-dev:5432424} +pytorch_cpu_0_hashed: {new: determinedai/pytorch-cpu:0.35.1, old: determinedai/pytorch-cpu:0.35.0} +pytorch_cpu_1_hashed: {new: determinedai/pytorch-cpu-hpc:0.35.1, old: determinedai/pytorch-cpu-hpc:0.35.0} +pytorch_cuda_0_hashed: {new: determinedai/pytorch-cuda:0.35.1, old: determinedai/pytorch-cuda:0.35.0} +pytorch_cuda_1_hashed: {new: determinedai/pytorch-cuda-hpc:0.35.1, old: determinedai/pytorch-cuda-hpc:0.35.0} +pytorch_ngc_hashed: {new: determinedai/pytorch-ngc:0.35.1, old: determinedai/pytorch-ngc:0.35.0} +pytorch_ngc_hpc_hashed: {new: determinedai/pytorch-ngc-hpc:0.35.1, old: determinedai/pytorch-ngc-hpc:0.35.0} +tensorflow_cpu_0_hashed: {new: determinedai/pytorch-tensorflow-cpu:0.35.1, old: determinedai/pytorch-tensorflow-cpu:0.35.0} +tensorflow_cpu_1_hashed: {new: determinedai/pytorch-tensorflow-cpu-hpc:0.35.1, old: determinedai/pytorch-tensorflow-cpu-hpc:0.35.0} +tensorflow_cuda_0_hashed: {new: determinedai/pytorch-tensorflow-cuda:0.35.1, old: determinedai/pytorch-tensorflow-cuda:0.35.0} +tensorflow_cuda_1_hashed: {new: determinedai/pytorch-tensorflow-cuda-hpc:0.35.1, old: determinedai/pytorch-tensorflow-cuda-hpc:0.35.0} +tensorflow_ngc_hashed: {new: determinedai/tensorflow-ngc:0.35.1, old: determinedai/tensorflow-ngc:0.35.0} +tensorflow_ngc_hpc_hashed: {new: determinedai/tensorflow-ngc-hpc:0.35.1, old: determinedai/tensorflow-ngc-hpc:0.35.0} tf24_cpu_0_hashed: {new: determinedai/environments:py-3.8-pytorch-1.9-tf-2.4-cpu-24586f0, old: determinedai/environments-dev:py-3.8-pytorch-1.9-tf-2.4-cpu-1c769fb} tf24_cpu_0_versioned: {new: 
determinedai/environments:py-3.8-pytorch-1.9-tf-2.4-cpu-0.19.10, diff --git a/tools/scripts/environments-target.txt b/tools/scripts/environments-target.txt index 7b52f5e5178..731b95d7fc8 100644 --- a/tools/scripts/environments-target.txt +++ b/tools/scripts/environments-target.txt @@ -1 +1 @@ -0.35.0 +0.35.1 diff --git a/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json b/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json index f1c8e6def09..45c1ece987b 100644 --- a/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json +++ b/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json @@ -32,8 +32,8 @@ "name": "Fork of Fork of mnist_tp_to_estimator_const", "environment": { "image": { - "cpu": "determinedai/pytorch-ngc:0.35.0", - "gpu": "determinedai/pytorch-ngc:0.35.0" + "cpu": "determinedai/pytorch-ngc:0.35.1", + "gpu": 
"determinedai/pytorch-ngc:0.35.1" }, "ports": null, "pod_spec": null, diff --git a/webui/react/src/fixtures/responses/experiment-details/set-a.json b/webui/react/src/fixtures/responses/experiment-details/set-a.json index f573a0783fb..39ff7226494 100644 --- a/webui/react/src/fixtures/responses/experiment-details/set-a.json +++ b/webui/react/src/fixtures/responses/experiment-details/set-a.json @@ -694,8 +694,8 @@ "environment_variables": {}, "force_pull_image": false, "image": { - "cpu": "determinedai/pytorch-ngc:0.35.0", - "gpu": "determinedai/pytorch-ngc:0.35.0" + "cpu": "determinedai/pytorch-ngc:0.35.1", + "gpu": "determinedai/pytorch-ngc:0.35.1" }, "pod_spec": null, "ports": null @@ -838,8 +838,8 @@ "environment_variables": {}, "force_pull_image": false, "image": { - "cpu": "determinedai/tensorflow-ngc:0.35.0", - "gpu": "determinedai/tensorflow-ngc:0.35.0" + "cpu": "determinedai/tensorflow-ngc:0.35.1", + "gpu": "determinedai/tensorflow-ngc:0.35.1" }, "pod_spec": { "metadata": { diff --git a/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json b/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json index 873f42c1a4c..178996a9c6b 100644 --- a/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json +++ b/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json @@ -30,8 +30,8 @@ "name": "noop_adaptive", "environment": { "image": { - "cpu": "determinedai/pytorch-ngc:0.35.0", - "gpu": "determinedai/pytorch-ngc:0.35.0" + "cpu": "determinedai/pytorch-ngc:0.35.1", + "gpu": "determinedai/pytorch-ngc:0.35.1" }, "ports": null, "force_pull_image": false,