From 40d3509868f184b3198546141525f2ba0d35b449 Mon Sep 17 00:00:00 2001 From: RAGHUL M Date: Tue, 3 Dec 2024 14:39:57 +0530 Subject: [PATCH 1/2] Smoke Test failure - Name fix for Runtime template (#2103) * Name fix for Runtime template * Name fix for Runtime YAML template * uncommented teardown --------- Co-authored-by: Tarun Kumar --- .../1003__model_serving_customruntimes.robot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot b/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot index 7cd957f57..6bb59ee08 100644 --- a/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot +++ b/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot @@ -13,6 +13,7 @@ Test Tags Dashboard ${RESOURCES_DIRPATH}= tests/Resources/Files ${OVMS_RUNTIME_FILEPATH}= ${RESOURCES_DIRPATH}/ovms_servingruntime.yaml ${UPLOADED_OVMS_DISPLAYED_NAME}= ODS-CI Custom OpenVINO Model Server +${UPLOADED_OVMS_YAML_NAME}= ovms-ods-ci ${PRJ_TITLE}= CustomServingRuntimesProject ${PRJ_DESCRIPTION}= ODS-CI DS Project for testing of Custom Serving Runtimes ${MODEL_SERVER_NAME}= ODS-CI CustomServingRuntime Server @@ -25,7 +26,7 @@ Verify RHODS Admins Can Import A Custom Serving Runtime Template By Uploading A Open Dashboard Settings settings_page=Serving runtimes Upload Serving Runtime Template runtime_filepath=${OVMS_RUNTIME_FILEPATH} ... serving_platform=multi runtime_protocol=gRPC - Serving Runtime Template Should Be Listed displayed_name=${UPLOADED_OVMS_DISPLAYED_NAME} + Serving Runtime Template Should Be Listed displayed_name=${UPLOADED_OVMS_YAML_NAME} ... serving_platform=multi Verify RHODS Admins Can Delete A Custom Serving Runtime Template From 0e5a93e0d5d123f62d1b1a1cb08b020c9942266a Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 3 Dec 2024 10:18:38 +0100 Subject: [PATCH 2/2] Update images used in nvidia and rocm pipeline testing for 2.16 (master) (#2086) Update images used in nvidia and rocm pipeline testing for 2.16 Use the workbench images available in 2.16 RC2 Signed-off-by: Jorge Garcia Oncins --- .../pytorch/pytorch_amd_gpu_availability.py | 10 ++-- ...pytorch_amd_gpu_availability_compiled.yaml | 48 +++++++++---------- .../pytorch_nvidia_gpu_availability.py | 11 ++--- ...orch_nvidia_gpu_availability_compiled.yaml | 48 +++++++++---------- 4 files changed, 57 insertions(+), 60 deletions(-) diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py index bd9b74b69..52c6d83d2 100644 --- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py @@ -3,7 +3,7 @@ # Runtime: Pytorch with ROCm and Python 3.9 (UBI 9) common_base_image = ( - "quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10" + "quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852" ) @@ -14,11 +14,9 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule") -@dsl.component( - base_image=common_base_image -) 
+@dsl.component(base_image=common_base_image) def verify_gpu_availability(gpu_toleration: bool): - import torch + import torch # noqa: PLC0415 cuda_available = torch.cuda.is_available() device_count = torch.cuda.device_count() @@ -30,7 +28,7 @@ def verify_gpu_availability(gpu_toleration: bool): if gpu_toleration: assert torch.cuda.is_available() assert torch.cuda.device_count() > 0 - t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda') + t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda") else: assert not torch.cuda.is_available() assert torch.cuda.device_count() == 0 diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml index 8652d23c5..d3f158ecd 100644 --- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml @@ -42,18 +42,18 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ - \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ - \ print(\"------------------------------\")\n print(\"GPU availability\"\ - )\n print(\"------------------------------\")\n print(f\"cuda available:\ - \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ - \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ - \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ - \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ - \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ - \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ - \ availability test: PASS\")\n\n" - image: quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10 + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\ + \ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \ + \ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\ + )\n print(\"GPU availability\")\n print(\"------------------------------\"\ + )\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\ + \ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\ + \ not torch.cuda.is_available()\n assert torch.cuda.device_count()\ + \ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\ + tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852 exec-verify-gpu-availability-2: container: args: @@ -80,18 +80,18 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ - \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ - \ print(\"------------------------------\")\n print(\"GPU availability\"\ - )\n print(\"------------------------------\")\n print(f\"cuda 
available:\ - \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ - \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ - \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ - \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ - \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ - \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ - \ availability test: PASS\")\n\n" - image: quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10 + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\ + \ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \ + \ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\ + )\n print(\"GPU availability\")\n print(\"------------------------------\"\ + )\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\ + \ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\ + \ not torch.cuda.is_available()\n assert torch.cuda.device_count()\ + \ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\ + tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852 resources: accelerator: count: '1' diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py index fa32cd9b0..d593a8c5c 100644 --- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py @@ -2,8 +2,9 @@ from kfp.dsl import PipelineTask # Runtime: Pytorch with CUDA and Python 3.9 (UBI 9) +# Images for each release can be found here (in the branch for the release) common_base_image = ( - "quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a" + "quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd" ) @@ -14,11 +15,9 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule") -@dsl.component( - base_image=common_base_image -) +@dsl.component(base_image=common_base_image) def verify_gpu_availability(gpu_toleration: bool): - import torch + import torch # noqa: PLC0415 cuda_available = torch.cuda.is_available() device_count = torch.cuda.device_count() @@ -30,7 +29,7 @@ def verify_gpu_availability(gpu_toleration: bool): if gpu_toleration: assert torch.cuda.is_available() assert torch.cuda.device_count() > 0 - t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda') + t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda") else: assert not torch.cuda.is_available() assert torch.cuda.device_count() == 0 diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml 
index d66218962..95cbebf16 100644 --- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml +++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml @@ -42,18 +42,18 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ - \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ - \ print(\"------------------------------\")\n print(\"GPU availability\"\ - )\n print(\"------------------------------\")\n print(f\"cuda available:\ - \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ - \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ - \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ - \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ - \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ - \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ - \ availability test: PASS\")\n\n" - image: quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\ + \ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \ + \ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\ + )\n print(\"GPU availability\")\n print(\"------------------------------\"\ + )\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\ + \ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\ + \ not torch.cuda.is_available()\n assert torch.cuda.device_count()\ + \ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\ + tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd exec-verify-gpu-availability-2: container: args: @@ -80,18 +80,18 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\ - \n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\ - \ print(\"------------------------------\")\n print(\"GPU availability\"\ - )\n print(\"------------------------------\")\n print(f\"cuda available:\ - \ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \ - \ if gpu_toleration:\n assert torch.cuda.is_available()\n \ - \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\ - \ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\ - \ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\ - \ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\ - \ availability test: PASS\")\n\n" - image: quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a + \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\ + \ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \ + \ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\ + )\n print(\"GPU 
availability\")\n print(\"------------------------------\"\ + )\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\ + \ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\ + \ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\ + \ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\ + \ not torch.cuda.is_available()\n assert torch.cuda.device_count()\ + \ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\ + tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n" + image: quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd resources: accelerator: count: '1'