Commit 32b1e42: merge into fix-permissions

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
rnetser committed Dec 3, 2024
2 parents 26c41a6 + 0e5a93e commit 32b1e42
Showing 5 changed files with 59 additions and 61 deletions.
@@ -3,7 +3,7 @@
 
 # Runtime: Pytorch with ROCm and Python 3.9 (UBI 9)
 common_base_image = (
-    "quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10"
+    "quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852"
 )
 
 
@@ -14,11 +14,9 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li
     kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")
 
 
-@dsl.component(
-    base_image=common_base_image
-)
+@dsl.component(base_image=common_base_image)
 def verify_gpu_availability(gpu_toleration: bool):
-    import torch
+    import torch  # noqa: PLC0415
 
     cuda_available = torch.cuda.is_available()
     device_count = torch.cuda.device_count()
@@ -30,7 +28,7 @@ def verify_gpu_availability(gpu_toleration: bool):
     if gpu_toleration:
         assert torch.cuda.is_available()
         assert torch.cuda.device_count() > 0
-        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
+        t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda")
     else:
         assert not torch.cuda.is_available()
         assert torch.cuda.device_count() == 0

@@ -42,18 +42,18 @@ deploymentSpec:
 '
 - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\
-\n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\
-\ print(\"------------------------------\")\n print(\"GPU availability\"\
-)\n print(\"------------------------------\")\n print(f\"cuda available:\
-\ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \
-\ if gpu_toleration:\n assert torch.cuda.is_available()\n \
-\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\
-\ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\
-\ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\
-\ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\
-\ availability test: PASS\")\n\n"
-image: quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10
+\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\
+\ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \
+\ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\
+)\n print(\"GPU availability\")\n print(\"------------------------------\"\
+)\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\
+\ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\
+\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\
+\ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\
+\ not torch.cuda.is_available()\n assert torch.cuda.device_count()\
+\ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\
+tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n"
+image: quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852
 exec-verify-gpu-availability-2:
 container:
 args:
@@ -80,18 +80,18 @@ deploymentSpec:
 '
 - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\
-\n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\
-\ print(\"------------------------------\")\n print(\"GPU availability\"\
-)\n print(\"------------------------------\")\n print(f\"cuda available:\
-\ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \
-\ if gpu_toleration:\n assert torch.cuda.is_available()\n \
-\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\
-\ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\
-\ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\
-\ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\
-\ availability test: PASS\")\n\n"
-image: quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10
+\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\
+\ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \
+\ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\
+)\n print(\"GPU availability\")\n print(\"------------------------------\"\
+)\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\
+\ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\
+\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\
+\ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\
+\ not torch.cuda.is_available()\n assert torch.cuda.device_count()\
+\ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\
+tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n"
+image: quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852
 resources:
 accelerator:
 count: '1'

@@ -2,8 +2,9 @@
 from kfp.dsl import PipelineTask
 
 # Runtime: Pytorch with CUDA and Python 3.9 (UBI 9)
+# Images for each release can be found here (in the branch for the release)
 common_base_image = (
-    "quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a"
+    "quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd"
 )
 
 
@@ -14,11 +15,9 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li
     kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")
 
 
-@dsl.component(
-    base_image=common_base_image
-)
+@dsl.component(base_image=common_base_image)
 def verify_gpu_availability(gpu_toleration: bool):
-    import torch
+    import torch  # noqa: PLC0415
 
     cuda_available = torch.cuda.is_available()
     device_count = torch.cuda.device_count()
@@ -30,7 +29,7 @@ def verify_gpu_availability(gpu_toleration: bool):
     if gpu_toleration:
         assert torch.cuda.is_available()
         assert torch.cuda.device_count() > 0
-        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
+        t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda")
     else:
         assert not torch.cuda.is_available()
         assert torch.cuda.device_count() == 0

@@ -42,18 +42,18 @@ deploymentSpec:
 '
 - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\
-\n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\
-\ print(\"------------------------------\")\n print(\"GPU availability\"\
-)\n print(\"------------------------------\")\n print(f\"cuda available:\
-\ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \
-\ if gpu_toleration:\n assert torch.cuda.is_available()\n \
-\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\
-\ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\
-\ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\
-\ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\
-\ availability test: PASS\")\n\n"
-image: quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a
+\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\
+\ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \
+\ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\
+)\n print(\"GPU availability\")\n print(\"------------------------------\"\
+)\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\
+\ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\
+\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\
+\ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\
+\ not torch.cuda.is_available()\n assert torch.cuda.device_count()\
+\ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\
+tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n"
+image: quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd
 exec-verify-gpu-availability-2:
 container:
 args:
@@ -80,18 +80,18 @@ deploymentSpec:
 '
 - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\n\
-\n cuda_available = torch.cuda.is_available()\n device_count = torch.cuda.device_count()\n\
-\ print(\"------------------------------\")\n print(\"GPU availability\"\
-)\n print(\"------------------------------\")\n print(f\"cuda available:\
-\ {cuda_available}\")\n print(f\"device count: {device_count}\")\n \
-\ if gpu_toleration:\n assert torch.cuda.is_available()\n \
-\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5, 5,\
-\ 5], dtype=torch.int64, device='cuda')\n else:\n assert not torch.cuda.is_available()\n\
-\ assert torch.cuda.device_count() == 0\n t = torch.tensor([5,\
-\ 5, 5], dtype=torch.int64)\n print(f\"tensor: {t}\")\n print(\"GPU\
-\ availability test: PASS\")\n\n"
-image: quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a
+\ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n import torch\
+\ # noqa: PLC0415\n\n cuda_available = torch.cuda.is_available()\n \
+\ device_count = torch.cuda.device_count()\n print(\"------------------------------\"\
+)\n print(\"GPU availability\")\n print(\"------------------------------\"\
+)\n print(f\"cuda available: {cuda_available}\")\n print(f\"device\
+\ count: {device_count}\")\n if gpu_toleration:\n assert torch.cuda.is_available()\n\
+\ assert torch.cuda.device_count() > 0\n t = torch.tensor([5,\
+\ 5, 5], dtype=torch.int64, device=\"cuda\")\n else:\n assert\
+\ not torch.cuda.is_available()\n assert torch.cuda.device_count()\
+\ == 0\n t = torch.tensor([5, 5, 5], dtype=torch.int64)\n print(f\"\
+tensor: {t}\")\n print(\"GPU availability test: PASS\")\n\n"
+image: quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd
 resources:
 accelerator:
 count: '1'

@@ -13,6 +13,7 @@ Test Tags    Dashboard
 ${RESOURCES_DIRPATH}=    tests/Resources/Files
 ${OVMS_RUNTIME_FILEPATH}=    ${RESOURCES_DIRPATH}/ovms_servingruntime.yaml
 ${UPLOADED_OVMS_DISPLAYED_NAME}=    ODS-CI Custom OpenVINO Model Server
+${UPLOADED_OVMS_YAML_NAME}=    ovms-ods-ci
 ${PRJ_TITLE}=    CustomServingRuntimesProject
 ${PRJ_DESCRIPTION}=    ODS-CI DS Project for testing of Custom Serving Runtimes
 ${MODEL_SERVER_NAME}=    ODS-CI CustomServingRuntime Server
@@ -25,7 +26,7 @@ Verify RHODS Admins Can Import A Custom Serving Runtime Template By Uploading A
     Open Dashboard Settings    settings_page=Serving runtimes
     Upload Serving Runtime Template    runtime_filepath=${OVMS_RUNTIME_FILEPATH}
     ...    serving_platform=multi    runtime_protocol=gRPC
-    Serving Runtime Template Should Be Listed    displayed_name=${UPLOADED_OVMS_DISPLAYED_NAME}
+    Serving Runtime Template Should Be Listed    displayed_name=${UPLOADED_OVMS_YAML_NAME}
     ...    serving_platform=multi
 
 Verify RHODS Admins Can Delete A Custom Serving Runtime Template
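For orientation, here is a minimal sketch of how the pieces touched by this commit typically fit together in a KFP v2 pipeline; it assumes the kfp and kfp-kubernetes packages are installed. The hunk headers above truncate the helper's signature at "accelerator_li", so the accelerator_limit parameter, the set_accelerator_* calls, the nvidia.com/gpu resource key, and the pipeline and file names below are illustrative assumptions, not the repository's exact code.

# Hedged sketch of the wiring, not the repository's exact code.
from kfp import compiler, dsl, kubernetes
from kfp.dsl import PipelineTask

# New CUDA runtime image pinned by this commit.
common_base_image = (
    "quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd"
)


def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_limit: int) -> None:
    # Request the accelerator and tolerate the matching node taint; the
    # add_toleration call mirrors the diff, the set_accelerator_* calls are
    # assumed from the "resources: accelerator: count: '1'" in the compiled YAML.
    task.set_accelerator_type(accelerator_type)
    task.set_accelerator_limit(accelerator_limit)
    kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")


@dsl.component(base_image=common_base_image)
def verify_gpu_availability(gpu_toleration: bool):
    import torch  # noqa: PLC0415

    # Condensed form of the check in the diff: CUDA must be visible exactly
    # when the task was scheduled with a GPU toleration.
    assert torch.cuda.is_available() == gpu_toleration


@dsl.pipeline(name="gpu-availability-check")
def gpu_pipeline():
    # One task tolerates the GPU taint and must see CUDA ...
    task_with_gpu = verify_gpu_availability(gpu_toleration=True)
    add_gpu_toleration(task_with_gpu, accelerator_type="nvidia.com/gpu", accelerator_limit=1)
    # ... the other stays on a CPU-only node and must not.
    verify_gpu_availability(gpu_toleration=False)


if __name__ == "__main__":
    compiler.Compiler().compile(gpu_pipeline, "gpu_availability_pipeline_compiled.yaml")

Compiling such a pipeline produces a deploymentSpec of the same shape as the YAML hunks above, with each executor's image pinned to common_base_image, which is why both the Python definitions and the compiled YAML files change in lockstep in this commit.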