From 3b64527947681348f68e478349e17551e9a14321 Mon Sep 17 00:00:00 2001
From: mattmahoneyrh
Date: Wed, 27 Nov 2024 13:55:28 -0500
Subject: [PATCH 01/10] Add tag to disruptive tests so that they can be
 excluded from an Operator suite run

---
 ...__dsc_negative_dependant_operators_not_installed.robot | 8 ++++----
 .../2003__smcp_already_created.robot                      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
index 4b20cdeaa..e1d2f5ea2 100644
--- a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
+++ b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
@@ -28,7 +28,7 @@ ${IS_NOT_PRESENT}    1
 Validate DSC and DSCI Created With Errors When Service Mesh Operator Is Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without Service Mesh Operator installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2584    RHOAIENG-2514
+    [Tags]    Operator    Tier3    ODS-2584    RHOAIENG-2514    OperatorExclude

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
@@ -50,7 +50,7 @@ Validate DSC and DSCI Created With Errors When Service Mesh Operator Is Not Inst
 Validate DSC and DSCI Created With Errors When Serverless Operator Is Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without Serverless Operator installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2586    RHOAIENG-2512
+    [Tags]    Operator    Tier3    ODS-2586    RHOAIENG-2512    OperatorExclude

     Remove DSC And DSCI Resources
     Uninstall Serverless Operator CLI
@@ -70,7 +70,7 @@ Validate DSC and DSCI Created With Errors When Serverless Operator Is Not Instal
 Validate DSC and DSCI Created With Errors When Service Mesh And Serverless Operators Are Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without dependant operators ((servicemesh, serverless) installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2527    RHOAIENG-2518
+    [Tags]    Operator    Tier3    ODS-2527    RHOAIENG-2518    OperatorExclude

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
@@ -96,7 +96,7 @@ Validate DSC and DSCI Created With No Errors When Kserve Serving Is Unmanaged An
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without dependant operators ((servicemesh, serverless) installed and with no errors
     ...    because the Kserve component serving is unmanaged
-    [Tags]    Operator    Tier3    RHOAIENG-3472
+    [Tags]    Operator    Tier3    RHOAIENG-3472    OperatorExclude

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
diff --git a/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot b/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot
index aea001494..0578dbb95 100644
--- a/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot
+++ b/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot
@@ -29,7 +29,7 @@ ${MSG_REGEX}    denied the request: only one service
 *** Test Cases ***
 Validate Service Mesh Control Plane Already Created
     [Documentation]    This Test Case validates that only one ServiceMeshControlPlane is allowed to be installed per project/namespace
-    [Tags]    RHOAIENG-2517    Operator
+    [Tags]    RHOAIENG-2517    Operator    OperatorExclude
     Fetch Image Url And Update Channel
     Check Whether DSC Exists And Save Component Statuses
     Fetch Cluster Type By Domain

From 46caaf18109a440604cab2f095e6b44c79bdc6a8 Mon Sep 17 00:00:00 2001
From: Karel Suta
Date: Fri, 29 Nov 2024 14:03:56 +0100
Subject: [PATCH 02/10] Add image references to the Distributed Workload image
 digests

---
 .../DistributedWorkloads.resource             | 19 +++++++++++++++++--
 .../0201__pre_upgrade.robot                   |  2 +-
 .../0203__post_upgrade.robot                  |  2 +-
 .../test-run-codeflare-sdk-e2e-tests.robot    | 10 +++++-----
 .../0602__training/test-run-kuberay-e2e.robot |  3 +--
 ...test-run-distributed-workloads-tests.robot |  7 -------
 ...-run-distributed-workloads-tests_3.9.robot |  3 ---
 7 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource b/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
index fb70e7a25..9b95ddd13 100644
--- a/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
+++ b/ods_ci/tests/Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
@@ -10,11 +10,26 @@ ${CODEFLARE-SDK-RELEASE-TAG-3.9}    adjustments-release-0.21.1
 ${CODEFLARE-SDK_DIR}    codeflare-sdk
 ${CODEFLARE-SDK_REPO_URL}    %{CODEFLARE-SDK_REPO_URL=https://github.com/project-codeflare/codeflare-sdk.git}
 ${DISTRIBUTED_WORKLOADS_RELEASE_ASSETS}    https://github.com/opendatahub-io/distributed-workloads/releases/latest/download
-${RAY_IMAGE_3.11}    quay.io/modh/ray@sha256:db667df1bc437a7b0965e8031e905d3ab04b86390d764d120e05ea5a5c18d1b4
-${RAY_IMAGE_3.9}    quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06
+# Corresponds to quay.io/modh/ray:2.35.0-py311-cu121
+${RAY_CUDA_IMAGE_3.11}    quay.io/modh/ray@sha256:db667df1bc437a7b0965e8031e905d3ab04b86390d764d120e05ea5a5c18d1b4
+# Corresponds to quay.io/rhoai/ray:2.35.0-py311-cu121-torch24-fa26
+${RAY_TORCH_CUDA_IMAGE_3.11}    quay.io/rhoai/ray@sha256:5077f9bb230dfa88f34089fecdfcdaa8abc6964716a8a8325c7f9dcdf11bbbb3
+# Corresponds to quay.io/modh/ray:2.35.0-py311-rocm61
+${RAY_ROCM_IMAGE_3.11}    quay.io/modh/ray@sha256:f8b4f2b1c954187753c1f5254f7bb6a4286cec5a4f1b43def7ef4e009f2d28cb
+# Corresponds to quay.io/modh/ray:2.35.0-py39-cu121
+${RAY_CUDA_IMAGE_3.9}    quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06
+# Corresponds to quay.io/rhoai/ray:2.35.0-py39-cu121-torch24-fa26
+${RAY_TORCH_CUDA_IMAGE_3.9}    quay.io/rhoai/ray@sha256:158b481b8e9110008d60ac9fb8d156eadd71cb057ac30382e62e3a231ceb39c0
+# Corresponds to quay.io/modh/fms-hf-tuning:v2.1.2
 ${FMS_HF_TUNING_IMAGE}    quay.io/modh/fms-hf-tuning@sha256:6f98907f9095db72932caa54094438eae742145f4b66c28d15887d5303ff1186
+# Corresponds to quay.io/modh/training:py311-cuda121-torch241
 ${CUDA_TRAINING_IMAGE}    quay.io/modh/training@sha256:b98e373a972ff6f896a9dc054d56920e915675339c02ea7fa123e0f4bbef4d74
+# Corresponds to quay.io/modh/training:py311-rocm61-torch241
 ${ROCM_TRAINING_IMAGE}    quay.io/modh/training@sha256:2efb6efba4ec08e63847d701e3062a5f6ddf51c91af5fbcef6378b9e6520a3bb
+# Corresponds to quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111
+${NOTEBOOK_IMAGE_3.11}    quay.io/modh/odh-generic-data-science-notebook@sha256:7c1a4ca213b71d342a2d1366171304e469da06d5f15710fab5dd3ce013aa1b73
+# Corresponds to quay.io/modh/odh-generic-data-science-notebook:v2-2024a-20241108
+${NOTEBOOK_IMAGE_3.9}    quay.io/modh/odh-generic-data-science-notebook@sha256:b1066204611b4bcfa6172c3115650a8e8393089d5606458fa0d8c53633d2ce17
 ${NOTEBOOK_USER_NAME}    ${TEST_USER_3.USERNAME}
 ${NOTEBOOK_USER_PASSWORD}    ${TEST_USER_3.PASSWORD}
 ${KFTO_CORE_BINARY_NAME}    kfto
diff --git a/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot b/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot
index 1e79a9943..a409ba2c1 100644
--- a/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot
+++ b/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot
@@ -132,7 +132,7 @@ Verify Distributed Workload Metrics Resources By Creating Ray Cluster Workload
     [Setup]    Prepare Codeflare-SDK Test Setup
     ${PRJ_UPGRADE}    Set Variable    test-ns-rayupgrade
     ${JOB_NAME}    Set Variable    mnist
-    Run Codeflare-SDK Test    upgrade    raycluster_sdk_upgrade_test.py::TestMNISTRayClusterUp    3.11    ${RAY_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
+    Run Codeflare-SDK Test    upgrade    raycluster_sdk_upgrade_test.py::TestMNISTRayClusterUp    3.11    ${RAY_CUDA_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
     Set Library Search Order    SeleniumLibrary
     RHOSi Setup
     Launch Dashboard    ${TEST_USER.USERNAME}    ${TEST_USER.PASSWORD}    ${TEST_USER.AUTH_TYPE}
diff --git a/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot b/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot
index 4726771cc..5682444b6 100644
--- a/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot
+++ b/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot
@@ -162,7 +162,7 @@ Verify Ray Cluster Exists And Monitor Workload Metrics By Submitting Ray Job Aft
     ${PRJ_UPGRADE}    Set Variable    test-ns-rayupgrade
     ${LOCAL_QUEUE}    Set Variable    local-queue-mnist
     ${JOB_NAME}    Set Variable    mnist
-    Run Codeflare-SDK Test    upgrade    raycluster_sdk_upgrade_test.py::TestMnistJobSubmit    3.11    ${RAY_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
+    Run Codeflare-SDK Test    upgrade    raycluster_sdk_upgrade_test.py::TestMnistJobSubmit    3.11    ${RAY_CUDA_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
     Set Global Variable    ${DW_PROJECT_CREATED}    True
     Set Library Search Order    SeleniumLibrary
     RHOSi Setup
diff --git a/ods_ci/tests/Tests/0600__distributed_workloads/0601__workloads_orchestration/test-run-codeflare-sdk-e2e-tests.robot b/ods_ci/tests/Tests/0600__distributed_workloads/0601__workloads_orchestration/test-run-codeflare-sdk-e2e-tests.robot
index 3c4653f35..a0165fbb5 100644
--- a/ods_ci/tests/Tests/0600__distributed_workloads/0601__workloads_orchestration/test-run-codeflare-sdk-e2e-tests.robot
+++ b/ods_ci/tests/Tests/0600__distributed_workloads/0601__workloads_orchestration/test-run-codeflare-sdk-e2e-tests.robot
@@ -17,7 +17,7 @@ Run TestRayClusterSDKOauth test with Python 3.9
     ...    DistributedWorkloads
     ...    WorkloadsOrchestration
     ...    Codeflare-sdk
-    Run Codeflare-SDK Test    e2e    mnist_raycluster_sdk_oauth_test.py    3.9    ${RAY_IMAGE_3.9}    ${CODEFLARE-SDK-RELEASE-TAG-3.9}
+    Run Codeflare-SDK Test    e2e    mnist_raycluster_sdk_oauth_test.py    3.9    ${RAY_CUDA_IMAGE_3.9}    ${CODEFLARE-SDK-RELEASE-TAG-3.9}

 Run TestRayClusterSDKOauth test with Python 3.11
     [Documentation]    Run Python E2E test: TestRayClusterSDKOauth
@@ -26,7 +26,7 @@ Run TestRayClusterSDKOauth test with Python 3.11
     ...    DistributedWorkloads
     ...    WorkloadsOrchestration
     ...    Codeflare-sdk
-    Run Codeflare-SDK Test    e2e    mnist_raycluster_sdk_oauth_test.py    3.11    ${RAY_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
+    Run Codeflare-SDK Test    e2e    mnist_raycluster_sdk_oauth_test.py    3.11    ${RAY_CUDA_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}

 Run TestRayLocalInteractiveOauth test with Python 3.9
     [Documentation]    Run Python E2E test: TestRayLocalInteractiveOauth
@@ -35,7 +35,7 @@ Run TestRayLocalInteractiveOauth test with Python 3.9
     ...    DistributedWorkloads
     ...    WorkloadsOrchestration
     ...    Codeflare-sdk
-    Run Codeflare-SDK Test    e2e    local_interactive_sdk_oauth_test.py    3.9    ${RAY_IMAGE_3.9}    ${CODEFLARE-SDK-RELEASE-TAG-3.9}
+    Run Codeflare-SDK Test    e2e    local_interactive_sdk_oauth_test.py    3.9    ${RAY_CUDA_IMAGE_3.9}    ${CODEFLARE-SDK-RELEASE-TAG-3.9}

 Run TestRayLocalInteractiveOauth test with Python 3.11
     [Documentation]    Run Python E2E test: TestRayLocalInteractiveOauth
@@ -44,7 +44,7 @@ Run TestRayLocalInteractiveOauth test with Python 3.11
     ...    DistributedWorkloads
     ...    WorkloadsOrchestration
     ...    Codeflare-sdk
-    Run Codeflare-SDK Test    e2e    local_interactive_sdk_oauth_test.py    3.11    ${RAY_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
+    Run Codeflare-SDK Test    e2e    local_interactive_sdk_oauth_test.py    3.11    ${RAY_CUDA_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}

 Run TestHeterogenousClustersOauth
     [Documentation]    Run Python E2E test: TestHeterogenousClustersOauth (workaround for 2.15)
@@ -54,7 +54,7 @@ Run TestHeterogenousClustersOauth
     ...    WorkloadsOrchestration
     ...    HeterogeneousCluster
     ...    Codeflare-sdk
-    Run Codeflare-SDK Test    e2e    heterogeneous_clusters_oauth_test.py    3.11    ${RAY_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
+    Run Codeflare-SDK Test    e2e    heterogeneous_clusters_oauth_test.py    3.11    ${RAY_CUDA_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}

 *** Keywords ***
 Prepare Codeflare-sdk E2E Test Suite
diff --git a/ods_ci/tests/Tests/0600__distributed_workloads/0602__training/test-run-kuberay-e2e.robot b/ods_ci/tests/Tests/0600__distributed_workloads/0602__training/test-run-kuberay-e2e.robot
index 427cf936d..20fe8ddcc 100644
--- a/ods_ci/tests/Tests/0600__distributed_workloads/0602__training/test-run-kuberay-e2e.robot
+++ b/ods_ci/tests/Tests/0600__distributed_workloads/0602__training/test-run-kuberay-e2e.robot
@@ -9,7 +9,6 @@ Resource    ../../../Resources/Page/DistributedWorkloads/DistributedWorklo

 *** Variables ***
 ${KUBERAY_RELEASE_ASSETS}    %{KUBERAY_RELEASE_ASSETS=https://github.com/opendatahub-io/kuberay/releases/latest/download}
-${KUBERAY_TEST_RAY_IMAGE}    quay.io/modh/ray@sha256:db667df1bc437a7b0965e8031e905d3ab04b86390d764d120e05ea5a5c18d1b4

 *** Test Cases ***
 Run TestRayJob test
@@ -79,7 +78,7 @@ Run Kuberay E2E Test
     ...    env:KUBERAY_TEST_TIMEOUT_SHORT=2m
     ...    env:KUBERAY_TEST_TIMEOUT_MEDIUM=10m
     ...    env:KUBERAY_TEST_TIMEOUT_LONG=12m
-    ...    env:KUBERAY_TEST_RAY_IMAGE=${KUBERAY_TEST_RAY_IMAGE}
+    ...    env:KUBERAY_TEST_RAY_IMAGE=${RAY_CUDA_IMAGE_3.11}
     ...    env:KUBERAY_TEST_OUTPUT_DIR=%{WORKSPACE}/kuberay-logs
     ...    shell=true
     ...    stderr=STDOUT
diff --git a/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests.robot b/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests.robot
index 8bd9f0a0d..e15cf49fe 100644
--- a/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests.robot
+++ b/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests.robot
@@ -9,13 +9,6 @@ Resource    ../../Resources/Page/DistributedWorkloads/DistributedWorkloads

 Test Tags    DistributedWorkloads3.11

-*** Variables ***
-${RAY_CUDA_IMAGE_3.11}    quay.io/modh/ray@sha256:db667df1bc437a7b0965e8031e905d3ab04b86390d764d120e05ea5a5c18d1b4
-${RAY_TORCH_CUDA_IMAGE_3.11}    quay.io/rhoai/ray@sha256:5077f9bb230dfa88f34089fecdfcdaa8abc6964716a8a8325c7f9dcdf11bbbb3
-${RAY_ROCM_IMAGE_3.11}    quay.io/modh/ray@sha256:f8b4f2b1c954187753c1f5254f7bb6a4286cec5a4f1b43def7ef4e009f2d28cb
-${NOTEBOOK_IMAGE_3.11}    quay.io/modh/odh-generic-data-science-notebook@sha256:7c1a4ca213b71d342a2d1366171304e469da06d5f15710fab5dd3ce013aa1b73
-
-
 *** Test Cases ***
 Run TestKueueRayCpu ODH test with Python 3.11
     [Documentation]    Run Go ODH test: TestKueueRayCpu
diff --git a/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests_3.9.robot b/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests_3.9.robot
index 4920c98d3..510ccb207 100644
--- a/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests_3.9.robot
+++ b/ods_ci/tests/Tests/0600__distributed_workloads/test-run-distributed-workloads-tests_3.9.robot
@@ -12,9 +12,6 @@ Test Tags    DistributedWorkloads3.9
 *** Variables ***
 # This is the last and latest distributed workloads release assest which contains test binaries compatible with python 3.9
 ${DISTRIBUTED_WORKLOADS_RELEASE_ASSETS_3.9}    https://github.com/opendatahub-io/distributed-workloads/releases/download/v2.14.0-09-24-2024_adjustments_1
-${RAY_CUDA_IMAGE_3.9}    quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06
-${RAY_TORCH_CUDA_IMAGE_3.9}    quay.io/rhoai/ray@sha256:158b481b8e9110008d60ac9fb8d156eadd71cb057ac30382e62e3a231ceb39c0
-${NOTEBOOK_IMAGE_3.9}    quay.io/modh/odh-generic-data-science-notebook@sha256:b1066204611b4bcfa6172c3115650a8e8393089d5606458fa0d8c53633d2ce17

 *** Test Cases ***

From c5dda0c24270d7a9bcd540bf9496a333101758d9 Mon Sep 17 00:00:00 2001
From: Matthew Mahoney
Date: Mon, 2 Dec 2024 10:00:08 -0500
Subject: [PATCH 03/10] Update 2002__dsc_negative_dependant_operators_not_installed.robot

Add Tier3 tag
---
 ...__dsc_negative_dependant_operators_not_installed.robot | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
index e1d2f5ea2..02b67ea72 100644
--- a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
+++ b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
@@ -28,7 +28,7 @@ ${IS_NOT_PRESENT}    1
 Validate DSC and DSCI Created With Errors When Service Mesh Operator Is Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without Service Mesh Operator installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2584    RHOAIENG-2514    OperatorExclude
+    [Tags]    Operator    Tier3    ODS-2584    RHOAIENG-2514    Tier3

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
@@ -50,7 +50,7 @@ Validate DSC and DSCI Created With Errors When Service Mesh Operator Is Not Inst
 Validate DSC and DSCI Created With Errors When Serverless Operator Is Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without Serverless Operator installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2586    RHOAIENG-2512    OperatorExclude
+    [Tags]    Operator    Tier3    ODS-2586    RHOAIENG-2512    Tier3

     Remove DSC And DSCI Resources
     Uninstall Serverless Operator CLI
@@ -70,7 +70,7 @@ Validate DSC and DSCI Created With Errors When Serverless Operator Is Not Instal
 Validate DSC and DSCI Created With Errors When Service Mesh And Serverless Operators Are Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without dependant operators ((servicemesh, serverless) installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2527    RHOAIENG-2518    OperatorExclude
+    [Tags]    Operator    Tier3    ODS-2527    RHOAIENG-2518    Tier3

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
@@ -96,7 +96,7 @@ Validate DSC and DSCI Created With No Errors When Kserve Serving Is Unmanaged An
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without dependant operators ((servicemesh, serverless) installed and with no errors
     ...    because the Kserve component serving is unmanaged
-    [Tags]    Operator    Tier3    RHOAIENG-3472    OperatorExclude
+    [Tags]    Operator    Tier3    RHOAIENG-3472    Tier3

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI

From 90dfcfb1199a532cc417e01a2ee8aaff592baed8 Mon Sep 17 00:00:00 2001
From: Matthew Mahoney
Date: Mon, 2 Dec 2024 10:01:05 -0500
Subject: [PATCH 04/10] Update 2003__smcp_already_created.robot

Add Tier3 tag.
---
 .../2001__disruptive_tests/2003__smcp_already_created.robot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot b/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot
index 0fc1ee713..40f58882d 100644
--- a/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot
+++ b/ods_ci/tests/Tests/2001__disruptive_tests/2003__smcp_already_created.robot
@@ -29,7 +29,7 @@ ${MSG_REGEX}    denied the request: only one service
 *** Test Cases ***
 Validate Service Mesh Control Plane Already Created
     [Documentation]    This Test Case validates that only one ServiceMeshControlPlane is allowed to be installed per project/namespace
-    [Tags]    RHOAIENG-2517    Operator    OperatorExclude
+    [Tags]    RHOAIENG-2517    Operator    Tier3
     Fetch Image Url And Update Channel
     Check Whether DSC Exists And Save Component Statuses
     IF    "${CLUSTER_TYPE}" == "selfmanaged"

From c50c71a0458e0dfcd3387b276c32174af77c3caa Mon Sep 17 00:00:00 2001
From: Matthew Mahoney
Date: Mon, 2 Dec 2024 10:09:08 -0500
Subject: [PATCH 05/10] Update 2002__dsc_negative_dependant_operators_not_installed.robot

Tier3 tag will be used for suite run exclusion.
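As a sketch of the intended usage (the --include/--exclude flags are standard
Robot Framework CLI options; the suite path is illustrative, not taken from
this patch series):

    # Operator-focused run that skips the disruptive Tier3 cases
    robot --include Operator --exclude Tier3 ods_ci/tests/Tests

Tests carrying both tags still run in a plain Tier3 run; only Operator runs
filtered this way skip them.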
---
 ...02__dsc_negative_dependant_operators_not_installed.robot | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
index 02b67ea72..4400188f4 100644
--- a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
+++ b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
@@ -28,7 +28,7 @@ ${IS_NOT_PRESENT}    1
 Validate DSC and DSCI Created With Errors When Service Mesh Operator Is Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without Service Mesh Operator installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2584    RHOAIENG-2514    Tier3
+    [Tags]    Operator    Tier3    ODS-2584    RHOAIENG-2514

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
@@ -70,7 +70,7 @@ Validate DSC and DSCI Created With Errors When Serverless Operator Is Not Instal
 Validate DSC and DSCI Created With Errors When Service Mesh And Serverless Operators Are Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without dependant operators ((servicemesh, serverless) installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2527    RHOAIENG-2518    Tier3
+    [Tags]    Operator    Tier3    ODS-2527    RHOAIENG-2518

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI
@@ -96,7 +96,7 @@ Validate DSC and DSCI Created With No Errors When Kserve Serving Is Unmanaged An
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without dependant operators ((servicemesh, serverless) installed and with no errors
     ...    because the Kserve component serving is unmanaged
-    [Tags]    Operator    Tier3    RHOAIENG-3472    Tier3
+    [Tags]    Operator    Tier3    RHOAIENG-3472

     Remove DSC And DSCI Resources
     Uninstall Service Mesh Operator CLI

From 52d6c549d01d18548ca7fb2a6ea55a59d6466023 Mon Sep 17 00:00:00 2001
From: Matthew Mahoney
Date: Mon, 2 Dec 2024 10:09:50 -0500
Subject: [PATCH 06/10] Update 2002__dsc_negative_dependant_operators_not_installed.robot

---
 .../2002__dsc_negative_dependant_operators_not_installed.robot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
index 4400188f4..4b20cdeaa 100644
--- a/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
+++ b/ods_ci/tests/Tests/2001__disruptive_tests/2002__dsc_negative_dependant_operators_not_installed.robot
@@ -50,7 +50,7 @@ Validate DSC and DSCI Created With Errors When Service Mesh Operator Is Not Inst
 Validate DSC and DSCI Created With Errors When Serverless Operator Is Not Installed    #robocop:disable
     [Documentation]    The purpose of this Test Case is to validate that DSC and DSCI are created
     ...    without Serverless Operator installed, but with errors
-    [Tags]    Operator    Tier3    ODS-2586    RHOAIENG-2512    Tier3
+    [Tags]    Operator    Tier3    ODS-2586    RHOAIENG-2512

     Remove DSC And DSCI Resources
     Uninstall Serverless Operator CLI

From 2d753dc68216c77be3f0929d1be9d3cc8bc29746 Mon Sep 17 00:00:00 2001
From: Tarun Kumar
Date: Tue, 3 Dec 2024 11:36:45 +0530
Subject: [PATCH 07/10] update caikit runtime image and runtime validate param
 (#2106)

Update runtime image and other parameter for runtime

Signed-off-by: Tarun Kumar
---
 ...caikit_standalone_servingruntime_grpc.yaml |  2 +-
 ...caikit_standalone_servingruntime_http.yaml |  2 +-
 .../vllm_servingruntime_http.yaml             |  2 +-
 .../1007__model_serving_llm_models.robot      | 80 +++++++++----------
 4 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_grpc.yaml b/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_grpc.yaml
index 72b7bf85b..2c551c042 100644
--- a/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_grpc.yaml
+++ b/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_grpc.yaml
@@ -10,7 +10,7 @@ spec:
       name: caikit
   containers:
     - name: kserve-container
-      image: quay.io/opendatahub/caikit-nlp:stable
+      image: quay.io/modh/caikit-nlp@sha256:3c33185fda84d7bac6715c8743c446a6713cdbc0cb0ed831acc0df89bd8bab6b
      command: ["python", "-m", "caikit.runtime"]
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
diff --git a/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_http.yaml b/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_http.yaml
index 5d661893c..8b50d3d17 100644
--- a/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_http.yaml
+++ b/ods_ci/tests/Resources/Files/llm/serving_runtimes/caikit_standalone_servingruntime_http.yaml
@@ -10,7 +10,7 @@ spec:
       name: caikit
   containers:
     - name: kserve-container
-      image: quay.io/opendatahub/caikit-nlp:stable
+      image: quay.io/modh/caikit-nlp@sha256:3c33185fda84d7bac6715c8743c446a6713cdbc0cb0ed831acc0df89bd8bab6b
      command: ["python", "-m", "caikit.runtime"]
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
diff --git a/ods_ci/tests/Resources/Files/llm/serving_runtimes/vllm_servingruntime_http.yaml b/ods_ci/tests/Resources/Files/llm/serving_runtimes/vllm_servingruntime_http.yaml
index 4369e98fd..6ce3f691d 100644
--- a/ods_ci/tests/Resources/Files/llm/serving_runtimes/vllm_servingruntime_http.yaml
+++ b/ods_ci/tests/Resources/Files/llm/serving_runtimes/vllm_servingruntime_http.yaml
@@ -12,7 +12,7 @@ spec:
     - '--served-model-name={{.Name}}'
     - '--distributed-executor-backend=mp'
     - '--chat-template=/app/data/template/template_chatml.jinja'
-    image: quay.io/modh/vllm@sha256:c86ff1e89c86bc9821b75d7f2bbc170b3c13e3ccf538bf543b1110f23e056316
+    image: ${runtime_image}
     name: kserve-container
     command:
     - python3
diff --git a/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot b/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot
index 1ce4f832d..92e32598c 100644
--- a/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot
+++ b/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot
@@ -209,19 +209,19 @@ Verify User Can Serve And Query A elyza/elyza-japanese-llama-2-7b-instruct Model
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=1    query_idx=4
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=1
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=10
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=9
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -400,7 +400,7 @@ Verify User Can Serve And Query A meta-llama/llama-2-13b-chat Model    # robocop
     Set Test Variable    ${RUNTIME_NAME}    tgis-runtime
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=all-tokens    n_times=1    protocol=${PROTOCOL}
-    ...    namespace=${test_namespace}    query_idx=0    validate_response=${TRUE}    # temp
+    ...    namespace=${test_namespace}    query_idx=0    validate_response=${FALSE}    # temp
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=streaming    n_times=1    protocol=${PROTOCOL}
@@ -408,19 +408,19 @@
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -533,19 +533,19 @@ Verify User Can Serve And Query A instructlab/merlinite-7b-lab Model    # roboco
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -596,19 +596,19 @@ Verify User Can Serve And Query A ibm-granite/granite-8b-code-base Model    # ro
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -695,7 +695,7 @@ Verify User Can Serve And Query A meta-llama/llama-3-8B-Instruct Model    # robo
     Set Test Variable    ${RUNTIME_NAME}    tgis-runtime
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=all-tokens    n_times=1    protocol=${PROTOCOL}
-    ...    namespace=${test_namespace}    query_idx=0    validate_response=${TRUE}    # temp
+    ...    namespace=${test_namespace}    query_idx=0    validate_response=${FALSE}    # temp
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=streaming    n_times=1    protocol=${PROTOCOL}
@@ -703,19 +703,19 @@
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -766,19 +766,19 @@ Verify User Can Serve And Query A ibm-granite/granite-3b-code-instruct Model
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -829,19 +829,19 @@ Verify User Can Serve And Query A ibm-granite/granite-8b-code-instruct Model
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -892,19 +892,19 @@ Verify User Can Serve And Query A ibm-granite/granite-7b-lab Model    # robocop:
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -958,11 +958,11 @@ Verify User Can Serve And Query A ibm-granite/granite-7b-lab ngram speculative M
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=model-info    n_times=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
     ...    inference_type=tokenize    n_times=0    query_idx=0
-    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}    string_check_only=${FALSE}
     ...    port_forwarding=${use_port_forwarding}
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
@@ -1142,10 +1142,10 @@ Verify User Can Serve And Query RHAL AI granite-7b-starter Model    # robocop: o
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1275,10 +1275,10 @@ Verify User Can Serve And Query RHAL AI Granite-7b-redhat-lab Model    # robocop
     ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=chat-completions    n_times=1    query_idx=12
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
         Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
         ...    inference_type=completions    n_times=1    query_idx=11
-        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        ...    namespace=${test_namespace}    string_check_only=${FALSE}
     END
     [Teardown]    Run Keywords
     ...    Clean Up Test Project    test_ns=${test_namespace}

From 40d3509868f184b3198546141525f2ba0d35b449 Mon Sep 17 00:00:00 2001
From: RAGHUL M
Date: Tue, 3 Dec 2024 14:39:57 +0530
Subject: [PATCH 08/10] Smoke Test failure - Name fix for Runtime template
 (#2103)

* Name fix for Runtime template

* Name fix for Runtime YAML template

* uncommented teardown

---------

Co-authored-by: Tarun Kumar
---
 .../1003__model_serving_customruntimes.robot | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot b/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot
index 7cd957f57..6bb59ee08 100644
--- a/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot
+++ b/ods_ci/tests/Tests/1000__model_serving/1003__model_serving_customruntimes.robot
@@ -13,6 +13,7 @@ Test Tags    Dashboard
 ${RESOURCES_DIRPATH}=    tests/Resources/Files
 ${OVMS_RUNTIME_FILEPATH}=    ${RESOURCES_DIRPATH}/ovms_servingruntime.yaml
 ${UPLOADED_OVMS_DISPLAYED_NAME}=    ODS-CI Custom OpenVINO Model Server
+${UPLOADED_OVMS_YAML_NAME}=    ovms-ods-ci
 ${PRJ_TITLE}=    CustomServingRuntimesProject
 ${PRJ_DESCRIPTION}=    ODS-CI DS Project for testing of Custom Serving Runtimes
 ${MODEL_SERVER_NAME}=    ODS-CI CustomServingRuntime Server
@@ -25,7 +26,7 @@ Verify RHODS Admins Can Import A Custom Serving Runtime Template By Uploading A
     Open Dashboard Settings    settings_page=Serving runtimes
     Upload Serving Runtime Template    runtime_filepath=${OVMS_RUNTIME_FILEPATH}
     ...    serving_platform=multi    runtime_protocol=gRPC
-    Serving Runtime Template Should Be Listed    displayed_name=${UPLOADED_OVMS_DISPLAYED_NAME}
+    Serving Runtime Template Should Be Listed    displayed_name=${UPLOADED_OVMS_YAML_NAME}
     ...    serving_platform=multi

 Verify RHODS Admins Can Delete A Custom Serving Runtime Template

From 0e5a93e0d5d123f62d1b1a1cb08b020c9942266a Mon Sep 17 00:00:00 2001
From: Jorge
Date: Tue, 3 Dec 2024 10:18:38 +0100
Subject: [PATCH 09/10] Update images used in nvidia and rocm pipeline testing
 for 2.16 (master) (#2086)

Update images used in nvidia and rocm pipeline testing for 2.16

Use the workbench images available in 2.16 RC2

Signed-off-by: Jorge Garcia Oncins
---
 .../pytorch/pytorch_amd_gpu_availability.py   | 10 ++--
 ...pytorch_amd_gpu_availability_compiled.yaml | 48 +++++++++----------
 .../pytorch_nvidia_gpu_availability.py        | 11 ++---
 ...orch_nvidia_gpu_availability_compiled.yaml | 48 +++++++++----------
 4 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
index bd9b74b69..52c6d83d2 100644
--- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
+++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability.py
@@ -3,7 +3,7 @@

 # Runtime: Pytorch with ROCm and Python 3.9 (UBI 9)
 common_base_image = (
-    "quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10"
+    "quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852"
 )


@@ -14,11 +14,9 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li
     kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")


-@dsl.component(
-    base_image=common_base_image
-)
+@dsl.component(base_image=common_base_image)
 def verify_gpu_availability(gpu_toleration: bool):
-    import torch
+    import torch  # noqa: PLC0415

     cuda_available = torch.cuda.is_available()
     device_count = torch.cuda.device_count()
@@ -30,7 +28,7 @@ def verify_gpu_availability(gpu_toleration: bool):
     if gpu_toleration:
         assert torch.cuda.is_available()
         assert torch.cuda.device_count() > 0
-        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
+        t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda")
     else:
         assert not torch.cuda.is_available()
         assert torch.cuda.device_count() == 0
diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml
index 8652d23c5..d3f158ecd 100644
--- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml
+++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_amd_gpu_availability_compiled.yaml
@@ -42,18 +42,18 @@ deploymentSpec:

           '
         - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
-          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
-          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
-          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
-          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
-          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
-          \ assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
-          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
-          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
-          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
-          \ availability test: PASS\")\n\n"
-        image: quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10
+          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\
+          \  # noqa: PLC0415\n\n    cuda_available = torch.cuda.is_available()\n \
+          \ device_count = torch.cuda.device_count()\n    print(\"------------------------------\"\
+          )\n    print(\"GPU availability\")\n    print(\"------------------------------\"\
+          )\n    print(f\"cuda available: {cuda_available}\")\n    print(f\"device\
+          \ count: {device_count}\")\n    if gpu_toleration:\n        assert torch.cuda.is_available()\n\
+          \        assert torch.cuda.device_count() > 0\n        t = torch.tensor([5,\
+          \ 5, 5], dtype=torch.int64, device=\"cuda\")\n    else:\n        assert\
+          \ not torch.cuda.is_available()\n        assert torch.cuda.device_count()\
+          \ == 0\n        t = torch.tensor([5, 5, 5], dtype=torch.int64)\n    print(f\"\
+          tensor: {t}\")\n    print(\"GPU availability test: PASS\")\n\n"
+        image: quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852
     exec-verify-gpu-availability-2:
       container:
         args:
@@ -80,18 +80,18 @@ deploymentSpec:

           '
         - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
-          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
-          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
-          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
-          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
-          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
-          \ assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
-          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
-          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
-          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
-          \ availability test: PASS\")\n\n"
-        image: quay.io/modh/runtime-images@sha256:a1cfb7bfcff3b2aae2b20b17da83b6683d632403f674a51af6efdfe809a6fc10
+          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\
+          \  # noqa: PLC0415\n\n    cuda_available = torch.cuda.is_available()\n \
+          \ device_count = torch.cuda.device_count()\n    print(\"------------------------------\"\
+          )\n    print(\"GPU availability\")\n    print(\"------------------------------\"\
+          )\n    print(f\"cuda available: {cuda_available}\")\n    print(f\"device\
+          \ count: {device_count}\")\n    if gpu_toleration:\n        assert torch.cuda.is_available()\n\
+          \        assert torch.cuda.device_count() > 0\n        t = torch.tensor([5,\
+          \ 5, 5], dtype=torch.int64, device=\"cuda\")\n    else:\n        assert\
+          \ not torch.cuda.is_available()\n        assert torch.cuda.device_count()\
+          \ == 0\n        t = torch.tensor([5, 5, 5], dtype=torch.int64)\n    print(f\"\
+          tensor: {t}\")\n    print(\"GPU availability test: PASS\")\n\n"
+        image: quay.io/modh/runtime-images@sha256:6340efaa92bc54bcede518e890492db626fb9fe96f028c2cd5251f286b2b2852
         resources:
           accelerator:
             count: '1'
diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py
index fa32cd9b0..d593a8c5c 100644
--- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py
+++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability.py
@@ -2,8 +2,9 @@
 from kfp.dsl import PipelineTask

 # Runtime: Pytorch with CUDA and Python 3.9 (UBI 9)
+# Images for each release can be found here (in the branch for the release)
 common_base_image = (
-    "quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a"
+    "quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd"
 )


@@ -14,11 +15,9 @@ def add_gpu_toleration(task: PipelineTask, accelerator_type: str, accelerator_li
     kubernetes.add_toleration(task, key=accelerator_type, operator="Exists", effect="NoSchedule")


-@dsl.component(
-    base_image=common_base_image
-)
+@dsl.component(base_image=common_base_image)
 def verify_gpu_availability(gpu_toleration: bool):
-    import torch
+    import torch  # noqa: PLC0415

     cuda_available = torch.cuda.is_available()
     device_count = torch.cuda.device_count()
@@ -30,7 +29,7 @@ def verify_gpu_availability(gpu_toleration: bool):
     if gpu_toleration:
         assert torch.cuda.is_available()
         assert torch.cuda.device_count() > 0
-        t = torch.tensor([5, 5, 5], dtype=torch.int64, device='cuda')
+        t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda")
     else:
         assert not torch.cuda.is_available()
         assert torch.cuda.device_count() == 0
diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml
index d66218962..95cbebf16 100644
--- a/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml
+++ b/ods_ci/tests/Resources/Files/pipeline-samples/v2/cache-disabled/gpu/pytorch/pytorch_nvidia_gpu_availability_compiled.yaml
@@ -42,18 +42,18 @@ deploymentSpec:

           '
         - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
-          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
-          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
-          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
-          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
-          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
-          \ assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
-          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
-          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
-          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
-          \ availability test: PASS\")\n\n"
-        image: quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a
+          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\
+          \  # noqa: PLC0415\n\n    cuda_available = torch.cuda.is_available()\n \
+          \ device_count = torch.cuda.device_count()\n    print(\"------------------------------\"\
+          )\n    print(\"GPU availability\")\n    print(\"------------------------------\"\
+          )\n    print(f\"cuda available: {cuda_available}\")\n    print(f\"device\
+          \ count: {device_count}\")\n    if gpu_toleration:\n        assert torch.cuda.is_available()\n\
+          \        assert torch.cuda.device_count() > 0\n        t = torch.tensor([5,\
+          \ 5, 5], dtype=torch.int64, device=\"cuda\")\n    else:\n        assert\
+          \ not torch.cuda.is_available()\n        assert torch.cuda.device_count()\
+          \ == 0\n        t = torch.tensor([5, 5, 5], dtype=torch.int64)\n    print(f\"\
+          tensor: {t}\")\n    print(\"GPU availability test: PASS\")\n\n"
+        image: quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd
     exec-verify-gpu-availability-2:
       container:
        args:
@@ -80,18 +80,18 @@ deploymentSpec:

           '
         - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
-          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\n\
-          \n    cuda_available = torch.cuda.is_available()\n    device_count = torch.cuda.device_count()\n\
-          \    print(\"------------------------------\")\n    print(\"GPU availability\"\
-          )\n    print(\"------------------------------\")\n    print(f\"cuda available:\
-          \ {cuda_available}\")\n    print(f\"device count: {device_count}\")\n  \
-          \  if gpu_toleration:\n        assert torch.cuda.is_available()\n      \
-          \ assert torch.cuda.device_count() > 0\n        t = torch.tensor([5, 5,\
-          \ 5], dtype=torch.int64, device='cuda')\n    else:\n        assert not torch.cuda.is_available()\n\
-          \        assert torch.cuda.device_count() == 0\n        t = torch.tensor([5,\
-          \ 5, 5], dtype=torch.int64)\n    print(f\"tensor: {t}\")\n    print(\"GPU\
-          \ availability test: PASS\")\n\n"
-        image: quay.io/modh/runtime-images@sha256:7d1b065f100666fe46f64a2e8aae888cb41a38b5482bb9b9343b14db05c2a14a
+          \ *\n\ndef verify_gpu_availability(gpu_toleration: bool):\n    import torch\
+          \  # noqa: PLC0415\n\n    cuda_available = torch.cuda.is_available()\n \
+          \ device_count = torch.cuda.device_count()\n    print(\"------------------------------\"\
+          )\n    print(\"GPU availability\")\n    print(\"------------------------------\"\
+          )\n    print(f\"cuda available: {cuda_available}\")\n    print(f\"device\
+          \ count: {device_count}\")\n    if gpu_toleration:\n        assert torch.cuda.is_available()\n\
+          \        assert torch.cuda.device_count() > 0\n        t = torch.tensor([5,\
+          \ 5, 5], dtype=torch.int64, device=\"cuda\")\n    else:\n        assert\
+          \ not torch.cuda.is_available()\n        assert torch.cuda.device_count()\
+          \ == 0\n        t = torch.tensor([5, 5, 5], dtype=torch.int64)\n    print(f\"\
+          tensor: {t}\")\n    print(\"GPU availability test: PASS\")\n\n"
+        image: quay.io/modh/runtime-images@sha256:e1f7ad986f694236a818796af290a99b4e7f73d44cd39ca45860087644d136dd
         resources:
           accelerator:
             count: '1'

From f286be7d08aaf5e010686912e6b3542f8b0b2347 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Sanz=20G=C3=B3miz?=
Date: Tue, 3 Dec 2024 15:02:10 +0100
Subject: [PATCH 10/10] chores on upgrade test suite

---
 .../CLI/MustGather/MustGather.resource        |   4 +-
 .../test-must-gather-logs.robot               |   6 +-
 .../0201__pre_upgrade.robot                   | 325 +++++++------
 .../0202__during_upgrade.robot                | 144 ++++--
 .../0203__post_upgrade.robot                  | 450 ++++++++++--------
 5 files changed, 538 insertions(+), 391 deletions(-)

diff --git a/ods_ci/tests/Resources/CLI/MustGather/MustGather.resource b/ods_ci/tests/Resources/CLI/MustGather/MustGather.resource
index 50f506a6f..d42abee82 100644
--- a/ods_ci/tests/Resources/CLI/MustGather/MustGather.resource
+++ b/ods_ci/tests/Resources/CLI/MustGather/MustGather.resource
@@ -7,7 +7,7 @@ Resource    ../../Common.robot


 *** Keywords ***
-Get must-gather Logs
+Get Must-Gather Logs
     [Documentation]    Runs the must-gather image and obtains the ODH/RHOAI logs
     ${output}=    Run process    tests/Resources/CLI/MustGather/get-must-gather-logs.sh    shell=yes
     Should Be Equal As Integers    ${output.rc}    0
@@ -27,6 +27,6 @@ Verify Logs For ${namespace}
     ${log_files}=    Run    find ${namespaces_log_dir}/${namespace}/pods -type f -name "*.log"
     Should Not Be Equal    ${log_files}    ${EMPTY}

-Cleanup must-gather Logs
+Cleanup Must-Gather Logs
     [Documentation]    Deletes the folder with the must-gather logs
     Run Keyword If    "${must_gather_dir}" != "${EMPTY}"    Remove Directory    ${must_gather_dir}    recursive=True
diff --git a/ods_ci/tests/Tests/0100__platform/0103__must_gather/test-must-gather-logs.robot b/ods_ci/tests/Tests/0100__platform/0103__must_gather/test-must-gather-logs.robot
index 29388c627..61712deed 100644
--- a/ods_ci/tests/Tests/0100__platform/0103__must_gather/test-must-gather-logs.robot
+++ b/ods_ci/tests/Tests/0100__platform/0103__must_gather/test-must-gather-logs.robot
@@ -16,10 +16,10 @@ Verify that the must-gather image provides RHODS logs and info
     ...    MustGather
     ...    ExcludeOnODH
     ...    ExcludeOnDisconnected
-    Get must-gather Logs
-    Verify logs for ${APPLICATIONS_NAMESPACE}
+    Get Must-Gather Logs
+    Verify Logs For ${APPLICATIONS_NAMESPACE}
     IF    "${PRODUCT}" == "RHODS"
         Verify Logs For ${OPERATOR_NAMESPACE}
-        Run Keyword If RHODS Is Managed    Verify logs for ${MONITORING_NAMESPACE}
+        Run Keyword If RHODS Is Managed    Verify Logs For ${MONITORING_NAMESPACE}
     END
     [Teardown]    Cleanup must-gather Logs
diff --git a/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot b/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot
index a409ba2c1..928e8ab40 100644
--- a/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot
+++ b/ods_ci/tests/Tests/0200__rhoai_upgrade/0201__pre_upgrade.robot
@@ -1,197 +1,254 @@
 *** Settings ***
-Documentation    Test Suite for Upgrade testing, to be run before the upgrade
-Library    OpenShiftLibrary
-Resource    ../../Resources/RHOSi.resource
-Resource    ../../Resources/ODS.robot
-Resource    ../../Resources/Page/ODH/ODHDashboard/ODHDashboard.resource
-Resource    ../../Resources/Page/ODH/ODHDashboard/ODHDashboardResources.resource
-Resource    ../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource
-Resource    ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource
-Resource    ../../Resources/Page/ODH/JupyterHub/HighAvailability.robot
-Resource    ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource
-Resource    ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource
-Resource    ../../Resources/Page/ODH/AiApps/Anaconda.resource
-Resource    ../../Resources/Page/LoginPage.robot
-Resource    ../../Resources/Page/OCPLogin/OCPLogin.robot
-Resource    ../../Resources/Common.robot
-Resource    ../../Resources/Page/OCPDashboard/Pods/Pods.robot
-Resource    ../../Resources/Page/OCPDashboard/Builds/Builds.robot
-Resource    ../../Resources/Page/HybridCloudConsole/OCM.robot
-Resource    ../../Resources/CLI/ModelServing/modelmesh.resource
-Resource    ../../Resources/CLI/DataSciencePipelines/DataSciencePipelinesUpgradeTesting.resource
-Resource    ../../Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
-Resource    ../../Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
-Resource    ../../Resources/Page/ModelRegistry/ModelRegistry.resource
-Suite Setup    Dashboard Suite Setup
-Suite Teardown    RHOSi Teardown
-Test Tags    PreUpgrade
+Documentation       Test Suite for Upgrade testing, to be run before the upgrade
+
+Library             OpenShiftLibrary
+Resource            ../../Resources/RHOSi.resource
+Resource            ../../Resources/ODS.robot
+Resource            ../../Resources/Page/ODH/ODHDashboard/ODHDashboard.resource
+Resource            ../../Resources/Page/ODH/ODHDashboard/ODHDashboardResources.resource
+Resource            ../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource
+Resource            ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource
+Resource            ../../Resources/Page/ODH/JupyterHub/HighAvailability.robot
+Resource            ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource
+Resource            ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource
+Resource            ../../Resources/Page/ODH/AiApps/Anaconda.resource
+Resource            ../../Resources/Page/LoginPage.robot
+Resource            ../../Resources/Page/OCPLogin/OCPLogin.robot
+Resource            ../../Resources/Common.robot
+Resource            ../../Resources/Page/OCPDashboard/Pods/Pods.robot
+Resource            ../../Resources/Page/OCPDashboard/Builds/Builds.robot
+Resource            ../../Resources/Page/HybridCloudConsole/OCM.robot
+Resource            ../../Resources/CLI/ModelServing/modelmesh.resource
+Resource            ../../Resources/CLI/DataSciencePipelines/DataSciencePipelinesUpgradeTesting.resource
+Resource            ../../Resources/Page/DistributedWorkloads/DistributedWorkloads.resource
+Resource            ../../Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource
+Resource            ../../Resources/Page/ModelRegistry/ModelRegistry.resource
+
+Suite Setup         Dashboard Suite Setup
+Suite Teardown      RHOSi Teardown
+
+Test Tags           PreUpgrade


 *** Variables ***
-${CUSTOM_CULLER_TIMEOUT}    60000
-${S_SIZE}    25
-${DW_PROJECT_CREATED}=    False
+${CUSTOM_CULLER_TIMEOUT}    60000
+${S_SIZE}                   25
+${DW_PROJECT_CREATED}       False


 *** Test Cases ***
 Set PVC Size Via UI
     [Documentation]    Sets a Pod toleration via the admin UI
     [Tags]    Upgrade
     [Setup]    Begin Web Test
     Set PVC Value In RHODS Dashboard    ${S_SIZE}
     [Teardown]    Dashboard Test Teardown

 Set Culler Timeout
     [Documentation]    Sets a culler timeout via the admin UI
     [Tags]    Upgrade
     [Setup]    Begin Web Test
     Modify Notebook Culler Timeout    ${CUSTOM_CULLER_TIMEOUT}
     [Teardown]    Dashboard Test Teardown

 Setting Pod Toleration Via UI
     [Documentation]    Sets a Pod toleration via the admin UI
     [Tags]    Upgrade
     [Setup]    Begin Web Test
     Menu.Navigate To Page    Settings    Cluster settings
     Wait Until Page Contains    Notebook pod tolerations
     Set Pod Toleration Via UI    TestToleration
     Disable "Usage Data Collection"
     [Teardown]    Dashboard Test Teardown

 Verify RHODS Accept Multiple Admin Groups And CRD Gets Updates
     [Documentation]    Verify that users can set multiple admin groups and
     ...    check OdhDashboardConfig CRD gets updated according to Admin UI
     [Tags]    Upgrade
     [Setup]    Begin Web Test
     Launch Dashboard And Check User Management Option Is Available For The User    ${TEST_USER.USERNAME}    ${TEST_USER.PASSWORD}    ${TEST_USER.AUTH_TYPE}    #robocop: disable
     ...
check OdhDashboardConfig CRD gets updated according to Admin UI
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
+    # robocop: disable
+    Launch Dashboard And Check User Management Option Is Available For The User
+    ...    ${TEST_USER.USERNAME}
+    ...    ${TEST_USER.PASSWORD}
+    ...    ${TEST_USER.AUTH_TYPE}
     Clear User Management Settings
-    Add OpenShift Groups To Data Science Administrators    rhods-admins    rhods-users
-    Add OpenShift Groups To Data Science User Groups    system:authenticated
+    Add OpenShift Groups To Data Science Administrators    rhods-admins    rhods-users
+    Add OpenShift Groups To Data Science User Groups    system:authenticated
     Save Changes In User Management Setting
-    [Teardown]    Dashboard Test Teardown
+    [Teardown]    Dashboard Test Teardown
 
 Verify Custom Image Can Be Added
-    [Documentation]    Create Custome notebook using Cli
-    [Tags]    Upgrade
-    Oc Apply    kind=ImageStream    src=tests/Tests/0200__rhoai_upgrade/custome_image.yaml
+    [Documentation]    Create Custom notebook using CLI
+    [Tags]    Upgrade
+    Oc Apply    kind=ImageStream    src=tests/Tests/0200__rhoai_upgrade/custome_image.yaml
 
 Verify User Can Disable The Runtime
-    [Documentation]    Disable the Serving runtime using Cli
-    [Tags]    Upgrade
-    Disable Model Serving Runtime Using CLI    namespace=redhat-ods-applications
+    [Documentation]    Disable the Serving runtime using CLI
+    [Tags]    Upgrade
+    Disable Model Serving Runtime Using CLI    namespace=redhat-ods-applications
 
 Verify Model Can Be Deployed Via UI For Upgrade
-    [Tags]    Upgrade
-    [Setup]    Begin Web Test
-    ${PRJ_TITLE}=    Set Variable    model-serving-upgrade
-    ${PRJ_DESCRIPTION}=    Set Variable    project used for model serving tests
-    ${MODEL_NAME}=    Set Variable    test-model
-    ${MODEL_CREATED}=    Set Variable    ${FALSE}
-    ${RUNTIME_NAME}=    Set Variable    Model Serving Test
-    ${INFERENCE_INPUT}=    Set Variable    @tests/Resources/Files/modelmesh-mnist-input.json
-    ${INFERENCE_INPUT_OPENVINO}=    Set Variable    @tests/Resources/Files/openvino-example-input.json
-    ${EXPECTED_INFERENCE_OUTPUT}=    Set Variable    {"model_name":"test-model__isvc-83d6fab7bd","model_version":"1","outputs":[{"name":"Plus214_Output_0","datatype":"FP32","shape":[1,10],"data":[-8.233053,-7.7497034,-3.4236815,12.3630295,-12.079103,17.266596,-10.570976,0.7130762,3.321715,1.3621228]}]}
-    ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}=    Set Variable    {"model_name":"test-model__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]}
-    ${runtime_pod_name} =    Replace String Using Regexp    string=${RUNTIME_NAME}    pattern=\\s    replace_with=-
-    ${runtime_pod_name} =    Convert To Lower Case    ${runtime_pod_name}
+    # robocop: off=too-long-test-case
+    # robocop: off=too-many-calls-in-test-case
+    [Documentation]    Verify Model Can Be Deployed Via UI For Upgrade
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
+    ${PRJ_TITLE}=    Set Variable    model-serving-upgrade
+    ${PRJ_DESCRIPTION}=    Set Variable    project used for model serving tests
+    ${MODEL_NAME}=    Set Variable    test-model
+    ${MODEL_CREATED}=    Set Variable    ${FALSE}
+    ${RUNTIME_NAME}=    Set Variable    Model Serving Test
+    ${INFERENCE_INPUT_OPENVINO}=    Set Variable
+    ...    @tests/Resources/Files/openvino-example-input.json
+    ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}=    Set Variable
+    ...    {"model_name":"test-model__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]}    # robocop: disable:line-too-long
+    ${runtime_pod_name}=    Replace String Using Regexp
+    ...    string=${RUNTIME_NAME}
+    ...    
pattern=\\s + ... replace_with=- + ${runtime_pod_name}= Convert To Lower Case ${runtime_pod_name} Fetch CA Certificate If RHODS Is Self-Managed Clean All Models Of Current User Open Data Science Projects Home Page - Wait For RHODS Dashboard To Load wait_for_cards=${FALSE} expected_page=Data Science Projects - Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION} - Create S3 Data Connection project_title=${PRJ_TITLE} dc_name=model-serving-connection - ... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY} - ... aws_bucket_name=ods-ci-s3 - Create Model Server token=${FALSE} server_name=${RUNTIME_NAME} - Serve Model project_name=${PRJ_TITLE} model_name=${MODEL_NAME} framework=openvino_ir existing_data_connection=${TRUE} - ... data_connection_name=model-serving-connection model_path=openvino-example-model - Run Keyword And Continue On Failure Wait Until Keyword Succeeds - ... 5 min 10 sec Verify Openvino Deployment runtime_name=${runtime_pod_name} - Run Keyword And Continue On Failure Wait Until Keyword Succeeds 5 min 10 sec Verify Serving Service - Verify Model Status ${MODEL_NAME} success - Set Suite Variable ${MODEL_CREATED} ${TRUE} - Run Keyword And Continue On Failure Verify Model Inference ${MODEL_NAME} ${INFERENCE_INPUT_OPENVINO} ${EXPECTED_INFERENCE_OUTPUT_OPENVINO} token_auth=${FALSE} - Remove File openshift_ca.crt - [Teardown] Run Keywords Dashboard Test Teardown - ... AND - ... Run Keyword If Test Failed Get Events And Pod Logs namespace=${PRJ_TITLE} - ... label_selector=name=modelmesh-serving-${runtime_pod_name} + Wait For RHODS Dashboard To Load + ... wait_for_cards=${FALSE} + ... expected_page=Data Science Projects + Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION} + Create S3 Data Connection + ... project_title=${PRJ_TITLE} + ... dc_name=model-serving-connection + ... aws_access_key=${S3.AWS_ACCESS_KEY_ID} + ... aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY} + ... aws_bucket_name=ods-ci-s3 + Create Model Server token=${FALSE} server_name=${RUNTIME_NAME} + Serve Model + ... project_name=${PRJ_TITLE} + ... model_name=${MODEL_NAME} + ... framework=openvino_ir + ... existing_data_connection=${TRUE} + ... data_connection_name=model-serving-connection + ... model_path=openvino-example-model + Run Keyword And Continue On Failure + ... Wait Until Keyword Succeeds + ... 5 min + ... 10 sec + ... Verify Openvino Deployment + ... runtime_name=${runtime_pod_name} + Run Keyword And Continue On Failure + ... Wait Until Keyword Succeeds + ... 5 min + ... 10 sec + ... Verify Serving Service + Verify Model Status ${MODEL_NAME} success + Set Suite Variable ${MODEL_CREATED} ${TRUE} # robocop: disable:replace-set-variable-with-var + Run Keyword And Continue On Failure + ... Verify Model Inference + ... ${MODEL_NAME} + ... ${INFERENCE_INPUT_OPENVINO} + ... ${EXPECTED_INFERENCE_OUTPUT_OPENVINO} + ... token_auth=${FALSE} + Remove File openshift_ca.crt + [Teardown] Run Keywords Dashboard Test Teardown + ... AND + ... Run Keyword If Test Failed Get Events And Pod Logs namespace=${PRJ_TITLE} + ... label_selector=name=modelmesh-serving-${runtime_pod_name} Verify User Can Deploy Custom Runtime For Upgrade - [Tags] Upgrade - Create Custom Serving Runtime Using Template By CLI tests/Resources/Files/caikit_runtime_template.yaml + [Documentation] Verify User Can Deploy Custom Runtime For Upgrade + [Tags] Upgrade + Create Custom Serving Runtime Using Template By CLI + ... 
tests/Resources/Files/caikit_runtime_template.yaml Begin Web Test - Menu.Navigate To Page Settings Serving runtimes - Wait Until Page Contains Add serving runtime timeout=15s - Page Should Contain Element //tr[@id='caikit-runtime'] - [Teardown] Dashboard Test Teardown + Menu.Navigate To Page Settings Serving runtimes + Wait Until Page Contains Add serving runtime timeout=15s + Page Should Contain Element //tr[@id='caikit-runtime'] + [Teardown] Dashboard Test Teardown Verify Distributed Workload Metrics Resources By Creating Ray Cluster Workload + # robocop: off=too-long-test-case + # robocop: off=too-many-calls-in-test-case [Documentation] Creates the Ray Cluster and verify resource usage - [Tags] Upgrade - [Setup] Prepare Codeflare-SDK Test Setup - ${PRJ_UPGRADE} Set Variable test-ns-rayupgrade - ${JOB_NAME} Set Variable mnist - Run Codeflare-SDK Test upgrade raycluster_sdk_upgrade_test.py::TestMNISTRayClusterUp 3.11 ${RAY_CUDA_IMAGE_3.11} ${CODEFLARE-SDK-RELEASE-TAG} - Set Library Search Order SeleniumLibrary + [Tags] Upgrade + [Setup] Prepare Codeflare-SDK Test Setup + ${PRJ_UPGRADE}= Set Variable test-ns-rayupgrade + ${JOB_NAME}= Set Variable mnist + Run Codeflare-SDK Test + ... upgrade + ... raycluster_sdk_upgrade_test.py::TestMNISTRayClusterUp + ... 3.11 + ... ${RAY_CUDA_IMAGE_3.11} + ... ${CODEFLARE-SDK-RELEASE-TAG} + Set Library Search Order SeleniumLibrary RHOSi Setup - Launch Dashboard ${TEST_USER.USERNAME} ${TEST_USER.PASSWORD} ${TEST_USER.AUTH_TYPE} - ... ${ODH_DASHBOARD_URL} ${BROWSER.NAME} ${BROWSER.OPTIONS} + Launch Dashboard + ... ${TEST_USER.USERNAME} + ... ${TEST_USER.PASSWORD} + ... ${TEST_USER.AUTH_TYPE} + ... ${ODH_DASHBOARD_URL} + ... ${BROWSER.NAME} + ... ${BROWSER.OPTIONS} Open Distributed Workload Metrics Home Page - Select Distributed Workload Project By Name ${PRJ_UPGRADE} - Set Global Variable ${DW_PROJECT_CREATED} True - Select Refresh Interval 15 seconds - Wait Until Element Is Visible ${DISTRIBUITED_WORKLOAD_RESOURCE_METRICS_TITLE_XP} timeout=20 - Wait Until Element Is Visible xpath=//*[text()="Running"] timeout=30 - - ${cpu_requested} = Get CPU Requested ${PRJ_UPGRADE} local-queue-mnist - ${memory_requested} = Get Memory Requested ${PRJ_UPGRADE} local-queue-mnist RayCluster - Check Requested Resources Chart ${PRJ_UPGRADE} ${cpu_requested} ${memory_requested} - Check Requested Resources ${PRJ_UPGRADE} ${CPU_SHARED_QUOTA} - ... ${MEMEORY_SHARED_QUOTA} ${cpu_requested} ${memory_requested} RayCluster - - Check Distributed Workload Resource Metrics Status ${JOB_NAME} Running - Check Distributed Worklaod Status Overview ${JOB_NAME} Running - ... All pods were ready or succeeded since the workload admission + Select Distributed Workload Project By Name ${PRJ_UPGRADE} + Set Global Variable ${DW_PROJECT_CREATED} True # robocop: disable:replace-set-variable-with-var + Select Refresh Interval 15 seconds + Wait Until Element Is Visible + ... ${DISTRIBUITED_WORKLOAD_RESOURCE_METRICS_TITLE_XP} + ... timeout=20 + Wait Until Element Is Visible xpath=//*[text()="Running"] timeout=30 + + ${cpu_requested}= Get CPU Requested ${PRJ_UPGRADE} local-queue-mnist + ${memory_requested}= Get Memory Requested ${PRJ_UPGRADE} local-queue-mnist RayCluster + Check Requested Resources Chart ${PRJ_UPGRADE} ${cpu_requested} ${memory_requested} + Check Requested Resources + ... ${PRJ_UPGRADE} + ... ${CPU_SHARED_QUOTA} + ... ${MEMEORY_SHARED_QUOTA} + ... ${cpu_requested} + ... ${memory_requested} + ... 
RayCluster + + Check Distributed Workload Resource Metrics Status ${JOB_NAME} Running + Check Distributed Worklaod Status Overview ${JOB_NAME} Running + ... All pods were ready or succeeded since the workload admission Click Button ${PROJECT_METRICS_TAB_XP} - Check Distributed Workload Resource Metrics Chart ${PRJ_UPGRADE} ${cpu_requested} - ... ${memory_requested} RayCluster ${JOB_NAME} + Check Distributed Workload Resource Metrics Chart ${PRJ_UPGRADE} ${cpu_requested} + ... ${memory_requested} RayCluster ${JOB_NAME} - [Teardown] Run Keywords Cleanup Codeflare-SDK Setup AND - ... Run Keyword If Test Failed Codeflare Upgrade Tests Teardown ${PRJ_UPGRADE} ${DW_PROJECT_CREATED} + [Teardown] Run Keywords Cleanup Codeflare-SDK Setup AND + ... Run Keyword If Test Failed Codeflare Upgrade Tests Teardown ${PRJ_UPGRADE} ${DW_PROJECT_CREATED} # robocop: disable:line-too-long Run Training Operator ODH Setup PyTorchJob Test Use Case [Documentation] Run Training Operator ODH Setup PyTorchJob Test Use Case - [Tags] Upgrade - [Setup] Prepare Training Operator E2E Upgrade Test Suite - Run Training Operator ODH Upgrade Test TestSetupPytorchjob - [Teardown] Teardown Training Operator E2E Upgrade Test Suite + [Tags] Upgrade + [Setup] Prepare Training Operator E2E Upgrade Test Suite + Run Training Operator ODH Upgrade Test TestSetupPytorchjob + [Teardown] Teardown Training Operator E2E Upgrade Test Suite Run Training Operator ODH Setup Sleep PyTorchJob Test Use Case [Documentation] Setup PyTorchJob which is kept running for 24 hours - [Tags] Upgrade - [Setup] Prepare Training Operator E2E Upgrade Test Suite - Run Training Operator ODH Upgrade Test TestSetupSleepPytorchjob - [Teardown] Teardown Training Operator E2E Upgrade Test Suite + [Tags] Upgrade + [Setup] Prepare Training Operator E2E Upgrade Test Suite + Run Training Operator ODH Upgrade Test TestSetupSleepPytorchjob + [Teardown] Teardown Training Operator E2E Upgrade Test Suite Data Science Pipelines Pre Upgrade Configuration [Documentation] Creates project dsp-test-upgrade and configures the pipeline resources testing upgrade - [Tags] Upgrade DataSciencePipelines-Backend + [Tags] Upgrade DataSciencePipelines-Backend DataSciencePipelinesUpgradeTesting.Setup Environment For Upgrade Testing Model Registry Pre Upgrade Set Up [Documentation] Creates a Model Registry instance and registers a model/version - [Tags] Upgrade ModelRegistryUpgrade + [Tags] Upgrade ModelRegistryUpgrade Model Registry Pre Upgrade Scenario *** Keywords *** Dashboard Suite Setup - [Documentation] Basic suite setup + [Documentation] Basic suite setup Set Library Search Order SeleniumLibrary RHOSi Setup Dashboard Test Teardown - [Documentation] Basic suite teardown + [Documentation] Basic suite teardown Close All Browsers diff --git a/ods_ci/tests/Tests/0200__rhoai_upgrade/0202__during_upgrade.robot b/ods_ci/tests/Tests/0200__rhoai_upgrade/0202__during_upgrade.robot index b34d1e897..740cc93f2 100644 --- a/ods_ci/tests/Tests/0200__rhoai_upgrade/0202__during_upgrade.robot +++ b/ods_ci/tests/Tests/0200__rhoai_upgrade/0202__during_upgrade.robot @@ -1,5 +1,6 @@ *** Settings *** Documentation Test Suite for Upgrade testing,to be run during the upgrade + Resource ../../Resources/ODS.robot Resource ../../Resources/Common.robot Resource ../../Resources/Page/ODH/JupyterHub/JupyterHubSpawner.robot @@ -9,107 +10,146 @@ Resource ../../Resources/Page/ODH/ODHDashboard/ODHDashboardSettings.r Resource ../../Resources/Page/ODH/JupyterHub/ODHJupyterhub.resource Library DebugLibrary Library 
JupyterLibrary + Test Tags DuringUpgrade *** Variables *** -${CODE} while True: import time ; time.sleep(10); print ("Hello") +${CODE} while True: import time ; time.sleep(10); print ("Hello") *** Test Cases *** Long Running Jupyter Notebook [Documentation] Launch a long running notebook before the upgrade - [Tags] Upgrade + [Tags] Upgrade Launch Notebook - Add And Run JupyterLab Code Cell In Active Notebook ${CODE} - ${return_code} ${timestamp} Run And Return Rc And Output oc get pod -n ${NOTEBOOKS_NAMESPACE} jupyter-nb-ldap-2dadmin2-0 --no-headers --output='custom-columns=TIMESTAMP:.metadata.creationTimestamp' #robocop:disable - Should Be Equal As Integers ${return_code} 0 - Set Global Variable ${timestamp} #robocop: disable + Add And Run JupyterLab Code Cell In Active Notebook ${CODE} + # robocop:disable + ${return_code} ${timestamp} = Run And Return Rc And Output + ... oc get pod -n ${NOTEBOOKS_NAMESPACE} jupyter-nb-ldap-2dadmin2-0 --no-headers --output='custom-columns=TIMESTAMP:.metadata.creationTimestamp' + Should Be Equal As Integers ${return_code} 0 + Set Global Variable ${timestamp} # robocop: disable Close Browser Upgrade RHODS [Documentation] Approve the install plan for the upgrade and make sure that upgrade has completed - [Tags] ODS-1766 - ... Upgrade - ${initial_version} = Get RHODS Version - ${initial_creation_date} = Get Operator Pod Creation Date - ${return_code} ${output} Run And Return Rc And Output oc patch installplan $(oc get installplans -n ${OPERATOR_NAMESPACE} | grep -v NAME | awk '{print $1}') -n ${OPERATOR_NAMESPACE} --type='json' -p '[{"op": "replace", "path": "/spec/approved", "value": true}]' #robocop:disable - Should Be Equal As Integers ${return_code} 0 msg=Error while upgrading RHODS - Sleep 30s reason=wait for thirty seconds until old CSV is removed and new one is ready - RHODS Version Should Be Greater Than ${initial_version} - Operator Pod Creation Date Should Be Updated ${initial_creation_date} - OpenShiftLibrary.Wait For Pods Status namespace=${OPERATOR_NAMESPACE} timeout=300 + [Tags] ODS-1766 Upgrade + ${initial_version} = Get RHODS Version + ${initial_creation_date} = Get Operator Pod Creation Date + # robocop:disable + ${return_code} ${output} = Run And Return Rc And Output + ... oc patch installplan $(oc get installplans -n ${OPERATOR_NAMESPACE} | grep -v NAME | awk '{print $1}') -n ${OPERATOR_NAMESPACE} --type='json' -p '[{"op": "replace", "path": "/spec/approved", "value": true}]' + Should Be Equal As Integers + ... ${return_code} + ... 0 + ... msg=Error while upgrading RHODS + Sleep + ... 30s + ... reason=wait for thirty seconds until old CSV is removed and new one is ready + RHODS Version Should Be Greater Than ${initial_version} + Operator Pod Creation Date Should Be Updated ${initial_creation_date} + OpenShiftLibrary.Wait For Pods Status namespace=${OPERATOR_NAMESPACE} timeout=300 TensorFlow Image Test - [Documentation] Run basic tensorflow notebook during upgrade - [Tags] Upgrade - Launch Notebook tensorflow ${TEST_USER.USERNAME} ${TEST_USER.PASSWORD} ${TEST_USER.AUTH_TYPE} - [Teardown] Upgrade Test Teardown + [Documentation] Run basic tensorflow notebook during upgrade + [Tags] Upgrade + Launch Notebook + ... tensorflow + ... ${TEST_USER.USERNAME} + ... ${TEST_USER.PASSWORD} + ... 
${TEST_USER.AUTH_TYPE} + [Teardown] Upgrade Test Teardown PyTorch Image Workload Test - [Documentation] Run basic pytorch notebook during upgrade - [Tags] Upgrade - Launch Notebook pytorch ${TEST_USER.USERNAME} ${TEST_USER.PASSWORD} ${TEST_USER.AUTH_TYPE} - Run Repo And Clean https://github.com/lugi0/notebook-benchmarks notebook-benchmarks/pytorch/PyTorch-MNIST-Minimal.ipynb + [Documentation] Run basic pytorch notebook during upgrade + [Tags] Upgrade + Launch Notebook + ... pytorch + ... ${TEST_USER.USERNAME} + ... ${TEST_USER.PASSWORD} + ... ${TEST_USER.AUTH_TYPE} + Run Repo And Clean + ... https://github.com/lugi0/notebook-benchmarks + ... notebook-benchmarks/pytorch/PyTorch-MNIST-Minimal.ipynb Capture Page Screenshot JupyterLab Code Cell Error Output Should Not Be Visible - [Teardown] Upgrade Test Teardown + [Teardown] Upgrade Test Teardown *** Keywords *** Launch Notebook - [Documentation] Launch notebook for the suite - [Arguments] ${notbook_image}=minimal-notebook ${username}=${TEST_USER2.USERNAME} ${password}=${TEST_USER2.PASSWORD} ${auth_type}=${TEST_USER2.AUTH_TYPE} #robocop: disable - Begin Web Test username=${username} password=${password} auth_type=${auth_type} - Login To RHODS Dashboard ${username} ${password} ${auth_type} + [Documentation] Launch notebook for the suite + [Arguments] ${notbook_image}=minimal-notebook + ... ${username}=${TEST_USER2.USERNAME} + ... ${password}=${TEST_USER2.PASSWORD} + ... ${auth_type}=${TEST_USER2.AUTH_TYPE} + # robocop: disable + Begin Web Test username=${username} password=${password} auth_type=${auth_type} + Login To RHODS Dashboard ${username} ${password} ${auth_type} Wait For RHODS Dashboard To Load Launch Jupyter From RHODS Dashboard Link - Login To Jupyterhub ${username} ${password} ${auth_type} - ${authorization_required} Is Service Account Authorization Required + Login To Jupyterhub ${username} ${password} ${auth_type} + ${authorization_required} = Is Service Account Authorization Required IF ${authorization_required} Authorize Jupyterhub Service Account Fix Spawner Status - Spawn Notebook With Arguments image=${notbook_image} username=${username} password=${password} auth_type=${auth_type} #robocop: disable + # robocop: disable + Spawn Notebook With Arguments + ... image=${notbook_image} + ... username=${username} + ... password=${password} + ... auth_type=${auth_type} Upgrade Test Teardown + # robocop: off=too-many-calls-in-keyword + [Documentation] Upgrade Test Teardown End Web Test Skip If RHODS Is Self-Managed ${expression} = Set Variable rhods_aggregate_availability&step=1 ${resp} = Prometheus.Run Query ${RHODS_PROMETHEUS_URL} ${RHODS_PROMETHEUS_TOKEN} ${expression} Log rhods_aggregate_availability: ${resp.json()["data"]["result"][0]["value"][-1]} - @{list_values} = Create List 1 - Run Keyword And Warn On Failure Should Contain ${list_values} ${resp.json()["data"]["result"][0]["value"][-1]} - ${expression} = Set Variable rhods_aggregate_availability{name="rhods-dashboard"}&step=1 + @{list_values} = Create List 1 # robocop: disable:replace-set-variable-with-var + Run Keyword And Warn On Failure + ... Should Contain + ... ${list_values} + ... 
${resp.json()["data"]["result"][0]["value"][-1]} + ${expression} = Set Variable rhods_aggregate_availability{name="rhods-dashboard"}&step=1 ${resp} = Prometheus.Run Query ${RHODS_PROMETHEUS_URL} ${RHODS_PROMETHEUS_TOKEN} ${expression} Log rhods_aggregate_availability: ${resp.json()["data"]["result"][0]["value"][-1]} - @{list_values} = Create List 1 - Run Keyword And Warn On Failure Should Contain ${list_values} ${resp.json()["data"]["result"][0]["value"][-1]} - ${expression} = Set Variable rhods_aggregate_availability{name="notebook-spawner"}&step=1 + @{list_values} = Create List 1 # robocop: disable:replace-set-variable-with-var + Run Keyword And Warn On Failure + ... Should Contain + ... ${list_values} + ... ${resp.json()["data"]["result"][0]["value"][-1]} + ${expression} = Set Variable rhods_aggregate_availability{name="notebook-spawner"}&step=1 ${resp} = Prometheus.Run Query ${RHODS_PROMETHEUS_URL} ${RHODS_PROMETHEUS_TOKEN} ${expression} Log rhods_aggregate_availability: ${resp.json()["data"]["result"][0]["value"][-1]} - @{list_values} = Create List 1 - Run Keyword And Warn On Failure Should Contain ${list_values} ${resp.json()["data"]["result"][0]["value"][-1]} + @{list_values} = Create List 1 # robocop: disable:replace-set-variable-with-var + Run Keyword And Warn On Failure + ... Should Contain + ... ${list_values} + ... ${resp.json()["data"]["result"][0]["value"][-1]} RHODS Version Should Be Greater Than [Documentation] Checks if the RHODS version is greater than the given initial version. - ... Fails if the version is not greater. - [Arguments] ${initial_version} - ${ver} = Get RHODS Version - ${ver} = Fetch From Left ${ver} - + ... Fails if the version is not greater. + [Arguments] ${initial_version} + ${ver} = Get RHODS Version + ${ver} = Fetch From Left ${ver} - Should Be True '${ver}' > '${initial_version}' msg=Version wasn't greater than initial one ${initial_version} Get Operator Pod Creation Date [Documentation] Retrieves the creation date of the RHODS operator pod. - ... Returns the creation date as a string. - ... Fails if the command to retrieve the creation date fails. - ${return_code} ${creation_date} = Run And Return Rc And Output - ... oc get pod -n ${OPERATOR_NAMESPACE} -l name=rhods-operator --no-headers -o jsonpath='{.items[0].metadata.creationTimestamp}' - Should Be Equal As Integers ${return_code} 0 msg=Error while getting creation date of the operator pod + ... Returns the creation date as a string. + ... Fails if the command to retrieve the creation date fails. + ${return_code} ${creation_date} = Run And Return Rc And Output + ... oc get pod -n ${OPERATOR_NAMESPACE} -l name=rhods-operator --no-headers -o jsonpath='{.items[0].metadata.creationTimestamp}' #robocop: disable:line-too-long + Should Be Equal As Integers ${return_code} 0 msg=Error while getting creation date of the operator pod RETURN ${creation_date} Operator Pod Creation Date Should Be Updated [Documentation] Checks if the operator pod creation date has been updated after the upgrade. - ... Fails if the updated creation date is not more recent than the initial creation date. - [Arguments] ${initial_creation_date} - ${updated_creation_date} = Get Operator Pod Creation Date + ... Fails if the updated creation date is not more recent than the initial creation date. + [Arguments] ${initial_creation_date} + ${updated_creation_date} = Get Operator Pod Creation Date Should Be True '${updated_creation_date}' > '${initial_creation_date}' ... 
msg=Operator pod creation date was not updated after upgrade diff --git a/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot b/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot index 5682444b6..cfe28b010 100644 --- a/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot +++ b/ods_ci/tests/Tests/0200__rhoai_upgrade/0203__post_upgrade.robot @@ -1,310 +1,360 @@ *** Settings *** Documentation Test Suite for Upgrade testing,to be run after the upgrade -Library OpenShiftLibrary -Resource ../../Resources/RHOSi.resource -Resource ../../Resources/ODS.robot -Resource ../../Resources/OCP.resource -Resource ../../../tasks/Resources/RHODS_OLM/install/oc_install.robot -Resource ../../Resources/Page/ODH/ODHDashboard/ODHDashboard.resource -Resource ../../Resources/Page/ODH/ODHDashboard/ODHDashboardResources.resource -Resource ../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource -Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource -Resource ../../Resources/Page/ODH/JupyterHub/HighAvailability.robot -Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource -Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource -Resource ../../Resources/Page/ODH/AiApps/Anaconda.resource -Resource ../../Resources/Page/LoginPage.robot -Resource ../../Resources/Page/OCPLogin/OCPLogin.robot -Resource ../../Resources/Common.robot -Resource ../../Resources/Page/OCPDashboard/Pods/Pods.robot -Resource ../../Resources/Page/OCPDashboard/Builds/Builds.robot -Resource ../../Resources/Page/HybridCloudConsole/OCM.robot -Resource ../../Resources/Page/DistributedWorkloads/DistributedWorkloads.resource -Resource ../../Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource -Resource ../../Resources/CLI/MustGather/MustGather.resource -Resource ../../Resources/CLI/DataSciencePipelines/DataSciencePipelinesUpgradeTesting.resource -Resource ../../Resources/Page/ModelRegistry/ModelRegistry.resource -Suite Setup Upgrade Suite Setup -Test Tags PostUpgrade + +Library OpenShiftLibrary +Resource ../../Resources/RHOSi.resource +Resource ../../Resources/ODS.robot +Resource ../../Resources/OCP.resource +Resource ../../../tasks/Resources/RHODS_OLM/install/oc_install.robot +Resource ../../Resources/Page/ODH/ODHDashboard/ODHDashboard.resource +Resource ../../Resources/Page/ODH/ODHDashboard/ODHDashboardResources.resource +Resource ../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource +Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource +Resource ../../Resources/Page/ODH/JupyterHub/HighAvailability.robot +Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Projects.resource +Resource ../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +Resource ../../Resources/Page/ODH/AiApps/Anaconda.resource +Resource ../../Resources/Page/LoginPage.robot +Resource ../../Resources/Page/OCPLogin/OCPLogin.robot +Resource ../../Resources/Common.robot +Resource ../../Resources/Page/OCPDashboard/Pods/Pods.robot +Resource ../../Resources/Page/OCPDashboard/Builds/Builds.robot +Resource ../../Resources/Page/HybridCloudConsole/OCM.robot +Resource ../../Resources/Page/DistributedWorkloads/DistributedWorkloads.resource +Resource ../../Resources/Page/DistributedWorkloads/WorkloadMetricsUI.resource +Resource ../../Resources/CLI/MustGather/MustGather.resource +Resource 
../../Resources/CLI/DataSciencePipelines/DataSciencePipelinesUpgradeTesting.resource
+Resource            ../../Resources/Page/ModelRegistry/ModelRegistry.resource
+
+Suite Setup         Upgrade Suite Setup
+
+Test Tags           PostUpgrade
 
 
 *** Variables ***
-${S_SIZE}    25
-${DW_PROJECT_CREATED}=    False
+${S_SIZE}                   25
+${DW_PROJECT_CREATED}       False
 
 
 *** Test Cases ***
 Verify PVC Size
     [Documentation]    Verify PVC Size after the upgrade
-    [Tags]    Upgrade
+    [Tags]    Upgrade
     Get Dashboard Config Data
-    ${size}    Set Variable    ${payload[0]['spec']['notebookController']['pvcSize']}[:-2]
-    Should Be Equal As Strings    '${size}'    '${S_SIZE}'
+    ${size}    Set Variable    ${payload[0]['spec']['notebookController']['pvcSize']}[:-2]
+    Should Be Equal As Strings    '${size}'    '${S_SIZE}'
 
 Verify Pod Toleration
     [Documentation]    Verify Pod toleration after the upgrade
-    [Tags]    Upgrade
-    ${enable}    Set Variable    ${payload[0]['spec']['notebookController']['notebookTolerationSettings']['enabled']}
-    Should Be Equal As Strings    '${enable}'    'True'
+    [Tags]    Upgrade
+    ${enable}    Set Variable
+    ...    ${payload[0]['spec']['notebookController']['notebookTolerationSettings']['enabled']}
+    Should Be Equal As Strings    '${enable}'    'True'
 
 Verify RHODS User Groups
     [Documentation]    Verify User Configuration after the upgrade
-    [Tags]    Upgrade
-    ${admin}    Set Variable    ${payload[0]['spec']['groupsConfig']['adminGroups']}
-    ${user}    Set Variable    ${payload[0]['spec']['groupsConfig']['allowedGroups']}
-    Should Be Equal As Strings    '${admin}'    'rhods-admins,rhods-users'
-    Should Be Equal As Strings    '${user}'    'system:authenticated'
-    [Teardown]    Set Default Users
+    [Tags]    Upgrade
+    ${admin}    Set Variable    ${payload[0]['spec']['groupsConfig']['adminGroups']}
+    ${user}    Set Variable    ${payload[0]['spec']['groupsConfig']['allowedGroups']}
+    Should Be Equal As Strings    '${admin}'    'rhods-admins,rhods-users'
+    Should Be Equal As Strings    '${user}'    'system:authenticated'
+    [Teardown]    Set Default Users
 
 Verify Culler is Enabled
     [Documentation]    Verify Culler Configuration after the upgrade
-    [Tags]    Upgrade
-    ${status}    Check If ConfigMap Exists    ${APPLICATIONS_NAMESPACE}    notebook-controller-culler-config
+    [Tags]    Upgrade
+    ${status}    Check If ConfigMap Exists
+    ...    ${APPLICATIONS_NAMESPACE}
+    ...    notebook-controller-culler-config
     IF    '${status}' != 'PASS'
-        Fail    msg=Culler has been diabled after the upgrade
+        Fail    msg=Culler has been disabled after the upgrade
     END
 
 Verify Notebook Has Not Restarted
     [Documentation]    Verify Notbook pod has not restarted after the upgrade
-    [Tags]    Upgrade
-    ${return_code}    ${new_timestamp}    Run And Return Rc And Output    oc get pod -n ${NOTEBOOKS_NAMESPACE} jupyter-nb-ldap-2dadmin2-0 --no-headers --output='custom-columns=TIMESTAMP:.metadata.creationTimestamp'    #robocop:disable
-    Should Be Equal As Integers    ${return_code}    0
-    Should Be Equal    ${timestamp}    ${new_timestamp}    msg=Running notebook pod has restarted
+    [Tags]    Upgrade
+    # robocop:disable
+    ${return_code}    ${new_timestamp}    Run And Return Rc And Output
+    ...    oc get pod -n ${NOTEBOOKS_NAMESPACE} jupyter-nb-ldap-2dadmin2-0 --no-headers --output='custom-columns=TIMESTAMP:.metadata.creationTimestamp'
+    Should Be Equal As Integers    ${return_code}    0
+    Should Be Equal    ${timestamp}    ${new_timestamp}    msg=Running notebook pod has restarted
 
 Verify Custom Image Is Present
-    [Tags]    Upgrade
-    [Documentation]    Verify Custom Noteboook is not deleted after the upgrade
-    ${status}    Run Keyword And Return Status    Oc Get    kind=ImageStream    namespace=${APPLICATIONS_NAMESPACE}
-    ...    
field_selector=metadata.name==byon-upgrade
-    IF    not ${status}    Fail    Notebook image is deleted after the upgrade
-    [Teardown]    Delete OOTB Image
+    [Documentation]    Verify Custom Notebook is not deleted after the upgrade
+    [Tags]    Upgrade
+    ${status}    Run Keyword And Return Status
+    ...    Oc Get
+    ...    kind=ImageStream
+    ...    namespace=${APPLICATIONS_NAMESPACE}
+    ...    field_selector=metadata.name==byon-upgrade
+    IF    not ${status}    Fail    Notebook image is deleted after the upgrade
+    [Teardown]    Delete OOTB Image
 
 Verify Disable Runtime Is Present
-    [Documentation]    Disable the Serving runtime using Cli
-    [Tags]    Upgrade
-    ${rn}    Set Variable    ${payload[0]['spec']['templateDisablement']}
-    List Should Contain Value    ${rn}    ovms-gpu
-    [Teardown]    Enable Model Serving Runtime Using CLI    namespace=redhat-ods-applications
+    [Documentation]    Verify the Serving runtime disabled using CLI is still disabled after the upgrade
+    [Tags]    Upgrade
+    ${rn}    Set Variable    ${payload[0]['spec']['templateDisablement']}
+    List Should Contain Value    ${rn}    ovms-gpu
+    [Teardown]    Enable Model Serving Runtime Using CLI    namespace=redhat-ods-applications
 
 Reset PVC Size Via UI
     [Documentation]    Sets a Pod toleration via the admin UI
-    [Tags]    Upgrade
-    [Setup]    Begin Web Test
-    Set PVC Value In RHODS Dashboard    20
-    [Teardown]    Dashboard Test Teardown
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
+    Set PVC Value In RHODS Dashboard    20
+    [Teardown]    Dashboard Test Teardown
 
 Reset Culler Timeout
     [Documentation]    Sets a culler timeout via the admin UI
-    [Tags]    Upgrade
-    [Setup]    Begin Web Test
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
     Disable Notebook Culler
-    [Teardown]    Dashboard Test Teardown
+    [Teardown]    Dashboard Test Teardown
 
 Resetting Pod Toleration Via UI
     [Documentation]    Sets a Pod toleration via the admin UI
-    [Tags]    Upgrade
-    [Setup]    Begin Web Test
-    Menu.Navigate To Page    Settings    Cluster settings
-    Wait Until Page Contains    Notebook pod tolerations
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
+    Menu.Navigate To Page    Settings    Cluster settings
+    Wait Until Page Contains    Notebook pod tolerations
     Disable Pod Toleration Via UI
     Enable "Usage Data Collection"
-    IF    ${is_data_collection_enabled}==True
-        Fail    msg=Usage data colletion is enbaled after the upgrade
+    IF    ${is_data_collection_enabled}
+        Fail    msg=Usage data collection is enabled after the upgrade
     END
-    [Teardown]    Dashboard Test Teardown
+    [Teardown]    Dashboard Test Teardown
 
 Verify POD Status
     [Documentation]    Verify all the pods are up and running
-    [Tags]    Upgrade
-    Wait For Pods Status    namespace=${APPLICATIONS_NAMESPACE}    timeout=60
-    Log    Verified ${APPLICATIONS_NAMESPACE}    console=yes
-    Wait For Pods Status    namespace=${OPERATOR_NAMESPACE}    timeout=60
-    Log    Verified ${OPERATOR_NAMESPACE}    console=yes
-    Wait For Pods Status    namespace=${MONITORING_NAMESPACE}    timeout=60
-    Log    Verified ${MONITORING_NAMESPACE}    console=yes
-    Oc Get    kind=Namespace    field_selector=metadata.name=${NOTEBOOKS_NAMESPACE}
-    Log    "Verified rhods-notebook"
+    [Tags]    Upgrade
+    Wait For Pods Status    namespace=${APPLICATIONS_NAMESPACE}    timeout=60
+    Log    Verified ${APPLICATIONS_NAMESPACE}    console=yes
+    Wait For Pods Status    namespace=${OPERATOR_NAMESPACE}    timeout=60
+    Log    Verified ${OPERATOR_NAMESPACE}    console=yes
+    Wait For Pods Status    namespace=${MONITORING_NAMESPACE}    timeout=60
+    Log    Verified ${MONITORING_NAMESPACE}    console=yes
+    Oc Get    kind=Namespace    field_selector=metadata.name=${NOTEBOOKS_NAMESPACE}
+    Log    "Verified rhods-notebook"
 
 Test Inference Post RHODS Upgrade
+    # robocop: off=too-many-calls-in-test-case
+    # robocop: off=too-long-test-case
     [Documentation]    Test the inference result after 
having deployed a model that requires Token Authentication
-    [Tags]    Upgrade
-    [Setup]    Begin Web Test
-    ${PRJ_TITLE}=    Set Variable    model-serving-upgrade
-    ${PRJ_DESCRIPTION}=    Set Variable    project used for model serving tests
-    ${MODEL_NAME}=    Set Variable    test-model
-    ${MODEL_CREATED}=    Set Variable    ${FALSE}
-    ${RUNTIME_NAME}=    Set Variable    Model Serving Test
-    ${INFERENCE_INPUT}=    Set Variable    @tests/Resources/Files/modelmesh-mnist-input.json
-    ${INFERENCE_INPUT_OPENVINO}=    Set Variable    @tests/Resources/Files/openvino-example-input.json
-    ${EXPECTED_INFERENCE_OUTPUT}=    Set Variable    {"model_name":"test-model__isvc-83d6fab7bd","model_version":"1","outputs":[{"name":"Plus214_Output_0","datatype":"FP32","shape":[1,10],"data":[-8.233053,-7.7497034,-3.4236815,12.3630295,-12.079103,17.266596,-10.570976,0.7130762,3.321715,1.3621228]}]}
-    ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}=    Set Variable    {"model_name":"test-model__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]}
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
+    ${PRJ_TITLE}    Set Variable    model-serving-upgrade
+    ${PRJ_DESCRIPTION}    Set Variable    project used for model serving tests    # robocop: off=unused-variable    # robocop: disable:line-too-long
+    ${MODEL_NAME}    Set Variable    test-model
+    ${MODEL_CREATED}    Set Variable    ${FALSE}    # robocop: off=unused-variable
+    ${RUNTIME_NAME}    Set Variable    Model Serving Test    # robocop: off=unused-variable
+    ${INFERENCE_INPUT}    Set Variable    @tests/Resources/Files/modelmesh-mnist-input.json    # robocop: off=unused-variable    # robocop: disable:line-too-long
+    ${INFERENCE_INPUT_OPENVINO}    Set Variable
+    ...    @tests/Resources/Files/openvino-example-input.json
+    ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}    Set Variable
+    ...    {"model_name":"test-model__isvc-8655dc7979","model_version":"1","outputs":[{"name":"Func/StatefulPartitionedCall/output/_13:0","datatype":"FP32","shape":[1,1],"data":[0.99999994]}]}    # robocop: disable:line-too-long
     Fetch CA Certificate If RHODS Is Self-Managed
     Open Model Serving Home Page
-    Verify Model Status    ${MODEL_NAME}    success
-    Run Keyword And Continue On Failure    Verify Model Inference    ${MODEL_NAME}    ${INFERENCE_INPUT_OPENVINO}    ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}    token_auth=${FALSE}
-    Remove File    openshift_ca.crt
-    [Teardown]    Run    oc delete project ${PRJ_TITLE}
+    Verify Model Status    ${MODEL_NAME}    success
+    Run Keyword And Continue On Failure
+    ...    Verify Model Inference
+    ...    ${MODEL_NAME}
+    ...    ${INFERENCE_INPUT_OPENVINO}
+    ...    ${EXPECTED_INFERENCE_OUTPUT_OPENVINO}
+    ...    token_auth=${FALSE}
+    Remove File    openshift_ca.crt
+    [Teardown]    Run    oc delete project ${PRJ_TITLE}
 
 Verify Custom Runtime Exists After Upgrade
     [Documentation]    Test the inference result after having deployed a model that requires Token Authentication
-    [Tags]    Upgrade
-    [Setup]    Begin Web Test
-    Menu.Navigate To Page    Settings    Serving runtimes
-    Wait Until Page Contains    Add serving runtime    timeout=15s
-    Page Should Contain Element    //tr[@id='caikit-runtime']
-    Delete Serving Runtime Template From CLI By Runtime Name OR Display Name    runtime_name=caikit-runtime
-    [Teardown]    Dashboard Test Teardown
+    [Tags]    Upgrade
+    [Setup]    Begin Web Test
+    Menu.Navigate To Page    Settings    Serving runtimes
+    Wait Until Page Contains    Add serving runtime    timeout=15s
+    Page Should Contain Element    //tr[@id='caikit-runtime']
+    Delete Serving Runtime Template From CLI By Runtime Name OR Display Name
+    ...    
runtime_name=caikit-runtime
+    [Teardown]    Dashboard Test Teardown
 
 Verify Ray Cluster Exists And Monitor Workload Metrics By Submitting Ray Job After Upgrade
-    [Documentation]    check the Ray Cluster exists , submit ray job and verify resource usage after upgrade
-    [Tags]    Upgrade
-    [Setup]    Prepare Codeflare-SDK Test Setup
-    ${PRJ_UPGRADE}    Set Variable    test-ns-rayupgrade
-    ${LOCAL_QUEUE}    Set Variable    local-queue-mnist
-    ${JOB_NAME}    Set Variable    mnist
-    Run Codeflare-SDK Test    upgrade    raycluster_sdk_upgrade_test.py::TestMnistJobSubmit    3.11    ${RAY_CUDA_IMAGE_3.11}    ${CODEFLARE-SDK-RELEASE-TAG}
-    Set Global Variable    ${DW_PROJECT_CREATED}    True
-    Set Library Search Order    SeleniumLibrary
+    # robocop: off=too-long-test-case
+    # robocop: off=too-many-calls-in-test-case
+    [Documentation]    Check the Ray Cluster exists, submit a Ray job and verify resource usage after the upgrade
+    [Tags]    Upgrade
+    [Setup]    Prepare Codeflare-SDK Test Setup
+    ${PRJ_UPGRADE}    Set Variable    test-ns-rayupgrade
+    ${LOCAL_QUEUE}    Set Variable    local-queue-mnist
+    ${JOB_NAME}    Set Variable    mnist
+    Run Codeflare-SDK Test
+    ...    upgrade
+    ...    raycluster_sdk_upgrade_test.py::TestMnistJobSubmit
+    ...    3.11
+    ...    ${RAY_CUDA_IMAGE_3.11}
+    ...    ${CODEFLARE-SDK-RELEASE-TAG}
+    Set Global Variable    ${DW_PROJECT_CREATED}    True    # robocop: disable:replace-set-variable-with-var
+    Set Library Search Order    SeleniumLibrary
     RHOSi Setup
-    Launch Dashboard    ${TEST_USER.USERNAME}    ${TEST_USER.PASSWORD}    ${TEST_USER.AUTH_TYPE}
-    ...    ${ODH_DASHBOARD_URL}    ${BROWSER.NAME}    ${BROWSER.OPTIONS}
+    Launch Dashboard
+    ...    ${TEST_USER.USERNAME}
+    ...    ${TEST_USER.PASSWORD}
+    ...    ${TEST_USER.AUTH_TYPE}
+    ...    ${ODH_DASHBOARD_URL}
+    ...    ${BROWSER.NAME}
+    ...    ${BROWSER.OPTIONS}
     Open Distributed Workload Metrics Home Page
-    Select Distributed Workload Project By Name    ${PRJ_UPGRADE}
-    Select Refresh Interval    15 seconds
-    Wait Until Element Is Visible    ${DISTRIBUITED_WORKLOAD_RESOURCE_METRICS_TITLE_XP}    timeout=20
-    Wait Until Element Is Visible    xpath=//*[text()="Running"]    timeout=30
-
-    ${cpu_requested} =    Get CPU Requested    ${PRJ_UPGRADE}    ${LOCAL_QUEUE}
-    ${memory_requested} =    Get Memory Requested    ${PRJ_UPGRADE}    ${LOCAL_QUEUE}    RayCluster
-    Check Requested Resources Chart    ${PRJ_UPGRADE}    ${cpu_requested}    ${memory_requested}
-    Check Requested Resources    ${PRJ_UPGRADE}    ${CPU_SHARED_QUOTA}
-    ...    ${MEMEORY_SHARED_QUOTA}    ${cpu_requested}    ${memory_requested}    RayCluster
-
-    Check Distributed Workload Resource Metrics Status    ${JOB_NAME}    Running
-    Check Distributed Worklaod Status Overview    ${JOB_NAME}    Running
-    ...    All pods were ready or succeeded since the workload admission
-
-    Click Button    ${PROJECT_METRICS_TAB_XP}
-    Check Distributed Workload Resource Metrics Chart    ${PRJ_UPGRADE}    ${cpu_requested}
-    ...    ${memory_requested}    RayCluster    ${JOB_NAME}
-
-    [Teardown]    Run Keywords    Cleanup Codeflare-SDK Setup    AND
-    ...    Codeflare Upgrade Tests Teardown    ${PRJ_UPGRADE}    ${DW_PROJECT_CREATED}
+    Select Distributed Workload Project By Name    ${PRJ_UPGRADE}
+    Select Refresh Interval    15 seconds
+    Wait Until Element Is Visible
+    ...    ${DISTRIBUITED_WORKLOAD_RESOURCE_METRICS_TITLE_XP}
+    ...    timeout=20
+    Wait Until Element Is Visible    xpath=//*[text()="Running"]    timeout=30
+
+    ${cpu_requested}    Get CPU Requested    ${PRJ_UPGRADE}    ${LOCAL_QUEUE}
+    ${memory_requested}    Get Memory Requested    ${PRJ_UPGRADE}    ${LOCAL_QUEUE}    RayCluster
+    Check Requested Resources Chart    ${PRJ_UPGRADE}    ${cpu_requested}    ${memory_requested}
+    Check Requested Resources
+    ...    ${PRJ_UPGRADE}
+    ...    ${CPU_SHARED_QUOTA}
+    ...    ${MEMEORY_SHARED_QUOTA}
+    ...    
${cpu_requested} + ... ${memory_requested} + ... RayCluster + + Check Distributed Workload Resource Metrics Status ${JOB_NAME} Running + Check Distributed Worklaod Status Overview ${JOB_NAME} Running + ... All pods were ready or succeeded since the workload admission + + Click Button ${PROJECT_METRICS_TAB_XP} + Check Distributed Workload Resource Metrics Chart ${PRJ_UPGRADE} ${cpu_requested} + ... ${memory_requested} RayCluster ${JOB_NAME} + + [Teardown] Run Keywords Cleanup Codeflare-SDK Setup AND + ... Codeflare Upgrade Tests Teardown ${PRJ_UPGRADE} ${DW_PROJECT_CREATED} Run Training Operator ODH Run PyTorchJob Test Use Case [Documentation] Run Training Operator ODH Run PyTorchJob Test Use Case - [Tags] Upgrade - [Setup] Prepare Training Operator E2E Upgrade Test Suite - Run Training Operator ODH Upgrade Test TestRunPytorchjob - [Teardown] Teardown Training Operator E2E Upgrade Test Suite + [Tags] Upgrade + [Setup] Prepare Training Operator E2E Upgrade Test Suite + Run Training Operator ODH Upgrade Test TestRunPytorchjob + [Teardown] Teardown Training Operator E2E Upgrade Test Suite Run Training Operator ODH Run Sleep PyTorchJob Test Use Case [Documentation] Verify that running PyTorchJob Pod wasn't restarted - [Tags] Upgrade - [Setup] Prepare Training Operator E2E Upgrade Test Suite - Run Training Operator ODH Upgrade Test TestVerifySleepPytorchjob - [Teardown] Teardown Training Operator E2E Upgrade Test Suite + [Tags] Upgrade + [Setup] Prepare Training Operator E2E Upgrade Test Suite + Run Training Operator ODH Upgrade Test TestVerifySleepPytorchjob + [Teardown] Teardown Training Operator E2E Upgrade Test Suite Verify that the must-gather image provides RHODS logs and info - [Documentation] Tests the must-gather image for ODH/RHOAI after upgrading - [Tags] Upgrade - Get must-gather Logs - Verify logs for ${APPLICATIONS_NAMESPACE} - IF "${PRODUCT}" == "RHODS" + [Documentation] Tests the must-gather image for ODH/RHOAI after upgrading + [Tags] Upgrade + Get Must-Gather Logs + Verify Logs For ${APPLICATIONS_NAMESPACE} + IF "${PRODUCT}" == "RHODS" Verify Logs For ${OPERATOR_NAMESPACE} - Run Keyword If RHODS Is Managed Verify logs for ${MONITORING_NAMESPACE} + Run Keyword If RHODS Is Managed Verify Logs For ${MONITORING_NAMESPACE} END - [Teardown] Cleanup must-gather Logs + [Teardown] Cleanup Must-Gather Logs -Verify That DSC And DSCI Release.Name Attribute matches ${expected_release_name} +Verify That DSC And DSCI Release.Name Attribute matches ${expected_release_name} # robocop: disable:not-allowed-char-in-name [Documentation] Tests the release.name attribute from the DSC and DSCI matches the desired value. - ... ODH: Open Data Hub - ... RHOAI managed: OpenShift AI Cloud Service - ... RHOAI selfmanaged: OpenShift AI Self-Managed - [Tags] Upgrade - Should Be Equal As Strings ${DSC_RELEASE_NAME} ${expected_release_name} - Should Be Equal As Strings ${DSCI_RELEASE_NAME} ${expected_release_name} - -Verify That DSC And DSCI Release.Version Attribute matches the value in the subscription + ... ODH: Open Data Hub + ... RHOAI managed: OpenShift AI Cloud Service + ... 
RHOAI selfmanaged: OpenShift AI Self-Managed
+    [Tags]    Upgrade
+    Should Be Equal As Strings    ${DSC_RELEASE_NAME}    ${expected_release_name}
+    Should Be Equal As Strings    ${DSCI_RELEASE_NAME}    ${expected_release_name}
+
+Verify That DSC And DSCI Release.Version Attribute matches the value in the subscription    # robocop: disable:not-allowed-char-in-name
     [Documentation]    Tests the release.version attribute from the DSC and DSCI matches the value in the subscription.
-    [Tags]    Upgrade
-    ${rc}    ${csv_name}=    Run And Return Rc And Output
-    ...    oc get subscription -n ${OPERATOR_NAMESPACE} -l ${OPERATOR_SUBSCRIPTION_LABEL} -ojson | jq '.items[0].status.currentCSV' | tr -d '"'
-
-    Should Be Equal As Integers    ${rc}    ${0}    ${rc}
-
-    ${csv_version}=    Get Resource Attribute    ${OPERATOR_NAMESPACE}
-    ...    ClusterServiceVersion    ${csv_name}    .spec.version
-
-    Should Be Equal As Strings    ${DSC_RELEASE_VERSION}    ${csv_version}
-    Should Be Equal As Strings    ${DSCI_RELEASE_VERSION}    ${csv_version}
+    [Tags]    Upgrade
+    ${rc}    ${csv_name}    Run And Return Rc And Output
+    ...    oc get subscription -n ${OPERATOR_NAMESPACE} -l ${OPERATOR_SUBSCRIPTION_LABEL} -ojson | jq '.items[0].status.currentCSV' | tr -d '"'    # robocop: disable:line-too-long
+    Should Be Equal As Integers    ${rc}    ${0}    ${rc}
+    ${csv_version}    Get Resource Attribute    ${OPERATOR_NAMESPACE}
+    ...    ClusterServiceVersion    ${csv_name}    .spec.version
+    Should Be Equal As Strings    ${DSC_RELEASE_VERSION}    ${csv_version}
+    Should Be Equal As Strings    ${DSCI_RELEASE_VERSION}    ${csv_version}
 
 Data Science Pipelines Post Upgrade Verifications
     [Documentation]    Verifies the status of the resources created in project dsp-test-upgrade after the upgradea
-    [Tags]    Upgrade    DataSciencePipelines-Backend
-    Skip If Operator Starting Version Is Not Supported    minimum_version=2.14.0
+    [Tags]    Upgrade    DataSciencePipelines-Backend
+    Skip If Operator Starting Version Is Not Supported    minimum_version=2.14.0
     DataSciencePipelinesUpgradeTesting.Verify Resources After Upgrade
 
 Model Registry Post Upgrade Verification
     [Documentation]    Verifies that registered model/version in pre-upgrade is present after the upgrade
-    [Tags]    Upgrade    ModelRegistryUpgrade
-    ...    ProductBug    RHOAIENG-15033
-    Skip If Operator Starting Version Is Not Supported    minimum_version=2.14.0
+    [Tags]    Upgrade    ModelRegistryUpgrade    ProductBug    RHOAIENG-15033
+    Skip If Operator Starting Version Is Not Supported    minimum_version=2.14.0
     Model Registry Post Upgrade Scenario
-    [Teardown]    Post Upgrade Scenario Teardown
+    [Teardown]    Post Upgrade Scenario Teardown
 
 
 *** Keywords ***
 Dashboard Suite Setup
-    [Documentation]    Basic suite setup
+    [Documentation]    Basic suite setup
     Set Library Search Order    SeleniumLibrary
     RHOSi Setup
 
 Dashboard Test Teardown
-    [Documentation]    Basic suite Teradown
+    [Documentation]    Basic suite teardown
     IF    not ${IS_SELF_MANAGED}    Managed RHOAI Upgrade Test Teardown
     Close All Browsers
 
 Get Dashboard Config Data
-    [Documentation]    Get OdhDashboardConfig CR data
-    ${payload}    Oc Get    kind=OdhDashboardConfig    namespace=${APPLICATIONS_NAMESPACE}
+    [Documentation]    Get OdhDashboardConfig CR data
+    ${payload}    Oc Get    kind=OdhDashboardConfig    namespace=${APPLICATIONS_NAMESPACE}
     ...    
field_selector=metadata.name==odh-dashboard-config
-    Set Suite Variable    ${payload}    #robocop:disable
+    Set Suite Variable    ${payload}    # robocop:disable
 
 Set Default Users
-    [Documentation]    Set Default user settings
+    [Documentation]    Set Default user settings
     Set Standard RHODS Groups Variables
     Set Default Access Groups Settings
     IF    not ${IS_SELF_MANAGED}    Managed RHOAI Upgrade Test Teardown
 
 Delete OOTB Image
-    [Documentation]    Delete the Custom notbook create
-    ${status}    Run Keyword And Return Status    Oc Delete    kind=ImageStream    name=byon-upgrade    namespace=${APPLICATIONS_NAMESPACE}    #robocop:disable
-    IF    not ${status}    Fail    Notebook image is deleted after the upgrade
-    IF    not ${IS_SELF_MANAGED}    Managed RHOAI Upgrade Test Teardown
+    [Documentation]    Delete the Custom notebook created
+    # robocop:disable
+    ${status}    Run Keyword And Return Status
+    ...    Oc Delete
+    ...    kind=ImageStream
+    ...    name=byon-upgrade
+    ...    namespace=${APPLICATIONS_NAMESPACE}
+    IF    not ${status}    Fail    Notebook image is deleted after the upgrade
+    IF    not ${IS_SELF_MANAGED}    Managed RHOAI Upgrade Test Teardown
 
 Managed RHOAI Upgrade Test Teardown
+    # robocop: off=too-many-calls-in-keyword
     [Documentation]    Check rhods_aggregate_availability metric when RHOAI is installed as managed
-    ${expression} =    Set Variable    rhods_aggregate_availability&step=1
-    ${resp} =    Prometheus.Run Query    ${RHODS_PROMETHEUS_URL}    ${RHODS_PROMETHEUS_TOKEN}    ${expression}
+    ${expression}    Set Variable    rhods_aggregate_availability&step=1
+    ${resp}    Prometheus.Run Query    ${RHODS_PROMETHEUS_URL}    ${RHODS_PROMETHEUS_TOKEN}    ${expression}
     Log    rhods_aggregate_availability: ${resp.json()["data"]["result"][0]["value"][-1]}
-    @{list_values} =    Create List    1
-    Run Keyword And Warn On Failure    Should Contain    ${list_values}    ${resp.json()["data"]["result"][0]["value"][-1]}
-    ${expression} =    Set Variable    rhods_aggregate_availability{name="rhods-dashboard"}&step=1
-    ${resp} =    Prometheus.Run Query    ${RHODS_PROMETHEUS_URL}    ${RHODS_PROMETHEUS_TOKEN}    ${expression}
+    @{list_values}    Create List    1    # robocop: disable:replace-create-with-var
+    Run Keyword And Warn On Failure
+    ...    Should Contain
+    ...    ${list_values}
+    ...    ${resp.json()["data"]["result"][0]["value"][-1]}
+    ${expression}    Set Variable    rhods_aggregate_availability{name="rhods-dashboard"}&step=1
+    ${resp}    Prometheus.Run Query    ${RHODS_PROMETHEUS_URL}    ${RHODS_PROMETHEUS_TOKEN}    ${expression}
     Log    rhods_aggregate_availability: ${resp.json()["data"]["result"][0]["value"][-1]}
-    @{list_values} =    Create List    1
-    Run Keyword And Warn On Failure    Should Contain    ${list_values}    ${resp.json()["data"]["result"][0]["value"][-1]}
-    ${expression} =    Set Variable    rhods_aggregate_availability{name="notebook-spawner"}&step=1
-    ${resp} =    Prometheus.Run Query    ${RHODS_PROMETHEUS_URL}    ${RHODS_PROMETHEUS_TOKEN}    ${expression}
+    @{list_values}    Create List    1    # robocop: disable:replace-create-with-var
+    Run Keyword And Warn On Failure
+    ...    Should Contain
+    ...    ${list_values}
+    ...    ${resp.json()["data"]["result"][0]["value"][-1]}
+    ${expression}    Set Variable    rhods_aggregate_availability{name="notebook-spawner"}&step=1
+    ${resp}    Prometheus.Run Query    ${RHODS_PROMETHEUS_URL}    ${RHODS_PROMETHEUS_TOKEN}    ${expression}
     Log    rhods_aggregate_availability: ${resp.json()["data"]["result"][0]["value"][-1]}
-    @{list_values} =    Create List    1
-    Run Keyword And Warn On Failure    Should Contain    ${list_values}    ${resp.json()["data"]["result"][0]["value"][-1]}
+    @{list_values}    Create List    1    # robocop: disable:replace-create-with-var
+    Run Keyword And Warn On Failure
+    ...    Should Contain
+    ...    
${list_values} + ... ${resp.json()["data"]["result"][0]["value"][-1]} Upgrade Suite Setup [Documentation] Set of action to run as Suite setup RHOSi Setup - ${IS_SELF_MANAGED}= Is RHODS Self-Managed - Set Suite Variable ${IS_SELF_MANAGED} + ${IS_SELF_MANAGED} Is RHODS Self-Managed + Set Suite Variable ${IS_SELF_MANAGED} # robocop: disable:replace-set-variable-with-var Gather Release Attributes From DSC And DSCI Set Expected Value For Release Name
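
Note: the "Managed RHOAI Upgrade Test Teardown" keyword above performs the same
availability check three times, querying rhods_aggregate_availability through
Prometheus.Run Query and warning when the latest sample is not "1". For readers
less familiar with the Robot syntax, a minimal Python sketch of the equivalent
check follows. It is illustrative only, not part of the suite: PROM_URL and
TOKEN are placeholders for ${RHODS_PROMETHEUS_URL} and ${RHODS_PROMETHEUS_TOKEN}.

    # Sketch of the availability check, assuming a reachable Prometheus route
    # and a valid bearer token (both placeholders here).
    import requests

    PROM_URL = "https://<rhods-prometheus-route>"  # placeholder
    TOKEN = "<bearer-token>"                       # placeholder

    for query in (
        'rhods_aggregate_availability',
        'rhods_aggregate_availability{name="rhods-dashboard"}',
        'rhods_aggregate_availability{name="notebook-spawner"}',
    ):
        resp = requests.get(
            f"{PROM_URL}/api/v1/query",
            params={"query": query},
            headers={"Authorization": f"Bearer {TOKEN}"},
        )
        # Same JSON path the keyword logs: latest sample of the first result.
        value = resp.json()["data"]["result"][0]["value"][-1]
        # The keyword only warns on failure, so mirror that behaviour here.
        if value != "1":
            print(f"WARN: {query} returned {value}, expected 1")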