From 5f0c8e234e728a5947a0fa41180cbaabe07550a0 Mon Sep 17 00:00:00 2001
From: Milind Waykole <mwaykole@mwaykole-thinkpadp1gen4i.bengluru.csb>
Date: Fri, 7 Jun 2024 21:50:58 +0530
Subject: [PATCH 1/2] Add support for TGI metrics for vLLM

Extend ${SEARCH_METRICS} with the TGI tokenize, queue, batch, input-length
and generated-tokens series exposed by the vLLM runtime, rename the tests
to mention TGI, and point Depends On Test at the renamed deploy test.
---
 .../426__model_serving_vllm_metrics.robot     | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
index c5cb9554d..e2a72a85f 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
@@ -52,10 +52,24 @@ ${TEST_NS}=    vllm-gpt2
 ...    vllm:request_success_total
 ...    vllm:avg_prompt_throughput_toks_per_s
 ...    vllm:avg_generation_throughput_toks_per_s
-
+...    tgi_tokenize_request_tokens_bucket
+...    tgi_tokenize_request_tokens_count
+...    tgi_tokenize_request_tokens_sum
+...    tgi_tokenize_request_input_count_total
+...    tgi_request_input_count_total
+...    tgi_request_queue_duration_bucket
+...    tgi_request_queue_duration_sum
+...    tgi_queue_size
+...    tgi_batch_current_size
+...    tgi_request_input_length_bucket
+...    tgi_request_input_length_count
+...    tgi_request_input_length_sum
+...    tgi_request_generated_tokens_bucket
+...    tgi_request_generated_tokens_count
+...    tgi_request_generated_tokens_sum
 
 *** Test Cases ***
-Verify User Can Deploy A Model With Vllm Via CLI
+Verify User Can Deploy A Model With Vllm And TGI Via CLI
     [Documentation]    Deploy a model (gpt2) using the vllm runtime and confirm that it's running
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    VLLM
     ${rc}    ${out}=    Run And Return Rc And Output    oc apply -f ${DL_POD_FILEPATH}
@@ -75,7 +89,7 @@ Verify User Can Deploy A Model With Vllm Via CLI
     ...    inference_type=chat-completions    n_times=3    query_idx=8
     ...    namespace=${TEST_NS}    string_check_only=${TRUE}    validate_response=${FALSE}
 
-Verify Vllm Metrics Are Present
+Verify Vllm And TGI Metrics Are Present
     [Documentation]    Confirm vLLM metrics are exposed in OpenShift metrics
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    VLLM
-    Depends On Test    Verify User Can Deploy A Model With Vllm Via CLI
+    Depends On Test    Verify User Can Deploy A Model With Vllm And TGI Via CLI
@@ -89,7 +103,7 @@ Verify Vllm Metrics Are Present
     Set Suite Variable    ${token}
     Metrics Should Exist In UserWorkloadMonitoring    ${thanos_url}    ${token}    ${SEARCH_METRICS}
 
-Verify Vllm Metrics Values Match Between UWM And Endpoint
+Verify Vllm And TGI Metrics Values Match Between UWM And Endpoint
     [Documentation]    Confirm the values returned by UWM and by the model endpoint match for each metric
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    RHOAIENG-7687    VLLM
-    Depends On Test    Verify User Can Deploy A Model With Vllm Via CLI
+    Depends On Test    Verify User Can Deploy A Model With Vllm And TGI Via CLI
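Note on the new list entries: several of the tgi_* names are histogram series (the _bucket/_count/_sum triples), and all of them are checked through the existing UWM keyword. Before chasing a UWM failure it can help to scrape the runtime's /metrics endpoint directly and confirm the names are present at the source. A minimal Robot Framework sketch of that check, reusing ${TEST_NS} and ${SEARCH_METRICS} from the suite above; the test name, the Resource path, and the https:// scheme are assumptions, not part of this patch:

*** Settings ***
Library     OperatingSystem    # provides Run And Return Rc And Output
Resource    ../../llm_resources.robot    # assumed location of the llm.* keywords

*** Test Cases ***
TGI Metric Names Are Served By The Runtime    # illustrative spot check only
    [Documentation]    Scrape the raw /metrics endpoint and confirm every name
    ...    in ${SEARCH_METRICS} appears at least once in the payload.
    ${host}=    llm.Get KServe Inference Host Via CLI    isvc_name=vllm-gpt2-openai    namespace=${TEST_NS}
    ${rc}    ${out}=    Run And Return Rc And Output    curl -sk https://${host}/metrics
    Should Be Equal As Integers    ${rc}    ${0}
    FOR    ${metric}    IN    @{SEARCH_METRICS}
        Should Contain    ${out}    ${metric}    msg=${metric} missing from endpoint output
    END

Substring matching is deliberate here: histogram samples carry labels (for example tgi_request_input_length_bucket{le="..."}), so an exact line match would fail for them.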
From 744f9b86f054b6c45be7f680a9b84ed9c807a43a Mon Sep 17 00:00:00 2001
From: Milind Waykole <mwaykole@mwaykole-thinkpadp1gen4i.bengluru.csb>
Date: Fri, 7 Jun 2024 21:53:54 +0530
Subject: [PATCH 2/2] Mention TGI metrics in the metrics-presence test docs

The test now checks both vLLM-native and TGI-compatible metrics, so say
so in its [Documentation].

Signed-off-by: Milind Waykole <mwaykole@mwaykole-thinkpadp1gen4i.bengluru.csb>
---
 .../LLMs/vllm/426__model_serving_vllm_metrics.robot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
index e2a72a85f..8348dc138 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
@@ -90,7 +90,7 @@ Verify User Can Deploy A Model With Vllm And TGI Via CLI
     ...    namespace=${TEST_NS}    string_check_only=${TRUE}    validate_response=${FALSE}
 
 Verify Vllm And TGI Metrics Are Present
-    [Documentation]    Confirm vLLM metrics are exposed in OpenShift metrics
+    [Documentation]    Confirm vLLM and TGI metrics are exposed in OpenShift metrics
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    VLLM
     Depends On Test    Verify User Can Deploy A Model With Vllm And TGI Via CLI
     ${host}=    llm.Get KServe Inference Host Via CLI    isvc_name=vllm-gpt2-openai    namespace=${TEST_NS}
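Note on the values-match test touched above: the UWM side comes back as a Prometheus query result, while the endpoint side is plain-text exposition, so the raw payload has to be parsed before the two can be compared. A small keyword sketch for the simple unlabelled counter/gauge case; the keyword name is hypothetical, and labelled histogram series would need label-aware parsing:

*** Settings ***
Library    String    # provides Get Lines Matching Regexp and Fetch From Right

*** Keywords ***
Get Metric Value From Endpoint Output    # hypothetical helper, not part of the patch
    [Documentation]    Return the sample value of an unlabelled exposition line
    ...    such as "tgi_queue_size 0" from a raw /metrics payload.
    [Arguments]    ${out}    ${metric}
    # The pattern must match the whole line, so names that merely share a
    # prefix (e.g. tgi_queue_size vs tgi_queue_size_total) are not confused.
    ${line}=    Get Lines Matching Regexp    ${out}    ${metric}\\s+\\S+
    ${value}=    Fetch From Right    ${line}    ${SPACE}
    RETURN    ${value}

Called as "Get Metric Value From Endpoint Output    ${out}    tgi_queue_size", this returns the gauge's current value as a string, which can then be compared against the UWM figure with Should Be Equal As Numbers.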