From 5f0c8e234e728a5947a0fa41180cbaabe07550a0 Mon Sep 17 00:00:00 2001
From: Milind Waykole <mwaykole@mwaykole-thinkpadp1gen4i.bengluru.csb>
Date: Fri, 7 Jun 2024 21:50:58 +0530
Subject: [PATCH 1/2] Add support for TGI metrics for vLLM

---
 .../426__model_serving_vllm_metrics.robot     | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)
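
Note: this patch extends ${SEARCH_METRICS} with the TGI metrics exposed by
the vLLM runtime, including the _bucket/_count/_sum triplets that make up
each Prometheus histogram. Below is a minimal Python sketch of the manual
spot-check that the suite's `Metrics Should Exist In UserWorkloadMonitoring`
keyword automates; the thanos-querier route lookup, the `oc whoami -t`
token, and the two sample metric names are assumptions about the test
cluster, not part of this patch.

import subprocess
import requests

def thanos_base() -> str:
    # Resolve the UWM Thanos querier host via oc (assumed route/namespace).
    host = subprocess.check_output(
        ["oc", "get", "route", "thanos-querier", "-n", "openshift-monitoring",
         "-o", "jsonpath={.spec.host}"],
        text=True,
    ).strip()
    return f"https://{host}"

def metric_exists(base: str, token: str, metric: str) -> bool:
    # An instant query for an unknown metric returns an empty result vector.
    resp = requests.get(
        f"{base}/api/v1/query",
        params={"query": metric},
        headers={"Authorization": f"Bearer {token}"},
        verify=False,  # test clusters often use self-signed certs
        timeout=30,
    )
    resp.raise_for_status()
    return bool(resp.json()["data"]["result"])

if __name__ == "__main__":
    token = subprocess.check_output(["oc", "whoami", "-t"], text=True).strip()
    base = thanos_base()
    for metric in ("tgi_queue_size", "tgi_request_input_count_total"):
        print(metric, "present:", metric_exists(base, token, metric))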

diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
index c5cb9554d..e2a72a85f 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
@@ -52,10 +52,24 @@ ${TEST_NS}=                   vllm-gpt2
 ...                           vllm:request_success_total
 ...                           vllm:avg_prompt_throughput_toks_per_s
 ...                           vllm:avg_generation_throughput_toks_per_s
-
+...                           tgi_tokenize_request_tokens_bucket
+...                           tgi_tokenize_request_tokens_count
+...                           tgi_tokenize_request_tokens_sum
+...                           tgi_tokenize_request_input_count_total
+...                           tgi_request_input_count_total
+...                           tgi_request_queue_duration_bucket
+...                           tgi_request_queue_duration_sum
+...                           tgi_queue_size
+...                           tgi_batch_current_size
+...                           tgi_request_input_length_bucket
+...                           tgi_request_input_length_count
+...                           tgi_request_input_length_sum
+...                           tgi_request_generated_tokens_bucket
+...                           tgi_request_generated_tokens_count
+...                           tgi_request_generated_tokens_sum
 
 *** Test Cases ***
-Verify User Can Deploy A Model With Vllm Via CLI
+Verify User Can Deploy A Model With Vllm And TGI Via CLI
     [Documentation]    Deploy a model (gpt2) using the vllm runtime and confirm that it's running
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264   VLLM
     ${rc}    ${out}=    Run And Return Rc And Output    oc apply -f ${DL_POD_FILEPATH}
@@ -75,7 +89,7 @@ Verify User Can Deploy A Model With Vllm Via CLI
     ...    inference_type=chat-completions    n_times=3    query_idx=8
     ...    namespace=${TEST_NS}    string_check_only=${TRUE}    validate_response=${FALSE}
 
-Verify Vllm Metrics Are Present
+Verify Vllm And TGI Metrics Are Present
     [Documentation]    Confirm vLLM metrics are exposed in OpenShift metrics
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    VLLM
-    Depends On Test    Verify User Can Deploy A Model With Vllm Via CLI
+    Depends On Test    Verify User Can Deploy A Model With Vllm And TGI Via CLI
@@ -89,7 +103,7 @@ Verify Vllm Metrics Are Present
     Set Suite Variable    ${token}
     Metrics Should Exist In UserWorkloadMonitoring    ${thanos_url}    ${token}    ${SEARCH_METRICS}
 
-Verify Vllm Metrics Values Match Between UWM And Endpoint
+Verify Vllm And TGI Metrics Values Match Between UWM And Endpoint
     [Documentation]  Confirm the values returned by UWM and by the model endpoint match for each metric
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    RHOAIENG-7687    VLLM
-    Depends On Test    Verify User Can Deploy A Model With Vllm Via CLI
+    Depends On Test    Verify User Can Deploy A Model With Vllm And TGI Via CLI

From 744f9b86f054b6c45be7f680a9b84ed9c807a43a Mon Sep 17 00:00:00 2001
From: Milind Waykole <mwaykole@mwaykole-thinkpadp1gen4i.bengluru.csb>
Date: Fri, 7 Jun 2024 21:53:54 +0530
Subject: [PATCH 2/2] Mention TGI metrics in the test documentation

Signed-off-by: Milind Waykole <mwaykole@mwaykole-thinkpadp1gen4i.bengluru.csb>
---
 .../LLMs/vllm/426__model_serving_vllm_metrics.robot             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
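
Note: this follow-up only aligns the [Documentation] string with the renamed
test. For context, below is a hedged Python sketch of the comparison that
`Verify Vllm And TGI Metrics Values Match Between UWM And Endpoint` performs:
read a counter from the model's /metrics endpoint in Prometheus text format
and compare it with the value UWM reports for the same metric. The host and
token handling, the naive prefix match, and the exact-equality assertion are
assumptions for illustration, not the suite's actual keyword implementation.

import requests

def endpoint_value(host: str, token: str, metric: str) -> float:
    # Scrape the model's /metrics endpoint (Prometheus text exposition).
    resp = requests.get(
        f"https://{host}/metrics",
        headers={"Authorization": f"Bearer {token}"},
        verify=False,  # test clusters often use self-signed certs
        timeout=30,
    )
    resp.raise_for_status()
    for line in resp.text.splitlines():
        # Naive prefix match: catches both "name 3.0" and
        # "name{label=...} 3.0" sample lines; fine for a spot check.
        if line.startswith(metric):
            return float(line.rsplit(" ", 1)[-1])
    raise LookupError(f"{metric} not found on the endpoint")

def uwm_value(thanos_base: str, token: str, metric: str) -> float:
    # Instant-vector result entries look like {"value": [ts, "3.0"], ...}.
    data = requests.get(
        f"{thanos_base}/api/v1/query",
        params={"query": metric},
        headers={"Authorization": f"Bearer {token}"},
        verify=False,
        timeout=30,
    ).json()
    return float(data["data"]["result"][0]["value"][1])

# Example assertion for one metric added by this series:
# assert endpoint_value(host, token, "tgi_request_input_count_total") == \
#        uwm_value(thanos, token, "tgi_request_input_count_total")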

diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
index e2a72a85f..8348dc138 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
@@ -90,7 +90,7 @@ Verify User Can Deploy A Model With Vllm And TGI Via CLI
     ...    namespace=${TEST_NS}    string_check_only=${TRUE}    validate_response=${FALSE}
 
 Verify Vllm And TGI Metrics Are Present
-    [Documentation]    Confirm vLLM metrics are exposed in OpenShift metrics
+    [Documentation]    Confirm vLLM and TGI metrics are exposed in OpenShift metrics
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264    VLLM
     Depends On Test    Verify User Can Deploy A Model With Vllm And TGI Via CLI
     ${host}=    llm.Get KServe Inference Host Via CLI    isvc_name=vllm-gpt2-openai    namespace=${TEST_NS}